harness-evolver 2.9.1 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -117
- package/agents/evolver-architect.md +53 -0
- package/agents/evolver-critic.md +44 -0
- package/agents/evolver-proposer.md +128 -0
- package/agents/evolver-testgen.md +67 -0
- package/bin/install.js +181 -171
- package/package.json +7 -7
- package/skills/deploy/SKILL.md +49 -56
- package/skills/evolve/SKILL.md +156 -687
- package/skills/setup/SKILL.md +182 -0
- package/skills/status/SKILL.md +23 -21
- package/tools/read_results.py +240 -0
- package/tools/run_eval.py +202 -0
- package/tools/seed_from_traces.py +36 -8
- package/tools/setup.py +393 -0
- package/tools/trace_insights.py +86 -14
- package/agents/harness-evolver-architect.md +0 -173
- package/agents/harness-evolver-critic.md +0 -132
- package/agents/harness-evolver-judge.md +0 -110
- package/agents/harness-evolver-proposer.md +0 -317
- package/agents/harness-evolver-testgen.md +0 -112
- package/examples/classifier/README.md +0 -25
- package/examples/classifier/config.json +0 -3
- package/examples/classifier/eval.py +0 -58
- package/examples/classifier/harness.py +0 -111
- package/examples/classifier/tasks/task_001.json +0 -1
- package/examples/classifier/tasks/task_002.json +0 -1
- package/examples/classifier/tasks/task_003.json +0 -1
- package/examples/classifier/tasks/task_004.json +0 -1
- package/examples/classifier/tasks/task_005.json +0 -1
- package/examples/classifier/tasks/task_006.json +0 -1
- package/examples/classifier/tasks/task_007.json +0 -1
- package/examples/classifier/tasks/task_008.json +0 -1
- package/examples/classifier/tasks/task_009.json +0 -1
- package/examples/classifier/tasks/task_010.json +0 -1
- package/skills/architect/SKILL.md +0 -93
- package/skills/compare/SKILL.md +0 -73
- package/skills/critic/SKILL.md +0 -67
- package/skills/diagnose/SKILL.md +0 -96
- package/skills/import-traces/SKILL.md +0 -102
- package/skills/init/SKILL.md +0 -293
- package/tools/__pycache__/detect_stack.cpython-313.pyc +0 -0
- package/tools/__pycache__/init.cpython-313.pyc +0 -0
- package/tools/__pycache__/seed_from_traces.cpython-313.pyc +0 -0
- package/tools/__pycache__/trace_logger.cpython-313.pyc +0 -0
- package/tools/eval_llm_judge.py +0 -233
- package/tools/eval_passthrough.py +0 -55
- package/tools/evaluate.py +0 -255
- package/tools/import_traces.py +0 -229
- package/tools/init.py +0 -531
- package/tools/llm_api.py +0 -125
- package/tools/state.py +0 -219
- package/tools/test_growth.py +0 -230
- package/tools/trace_logger.py +0 -42
package/bin/install.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
|
-
* Harness Evolver installer.
|
|
4
|
-
* Copies skills/agents/tools
|
|
3
|
+
* Harness Evolver v3 installer.
|
|
4
|
+
* Copies skills/agents/tools to runtime directories (GSD pattern).
|
|
5
|
+
* Installs Python dependencies (langsmith + openevals).
|
|
5
6
|
*
|
|
6
7
|
* Usage: npx harness-evolver@latest
|
|
7
8
|
*/
|
|
@@ -27,7 +28,7 @@ const LOGO = `${BOLD}${GREEN}
|
|
|
27
28
|
╠═╣╠═╣╠╦╝║║║║╣ ╚═╗╚═╗ ║╣ ╚╗╔╝║ ║║ ╚╗╔╝║╣ ╠╦╝
|
|
28
29
|
╩ ╩╩ ╩╩╚═╝╚╝╚═╝╚═╝╚═╝ ╚═╝ ╚╝ ╚═╝╩═╝ ╚╝ ╚═╝╩╚═
|
|
29
30
|
${RESET}
|
|
30
|
-
${DIM}${GREEN}
|
|
31
|
+
${DIM}${GREEN} LangSmith-native agent optimization v${VERSION}${RESET}
|
|
31
32
|
`;
|
|
32
33
|
|
|
33
34
|
function ask(rl, question) {
|
|
@@ -39,6 +40,7 @@ function copyDir(src, dest) {
|
|
|
39
40
|
for (const entry of fs.readdirSync(src, { withFileTypes: true })) {
|
|
40
41
|
const srcPath = path.join(src, entry.name);
|
|
41
42
|
const destPath = path.join(dest, entry.name);
|
|
43
|
+
if (entry.name === "__pycache__") continue;
|
|
42
44
|
if (entry.isDirectory()) {
|
|
43
45
|
copyDir(srcPath, destPath);
|
|
44
46
|
} else {
|
|
@@ -70,7 +72,7 @@ function checkCommand(cmd) {
|
|
|
70
72
|
}
|
|
71
73
|
}
|
|
72
74
|
|
|
73
|
-
function
|
|
75
|
+
function installSkillsAndAgents(runtimeDir, scope) {
|
|
74
76
|
const baseDir = scope === "local"
|
|
75
77
|
? path.join(process.cwd(), runtimeDir)
|
|
76
78
|
: path.join(HOME, runtimeDir);
|
|
@@ -78,204 +80,132 @@ function installForRuntime(runtimeDir, scope) {
|
|
|
78
80
|
const skillsDir = path.join(baseDir, "skills");
|
|
79
81
|
const agentsDir = path.join(baseDir, "agents");
|
|
80
82
|
|
|
81
|
-
// Skills
|
|
83
|
+
// Skills — read SKILL.md name field, use directory name for filesystem
|
|
82
84
|
const skillsSource = path.join(PLUGIN_ROOT, "skills");
|
|
83
85
|
if (fs.existsSync(skillsSource)) {
|
|
84
86
|
for (const skill of fs.readdirSync(skillsSource, { withFileTypes: true })) {
|
|
85
|
-
if (skill.isDirectory())
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
87
|
+
if (!skill.isDirectory()) continue;
|
|
88
|
+
const src = path.join(skillsSource, skill.name);
|
|
89
|
+
const skillMd = path.join(src, "SKILL.md");
|
|
90
|
+
if (!fs.existsSync(skillMd)) continue;
|
|
91
|
+
|
|
92
|
+
// Read the skill name from frontmatter
|
|
93
|
+
const content = fs.readFileSync(skillMd, "utf8");
|
|
94
|
+
const nameMatch = content.match(/^name:\s*(.+)$/m);
|
|
95
|
+
const skillName = nameMatch ? nameMatch[1].trim() : skill.name;
|
|
96
|
+
|
|
97
|
+
const dest = path.join(skillsDir, skill.name);
|
|
98
|
+
copyDir(src, dest);
|
|
99
|
+
console.log(` ${GREEN}✓${RESET} ${skillName}`);
|
|
91
100
|
}
|
|
92
101
|
}
|
|
93
102
|
|
|
94
|
-
// Cleanup old commands/
|
|
103
|
+
// Cleanup old v2 commands/ directory
|
|
95
104
|
const oldCommandsDir = path.join(baseDir, "commands", "harness-evolver");
|
|
96
105
|
if (fs.existsSync(oldCommandsDir)) {
|
|
97
106
|
fs.rmSync(oldCommandsDir, { recursive: true, force: true });
|
|
98
|
-
console.log(` ${
|
|
107
|
+
console.log(` ${DIM}Cleaned up old commands/ directory${RESET}`);
|
|
99
108
|
}
|
|
100
109
|
|
|
101
|
-
// Agents
|
|
110
|
+
// Agents
|
|
102
111
|
const agentsSource = path.join(PLUGIN_ROOT, "agents");
|
|
103
112
|
if (fs.existsSync(agentsSource)) {
|
|
104
113
|
fs.mkdirSync(agentsDir, { recursive: true });
|
|
105
114
|
for (const agent of fs.readdirSync(agentsSource)) {
|
|
115
|
+
if (!agent.endsWith(".md")) continue;
|
|
106
116
|
copyFile(path.join(agentsSource, agent), path.join(agentsDir, agent));
|
|
107
|
-
|
|
117
|
+
const agentName = agent.replace(".md", "");
|
|
118
|
+
console.log(` ${GREEN}✓${RESET} agent: ${agentName}`);
|
|
108
119
|
}
|
|
109
120
|
}
|
|
110
121
|
}
|
|
111
122
|
|
|
112
123
|
function installTools() {
|
|
113
|
-
const toolsDir = path.join(HOME, ".
|
|
124
|
+
const toolsDir = path.join(HOME, ".evolver", "tools");
|
|
114
125
|
const toolsSource = path.join(PLUGIN_ROOT, "tools");
|
|
115
126
|
if (fs.existsSync(toolsSource)) {
|
|
116
127
|
fs.mkdirSync(toolsDir, { recursive: true });
|
|
128
|
+
let count = 0;
|
|
117
129
|
for (const tool of fs.readdirSync(toolsSource)) {
|
|
118
|
-
if (tool.endsWith(".py"))
|
|
119
|
-
|
|
120
|
-
|
|
130
|
+
if (!tool.endsWith(".py")) continue;
|
|
131
|
+
copyFile(path.join(toolsSource, tool), path.join(toolsDir, tool));
|
|
132
|
+
count++;
|
|
121
133
|
}
|
|
122
|
-
console.log(` ${GREEN}✓${RESET}
|
|
134
|
+
console.log(` ${GREEN}✓${RESET} ${count} tools installed to ~/.evolver/tools/`);
|
|
123
135
|
}
|
|
124
136
|
}
|
|
125
137
|
|
|
126
|
-
function
|
|
127
|
-
|
|
128
|
-
const examplesSource = path.join(PLUGIN_ROOT, "examples");
|
|
129
|
-
if (fs.existsSync(examplesSource)) {
|
|
130
|
-
copyDir(examplesSource, examplesDir);
|
|
131
|
-
console.log(` ${GREEN}✓${RESET} Installed examples to ~/.harness-evolver/examples/`);
|
|
132
|
-
}
|
|
133
|
-
}
|
|
138
|
+
function installPythonDeps() {
|
|
139
|
+
console.log(`\n ${YELLOW}Installing Python dependencies...${RESET}`);
|
|
134
140
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
fs.writeFileSync(installedPath, JSON.stringify(data, null, 2) + "\n");
|
|
143
|
-
}
|
|
144
|
-
} catch {}
|
|
141
|
+
// Try multiple pip variants
|
|
142
|
+
const commands = [
|
|
143
|
+
"pip install langsmith openevals",
|
|
144
|
+
"uv pip install langsmith openevals",
|
|
145
|
+
"pip3 install langsmith openevals",
|
|
146
|
+
"python3 -m pip install langsmith openevals",
|
|
147
|
+
];
|
|
145
148
|
|
|
146
|
-
const
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
149
|
+
for (const cmd of commands) {
|
|
150
|
+
try {
|
|
151
|
+
execSync(cmd, { stdio: "pipe", timeout: 120000 });
|
|
152
|
+
console.log(` ${GREEN}✓${RESET} langsmith + openevals installed`);
|
|
153
|
+
return true;
|
|
154
|
+
} catch {
|
|
155
|
+
continue;
|
|
152
156
|
}
|
|
153
|
-
} catch {}
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
async function main() {
|
|
157
|
-
console.log(LOGO);
|
|
158
|
-
console.log(` ${DIM}Harness Evolver v${VERSION}${RESET}`);
|
|
159
|
-
console.log(` ${DIM}Meta-Harness-style autonomous harness optimization${RESET}`);
|
|
160
|
-
console.log();
|
|
161
|
-
|
|
162
|
-
if (!checkPython()) {
|
|
163
|
-
console.error(` ${RED}ERROR:${RESET} python3 not found in PATH. Install Python 3.8+ first.`);
|
|
164
|
-
process.exit(1);
|
|
165
|
-
}
|
|
166
|
-
console.log(` ${GREEN}✓${RESET} python3 found`);
|
|
167
|
-
|
|
168
|
-
const RUNTIMES = [
|
|
169
|
-
{ name: "Claude Code", dir: ".claude" },
|
|
170
|
-
{ name: "Cursor", dir: ".cursor" },
|
|
171
|
-
{ name: "Codex", dir: ".codex" },
|
|
172
|
-
{ name: "Windsurf", dir: ".windsurf" },
|
|
173
|
-
].filter(r => fs.existsSync(path.join(HOME, r.dir)));
|
|
174
|
-
|
|
175
|
-
if (RUNTIMES.length === 0) {
|
|
176
|
-
console.error(`\n ${RED}ERROR:${RESET} No supported runtime detected.`);
|
|
177
|
-
console.error(` Install Claude Code, Cursor, Codex, or Windsurf first.`);
|
|
178
|
-
process.exit(1);
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
|
|
182
|
-
|
|
183
|
-
console.log(`\n ${YELLOW}Which runtime(s) would you like to install for?${RESET}\n`);
|
|
184
|
-
RUNTIMES.forEach((r, i) => console.log(` ${i + 1}) ${r.name.padEnd(14)} (~/${r.dir})`));
|
|
185
|
-
if (RUNTIMES.length > 1) {
|
|
186
|
-
console.log(` ${RUNTIMES.length + 1}) All`);
|
|
187
|
-
console.log(`\n ${DIM}Select multiple: 1,2 or 1 2${RESET}`);
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
const runtimeAnswer = await ask(rl, `\n ${YELLOW}Choice [1]:${RESET} `);
|
|
191
|
-
const runtimeInput = (runtimeAnswer.trim() || "1");
|
|
192
|
-
|
|
193
|
-
let selected;
|
|
194
|
-
if (runtimeInput === String(RUNTIMES.length + 1)) {
|
|
195
|
-
selected = RUNTIMES;
|
|
196
|
-
} else {
|
|
197
|
-
const indices = runtimeInput.split(/[,\s]+/).map(s => parseInt(s, 10) - 1);
|
|
198
|
-
selected = indices.filter(i => i >= 0 && i < RUNTIMES.length).map(i => RUNTIMES[i]);
|
|
199
157
|
}
|
|
200
|
-
if (selected.length === 0) selected = [RUNTIMES[0]];
|
|
201
|
-
|
|
202
|
-
console.log(`\n ${YELLOW}Where would you like to install?${RESET}\n`);
|
|
203
|
-
console.log(` 1) Global (~/${selected[0].dir}) - available in all projects`);
|
|
204
|
-
console.log(` 2) Local (./${selected[0].dir}) - this project only`);
|
|
205
|
-
|
|
206
|
-
const scopeAnswer = await ask(rl, `\n ${YELLOW}Choice [1]:${RESET} `);
|
|
207
|
-
const scope = (scopeAnswer.trim() === "2") ? "local" : "global";
|
|
208
158
|
|
|
209
|
-
console.log();
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
cleanupBrokenPluginEntry(runtime.dir);
|
|
214
|
-
installForRuntime(runtime.dir, scope);
|
|
215
|
-
console.log();
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
installTools();
|
|
219
|
-
installExamples();
|
|
220
|
-
|
|
221
|
-
const versionPath = path.join(HOME, ".harness-evolver", "VERSION");
|
|
222
|
-
fs.mkdirSync(path.dirname(versionPath), { recursive: true });
|
|
223
|
-
fs.writeFileSync(versionPath, VERSION);
|
|
224
|
-
console.log(` ${GREEN}✓${RESET} VERSION ${VERSION}`);
|
|
225
|
-
|
|
226
|
-
console.log(`\n ${GREEN}Done!${RESET} Restart Claude Code, then run ${GREEN}/harness-evolver:init${RESET}\n`);
|
|
159
|
+
console.log(` ${YELLOW}!${RESET} Could not auto-install Python packages.`);
|
|
160
|
+
console.log(` Run manually: ${BOLD}pip install langsmith openevals${RESET}`);
|
|
161
|
+
return false;
|
|
162
|
+
}
|
|
227
163
|
|
|
228
|
-
|
|
229
|
-
console.log(
|
|
230
|
-
console.log(` These enhance the proposer with rich traces and up-to-date documentation.\n`);
|
|
164
|
+
async function configureLangSmith(rl) {
|
|
165
|
+
console.log(`\n ${YELLOW}LangSmith Configuration${RESET} ${DIM}(required for v3)${RESET}\n`);
|
|
231
166
|
|
|
232
|
-
//
|
|
233
|
-
const hasLangsmithCli = checkCommand("langsmith-cli --version");
|
|
167
|
+
// Check if already configured
|
|
234
168
|
const langsmithCredsDir = process.platform === "darwin"
|
|
235
169
|
? path.join(HOME, "Library", "Application Support", "langsmith-cli")
|
|
236
170
|
: path.join(HOME, ".config", "langsmith-cli");
|
|
237
171
|
const langsmithCredsFile = path.join(langsmithCredsDir, "credentials");
|
|
238
|
-
const hasLangsmithCreds = fs.existsSync(langsmithCredsFile);
|
|
239
172
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
}
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
const lsAnswer = await ask(rl, `\n ${YELLOW}Install langsmith-cli? [y/N]:${RESET} `);
|
|
246
|
-
if (lsAnswer.trim().toLowerCase() === "y") {
|
|
247
|
-
console.log(`\n Installing langsmith-cli...`);
|
|
248
|
-
try {
|
|
249
|
-
execSync("uv tool install langsmith-cli", { stdio: "inherit" });
|
|
250
|
-
console.log(`\n ${GREEN}✓${RESET} langsmith-cli installed`);
|
|
251
|
-
} catch {
|
|
252
|
-
console.log(`\n ${RED}Failed.${RESET} Install manually: uv tool install langsmith-cli\n`);
|
|
253
|
-
}
|
|
254
|
-
}
|
|
255
|
-
} else {
|
|
256
|
-
console.log(` ${GREEN}✓${RESET} langsmith-cli already installed`);
|
|
257
|
-
}
|
|
173
|
+
// Check env var
|
|
174
|
+
if (process.env.LANGSMITH_API_KEY) {
|
|
175
|
+
console.log(` ${GREEN}✓${RESET} LANGSMITH_API_KEY found in environment`);
|
|
176
|
+
return;
|
|
177
|
+
}
|
|
258
178
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
179
|
+
// Check credentials file
|
|
180
|
+
if (fs.existsSync(langsmithCredsFile)) {
|
|
181
|
+
console.log(` ${GREEN}✓${RESET} LangSmith credentials found at ${DIM}${langsmithCredsFile}${RESET}`);
|
|
182
|
+
return;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
// Ask for API key
|
|
186
|
+
console.log(` ${BOLD}LangSmith API Key${RESET} — get yours at ${DIM}https://smith.langchain.com/settings${RESET}`);
|
|
187
|
+
console.log(` ${DIM}LangSmith is required for v3 (datasets, experiments, evaluators).${RESET}\n`);
|
|
188
|
+
const apiKey = await ask(rl, ` ${YELLOW}Paste your LangSmith API key:${RESET} `);
|
|
189
|
+
const key = apiKey.trim();
|
|
190
|
+
|
|
191
|
+
if (key && key.startsWith("lsv2_")) {
|
|
192
|
+
try {
|
|
193
|
+
fs.mkdirSync(langsmithCredsDir, { recursive: true });
|
|
194
|
+
fs.writeFileSync(langsmithCredsFile, `LANGSMITH_API_KEY=${key}\n`);
|
|
195
|
+
console.log(` ${GREEN}✓${RESET} API key saved to ${DIM}${langsmithCredsFile}${RESET}`);
|
|
196
|
+
} catch {
|
|
197
|
+
console.log(` ${RED}Failed to save.${RESET} Add to your shell: export LANGSMITH_API_KEY=${key}`);
|
|
277
198
|
}
|
|
199
|
+
} else if (key) {
|
|
200
|
+
console.log(` ${YELLOW}Doesn't look like a LangSmith key (should start with lsv2_).${RESET}`);
|
|
201
|
+
console.log(` Add to your shell: ${BOLD}export LANGSMITH_API_KEY=your_key${RESET}`);
|
|
202
|
+
} else {
|
|
203
|
+
console.log(` ${YELLOW}Skipped.${RESET} You must set LANGSMITH_API_KEY before using /evolver:setup`);
|
|
278
204
|
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
async function configureOptionalIntegrations(rl) {
|
|
208
|
+
console.log(`\n ${YELLOW}Optional Integrations${RESET}\n`);
|
|
279
209
|
|
|
280
210
|
// Context7 MCP
|
|
281
211
|
const hasContext7 = (() => {
|
|
@@ -289,19 +219,18 @@ async function main() {
|
|
|
289
219
|
} catch {}
|
|
290
220
|
return false;
|
|
291
221
|
})();
|
|
222
|
+
|
|
292
223
|
if (hasContext7) {
|
|
293
224
|
console.log(` ${GREEN}✓${RESET} Context7 MCP already configured`);
|
|
294
225
|
} else {
|
|
295
|
-
console.log(
|
|
296
|
-
console.log(` ${DIM}claude mcp add context7 -- npx -y @upstash/context7-mcp@latest${RESET}`);
|
|
226
|
+
console.log(` ${BOLD}Context7 MCP${RESET} — up-to-date library documentation (LangChain, OpenAI, etc.)`);
|
|
297
227
|
const c7Answer = await ask(rl, `\n ${YELLOW}Install Context7 MCP? [y/N]:${RESET} `);
|
|
298
228
|
if (c7Answer.trim().toLowerCase() === "y") {
|
|
299
|
-
console.log(`\n Installing Context7 MCP...`);
|
|
300
229
|
try {
|
|
301
230
|
execSync("claude mcp add context7 -- npx -y @upstash/context7-mcp@latest", { stdio: "inherit" });
|
|
302
231
|
console.log(`\n ${GREEN}✓${RESET} Context7 MCP configured`);
|
|
303
232
|
} catch {
|
|
304
|
-
console.log(`\n ${RED}Failed.${RESET} Install manually: claude mcp add context7 -- npx -y @upstash/context7-mcp@latest
|
|
233
|
+
console.log(`\n ${RED}Failed.${RESET} Install manually: claude mcp add context7 -- npx -y @upstash/context7-mcp@latest`);
|
|
305
234
|
}
|
|
306
235
|
}
|
|
307
236
|
}
|
|
@@ -318,28 +247,109 @@ async function main() {
|
|
|
318
247
|
} catch {}
|
|
319
248
|
return false;
|
|
320
249
|
})();
|
|
250
|
+
|
|
321
251
|
if (hasLcDocs) {
|
|
322
252
|
console.log(` ${GREEN}✓${RESET} LangChain Docs MCP already configured`);
|
|
323
253
|
} else {
|
|
324
|
-
console.log(`\n ${BOLD}LangChain Docs MCP${RESET} — LangChain/LangGraph/LangSmith documentation
|
|
325
|
-
console.log(` ${DIM}claude mcp add docs-langchain --transport http https://docs.langchain.com/mcp${RESET}`);
|
|
254
|
+
console.log(`\n ${BOLD}LangChain Docs MCP${RESET} — LangChain/LangGraph/LangSmith documentation`);
|
|
326
255
|
const lcAnswer = await ask(rl, `\n ${YELLOW}Install LangChain Docs MCP? [y/N]:${RESET} `);
|
|
327
256
|
if (lcAnswer.trim().toLowerCase() === "y") {
|
|
328
|
-
console.log(`\n Installing LangChain Docs MCP...`);
|
|
329
257
|
try {
|
|
330
258
|
execSync("claude mcp add docs-langchain --transport http https://docs.langchain.com/mcp", { stdio: "inherit" });
|
|
331
259
|
console.log(`\n ${GREEN}✓${RESET} LangChain Docs MCP configured`);
|
|
332
260
|
} catch {
|
|
333
|
-
console.log(`\n ${RED}Failed.${RESET} Install manually: claude mcp add docs-langchain --transport http https://docs.langchain.com/mcp
|
|
261
|
+
console.log(`\n ${RED}Failed.${RESET} Install manually: claude mcp add docs-langchain --transport http https://docs.langchain.com/mcp`);
|
|
334
262
|
}
|
|
335
263
|
}
|
|
336
264
|
}
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
async function main() {
|
|
268
|
+
console.log(LOGO);
|
|
269
|
+
|
|
270
|
+
if (!checkPython()) {
|
|
271
|
+
console.error(` ${RED}ERROR:${RESET} python3 not found. Install Python 3.10+ first.`);
|
|
272
|
+
process.exit(1);
|
|
273
|
+
}
|
|
274
|
+
console.log(` ${GREEN}✓${RESET} python3 found`);
|
|
275
|
+
|
|
276
|
+
// Detect runtimes
|
|
277
|
+
const RUNTIMES = [
|
|
278
|
+
{ name: "Claude Code", dir: ".claude" },
|
|
279
|
+
{ name: "Cursor", dir: ".cursor" },
|
|
280
|
+
{ name: "Codex", dir: ".codex" },
|
|
281
|
+
{ name: "Windsurf", dir: ".windsurf" },
|
|
282
|
+
].filter(r => fs.existsSync(path.join(HOME, r.dir)));
|
|
283
|
+
|
|
284
|
+
if (RUNTIMES.length === 0) {
|
|
285
|
+
console.error(`\n ${RED}ERROR:${RESET} No supported runtime detected.`);
|
|
286
|
+
console.error(` Install Claude Code, Cursor, Codex, or Windsurf first.`);
|
|
287
|
+
process.exit(1);
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
|
|
291
|
+
|
|
292
|
+
// Runtime selection
|
|
293
|
+
console.log(`\n ${YELLOW}Which runtime(s) to install for?${RESET}\n`);
|
|
294
|
+
RUNTIMES.forEach((r, i) => console.log(` ${i + 1}) ${r.name.padEnd(14)} (~/${r.dir})`));
|
|
295
|
+
if (RUNTIMES.length > 1) {
|
|
296
|
+
console.log(` ${RUNTIMES.length + 1}) All`);
|
|
297
|
+
console.log(`\n ${DIM}Select multiple: 1,2 or 1 2${RESET}`);
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
const runtimeAnswer = await ask(rl, `\n ${YELLOW}Choice [1]:${RESET} `);
|
|
301
|
+
const runtimeInput = (runtimeAnswer.trim() || "1");
|
|
337
302
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
303
|
+
let selected;
|
|
304
|
+
if (runtimeInput === String(RUNTIMES.length + 1)) {
|
|
305
|
+
selected = RUNTIMES;
|
|
306
|
+
} else {
|
|
307
|
+
const indices = runtimeInput.split(/[,\s]+/).map(s => parseInt(s, 10) - 1);
|
|
308
|
+
selected = indices.filter(i => i >= 0 && i < RUNTIMES.length).map(i => RUNTIMES[i]);
|
|
309
|
+
}
|
|
310
|
+
if (selected.length === 0) selected = [RUNTIMES[0]];
|
|
311
|
+
|
|
312
|
+
// Scope selection
|
|
313
|
+
console.log(`\n ${YELLOW}Where to install?${RESET}\n`);
|
|
314
|
+
console.log(` 1) Global (~/${selected[0].dir}) — available in all projects`);
|
|
315
|
+
console.log(` 2) Local (./${selected[0].dir}) — this project only`);
|
|
316
|
+
|
|
317
|
+
const scopeAnswer = await ask(rl, `\n ${YELLOW}Choice [1]:${RESET} `);
|
|
318
|
+
const scope = (scopeAnswer.trim() === "2") ? "local" : "global";
|
|
319
|
+
|
|
320
|
+
// Install skills + agents
|
|
321
|
+
console.log(`\n ${BOLD}Installing skills & agents${RESET}\n`);
|
|
322
|
+
for (const runtime of selected) {
|
|
323
|
+
console.log(` ${GREEN}${runtime.name}${RESET}:`);
|
|
324
|
+
installSkillsAndAgents(runtime.dir, scope);
|
|
325
|
+
console.log();
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
// Install tools
|
|
329
|
+
console.log(` ${BOLD}Installing tools${RESET}`);
|
|
330
|
+
installTools();
|
|
331
|
+
|
|
332
|
+
// Version marker
|
|
333
|
+
const versionPath = path.join(HOME, ".evolver", "VERSION");
|
|
334
|
+
fs.mkdirSync(path.dirname(versionPath), { recursive: true });
|
|
335
|
+
fs.writeFileSync(versionPath, VERSION);
|
|
336
|
+
|
|
337
|
+
// Install Python deps
|
|
338
|
+
installPythonDeps();
|
|
339
|
+
|
|
340
|
+
// Configure LangSmith (required)
|
|
341
|
+
await configureLangSmith(rl);
|
|
342
|
+
|
|
343
|
+
// Optional integrations
|
|
344
|
+
await configureOptionalIntegrations(rl);
|
|
345
|
+
|
|
346
|
+
// Done
|
|
347
|
+
console.log(`\n ${GREEN}${BOLD}Setup complete!${RESET}\n`);
|
|
348
|
+
console.log(` ${DIM}Restart Claude Code, then:${RESET}`);
|
|
349
|
+
console.log(` ${GREEN}/evolver:setup${RESET} — configure LangSmith for your project`);
|
|
350
|
+
console.log(` ${GREEN}/evolver:evolve${RESET} — run the optimization loop`);
|
|
351
|
+
console.log(` ${GREEN}/evolver:status${RESET} — check progress`);
|
|
352
|
+
console.log(` ${GREEN}/evolver:deploy${RESET} — finalize and push`);
|
|
343
353
|
console.log(`\n ${DIM}GitHub: https://github.com/raphaelchristi/harness-evolver${RESET}\n`);
|
|
344
354
|
|
|
345
355
|
rl.close();
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "harness-evolver",
|
|
3
|
-
"version": "
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "3.0.1",
|
|
4
|
+
"description": "LangSmith-native autonomous agent optimization for Claude Code",
|
|
5
5
|
"author": "Raphael Valdetaro",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"repository": {
|
|
@@ -10,11 +10,12 @@
|
|
|
10
10
|
},
|
|
11
11
|
"keywords": [
|
|
12
12
|
"claude-code",
|
|
13
|
-
"
|
|
14
|
-
"meta-harness",
|
|
13
|
+
"langsmith",
|
|
15
14
|
"llm",
|
|
16
15
|
"optimization",
|
|
17
|
-
"agent"
|
|
16
|
+
"agent",
|
|
17
|
+
"evolution",
|
|
18
|
+
"meta-harness"
|
|
18
19
|
],
|
|
19
20
|
"bin": {
|
|
20
21
|
"harness-evolver": "bin/install.js"
|
|
@@ -23,7 +24,6 @@
|
|
|
23
24
|
"bin/",
|
|
24
25
|
"skills/",
|
|
25
26
|
"agents/",
|
|
26
|
-
"tools/"
|
|
27
|
-
"examples/"
|
|
27
|
+
"tools/"
|
|
28
28
|
]
|
|
29
29
|
}
|
package/skills/deploy/SKILL.md
CHANGED
|
@@ -1,82 +1,75 @@
|
|
|
1
1
|
---
|
|
2
|
-
name:
|
|
3
|
-
description: "Use when the user
|
|
4
|
-
argument-hint: "[version]"
|
|
2
|
+
name: evolver:deploy
|
|
3
|
+
description: "Use when the user is done evolving and wants to finalize, clean up, tag the result, or push the optimized agent."
|
|
5
4
|
allowed-tools: [Read, Write, Bash, Glob, AskUserQuestion]
|
|
6
5
|
---
|
|
7
6
|
|
|
8
|
-
# /
|
|
7
|
+
# /evolver:deploy
|
|
9
8
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
## Arguments
|
|
13
|
-
|
|
14
|
-
- `version` — optional. If not given, deploys the best version from `summary.json`.
|
|
9
|
+
Finalize the evolution results. In v3, the best code is already in the main branch (auto-merged during evolve). Deploy is about cleanup, tagging, and pushing.
|
|
15
10
|
|
|
16
11
|
## What To Do
|
|
17
12
|
|
|
18
|
-
### 1.
|
|
13
|
+
### 1. Show Results
|
|
19
14
|
|
|
20
15
|
```bash
|
|
21
|
-
python3 -c "
|
|
16
|
+
python3 -c "
|
|
17
|
+
import json
|
|
18
|
+
c = json.load(open('.evolver.json'))
|
|
19
|
+
baseline = c['history'][0]['score'] if c['history'] else 0
|
|
20
|
+
best = c['best_score']
|
|
21
|
+
improvement = best - baseline
|
|
22
|
+
print(f'Baseline: {baseline:.3f}')
|
|
23
|
+
print(f'Best: {best:.3f} (+{improvement:.3f}, {improvement/max(baseline,0.001)*100:.0f}% improvement)')
|
|
24
|
+
print(f'Iterations: {c[\"iterations\"]}')
|
|
25
|
+
print(f'Experiment: {c[\"best_experiment\"]}')
|
|
26
|
+
"
|
|
22
27
|
```
|
|
23
28
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
### 2. Show What Will Be Deployed
|
|
27
|
-
|
|
29
|
+
Show git diff from before evolution started:
|
|
28
30
|
```bash
|
|
29
|
-
|
|
30
|
-
cat .harness-evolver/harnesses/{version}/scores.json
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
Report: version, score, improvement over baseline, what changed.
|
|
34
|
-
|
|
35
|
-
### 3. Ask Deploy Options (Interactive)
|
|
36
|
-
|
|
37
|
-
Use AskUserQuestion with TWO questions:
|
|
38
|
-
|
|
39
|
-
```
|
|
40
|
-
Question 1: "Where should the evolved harness go?"
|
|
41
|
-
Header: "Deploy to"
|
|
42
|
-
Options:
|
|
43
|
-
- "Overwrite original" — Replace {original_harness_path} with the evolved version
|
|
44
|
-
- "Copy to new file" — Save as harness_evolved.py alongside the original
|
|
45
|
-
- "Just show the diff" — Don't copy anything, just show what changed
|
|
31
|
+
git log --oneline --since="$(python3 -c "import json; print(json.load(open('.evolver.json'))['created_at'][:10])")" | head -20
|
|
46
32
|
```
|
|
47
33
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
34
|
+
### 2. Ask What To Do (interactive)
|
|
35
|
+
|
|
36
|
+
```json
|
|
37
|
+
{
|
|
38
|
+
"questions": [{
|
|
39
|
+
"question": "Evolution complete. What would you like to do?",
|
|
40
|
+
"header": "Deploy",
|
|
41
|
+
"multiSelect": false,
|
|
42
|
+
"options": [
|
|
43
|
+
{"label": "Tag and push", "description": "Create a git tag with the score and push to remote"},
|
|
44
|
+
{"label": "Just review", "description": "Show the full diff of all changes made during evolution"},
|
|
45
|
+
{"label": "Clean up only", "description": "Remove temporary files (trace_insights.json, etc.) but don't push"}
|
|
46
|
+
]
|
|
47
|
+
}]
|
|
48
|
+
}
|
|
55
49
|
```
|
|
56
50
|
|
|
57
|
-
###
|
|
51
|
+
### 3. Execute
|
|
58
52
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
-
|
|
63
|
-
-
|
|
64
|
-
|
|
53
|
+
**If "Tag and push"**:
|
|
54
|
+
```bash
|
|
55
|
+
VERSION=$(python3 -c "import json; c=json.load(open('.evolver.json')); print(f'evolver-v{c[\"iterations\"]}')")
|
|
56
|
+
SCORE=$(python3 -c "import json; print(f'{json.load(open(\".evolver.json\"))[\"best_score\"]:.3f}')")
|
|
57
|
+
git tag -a "$VERSION" -m "Evolver: score $SCORE"
|
|
58
|
+
git push origin main --tags
|
|
59
|
+
```
|
|
65
60
|
|
|
66
|
-
**If "
|
|
61
|
+
**If "Just review"**:
|
|
67
62
|
```bash
|
|
68
|
-
|
|
69
|
-
cp .harness-evolver/harnesses/{version}/config.json ./config_evolved.json # if exists
|
|
63
|
+
git diff HEAD~{iterations} HEAD
|
|
70
64
|
```
|
|
71
65
|
|
|
72
|
-
**If "
|
|
66
|
+
**If "Clean up only"**:
|
|
73
67
|
```bash
|
|
74
|
-
|
|
68
|
+
rm -f trace_insights.json best_results.json comparison.json production_seed.md production_seed.json
|
|
75
69
|
```
|
|
76
|
-
Do not copy anything.
|
|
77
70
|
|
|
78
|
-
###
|
|
71
|
+
### 4. Report
|
|
79
72
|
|
|
80
|
-
- What was
|
|
81
|
-
-
|
|
82
|
-
- Suggest
|
|
73
|
+
- What was done
|
|
74
|
+
- LangSmith experiment URL for the best result
|
|
75
|
+
- Suggest reviewing the changes before deploying to production
|