harness-evolver 2.9.1 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/README.md +62 -117
  2. package/agents/evolver-architect.md +53 -0
  3. package/agents/evolver-critic.md +44 -0
  4. package/agents/evolver-proposer.md +128 -0
  5. package/agents/evolver-testgen.md +67 -0
  6. package/bin/install.js +181 -171
  7. package/package.json +7 -7
  8. package/skills/deploy/SKILL.md +49 -56
  9. package/skills/evolve/SKILL.md +156 -687
  10. package/skills/setup/SKILL.md +182 -0
  11. package/skills/status/SKILL.md +23 -21
  12. package/tools/read_results.py +240 -0
  13. package/tools/run_eval.py +202 -0
  14. package/tools/seed_from_traces.py +36 -8
  15. package/tools/setup.py +393 -0
  16. package/tools/trace_insights.py +86 -14
  17. package/agents/harness-evolver-architect.md +0 -173
  18. package/agents/harness-evolver-critic.md +0 -132
  19. package/agents/harness-evolver-judge.md +0 -110
  20. package/agents/harness-evolver-proposer.md +0 -317
  21. package/agents/harness-evolver-testgen.md +0 -112
  22. package/examples/classifier/README.md +0 -25
  23. package/examples/classifier/config.json +0 -3
  24. package/examples/classifier/eval.py +0 -58
  25. package/examples/classifier/harness.py +0 -111
  26. package/examples/classifier/tasks/task_001.json +0 -1
  27. package/examples/classifier/tasks/task_002.json +0 -1
  28. package/examples/classifier/tasks/task_003.json +0 -1
  29. package/examples/classifier/tasks/task_004.json +0 -1
  30. package/examples/classifier/tasks/task_005.json +0 -1
  31. package/examples/classifier/tasks/task_006.json +0 -1
  32. package/examples/classifier/tasks/task_007.json +0 -1
  33. package/examples/classifier/tasks/task_008.json +0 -1
  34. package/examples/classifier/tasks/task_009.json +0 -1
  35. package/examples/classifier/tasks/task_010.json +0 -1
  36. package/skills/architect/SKILL.md +0 -93
  37. package/skills/compare/SKILL.md +0 -73
  38. package/skills/critic/SKILL.md +0 -67
  39. package/skills/diagnose/SKILL.md +0 -96
  40. package/skills/import-traces/SKILL.md +0 -102
  41. package/skills/init/SKILL.md +0 -293
  42. package/tools/__pycache__/detect_stack.cpython-313.pyc +0 -0
  43. package/tools/__pycache__/init.cpython-313.pyc +0 -0
  44. package/tools/__pycache__/seed_from_traces.cpython-313.pyc +0 -0
  45. package/tools/__pycache__/trace_logger.cpython-313.pyc +0 -0
  46. package/tools/eval_llm_judge.py +0 -233
  47. package/tools/eval_passthrough.py +0 -55
  48. package/tools/evaluate.py +0 -255
  49. package/tools/import_traces.py +0 -229
  50. package/tools/init.py +0 -531
  51. package/tools/llm_api.py +0 -125
  52. package/tools/state.py +0 -219
  53. package/tools/test_growth.py +0 -230
  54. package/tools/trace_logger.py +0 -42
package/bin/install.js CHANGED
@@ -1,7 +1,8 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
- * Harness Evolver installer.
4
- * Copies skills/agents/tools directly to runtime directories (GSD pattern).
3
+ * Harness Evolver v3 installer.
4
+ * Copies skills/agents/tools to runtime directories (GSD pattern).
5
+ * Installs Python dependencies (langsmith + openevals).
5
6
  *
6
7
  * Usage: npx harness-evolver@latest
7
8
  */
@@ -27,7 +28,7 @@ const LOGO = `${BOLD}${GREEN}
27
28
  ╠═╣╠═╣╠╦╝║║║║╣ ╚═╗╚═╗ ║╣ ╚╗╔╝║ ║║ ╚╗╔╝║╣ ╠╦╝
28
29
  ╩ ╩╩ ╩╩╚═╝╚╝╚═╝╚═╝╚═╝ ╚═╝ ╚╝ ╚═╝╩═╝ ╚╝ ╚═╝╩╚═
29
30
  ${RESET}
30
- ${DIM}${GREEN} End-to-end harness optimization for AI agents${RESET}
31
+ ${DIM}${GREEN} LangSmith-native agent optimization v${VERSION}${RESET}
31
32
  `;
32
33
 
33
34
  function ask(rl, question) {
@@ -39,6 +40,7 @@ function copyDir(src, dest) {
39
40
  for (const entry of fs.readdirSync(src, { withFileTypes: true })) {
40
41
  const srcPath = path.join(src, entry.name);
41
42
  const destPath = path.join(dest, entry.name);
43
+ if (entry.name === "__pycache__") continue;
42
44
  if (entry.isDirectory()) {
43
45
  copyDir(srcPath, destPath);
44
46
  } else {
@@ -70,7 +72,7 @@ function checkCommand(cmd) {
70
72
  }
71
73
  }
72
74
 
73
- function installForRuntime(runtimeDir, scope) {
75
+ function installSkillsAndAgents(runtimeDir, scope) {
74
76
  const baseDir = scope === "local"
75
77
  ? path.join(process.cwd(), runtimeDir)
76
78
  : path.join(HOME, runtimeDir);
@@ -78,204 +80,132 @@ function installForRuntime(runtimeDir, scope) {
78
80
  const skillsDir = path.join(baseDir, "skills");
79
81
  const agentsDir = path.join(baseDir, "agents");
80
82
 
81
- // Skills ~/.claude/skills/<skill-name>/SKILL.md (proper skills format)
83
+ // Skills read SKILL.md name field, use directory name for filesystem
82
84
  const skillsSource = path.join(PLUGIN_ROOT, "skills");
83
85
  if (fs.existsSync(skillsSource)) {
84
86
  for (const skill of fs.readdirSync(skillsSource, { withFileTypes: true })) {
85
- if (skill.isDirectory()) {
86
- const src = path.join(skillsSource, skill.name);
87
- const dest = path.join(skillsDir, "harness-evolver:" + skill.name);
88
- copyDir(src, dest);
89
- console.log(` ${GREEN}✓${RESET} Installed skill: harness-evolver:${skill.name}`);
90
- }
87
+ if (!skill.isDirectory()) continue;
88
+ const src = path.join(skillsSource, skill.name);
89
+ const skillMd = path.join(src, "SKILL.md");
90
+ if (!fs.existsSync(skillMd)) continue;
91
+
92
+ // Read the skill name from frontmatter
93
+ const content = fs.readFileSync(skillMd, "utf8");
94
+ const nameMatch = content.match(/^name:\s*(.+)$/m);
95
+ const skillName = nameMatch ? nameMatch[1].trim() : skill.name;
96
+
97
+ const dest = path.join(skillsDir, skill.name);
98
+ copyDir(src, dest);
99
+ console.log(` ${GREEN}✓${RESET} ${skillName}`);
91
100
  }
92
101
  }
93
102
 
94
- // Cleanup old commands/ install (from previous versions)
103
+ // Cleanup old v2 commands/ directory
95
104
  const oldCommandsDir = path.join(baseDir, "commands", "harness-evolver");
96
105
  if (fs.existsSync(oldCommandsDir)) {
97
106
  fs.rmSync(oldCommandsDir, { recursive: true, force: true });
98
- console.log(` ${GREEN}✓${RESET} Cleaned up old commands/ directory`);
107
+ console.log(` ${DIM}Cleaned up old commands/ directory${RESET}`);
99
108
  }
100
109
 
101
- // Agents → agents/
110
+ // Agents
102
111
  const agentsSource = path.join(PLUGIN_ROOT, "agents");
103
112
  if (fs.existsSync(agentsSource)) {
104
113
  fs.mkdirSync(agentsDir, { recursive: true });
105
114
  for (const agent of fs.readdirSync(agentsSource)) {
115
+ if (!agent.endsWith(".md")) continue;
106
116
  copyFile(path.join(agentsSource, agent), path.join(agentsDir, agent));
107
- console.log(` ${GREEN}✓${RESET} Installed agent: ${agent}`);
117
+ const agentName = agent.replace(".md", "");
118
+ console.log(` ${GREEN}✓${RESET} agent: ${agentName}`);
108
119
  }
109
120
  }
110
121
  }
111
122
 
112
123
  function installTools() {
113
- const toolsDir = path.join(HOME, ".harness-evolver", "tools");
124
+ const toolsDir = path.join(HOME, ".evolver", "tools");
114
125
  const toolsSource = path.join(PLUGIN_ROOT, "tools");
115
126
  if (fs.existsSync(toolsSource)) {
116
127
  fs.mkdirSync(toolsDir, { recursive: true });
128
+ let count = 0;
117
129
  for (const tool of fs.readdirSync(toolsSource)) {
118
- if (tool.endsWith(".py")) {
119
- copyFile(path.join(toolsSource, tool), path.join(toolsDir, tool));
120
- }
130
+ if (!tool.endsWith(".py")) continue;
131
+ copyFile(path.join(toolsSource, tool), path.join(toolsDir, tool));
132
+ count++;
121
133
  }
122
- console.log(` ${GREEN}✓${RESET} Installed tools to ~/.harness-evolver/tools/`);
134
+ console.log(` ${GREEN}✓${RESET} ${count} tools installed to ~/.evolver/tools/`);
123
135
  }
124
136
  }
125
137
 
126
- function installExamples() {
127
- const examplesDir = path.join(HOME, ".harness-evolver", "examples");
128
- const examplesSource = path.join(PLUGIN_ROOT, "examples");
129
- if (fs.existsSync(examplesSource)) {
130
- copyDir(examplesSource, examplesDir);
131
- console.log(` ${GREEN}✓${RESET} Installed examples to ~/.harness-evolver/examples/`);
132
- }
133
- }
138
+ function installPythonDeps() {
139
+ console.log(`\n ${YELLOW}Installing Python dependencies...${RESET}`);
134
140
 
135
- function cleanupBrokenPluginEntry(runtimeDir) {
136
- // Remove the harness-evolver@local entry that doesn't work
137
- const installedPath = path.join(HOME, runtimeDir, "plugins", "installed_plugins.json");
138
- try {
139
- const data = JSON.parse(fs.readFileSync(installedPath, "utf8"));
140
- if (data.plugins && data.plugins["harness-evolver@local"]) {
141
- delete data.plugins["harness-evolver@local"];
142
- fs.writeFileSync(installedPath, JSON.stringify(data, null, 2) + "\n");
143
- }
144
- } catch {}
141
+ // Try multiple pip variants
142
+ const commands = [
143
+ "pip install langsmith openevals",
144
+ "uv pip install langsmith openevals",
145
+ "pip3 install langsmith openevals",
146
+ "python3 -m pip install langsmith openevals",
147
+ ];
145
148
 
146
- const settingsPath = path.join(HOME, runtimeDir, "settings.json");
147
- try {
148
- const data = JSON.parse(fs.readFileSync(settingsPath, "utf8"));
149
- if (data.enabledPlugins && data.enabledPlugins["harness-evolver@local"] !== undefined) {
150
- delete data.enabledPlugins["harness-evolver@local"];
151
- fs.writeFileSync(settingsPath, JSON.stringify(data, null, 2) + "\n");
149
+ for (const cmd of commands) {
150
+ try {
151
+ execSync(cmd, { stdio: "pipe", timeout: 120000 });
152
+ console.log(` ${GREEN}✓${RESET} langsmith + openevals installed`);
153
+ return true;
154
+ } catch {
155
+ continue;
152
156
  }
153
- } catch {}
154
- }
155
-
156
- async function main() {
157
- console.log(LOGO);
158
- console.log(` ${DIM}Harness Evolver v${VERSION}${RESET}`);
159
- console.log(` ${DIM}Meta-Harness-style autonomous harness optimization${RESET}`);
160
- console.log();
161
-
162
- if (!checkPython()) {
163
- console.error(` ${RED}ERROR:${RESET} python3 not found in PATH. Install Python 3.8+ first.`);
164
- process.exit(1);
165
- }
166
- console.log(` ${GREEN}✓${RESET} python3 found`);
167
-
168
- const RUNTIMES = [
169
- { name: "Claude Code", dir: ".claude" },
170
- { name: "Cursor", dir: ".cursor" },
171
- { name: "Codex", dir: ".codex" },
172
- { name: "Windsurf", dir: ".windsurf" },
173
- ].filter(r => fs.existsSync(path.join(HOME, r.dir)));
174
-
175
- if (RUNTIMES.length === 0) {
176
- console.error(`\n ${RED}ERROR:${RESET} No supported runtime detected.`);
177
- console.error(` Install Claude Code, Cursor, Codex, or Windsurf first.`);
178
- process.exit(1);
179
- }
180
-
181
- const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
182
-
183
- console.log(`\n ${YELLOW}Which runtime(s) would you like to install for?${RESET}\n`);
184
- RUNTIMES.forEach((r, i) => console.log(` ${i + 1}) ${r.name.padEnd(14)} (~/${r.dir})`));
185
- if (RUNTIMES.length > 1) {
186
- console.log(` ${RUNTIMES.length + 1}) All`);
187
- console.log(`\n ${DIM}Select multiple: 1,2 or 1 2${RESET}`);
188
- }
189
-
190
- const runtimeAnswer = await ask(rl, `\n ${YELLOW}Choice [1]:${RESET} `);
191
- const runtimeInput = (runtimeAnswer.trim() || "1");
192
-
193
- let selected;
194
- if (runtimeInput === String(RUNTIMES.length + 1)) {
195
- selected = RUNTIMES;
196
- } else {
197
- const indices = runtimeInput.split(/[,\s]+/).map(s => parseInt(s, 10) - 1);
198
- selected = indices.filter(i => i >= 0 && i < RUNTIMES.length).map(i => RUNTIMES[i]);
199
157
  }
200
- if (selected.length === 0) selected = [RUNTIMES[0]];
201
-
202
- console.log(`\n ${YELLOW}Where would you like to install?${RESET}\n`);
203
- console.log(` 1) Global (~/${selected[0].dir}) - available in all projects`);
204
- console.log(` 2) Local (./${selected[0].dir}) - this project only`);
205
-
206
- const scopeAnswer = await ask(rl, `\n ${YELLOW}Choice [1]:${RESET} `);
207
- const scope = (scopeAnswer.trim() === "2") ? "local" : "global";
208
158
 
209
- console.log();
210
-
211
- for (const runtime of selected) {
212
- console.log(` Installing for ${GREEN}${runtime.name}${RESET}\n`);
213
- cleanupBrokenPluginEntry(runtime.dir);
214
- installForRuntime(runtime.dir, scope);
215
- console.log();
216
- }
217
-
218
- installTools();
219
- installExamples();
220
-
221
- const versionPath = path.join(HOME, ".harness-evolver", "VERSION");
222
- fs.mkdirSync(path.dirname(versionPath), { recursive: true });
223
- fs.writeFileSync(versionPath, VERSION);
224
- console.log(` ${GREEN}✓${RESET} VERSION ${VERSION}`);
225
-
226
- console.log(`\n ${GREEN}Done!${RESET} Restart Claude Code, then run ${GREEN}/harness-evolver:init${RESET}\n`);
159
+ console.log(` ${YELLOW}!${RESET} Could not auto-install Python packages.`);
160
+ console.log(` Run manually: ${BOLD}pip install langsmith openevals${RESET}`);
161
+ return false;
162
+ }
227
163
 
228
- // Optional integrations
229
- console.log(` ${YELLOW}Install optional integrations?${RESET}\n`);
230
- console.log(` These enhance the proposer with rich traces and up-to-date documentation.\n`);
164
+ async function configureLangSmith(rl) {
165
+ console.log(`\n ${YELLOW}LangSmith Configuration${RESET} ${DIM}(required for v3)${RESET}\n`);
231
166
 
232
- // LangSmith CLI
233
- const hasLangsmithCli = checkCommand("langsmith-cli --version");
167
+ // Check if already configured
234
168
  const langsmithCredsDir = process.platform === "darwin"
235
169
  ? path.join(HOME, "Library", "Application Support", "langsmith-cli")
236
170
  : path.join(HOME, ".config", "langsmith-cli");
237
171
  const langsmithCredsFile = path.join(langsmithCredsDir, "credentials");
238
- const hasLangsmithCreds = fs.existsSync(langsmithCredsFile);
239
172
 
240
- if (hasLangsmithCli && hasLangsmithCreds) {
241
- console.log(` ${GREEN}✓${RESET} langsmith-cli installed and authenticated`);
242
- } else {
243
- if (!hasLangsmithCli) {
244
- console.log(` ${BOLD}LangSmith CLI${RESET} — rich trace analysis (error rates, latency, token usage)`);
245
- const lsAnswer = await ask(rl, `\n ${YELLOW}Install langsmith-cli? [y/N]:${RESET} `);
246
- if (lsAnswer.trim().toLowerCase() === "y") {
247
- console.log(`\n Installing langsmith-cli...`);
248
- try {
249
- execSync("uv tool install langsmith-cli", { stdio: "inherit" });
250
- console.log(`\n ${GREEN}✓${RESET} langsmith-cli installed`);
251
- } catch {
252
- console.log(`\n ${RED}Failed.${RESET} Install manually: uv tool install langsmith-cli\n`);
253
- }
254
- }
255
- } else {
256
- console.log(` ${GREEN}✓${RESET} langsmith-cli already installed`);
257
- }
173
+ // Check env var
174
+ if (process.env.LANGSMITH_API_KEY) {
175
+ console.log(` ${GREEN}✓${RESET} LANGSMITH_API_KEY found in environment`);
176
+ return;
177
+ }
258
178
 
259
- // Auth ask for API key inline if not already configured
260
- if (!hasLangsmithCreds) {
261
- console.log(`\n ${BOLD}LangSmith API Key${RESET} get yours at ${DIM}https://smith.langchain.com/settings${RESET}`);
262
- const apiKey = await ask(rl, ` ${YELLOW}Paste your LangSmith API key (or Enter to skip):${RESET} `);
263
- const key = apiKey.trim();
264
- if (key && key.startsWith("lsv2_")) {
265
- try {
266
- fs.mkdirSync(langsmithCredsDir, { recursive: true });
267
- fs.writeFileSync(langsmithCredsFile, `LANGSMITH_API_KEY=${key}\n`);
268
- console.log(` ${GREEN}✓${RESET} LangSmith API key saved`);
269
- } catch {
270
- console.log(` ${RED}Failed to save credentials.${RESET} Set LANGSMITH_API_KEY in your shell instead.`);
271
- }
272
- } else if (key) {
273
- console.log(` ${YELLOW}Doesn't look like a LangSmith key (should start with lsv2_). Skipped.${RESET}`);
274
- } else {
275
- console.log(` ${DIM}Skipped. Set LANGSMITH_API_KEY later or run: langsmith-cli auth login${RESET}`);
276
- }
179
+ // Check credentials file
180
+ if (fs.existsSync(langsmithCredsFile)) {
181
+ console.log(` ${GREEN}✓${RESET} LangSmith credentials found at ${DIM}${langsmithCredsFile}${RESET}`);
182
+ return;
183
+ }
184
+
185
+ // Ask for API key
186
+ console.log(` ${BOLD}LangSmith API Key${RESET} get yours at ${DIM}https://smith.langchain.com/settings${RESET}`);
187
+ console.log(` ${DIM}LangSmith is required for v3 (datasets, experiments, evaluators).${RESET}\n`);
188
+ const apiKey = await ask(rl, ` ${YELLOW}Paste your LangSmith API key:${RESET} `);
189
+ const key = apiKey.trim();
190
+
191
+ if (key && key.startsWith("lsv2_")) {
192
+ try {
193
+ fs.mkdirSync(langsmithCredsDir, { recursive: true });
194
+ fs.writeFileSync(langsmithCredsFile, `LANGSMITH_API_KEY=${key}\n`);
195
+ console.log(` ${GREEN}✓${RESET} API key saved to ${DIM}${langsmithCredsFile}${RESET}`);
196
+ } catch {
197
+ console.log(` ${RED}Failed to save.${RESET} Add to your shell: export LANGSMITH_API_KEY=${key}`);
277
198
  }
199
+ } else if (key) {
200
+ console.log(` ${YELLOW}Doesn't look like a LangSmith key (should start with lsv2_).${RESET}`);
201
+ console.log(` Add to your shell: ${BOLD}export LANGSMITH_API_KEY=your_key${RESET}`);
202
+ } else {
203
+ console.log(` ${YELLOW}Skipped.${RESET} You must set LANGSMITH_API_KEY before using /evolver:setup`);
278
204
  }
205
+ }
206
+
207
+ async function configureOptionalIntegrations(rl) {
208
+ console.log(`\n ${YELLOW}Optional Integrations${RESET}\n`);
279
209
 
280
210
  // Context7 MCP
281
211
  const hasContext7 = (() => {
@@ -289,19 +219,18 @@ async function main() {
289
219
  } catch {}
290
220
  return false;
291
221
  })();
222
+
292
223
  if (hasContext7) {
293
224
  console.log(` ${GREEN}✓${RESET} Context7 MCP already configured`);
294
225
  } else {
295
- console.log(`\n ${BOLD}Context7 MCP${RESET} — up-to-date library documentation (LangChain, OpenAI, etc.)`);
296
- console.log(` ${DIM}claude mcp add context7 -- npx -y @upstash/context7-mcp@latest${RESET}`);
226
+ console.log(` ${BOLD}Context7 MCP${RESET} — up-to-date library documentation (LangChain, OpenAI, etc.)`);
297
227
  const c7Answer = await ask(rl, `\n ${YELLOW}Install Context7 MCP? [y/N]:${RESET} `);
298
228
  if (c7Answer.trim().toLowerCase() === "y") {
299
- console.log(`\n Installing Context7 MCP...`);
300
229
  try {
301
230
  execSync("claude mcp add context7 -- npx -y @upstash/context7-mcp@latest", { stdio: "inherit" });
302
231
  console.log(`\n ${GREEN}✓${RESET} Context7 MCP configured`);
303
232
  } catch {
304
- console.log(`\n ${RED}Failed.${RESET} Install manually: claude mcp add context7 -- npx -y @upstash/context7-mcp@latest\n`);
233
+ console.log(`\n ${RED}Failed.${RESET} Install manually: claude mcp add context7 -- npx -y @upstash/context7-mcp@latest`);
305
234
  }
306
235
  }
307
236
  }
@@ -318,28 +247,109 @@ async function main() {
318
247
  } catch {}
319
248
  return false;
320
249
  })();
250
+
321
251
  if (hasLcDocs) {
322
252
  console.log(` ${GREEN}✓${RESET} LangChain Docs MCP already configured`);
323
253
  } else {
324
- console.log(`\n ${BOLD}LangChain Docs MCP${RESET} — LangChain/LangGraph/LangSmith documentation search`);
325
- console.log(` ${DIM}claude mcp add docs-langchain --transport http https://docs.langchain.com/mcp${RESET}`);
254
+ console.log(`\n ${BOLD}LangChain Docs MCP${RESET} — LangChain/LangGraph/LangSmith documentation`);
326
255
  const lcAnswer = await ask(rl, `\n ${YELLOW}Install LangChain Docs MCP? [y/N]:${RESET} `);
327
256
  if (lcAnswer.trim().toLowerCase() === "y") {
328
- console.log(`\n Installing LangChain Docs MCP...`);
329
257
  try {
330
258
  execSync("claude mcp add docs-langchain --transport http https://docs.langchain.com/mcp", { stdio: "inherit" });
331
259
  console.log(`\n ${GREEN}✓${RESET} LangChain Docs MCP configured`);
332
260
  } catch {
333
- console.log(`\n ${RED}Failed.${RESET} Install manually: claude mcp add docs-langchain --transport http https://docs.langchain.com/mcp\n`);
261
+ console.log(`\n ${RED}Failed.${RESET} Install manually: claude mcp add docs-langchain --transport http https://docs.langchain.com/mcp`);
334
262
  }
335
263
  }
336
264
  }
265
+ }
266
+
267
+ async function main() {
268
+ console.log(LOGO);
269
+
270
+ if (!checkPython()) {
271
+ console.error(` ${RED}ERROR:${RESET} python3 not found. Install Python 3.10+ first.`);
272
+ process.exit(1);
273
+ }
274
+ console.log(` ${GREEN}✓${RESET} python3 found`);
275
+
276
+ // Detect runtimes
277
+ const RUNTIMES = [
278
+ { name: "Claude Code", dir: ".claude" },
279
+ { name: "Cursor", dir: ".cursor" },
280
+ { name: "Codex", dir: ".codex" },
281
+ { name: "Windsurf", dir: ".windsurf" },
282
+ ].filter(r => fs.existsSync(path.join(HOME, r.dir)));
283
+
284
+ if (RUNTIMES.length === 0) {
285
+ console.error(`\n ${RED}ERROR:${RESET} No supported runtime detected.`);
286
+ console.error(` Install Claude Code, Cursor, Codex, or Windsurf first.`);
287
+ process.exit(1);
288
+ }
289
+
290
+ const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
291
+
292
+ // Runtime selection
293
+ console.log(`\n ${YELLOW}Which runtime(s) to install for?${RESET}\n`);
294
+ RUNTIMES.forEach((r, i) => console.log(` ${i + 1}) ${r.name.padEnd(14)} (~/${r.dir})`));
295
+ if (RUNTIMES.length > 1) {
296
+ console.log(` ${RUNTIMES.length + 1}) All`);
297
+ console.log(`\n ${DIM}Select multiple: 1,2 or 1 2${RESET}`);
298
+ }
299
+
300
+ const runtimeAnswer = await ask(rl, `\n ${YELLOW}Choice [1]:${RESET} `);
301
+ const runtimeInput = (runtimeAnswer.trim() || "1");
337
302
 
338
- console.log(`\n ${DIM}Quick start with example:${RESET}`);
339
- console.log(` cp -r ~/.harness-evolver/examples/classifier ./my-project`);
340
- console.log(` cd my-project && claude`);
341
- console.log(` /harness-evolver:init`);
342
- console.log(` /harness-evolver:evolve`);
303
+ let selected;
304
+ if (runtimeInput === String(RUNTIMES.length + 1)) {
305
+ selected = RUNTIMES;
306
+ } else {
307
+ const indices = runtimeInput.split(/[,\s]+/).map(s => parseInt(s, 10) - 1);
308
+ selected = indices.filter(i => i >= 0 && i < RUNTIMES.length).map(i => RUNTIMES[i]);
309
+ }
310
+ if (selected.length === 0) selected = [RUNTIMES[0]];
311
+
312
+ // Scope selection
313
+ console.log(`\n ${YELLOW}Where to install?${RESET}\n`);
314
+ console.log(` 1) Global (~/${selected[0].dir}) — available in all projects`);
315
+ console.log(` 2) Local (./${selected[0].dir}) — this project only`);
316
+
317
+ const scopeAnswer = await ask(rl, `\n ${YELLOW}Choice [1]:${RESET} `);
318
+ const scope = (scopeAnswer.trim() === "2") ? "local" : "global";
319
+
320
+ // Install skills + agents
321
+ console.log(`\n ${BOLD}Installing skills & agents${RESET}\n`);
322
+ for (const runtime of selected) {
323
+ console.log(` ${GREEN}${runtime.name}${RESET}:`);
324
+ installSkillsAndAgents(runtime.dir, scope);
325
+ console.log();
326
+ }
327
+
328
+ // Install tools
329
+ console.log(` ${BOLD}Installing tools${RESET}`);
330
+ installTools();
331
+
332
+ // Version marker
333
+ const versionPath = path.join(HOME, ".evolver", "VERSION");
334
+ fs.mkdirSync(path.dirname(versionPath), { recursive: true });
335
+ fs.writeFileSync(versionPath, VERSION);
336
+
337
+ // Install Python deps
338
+ installPythonDeps();
339
+
340
+ // Configure LangSmith (required)
341
+ await configureLangSmith(rl);
342
+
343
+ // Optional integrations
344
+ await configureOptionalIntegrations(rl);
345
+
346
+ // Done
347
+ console.log(`\n ${GREEN}${BOLD}Setup complete!${RESET}\n`);
348
+ console.log(` ${DIM}Restart Claude Code, then:${RESET}`);
349
+ console.log(` ${GREEN}/evolver:setup${RESET} — configure LangSmith for your project`);
350
+ console.log(` ${GREEN}/evolver:evolve${RESET} — run the optimization loop`);
351
+ console.log(` ${GREEN}/evolver:status${RESET} — check progress`);
352
+ console.log(` ${GREEN}/evolver:deploy${RESET} — finalize and push`);
343
353
  console.log(`\n ${DIM}GitHub: https://github.com/raphaelchristi/harness-evolver${RESET}\n`);
344
354
 
345
355
  rl.close();
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "harness-evolver",
3
- "version": "2.9.1",
4
- "description": "Meta-Harness-style autonomous harness optimization for Claude Code",
3
+ "version": "3.0.1",
4
+ "description": "LangSmith-native autonomous agent optimization for Claude Code",
5
5
  "author": "Raphael Valdetaro",
6
6
  "license": "MIT",
7
7
  "repository": {
@@ -10,11 +10,12 @@
10
10
  },
11
11
  "keywords": [
12
12
  "claude-code",
13
- "harness",
14
- "meta-harness",
13
+ "langsmith",
15
14
  "llm",
16
15
  "optimization",
17
- "agent"
16
+ "agent",
17
+ "evolution",
18
+ "meta-harness"
18
19
  ],
19
20
  "bin": {
20
21
  "harness-evolver": "bin/install.js"
@@ -23,7 +24,6 @@
23
24
  "bin/",
24
25
  "skills/",
25
26
  "agents/",
26
- "tools/",
27
- "examples/"
27
+ "tools/"
28
28
  ]
29
29
  }
package/skills/deploy/SKILL.md CHANGED
@@ -1,82 +1,75 @@
1
1
  ---
2
- name: harness-evolver:deploy
3
- description: "Use when the user wants to use the best evolved harness in their project, promote a version to production, copy the winning harness back, or is done evolving and wants to apply the result."
4
- argument-hint: "[version]"
2
+ name: evolver:deploy
3
+ description: "Use when the user is done evolving and wants to finalize, clean up, tag the result, or push the optimized agent."
5
4
  allowed-tools: [Read, Write, Bash, Glob, AskUserQuestion]
6
5
  ---
7
6
 
8
- # /harness-evolver:deploy
7
+ # /evolver:deploy
9
8
 
10
- Promote the best (or specified) harness version back to the user's project.
11
-
12
- ## Arguments
13
-
14
- - `version` — optional. If not given, deploys the best version from `summary.json`.
9
+ Finalize the evolution results. In v3, the best code is already in the main branch (auto-merged during evolve). Deploy is about cleanup, tagging, and pushing.
15
10
 
16
11
  ## What To Do
17
12
 
18
- ### 1. Identify Best Version
13
+ ### 1. Show Results
19
14
 
20
15
  ```bash
21
- python3 -c "import json; s=json.load(open('.harness-evolver/summary.json')); print(s['best']['version'], s['best']['combined_score'])"
16
+ python3 -c "
17
+ import json
18
+ c = json.load(open('.evolver.json'))
19
+ baseline = c['history'][0]['score'] if c['history'] else 0
20
+ best = c['best_score']
21
+ improvement = best - baseline
22
+ print(f'Baseline: {baseline:.3f}')
23
+ print(f'Best: {best:.3f} (+{improvement:.3f}, {improvement/max(baseline,0.001)*100:.0f}% improvement)')
24
+ print(f'Iterations: {c[\"iterations\"]}')
25
+ print(f'Experiment: {c[\"best_experiment\"]}')
26
+ "
22
27
  ```
23
28
 
24
- Or use the user-specified version.
25
-
26
- ### 2. Show What Will Be Deployed
27
-
29
+ Show git diff from before evolution started:
28
30
  ```bash
29
- cat .harness-evolver/harnesses/{version}/proposal.md
30
- cat .harness-evolver/harnesses/{version}/scores.json
31
- ```
32
-
33
- Report: version, score, improvement over baseline, what changed.
34
-
35
- ### 3. Ask Deploy Options (Interactive)
36
-
37
- Use AskUserQuestion with TWO questions:
38
-
39
- ```
40
- Question 1: "Where should the evolved harness go?"
41
- Header: "Deploy to"
42
- Options:
43
- - "Overwrite original" — Replace {original_harness_path} with the evolved version
44
- - "Copy to new file" — Save as harness_evolved.py alongside the original
45
- - "Just show the diff" — Don't copy anything, just show what changed
31
+ git log --oneline --since="$(python3 -c "import json; print(json.load(open('.evolver.json'))['created_at'][:10])")" | head -20
46
32
  ```
47
33
 
48
- ```
49
- Question 2 (ONLY if user chose "Overwrite original"):
50
- "Back up the current harness before overwriting?"
51
- Header: "Backup"
52
- Options:
53
- - "Yes, backup first" — Save current as {harness}.bak before overwriting
54
- - "No, just overwrite" — Replace directly (git history has the original)
34
+ ### 2. Ask What To Do (interactive)
35
+
36
+ ```json
37
+ {
38
+ "questions": [{
39
+ "question": "Evolution complete. What would you like to do?",
40
+ "header": "Deploy",
41
+ "multiSelect": false,
42
+ "options": [
43
+ {"label": "Tag and push", "description": "Create a git tag with the score and push to remote"},
44
+ {"label": "Just review", "description": "Show the full diff of all changes made during evolution"},
45
+ {"label": "Clean up only", "description": "Remove temporary files (trace_insights.json, etc.) but don't push"}
46
+ ]
47
+ }]
48
+ }
55
49
  ```
56
50
 
57
- ### 4. Copy Files
51
+ ### 3. Execute
58
52
 
59
- Based on the user's choices:
60
-
61
- **If "Overwrite original"**:
62
- - If backup: `cp {original_harness} {original_harness}.bak`
63
- - Then: `cp .harness-evolver/harnesses/{version}/harness.py {original_harness}`
64
- - Copy config.json if exists
53
+ **If "Tag and push"**:
54
+ ```bash
55
+ VERSION=$(python3 -c "import json; c=json.load(open('.evolver.json')); print(f'evolver-v{c[\"iterations\"]}')")
56
+ SCORE=$(python3 -c "import json; print(f'{json.load(open(\".evolver.json\"))[\"best_score\"]:.3f}')")
57
+ git tag -a "$VERSION" -m "Evolver: score $SCORE"
58
+ git push origin main --tags
59
+ ```
65
60
 
66
- **If "Copy to new file"**:
61
+ **If "Just review"**:
67
62
  ```bash
68
- cp .harness-evolver/harnesses/{version}/harness.py ./harness_evolved.py
69
- cp .harness-evolver/harnesses/{version}/config.json ./config_evolved.json # if exists
63
+ git diff HEAD~{iterations} HEAD
70
64
  ```
71
65
 
72
- **If "Just show the diff"**:
66
+ **If "Clean up only"**:
73
67
  ```bash
74
- diff {original_harness} .harness-evolver/harnesses/{version}/harness.py
68
+ rm -f trace_insights.json best_results.json comparison.json production_seed.md production_seed.json
75
69
  ```
76
- Do not copy anything.
77
70
 
78
- ### 5. Report
71
+ ### 4. Report
79
72
 
80
- - What was copied and where
81
- - Score improvement: baseline → deployed version
82
- - Suggest: review the diff before committing
73
+ - What was done
74
+ - LangSmith experiment URL for the best result
75
+ - Suggest reviewing the changes before deploying to production