npm - harness-evolver - Versions diffs - 2.9.1 → 3.0.1 - Mend

harness-evolver 2.9.1 → 3.0.1

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (54)

package/README.md +62 -117
package/agents/evolver-architect.md +53 -0
package/agents/evolver-critic.md +44 -0
package/agents/evolver-proposer.md +128 -0
package/agents/evolver-testgen.md +67 -0
package/bin/install.js +181 -171
package/package.json +7 -7
package/skills/deploy/SKILL.md +49 -56
package/skills/evolve/SKILL.md +156 -687
package/skills/setup/SKILL.md +182 -0
package/skills/status/SKILL.md +23 -21
package/tools/read_results.py +240 -0
package/tools/run_eval.py +202 -0
package/tools/seed_from_traces.py +36 -8
package/tools/setup.py +393 -0
package/tools/trace_insights.py +86 -14
package/agents/harness-evolver-architect.md +0 -173
package/agents/harness-evolver-critic.md +0 -132
package/agents/harness-evolver-judge.md +0 -110
package/agents/harness-evolver-proposer.md +0 -317
package/agents/harness-evolver-testgen.md +0 -112
package/examples/classifier/README.md +0 -25
package/examples/classifier/config.json +0 -3
package/examples/classifier/eval.py +0 -58
package/examples/classifier/harness.py +0 -111
package/examples/classifier/tasks/task_001.json +0 -1
package/examples/classifier/tasks/task_002.json +0 -1
package/examples/classifier/tasks/task_003.json +0 -1
package/examples/classifier/tasks/task_004.json +0 -1
package/examples/classifier/tasks/task_005.json +0 -1
package/examples/classifier/tasks/task_006.json +0 -1
package/examples/classifier/tasks/task_007.json +0 -1
package/examples/classifier/tasks/task_008.json +0 -1
package/examples/classifier/tasks/task_009.json +0 -1
package/examples/classifier/tasks/task_010.json +0 -1
package/skills/architect/SKILL.md +0 -93
package/skills/compare/SKILL.md +0 -73
package/skills/critic/SKILL.md +0 -67
package/skills/diagnose/SKILL.md +0 -96
package/skills/import-traces/SKILL.md +0 -102
package/skills/init/SKILL.md +0 -293
package/tools/__pycache__/detect_stack.cpython-313.pyc +0 -0
package/tools/__pycache__/init.cpython-313.pyc +0 -0
package/tools/__pycache__/seed_from_traces.cpython-313.pyc +0 -0
package/tools/__pycache__/trace_logger.cpython-313.pyc +0 -0
package/tools/eval_llm_judge.py +0 -233
package/tools/eval_passthrough.py +0 -55
package/tools/evaluate.py +0 -255
package/tools/import_traces.py +0 -229
package/tools/init.py +0 -531
package/tools/llm_api.py +0 -125
package/tools/state.py +0 -219
package/tools/test_growth.py +0 -230
package/tools/trace_logger.py +0 -42

package/bin/install.js CHANGED Viewed

@@ -1,7 +1,8 @@
 #!/usr/bin/env node
 /**
- * Harness Evolver installer.
- * Copies skills/agents/tools directly to runtime directories (GSD pattern).
+ * Harness Evolver v3 installer.
+ * Copies skills/agents/tools to runtime directories (GSD pattern).
+ * Installs Python dependencies (langsmith + openevals).
  *
  * Usage: npx harness-evolver@latest
  */
@@ -27,7 +28,7 @@ const LOGO = `${BOLD}${GREEN}
   ╠═╣╠═╣╠╦╝║║║║╣ ╚═╗╚═╗  ║╣ ╚╗╔╝║ ║║  ╚╗╔╝║╣ ╠╦╝
   ╩ ╩╩ ╩╩╚═╝╚╝╚═╝╚═╝╚═╝  ╚═╝ ╚╝ ╚═╝╩═╝ ╚╝ ╚═╝╩╚═
 ${RESET}
-${DIM}${GREEN}  End-to-end harness optimization for AI agents${RESET}
+${DIM}${GREEN}  LangSmith-native agent optimization  v${VERSION}${RESET}
 `;
 function ask(rl, question) {
@@ -39,6 +40,7 @@ function copyDir(src, dest) {
   for (const entry of fs.readdirSync(src, { withFileTypes: true })) {
     const srcPath = path.join(src, entry.name);
     const destPath = path.join(dest, entry.name);
+    if (entry.name === "__pycache__") continue;
     if (entry.isDirectory()) {
       copyDir(srcPath, destPath);
     } else {
@@ -70,7 +72,7 @@ function checkCommand(cmd) {
   }
 }
-function installForRuntime(runtimeDir, scope) {
+function installSkillsAndAgents(runtimeDir, scope) {
   const baseDir = scope === "local"
     ? path.join(process.cwd(), runtimeDir)
     : path.join(HOME, runtimeDir);
@@ -78,204 +80,132 @@ function installForRuntime(runtimeDir, scope) {
   const skillsDir = path.join(baseDir, "skills");
   const agentsDir = path.join(baseDir, "agents");
-  // Skills → ~/.claude/skills/<skill-name>/SKILL.md (proper skills format)
+  // Skills — read SKILL.md name field, use directory name for filesystem
   const skillsSource = path.join(PLUGIN_ROOT, "skills");
   if (fs.existsSync(skillsSource)) {
     for (const skill of fs.readdirSync(skillsSource, { withFileTypes: true })) {
-      if (skill.isDirectory()) {
-        const src = path.join(skillsSource, skill.name);
-        const dest = path.join(skillsDir, "harness-evolver:" + skill.name);
-        copyDir(src, dest);
-        console.log(`  ${GREEN}✓${RESET} Installed skill: harness-evolver:${skill.name}`);
-      }
+      if (!skill.isDirectory()) continue;
+      const src = path.join(skillsSource, skill.name);
+      const skillMd = path.join(src, "SKILL.md");
+      if (!fs.existsSync(skillMd)) continue;
+      // Read the skill name from frontmatter
+      const content = fs.readFileSync(skillMd, "utf8");
+      const nameMatch = content.match(/^name:\s*(.+)$/m);
+      const skillName = nameMatch ? nameMatch[1].trim() : skill.name;
+      const dest = path.join(skillsDir, skill.name);
+      copyDir(src, dest);
+      console.log(`  ${GREEN}✓${RESET} ${skillName}`);
     }
   }
-  // Cleanup old commands/ install (from previous versions)
+  // Cleanup old v2 commands/ directory
   const oldCommandsDir = path.join(baseDir, "commands", "harness-evolver");
   if (fs.existsSync(oldCommandsDir)) {
     fs.rmSync(oldCommandsDir, { recursive: true, force: true });
-    console.log(`  ${GREEN}✓${RESET} Cleaned up old commands/ directory`);
+    console.log(`  ${DIM}Cleaned up old commands/ directory${RESET}`);
   }
-  // Agents → agents/
+  // Agents
   const agentsSource = path.join(PLUGIN_ROOT, "agents");
   if (fs.existsSync(agentsSource)) {
     fs.mkdirSync(agentsDir, { recursive: true });
     for (const agent of fs.readdirSync(agentsSource)) {
+      if (!agent.endsWith(".md")) continue;
       copyFile(path.join(agentsSource, agent), path.join(agentsDir, agent));
-      console.log(`  ${GREEN}✓${RESET} Installed agent: ${agent}`);
+      const agentName = agent.replace(".md", "");
+      console.log(`  ${GREEN}✓${RESET} agent: ${agentName}`);
     }
   }
 }
 function installTools() {
-  const toolsDir = path.join(HOME, ".harness-evolver", "tools");
+  const toolsDir = path.join(HOME, ".evolver", "tools");
   const toolsSource = path.join(PLUGIN_ROOT, "tools");
   if (fs.existsSync(toolsSource)) {
     fs.mkdirSync(toolsDir, { recursive: true });
+    let count = 0;
     for (const tool of fs.readdirSync(toolsSource)) {
-      if (tool.endsWith(".py")) {
-        copyFile(path.join(toolsSource, tool), path.join(toolsDir, tool));
-      }
+      if (!tool.endsWith(".py")) continue;
+      copyFile(path.join(toolsSource, tool), path.join(toolsDir, tool));
+      count++;
     }
-    console.log(`  ${GREEN}✓${RESET} Installed tools to ~/.harness-evolver/tools/`);
+    console.log(`  ${GREEN}✓${RESET} ${count} tools installed to ~/.evolver/tools/`);
   }
 }
-function installExamples() {
-  const examplesDir = path.join(HOME, ".harness-evolver", "examples");
-  const examplesSource = path.join(PLUGIN_ROOT, "examples");
-  if (fs.existsSync(examplesSource)) {
-    copyDir(examplesSource, examplesDir);
-    console.log(`  ${GREEN}✓${RESET} Installed examples to ~/.harness-evolver/examples/`);
-  }
-}
+function installPythonDeps() {
+  console.log(`\n  ${YELLOW}Installing Python dependencies...${RESET}`);
-function cleanupBrokenPluginEntry(runtimeDir) {
-  // Remove the harness-evolver@local entry that doesn't work
-  const installedPath = path.join(HOME, runtimeDir, "plugins", "installed_plugins.json");
-  try {
-    const data = JSON.parse(fs.readFileSync(installedPath, "utf8"));
-    if (data.plugins && data.plugins["harness-evolver@local"]) {
-      delete data.plugins["harness-evolver@local"];
-      fs.writeFileSync(installedPath, JSON.stringify(data, null, 2) + "\n");
-    }
-  } catch {}
+  // Try multiple pip variants
+  const commands = [
+    "pip install langsmith openevals",
+    "uv pip install langsmith openevals",
+    "pip3 install langsmith openevals",
+    "python3 -m pip install langsmith openevals",
+  ];
-  const settingsPath = path.join(HOME, runtimeDir, "settings.json");
-  try {
-    const data = JSON.parse(fs.readFileSync(settingsPath, "utf8"));
-    if (data.enabledPlugins && data.enabledPlugins["harness-evolver@local"] !== undefined) {
-      delete data.enabledPlugins["harness-evolver@local"];
-      fs.writeFileSync(settingsPath, JSON.stringify(data, null, 2) + "\n");
+  for (const cmd of commands) {
+    try {
+      execSync(cmd, { stdio: "pipe", timeout: 120000 });
+      console.log(`  ${GREEN}✓${RESET} langsmith + openevals installed`);
+      return true;
+    } catch {
+      continue;
     }
-  } catch {}
-}
-async function main() {
-  console.log(LOGO);
-  console.log(`  ${DIM}Harness Evolver v${VERSION}${RESET}`);
-  console.log(`  ${DIM}Meta-Harness-style autonomous harness optimization${RESET}`);
-  console.log();
-  if (!checkPython()) {
-    console.error(`  ${RED}ERROR:${RESET} python3 not found in PATH. Install Python 3.8+ first.`);
-    process.exit(1);
-  }
-  console.log(`  ${GREEN}✓${RESET} python3 found`);
-  const RUNTIMES = [
-    { name: "Claude Code", dir: ".claude" },
-    { name: "Cursor", dir: ".cursor" },
-    { name: "Codex", dir: ".codex" },
-    { name: "Windsurf", dir: ".windsurf" },
-  ].filter(r => fs.existsSync(path.join(HOME, r.dir)));
-  if (RUNTIMES.length === 0) {
-    console.error(`\n  ${RED}ERROR:${RESET} No supported runtime detected.`);
-    console.error(`  Install Claude Code, Cursor, Codex, or Windsurf first.`);
-    process.exit(1);
-  }
-  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
-  console.log(`\n  ${YELLOW}Which runtime(s) would you like to install for?${RESET}\n`);
-  RUNTIMES.forEach((r, i) => console.log(`  ${i + 1}) ${r.name.padEnd(14)} (~/${r.dir})`));
-  if (RUNTIMES.length > 1) {
-    console.log(`  ${RUNTIMES.length + 1}) All`);
-    console.log(`\n  ${DIM}Select multiple: 1,2 or 1 2${RESET}`);
-  }
-  const runtimeAnswer = await ask(rl, `\n  ${YELLOW}Choice [1]:${RESET} `);
-  const runtimeInput = (runtimeAnswer.trim() || "1");
-  let selected;
-  if (runtimeInput === String(RUNTIMES.length + 1)) {
-    selected = RUNTIMES;
-  } else {
-    const indices = runtimeInput.split(/[,\s]+/).map(s => parseInt(s, 10) - 1);
-    selected = indices.filter(i => i >= 0 && i < RUNTIMES.length).map(i => RUNTIMES[i]);
   }
-  if (selected.length === 0) selected = [RUNTIMES[0]];
-  console.log(`\n  ${YELLOW}Where would you like to install?${RESET}\n`);
-  console.log(`  1) Global  (~/${selected[0].dir}) - available in all projects`);
-  console.log(`  2) Local   (./${selected[0].dir}) - this project only`);
-  const scopeAnswer = await ask(rl, `\n  ${YELLOW}Choice [1]:${RESET} `);
-  const scope = (scopeAnswer.trim() === "2") ? "local" : "global";
-  console.log();
-  for (const runtime of selected) {
-    console.log(`  Installing for ${GREEN}${runtime.name}${RESET}\n`);
-    cleanupBrokenPluginEntry(runtime.dir);
-    installForRuntime(runtime.dir, scope);
-    console.log();
-  }
-  installTools();
-  installExamples();
-  const versionPath = path.join(HOME, ".harness-evolver", "VERSION");
-  fs.mkdirSync(path.dirname(versionPath), { recursive: true });
-  fs.writeFileSync(versionPath, VERSION);
-  console.log(`  ${GREEN}✓${RESET} VERSION ${VERSION}`);
-  console.log(`\n  ${GREEN}Done!${RESET} Restart Claude Code, then run ${GREEN}/harness-evolver:init${RESET}\n`);
+  console.log(`  ${YELLOW}!${RESET} Could not auto-install Python packages.`);
+  console.log(`    Run manually: ${BOLD}pip install langsmith openevals${RESET}`);
+  return false;
+}
-  // Optional integrations
-  console.log(`  ${YELLOW}Install optional integrations?${RESET}\n`);
-  console.log(`  These enhance the proposer with rich traces and up-to-date documentation.\n`);
+async function configureLangSmith(rl) {
+  console.log(`\n  ${YELLOW}LangSmith Configuration${RESET} ${DIM}(required for v3)${RESET}\n`);
-  // LangSmith CLI
-  const hasLangsmithCli = checkCommand("langsmith-cli --version");
+  // Check if already configured
   const langsmithCredsDir = process.platform === "darwin"
     ? path.join(HOME, "Library", "Application Support", "langsmith-cli")
     : path.join(HOME, ".config", "langsmith-cli");
   const langsmithCredsFile = path.join(langsmithCredsDir, "credentials");
-  const hasLangsmithCreds = fs.existsSync(langsmithCredsFile);
-  if (hasLangsmithCli && hasLangsmithCreds) {
-    console.log(`  ${GREEN}✓${RESET} langsmith-cli installed and authenticated`);
-  } else {
-    if (!hasLangsmithCli) {
-      console.log(`  ${BOLD}LangSmith CLI${RESET} — rich trace analysis (error rates, latency, token usage)`);
-      const lsAnswer = await ask(rl, `\n  ${YELLOW}Install langsmith-cli? [y/N]:${RESET} `);
-      if (lsAnswer.trim().toLowerCase() === "y") {
-        console.log(`\n  Installing langsmith-cli...`);
-        try {
-          execSync("uv tool install langsmith-cli", { stdio: "inherit" });
-          console.log(`\n  ${GREEN}✓${RESET} langsmith-cli installed`);
-        } catch {
-          console.log(`\n  ${RED}Failed.${RESET} Install manually: uv tool install langsmith-cli\n`);
-        }
-      }
-    } else {
-      console.log(`  ${GREEN}✓${RESET} langsmith-cli already installed`);
-    }
+  // Check env var
+  if (process.env.LANGSMITH_API_KEY) {
+    console.log(`  ${GREEN}✓${RESET} LANGSMITH_API_KEY found in environment`);
+    return;
+  }
-    // Auth — ask for API key inline if not already configured
-    if (!hasLangsmithCreds) {
-      console.log(`\n  ${BOLD}LangSmith API Key${RESET} — get yours at ${DIM}https://smith.langchain.com/settings${RESET}`);
-      const apiKey = await ask(rl, `  ${YELLOW}Paste your LangSmith API key (or Enter to skip):${RESET} `);
-      const key = apiKey.trim();
-      if (key && key.startsWith("lsv2_")) {
-        try {
-          fs.mkdirSync(langsmithCredsDir, { recursive: true });
-          fs.writeFileSync(langsmithCredsFile, `LANGSMITH_API_KEY=${key}\n`);
-          console.log(`  ${GREEN}✓${RESET} LangSmith API key saved`);
-        } catch {
-          console.log(`  ${RED}Failed to save credentials.${RESET} Set LANGSMITH_API_KEY in your shell instead.`);
-        }
-      } else if (key) {
-        console.log(`  ${YELLOW}Doesn't look like a LangSmith key (should start with lsv2_). Skipped.${RESET}`);
-      } else {
-        console.log(`  ${DIM}Skipped. Set LANGSMITH_API_KEY later or run: langsmith-cli auth login${RESET}`);
-      }
+  // Check credentials file
+  if (fs.existsSync(langsmithCredsFile)) {
+    console.log(`  ${GREEN}✓${RESET} LangSmith credentials found at ${DIM}${langsmithCredsFile}${RESET}`);
+    return;
+  }
+  // Ask for API key
+  console.log(`  ${BOLD}LangSmith API Key${RESET} — get yours at ${DIM}https://smith.langchain.com/settings${RESET}`);
+  console.log(`  ${DIM}LangSmith is required for v3 (datasets, experiments, evaluators).${RESET}\n`);
+  const apiKey = await ask(rl, `  ${YELLOW}Paste your LangSmith API key:${RESET} `);
+  const key = apiKey.trim();
+  if (key && key.startsWith("lsv2_")) {
+    try {
+      fs.mkdirSync(langsmithCredsDir, { recursive: true });
+      fs.writeFileSync(langsmithCredsFile, `LANGSMITH_API_KEY=${key}\n`);
+      console.log(`  ${GREEN}✓${RESET} API key saved to ${DIM}${langsmithCredsFile}${RESET}`);
+    } catch {
+      console.log(`  ${RED}Failed to save.${RESET} Add to your shell: export LANGSMITH_API_KEY=${key}`);
     }
+  } else if (key) {
+    console.log(`  ${YELLOW}Doesn't look like a LangSmith key (should start with lsv2_).${RESET}`);
+    console.log(`  Add to your shell: ${BOLD}export LANGSMITH_API_KEY=your_key${RESET}`);
+  } else {
+    console.log(`  ${YELLOW}Skipped.${RESET} You must set LANGSMITH_API_KEY before using /evolver:setup`);
   }
+}
+async function configureOptionalIntegrations(rl) {
+  console.log(`\n  ${YELLOW}Optional Integrations${RESET}\n`);
   // Context7 MCP
   const hasContext7 = (() => {
@@ -289,19 +219,18 @@ async function main() {
     } catch {}
     return false;
   })();
   if (hasContext7) {
     console.log(`  ${GREEN}✓${RESET} Context7 MCP already configured`);
   } else {
-    console.log(`\n  ${BOLD}Context7 MCP${RESET} — up-to-date library documentation (LangChain, OpenAI, etc.)`);
-    console.log(`    ${DIM}claude mcp add context7 -- npx -y @upstash/context7-mcp@latest${RESET}`);
+    console.log(`  ${BOLD}Context7 MCP${RESET} — up-to-date library documentation (LangChain, OpenAI, etc.)`);
     const c7Answer = await ask(rl, `\n  ${YELLOW}Install Context7 MCP? [y/N]:${RESET} `);
     if (c7Answer.trim().toLowerCase() === "y") {
-      console.log(`\n  Installing Context7 MCP...`);
       try {
         execSync("claude mcp add context7 -- npx -y @upstash/context7-mcp@latest", { stdio: "inherit" });
         console.log(`\n  ${GREEN}✓${RESET} Context7 MCP configured`);
       } catch {
-        console.log(`\n  ${RED}Failed.${RESET} Install manually: claude mcp add context7 -- npx -y @upstash/context7-mcp@latest\n`);
+        console.log(`\n  ${RED}Failed.${RESET} Install manually: claude mcp add context7 -- npx -y @upstash/context7-mcp@latest`);
       }
     }
   }
@@ -318,28 +247,109 @@ async function main() {
     } catch {}
     return false;
   })();
   if (hasLcDocs) {
     console.log(`  ${GREEN}✓${RESET} LangChain Docs MCP already configured`);
   } else {
-    console.log(`\n  ${BOLD}LangChain Docs MCP${RESET} — LangChain/LangGraph/LangSmith documentation search`);
-    console.log(`    ${DIM}claude mcp add docs-langchain --transport http https://docs.langchain.com/mcp${RESET}`);
+    console.log(`\n  ${BOLD}LangChain Docs MCP${RESET} — LangChain/LangGraph/LangSmith documentation`);
     const lcAnswer = await ask(rl, `\n  ${YELLOW}Install LangChain Docs MCP? [y/N]:${RESET} `);
     if (lcAnswer.trim().toLowerCase() === "y") {
-      console.log(`\n  Installing LangChain Docs MCP...`);
       try {
         execSync("claude mcp add docs-langchain --transport http https://docs.langchain.com/mcp", { stdio: "inherit" });
         console.log(`\n  ${GREEN}✓${RESET} LangChain Docs MCP configured`);
       } catch {
-        console.log(`\n  ${RED}Failed.${RESET} Install manually: claude mcp add docs-langchain --transport http https://docs.langchain.com/mcp\n`);
+        console.log(`\n  ${RED}Failed.${RESET} Install manually: claude mcp add docs-langchain --transport http https://docs.langchain.com/mcp`);
       }
     }
   }
+}
+async function main() {
+  console.log(LOGO);
+  if (!checkPython()) {
+    console.error(`  ${RED}ERROR:${RESET} python3 not found. Install Python 3.10+ first.`);
+    process.exit(1);
+  }
+  console.log(`  ${GREEN}✓${RESET} python3 found`);
+  // Detect runtimes
+  const RUNTIMES = [
+    { name: "Claude Code", dir: ".claude" },
+    { name: "Cursor", dir: ".cursor" },
+    { name: "Codex", dir: ".codex" },
+    { name: "Windsurf", dir: ".windsurf" },
+  ].filter(r => fs.existsSync(path.join(HOME, r.dir)));
+  if (RUNTIMES.length === 0) {
+    console.error(`\n  ${RED}ERROR:${RESET} No supported runtime detected.`);
+    console.error(`  Install Claude Code, Cursor, Codex, or Windsurf first.`);
+    process.exit(1);
+  }
+  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+  // Runtime selection
+  console.log(`\n  ${YELLOW}Which runtime(s) to install for?${RESET}\n`);
+  RUNTIMES.forEach((r, i) => console.log(`  ${i + 1}) ${r.name.padEnd(14)} (~/${r.dir})`));
+  if (RUNTIMES.length > 1) {
+    console.log(`  ${RUNTIMES.length + 1}) All`);
+    console.log(`\n  ${DIM}Select multiple: 1,2 or 1 2${RESET}`);
+  }
+  const runtimeAnswer = await ask(rl, `\n  ${YELLOW}Choice [1]:${RESET} `);
+  const runtimeInput = (runtimeAnswer.trim() || "1");
-  console.log(`\n  ${DIM}Quick start with example:${RESET}`);
-  console.log(`    cp -r ~/.harness-evolver/examples/classifier ./my-project`);
-  console.log(`    cd my-project && claude`);
-  console.log(`    /harness-evolver:init`);
-  console.log(`    /harness-evolver:evolve`);
+  let selected;
+  if (runtimeInput === String(RUNTIMES.length + 1)) {
+    selected = RUNTIMES;
+  } else {
+    const indices = runtimeInput.split(/[,\s]+/).map(s => parseInt(s, 10) - 1);
+    selected = indices.filter(i => i >= 0 && i < RUNTIMES.length).map(i => RUNTIMES[i]);
+  }
+  if (selected.length === 0) selected = [RUNTIMES[0]];
+  // Scope selection
+  console.log(`\n  ${YELLOW}Where to install?${RESET}\n`);
+  console.log(`  1) Global  (~/${selected[0].dir}) — available in all projects`);
+  console.log(`  2) Local   (./${selected[0].dir}) — this project only`);
+  const scopeAnswer = await ask(rl, `\n  ${YELLOW}Choice [1]:${RESET} `);
+  const scope = (scopeAnswer.trim() === "2") ? "local" : "global";
+  // Install skills + agents
+  console.log(`\n  ${BOLD}Installing skills & agents${RESET}\n`);
+  for (const runtime of selected) {
+    console.log(`  ${GREEN}${runtime.name}${RESET}:`);
+    installSkillsAndAgents(runtime.dir, scope);
+    console.log();
+  }
+  // Install tools
+  console.log(`  ${BOLD}Installing tools${RESET}`);
+  installTools();
+  // Version marker
+  const versionPath = path.join(HOME, ".evolver", "VERSION");
+  fs.mkdirSync(path.dirname(versionPath), { recursive: true });
+  fs.writeFileSync(versionPath, VERSION);
+  // Install Python deps
+  installPythonDeps();
+  // Configure LangSmith (required)
+  await configureLangSmith(rl);
+  // Optional integrations
+  await configureOptionalIntegrations(rl);
+  // Done
+  console.log(`\n  ${GREEN}${BOLD}Setup complete!${RESET}\n`);
+  console.log(`  ${DIM}Restart Claude Code, then:${RESET}`);
+  console.log(`    ${GREEN}/evolver:setup${RESET}     — configure LangSmith for your project`);
+  console.log(`    ${GREEN}/evolver:evolve${RESET}    — run the optimization loop`);
+  console.log(`    ${GREEN}/evolver:status${RESET}    — check progress`);
+  console.log(`    ${GREEN}/evolver:deploy${RESET}    — finalize and push`);
   console.log(`\n  ${DIM}GitHub: https://github.com/raphaelchristi/harness-evolver${RESET}\n`);
   rl.close();

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "harness-evolver",
-  "version": "2.9.1",
-  "description": "Meta-Harness-style autonomous harness optimization for Claude Code",
+  "version": "3.0.1",
+  "description": "LangSmith-native autonomous agent optimization for Claude Code",
   "author": "Raphael Valdetaro",
   "license": "MIT",
   "repository": {
@@ -10,11 +10,12 @@
   },
   "keywords": [
     "claude-code",
-    "harness",
-    "meta-harness",
+    "langsmith",
     "llm",
     "optimization",
-    "agent"
+    "agent",
+    "evolution",
+    "meta-harness"
   ],
   "bin": {
     "harness-evolver": "bin/install.js"
@@ -23,7 +24,6 @@
     "bin/",
     "skills/",
     "agents/",
-    "tools/",
-    "examples/"
+    "tools/"
   ]
 }

package/skills/deploy/SKILL.md CHANGED Viewed

@@ -1,82 +1,75 @@
 ---
-name: harness-evolver:deploy
-description: "Use when the user wants to use the best evolved harness in their project, promote a version to production, copy the winning harness back, or is done evolving and wants to apply the result."
-argument-hint: "[version]"
+name: evolver:deploy
+description: "Use when the user is done evolving and wants to finalize, clean up, tag the result, or push the optimized agent."
 allowed-tools: [Read, Write, Bash, Glob, AskUserQuestion]
 ---
-# /harness-evolver:deploy
+# /evolver:deploy
-Promote the best (or specified) harness version back to the user's project.
-## Arguments
-- `version` — optional. If not given, deploys the best version from `summary.json`.
+Finalize the evolution results. In v3, the best code is already in the main branch (auto-merged during evolve). Deploy is about cleanup, tagging, and pushing.
 ## What To Do
-### 1. Identify Best Version
+### 1. Show Results
 ```bash
-python3 -c "import json; s=json.load(open('.harness-evolver/summary.json')); print(s['best']['version'], s['best']['combined_score'])"
+python3 -c "
+import json
+c = json.load(open('.evolver.json'))
+baseline = c['history'][0]['score'] if c['history'] else 0
+best = c['best_score']
+improvement = best - baseline
+print(f'Baseline: {baseline:.3f}')
+print(f'Best: {best:.3f} (+{improvement:.3f}, {improvement/max(baseline,0.001)*100:.0f}% improvement)')
+print(f'Iterations: {c[\"iterations\"]}')
+print(f'Experiment: {c[\"best_experiment\"]}')
+"
 ```
-Or use the user-specified version.
-### 2. Show What Will Be Deployed
+Show git diff from before evolution started:
 ```bash
-cat .harness-evolver/harnesses/{version}/proposal.md
-cat .harness-evolver/harnesses/{version}/scores.json
-```
-Report: version, score, improvement over baseline, what changed.
-### 3. Ask Deploy Options (Interactive)
-Use AskUserQuestion with TWO questions:
-```
-Question 1: "Where should the evolved harness go?"
-Header: "Deploy to"
-Options:
-  - "Overwrite original" — Replace {original_harness_path} with the evolved version
-  - "Copy to new file" — Save as harness_evolved.py alongside the original
-  - "Just show the diff" — Don't copy anything, just show what changed
+git log --oneline --since="$(python3 -c "import json; print(json.load(open('.evolver.json'))['created_at'][:10])")" | head -20
 ```
-```
-Question 2 (ONLY if user chose "Overwrite original"):
-"Back up the current harness before overwriting?"
-Header: "Backup"
-Options:
-  - "Yes, backup first" — Save current as {harness}.bak before overwriting
-  - "No, just overwrite" — Replace directly (git history has the original)
+### 2. Ask What To Do (interactive)
+```json
+{
+  "questions": [{
+    "question": "Evolution complete. What would you like to do?",
+    "header": "Deploy",
+    "multiSelect": false,
+    "options": [
+      {"label": "Tag and push", "description": "Create a git tag with the score and push to remote"},
+      {"label": "Just review", "description": "Show the full diff of all changes made during evolution"},
+      {"label": "Clean up only", "description": "Remove temporary files (trace_insights.json, etc.) but don't push"}
+    ]
+  }]
+}
 ```
-### 4. Copy Files
+### 3. Execute
-Based on the user's choices:
-**If "Overwrite original"**:
-- If backup: `cp {original_harness} {original_harness}.bak`
-- Then: `cp .harness-evolver/harnesses/{version}/harness.py {original_harness}`
-- Copy config.json if exists
+**If "Tag and push"**:
+```bash
+VERSION=$(python3 -c "import json; c=json.load(open('.evolver.json')); print(f'evolver-v{c[\"iterations\"]}')")
+SCORE=$(python3 -c "import json; print(f'{json.load(open(\".evolver.json\"))[\"best_score\"]:.3f}')")
+git tag -a "$VERSION" -m "Evolver: score $SCORE"
+git push origin main --tags
+```
-**If "Copy to new file"**:
+**If "Just review"**:
 ```bash
-cp .harness-evolver/harnesses/{version}/harness.py ./harness_evolved.py
-cp .harness-evolver/harnesses/{version}/config.json ./config_evolved.json  # if exists
+git diff HEAD~{iterations} HEAD
 ```
-**If "Just show the diff"**:
+**If "Clean up only"**:
 ```bash
-diff {original_harness} .harness-evolver/harnesses/{version}/harness.py
+rm -f trace_insights.json best_results.json comparison.json production_seed.md production_seed.json
 ```
-Do not copy anything.
-### 5. Report
+### 4. Report
-- What was copied and where
-- Score improvement: baseline → deployed version
-- Suggest: review the diff before committing
+- What was done
+- LangSmith experiment URL for the best result
+- Suggest reviewing the changes before deploying to production