harness-evolver 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2026 Raphael Valdetaro Christi Cordeiro
3
+ Copyright (c) 2026 Raphael Valdetaro
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -106,6 +106,8 @@ Append a summary to `PROPOSER_HISTORY.md`.
106
106
 
107
107
  6. **Prefer readable harnesses over defensive ones.** If the harness has grown past 2x the baseline size without proportional score improvement, consider simplifying. Accumulated try/catch blocks, redundant fallbacks, and growing if-chains are a code smell in evolved harnesses.
108
108
 
109
+ 7. **Use available API keys from environment.** Check `config.json` field `api_keys` to see which LLM APIs are available (Anthropic, OpenAI, Gemini, OpenRouter, etc.). Always read keys via `os.environ.get("KEY_NAME")` — never hardcode values. If an evolution strategy requires an API that isn't available, note it in `proposal.md` and choose an alternative.
110
+
109
111
  ## Documentation Lookup (if Context7 available)
110
112
 
111
113
  - Read `config.json` field `stack.detected` to see which libraries the harness uses.
package/bin/install.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
3
  * Harness Evolver installer.
4
- * Interactive setup with runtime selection, global/local choice.
4
+ * Copies plugin to Claude Code plugin cache and registers it.
5
5
  *
6
6
  * Usage: npx harness-evolver@latest
7
7
  */
@@ -15,8 +15,8 @@ const VERSION = require("../package.json").version;
15
15
  const PLUGIN_ROOT = path.resolve(__dirname, "..");
16
16
  const HOME = process.env.HOME || process.env.USERPROFILE;
17
17
 
18
- // ANSI colors
19
- const CYAN = "\x1b[36m";
18
+ const MAGENTA = "\x1b[35m";
19
+ const BRIGHT_MAGENTA = "\x1b[95m";
20
20
  const GREEN = "\x1b[32m";
21
21
  const YELLOW = "\x1b[33m";
22
22
  const RED = "\x1b[31m";
@@ -25,13 +25,13 @@ const BOLD = "\x1b[1m";
25
25
  const RESET = "\x1b[0m";
26
26
 
27
27
  const LOGO = `
28
- ${CYAN} ██╗ ██╗ █████╗ ██████╗ ███╗ ██╗███████╗███████╗███████╗
28
+ ${BRIGHT_MAGENTA} ██╗ ██╗ █████╗ ██████╗ ███╗ ██╗███████╗███████╗███████╗
29
29
  ██║ ██║██╔══██╗██╔══██╗████╗ ██║██╔════╝██╔════╝██╔════╝
30
30
  ███████║███████║██████╔╝██╔██╗ ██║█████╗ ███████╗███████╗
31
31
  ██╔══██║██╔══██║██╔══██╗██║╚██╗██║██╔══╝ ╚════██║╚════██║
32
32
  ██║ ██║██║ ██║██║ ██║██║ ╚████║███████╗███████║███████║
33
33
  ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═══╝╚══════╝╚══════╝╚══════╝
34
- ${BOLD}███████╗██╗ ██╗ ██████╗ ██╗ ██╗ ██╗███████╗██████╗
34
+ ${MAGENTA}${BOLD}███████╗██╗ ██╗ ██████╗ ██╗ ██╗ ██╗███████╗██████╗
35
35
  ██╔════╝██║ ██║██╔═══██╗██║ ██║ ██║██╔════╝██╔══██╗
36
36
  █████╗ ██║ ██║██║ ██║██║ ██║ ██║█████╗ ██████╔╝
37
37
  ██╔══╝ ╚██╗ ██╔╝██║ ██║██║ ╚██╗ ██╔╝██╔══╝ ██╔══██╗
@@ -39,13 +39,6 @@ ${CYAN} ██╗ ██╗ █████╗ ██████╗ ███
39
39
  ╚══════╝ ╚═══╝ ╚═════╝ ╚══════╝ ╚═══╝ ╚══════╝╚═╝ ╚═╝${RESET}
40
40
  `;
41
41
 
42
- const RUNTIMES = [
43
- { name: "Claude Code", dir: ".claude", detected: () => fs.existsSync(path.join(HOME, ".claude")) },
44
- { name: "Cursor", dir: ".cursor", detected: () => fs.existsSync(path.join(HOME, ".cursor")) },
45
- { name: "Codex", dir: ".codex", detected: () => fs.existsSync(path.join(HOME, ".codex")) },
46
- { name: "Windsurf", dir: ".windsurf", detected: () => fs.existsSync(path.join(HOME, ".windsurf")) },
47
- ];
48
-
49
42
  function ask(rl, question) {
50
43
  return new Promise((resolve) => rl.question(question, resolve));
51
44
  }
@@ -56,6 +49,7 @@ function copyDir(src, dest) {
56
49
  const srcPath = path.join(src, entry.name);
57
50
  const destPath = path.join(dest, entry.name);
58
51
  if (entry.isDirectory()) {
52
+ if (entry.name === "node_modules" || entry.name === ".git" || entry.name === "__pycache__" || entry.name === "tests" || entry.name === "docs") continue;
59
53
  copyDir(srcPath, destPath);
60
54
  } else {
61
55
  fs.copyFileSync(srcPath, destPath);
@@ -63,11 +57,6 @@ function copyDir(src, dest) {
63
57
  }
64
58
  }
65
59
 
66
- function copyFile(src, dest) {
67
- fs.mkdirSync(path.dirname(dest), { recursive: true });
68
- fs.copyFileSync(src, dest);
69
- }
70
-
71
60
  function checkPython() {
72
61
  try {
73
62
  execSync("python3 --version", { stdio: "pipe" });
@@ -77,47 +66,80 @@ function checkPython() {
77
66
  }
78
67
  }
79
68
 
80
- function installForRuntime(runtimeDir, scope) {
69
/**
 * Read and parse a JSON file.
 *
 * Returns `null` when the file does not exist or is empty/whitespace-only, so
 * callers can keep using the `readJSON(p) || defaultValue` fallback.
 *
 * A file that exists but holds invalid JSON throws instead of returning
 * `null`: callers write their fallback object straight back to the same path,
 * so treating a malformed file as "absent" would silently destroy whatever
 * the user had in it.
 *
 * @param {string} filepath - Path of the JSON file to read.
 * @returns {*} The parsed value, or `null` if the file is missing or empty.
 * @throws {Error} If the file cannot be read for a reason other than not
 *   existing, or if its content is not valid JSON.
 */
function readJSON(filepath) {
  let raw;
  try {
    raw = fs.readFileSync(filepath, "utf8");
  } catch (err) {
    // Only "no such file" means "nothing there yet"; permission or I/O
    // problems must surface rather than be mistaken for an empty config.
    if (err.code === "ENOENT") return null;
    throw err;
  }

  // An empty file carries no data to lose, so treat it like a missing one.
  if (raw.trim() === "") return null;

  try {
    return JSON.parse(raw);
  } catch (err) {
    throw new Error(`Invalid JSON in ${filepath}: ${err.message}`, { cause: err });
  }
}
76
+
77
/**
 * Serialize `data` as 2-space-indented JSON with a trailing newline and write
 * it to `filepath`, creating any missing parent directories first.
 *
 * @param {string} filepath - Destination file path.
 * @param {*} data - Value to serialize with `JSON.stringify`.
 */
function writeJSON(filepath, data) {
  const parentDir = path.dirname(filepath);
  fs.mkdirSync(parentDir, { recursive: true });

  const serialized = `${JSON.stringify(data, null, 2)}\n`;
  fs.writeFileSync(filepath, serialized);
}
81
+
82
+ function installPlugin(runtimeDir, scope) {
81
83
  const baseDir = scope === "local"
82
84
  ? path.join(process.cwd(), runtimeDir)
83
85
  : path.join(HOME, runtimeDir);
84
86
 
85
- const commandsDir = path.join(baseDir, "commands", "harness-evolver");
86
- const agentsDir = path.join(baseDir, "agents");
87
-
88
- // Skills
89
- const skillsSource = path.join(PLUGIN_ROOT, "skills");
90
- if (fs.existsSync(skillsSource)) {
91
- for (const skill of fs.readdirSync(skillsSource, { withFileTypes: true })) {
92
- if (skill.isDirectory()) {
93
- copyDir(path.join(skillsSource, skill.name), path.join(commandsDir, skill.name));
94
- console.log(` ${GREEN}✓${RESET} Installed skill: ${skill.name}`);
95
- }
96
- }
97
- }
98
-
99
- // Agents
100
- const agentsSource = path.join(PLUGIN_ROOT, "agents");
101
- if (fs.existsSync(agentsSource)) {
102
- fs.mkdirSync(agentsDir, { recursive: true });
103
- for (const agent of fs.readdirSync(agentsSource)) {
104
- copyFile(path.join(agentsSource, agent), path.join(agentsDir, agent));
105
- console.log(` ${GREEN}✓${RESET} Installed agent: ${agent}`);
106
- }
107
- }
87
+ // 1. Copy plugin to cache
88
+ const cacheDir = path.join(baseDir, "plugins", "cache", "local", "harness-evolver", VERSION);
89
+ console.log(` Copying plugin to ${scope === "local" ? "." : "~"}/${runtimeDir}/plugins/cache/...`);
90
+ copyDir(PLUGIN_ROOT, cacheDir);
91
+ console.log(` ${GREEN}✓${RESET} Plugin files copied`);
92
+
93
+ // 2. Register in installed_plugins.json
94
+ const installedPath = path.join(baseDir, "plugins", "installed_plugins.json");
95
+ let installed = readJSON(installedPath) || { version: 2, plugins: {} };
96
+ if (!installed.plugins) installed.plugins = {};
97
+
98
+ installed.plugins["harness-evolver@local"] = [{
99
+ scope: "user",
100
+ installPath: cacheDir,
101
+ version: VERSION,
102
+ installedAt: new Date().toISOString(),
103
+ lastUpdated: new Date().toISOString(),
104
+ }];
105
+ writeJSON(installedPath, installed);
106
+ console.log(` ${GREEN}✓${RESET} Registered in installed_plugins.json`);
107
+
108
+ // 3. Enable in settings.json
109
+ const settingsPath = path.join(baseDir, "settings.json");
110
+ let settings = readJSON(settingsPath) || {};
111
+ if (!settings.enabledPlugins) settings.enabledPlugins = {};
112
+ settings.enabledPlugins["harness-evolver@local"] = true;
113
+ writeJSON(settingsPath, settings);
114
+ console.log(` ${GREEN}✓${RESET} Enabled in settings.json`);
115
+
116
+ // Count installed items
117
+ const skillCount = fs.existsSync(path.join(cacheDir, "skills"))
118
+ ? fs.readdirSync(path.join(cacheDir, "skills")).filter(f =>
119
+ fs.statSync(path.join(cacheDir, "skills", f)).isDirectory()
120
+ ).length
121
+ : 0;
122
+ const agentCount = fs.existsSync(path.join(cacheDir, "agents"))
123
+ ? fs.readdirSync(path.join(cacheDir, "agents")).length
124
+ : 0;
125
+ const toolCount = fs.existsSync(path.join(cacheDir, "tools"))
126
+ ? fs.readdirSync(path.join(cacheDir, "tools")).filter(f => f.endsWith(".py")).length
127
+ : 0;
128
+
129
+ console.log(` ${GREEN}✓${RESET} ${skillCount} skills, ${agentCount} agent, ${toolCount} tools`);
108
130
  }
109
131
 
110
- function installTools() {
132
+ function installToolsGlobal() {
111
133
  const toolsDir = path.join(HOME, ".harness-evolver", "tools");
112
134
  const toolsSource = path.join(PLUGIN_ROOT, "tools");
113
135
  if (fs.existsSync(toolsSource)) {
114
136
  fs.mkdirSync(toolsDir, { recursive: true });
115
137
  for (const tool of fs.readdirSync(toolsSource)) {
116
138
  if (tool.endsWith(".py")) {
117
- copyFile(path.join(toolsSource, tool), path.join(toolsDir, tool));
118
- console.log(` ${GREEN}✓${RESET} Installed tool: ${tool}`);
139
+ fs.copyFileSync(path.join(toolsSource, tool), path.join(toolsDir, tool));
119
140
  }
120
141
  }
142
+ console.log(` ${GREEN}✓${RESET} Tools copied to ~/.harness-evolver/tools/`);
121
143
  }
122
144
  }
123
145
 
@@ -126,7 +148,7 @@ function installExamples() {
126
148
  const examplesSource = path.join(PLUGIN_ROOT, "examples");
127
149
  if (fs.existsSync(examplesSource)) {
128
150
  copyDir(examplesSource, examplesDir);
129
- console.log(` ${GREEN}✓${RESET} Installed examples: classifier`);
151
+ console.log(` ${GREEN}✓${RESET} Examples copied to ~/.harness-evolver/examples/`);
130
152
  }
131
153
  }
132
154
 
@@ -136,7 +158,6 @@ async function main() {
136
158
  console.log(` ${DIM}Meta-Harness-style autonomous harness optimization${RESET}`);
137
159
  console.log();
138
160
 
139
- // Check python
140
161
  if (!checkPython()) {
141
162
  console.error(` ${RED}ERROR:${RESET} python3 not found in PATH. Install Python 3.8+ first.`);
142
163
  process.exit(1);
@@ -144,8 +165,14 @@ async function main() {
144
165
  console.log(` ${GREEN}✓${RESET} python3 found`);
145
166
 
146
167
  // Detect runtimes
147
- const available = RUNTIMES.filter((r) => r.detected());
148
- if (available.length === 0) {
168
+ const RUNTIMES = [
169
+ { name: "Claude Code", dir: ".claude" },
170
+ { name: "Cursor", dir: ".cursor" },
171
+ { name: "Codex", dir: ".codex" },
172
+ { name: "Windsurf", dir: ".windsurf" },
173
+ ].filter(r => fs.existsSync(path.join(HOME, r.dir)));
174
+
175
+ if (RUNTIMES.length === 0) {
149
176
  console.error(`\n ${RED}ERROR:${RESET} No supported runtime detected.`);
150
177
  console.error(` Install Claude Code, Cursor, Codex, or Windsurf first.`);
151
178
  process.exit(1);
@@ -155,74 +182,62 @@ async function main() {
155
182
 
156
183
  // Runtime selection
157
184
  console.log(`\n ${YELLOW}Which runtime(s) would you like to install for?${RESET}\n`);
158
- available.forEach((r, i) => {
159
- console.log(` ${i + 1}) ${r.name.padEnd(14)} (~/${r.dir})`);
160
- });
161
- if (available.length > 1) {
162
- console.log(` ${available.length + 1}) All`);
185
+ RUNTIMES.forEach((r, i) => console.log(` ${i + 1}) ${r.name.padEnd(14)} (~/${r.dir})`));
186
+ if (RUNTIMES.length > 1) {
187
+ console.log(` ${RUNTIMES.length + 1}) All`);
163
188
  console.log(`\n ${DIM}Select multiple: 1,2 or 1 2${RESET}`);
164
189
  }
165
190
 
166
- const defaultChoice = "1";
167
- const runtimeAnswer = await ask(rl, `\n ${YELLOW}Choice [${defaultChoice}]:${RESET} `);
168
- const runtimeInput = (runtimeAnswer.trim() || defaultChoice);
191
+ const runtimeAnswer = await ask(rl, `\n ${YELLOW}Choice [1]:${RESET} `);
192
+ const runtimeInput = (runtimeAnswer.trim() || "1");
169
193
 
170
- let selectedRuntimes;
171
- if (runtimeInput === String(available.length + 1)) {
172
- selectedRuntimes = available;
194
+ let selected;
195
+ if (runtimeInput === String(RUNTIMES.length + 1)) {
196
+ selected = RUNTIMES;
173
197
  } else {
174
- const indices = runtimeInput.split(/[,\s]+/).map((s) => parseInt(s, 10) - 1);
175
- selectedRuntimes = indices
176
- .filter((i) => i >= 0 && i < available.length)
177
- .map((i) => available[i]);
178
- }
179
-
180
- if (selectedRuntimes.length === 0) {
181
- selectedRuntimes = [available[0]];
198
+ const indices = runtimeInput.split(/[,\s]+/).map(s => parseInt(s, 10) - 1);
199
+ selected = indices.filter(i => i >= 0 && i < RUNTIMES.length).map(i => RUNTIMES[i]);
182
200
  }
201
+ if (selected.length === 0) selected = [RUNTIMES[0]];
183
202
 
184
203
  // Scope selection
185
204
  console.log(`\n ${YELLOW}Where would you like to install?${RESET}\n`);
186
- console.log(` 1) Global (~/${selectedRuntimes[0].dir}) - available in all projects`);
187
- console.log(` 2) Local (./${selectedRuntimes[0].dir}) - this project only`);
205
+ console.log(` 1) Global (~/${selected[0].dir}) - available in all projects`);
206
+ console.log(` 2) Local (./${selected[0].dir}) - this project only`);
188
207
 
189
208
  const scopeAnswer = await ask(rl, `\n ${YELLOW}Choice [1]:${RESET} `);
190
209
  const scope = (scopeAnswer.trim() === "2") ? "local" : "global";
191
210
 
192
211
  console.log();
193
212
 
194
- // Install for each selected runtime
195
- for (const runtime of selectedRuntimes) {
196
- const target = scope === "local" ? `./${runtime.dir}` : `~/${runtime.dir}`;
197
- console.log(` Installing for ${CYAN}${runtime.name}${RESET} to ${target}`);
213
+ // Install
214
+ for (const runtime of selected) {
215
+ console.log(` Installing for ${BRIGHT_MAGENTA}${runtime.name}${RESET}\n`);
216
+ installPlugin(runtime.dir, scope);
198
217
  console.log();
199
- installForRuntime(runtime.dir, scope);
200
218
  }
201
219
 
202
- // Tools and examples are always global
203
- installTools();
220
+ installToolsGlobal();
204
221
  installExamples();
205
222
 
206
- // Write version file
223
+ // Version marker
207
224
  const versionPath = path.join(HOME, ".harness-evolver", "VERSION");
208
225
  fs.mkdirSync(path.dirname(versionPath), { recursive: true });
209
226
  fs.writeFileSync(versionPath, VERSION);
210
- console.log(` ${GREEN}✓${RESET} Wrote VERSION (${VERSION})`);
227
+ console.log(` ${GREEN}✓${RESET} VERSION ${VERSION}`);
211
228
 
212
- console.log(`\n ${GREEN}Done!${RESET} Open a project in Claude Code and run ${CYAN}/harness-evolver:init${RESET}`);
229
+ console.log(`\n ${GREEN}Done!${RESET} Open a project in Claude Code and run ${BRIGHT_MAGENTA}/harness-evolver:init${RESET}`);
213
230
  console.log(`\n ${DIM}Quick start with example:${RESET}`);
214
231
  console.log(` cp -r ~/.harness-evolver/examples/classifier ./my-project`);
215
232
  console.log(` cd my-project && claude`);
216
233
  console.log(` /harness-evolver:init`);
217
234
  console.log(` /harness-evolver:evolve`);
218
-
219
- console.log(`\n ${DIM}GitHub: https://github.com/raphaelchristi/harness-evolver${RESET}`);
220
- console.log();
235
+ console.log(`\n ${DIM}GitHub: https://github.com/raphaelchristi/harness-evolver${RESET}\n`);
221
236
 
222
237
  rl.close();
223
238
  }
224
239
 
225
- main().catch((err) => {
240
+ main().catch(err => {
226
241
  console.error(` ${RED}ERROR:${RESET} ${err.message}`);
227
242
  process.exit(1);
228
243
  });
@@ -39,7 +39,7 @@ def classify_mock(text):
39
39
  def classify_llm(text, config):
40
40
  import urllib.request
41
41
 
42
- api_key = config.get("api_key", os.environ.get("ANTHROPIC_API_KEY", ""))
42
+ api_key = os.environ.get("ANTHROPIC_API_KEY", "")
43
43
  model = config.get("model", "claude-haiku-4-5-20251001")
44
44
 
45
45
  prompt = (
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "harness-evolver",
3
- "version": "0.5.0",
3
+ "version": "0.6.0",
4
4
  "description": "Meta-Harness-style autonomous harness optimization for Claude Code",
5
- "author": "Raphael Valdetaro Christi Cordeiro",
5
+ "author": "Raphael Valdetaro",
6
6
  "license": "MIT",
7
7
  "repository": {
8
8
  "type": "git",
@@ -1,5 +1,5 @@
1
1
  ---
2
- name: compare
2
+ name: harness-evolver:compare
3
3
  description: "Use when the user wants to compare two harness versions, understand what changed between iterations, see why one version scored better than another, or debug a regression."
4
4
  argument-hint: "<vA> <vB>"
5
5
  allowed-tools: [Read, Bash, Glob, Grep]
@@ -1,5 +1,5 @@
1
1
  ---
2
- name: deploy
2
+ name: harness-evolver:deploy
3
3
  description: "Use when the user wants to use the best evolved harness in their project, promote a version to production, copy the winning harness back, or is done evolving and wants to apply the result."
4
4
  argument-hint: "[version]"
5
5
  allowed-tools: [Read, Write, Bash, Glob]
@@ -1,5 +1,5 @@
1
1
  ---
2
- name: diagnose
2
+ name: harness-evolver:diagnose
3
3
  description: "Use when the user wants to understand why a specific harness version failed, investigate a regression, analyze trace data, or debug a low score. Also use when the user says 'why did v003 fail' or 'what went wrong'."
4
4
  argument-hint: "[version]"
5
5
  allowed-tools: [Read, Bash, Glob, Grep]
@@ -1,5 +1,5 @@
1
1
  ---
2
- name: evolve
2
+ name: harness-evolver:evolve
3
3
  description: "Use when the user wants to run the optimization loop, improve harness performance, evolve the harness, or iterate on harness quality. Requires .harness-evolver/ to exist (run harness-evolver:init first)."
4
4
  argument-hint: "[--iterations N]"
5
5
  allowed-tools: [Read, Write, Edit, Bash, Glob, Grep, Agent]
@@ -1,5 +1,5 @@
1
1
  ---
2
- name: init
2
+ name: harness-evolver:init
3
3
  description: "Use when the user wants to set up harness optimization in their project, optimize an LLM agent, improve a harness, or mentions harness-evolver for the first time in a project without .harness-evolver/ directory."
4
4
  argument-hint: "[directory]"
5
5
  allowed-tools: [Read, Write, Edit, Bash, Glob, Grep, Agent]
@@ -1,5 +1,5 @@
1
1
  ---
2
- name: status
2
+ name: harness-evolver:status
3
3
  description: "Use when the user asks about evolution progress, current scores, best harness version, how many iterations ran, or whether the loop is stagnating. Also use when the user says 'status', 'progress', or 'how is it going'."
4
4
  allowed-tools: [Read, Bash]
5
5
  ---
package/tools/init.py CHANGED
@@ -89,6 +89,30 @@ def _auto_detect(search_dir):
89
89
  return harness, eval_script, tasks, config
90
90
 
91
91
 
92
+ def _detect_api_keys():
93
+ """Detect which LLM/service API keys are available in the environment."""
94
+ KNOWN_KEYS = {
95
+ "ANTHROPIC_API_KEY": "Anthropic (Claude)",
96
+ "OPENAI_API_KEY": "OpenAI (GPT)",
97
+ "GOOGLE_API_KEY": "Google (Gemini)",
98
+ "GEMINI_API_KEY": "Google Gemini",
99
+ "OPENROUTER_API_KEY": "OpenRouter",
100
+ "LANGSMITH_API_KEY": "LangSmith",
101
+ "TOGETHER_API_KEY": "Together AI",
102
+ "GROQ_API_KEY": "Groq",
103
+ "MISTRAL_API_KEY": "Mistral",
104
+ "COHERE_API_KEY": "Cohere",
105
+ "FIREWORKS_API_KEY": "Fireworks AI",
106
+ "DEEPSEEK_API_KEY": "DeepSeek",
107
+ "XAI_API_KEY": "xAI (Grok)",
108
+ }
109
+ detected = {}
110
+ for env_var, display_name in KNOWN_KEYS.items():
111
+ if os.environ.get(env_var):
112
+ detected[env_var] = {"name": display_name, "status": "detected"}
113
+ return detected
114
+
115
+
92
116
  def _detect_langsmith():
93
117
  """Auto-detect LangSmith API key and return config section."""
94
118
  if os.environ.get("LANGSMITH_API_KEY"):
@@ -252,9 +276,19 @@ def main():
252
276
  "harnesses": "harnesses/",
253
277
  },
254
278
  }
279
+ # Detect API keys available in environment
280
+ api_keys = _detect_api_keys()
281
+ config["api_keys"] = api_keys
282
+
255
283
  with open(os.path.join(base, "config.json"), "w") as f:
256
284
  json.dump(config, f, indent=2)
257
285
 
286
+ if api_keys:
287
+ print("API keys detected:")
288
+ for env_var, info in api_keys.items():
289
+ print(f" {info['name']} ({env_var})")
290
+ print()
291
+
258
292
  ls_config = config["eval"].get("langsmith", {})
259
293
  if ls_config.get("enabled"):
260
294
  print(" LangSmith tracing enabled (LANGSMITH_API_KEY detected)")