harness-evolver 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/harness-evolver-proposer.md +2 -0
- package/bin/install.js +156 -52
- package/examples/classifier/harness.py +1 -1
- package/package.json +1 -1
- package/tools/init.py +34 -0
|
@@ -106,6 +106,8 @@ Append a summary to `PROPOSER_HISTORY.md`.
|
|
|
106
106
|
|
|
107
107
|
6. **Prefer readable harnesses over defensive ones.** If the harness has grown past 2x the baseline size without proportional score improvement, consider simplifying. Accumulated try/catch blocks, redundant fallbacks, and growing if-chains are a code smell in evolved harnesses.
|
|
108
108
|
|
|
109
|
+
7. **Use available API keys from environment.** Check `config.json` field `api_keys` to see which LLM APIs are available (Anthropic, OpenAI, Gemini, OpenRouter, etc.). Always read keys via `os.environ.get("KEY_NAME")` — never hardcode values. If an evolution strategy requires an API that isn't available, note it in `proposal.md` and choose an alternative.
|
|
110
|
+
|
|
109
111
|
## Documentation Lookup (if Context7 available)
|
|
110
112
|
|
|
111
113
|
- Read `config.json` field `stack.detected` to see which libraries the harness uses.
|
package/bin/install.js
CHANGED
|
@@ -1,26 +1,54 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
3
|
* Harness Evolver installer.
|
|
4
|
-
*
|
|
4
|
+
* Interactive setup with runtime selection, global/local choice.
|
|
5
5
|
*
|
|
6
6
|
* Usage: npx harness-evolver@latest
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
const fs = require("fs");
|
|
10
10
|
const path = require("path");
|
|
11
|
+
const readline = require("readline");
|
|
11
12
|
const { execSync } = require("child_process");
|
|
12
13
|
|
|
14
|
+
const VERSION = require("../package.json").version;
|
|
13
15
|
const PLUGIN_ROOT = path.resolve(__dirname, "..");
|
|
14
16
|
const HOME = process.env.HOME || process.env.USERPROFILE;
|
|
15
17
|
|
|
16
|
-
|
|
17
|
-
const
|
|
18
|
-
const
|
|
19
|
-
const
|
|
20
|
-
const
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
18
|
+
// ANSI colors
|
|
19
|
+
const MAGENTA = "\x1b[35m";
|
|
20
|
+
const BRIGHT_MAGENTA = "\x1b[95m";
|
|
21
|
+
const GREEN = "\x1b[32m";
|
|
22
|
+
const YELLOW = "\x1b[33m";
|
|
23
|
+
const RED = "\x1b[31m";
|
|
24
|
+
const DIM = "\x1b[2m";
|
|
25
|
+
const BOLD = "\x1b[1m";
|
|
26
|
+
const RESET = "\x1b[0m";
|
|
27
|
+
|
|
28
|
+
const LOGO = `
|
|
29
|
+
${BRIGHT_MAGENTA} ██╗ ██╗ █████╗ ██████╗ ███╗ ██╗███████╗███████╗███████╗
|
|
30
|
+
██║ ██║██╔══██╗██╔══██╗████╗ ██║██╔════╝██╔════╝██╔════╝
|
|
31
|
+
███████║███████║██████╔╝██╔██╗ ██║█████╗ ███████╗███████╗
|
|
32
|
+
██╔══██║██╔══██║██╔══██╗██║╚██╗██║██╔══╝ ╚════██║╚════██║
|
|
33
|
+
██║ ██║██║ ██║██║ ██║██║ ╚████║███████╗███████║███████║
|
|
34
|
+
╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═══╝╚══════╝╚══════╝╚══════╝
|
|
35
|
+
${MAGENTA}${BOLD}███████╗██╗ ██╗ ██████╗ ██╗ ██╗ ██╗███████╗██████╗
|
|
36
|
+
██╔════╝██║ ██║██╔═══██╗██║ ██║ ██║██╔════╝██╔══██╗
|
|
37
|
+
█████╗ ██║ ██║██║ ██║██║ ██║ ██║█████╗ ██████╔╝
|
|
38
|
+
██╔══╝ ╚██╗ ██╔╝██║ ██║██║ ╚██╗ ██╔╝██╔══╝ ██╔══██╗
|
|
39
|
+
███████╗ ╚████╔╝ ╚██████╔╝███████╗╚████╔╝ ███████╗██║ ██║
|
|
40
|
+
╚══════╝ ╚═══╝ ╚═════╝ ╚══════╝ ╚═══╝ ╚══════╝╚═╝ ╚═╝${RESET}
|
|
41
|
+
`;
|
|
42
|
+
|
|
43
|
+
const RUNTIMES = [
|
|
44
|
+
{ name: "Claude Code", dir: ".claude", detected: () => fs.existsSync(path.join(HOME, ".claude")) },
|
|
45
|
+
{ name: "Cursor", dir: ".cursor", detected: () => fs.existsSync(path.join(HOME, ".cursor")) },
|
|
46
|
+
{ name: "Codex", dir: ".codex", detected: () => fs.existsSync(path.join(HOME, ".codex")) },
|
|
47
|
+
{ name: "Windsurf", dir: ".windsurf", detected: () => fs.existsSync(path.join(HOME, ".windsurf")) },
|
|
48
|
+
];
|
|
49
|
+
|
|
50
|
+
/**
 * Prompt the user and wait for their reply.
 *
 * @param {object} rl - Object exposing `question(query, callback)`, such as a
 *   readline interface.
 * @param {string} question - Prompt text handed to `rl.question`.
 * @returns {Promise<string>} Resolves with the value `rl.question` passes to
 *   its callback.
 */
function ask(rl, question) {
  return new Promise((resolve) => {
    rl.question(question, (answer) => resolve(answer));
  });
}
|
|
25
53
|
|
|
26
54
|
function copyDir(src, dest) {
|
|
@@ -50,76 +78,152 @@ function checkPython() {
|
|
|
50
78
|
}
|
|
51
79
|
}
|
|
52
80
|
|
|
53
|
-
function
|
|
54
|
-
|
|
81
|
+
/**
 * Install the plugin's skills and agents into one runtime's config directory.
 *
 * Each sub-directory of `<PLUGIN_ROOT>/skills` is copied to
 * `<base>/commands/harness-evolver/<skill>`, and each non-directory entry of
 * `<PLUGIN_ROOT>/agents` is copied to `<base>/agents/<entry>`.
 *
 * @param {string} runtimeDir - Runtime directory name, e.g. ".claude".
 * @param {string} scope - "local" installs under the current working
 *   directory; any other value installs under HOME.
 */
function installForRuntime(runtimeDir, scope) {
  const baseDir = scope === "local"
    ? path.join(process.cwd(), runtimeDir)
    : path.join(HOME, runtimeDir);

  const commandsDir = path.join(baseDir, "commands", "harness-evolver");
  const agentsDir = path.join(baseDir, "agents");

  // Skills
  const skillsSource = path.join(PLUGIN_ROOT, "skills");
  if (fs.existsSync(skillsSource)) {
    for (const skill of fs.readdirSync(skillsSource, { withFileTypes: true })) {
      if (skill.isDirectory()) {
        copyDir(path.join(skillsSource, skill.name), path.join(commandsDir, skill.name));
        console.log(` ${GREEN}✓${RESET} Installed skill: ${skill.name}`);
      }
    }
  }

  // Agents
  const agentsSource = path.join(PLUGIN_ROOT, "agents");
  if (fs.existsSync(agentsSource)) {
    fs.mkdirSync(agentsDir, { recursive: true });
    for (const agent of fs.readdirSync(agentsSource, { withFileTypes: true })) {
      // Skip sub-directories: copyFile (defined elsewhere in this file) is
      // presumably a single-file copy and would fail on a directory.
      if (agent.isDirectory()) {
        continue;
      }
      copyFile(path.join(agentsSource, agent.name), path.join(agentsDir, agent.name));
      console.log(` ${GREEN}✓${RESET} Installed agent: ${agent.name}`);
    }
  }
}
|
|
94
110
|
|
|
95
|
-
|
|
111
|
+
/**
 * Copy the plugin's Python tools into `<HOME>/.harness-evolver/tools`.
 *
 * Only entries of `<PLUGIN_ROOT>/tools` whose name ends in ".py" are copied.
 * Does nothing when the source directory is absent.
 */
function installTools() {
  const toolsDir = path.join(HOME, ".harness-evolver", "tools");
  const toolsSource = path.join(PLUGIN_ROOT, "tools");
  if (!fs.existsSync(toolsSource)) {
    return;
  }

  fs.mkdirSync(toolsDir, { recursive: true });
  const pythonTools = fs.readdirSync(toolsSource).filter((entry) => entry.endsWith(".py"));
  for (const script of pythonTools) {
    const from = path.join(toolsSource, script);
    const to = path.join(toolsDir, script);
    copyFile(from, to);
    console.log(` ${GREEN}✓${RESET} Installed tool: ${script}`);
  }
}
|
|
109
124
|
|
|
110
|
-
|
|
125
|
+
/**
 * Copy the bundled examples tree into `<HOME>/.harness-evolver/examples`.
 *
 * Does nothing when `<PLUGIN_ROOT>/examples` is absent.
 */
function installExamples() {
  const examplesDir = path.join(HOME, ".harness-evolver", "examples");
  const examplesSource = path.join(PLUGIN_ROOT, "examples");
  if (!fs.existsSync(examplesSource)) {
    return;
  }

  copyDir(examplesSource, examplesDir);
  console.log(` ${GREEN}✓${RESET} Installed examples: classifier`);
}
|
|
133
|
+
|
|
134
|
+
/**
 * Interactive installer entry point.
 *
 * Flow: print the banner, require python3, detect installed runtimes, ask
 * which runtime(s) and which scope (global or local) to install for, copy
 * skills and agents per runtime, then install tools and examples and write
 * the VERSION file under `<HOME>/.harness-evolver`.
 *
 * Exits the process with code 1 when python3 or every supported runtime is
 * missing.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log(LOGO);
  console.log(` ${DIM}Harness Evolver v${VERSION}${RESET}`);
  console.log(` ${DIM}Meta-Harness-style autonomous harness optimization${RESET}`);
  console.log();

  // Check python
  if (!checkPython()) {
    console.error(` ${RED}ERROR:${RESET} python3 not found in PATH. Install Python 3.8+ first.`);
    process.exit(1);
  }
  console.log(` ${GREEN}✓${RESET} python3 found`);

  // Detect runtimes
  const available = RUNTIMES.filter((r) => r.detected());
  if (available.length === 0) {
    console.error(`\n ${RED}ERROR:${RESET} No supported runtime detected.`);
    console.error(` Install Claude Code, Cursor, Codex, or Windsurf first.`);
    process.exit(1);
  }

  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });

  // try/finally guarantees the readline interface is released even when an
  // install step throws part-way through.
  try {
    // Runtime selection
    console.log(`\n ${YELLOW}Which runtime(s) would you like to install for?${RESET}\n`);
    available.forEach((r, i) => {
      console.log(` ${i + 1}) ${r.name.padEnd(14)} (~/${r.dir})`);
    });
    if (available.length > 1) {
      console.log(` ${available.length + 1}) All`);
      console.log(`\n ${DIM}Select multiple: 1,2 or 1 2${RESET}`);
    }

    const defaultChoice = "1";
    const runtimeAnswer = await ask(rl, `\n ${YELLOW}Choice [${defaultChoice}]:${RESET} `);
    const runtimeInput = runtimeAnswer.trim() || defaultChoice;

    let selectedRuntimes;
    if (runtimeInput === String(available.length + 1)) {
      selectedRuntimes = available;
    } else {
      // Non-numeric tokens parse to NaN and fail both range comparisons.
      const indices = runtimeInput
        .split(/[,\s]+/)
        .map((s) => Number.parseInt(s, 10) - 1)
        .filter((i) => i >= 0 && i < available.length);
      // De-duplicate so input like "1,1" does not install the same runtime
      // twice; Set keeps first-seen order.
      selectedRuntimes = [...new Set(indices)].map((i) => available[i]);
    }

    // Nothing valid was entered: fall back to the first detected runtime.
    if (selectedRuntimes.length === 0) {
      selectedRuntimes = [available[0]];
    }

    // Scope selection
    console.log(`\n ${YELLOW}Where would you like to install?${RESET}\n`);
    console.log(` 1) Global (~/${selectedRuntimes[0].dir}) - available in all projects`);
    console.log(` 2) Local (./${selectedRuntimes[0].dir}) - this project only`);

    const scopeAnswer = await ask(rl, `\n ${YELLOW}Choice [1]:${RESET} `);
    const scope = scopeAnswer.trim() === "2" ? "local" : "global";

    console.log();

    // Install for each selected runtime
    for (const runtime of selectedRuntimes) {
      const target = scope === "local" ? `./${runtime.dir}` : `~/${runtime.dir}`;
      console.log(` Installing for ${BRIGHT_MAGENTA}${runtime.name}${RESET} to ${target}`);
      console.log();
      installForRuntime(runtime.dir, scope);
    }

    // Tools and examples are always global
    installTools();
    installExamples();

    // Write version file
    const versionPath = path.join(HOME, ".harness-evolver", "VERSION");
    fs.mkdirSync(path.dirname(versionPath), { recursive: true });
    fs.writeFileSync(versionPath, VERSION);
    console.log(` ${GREEN}✓${RESET} Wrote VERSION (${VERSION})`);

    console.log(`\n ${GREEN}Done!${RESET} Open a project in Claude Code and run ${BRIGHT_MAGENTA}/harness-evolver:init${RESET}`);
    console.log(`\n ${DIM}Quick start with example:${RESET}`);
    console.log(` cp -r ~/.harness-evolver/examples/classifier ./my-project`);
    console.log(` cd my-project && claude`);
    console.log(` /harness-evolver:init`);
    console.log(` /harness-evolver:evolve`);

    console.log(`\n ${DIM}GitHub: https://github.com/raphaelchristi/harness-evolver${RESET}`);
    console.log();
  } finally {
    rl.close();
  }
}
|
|
124
225
|
|
|
125
|
-
main()
|
|
226
|
+
main().catch((err) => {
|
|
227
|
+
console.error(` ${RED}ERROR:${RESET} ${err.message}`);
|
|
228
|
+
process.exit(1);
|
|
229
|
+
});
|
|
@@ -39,7 +39,7 @@ def classify_mock(text):
|
|
|
39
39
|
def classify_llm(text, config):
|
|
40
40
|
import urllib.request
|
|
41
41
|
|
|
42
|
-
api_key =
|
|
42
|
+
api_key = os.environ.get("ANTHROPIC_API_KEY", "")
|
|
43
43
|
model = config.get("model", "claude-haiku-4-5-20251001")
|
|
44
44
|
|
|
45
45
|
prompt = (
|
package/package.json
CHANGED
package/tools/init.py
CHANGED
|
@@ -89,6 +89,30 @@ def _auto_detect(search_dir):
|
|
|
89
89
|
return harness, eval_script, tasks, config
|
|
90
90
|
|
|
91
91
|
|
|
92
|
+
def _detect_api_keys():
    """Report which known LLM/service API keys are set in the environment.

    Returns a dict keyed by environment-variable name. Each value is
    ``{"name": <display name>, "status": "detected"}``. Variables that are
    unset or empty are omitted, and the key values themselves are never
    included in the result.
    """
    # (environment variable, display name), in reporting order.
    providers = (
        ("ANTHROPIC_API_KEY", "Anthropic (Claude)"),
        ("OPENAI_API_KEY", "OpenAI (GPT)"),
        ("GOOGLE_API_KEY", "Google (Gemini)"),
        ("GEMINI_API_KEY", "Google Gemini"),
        ("OPENROUTER_API_KEY", "OpenRouter"),
        ("LANGSMITH_API_KEY", "LangSmith"),
        ("TOGETHER_API_KEY", "Together AI"),
        ("GROQ_API_KEY", "Groq"),
        ("MISTRAL_API_KEY", "Mistral"),
        ("COHERE_API_KEY", "Cohere"),
        ("FIREWORKS_API_KEY", "Fireworks AI"),
        ("DEEPSEEK_API_KEY", "DeepSeek"),
        ("XAI_API_KEY", "xAI (Grok)"),
    )
    return {
        variable: {"name": label, "status": "detected"}
        for variable, label in providers
        if os.environ.get(variable)
    }
|
|
114
|
+
|
|
115
|
+
|
|
92
116
|
def _detect_langsmith():
|
|
93
117
|
"""Auto-detect LangSmith API key and return config section."""
|
|
94
118
|
if os.environ.get("LANGSMITH_API_KEY"):
|
|
@@ -252,9 +276,19 @@ def main():
|
|
|
252
276
|
"harnesses": "harnesses/",
|
|
253
277
|
},
|
|
254
278
|
}
|
|
279
|
+
# Detect API keys available in environment
|
|
280
|
+
api_keys = _detect_api_keys()
|
|
281
|
+
config["api_keys"] = api_keys
|
|
282
|
+
|
|
255
283
|
with open(os.path.join(base, "config.json"), "w") as f:
|
|
256
284
|
json.dump(config, f, indent=2)
|
|
257
285
|
|
|
286
|
+
if api_keys:
|
|
287
|
+
print("API keys detected:")
|
|
288
|
+
for env_var, info in api_keys.items():
|
|
289
|
+
print(f" {info['name']} ({env_var})")
|
|
290
|
+
print()
|
|
291
|
+
|
|
258
292
|
ls_config = config["eval"].get("langsmith", {})
|
|
259
293
|
if ls_config.get("enabled"):
|
|
260
294
|
print(" LangSmith tracing enabled (LANGSMITH_API_KEY detected)")
|