harness-evolver 3.0.3 → 3.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/install.js +106 -37
- package/package.json +1 -1
- package/skills/evolve/SKILL.md +9 -6
- package/skills/setup/SKILL.md +6 -9
package/bin/install.js
CHANGED
|
@@ -190,71 +190,140 @@ function installTools() {
|
|
|
190
190
|
}
|
|
191
191
|
|
|
192
192
|
function installPythonDeps() {
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
193
|
+
const venvDir = path.join(HOME, ".evolver", "venv");
|
|
194
|
+
const venvPython = path.join(venvDir, "bin", "python");
|
|
195
|
+
const venvPip = path.join(venvDir, "bin", "pip");
|
|
196
|
+
|
|
197
|
+
console.log(`\n ${YELLOW}Setting up Python environment...${RESET}`);
|
|
198
|
+
|
|
199
|
+
// Create venv if it doesn't exist
|
|
200
|
+
if (!fs.existsSync(venvPython)) {
|
|
201
|
+
console.log(` Creating isolated venv at ~/.evolver/venv/`);
|
|
202
|
+
const venvCommands = [
|
|
203
|
+
`uv venv "${venvDir}"`,
|
|
204
|
+
`python3 -m venv "${venvDir}"`,
|
|
205
|
+
];
|
|
206
|
+
let created = false;
|
|
207
|
+
for (const cmd of venvCommands) {
|
|
208
|
+
try {
|
|
209
|
+
execSync(cmd, { stdio: "pipe", timeout: 30000 });
|
|
210
|
+
created = true;
|
|
211
|
+
break;
|
|
212
|
+
} catch {
|
|
213
|
+
continue;
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
if (!created) {
|
|
217
|
+
console.log(` ${RED}Failed to create venv.${RESET}`);
|
|
218
|
+
console.log(` Run manually: ${BOLD}python3 -m venv ~/.evolver/venv${RESET}`);
|
|
219
|
+
return false;
|
|
220
|
+
}
|
|
221
|
+
console.log(` ${GREEN}✓${RESET} venv created`);
|
|
222
|
+
} else {
|
|
223
|
+
console.log(` ${GREEN}✓${RESET} venv exists at ~/.evolver/venv/`);
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
// Install/upgrade deps in the venv
|
|
227
|
+
const installCommands = [
|
|
228
|
+
`uv pip install --python "${venvPython}" langsmith openevals`,
|
|
229
|
+
`"${venvPip}" install --upgrade langsmith openevals`,
|
|
230
|
+
`"${venvPython}" -m pip install --upgrade langsmith openevals`,
|
|
201
231
|
];
|
|
202
232
|
|
|
203
|
-
for (const cmd of
|
|
233
|
+
for (const cmd of installCommands) {
|
|
204
234
|
try {
|
|
205
235
|
execSync(cmd, { stdio: "pipe", timeout: 120000 });
|
|
206
|
-
console.log(` ${GREEN}✓${RESET} langsmith + openevals installed`);
|
|
236
|
+
console.log(` ${GREEN}✓${RESET} langsmith + openevals installed in venv`);
|
|
207
237
|
return true;
|
|
208
238
|
} catch {
|
|
209
239
|
continue;
|
|
210
240
|
}
|
|
211
241
|
}
|
|
212
242
|
|
|
213
|
-
console.log(` ${YELLOW}!${RESET} Could not
|
|
214
|
-
console.log(` Run manually: ${BOLD}pip install langsmith openevals${RESET}`);
|
|
243
|
+
console.log(` ${YELLOW}!${RESET} Could not install packages in venv.`);
|
|
244
|
+
console.log(` Run manually: ${BOLD}~/.evolver/venv/bin/pip install langsmith openevals${RESET}`);
|
|
215
245
|
return false;
|
|
216
246
|
}
|
|
217
247
|
|
|
218
248
|
async function configureLangSmith(rl) {
|
|
219
|
-
console.log(`\n ${
|
|
249
|
+
console.log(`\n ${BOLD}${GREEN}LangSmith Configuration${RESET} ${DIM}(required)${RESET}\n`);
|
|
220
250
|
|
|
221
|
-
// Check if already configured
|
|
222
251
|
const langsmithCredsDir = process.platform === "darwin"
|
|
223
252
|
? path.join(HOME, "Library", "Application Support", "langsmith-cli")
|
|
224
253
|
: path.join(HOME, ".config", "langsmith-cli");
|
|
225
254
|
const langsmithCredsFile = path.join(langsmithCredsDir, "credentials");
|
|
255
|
+
const hasLangsmithCli = checkCommand("langsmith-cli --version");
|
|
256
|
+
|
|
257
|
+
// --- Step 1: API Key ---
|
|
258
|
+
let hasKey = false;
|
|
226
259
|
|
|
227
|
-
// Check env var
|
|
228
260
|
if (process.env.LANGSMITH_API_KEY) {
|
|
229
261
|
console.log(` ${GREEN}✓${RESET} LANGSMITH_API_KEY found in environment`);
|
|
230
|
-
|
|
262
|
+
hasKey = true;
|
|
263
|
+
} else if (fs.existsSync(langsmithCredsFile)) {
|
|
264
|
+
try {
|
|
265
|
+
const content = fs.readFileSync(langsmithCredsFile, "utf8");
|
|
266
|
+
if (content.includes("LANGSMITH_API_KEY=lsv2_")) {
|
|
267
|
+
console.log(` ${GREEN}✓${RESET} API key found in credentials file`);
|
|
268
|
+
hasKey = true;
|
|
269
|
+
}
|
|
270
|
+
} catch {}
|
|
231
271
|
}
|
|
232
272
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
console.log(` ${
|
|
236
|
-
|
|
273
|
+
if (!hasKey) {
|
|
274
|
+
console.log(` ${BOLD}LangSmith API Key${RESET} — get yours at ${DIM}https://smith.langchain.com/settings${RESET}`);
|
|
275
|
+
console.log(` ${DIM}LangSmith is required. The evolver won't work without it.${RESET}\n`);
|
|
276
|
+
|
|
277
|
+
// Keep asking until they provide a key or explicitly skip
|
|
278
|
+
let attempts = 0;
|
|
279
|
+
while (!hasKey && attempts < 3) {
|
|
280
|
+
const apiKey = await ask(rl, ` ${YELLOW}Paste your LangSmith API key (lsv2_pt_...):${RESET} `);
|
|
281
|
+
const key = apiKey.trim();
|
|
282
|
+
|
|
283
|
+
if (key && key.startsWith("lsv2_")) {
|
|
284
|
+
try {
|
|
285
|
+
fs.mkdirSync(langsmithCredsDir, { recursive: true });
|
|
286
|
+
fs.writeFileSync(langsmithCredsFile, `LANGSMITH_API_KEY=${key}\n`);
|
|
287
|
+
console.log(` ${GREEN}✓${RESET} API key saved`);
|
|
288
|
+
hasKey = true;
|
|
289
|
+
} catch {
|
|
290
|
+
console.log(` ${RED}Failed to save.${RESET} Add to your shell: export LANGSMITH_API_KEY=${key}`);
|
|
291
|
+
hasKey = true; // they have the key, just couldn't save
|
|
292
|
+
}
|
|
293
|
+
} else if (key) {
|
|
294
|
+
console.log(` ${YELLOW}Invalid — LangSmith keys start with lsv2_${RESET}`);
|
|
295
|
+
attempts++;
|
|
296
|
+
} else {
|
|
297
|
+
// Empty input — skip
|
|
298
|
+
console.log(`\n ${RED}WARNING:${RESET} No API key configured.`);
|
|
299
|
+
console.log(` ${BOLD}/evolver:setup will not work${RESET} until you set LANGSMITH_API_KEY.`);
|
|
300
|
+
console.log(` Run: ${DIM}export LANGSMITH_API_KEY=lsv2_pt_your_key${RESET}\n`);
|
|
301
|
+
break;
|
|
302
|
+
}
|
|
303
|
+
}
|
|
237
304
|
}
|
|
238
305
|
|
|
239
|
-
//
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
306
|
+
// --- Step 2: langsmith-cli ---
|
|
307
|
+
if (hasLangsmithCli) {
|
|
308
|
+
console.log(` ${GREEN}✓${RESET} langsmith-cli installed`);
|
|
309
|
+
} else {
|
|
310
|
+
console.log(`\n ${BOLD}langsmith-cli${RESET} — optional but useful for debugging traces`);
|
|
311
|
+
console.log(` ${DIM}Quick project listing, trace inspection, run stats from terminal.${RESET}`);
|
|
312
|
+
const lsCliAnswer = await ask(rl, `\n ${YELLOW}Install langsmith-cli? [Y/n]:${RESET} `);
|
|
313
|
+
if (lsCliAnswer.trim().toLowerCase() !== "n") {
|
|
314
|
+
console.log(`\n Installing langsmith-cli...`);
|
|
315
|
+
try {
|
|
316
|
+
execSync("uv tool install langsmith-cli 2>/dev/null || pip install langsmith-cli 2>/dev/null || pip3 install langsmith-cli", { stdio: "pipe", timeout: 60000 });
|
|
317
|
+
console.log(` ${GREEN}✓${RESET} langsmith-cli installed`);
|
|
244
318
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
319
|
+
// If we have a key, auto-authenticate
|
|
320
|
+
if (hasKey && fs.existsSync(langsmithCredsFile)) {
|
|
321
|
+
console.log(` ${GREEN}✓${RESET} langsmith-cli auto-authenticated (credentials file exists)`);
|
|
322
|
+
}
|
|
323
|
+
} catch {
|
|
324
|
+
console.log(` ${YELLOW}!${RESET} Could not install. Try manually: ${DIM}uv tool install langsmith-cli${RESET}`);
|
|
325
|
+
}
|
|
252
326
|
}
|
|
253
|
-
} else if (key) {
|
|
254
|
-
console.log(` ${YELLOW}Doesn't look like a LangSmith key (should start with lsv2_).${RESET}`);
|
|
255
|
-
console.log(` Add to your shell: ${BOLD}export LANGSMITH_API_KEY=your_key${RESET}`);
|
|
256
|
-
} else {
|
|
257
|
-
console.log(` ${YELLOW}Skipped.${RESET} You must set LANGSMITH_API_KEY before using /evolver:setup`);
|
|
258
327
|
}
|
|
259
328
|
}
|
|
260
329
|
|
package/package.json
CHANGED
package/skills/evolve/SKILL.md
CHANGED
|
@@ -13,12 +13,15 @@ Run the autonomous propose-evaluate-iterate loop using LangSmith as the evaluati
|
|
|
13
13
|
|
|
14
14
|
`.evolver.json` must exist. If not, tell user to run `evolver:setup`.
|
|
15
15
|
|
|
16
|
-
## Resolve Tool Path
|
|
16
|
+
## Resolve Tool Path and Python
|
|
17
17
|
|
|
18
18
|
```bash
|
|
19
19
|
TOOLS=$([ -d ".evolver/tools" ] && echo ".evolver/tools" || echo "$HOME/.evolver/tools")
|
|
20
|
+
EVOLVER_PY=$([ -f "$HOME/.evolver/venv/bin/python" ] && echo "$HOME/.evolver/venv/bin/python" || echo "python3")
|
|
20
21
|
```
|
|
21
22
|
|
|
23
|
+
Use `$EVOLVER_PY` instead of `python3` for ALL tool invocations.
|
|
24
|
+
|
|
22
25
|
## Parse Arguments
|
|
23
26
|
|
|
24
27
|
- `--iterations N` (default: from interactive question or 5)
|
|
@@ -76,7 +79,7 @@ Run trace insights from the best experiment:
|
|
|
76
79
|
|
|
77
80
|
```bash
|
|
78
81
|
BEST=$(python3 -c "import json; print(json.load(open('.evolver.json'))['best_experiment'])")
|
|
79
|
-
|
|
82
|
+
$EVOLVER_PY $TOOLS/trace_insights.py \
|
|
80
83
|
--from-experiment "$BEST" \
|
|
81
84
|
--output trace_insights.json 2>/dev/null
|
|
82
85
|
```
|
|
@@ -86,7 +89,7 @@ If a production project is configured, also gather production insights:
|
|
|
86
89
|
```bash
|
|
87
90
|
PROD=$(python3 -c "import json; c=json.load(open('.evolver.json')); print(c.get('production_project',''))")
|
|
88
91
|
if [ -n "$PROD" ] && [ ! -f "production_seed.json" ]; then
|
|
89
|
-
|
|
92
|
+
$EVOLVER_PY $TOOLS/seed_from_traces.py \
|
|
90
93
|
--project "$PROD" --use-sdk \
|
|
91
94
|
--output-md production_seed.md \
|
|
92
95
|
--output-json production_seed.json \
|
|
@@ -99,7 +102,7 @@ fi
|
|
|
99
102
|
Read the best experiment results and cluster failures:
|
|
100
103
|
|
|
101
104
|
```bash
|
|
102
|
-
|
|
105
|
+
$EVOLVER_PY $TOOLS/read_results.py \
|
|
103
106
|
--experiment "$BEST" \
|
|
104
107
|
--config .evolver.json \
|
|
105
108
|
--output best_results.json 2>/dev/null
|
|
@@ -174,7 +177,7 @@ Wait for all 5 to complete.
|
|
|
174
177
|
For each worktree that has changes (proposer committed something):
|
|
175
178
|
|
|
176
179
|
```bash
|
|
177
|
-
|
|
180
|
+
$EVOLVER_PY $TOOLS/run_eval.py \
|
|
178
181
|
--config .evolver.json \
|
|
179
182
|
--worktree-path {worktree_path} \
|
|
180
183
|
--experiment-prefix v{NNN}{suffix} \
|
|
@@ -186,7 +189,7 @@ Each candidate becomes a separate LangSmith experiment.
|
|
|
186
189
|
### 4. Compare All Candidates
|
|
187
190
|
|
|
188
191
|
```bash
|
|
189
|
-
|
|
192
|
+
$EVOLVER_PY $TOOLS/read_results.py \
|
|
190
193
|
--experiments "v{NNN}a,v{NNN}b,v{NNN}c,v{NNN}d,v{NNN}e" \
|
|
191
194
|
--config .evolver.json \
|
|
192
195
|
--output comparison.json
|
package/skills/setup/SKILL.md
CHANGED
|
@@ -35,23 +35,20 @@ If `MISSING`: "Set your LangSmith API key: `export LANGSMITH_API_KEY=lsv2_pt_...
|
|
|
35
35
|
|
|
36
36
|
The tools auto-load the key from the credentials file, but the env var takes precedence.
|
|
37
37
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
```bash
|
|
41
|
-
pip install langsmith openevals 2>/dev/null || uv pip install langsmith openevals
|
|
42
|
-
```
|
|
43
|
-
|
|
44
|
-
## Resolve Tool Path
|
|
38
|
+
## Resolve Tool Path and Python
|
|
45
39
|
|
|
46
40
|
```bash
|
|
47
41
|
TOOLS=$([ -d ".evolver/tools" ] && echo ".evolver/tools" || echo "$HOME/.evolver/tools")
|
|
42
|
+
EVOLVER_PY=$([ -f "$HOME/.evolver/venv/bin/python" ] && echo "$HOME/.evolver/venv/bin/python" || echo "python3")
|
|
48
43
|
```
|
|
49
44
|
|
|
45
|
+
Use `$EVOLVER_PY` instead of `python3` for ALL tool invocations. This ensures the venv with langsmith+openevals is used.
|
|
46
|
+
|
|
50
47
|
## Phase 1: Explore Project (automatic)
|
|
51
48
|
|
|
52
49
|
```bash
|
|
53
50
|
find . -maxdepth 3 -type f -name "*.py" | head -30
|
|
54
|
-
|
|
51
|
+
$EVOLVER_PY $TOOLS/detect_stack.py .
|
|
55
52
|
```
|
|
56
53
|
|
|
57
54
|
Look for:
|
|
@@ -145,7 +142,7 @@ If "I have test data": ask for file path.
|
|
|
145
142
|
Build the setup.py command based on all gathered information:
|
|
146
143
|
|
|
147
144
|
```bash
|
|
148
|
-
|
|
145
|
+
$EVOLVER_PY $TOOLS/setup.py \
|
|
149
146
|
--project-name "{project_name}" \
|
|
150
147
|
--entry-point "{run_command}" \
|
|
151
148
|
--framework "{framework}" \
|