harness-evolver 4.2.4 → 4.2.6
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/bin/install.js +71 -12
- package/package.json +1 -1
- package/skills/evolve/SKILL.md +2 -0
- package/skills/setup/SKILL.md +2 -0
- package/tools/setup.py +116 -88
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "harness-evolver",
|
|
3
3
|
"description": "LangSmith-native autonomous agent optimization — evolves LLM agent code using multi-agent proposers, LangSmith experiments, and git worktrees",
|
|
4
|
-
"version": "4.2.
|
|
4
|
+
"version": "4.2.6",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Raphael Valdetaro"
|
|
7
7
|
},
|
package/bin/install.js
CHANGED
|
@@ -374,7 +374,7 @@ function installPythonDeps() {
|
|
|
374
374
|
return false;
|
|
375
375
|
}
|
|
376
376
|
|
|
377
|
-
async function configureLangSmith(rl) {
|
|
377
|
+
async function configureLangSmith(rl, nonInteractive) {
|
|
378
378
|
const langsmithCredsDir = process.platform === "darwin"
|
|
379
379
|
? path.join(HOME, "Library", "Application Support", "langsmith-cli")
|
|
380
380
|
: path.join(HOME, ".config", "langsmith-cli");
|
|
@@ -393,13 +393,28 @@ async function configureLangSmith(rl) {
|
|
|
393
393
|
try {
|
|
394
394
|
const content = fs.readFileSync(langsmithCredsFile, "utf8");
|
|
395
395
|
if (content.includes("LANGSMITH_API_KEY=lsv2_")) {
|
|
396
|
-
|
|
397
|
-
|
|
396
|
+
// Validate existing key with a real request
|
|
397
|
+
const existingKey = content.match(/LANGSMITH_API_KEY=(lsv2_[^\s\n]+)/)?.[1];
|
|
398
|
+
if (existingKey) {
|
|
399
|
+
try {
|
|
400
|
+
execSync(`curl -sf -o /dev/null -w "%{http_code}" -H "x-api-key: ${existingKey}" https://api.smith.langchain.com/info`, { stdio: "pipe", timeout: 10000 });
|
|
401
|
+
stepDone("API key found and validated");
|
|
402
|
+
hasKey = true;
|
|
403
|
+
} catch {
|
|
404
|
+
barLine(c.yellow("API key found but could not be validated — LangSmith may be unreachable"));
|
|
405
|
+
barLine(c.dim("Will ask for a new key just in case."));
|
|
406
|
+
}
|
|
407
|
+
}
|
|
398
408
|
}
|
|
399
409
|
} catch {}
|
|
400
410
|
}
|
|
401
411
|
|
|
402
412
|
if (!hasKey) {
|
|
413
|
+
if (nonInteractive) {
|
|
414
|
+
stepError("No API key found — set LANGSMITH_API_KEY in environment and re-run");
|
|
415
|
+
barLine(c.dim("Run: export LANGSMITH_API_KEY=lsv2_pt_your_key"));
|
|
416
|
+
return;
|
|
417
|
+
}
|
|
403
418
|
barLine(c.dim("Get yours at https://smith.langchain.com/settings"));
|
|
404
419
|
barLine(c.dim("LangSmith is required. The evolver won't work without it."));
|
|
405
420
|
barEmpty();
|
|
@@ -410,6 +425,13 @@ async function configureLangSmith(rl) {
|
|
|
410
425
|
const key = apiKey.trim();
|
|
411
426
|
|
|
412
427
|
if (key && key.startsWith("lsv2_")) {
|
|
428
|
+
// Validate key with a real request before saving
|
|
429
|
+
try {
|
|
430
|
+
execSync(`curl -sf -o /dev/null -w "%{http_code}" -H "x-api-key: ${key}" https://api.smith.langchain.com/info`, { stdio: "pipe", timeout: 10000 });
|
|
431
|
+
} catch {
|
|
432
|
+
barLine(c.yellow("Key could not be validated — LangSmith may be unreachable"));
|
|
433
|
+
barLine(c.dim("Saving anyway. If it doesn't work, re-run the installer."));
|
|
434
|
+
}
|
|
413
435
|
try {
|
|
414
436
|
fs.mkdirSync(langsmithCredsDir, { recursive: true });
|
|
415
437
|
fs.writeFileSync(langsmithCredsFile, `LANGSMITH_API_KEY=${key}\n`);
|
|
@@ -454,7 +476,7 @@ async function configureLangSmith(rl) {
|
|
|
454
476
|
}
|
|
455
477
|
}
|
|
456
478
|
|
|
457
|
-
async function configureOptionalIntegrations(rl) {
|
|
479
|
+
async function configureOptionalIntegrations(rl, nonInteractive) {
|
|
458
480
|
barEmpty();
|
|
459
481
|
step(c.bold("Optional Integrations"));
|
|
460
482
|
barEmpty();
|
|
@@ -474,7 +496,7 @@ async function configureOptionalIntegrations(rl) {
|
|
|
474
496
|
|
|
475
497
|
if (hasContext7) {
|
|
476
498
|
stepDone("Context7 MCP already configured");
|
|
477
|
-
} else {
|
|
499
|
+
} else if (!nonInteractive) {
|
|
478
500
|
barLine(c.bold("Context7 MCP") + " \u2014 " + c.dim("up-to-date library documentation"));
|
|
479
501
|
const c7Answer = await ask(rl, `${c.cyan(S.stepActive)} Install Context7 MCP? [y/N]: `);
|
|
480
502
|
if (c7Answer.trim().toLowerCase() === "y") {
|
|
@@ -506,7 +528,7 @@ async function configureOptionalIntegrations(rl) {
|
|
|
506
528
|
|
|
507
529
|
if (hasLcDocs) {
|
|
508
530
|
stepDone("LangChain Docs MCP already configured");
|
|
509
|
-
} else {
|
|
531
|
+
} else if (!nonInteractive) {
|
|
510
532
|
barLine(c.bold("LangChain Docs MCP") + " \u2014 " + c.dim("LangChain/LangGraph/LangSmith docs"));
|
|
511
533
|
const lcAnswer = await ask(rl, `${c.cyan(S.stepActive)} Install LangChain Docs MCP? [y/N]: `);
|
|
512
534
|
if (lcAnswer.trim().toLowerCase() === "y") {
|
|
@@ -525,6 +547,8 @@ async function configureOptionalIntegrations(rl) {
|
|
|
525
547
|
// ─── Main ───────────────────────────────────────────────────────────────────
|
|
526
548
|
|
|
527
549
|
async function main() {
|
|
550
|
+
const nonInteractive = process.argv.includes("--yes") || process.argv.includes("-y");
|
|
551
|
+
|
|
528
552
|
banner();
|
|
529
553
|
|
|
530
554
|
header("harness-evolver");
|
|
@@ -540,6 +564,21 @@ async function main() {
|
|
|
540
564
|
}
|
|
541
565
|
} catch {}
|
|
542
566
|
|
|
567
|
+
// Check installed version
|
|
568
|
+
const versionPath = path.join(HOME, ".evolver", "VERSION");
|
|
569
|
+
let installedVersion = null;
|
|
570
|
+
if (fs.existsSync(versionPath)) {
|
|
571
|
+
installedVersion = fs.readFileSync(versionPath, "utf8").trim();
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
if (installedVersion && installedVersion !== VERSION) {
|
|
575
|
+
step(`Upgrading ${c.dim(installedVersion)} → ${c.cyan(VERSION)}`);
|
|
576
|
+
} else if (installedVersion === VERSION) {
|
|
577
|
+
step(`Reinstalling ${c.cyan(VERSION)}`);
|
|
578
|
+
} else {
|
|
579
|
+
step(`Fresh install ${c.cyan(VERSION)}`);
|
|
580
|
+
}
|
|
581
|
+
|
|
543
582
|
barEmpty();
|
|
544
583
|
|
|
545
584
|
// Python check
|
|
@@ -567,6 +606,11 @@ async function main() {
|
|
|
567
606
|
|
|
568
607
|
const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
|
|
569
608
|
|
|
609
|
+
function askOrDefault(question, defaultValue) {
|
|
610
|
+
if (nonInteractive) return Promise.resolve(defaultValue);
|
|
611
|
+
return ask(rl, question);
|
|
612
|
+
}
|
|
613
|
+
|
|
570
614
|
// Runtime selection
|
|
571
615
|
barEmpty();
|
|
572
616
|
stepPrompt("Which runtime(s) to install for?");
|
|
@@ -577,7 +621,7 @@ async function main() {
|
|
|
577
621
|
barLine(c.dim("Select multiple: 1,2 or 1 2"));
|
|
578
622
|
}
|
|
579
623
|
|
|
580
|
-
const runtimeAnswer = await ask(rl, `${c.cyan(S.stepActive)} Choice [1]: `);
|
|
624
|
+
const runtimeAnswer = await askOrDefault(`${c.cyan(S.stepActive)} Choice [1]: `, "1");
|
|
581
625
|
const runtimeInput = (runtimeAnswer.trim() || "1");
|
|
582
626
|
|
|
583
627
|
let selected;
|
|
@@ -598,7 +642,7 @@ async function main() {
|
|
|
598
642
|
barLine(` ${c.bold("1")} Global ${c.dim(`(~/${selected[0].dir})`)}`);
|
|
599
643
|
barLine(` ${c.bold("2")} Local ${c.dim(`(./${selected[0].dir})`)}`);
|
|
600
644
|
|
|
601
|
-
const scopeAnswer = await ask(rl, `${c.cyan(S.stepActive)} Choice [1]: `);
|
|
645
|
+
const scopeAnswer = await askOrDefault(`${c.cyan(S.stepActive)} Choice [1]: `, "1");
|
|
602
646
|
const scope = (scopeAnswer.trim() === "2") ? "local" : "global";
|
|
603
647
|
|
|
604
648
|
stepDone(`Scope: ${c.cyan(scope)}`);
|
|
@@ -632,8 +676,20 @@ async function main() {
|
|
|
632
676
|
const toolCount = installTools();
|
|
633
677
|
stepDone(`${toolCount} tools installed to ~/.evolver/tools/`);
|
|
634
678
|
|
|
635
|
-
//
|
|
636
|
-
|
|
679
|
+
// Suggest .worktreeinclude for worktree support
|
|
680
|
+
barEmpty();
|
|
681
|
+
const cwdGit = fs.existsSync(path.join(process.cwd(), ".git"));
|
|
682
|
+
const cwdWorktreeInclude = fs.existsSync(path.join(process.cwd(), ".worktreeinclude"));
|
|
683
|
+
if (cwdGit && !cwdWorktreeInclude) {
|
|
684
|
+
step("Worktree support");
|
|
685
|
+
barLine(c.dim("For /evolver:evolve to work, .evolver.json needs to be in worktrees."));
|
|
686
|
+
barLine(c.dim("Create .worktreeinclude in your project root with:"));
|
|
687
|
+
barLine(c.dim(" .evolver.json"));
|
|
688
|
+
barLine(c.dim(" .env"));
|
|
689
|
+
stepDone("Tip shown");
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
// Version marker (versionPath declared earlier for upgrade check)
|
|
637
693
|
fs.mkdirSync(path.dirname(versionPath), { recursive: true });
|
|
638
694
|
fs.writeFileSync(versionPath, VERSION);
|
|
639
695
|
|
|
@@ -642,10 +698,10 @@ async function main() {
|
|
|
642
698
|
installPythonDeps();
|
|
643
699
|
|
|
644
700
|
// Configure LangSmith
|
|
645
|
-
await configureLangSmith(rl);
|
|
701
|
+
await configureLangSmith(rl, nonInteractive);
|
|
646
702
|
|
|
647
703
|
// Optional integrations
|
|
648
|
-
await configureOptionalIntegrations(rl);
|
|
704
|
+
await configureOptionalIntegrations(rl, nonInteractive);
|
|
649
705
|
|
|
650
706
|
// Done
|
|
651
707
|
barEmpty();
|
|
@@ -657,6 +713,9 @@ async function main() {
|
|
|
657
713
|
barLine(` ${c.cyan("/evolver:status")} \u2014 check progress`);
|
|
658
714
|
barLine(` ${c.cyan("/evolver:deploy")} \u2014 finalize and push`);
|
|
659
715
|
barEmpty();
|
|
716
|
+
barLine(c.dim("Plugin marketplace (auto-updates):"));
|
|
717
|
+
barLine(` ${c.cyan("/plugin install harness-evolver")} ${c.dim("— from Claude Code marketplace")}`);
|
|
718
|
+
barEmpty();
|
|
660
719
|
barLine(c.dim("GitHub: https://github.com/raphaelchristi/harness-evolver"));
|
|
661
720
|
footer();
|
|
662
721
|
|
package/package.json
CHANGED
package/skills/evolve/SKILL.md
CHANGED
|
@@ -23,6 +23,8 @@ EVOLVER_PY="${EVOLVER_PY:-$([ -f "$HOME/.evolver/venv/bin/python" ] && echo "$HO
|
|
|
23
23
|
|
|
24
24
|
Use `$EVOLVER_PY` instead of `python3` for ALL tool invocations.
|
|
25
25
|
|
|
26
|
+
**IMPORTANT: Never pass `LANGSMITH_API_KEY` inline in Bash commands.** The key is loaded automatically by the SessionStart hook and by each tool's `ensure_langsmith_api_key()`. Passing it inline exposes it in the output.
|
|
27
|
+
|
|
26
28
|
## Parse Arguments
|
|
27
29
|
|
|
28
30
|
- `--iterations N` (default: from interactive question or 5)
|
package/skills/setup/SKILL.md
CHANGED
|
@@ -45,6 +45,8 @@ EVOLVER_PY="${EVOLVER_PY:-$([ -f "$HOME/.evolver/venv/bin/python" ] && echo "$HO
|
|
|
45
45
|
|
|
46
46
|
Use `$EVOLVER_PY` instead of `python3` for ALL tool invocations. This ensures the venv with langsmith is used.
|
|
47
47
|
|
|
48
|
+
**IMPORTANT: Never pass `LANGSMITH_API_KEY` inline in Bash commands.** The key is loaded automatically by the SessionStart hook (from credentials file or environment) and by each Python tool's `ensure_langsmith_api_key()`. Passing it inline exposes it in the output. If the key is missing, tell the user to run `export LANGSMITH_API_KEY=lsv2_pt_...` instead.
|
|
49
|
+
|
|
48
50
|
## Phase 1: Explore Project (automatic)
|
|
49
51
|
|
|
50
52
|
```bash
|
package/tools/setup.py
CHANGED
|
@@ -462,101 +462,129 @@ def main():
|
|
|
462
462
|
else:
|
|
463
463
|
print(f"Dataset: '{dataset_name}'")
|
|
464
464
|
|
|
465
|
-
# Create dataset
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
dataset
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
465
|
+
# Create dataset — wrapped in try/except to clean up orphaned datasets on failure
|
|
466
|
+
dataset = None
|
|
467
|
+
try:
|
|
468
|
+
print(f"Creating dataset '{dataset_name}'...")
|
|
469
|
+
if args.dataset_from_file:
|
|
470
|
+
dataset, count = create_dataset_from_file(client, dataset_name, args.dataset_from_file)
|
|
471
|
+
print(f" Created from file: {count} examples")
|
|
472
|
+
elif args.dataset_from_langsmith:
|
|
473
|
+
dataset, count = create_dataset_from_langsmith(
|
|
474
|
+
client, dataset_name, args.dataset_from_langsmith,
|
|
475
|
+
)
|
|
476
|
+
if not dataset:
|
|
477
|
+
print(" No traces found in source project. Creating empty dataset.")
|
|
478
|
+
dataset = create_empty_dataset(client, dataset_name)
|
|
479
|
+
count = 0
|
|
480
|
+
else:
|
|
481
|
+
print(f" Created from LangSmith traces: {count} examples")
|
|
482
|
+
else:
|
|
476
483
|
dataset = create_empty_dataset(client, dataset_name)
|
|
477
484
|
count = 0
|
|
485
|
+
print(" Created empty dataset (testgen will populate)")
|
|
486
|
+
|
|
487
|
+
# Configure evaluators
|
|
488
|
+
print(f"Configuring evaluators for goals: {goals}")
|
|
489
|
+
evaluators, evaluator_keys = get_evaluators(goals, args.evaluators)
|
|
490
|
+
print(f" Active evaluators: {evaluator_keys}")
|
|
491
|
+
llm_evaluators = [k for k in evaluator_keys if k in ("correctness", "conciseness")]
|
|
492
|
+
if llm_evaluators:
|
|
493
|
+
print(f" LLM evaluators (agent-based): {llm_evaluators}")
|
|
494
|
+
|
|
495
|
+
# Run baseline (code-based evaluators only; LLM scoring done by evaluator agent)
|
|
496
|
+
baseline_experiment = None
|
|
497
|
+
baseline_score = 0.0
|
|
498
|
+
if not args.skip_baseline and count > 0:
|
|
499
|
+
print(f"Running baseline target ({count} examples)...")
|
|
500
|
+
try:
|
|
501
|
+
baseline_experiment, baseline_score = run_baseline(
|
|
502
|
+
client, dataset_name, args.entry_point, evaluators,
|
|
503
|
+
)
|
|
504
|
+
print(f" Baseline has_output score: {baseline_score:.3f}")
|
|
505
|
+
print(f" Experiment: {baseline_experiment}")
|
|
506
|
+
if llm_evaluators:
|
|
507
|
+
print(f" Note: LLM scoring pending — evaluator agent will run during /evolver:evolve")
|
|
508
|
+
except Exception as e:
|
|
509
|
+
print(f" Baseline evaluation failed: {e}", file=sys.stderr)
|
|
510
|
+
print(" Continuing with score 0.0")
|
|
511
|
+
elif count == 0:
|
|
512
|
+
print("Skipping baseline (no examples in dataset yet)")
|
|
478
513
|
else:
|
|
479
|
-
print(
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
print(f"Running baseline target ({count} examples)...")
|
|
514
|
+
print("Skipping baseline (--skip-baseline)")
|
|
515
|
+
|
|
516
|
+
# Resolve Python interpreter in entry_point to absolute path
|
|
517
|
+
# This ensures the entry point works in worktrees where venvs don't exist
|
|
518
|
+
entry_point = args.entry_point
|
|
519
|
+
parts = entry_point.split()
|
|
520
|
+
if parts:
|
|
521
|
+
python_path = parts[0]
|
|
522
|
+
# Resolve relative Python paths (e.g., ../.venv/bin/python, .venv/bin/python)
|
|
523
|
+
if "/" in python_path and not os.path.isabs(python_path):
|
|
524
|
+
abs_python = os.path.abspath(python_path)
|
|
525
|
+
if os.path.exists(abs_python):
|
|
526
|
+
parts[0] = abs_python
|
|
527
|
+
entry_point = " ".join(parts)
|
|
528
|
+
print(f" Resolved Python path: {abs_python}")
|
|
529
|
+
|
|
530
|
+
# Compute project_dir relative to git root (for worktree path resolution)
|
|
531
|
+
project_dir = ""
|
|
498
532
|
try:
|
|
499
|
-
|
|
500
|
-
|
|
533
|
+
git_prefix = subprocess.run(
|
|
534
|
+
["git", "rev-parse", "--show-prefix"],
|
|
535
|
+
capture_output=True, text=True, timeout=5,
|
|
501
536
|
)
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
except Exception as e:
|
|
507
|
-
print(f" Baseline evaluation failed: {e}", file=sys.stderr)
|
|
508
|
-
print(" Continuing with score 0.0")
|
|
509
|
-
elif count == 0:
|
|
510
|
-
print("Skipping baseline (no examples in dataset yet)")
|
|
511
|
-
else:
|
|
512
|
-
print("Skipping baseline (--skip-baseline)")
|
|
513
|
-
|
|
514
|
-
# Compute project_dir relative to git root (for worktree path resolution)
|
|
515
|
-
project_dir = ""
|
|
516
|
-
try:
|
|
517
|
-
git_prefix = subprocess.run(
|
|
518
|
-
["git", "rev-parse", "--show-prefix"],
|
|
519
|
-
capture_output=True, text=True, timeout=5,
|
|
520
|
-
)
|
|
521
|
-
if git_prefix.returncode == 0:
|
|
522
|
-
project_dir = git_prefix.stdout.strip().rstrip("/")
|
|
523
|
-
except Exception:
|
|
524
|
-
pass
|
|
525
|
-
|
|
526
|
-
# Write config
|
|
527
|
-
config = {
|
|
528
|
-
"version": "3.0.0",
|
|
529
|
-
"project": project_name,
|
|
530
|
-
"dataset": dataset_name,
|
|
531
|
-
"dataset_id": str(dataset.id) if dataset else None,
|
|
532
|
-
"project_dir": project_dir,
|
|
533
|
-
"entry_point": args.entry_point,
|
|
534
|
-
"evaluators": evaluator_keys,
|
|
535
|
-
"optimization_goals": goals,
|
|
536
|
-
"production_project": args.production_project,
|
|
537
|
-
"baseline_experiment": baseline_experiment,
|
|
538
|
-
"best_experiment": baseline_experiment,
|
|
539
|
-
"best_score": baseline_score,
|
|
540
|
-
"iterations": 0,
|
|
541
|
-
"framework": args.framework,
|
|
542
|
-
"created_at": datetime.now(timezone.utc).isoformat(),
|
|
543
|
-
"history": [{
|
|
544
|
-
"version": "baseline",
|
|
545
|
-
"experiment": baseline_experiment,
|
|
546
|
-
"score": baseline_score,
|
|
547
|
-
}] if baseline_experiment else [],
|
|
548
|
-
}
|
|
537
|
+
if git_prefix.returncode == 0:
|
|
538
|
+
project_dir = git_prefix.stdout.strip().rstrip("/")
|
|
539
|
+
except Exception:
|
|
540
|
+
pass
|
|
549
541
|
|
|
550
|
-
|
|
551
|
-
|
|
542
|
+
# Write config
|
|
543
|
+
config = {
|
|
544
|
+
"version": "3.0.0",
|
|
545
|
+
"project": project_name,
|
|
546
|
+
"dataset": dataset_name,
|
|
547
|
+
"dataset_id": str(dataset.id) if dataset else None,
|
|
548
|
+
"project_dir": project_dir,
|
|
549
|
+
"entry_point": entry_point,
|
|
550
|
+
"evaluators": evaluator_keys,
|
|
551
|
+
"optimization_goals": goals,
|
|
552
|
+
"production_project": args.production_project,
|
|
553
|
+
"baseline_experiment": baseline_experiment,
|
|
554
|
+
"best_experiment": baseline_experiment,
|
|
555
|
+
"best_score": baseline_score,
|
|
556
|
+
"iterations": 0,
|
|
557
|
+
"framework": args.framework,
|
|
558
|
+
"created_at": datetime.now(timezone.utc).isoformat(),
|
|
559
|
+
"history": [{
|
|
560
|
+
"version": "baseline",
|
|
561
|
+
"experiment": baseline_experiment,
|
|
562
|
+
"score": baseline_score,
|
|
563
|
+
}] if baseline_experiment else [],
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
with open(args.output, "w") as f:
|
|
567
|
+
json.dump(config, f, indent=2)
|
|
568
|
+
|
|
569
|
+
print(f"\nSetup complete. Config saved to {args.output}")
|
|
570
|
+
print(f" Project: {project_name}")
|
|
571
|
+
print(f" Dataset: {dataset_name} ({count} examples)")
|
|
572
|
+
print(f" Evaluators: {evaluator_keys}")
|
|
573
|
+
if baseline_experiment:
|
|
574
|
+
print(f" Baseline: {baseline_score:.3f}")
|
|
575
|
+
print(f"\nNext: run /evolver:evolve")
|
|
552
576
|
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
577
|
+
except Exception as e:
|
|
578
|
+
# Cleanup orphaned dataset if setup fails after dataset creation
|
|
579
|
+
if dataset:
|
|
580
|
+
print(f"Setup failed: {e}", file=sys.stderr)
|
|
581
|
+
print(f"Cleaning up orphaned dataset '{dataset_name}'...", file=sys.stderr)
|
|
582
|
+
try:
|
|
583
|
+
client.delete_dataset(dataset_id=dataset.id)
|
|
584
|
+
print(" Dataset deleted.", file=sys.stderr)
|
|
585
|
+
except Exception:
|
|
586
|
+
print(f" WARNING: Could not delete dataset. Clean up manually in LangSmith.", file=sys.stderr)
|
|
587
|
+
raise
|
|
560
588
|
|
|
561
589
|
|
|
562
590
|
if __name__ == "__main__":
|