harness-evolver 3.1.0 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/evolve/SKILL.md +16 -11
- package/tools/setup.py +35 -1
package/package.json
CHANGED
package/skills/evolve/SKILL.md
CHANGED
@@ -75,13 +75,15 @@ python3 -c "import json; c=json.load(open('.evolver.json')); print(f'v{c[\"itera
 
 ### 1.5. Gather Trace Insights
 
-
+Read the best experiment from config. If null (no baseline was run), skip trace insights for this iteration — proposers will work blind on the first pass:
 
 ```bash
-BEST=$(python3 -c "import json;
-
-
-
+BEST=$(python3 -c "import json; b=json.load(open('.evolver.json')).get('best_experiment'); print(b if b else '')")
+if [ -n "$BEST" ]; then
+  $EVOLVER_PY $TOOLS/trace_insights.py \
+    --from-experiment "$BEST" \
+    --output trace_insights.json 2>/dev/null
+fi
 ```
 
 If a production project is configured, also gather production insights:
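For readers less comfortable with the shell one-liner, the same guard written as plain Python looks roughly like this (a sketch, assuming only the `best_experiment` key shown above; the real config also carries other fields such as the iteration counter):

```python
import json

# Load the config written by setup.py; best_experiment stays null until a baseline run exists.
with open(".evolver.json") as f:
    config = json.load(f)

best = config.get("best_experiment")
if best:
    print(f"Gathering trace insights from experiment: {best}")
else:
    print("No baseline experiment yet; skipping trace insights this iteration.")
```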
@@ -99,17 +101,20 @@ fi
 
 ### 1.8. Analyze Per-Task Failures
 
-
+If `$BEST` is set (not the first iteration without baseline), read results and cluster failures:
 
 ```bash
-
-
-
-
+if [ -n "$BEST" ]; then
+  $EVOLVER_PY $TOOLS/read_results.py \
+    --experiment "$BEST" \
+    --config .evolver.json \
+    --output best_results.json 2>/dev/null
+fi
 ```
 
-
+If `best_results.json` exists, parse it to find failing examples (score < 0.7). Group by metadata or error pattern.
 Generate adaptive briefings for Candidates D and E (same logic as v2).
+If no best_results.json (first iteration without baseline), all proposers work from code analysis only — no failure data available.
 
 ### 2. Spawn 5 Proposers in Parallel
 
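The new per-task failure step asks the agent to parse `best_results.json`, treat scores below 0.7 as failures, and group them by metadata or error pattern, but leaves the parsing itself to the agent. A rough sketch of that pass, assuming the file holds a list of per-example records with `score`, optional `metadata`, and `error` fields (the exact schema emitted by `read_results.py` may differ):

```python
import json
from collections import defaultdict

def cluster_key(record):
    """Pick a coarse grouping key: a metadata category if present, else the first line of the error."""
    meta = record.get("metadata") or {}
    if meta.get("category"):
        return meta["category"]
    error = (record.get("error") or "").strip()
    if error:
        return error.splitlines()[0][:80]
    return "unknown"

# Load the exported results (schema assumed, see above).
with open("best_results.json") as f:
    results = json.load(f)

failures = [r for r in results if r.get("score", 0) < 0.7]

clusters = defaultdict(list)
for record in failures:
    clusters[cluster_key(record)].append(record)

# Print clusters largest-first so the dominant failure mode stands out.
for key, items in sorted(clusters.items(), key=lambda kv: -len(kv[1])):
    print(f"{len(items):3d}  {key}")
```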
package/tools/setup.py
CHANGED
@@ -87,6 +87,29 @@ def check_dependencies():
     return missing
 
 
+def resolve_dataset_name(client, base_name):
+    """Find an available dataset name by auto-incrementing the version suffix.
+
+    Tries base_name-eval-v1, v2, v3... until an unused name is found.
+    Returns (resolved_name, version_number).
+    """
+    existing = set()
+    try:
+        for ds in client.list_datasets():
+            existing.add(ds.name)
+    except Exception:
+        pass
+
+    for v in range(1, 100):
+        candidate = f"{base_name}-eval-v{v}"
+        if candidate not in existing:
+            return candidate, v
+
+    # Fallback: timestamp-based
+    ts = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
+    return f"{base_name}-eval-{ts}", 0
+
+
 def create_dataset_from_file(client, dataset_name, file_path):
     """Create a LangSmith dataset from a JSON file of inputs."""
     with open(file_path) as f:
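A quick illustration of the helper's auto-versioning behaviour, using a stand-in for the LangSmith client (only the `list_datasets()` call that `resolve_dataset_name` relies on is mimicked; in real use the function receives an actual `langsmith.Client`, and `resolve_dataset_name` here is assumed to be in scope from the hunk above):

```python
from types import SimpleNamespace

class StubClient:
    """Stand-in exposing just the list_datasets() call used by resolve_dataset_name."""
    def __init__(self, names):
        self._names = names

    def list_datasets(self):
        return [SimpleNamespace(name=n) for n in self._names]

# v1 and v2 already exist, so the next free slot is v3.
print(resolve_dataset_name(StubClient(["demo-eval-v1", "demo-eval-v2"]), "demo"))
# -> ("demo-eval-v3", 3)

# Nothing exists yet: the first candidate wins.
print(resolve_dataset_name(StubClient([]), "fresh"))
# -> ("fresh-eval-v1", 1)
```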
@@ -320,6 +343,7 @@ def main():
     parser.add_argument("--dataset-from-file", default=None, help="Create dataset from JSON file")
     parser.add_argument("--dataset-from-langsmith", default=None, help="Create dataset from LangSmith project")
     parser.add_argument("--production-project", default=None, help="Production LangSmith project")
+    parser.add_argument("--dataset-name", default=None, help="Explicit dataset name (skip auto-versioning)")
     parser.add_argument("--evaluators", default=None, help="Comma-separated evaluator names")
    parser.add_argument("--skip-baseline", action="store_true", help="Skip baseline evaluation")
     parser.add_argument("--output", default=".evolver.json", help="Output config path")
@@ -351,9 +375,19 @@ def main():
         sys.exit(1)
 
     project_name = f"evolver-{args.project_name}"
-    dataset_name = f"{args.project_name}-eval-v1"
     goals = [g.strip() for g in args.goals.split(",")]
 
+    # Resolve dataset name (explicit or auto-versioned)
+    if args.dataset_name:
+        dataset_name = args.dataset_name
+        print(f"Using explicit dataset name: '{dataset_name}'")
+    else:
+        dataset_name, version = resolve_dataset_name(client, args.project_name)
+        if version > 1:
+            print(f"Dataset name auto-versioned to '{dataset_name}' (v1-v{version-1} already exist)")
+        else:
+            print(f"Dataset: '{dataset_name}'")
+
     # Create dataset
     print(f"Creating dataset '{dataset_name}'...")
     if args.dataset_from_file: