harness-evolver 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: harness-evolve-init
|
|
3
|
-
description: "Initialize harness evolution in the current project.
|
|
4
|
-
argument-hint: "--harness <path> --eval <path> --tasks <path>"
|
|
3
|
+
description: "Initialize harness evolution in the current project. Auto-detects harness.py, eval.py, and tasks/ in the working directory."
|
|
4
|
+
argument-hint: "[directory] [--harness <path>] [--eval <path>] [--tasks <path>]"
|
|
5
5
|
allowed-tools: [Read, Write, Bash, Glob]
|
|
6
6
|
---
|
|
7
7
|
|
|
@@ -9,45 +9,42 @@ allowed-tools: [Read, Write, Bash, Glob]
|
|
|
9
9
|
|
|
10
10
|
Initialize the Harness Evolver for this project.
|
|
11
11
|
|
|
12
|
-
##
|
|
12
|
+
## Usage
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
14
|
+
```
|
|
15
|
+
/harness-evolve-init # auto-detect everything in CWD
|
|
16
|
+
/harness-evolve-init ./my-project # auto-detect in a specific directory
|
|
17
|
+
/harness-evolve-init --harness run.py # override one path, auto-detect the rest
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## How Auto-Detection Works
|
|
21
|
+
|
|
22
|
+
The tool scans the directory for:
|
|
23
|
+
1. **Exact names:** `harness.py`, `eval.py`, `tasks/`, `config.json`
|
|
24
|
+
2. **Fuzzy fallback:** `*harness*.py`, `*agent*.py`, `*run*.py` for harness; `*eval*.py`, `*score*.py`, `*judge*.py` for eval (each is used only when exactly one file matches); any directory with JSON files containing `id`/`input` fields for tasks
|
|
25
|
+
|
|
26
|
+
If all 3 are found, init proceeds immediately. If something is missing, it reports what's needed.
|
|
17
27
|
|
|
18
28
|
## What To Do
|
|
19
29
|
|
|
20
30
|
Run the init tool:
|
|
21
31
|
|
|
22
32
|
```bash
|
|
23
|
-
python3 ~/.harness-evolver/tools/init.py \
|
|
24
|
-
--harness {harness} \
|
|
25
|
-
--eval {eval} \
|
|
26
|
-
--tasks {tasks} \
|
|
27
|
-
--base-dir .harness-evolver \
|
|
28
|
-
--harness-config {config if provided, else omit} \
|
|
33
|
+
python3 ~/.harness-evolver/tools/init.py {directory if provided} \
|
|
29
34
|
--tools-dir ~/.harness-evolver/tools
|
|
30
35
|
```
|
|
31
36
|
|
|
37
|
+
Add explicit flags only if the user provided them:
|
|
38
|
+
- `--harness PATH` — override harness auto-detection
|
|
39
|
+
- `--eval PATH` — override eval auto-detection
|
|
40
|
+
- `--tasks PATH` — override tasks auto-detection
|
|
41
|
+
- `--harness-config PATH` — optional config for the harness
|
|
42
|
+
|
|
32
43
|
If `~/.harness-evolver/tools/init.py` does not exist, check `.harness-evolver/tools/init.py` (local override).
|
|
33
44
|
|
|
34
45
|
After init completes, report:
|
|
46
|
+
- What was detected (harness, eval, tasks)
|
|
35
47
|
- Baseline score
|
|
36
48
|
- Number of tasks
|
|
49
|
+
- Integrations detected (LangSmith, Context7, stack)
|
|
37
50
|
- Next step: run `/harness-evolve` to start the optimization loop
|
|
38
|
-
|
|
39
|
-
## LangSmith Dataset (optional)
|
|
40
|
-
|
|
41
|
-
If the user provides `--langsmith-dataset <dataset_id>`:
|
|
42
|
-
|
|
43
|
-
```bash
|
|
44
|
-
python3 ~/.harness-evolver/tools/init.py \
|
|
45
|
-
--harness {harness} \
|
|
46
|
-
--eval {eval} \
|
|
47
|
-
--tasks {tasks} \
|
|
48
|
-
--base-dir .harness-evolver \
|
|
49
|
-
--langsmith-dataset {dataset_id}
|
|
50
|
-
```
|
|
51
|
-
|
|
52
|
-
This pulls examples from a LangSmith dataset to use as tasks.
|
|
53
|
-
Requires `LANGSMITH_API_KEY` in the environment.
|
|
Binary file
|
|
Binary file
|
package/tools/init.py
CHANGED
|
@@ -2,15 +2,17 @@
|
|
|
2
2
|
"""Project initializer for Harness Evolver.
|
|
3
3
|
|
|
4
4
|
Usage:
|
|
5
|
-
init.py
|
|
6
|
-
|
|
5
|
+
init.py [DIR] # auto-detect in DIR (or CWD)
|
|
6
|
+
init.py --harness PATH --eval PATH --tasks PATH # explicit paths
|
|
7
|
+
init.py --base-dir PATH [--harness-config PATH] # advanced options
|
|
7
8
|
|
|
8
|
-
|
|
9
|
-
|
|
9
|
+
Auto-detects harness.py, eval.py, tasks/ and config.json in the working directory.
|
|
10
|
+
Falls back to fuzzy matching (*harness*/*agent*/*run*, *eval*/*score*/*judge*, dirs with task .json files).
|
|
10
11
|
Stdlib-only. No external dependencies.
|
|
11
12
|
"""
|
|
12
13
|
|
|
13
14
|
import argparse
|
|
15
|
+
import glob
|
|
14
16
|
import json
|
|
15
17
|
import os
|
|
16
18
|
import shutil
|
|
@@ -19,6 +21,74 @@ import sys
|
|
|
19
21
|
import tempfile
|
|
20
22
|
|
|
21
23
|
|
|
24
|
+
def _auto_detect(search_dir):
|
|
25
|
+
"""Auto-detect harness, eval, and tasks in a directory.
|
|
26
|
+
|
|
27
|
+
Returns (harness_path, eval_path, tasks_path, config_path) or raises SystemExit.
|
|
28
|
+
"""
|
|
29
|
+
search_dir = os.path.abspath(search_dir)
|
|
30
|
+
|
|
31
|
+
# Exact convention names first
|
|
32
|
+
harness = None
|
|
33
|
+
eval_script = None
|
|
34
|
+
tasks = None
|
|
35
|
+
config = None
|
|
36
|
+
|
|
37
|
+
# 1. Exact matches
|
|
38
|
+
for name in ["harness.py"]:
|
|
39
|
+
p = os.path.join(search_dir, name)
|
|
40
|
+
if os.path.isfile(p):
|
|
41
|
+
harness = p
|
|
42
|
+
for name in ["eval.py"]:
|
|
43
|
+
p = os.path.join(search_dir, name)
|
|
44
|
+
if os.path.isfile(p):
|
|
45
|
+
eval_script = p
|
|
46
|
+
for name in ["tasks", "tasks/"]:
|
|
47
|
+
p = os.path.join(search_dir, name.rstrip("/"))
|
|
48
|
+
if os.path.isdir(p):
|
|
49
|
+
tasks = p
|
|
50
|
+
for name in ["config.json"]:
|
|
51
|
+
p = os.path.join(search_dir, name)
|
|
52
|
+
if os.path.isfile(p):
|
|
53
|
+
config = p
|
|
54
|
+
|
|
55
|
+
# 2. Fuzzy fallback for harness
|
|
56
|
+
if not harness:
|
|
57
|
+
candidates = [f for f in glob.glob(os.path.join(search_dir, "*.py"))
|
|
58
|
+
if any(k in os.path.basename(f).lower() for k in ["harness", "agent", "run"])]
|
|
59
|
+
if len(candidates) == 1:
|
|
60
|
+
harness = candidates[0]
|
|
61
|
+
|
|
62
|
+
# 3. Fuzzy fallback for eval
|
|
63
|
+
if not eval_script:
|
|
64
|
+
candidates = [f for f in glob.glob(os.path.join(search_dir, "*.py"))
|
|
65
|
+
if any(k in os.path.basename(f).lower() for k in ["eval", "score", "judge"])
|
|
66
|
+
and f != harness]
|
|
67
|
+
if len(candidates) == 1:
|
|
68
|
+
eval_script = candidates[0]
|
|
69
|
+
|
|
70
|
+
# 4. Fuzzy fallback for tasks
|
|
71
|
+
if not tasks:
|
|
72
|
+
for d in os.listdir(search_dir):
|
|
73
|
+
dp = os.path.join(search_dir, d)
|
|
74
|
+
if os.path.isdir(dp) and any(f.endswith(".json") for f in os.listdir(dp)):
|
|
75
|
+
# Check if at least one JSON has "id" and "input" keys
|
|
76
|
+
for f in os.listdir(dp):
|
|
77
|
+
if f.endswith(".json"):
|
|
78
|
+
try:
|
|
79
|
+
with open(os.path.join(dp, f)) as fh:
|
|
80
|
+
data = json.load(fh)
|
|
81
|
+
if "id" in data and "input" in data:
|
|
82
|
+
tasks = dp
|
|
83
|
+
break
|
|
84
|
+
except (json.JSONDecodeError, KeyError):
|
|
85
|
+
pass
|
|
86
|
+
if tasks:
|
|
87
|
+
break
|
|
88
|
+
|
|
89
|
+
return harness, eval_script, tasks, config
|
|
90
|
+
|
|
91
|
+
|
|
22
92
|
def _detect_langsmith():
|
|
23
93
|
"""Auto-detect LangSmith API key and return config section."""
|
|
24
94
|
if os.environ.get("LANGSMITH_API_KEY"):
|
|
@@ -77,16 +147,59 @@ def _check_context7_available():
|
|
|
77
147
|
|
|
78
148
|
|
|
79
149
|
def main():
|
|
80
|
-
parser = argparse.ArgumentParser(
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
parser.add_argument("
|
|
150
|
+
parser = argparse.ArgumentParser(
|
|
151
|
+
description="Initialize Harness Evolver project",
|
|
152
|
+
usage="init.py [DIR] [--harness PATH] [--eval PATH] [--tasks PATH]",
|
|
153
|
+
)
|
|
154
|
+
parser.add_argument("dir", nargs="?", default=".",
|
|
155
|
+
help="Directory to scan (default: current directory)")
|
|
156
|
+
parser.add_argument("--harness", default=None, help="Path to harness script")
|
|
157
|
+
parser.add_argument("--eval", default=None, help="Path to eval script")
|
|
158
|
+
parser.add_argument("--tasks", default=None, help="Path to tasks directory")
|
|
159
|
+
parser.add_argument("--base-dir", default=None, help="Path for .harness-evolver/")
|
|
85
160
|
parser.add_argument("--harness-config", default=None, help="Path to harness config.json")
|
|
86
161
|
parser.add_argument("--tools-dir", default=None, help="Path to tools directory")
|
|
87
162
|
args = parser.parse_args()
|
|
88
163
|
|
|
89
|
-
|
|
164
|
+
# Auto-detect missing args
|
|
165
|
+
search_dir = os.path.abspath(args.dir)
|
|
166
|
+
if not args.harness or not args.eval or not args.tasks:
|
|
167
|
+
detected_harness, detected_eval, detected_tasks, detected_config = _auto_detect(search_dir)
|
|
168
|
+
if not args.harness:
|
|
169
|
+
args.harness = detected_harness
|
|
170
|
+
if not args.eval:
|
|
171
|
+
args.eval = detected_eval
|
|
172
|
+
if not args.tasks:
|
|
173
|
+
args.tasks = detected_tasks
|
|
174
|
+
if not args.harness_config and detected_config:
|
|
175
|
+
args.harness_config = detected_config
|
|
176
|
+
|
|
177
|
+
# Validate we have everything
|
|
178
|
+
missing = []
|
|
179
|
+
if not args.harness:
|
|
180
|
+
missing.append("harness (no harness.py or *harness*.py found)")
|
|
181
|
+
if not args.eval:
|
|
182
|
+
missing.append("eval (no eval.py or *eval*.py found)")
|
|
183
|
+
if not args.tasks:
|
|
184
|
+
missing.append("tasks (no tasks/ directory with JSON files found)")
|
|
185
|
+
if missing:
|
|
186
|
+
print("Could not auto-detect:", file=sys.stderr)
|
|
187
|
+
for m in missing:
|
|
188
|
+
print(f" - {m}", file=sys.stderr)
|
|
189
|
+
print(f"\nSearched in: {search_dir}", file=sys.stderr)
|
|
190
|
+
print("\nProvide explicitly:", file=sys.stderr)
|
|
191
|
+
print(" /harness-evolve-init --harness PATH --eval PATH --tasks PATH", file=sys.stderr)
|
|
192
|
+
sys.exit(1)
|
|
193
|
+
|
|
194
|
+
# Print what was detected
|
|
195
|
+
print(f"Harness: {os.path.relpath(args.harness, search_dir)}")
|
|
196
|
+
print(f"Eval: {os.path.relpath(args.eval, search_dir)}")
|
|
197
|
+
print(f"Tasks: {os.path.relpath(args.tasks, search_dir)}/")
|
|
198
|
+
if args.harness_config:
|
|
199
|
+
print(f"Config: {os.path.relpath(args.harness_config, search_dir)}")
|
|
200
|
+
print()
|
|
201
|
+
|
|
202
|
+
base = args.base_dir or os.path.join(search_dir, ".harness-evolver")
|
|
90
203
|
tools = args.tools_dir or os.path.dirname(__file__)
|
|
91
204
|
|
|
92
205
|
evaluate_py = os.path.join(tools, "evaluate.py")
|