harness-evolver 2.4.0 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/install.js +9 -18
- package/package.json +1 -1
- package/skills/init/SKILL.md +44 -24
package/bin/install.js
CHANGED
|
@@ -15,28 +15,19 @@ const VERSION = require("../package.json").version;
|
|
|
15
15
|
const PLUGIN_ROOT = path.resolve(__dirname, "..");
|
|
16
16
|
const HOME = process.env.HOME || process.env.USERPROFILE;
|
|
17
17
|
|
|
18
|
-
const MAGENTA = "\x1b[35m";
|
|
19
|
-
const BRIGHT_MAGENTA = "\x1b[95m";
|
|
20
|
-
const GREEN = "\x1b[32m";
|
|
18
|
+
const GREEN = "\x1b[38;2;0;255;136m";
|
|
21
19
|
const YELLOW = "\x1b[33m";
|
|
22
20
|
const RED = "\x1b[31m";
|
|
23
21
|
const DIM = "\x1b[2m";
|
|
24
22
|
const BOLD = "\x1b[1m";
|
|
25
23
|
const RESET = "\x1b[0m";
|
|
26
24
|
|
|
27
|
-
const LOGO =
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═══╝╚══════╝╚══════╝╚══════╝
|
|
34
|
-
${MAGENTA}${BOLD}███████╗██╗ ██╗ ██████╗ ██╗ ██╗ ██╗███████╗██████╗
|
|
35
|
-
██╔════╝██║ ██║██╔═══██╗██║ ██║ ██║██╔════╝██╔══██╗
|
|
36
|
-
█████╗ ██║ ██║██║ ██║██║ ██║ ██║█████╗ ██████╔╝
|
|
37
|
-
██╔══╝ ╚██╗ ██╔╝██║ ██║██║ ╚██╗ ██╔╝██╔══╝ ██╔══██╗
|
|
38
|
-
███████╗ ╚████╔╝ ╚██████╔╝███████╗╚████╔╝ ███████╗██║ ██║
|
|
39
|
-
╚══════╝ ╚═══╝ ╚═════╝ ╚══════╝ ╚═══╝ ╚══════╝╚═╝ ╚═╝${RESET}
|
|
25
|
+
const LOGO = `${BOLD}${GREEN}
|
|
26
|
+
╦ ╦╔═╗╦═╗╔╗╔╔═╗╔═╗╔═╗ ╔═╗╦ ╦╔═╗╦ ╦ ╦╔═╗╦═╗
|
|
27
|
+
╠═╣╠═╣╠╦╝║║║║╣ ╚═╗╚═╗ ║╣ ╚╗╔╝║ ║║ ╚╗╔╝║╣ ╠╦╝
|
|
28
|
+
╩ ╩╩ ╩╩╚═╝╚╝╚═╝╚═╝╚═╝ ╚═╝ ╚╝ ╚═╝╩═╝ ╚╝ ╚═╝╩╚═
|
|
29
|
+
${RESET}
|
|
30
|
+
${DIM}${GREEN} End-to-end harness optimization for AI agents${RESET}
|
|
40
31
|
`;
|
|
41
32
|
|
|
42
33
|
function ask(rl, question) {
|
|
@@ -218,7 +209,7 @@ async function main() {
|
|
|
218
209
|
console.log();
|
|
219
210
|
|
|
220
211
|
for (const runtime of selected) {
|
|
221
|
-
console.log(` Installing for ${
|
|
212
|
+
console.log(` Installing for ${GREEN}${runtime.name}${RESET}\n`);
|
|
222
213
|
cleanupBrokenPluginEntry(runtime.dir);
|
|
223
214
|
installForRuntime(runtime.dir, scope);
|
|
224
215
|
console.log();
|
|
@@ -232,7 +223,7 @@ async function main() {
|
|
|
232
223
|
fs.writeFileSync(versionPath, VERSION);
|
|
233
224
|
console.log(` ${GREEN}✓${RESET} VERSION ${VERSION}`);
|
|
234
225
|
|
|
235
|
-
console.log(`\n ${GREEN}Done!${RESET} Restart Claude Code, then run ${
|
|
226
|
+
console.log(`\n ${GREEN}Done!${RESET} Restart Claude Code, then run ${GREEN}/harness-evolver:init${RESET}\n`);
|
|
236
227
|
|
|
237
228
|
// Optional integrations
|
|
238
229
|
console.log(` ${YELLOW}Install optional integrations?${RESET}\n`);
|
package/package.json
CHANGED
package/skills/init/SKILL.md
CHANGED
|
@@ -48,30 +48,50 @@ If NO eval exists:
|
|
|
48
48
|
|
|
49
49
|
**Tasks** (`tasks/`): If test tasks exist, use them.
|
|
50
50
|
|
|
51
|
-
If NO tasks exist:
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
51
|
+
If NO tasks exist, generate them. First, identify all relevant source files:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
find . -name "*.py" -not -path "./.venv/*" -not -path "./.harness-evolver/*" | head -10
|
|
55
|
+
find . -name "*.json" -o -name "*.md" -o -name "*.txt" -o -name "*.yaml" -o -name "*.yml" | grep -v .venv | grep -v .harness-evolver | head -10
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Then spawn testgen subagent with CONCRETE file paths (not placeholders):
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
Agent(
|
|
62
|
+
subagent_type: "harness-evolver-testgen",
|
|
63
|
+
description: "TestGen: generate 30 test cases",
|
|
64
|
+
prompt: |
|
|
65
|
+
<objective>
|
|
66
|
+
Generate 30 diverse test cases for this project. Write them to the tasks/ directory
|
|
67
|
+
in the current working directory.
|
|
68
|
+
</objective>
|
|
69
|
+
|
|
70
|
+
<project_context>
|
|
71
|
+
This project is at: {absolute path to project root}
|
|
72
|
+
Entry point: {the harness/agent file you identified, e.g., crew.py or pipeline/moderator.py}
|
|
73
|
+
Framework: {what you detected — CrewAI, LangGraph, etc.}
|
|
74
|
+
</project_context>
|
|
75
|
+
|
|
76
|
+
<files_to_read>
|
|
77
|
+
{LIST EVERY .py file and data file you found above — use ABSOLUTE PATHS}
|
|
78
|
+
Example:
|
|
79
|
+
- /home/rp/Desktop/test-crewai/crew.py
|
|
80
|
+
- /home/rp/Desktop/test-crewai/README.md
|
|
81
|
+
</files_to_read>
|
|
82
|
+
|
|
83
|
+
<output>
|
|
84
|
+
Create directory tasks/ (at project root) with 30 files: task_001.json through task_030.json.
|
|
85
|
+
Format: {"id": "task_001", "input": "...", "metadata": {"difficulty": "easy|medium|hard", "type": "standard|edge|cross_domain|adversarial"}}
|
|
86
|
+
No "expected" field needed — the judge subagent will score outputs.
|
|
87
|
+
Distribution: 40% standard, 20% edge, 20% cross-domain, 20% adversarial.
|
|
88
|
+
</output>
|
|
89
|
+
)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Wait for `## TESTGEN COMPLETE`. If the subagent fails or returns with no tasks, generate them yourself inline (fallback).
|
|
93
|
+
|
|
94
|
+
Print: "Generated {N} test cases from code analysis."
|
|
75
95
|
|
|
76
96
|
## Phase 3: Run Init
|
|
77
97
|
|