polyharness 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -19
- package/bin/ph.mjs +4 -4
- package/bin/postinstall.mjs +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -129,13 +129,20 @@ This auto-detects which agent backends (Claude Code, Codex, etc.) are installed
|
|
|
129
129
|
### 3. Initialize a workspace
|
|
130
130
|
|
|
131
131
|
```bash
|
|
132
|
-
ph init --agent claude-code
|
|
133
|
-
--base-harness ./my_harness/ \
|
|
134
|
-
--task-dir ./my_tasks/ \
|
|
135
|
-
--eval-script ./evaluate.py
|
|
132
|
+
ph init --agent claude-code --base-harness ./my_harness/ --task-dir ./my_tasks/ --eval-script ./evaluate.py
|
|
136
133
|
```
|
|
137
134
|
|
|
138
|
-
This copies your harness code, test cases, and evaluation script into
|
|
135
|
+
This copies your harness code, test cases, and evaluation script into an isolated **optimization workspace** (by default `.ph_workspace` in the current directory, or the folder specified by `--workspace`).
|
|
136
|
+
|
|
137
|
+
**Configure Your Agent**
|
|
138
|
+
|
|
139
|
+
PolyHarness automatically sandboxes your agent inside this workspace, ensuring it only edits candidate copies and safely reads history traces.
|
|
140
|
+
|
|
141
|
+
| Scenario | How to configure |
|
|
142
|
+
|----------|------------------|
|
|
143
|
+
| **Supported CLI Tools** | Run `ph init --agent <name>`. PolyHarness auto-injects required instructions (e.g., `CLAUDE.md`).<br>*(Supported: claude-code, claw-code, codex, opencode)* |
|
|
144
|
+
| **API / LLM Directly** | Run `ph init --agent api`. No CLI tool is required; just run `export OPENAI_API_KEY="sk-..."` before `ph run`. |
|
|
145
|
+
| **Custom CLI path** | If your CLI agent uses a non-standard command, edit `config.yaml` in the workspace before running:<br>`proposer: { cli_path: "npx @anthropic-ai/claude-code" }` |
|
|
139
146
|
|
|
140
147
|
### 4. Run the optimization loop
|
|
141
148
|
|
|
@@ -172,7 +179,6 @@ ph init --agent local \
|
|
|
172
179
|
--task-dir . \
|
|
173
180
|
--workspace .ph_workspace
|
|
174
181
|
|
|
175
|
-
ph run --workspace .ph_workspace --max-iterations 5
|
|
176
182
|
ph log --workspace .ph_workspace
|
|
177
183
|
|
|
178
184
|
# Search Tree
|
|
@@ -265,7 +271,7 @@ npm install -g polyharness # postinstall auto-installs Python package
|
|
|
265
271
|
npx polyharness doctor # or run without global install
|
|
266
272
|
```
|
|
267
273
|
|
|
268
|
-
The npm package is a thin Node.js wrapper (`bin/ph.mjs`) that finds and invokes the Python CLI. It checks: `ph` on PATH → `python -m
|
|
274
|
+
The npm package is a thin Node.js wrapper (`bin/ph.mjs`) that finds and invokes the Python CLI. It checks: `ph` on PATH → `python -m polyharness` → auto-discovers `.venv` in parent directories.
|
|
269
275
|
|
|
270
276
|
### From source
|
|
271
277
|
|
|
@@ -277,7 +283,7 @@ python -m venv .venv && source .venv/bin/activate
|
|
|
277
283
|
pip install -e ".[dev]"
|
|
278
284
|
# or: pip install anthropic click pydantic pyyaml rich && export PYTHONPATH="$PWD/src"
|
|
279
285
|
|
|
280
|
-
python -m
|
|
286
|
+
python -m polyharness --version
|
|
281
287
|
```
|
|
282
288
|
|
|
283
289
|
---
|
|
@@ -340,8 +346,8 @@ The score trajectories below are measured from the bundled examples using the cu
|
|
|
340
346
|
|
|
341
347
|
```bash
|
|
342
348
|
cd examples/text-classification
|
|
343
|
-
ph init --agent local --base-harness ./base_harness --task-dir .
|
|
344
|
-
ph run --workspace .ws --max-iterations 3
|
|
349
|
+
ph init --agent local --base-harness ./base_harness --task-dir .
|
|
350
|
+
ph run --max-iterations 3
|
|
345
351
|
|
|
346
352
|
# iter_0: 0.65 → iter_1: 1.00 ★ (naive word list → expanded lexicon)
|
|
347
353
|
```
|
|
@@ -350,8 +356,8 @@ ph run --workspace .ws --max-iterations 3
|
|
|
350
356
|
|
|
351
357
|
```bash
|
|
352
358
|
cd examples/math-word-problems
|
|
353
|
-
ph init --agent local --base-harness ./base_harness --task-dir .
|
|
354
|
-
ph run --workspace .ws --max-iterations 5
|
|
359
|
+
ph init --agent local --base-harness ./base_harness --task-dir .
|
|
360
|
+
ph run --max-iterations 5
|
|
355
361
|
|
|
356
362
|
# iter_0: 0.35 → iter_1: 0.50 → iter_2: 0.65 → iter_3: 0.90 ★
|
|
357
363
|
# (naive multiply → operation detection → averages/% → multi-step reasoning)
|
|
@@ -361,8 +367,8 @@ ph run --workspace .ws --max-iterations 5
|
|
|
361
367
|
|
|
362
368
|
```bash
|
|
363
369
|
cd examples/code-generation
|
|
364
|
-
ph init --agent local --base-harness ./base_harness --task-dir .
|
|
365
|
-
ph run --workspace .ws --max-iterations 5
|
|
370
|
+
ph init --agent local --base-harness ./base_harness --task-dir .
|
|
371
|
+
ph run --max-iterations 5
|
|
366
372
|
|
|
367
373
|
# iter_0: 0.27 → iter_1: 0.50 → iter_2: 0.68 → iter_3: 0.95 ★
|
|
368
374
|
# (5 keywords → 10 patterns → composite logic → comprehensive coverage)
|
|
@@ -372,8 +378,8 @@ ph run --workspace .ws --max-iterations 5
|
|
|
372
378
|
|
|
373
379
|
```bash
|
|
374
380
|
cd examples/api-calling
|
|
375
|
-
ph init --agent local --base-harness ./base_harness --task-dir .
|
|
376
|
-
ph run --workspace .ws --max-iterations 5
|
|
381
|
+
ph init --agent local --base-harness ./base_harness --task-dir .
|
|
382
|
+
ph run --max-iterations 5
|
|
377
383
|
|
|
378
384
|
# iter_0: 0.19 → iter_1: 0.55 → iter_2: 0.77 → iter_3: 0.87 ★
|
|
379
385
|
# (keyword matching → broad routing → param helpers → full regex extraction)
|
|
@@ -383,8 +389,8 @@ ph run --workspace .ws --max-iterations 5
|
|
|
383
389
|
|
|
384
390
|
```bash
|
|
385
391
|
cd examples/rag-qa
|
|
386
|
-
ph init --agent local --base-harness ./base_harness --task-dir .
|
|
387
|
-
ph run --workspace .ws --max-iterations 5
|
|
392
|
+
ph init --agent local --base-harness ./base_harness --task-dir .
|
|
393
|
+
ph run --max-iterations 5
|
|
388
394
|
|
|
389
395
|
# iter_0: 0.51 → iter_1: 0.79 ★
|
|
390
396
|
# (word overlap → stopword-filtered retrieval + sentence scoring)
|
|
@@ -395,7 +401,7 @@ ph run --workspace .ws --max-iterations 5
|
|
|
395
401
|
## Project Structure
|
|
396
402
|
|
|
397
403
|
```
|
|
398
|
-
src/
|
|
404
|
+
src/polyharness/
|
|
399
405
|
├── cli.py # Click CLI — 16 commands/subcommands
|
|
400
406
|
├── config.py # Pydantic config models
|
|
401
407
|
├── orchestrator.py # Meta-Harness search loop + progress bar + error recovery
|
package/bin/ph.mjs
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* This thin wrapper finds and invokes the Python `ph` CLI.
|
|
7
7
|
* Resolution order:
|
|
8
8
|
* 1. `ph` on PATH (pip-installed entry point)
|
|
9
|
-
* 2. `python -m
|
|
9
|
+
* 2. `python -m polyharness` (PYTHONPATH / editable install)
|
|
10
10
|
* 3. Local .venv (auto-detect venv in cwd or parents)
|
|
11
11
|
*/
|
|
12
12
|
|
|
@@ -44,14 +44,14 @@ function findVenvPython() {
|
|
|
44
44
|
// Strategy 1: `ph` on PATH
|
|
45
45
|
if (tryExec("ph", args)) process.exit(0);
|
|
46
46
|
|
|
47
|
-
// Strategy 2: system python -m
|
|
47
|
+
// Strategy 2: system python -m polyharness
|
|
48
48
|
for (const py of ["python3", "python"]) {
|
|
49
|
-
if (tryExec(py, ["-m", "
|
|
49
|
+
if (tryExec(py, ["-m", "polyharness", ...args])) process.exit(0);
|
|
50
50
|
}
|
|
51
51
|
|
|
52
52
|
// Strategy 3: auto-detect .venv
|
|
53
53
|
const venvPy = findVenvPython();
|
|
54
|
-
if (venvPy && tryExec(venvPy, ["-m", "
|
|
54
|
+
if (venvPy && tryExec(venvPy, ["-m", "polyharness", ...args])) {
|
|
55
55
|
process.exit(0);
|
|
56
56
|
}
|
|
57
57
|
|
package/bin/postinstall.mjs
CHANGED
|
@@ -10,7 +10,7 @@ import { execSync } from "node:child_process";
|
|
|
10
10
|
|
|
11
11
|
function isInstalled() {
|
|
12
12
|
try {
|
|
13
|
-
execSync('python3 -c "import
|
|
13
|
+
execSync('python3 -c "import polyharness"', { stdio: "ignore" });
|
|
14
14
|
return true;
|
|
15
15
|
} catch {
|
|
16
16
|
return false;
|
package/package.json
CHANGED