polyharness 0.1.0 → 0.1.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -129,13 +129,20 @@ This auto-detects which agent backends (Claude Code, Codex, etc.) are installed
129
129
  ### 3. Initialize a workspace
130
130
 
131
131
  ```bash
132
- ph init --agent claude-code \
133
- --base-harness ./my_harness/ \
134
- --task-dir ./my_tasks/ \
135
- --eval-script ./evaluate.py
132
+ ph init --agent claude-code --base-harness ./my_harness/ --task-dir ./my_tasks/ --eval-script ./evaluate.py
136
133
  ```
137
134
 
138
- This copies your harness code, test cases, and evaluation script into a structured workspace and auto-configures everything. No manual YAML editing.
135
+ This copies your harness code, test cases, and evaluation script into an isolated **optimization workspace** (by default `.ph_workspace` in the current directory, or the folder specified by `--workspace`).
136
+
137
+ **Configure Your Agent**
138
+
139
+ PolyHarness automatically sandboxes your agent inside this workspace, ensuring it only edits candidate copies and safely reads history traces.
140
+
141
+ | Scenario | How to configure |
142
+ |----------|------------------|
143
+ | **Supported CLI Tools** | Run `ph init --agent <name>`. PolyHarness auto-injects required instructions (e.g., `CLAUDE.md`).<br>*(Supported: claude-code, claw-code, codex, opencode)* |
144
+ | **API / LLM Directly** | Run `ph init --agent api`. No CLI tool is required; just run `export OPENAI_API_KEY="sk-..."` before `ph run`. |
145
+ | **Custom CLI path** | If your CLI agent uses a non-standard command, edit `config.yaml` in the workspace before running:<br>`proposer: { cli_path: "npx @anthropic-ai/claude-code" }`|
139
146
 
140
147
  ### 4. Run the optimization loop
141
148
 
@@ -172,7 +179,6 @@ ph init --agent local \
172
179
  --task-dir . \
173
180
  --workspace .ph_workspace
174
181
 
175
- ph run --workspace .ph_workspace --max-iterations 5
176
182
  ph log --workspace .ph_workspace
177
183
 
178
184
  # Search Tree
@@ -265,7 +271,7 @@ npm install -g polyharness # postinstall auto-installs Python package
265
271
  npx polyharness doctor # or run without global install
266
272
  ```
267
273
 
268
- The npm package is a thin Node.js wrapper (`bin/ph.mjs`) that finds and invokes the Python CLI. It checks: `ph` on PATH → `python -m poly_harness` → auto-discovers `.venv` in parent directories.
274
+ The npm package is a thin Node.js wrapper (`bin/ph.mjs`) that finds and invokes the Python CLI. It tries, in order: `ph` on PATH → `python -m polyharness` → a `.venv` auto-discovered in the current directory or its parents.
269
275
 
270
276
  ### From source
271
277
 
@@ -277,7 +283,7 @@ python -m venv .venv && source .venv/bin/activate
277
283
  pip install -e ".[dev]"
278
284
  # or: pip install anthropic click pydantic pyyaml rich && export PYTHONPATH="$PWD/src"
279
285
 
280
- python -m poly_harness --version
286
+ python -m polyharness --version
281
287
  ```
282
288
 
283
289
  ---
@@ -340,8 +346,8 @@ The score trajectories below are measured from the bundled examples using the cu
340
346
 
341
347
  ```bash
342
348
  cd examples/text-classification
343
- ph init --agent local --base-harness ./base_harness --task-dir . --workspace .ws
344
- ph run --workspace .ws --max-iterations 3
349
+ ph init --agent local --base-harness ./base_harness --task-dir .
350
+ ph run --max-iterations 3
345
351
 
346
352
  # iter_0: 0.65 → iter_1: 1.00 ★ (naive word list → expanded lexicon)
347
353
  ```
@@ -350,8 +356,8 @@ ph run --workspace .ws --max-iterations 3
350
356
 
351
357
  ```bash
352
358
  cd examples/math-word-problems
353
- ph init --agent local --base-harness ./base_harness --task-dir . --workspace .ws
354
- ph run --workspace .ws --max-iterations 5
359
+ ph init --agent local --base-harness ./base_harness --task-dir .
360
+ ph run --max-iterations 5
355
361
 
356
362
  # iter_0: 0.35 → iter_1: 0.50 → iter_2: 0.65 → iter_3: 0.90 ★
357
363
  # (naive multiply → operation detection → averages/% → multi-step reasoning)
@@ -361,8 +367,8 @@ ph run --workspace .ws --max-iterations 5
361
367
 
362
368
  ```bash
363
369
  cd examples/code-generation
364
- ph init --agent local --base-harness ./base_harness --task-dir . --workspace .ws
365
- ph run --workspace .ws --max-iterations 5
370
+ ph init --agent local --base-harness ./base_harness --task-dir .
371
+ ph run --max-iterations 5
366
372
 
367
373
  # iter_0: 0.27 → iter_1: 0.50 → iter_2: 0.68 → iter_3: 0.95 ★
368
374
  # (5 keywords → 10 patterns → composite logic → comprehensive coverage)
@@ -372,8 +378,8 @@ ph run --workspace .ws --max-iterations 5
372
378
 
373
379
  ```bash
374
380
  cd examples/api-calling
375
- ph init --agent local --base-harness ./base_harness --task-dir . --workspace .ws
376
- ph run --workspace .ws --max-iterations 5
381
+ ph init --agent local --base-harness ./base_harness --task-dir .
382
+ ph run --max-iterations 5
377
383
 
378
384
  # iter_0: 0.19 → iter_1: 0.55 → iter_2: 0.77 → iter_3: 0.87 ★
379
385
  # (keyword matching → broad routing → param helpers → full regex extraction)
@@ -383,8 +389,8 @@ ph run --workspace .ws --max-iterations 5
383
389
 
384
390
  ```bash
385
391
  cd examples/rag-qa
386
- ph init --agent local --base-harness ./base_harness --task-dir . --workspace .ws
387
- ph run --workspace .ws --max-iterations 5
392
+ ph init --agent local --base-harness ./base_harness --task-dir .
393
+ ph run --max-iterations 5
388
394
 
389
395
  # iter_0: 0.51 → iter_1: 0.79 ★
390
396
  # (word overlap → stopword-filtered retrieval + sentence scoring)
@@ -395,7 +401,7 @@ ph run --workspace .ws --max-iterations 5
395
401
  ## Project Structure
396
402
 
397
403
  ```
398
- src/poly_harness/
404
+ src/polyharness/
399
405
  ├── cli.py # Click CLI — 16 commands/subcommands
400
406
  ├── config.py # Pydantic config models
401
407
  ├── orchestrator.py # Meta-Harness search loop + progress bar + error recovery
package/bin/ph.mjs CHANGED
@@ -6,7 +6,7 @@
6
6
  * This thin wrapper finds and invokes the Python `ph` CLI.
7
7
  * Resolution order:
8
8
  * 1. `ph` on PATH (pip-installed entry point)
9
- * 2. `python -m poly_harness` (PYTHONPATH / editable install)
9
+ * 2. `python -m polyharness` (PYTHONPATH / editable install)
10
10
  * 3. Local .venv (auto-detect venv in cwd or parents)
11
11
  */
12
12
 
@@ -44,14 +44,14 @@ function findVenvPython() {
44
44
  // Strategy 1: `ph` on PATH
45
45
  if (tryExec("ph", args)) process.exit(0);
46
46
 
47
- // Strategy 2: system python -m poly_harness
47
+ // Strategy 2: system python -m polyharness
48
48
  for (const py of ["python3", "python"]) {
49
- if (tryExec(py, ["-m", "poly_harness", ...args])) process.exit(0);
49
+ if (tryExec(py, ["-m", "polyharness", ...args])) process.exit(0);
50
50
  }
51
51
 
52
52
  // Strategy 3: auto-detect .venv
53
53
  const venvPy = findVenvPython();
54
- if (venvPy && tryExec(venvPy, ["-m", "poly_harness", ...args])) {
54
+ if (venvPy && tryExec(venvPy, ["-m", "polyharness", ...args])) {
55
55
  process.exit(0);
56
56
  }
57
57
 
@@ -10,7 +10,7 @@ import { execSync } from "node:child_process";
10
10
 
11
11
  function isInstalled() {
12
12
  try {
13
- execSync('python3 -c "import poly_harness"', { stdio: "ignore" });
13
+ execSync('python3 -c "import polyharness"', { stdio: "ignore" });
14
14
  return true;
15
15
  } catch {
16
16
  return false;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polyharness",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "description": "Make your AI agent evolve automatically through iterative harness optimization.",
5
5
  "keywords": ["agent", "harness", "optimization", "meta-harness", "cli"],
6
6
  "license": "MIT",