polyharness 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {polyharness-0.2.2/src/polyharness.egg-info → polyharness-0.2.3}/PKG-INFO +10 -10
- {polyharness-0.2.2 → polyharness-0.2.3}/README.md +9 -9
- {polyharness-0.2.2 → polyharness-0.2.3}/pyproject.toml +1 -1
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/__init__.py +1 -1
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/config.py +2 -1
- polyharness-0.2.3/src/polyharness/proposer/adapters/claude_code.py +43 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/codex.py +6 -3
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/opencode.py +4 -2
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/api_proposer.py +1 -1
- {polyharness-0.2.2 → polyharness-0.2.3/src/polyharness.egg-info}/PKG-INFO +10 -10
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_cli_adapters.py +13 -1
- polyharness-0.2.2/src/polyharness/proposer/adapters/claude_code.py +0 -31
- {polyharness-0.2.2 → polyharness-0.2.3}/LICENSE +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/setup.cfg +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/__main__.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/cli.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/collector.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/doctor.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/evaluator/__init__.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/evaluator/evaluator.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/orchestrator.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/__init__.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/__init__.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/base.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/claw_code.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/hermes.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/bandit.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/base.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/cli_proposer.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/local_proposer.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/openai_proposer.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/search_log.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/__init__.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/api-calling/base_harness/harness.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/api-calling/evaluate.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/api-calling/tasks/test_cases.json +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/code-generation/base_harness/harness.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/code-generation/evaluate.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/code-generation/tasks/test_cases.json +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/math-word-problems/base_harness/harness.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/math-word-problems/evaluate.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/math-word-problems/tasks/test_cases.json +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/rag-qa/base_harness/harness.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/rag-qa/evaluate.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/rag-qa/tasks/test_cases.json +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/text-classification/base_harness/harness.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/text-classification/evaluate.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/text-classification/tasks/test_cases.json +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/utils/__init__.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/workspace.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness.egg-info/SOURCES.txt +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness.egg-info/dependency_links.txt +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness.egg-info/entry_points.txt +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness.egg-info/requires.txt +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness.egg-info/top_level.txt +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_bandit.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_cli_features.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_collector.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_compare.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_config.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_evaluator.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_evolution.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_example.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_export.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_log.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_orchestrator.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_search_log.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_smoke.py +0 -0
- {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_workspace.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: polyharness
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Automated harness optimization for AI agents — make your agent evolve.
|
|
5
5
|
Author: weijt606
|
|
6
6
|
License-Expression: MIT
|
|
@@ -336,9 +336,9 @@ Just add `ph wrap --auto-evolve` in front of your agent command (pick the one ma
|
|
|
336
336
|
# CLI agent backends — wrap the agent you already use
|
|
337
337
|
ph wrap --auto-evolve claude -p "Refactor the auth module to use JWT" # Claude Code
|
|
338
338
|
ph wrap --auto-evolve claw -p "Write integration tests for payments" # Claw Code
|
|
339
|
-
ph wrap --auto-evolve codex "Add retry logic to the API client" # Codex
|
|
339
|
+
ph wrap --auto-evolve codex exec "Add retry logic to the API client" # Codex
|
|
340
340
|
ph wrap --auto-evolve hermes chat -q "Refactor the DB connection pool" # Hermes Agent
|
|
341
|
-
ph wrap --auto-evolve opencode
|
|
341
|
+
ph wrap --auto-evolve opencode run "Fix the flaky parser test" # OpenCode
|
|
342
342
|
|
|
343
343
|
# Local models — wrap the CLI command directly
|
|
344
344
|
ph wrap --auto-evolve ollama run gemma3 "Summarize this document" # Ollama
|
|
@@ -406,9 +406,9 @@ After that, just use your agent as usual:
|
|
|
406
406
|
```bash
|
|
407
407
|
claude -p "Refactor auth to JWT" # automatically becomes: ph wrap --auto-evolve claude -p ...
|
|
408
408
|
claw -p "Write payment tests" # same — auto-wrapped
|
|
409
|
-
codex "Add retry logic" # same
|
|
409
|
+
codex exec "Add retry logic" # same
|
|
410
410
|
hermes chat -q "Refactor pool" # same
|
|
411
|
-
opencode
|
|
411
|
+
opencode run "Fix flaky test" # same
|
|
412
412
|
```
|
|
413
413
|
|
|
414
414
|
How it works: a `preexec` hook in your shell detects `claude`/`claw`/`codex`/`hermes`/`opencode` commands and transparently redirects them through `ph wrap --auto-evolve`. Your output is unchanged.
|
|
@@ -499,9 +499,9 @@ The Proposer reads **all of this** before generating the next candidate. It can
|
|
|
499
499
|
| `openai` | — | OpenAI-compatible API (Ollama, vLLM, LM Studio, etc). Needs `OPENAI_API_KEY` |
|
|
500
500
|
| `claude-code` | `claude -p` | Official Claude Code CLI (Pro/Teams subscription) |
|
|
501
501
|
| `claw-code` | `claw -p` | Open-source Claw Code CLI |
|
|
502
|
-
| `codex` | `codex
|
|
502
|
+
| `codex` | `codex exec` | OpenAI Codex CLI |
|
|
503
503
|
| `hermes` | `hermes chat -q` | Nous Research [Hermes Agent](https://github.com/NousResearch/hermes-agent) CLI |
|
|
504
|
-
| `opencode` | `opencode
|
|
504
|
+
| `opencode` | `opencode run` | OpenCode CLI |
|
|
505
505
|
| `local` | — | Offline rule-based engine for development & testing |
|
|
506
506
|
|
|
507
507
|
`ph doctor` auto-detects all available backends and shows their status.
|
|
@@ -576,7 +576,7 @@ proposer:
|
|
|
576
576
|
backend: api # api | openai | claude-code | claw-code | codex | hermes | opencode | local
|
|
577
577
|
ensemble: [] # If non-empty, pick among these backends per iteration via a UCB bandit
|
|
578
578
|
bandit_c: 1.41421356 # UCB exploration constant (higher = more exploration)
|
|
579
|
-
model: claude-sonnet-4-
|
|
579
|
+
model: claude-sonnet-4-6 # Model name (for api/openai backends)
|
|
580
580
|
base_url: null # Custom API endpoint (for openai backend)
|
|
581
581
|
api_key: null # API key override (null = use env var)
|
|
582
582
|
max_tokens: 16384 # Max output tokens per proposer turn
|
|
@@ -805,9 +805,9 @@ polyharness/
|
|
|
805
805
|
│ │ └── adapters/ # Per-agent CLI adapters
|
|
806
806
|
│ │ ├── claude_code.py # claude -p
|
|
807
807
|
│ │ ├── claw_code.py # claw -p
|
|
808
|
-
│ │ ├── codex.py # codex
|
|
808
|
+
│ │ ├── codex.py # codex exec
|
|
809
809
|
│ │ ├── hermes.py # hermes chat -q
|
|
810
|
-
│ │ └── opencode.py # opencode
|
|
810
|
+
│ │ └── opencode.py # opencode run
|
|
811
811
|
│ └── templates/ # 5 built-in task templates
|
|
812
812
|
│ ├── text-classification/
|
|
813
813
|
│ ├── math-word-problems/
|
|
@@ -303,9 +303,9 @@ Just add `ph wrap --auto-evolve` in front of your agent command (pick the one ma
|
|
|
303
303
|
# CLI agent backends — wrap the agent you already use
|
|
304
304
|
ph wrap --auto-evolve claude -p "Refactor the auth module to use JWT" # Claude Code
|
|
305
305
|
ph wrap --auto-evolve claw -p "Write integration tests for payments" # Claw Code
|
|
306
|
-
ph wrap --auto-evolve codex "Add retry logic to the API client" # Codex
|
|
306
|
+
ph wrap --auto-evolve codex exec "Add retry logic to the API client" # Codex
|
|
307
307
|
ph wrap --auto-evolve hermes chat -q "Refactor the DB connection pool" # Hermes Agent
|
|
308
|
-
ph wrap --auto-evolve opencode
|
|
308
|
+
ph wrap --auto-evolve opencode run "Fix the flaky parser test" # OpenCode
|
|
309
309
|
|
|
310
310
|
# Local models — wrap the CLI command directly
|
|
311
311
|
ph wrap --auto-evolve ollama run gemma3 "Summarize this document" # Ollama
|
|
@@ -373,9 +373,9 @@ After that, just use your agent as usual:
|
|
|
373
373
|
```bash
|
|
374
374
|
claude -p "Refactor auth to JWT" # automatically becomes: ph wrap --auto-evolve claude -p ...
|
|
375
375
|
claw -p "Write payment tests" # same — auto-wrapped
|
|
376
|
-
codex "Add retry logic" # same
|
|
376
|
+
codex exec "Add retry logic" # same
|
|
377
377
|
hermes chat -q "Refactor pool" # same
|
|
378
|
-
opencode
|
|
378
|
+
opencode run "Fix flaky test" # same
|
|
379
379
|
```
|
|
380
380
|
|
|
381
381
|
How it works: a `preexec` hook in your shell detects `claude`/`claw`/`codex`/`hermes`/`opencode` commands and transparently redirects them through `ph wrap --auto-evolve`. Your output is unchanged.
|
|
@@ -466,9 +466,9 @@ The Proposer reads **all of this** before generating the next candidate. It can
|
|
|
466
466
|
| `openai` | — | OpenAI-compatible API (Ollama, vLLM, LM Studio, etc). Needs `OPENAI_API_KEY` |
|
|
467
467
|
| `claude-code` | `claude -p` | Official Claude Code CLI (Pro/Teams subscription) |
|
|
468
468
|
| `claw-code` | `claw -p` | Open-source Claw Code CLI |
|
|
469
|
-
| `codex` | `codex
|
|
469
|
+
| `codex` | `codex exec` | OpenAI Codex CLI |
|
|
470
470
|
| `hermes` | `hermes chat -q` | Nous Research [Hermes Agent](https://github.com/NousResearch/hermes-agent) CLI |
|
|
471
|
-
| `opencode` | `opencode
|
|
471
|
+
| `opencode` | `opencode run` | OpenCode CLI |
|
|
472
472
|
| `local` | — | Offline rule-based engine for development & testing |
|
|
473
473
|
|
|
474
474
|
`ph doctor` auto-detects all available backends and shows their status.
|
|
@@ -543,7 +543,7 @@ proposer:
|
|
|
543
543
|
backend: api # api | openai | claude-code | claw-code | codex | hermes | opencode | local
|
|
544
544
|
ensemble: [] # If non-empty, pick among these backends per iteration via a UCB bandit
|
|
545
545
|
bandit_c: 1.41421356 # UCB exploration constant (higher = more exploration)
|
|
546
|
-
model: claude-sonnet-4-
|
|
546
|
+
model: claude-sonnet-4-6 # Model name (for api/openai backends)
|
|
547
547
|
base_url: null # Custom API endpoint (for openai backend)
|
|
548
548
|
api_key: null # API key override (null = use env var)
|
|
549
549
|
max_tokens: 16384 # Max output tokens per proposer turn
|
|
@@ -772,9 +772,9 @@ polyharness/
|
|
|
772
772
|
│ │ └── adapters/ # Per-agent CLI adapters
|
|
773
773
|
│ │ ├── claude_code.py # claude -p
|
|
774
774
|
│ │ ├── claw_code.py # claw -p
|
|
775
|
-
│ │ ├── codex.py # codex
|
|
775
|
+
│ │ ├── codex.py # codex exec
|
|
776
776
|
│ │ ├── hermes.py # hermes chat -q
|
|
777
|
-
│ │ └── opencode.py # opencode
|
|
777
|
+
│ │ └── opencode.py # opencode run
|
|
778
778
|
│ └── templates/ # 5 built-in task templates
|
|
779
779
|
│ ├── text-classification/
|
|
780
780
|
│ ├── math-word-problems/
|
|
@@ -84,7 +84,8 @@ class ProposerConfig(BaseModel):
|
|
|
84
84
|
description="UCB exploration constant for ensemble selection. Higher = more exploration.",
|
|
85
85
|
)
|
|
86
86
|
model: str = Field(
|
|
87
|
-
default="claude-sonnet-4-
|
|
87
|
+
default="claude-sonnet-4-6",
|
|
88
|
+
description="Model for the Proposer agent (api/openai backends; CLI backends use their own).",
|
|
88
89
|
)
|
|
89
90
|
base_url: str | None = Field(
|
|
90
91
|
default=None, description="Optional base URL for the API (useful for local models)."
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Claude Code CLI adapter.
|
|
2
|
+
|
|
3
|
+
Invokes the official `claude` CLI in print mode (-p).
|
|
4
|
+
Requires an active Claude Code subscription.
|
|
5
|
+
|
|
6
|
+
Verified against Claude Code (May 2026):
|
|
7
|
+
- `-p` headless mode and `--output-format text` are current.
|
|
8
|
+
- `--permission-mode acceptEdits` is REQUIRED for the agent to write files
|
|
9
|
+
non-interactively (auto-approves Read/Edit/Write); without it, headless edits
|
|
10
|
+
are blocked. `acceptEdits` still gates arbitrary Bash/network (least-privilege,
|
|
11
|
+
appropriate for the isolated workspace).
|
|
12
|
+
- `--model claude-opus-4-7` pins to Opus 4.7 (full name for reproducibility).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from polyharness.proposer.adapters.base import CLIAdapter
|
|
18
|
+
|
|
19
|
+
# Pinned Proposer model for the Claude Code backend (highest-capability).
|
|
20
|
+
CLAUDE_CODE_MODEL = "claude-opus-4-7"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ClaudeCodeAdapter(CLIAdapter):
|
|
24
|
+
"""Adapter for the Claude Code CLI (`claude`)."""
|
|
25
|
+
|
|
26
|
+
@property
|
|
27
|
+
def name(self) -> str:
|
|
28
|
+
return "claude-code"
|
|
29
|
+
|
|
30
|
+
@property
|
|
31
|
+
def default_binary(self) -> str:
|
|
32
|
+
return "claude"
|
|
33
|
+
|
|
34
|
+
def build_command(self, prompt: str, *, cli_path: str | None = None) -> list[str]:
|
|
35
|
+
binary = cli_path or self.default_binary
|
|
36
|
+
return [
|
|
37
|
+
binary,
|
|
38
|
+
"-p", # print mode (non-interactive)
|
|
39
|
+
prompt,
|
|
40
|
+
"--model", CLAUDE_CODE_MODEL, # pin to Opus 4.7
|
|
41
|
+
"--permission-mode", "acceptEdits", # auto-approve file edits (headless)
|
|
42
|
+
"--output-format", "text",
|
|
43
|
+
]
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"""Codex CLI adapter.
|
|
2
2
|
|
|
3
|
-
Invokes OpenAI's `codex` CLI agent in
|
|
3
|
+
Invokes OpenAI's `codex` CLI agent in headless/non-interactive mode via
|
|
4
|
+
`codex exec` (the old `--quiet`/`--auto-edit` flags were removed upstream).
|
|
5
|
+
See: developers.openai.com/codex/noninteractive
|
|
4
6
|
"""
|
|
5
7
|
|
|
6
8
|
from __future__ import annotations
|
|
@@ -23,7 +25,8 @@ class CodexAdapter(CLIAdapter):
|
|
|
23
25
|
binary = cli_path or self.default_binary
|
|
24
26
|
return [
|
|
25
27
|
binary,
|
|
26
|
-
"
|
|
27
|
-
"--
|
|
28
|
+
"exec", # headless, non-interactive mode
|
|
29
|
+
"--skip-git-repo-check", # the workspace is not a git repo
|
|
30
|
+
"--sandbox", "workspace-write", # allow edits within the workspace cwd
|
|
28
31
|
prompt,
|
|
29
32
|
]
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"""OpenCode CLI adapter.
|
|
2
2
|
|
|
3
|
-
Invokes the open-source `opencode` CLI agent
|
|
3
|
+
Invokes the open-source `opencode` CLI agent in non-interactive mode via the
|
|
4
|
+
`run` subcommand (the old top-level `-p` flag is no longer supported upstream).
|
|
5
|
+
See: opencode.ai/docs/cli
|
|
4
6
|
"""
|
|
5
7
|
|
|
6
8
|
from __future__ import annotations
|
|
@@ -23,6 +25,6 @@ class OpenCodeAdapter(CLIAdapter):
|
|
|
23
25
|
binary = cli_path or self.default_binary
|
|
24
26
|
return [
|
|
25
27
|
binary,
|
|
26
|
-
"
|
|
28
|
+
"run", # non-interactive mode (replaces old -p)
|
|
27
29
|
prompt,
|
|
28
30
|
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: polyharness
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Automated harness optimization for AI agents — make your agent evolve.
|
|
5
5
|
Author: weijt606
|
|
6
6
|
License-Expression: MIT
|
|
@@ -336,9 +336,9 @@ Just add `ph wrap --auto-evolve` in front of your agent command (pick the one ma
|
|
|
336
336
|
# CLI agent backends — wrap the agent you already use
|
|
337
337
|
ph wrap --auto-evolve claude -p "Refactor the auth module to use JWT" # Claude Code
|
|
338
338
|
ph wrap --auto-evolve claw -p "Write integration tests for payments" # Claw Code
|
|
339
|
-
ph wrap --auto-evolve codex "Add retry logic to the API client" # Codex
|
|
339
|
+
ph wrap --auto-evolve codex exec "Add retry logic to the API client" # Codex
|
|
340
340
|
ph wrap --auto-evolve hermes chat -q "Refactor the DB connection pool" # Hermes Agent
|
|
341
|
-
ph wrap --auto-evolve opencode
|
|
341
|
+
ph wrap --auto-evolve opencode run "Fix the flaky parser test" # OpenCode
|
|
342
342
|
|
|
343
343
|
# Local models — wrap the CLI command directly
|
|
344
344
|
ph wrap --auto-evolve ollama run gemma3 "Summarize this document" # Ollama
|
|
@@ -406,9 +406,9 @@ After that, just use your agent as usual:
|
|
|
406
406
|
```bash
|
|
407
407
|
claude -p "Refactor auth to JWT" # automatically becomes: ph wrap --auto-evolve claude -p ...
|
|
408
408
|
claw -p "Write payment tests" # same — auto-wrapped
|
|
409
|
-
codex "Add retry logic" # same
|
|
409
|
+
codex exec "Add retry logic" # same
|
|
410
410
|
hermes chat -q "Refactor pool" # same
|
|
411
|
-
opencode
|
|
411
|
+
opencode run "Fix flaky test" # same
|
|
412
412
|
```
|
|
413
413
|
|
|
414
414
|
How it works: a `preexec` hook in your shell detects `claude`/`claw`/`codex`/`hermes`/`opencode` commands and transparently redirects them through `ph wrap --auto-evolve`. Your output is unchanged.
|
|
@@ -499,9 +499,9 @@ The Proposer reads **all of this** before generating the next candidate. It can
|
|
|
499
499
|
| `openai` | — | OpenAI-compatible API (Ollama, vLLM, LM Studio, etc). Needs `OPENAI_API_KEY` |
|
|
500
500
|
| `claude-code` | `claude -p` | Official Claude Code CLI (Pro/Teams subscription) |
|
|
501
501
|
| `claw-code` | `claw -p` | Open-source Claw Code CLI |
|
|
502
|
-
| `codex` | `codex
|
|
502
|
+
| `codex` | `codex exec` | OpenAI Codex CLI |
|
|
503
503
|
| `hermes` | `hermes chat -q` | Nous Research [Hermes Agent](https://github.com/NousResearch/hermes-agent) CLI |
|
|
504
|
-
| `opencode` | `opencode
|
|
504
|
+
| `opencode` | `opencode run` | OpenCode CLI |
|
|
505
505
|
| `local` | — | Offline rule-based engine for development & testing |
|
|
506
506
|
|
|
507
507
|
`ph doctor` auto-detects all available backends and shows their status.
|
|
@@ -576,7 +576,7 @@ proposer:
|
|
|
576
576
|
backend: api # api | openai | claude-code | claw-code | codex | hermes | opencode | local
|
|
577
577
|
ensemble: [] # If non-empty, pick among these backends per iteration via a UCB bandit
|
|
578
578
|
bandit_c: 1.41421356 # UCB exploration constant (higher = more exploration)
|
|
579
|
-
model: claude-sonnet-4-
|
|
579
|
+
model: claude-sonnet-4-6 # Model name (for api/openai backends)
|
|
580
580
|
base_url: null # Custom API endpoint (for openai backend)
|
|
581
581
|
api_key: null # API key override (null = use env var)
|
|
582
582
|
max_tokens: 16384 # Max output tokens per proposer turn
|
|
@@ -805,9 +805,9 @@ polyharness/
|
|
|
805
805
|
│ │ └── adapters/ # Per-agent CLI adapters
|
|
806
806
|
│ │ ├── claude_code.py # claude -p
|
|
807
807
|
│ │ ├── claw_code.py # claw -p
|
|
808
|
-
│ │ ├── codex.py # codex
|
|
808
|
+
│ │ ├── codex.py # codex exec
|
|
809
809
|
│ │ ├── hermes.py # hermes chat -q
|
|
810
|
-
│ │ └── opencode.py # opencode
|
|
810
|
+
│ │ └── opencode.py # opencode run
|
|
811
811
|
│ └── templates/ # 5 built-in task templates
|
|
812
812
|
│ ├── text-classification/
|
|
813
813
|
│ ├── math-word-problems/
|
|
@@ -50,6 +50,14 @@ def test_claude_code_command():
|
|
|
50
50
|
assert cmd[0] == "claude"
|
|
51
51
|
assert "-p" in cmd
|
|
52
52
|
assert "do stuff" in cmd
|
|
53
|
+
# Pinned to Opus 4.7
|
|
54
|
+
assert "--model" in cmd
|
|
55
|
+
assert "claude-opus-4-7" in cmd
|
|
56
|
+
# Headless edits must be auto-approved or the agent can't write candidates
|
|
57
|
+
assert "--permission-mode" in cmd
|
|
58
|
+
assert "acceptEdits" in cmd
|
|
59
|
+
# --verbose is noise in print mode; should be gone
|
|
60
|
+
assert "--verbose" not in cmd
|
|
53
61
|
|
|
54
62
|
|
|
55
63
|
def test_claude_code_custom_path():
|
|
@@ -70,7 +78,9 @@ def test_codex_command():
|
|
|
70
78
|
adapter = CodexAdapter()
|
|
71
79
|
cmd = adapter.build_command("fix it")
|
|
72
80
|
assert cmd[0] == "codex"
|
|
73
|
-
assert "
|
|
81
|
+
assert "exec" in cmd # headless mode (replaces old --quiet)
|
|
82
|
+
assert "--skip-git-repo-check" in cmd # workspace isn't a git repo
|
|
83
|
+
assert "--quiet" not in cmd # removed upstream
|
|
74
84
|
assert "fix it" in cmd
|
|
75
85
|
|
|
76
86
|
|
|
@@ -78,6 +88,8 @@ def test_opencode_command():
|
|
|
78
88
|
adapter = OpenCodeAdapter()
|
|
79
89
|
cmd = adapter.build_command("optimize")
|
|
80
90
|
assert cmd[0] == "opencode"
|
|
91
|
+
assert "run" in cmd # non-interactive subcommand (replaces old -p)
|
|
92
|
+
assert "-p" not in cmd # no longer supported upstream
|
|
81
93
|
assert "optimize" in cmd
|
|
82
94
|
|
|
83
95
|
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
"""Claude Code CLI adapter.
|
|
2
|
-
|
|
3
|
-
Invokes the official `claude` CLI in print mode (-p).
|
|
4
|
-
Requires an active Claude Code subscription.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from __future__ import annotations
|
|
8
|
-
|
|
9
|
-
from polyharness.proposer.adapters.base import CLIAdapter
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class ClaudeCodeAdapter(CLIAdapter):
|
|
13
|
-
"""Adapter for the Claude Code CLI (`claude`)."""
|
|
14
|
-
|
|
15
|
-
@property
|
|
16
|
-
def name(self) -> str:
|
|
17
|
-
return "claude-code"
|
|
18
|
-
|
|
19
|
-
@property
|
|
20
|
-
def default_binary(self) -> str:
|
|
21
|
-
return "claude"
|
|
22
|
-
|
|
23
|
-
def build_command(self, prompt: str, *, cli_path: str | None = None) -> list[str]:
|
|
24
|
-
binary = cli_path or self.default_binary
|
|
25
|
-
return [
|
|
26
|
-
binary,
|
|
27
|
-
"-p", # print mode (non-interactive, stdout output)
|
|
28
|
-
prompt,
|
|
29
|
-
"--output-format", "text",
|
|
30
|
-
"--verbose",
|
|
31
|
-
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/api-calling/tasks/test_cases.json
RENAMED
|
File without changes
|
|
File without changes
|
{polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/code-generation/evaluate.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/math-word-problems/evaluate.py
RENAMED
|
File without changes
|
|
File without changes
|
{polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/rag-qa/base_harness/harness.py
RENAMED
|
File without changes
|
|
File without changes
|
{polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/rag-qa/tasks/test_cases.json
RENAMED
|
File without changes
|
|
File without changes
|
{polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/text-classification/evaluate.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|