polyharness 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. {polyharness-0.2.2/src/polyharness.egg-info → polyharness-0.2.3}/PKG-INFO +10 -10
  2. {polyharness-0.2.2 → polyharness-0.2.3}/README.md +9 -9
  3. {polyharness-0.2.2 → polyharness-0.2.3}/pyproject.toml +1 -1
  4. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/__init__.py +1 -1
  5. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/config.py +2 -1
  6. polyharness-0.2.3/src/polyharness/proposer/adapters/claude_code.py +43 -0
  7. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/codex.py +6 -3
  8. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/opencode.py +4 -2
  9. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/api_proposer.py +1 -1
  10. {polyharness-0.2.2 → polyharness-0.2.3/src/polyharness.egg-info}/PKG-INFO +10 -10
  11. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_cli_adapters.py +13 -1
  12. polyharness-0.2.2/src/polyharness/proposer/adapters/claude_code.py +0 -31
  13. {polyharness-0.2.2 → polyharness-0.2.3}/LICENSE +0 -0
  14. {polyharness-0.2.2 → polyharness-0.2.3}/setup.cfg +0 -0
  15. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/__main__.py +0 -0
  16. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/cli.py +0 -0
  17. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/collector.py +0 -0
  18. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/doctor.py +0 -0
  19. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/evaluator/__init__.py +0 -0
  20. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/evaluator/evaluator.py +0 -0
  21. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/orchestrator.py +0 -0
  22. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/__init__.py +0 -0
  23. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/__init__.py +0 -0
  24. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/base.py +0 -0
  25. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/claw_code.py +0 -0
  26. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/hermes.py +0 -0
  27. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/bandit.py +0 -0
  28. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/base.py +0 -0
  29. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/cli_proposer.py +0 -0
  30. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/local_proposer.py +0 -0
  31. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/openai_proposer.py +0 -0
  32. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/search_log.py +0 -0
  33. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/__init__.py +0 -0
  34. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/api-calling/base_harness/harness.py +0 -0
  35. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/api-calling/evaluate.py +0 -0
  36. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/api-calling/tasks/test_cases.json +0 -0
  37. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/code-generation/base_harness/harness.py +0 -0
  38. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/code-generation/evaluate.py +0 -0
  39. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/code-generation/tasks/test_cases.json +0 -0
  40. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/math-word-problems/base_harness/harness.py +0 -0
  41. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/math-word-problems/evaluate.py +0 -0
  42. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/math-word-problems/tasks/test_cases.json +0 -0
  43. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/rag-qa/base_harness/harness.py +0 -0
  44. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/rag-qa/evaluate.py +0 -0
  45. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/rag-qa/tasks/test_cases.json +0 -0
  46. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/text-classification/base_harness/harness.py +0 -0
  47. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/text-classification/evaluate.py +0 -0
  48. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/templates/text-classification/tasks/test_cases.json +0 -0
  49. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/utils/__init__.py +0 -0
  50. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/workspace.py +0 -0
  51. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness.egg-info/SOURCES.txt +0 -0
  52. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness.egg-info/dependency_links.txt +0 -0
  53. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness.egg-info/entry_points.txt +0 -0
  54. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness.egg-info/requires.txt +0 -0
  55. {polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness.egg-info/top_level.txt +0 -0
  56. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_bandit.py +0 -0
  57. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_cli_features.py +0 -0
  58. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_collector.py +0 -0
  59. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_compare.py +0 -0
  60. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_config.py +0 -0
  61. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_evaluator.py +0 -0
  62. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_evolution.py +0 -0
  63. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_example.py +0 -0
  64. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_export.py +0 -0
  65. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_log.py +0 -0
  66. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_orchestrator.py +0 -0
  67. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_search_log.py +0 -0
  68. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_smoke.py +0 -0
  69. {polyharness-0.2.2 → polyharness-0.2.3}/tests/test_workspace.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: polyharness
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Automated harness optimization for AI agents — make your agent evolve.
5
5
  Author: weijt606
6
6
  License-Expression: MIT
@@ -336,9 +336,9 @@ Just add `ph wrap --auto-evolve` in front of your agent command (pick the one ma
336
336
  # CLI agent backends — wrap the agent you already use
337
337
  ph wrap --auto-evolve claude -p "Refactor the auth module to use JWT" # Claude Code
338
338
  ph wrap --auto-evolve claw -p "Write integration tests for payments" # Claw Code
339
- ph wrap --auto-evolve codex "Add retry logic to the API client" # Codex
339
+ ph wrap --auto-evolve codex exec "Add retry logic to the API client" # Codex
340
340
  ph wrap --auto-evolve hermes chat -q "Refactor the DB connection pool" # Hermes Agent
341
- ph wrap --auto-evolve opencode -p "Fix the flaky parser test" # OpenCode
341
+ ph wrap --auto-evolve opencode run "Fix the flaky parser test" # OpenCode
342
342
 
343
343
  # Local models — wrap the CLI command directly
344
344
  ph wrap --auto-evolve ollama run gemma3 "Summarize this document" # Ollama
@@ -406,9 +406,9 @@ After that, just use your agent as usual:
406
406
  ```bash
407
407
  claude -p "Refactor auth to JWT" # automatically becomes: ph wrap --auto-evolve claude -p ...
408
408
  claw -p "Write payment tests" # same — auto-wrapped
409
- codex "Add retry logic" # same
409
+ codex exec "Add retry logic" # same
410
410
  hermes chat -q "Refactor pool" # same
411
- opencode -p "Fix flaky test" # same
411
+ opencode run "Fix flaky test" # same
412
412
  ```
413
413
 
414
414
  How it works: a `preexec` hook in your shell detects `claude`/`claw`/`codex`/`hermes`/`opencode` commands and transparently redirects them through `ph wrap --auto-evolve`. Your output is unchanged.
@@ -499,9 +499,9 @@ The Proposer reads **all of this** before generating the next candidate. It can
499
499
  | `openai` | — | OpenAI-compatible API (Ollama, vLLM, LM Studio, etc). Needs `OPENAI_API_KEY` |
500
500
  | `claude-code` | `claude -p` | Official Claude Code CLI (Pro/Teams subscription) |
501
501
  | `claw-code` | `claw -p` | Open-source Claw Code CLI |
502
- | `codex` | `codex --quiet` | OpenAI Codex CLI |
502
+ | `codex` | `codex exec` | OpenAI Codex CLI |
503
503
  | `hermes` | `hermes chat -q` | Nous Research [Hermes Agent](https://github.com/NousResearch/hermes-agent) CLI |
504
- | `opencode` | `opencode -p` | OpenCode CLI |
504
+ | `opencode` | `opencode run` | OpenCode CLI |
505
505
  | `local` | — | Offline rule-based engine for development & testing |
506
506
 
507
507
  `ph doctor` auto-detects all available backends and shows their status.
@@ -576,7 +576,7 @@ proposer:
576
576
  backend: api # api | openai | claude-code | claw-code | codex | hermes | opencode | local
577
577
  ensemble: [] # If non-empty, pick among these backends per iteration via a UCB bandit
578
578
  bandit_c: 1.41421356 # UCB exploration constant (higher = more exploration)
579
- model: claude-sonnet-4-20250514 # Model name (for api/openai backends)
579
+ model: claude-sonnet-4-6 # Model name (for api/openai backends)
580
580
  base_url: null # Custom API endpoint (for openai backend)
581
581
  api_key: null # API key override (null = use env var)
582
582
  max_tokens: 16384 # Max output tokens per proposer turn
@@ -805,9 +805,9 @@ polyharness/
805
805
  │ │ └── adapters/ # Per-agent CLI adapters
806
806
  │ │ ├── claude_code.py # claude -p
807
807
  │ │ ├── claw_code.py # claw -p
808
- │ │ ├── codex.py # codex --quiet --auto-edit
808
+ │ │ ├── codex.py # codex exec
809
809
  │ │ ├── hermes.py # hermes chat -q
810
- │ │ └── opencode.py # opencode -p
810
+ │ │ └── opencode.py # opencode run
811
811
  │ └── templates/ # 5 built-in task templates
812
812
  │ ├── text-classification/
813
813
  │ ├── math-word-problems/
@@ -303,9 +303,9 @@ Just add `ph wrap --auto-evolve` in front of your agent command (pick the one ma
303
303
  # CLI agent backends — wrap the agent you already use
304
304
  ph wrap --auto-evolve claude -p "Refactor the auth module to use JWT" # Claude Code
305
305
  ph wrap --auto-evolve claw -p "Write integration tests for payments" # Claw Code
306
- ph wrap --auto-evolve codex "Add retry logic to the API client" # Codex
306
+ ph wrap --auto-evolve codex exec "Add retry logic to the API client" # Codex
307
307
  ph wrap --auto-evolve hermes chat -q "Refactor the DB connection pool" # Hermes Agent
308
- ph wrap --auto-evolve opencode -p "Fix the flaky parser test" # OpenCode
308
+ ph wrap --auto-evolve opencode run "Fix the flaky parser test" # OpenCode
309
309
 
310
310
  # Local models — wrap the CLI command directly
311
311
  ph wrap --auto-evolve ollama run gemma3 "Summarize this document" # Ollama
@@ -373,9 +373,9 @@ After that, just use your agent as usual:
373
373
  ```bash
374
374
  claude -p "Refactor auth to JWT" # automatically becomes: ph wrap --auto-evolve claude -p ...
375
375
  claw -p "Write payment tests" # same — auto-wrapped
376
- codex "Add retry logic" # same
376
+ codex exec "Add retry logic" # same
377
377
  hermes chat -q "Refactor pool" # same
378
- opencode -p "Fix flaky test" # same
378
+ opencode run "Fix flaky test" # same
379
379
  ```
380
380
 
381
381
  How it works: a `preexec` hook in your shell detects `claude`/`claw`/`codex`/`hermes`/`opencode` commands and transparently redirects them through `ph wrap --auto-evolve`. Your output is unchanged.
@@ -466,9 +466,9 @@ The Proposer reads **all of this** before generating the next candidate. It can
466
466
  | `openai` | — | OpenAI-compatible API (Ollama, vLLM, LM Studio, etc). Needs `OPENAI_API_KEY` |
467
467
  | `claude-code` | `claude -p` | Official Claude Code CLI (Pro/Teams subscription) |
468
468
  | `claw-code` | `claw -p` | Open-source Claw Code CLI |
469
- | `codex` | `codex --quiet` | OpenAI Codex CLI |
469
+ | `codex` | `codex exec` | OpenAI Codex CLI |
470
470
  | `hermes` | `hermes chat -q` | Nous Research [Hermes Agent](https://github.com/NousResearch/hermes-agent) CLI |
471
- | `opencode` | `opencode -p` | OpenCode CLI |
471
+ | `opencode` | `opencode run` | OpenCode CLI |
472
472
  | `local` | — | Offline rule-based engine for development & testing |
473
473
 
474
474
  `ph doctor` auto-detects all available backends and shows their status.
@@ -543,7 +543,7 @@ proposer:
543
543
  backend: api # api | openai | claude-code | claw-code | codex | hermes | opencode | local
544
544
  ensemble: [] # If non-empty, pick among these backends per iteration via a UCB bandit
545
545
  bandit_c: 1.41421356 # UCB exploration constant (higher = more exploration)
546
- model: claude-sonnet-4-20250514 # Model name (for api/openai backends)
546
+ model: claude-sonnet-4-6 # Model name (for api/openai backends)
547
547
  base_url: null # Custom API endpoint (for openai backend)
548
548
  api_key: null # API key override (null = use env var)
549
549
  max_tokens: 16384 # Max output tokens per proposer turn
@@ -772,9 +772,9 @@ polyharness/
772
772
  │ │ └── adapters/ # Per-agent CLI adapters
773
773
  │ │ ├── claude_code.py # claude -p
774
774
  │ │ ├── claw_code.py # claw -p
775
- │ │ ├── codex.py # codex --quiet --auto-edit
775
+ │ │ ├── codex.py # codex exec
776
776
  │ │ ├── hermes.py # hermes chat -q
777
- │ │ └── opencode.py # opencode -p
777
+ │ │ └── opencode.py # opencode run
778
778
  │ └── templates/ # 5 built-in task templates
779
779
  │ ├── text-classification/
780
780
  │ ├── math-word-problems/
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "polyharness"
7
- version = "0.2.2"
7
+ version = "0.2.3"
8
8
  description = "Automated harness optimization for AI agents — make your agent evolve."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -1,3 +1,3 @@
1
1
  """PolyHarness — Automated harness optimization for AI agents."""
2
2
 
3
- __version__ = "0.2.2"
3
+ __version__ = "0.2.3"
@@ -84,7 +84,8 @@ class ProposerConfig(BaseModel):
84
84
  description="UCB exploration constant for ensemble selection. Higher = more exploration.",
85
85
  )
86
86
  model: str = Field(
87
- default="claude-sonnet-4-20250514", description="Model for the Proposer agent."
87
+ default="claude-sonnet-4-6",
88
+ description="Model for the Proposer agent (api/openai backends; CLI backends use their own).",
88
89
  )
89
90
  base_url: str | None = Field(
90
91
  default=None, description="Optional base URL for the API (useful for local models)."
@@ -0,0 +1,43 @@
1
+ """Claude Code CLI adapter.
2
+
3
+ Invokes the official `claude` CLI in print mode (-p).
4
+ Requires an active Claude Code subscription.
5
+
6
+ Verified against Claude Code (May 2026):
7
+ - `-p` headless mode and `--output-format text` are current.
8
+ - `--permission-mode acceptEdits` is REQUIRED for the agent to write files
9
+ non-interactively (auto-approves Read/Edit/Write); without it, headless edits
10
+ are blocked. `acceptEdits` still gates arbitrary Bash/network (least-privilege,
11
+ appropriate for the isolated workspace).
12
+ - `--model claude-opus-4-7` pins to Opus 4.7 (full name for reproducibility).
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from polyharness.proposer.adapters.base import CLIAdapter
18
+
19
+ # Pinned Proposer model for the Claude Code backend (highest-capability).
20
+ CLAUDE_CODE_MODEL = "claude-opus-4-7"
21
+
22
+
23
+ class ClaudeCodeAdapter(CLIAdapter):
24
+ """Adapter for the Claude Code CLI (`claude`)."""
25
+
26
+ @property
27
+ def name(self) -> str:
28
+ return "claude-code"
29
+
30
+ @property
31
+ def default_binary(self) -> str:
32
+ return "claude"
33
+
34
+ def build_command(self, prompt: str, *, cli_path: str | None = None) -> list[str]:
35
+ binary = cli_path or self.default_binary
36
+ return [
37
+ binary,
38
+ "-p", # print mode (non-interactive)
39
+ prompt,
40
+ "--model", CLAUDE_CODE_MODEL, # pin to Opus 4.7
41
+ "--permission-mode", "acceptEdits", # auto-approve file edits (headless)
42
+ "--output-format", "text",
43
+ ]
@@ -1,6 +1,8 @@
1
1
  """Codex CLI adapter.
2
2
 
3
- Invokes OpenAI's `codex` CLI agent in quiet/non-interactive mode.
3
+ Invokes OpenAI's `codex` CLI agent in headless/non-interactive mode via
4
+ `codex exec` (the old `--quiet`/`--auto-edit` flags were removed upstream).
5
+ See: developers.openai.com/codex/noninteractive
4
6
  """
5
7
 
6
8
  from __future__ import annotations
@@ -23,7 +25,8 @@ class CodexAdapter(CLIAdapter):
23
25
  binary = cli_path or self.default_binary
24
26
  return [
25
27
  binary,
26
- "--quiet",
27
- "--auto-edit", # allow file edits without confirmation
28
+ "exec", # headless, non-interactive mode
29
+ "--skip-git-repo-check", # the workspace is not a git repo
30
+ "--sandbox", "workspace-write", # allow edits within the workspace cwd
28
31
  prompt,
29
32
  ]
@@ -1,6 +1,8 @@
1
1
  """OpenCode CLI adapter.
2
2
 
3
- Invokes the open-source `opencode` CLI agent.
3
+ Invokes the open-source `opencode` CLI agent in non-interactive mode via the
4
+ `run` subcommand (the old top-level `-p` flag is no longer supported upstream).
5
+ See: opencode.ai/docs/cli
4
6
  """
5
7
 
6
8
  from __future__ import annotations
@@ -23,6 +25,6 @@ class OpenCodeAdapter(CLIAdapter):
23
25
  binary = cli_path or self.default_binary
24
26
  return [
25
27
  binary,
26
- "-p", # prompt mode
28
+ "run", # non-interactive mode (replaces old -p)
27
29
  prompt,
28
30
  ]
@@ -123,7 +123,7 @@ class APIProposer(BaseProposer):
123
123
 
124
124
  def __init__(
125
125
  self,
126
- model: str = "claude-sonnet-4-20250514",
126
+ model: str = "claude-sonnet-4-6",
127
127
  max_tokens: int = 16384,
128
128
  temperature: float = 0.7,
129
129
  ):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: polyharness
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Automated harness optimization for AI agents — make your agent evolve.
5
5
  Author: weijt606
6
6
  License-Expression: MIT
@@ -336,9 +336,9 @@ Just add `ph wrap --auto-evolve` in front of your agent command (pick the one ma
336
336
  # CLI agent backends — wrap the agent you already use
337
337
  ph wrap --auto-evolve claude -p "Refactor the auth module to use JWT" # Claude Code
338
338
  ph wrap --auto-evolve claw -p "Write integration tests for payments" # Claw Code
339
- ph wrap --auto-evolve codex "Add retry logic to the API client" # Codex
339
+ ph wrap --auto-evolve codex exec "Add retry logic to the API client" # Codex
340
340
  ph wrap --auto-evolve hermes chat -q "Refactor the DB connection pool" # Hermes Agent
341
- ph wrap --auto-evolve opencode -p "Fix the flaky parser test" # OpenCode
341
+ ph wrap --auto-evolve opencode run "Fix the flaky parser test" # OpenCode
342
342
 
343
343
  # Local models — wrap the CLI command directly
344
344
  ph wrap --auto-evolve ollama run gemma3 "Summarize this document" # Ollama
@@ -406,9 +406,9 @@ After that, just use your agent as usual:
406
406
  ```bash
407
407
  claude -p "Refactor auth to JWT" # automatically becomes: ph wrap --auto-evolve claude -p ...
408
408
  claw -p "Write payment tests" # same — auto-wrapped
409
- codex "Add retry logic" # same
409
+ codex exec "Add retry logic" # same
410
410
  hermes chat -q "Refactor pool" # same
411
- opencode -p "Fix flaky test" # same
411
+ opencode run "Fix flaky test" # same
412
412
  ```
413
413
 
414
414
  How it works: a `preexec` hook in your shell detects `claude`/`claw`/`codex`/`hermes`/`opencode` commands and transparently redirects them through `ph wrap --auto-evolve`. Your output is unchanged.
@@ -499,9 +499,9 @@ The Proposer reads **all of this** before generating the next candidate. It can
499
499
  | `openai` | — | OpenAI-compatible API (Ollama, vLLM, LM Studio, etc). Needs `OPENAI_API_KEY` |
500
500
  | `claude-code` | `claude -p` | Official Claude Code CLI (Pro/Teams subscription) |
501
501
  | `claw-code` | `claw -p` | Open-source Claw Code CLI |
502
- | `codex` | `codex --quiet` | OpenAI Codex CLI |
502
+ | `codex` | `codex exec` | OpenAI Codex CLI |
503
503
  | `hermes` | `hermes chat -q` | Nous Research [Hermes Agent](https://github.com/NousResearch/hermes-agent) CLI |
504
- | `opencode` | `opencode -p` | OpenCode CLI |
504
+ | `opencode` | `opencode run` | OpenCode CLI |
505
505
  | `local` | — | Offline rule-based engine for development & testing |
506
506
 
507
507
  `ph doctor` auto-detects all available backends and shows their status.
@@ -576,7 +576,7 @@ proposer:
576
576
  backend: api # api | openai | claude-code | claw-code | codex | hermes | opencode | local
577
577
  ensemble: [] # If non-empty, pick among these backends per iteration via a UCB bandit
578
578
  bandit_c: 1.41421356 # UCB exploration constant (higher = more exploration)
579
- model: claude-sonnet-4-20250514 # Model name (for api/openai backends)
579
+ model: claude-sonnet-4-6 # Model name (for api/openai backends)
580
580
  base_url: null # Custom API endpoint (for openai backend)
581
581
  api_key: null # API key override (null = use env var)
582
582
  max_tokens: 16384 # Max output tokens per proposer turn
@@ -805,9 +805,9 @@ polyharness/
805
805
  │ │ └── adapters/ # Per-agent CLI adapters
806
806
  │ │ ├── claude_code.py # claude -p
807
807
  │ │ ├── claw_code.py # claw -p
808
- │ │ ├── codex.py # codex --quiet --auto-edit
808
+ │ │ ├── codex.py # codex exec
809
809
  │ │ ├── hermes.py # hermes chat -q
810
- │ │ └── opencode.py # opencode -p
810
+ │ │ └── opencode.py # opencode run
811
811
  │ └── templates/ # 5 built-in task templates
812
812
  │ ├── text-classification/
813
813
  │ ├── math-word-problems/
@@ -50,6 +50,14 @@ def test_claude_code_command():
50
50
  assert cmd[0] == "claude"
51
51
  assert "-p" in cmd
52
52
  assert "do stuff" in cmd
53
+ # Pinned to Opus 4.7
54
+ assert "--model" in cmd
55
+ assert "claude-opus-4-7" in cmd
56
+ # Headless edits must be auto-approved or the agent can't write candidates
57
+ assert "--permission-mode" in cmd
58
+ assert "acceptEdits" in cmd
59
+ # --verbose is noise in print mode; should be gone
60
+ assert "--verbose" not in cmd
53
61
 
54
62
 
55
63
  def test_claude_code_custom_path():
@@ -70,7 +78,9 @@ def test_codex_command():
70
78
  adapter = CodexAdapter()
71
79
  cmd = adapter.build_command("fix it")
72
80
  assert cmd[0] == "codex"
73
- assert "--quiet" in cmd
81
+ assert "exec" in cmd # headless mode (replaces old --quiet)
82
+ assert "--skip-git-repo-check" in cmd # workspace isn't a git repo
83
+ assert "--quiet" not in cmd # removed upstream
74
84
  assert "fix it" in cmd
75
85
 
76
86
 
@@ -78,6 +88,8 @@ def test_opencode_command():
78
88
  adapter = OpenCodeAdapter()
79
89
  cmd = adapter.build_command("optimize")
80
90
  assert cmd[0] == "opencode"
91
+ assert "run" in cmd # non-interactive subcommand (replaces old -p)
92
+ assert "-p" not in cmd # no longer supported upstream
81
93
  assert "optimize" in cmd
82
94
 
83
95
 
@@ -1,31 +0,0 @@
1
- """Claude Code CLI adapter.
2
-
3
- Invokes the official `claude` CLI in print mode (-p).
4
- Requires an active Claude Code subscription.
5
- """
6
-
7
- from __future__ import annotations
8
-
9
- from polyharness.proposer.adapters.base import CLIAdapter
10
-
11
-
12
- class ClaudeCodeAdapter(CLIAdapter):
13
- """Adapter for the Claude Code CLI (`claude`)."""
14
-
15
- @property
16
- def name(self) -> str:
17
- return "claude-code"
18
-
19
- @property
20
- def default_binary(self) -> str:
21
- return "claude"
22
-
23
- def build_command(self, prompt: str, *, cli_path: str | None = None) -> list[str]:
24
- binary = cli_path or self.default_binary
25
- return [
26
- binary,
27
- "-p", # print mode (non-interactive, stdout output)
28
- prompt,
29
- "--output-format", "text",
30
- "--verbose",
31
- ]
File without changes
File without changes