PyPI - polyharness - Versions diffs - 0.2.2__tar.gz → 0.2.3__tar.gz - Mend

polyharness 0.2.2.tar.gz → 0.2.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69)

{polyharness-0.2.2/src/polyharness.egg-info → polyharness-0.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: polyharness
-Version: 0.2.2
+Version: 0.2.3
 Summary: Automated harness optimization for AI agents — make your agent evolve.
 Author: weijt606
 License-Expression: MIT
@@ -336,9 +336,9 @@ Just add `ph wrap --auto-evolve` in front of your agent command (pick the one ma
 # CLI agent backends — wrap the agent you already use
 ph wrap --auto-evolve claude -p "Refactor the auth module to use JWT"   # Claude Code
 ph wrap --auto-evolve claw -p "Write integration tests for payments"     # Claw Code
-ph wrap --auto-evolve codex "Add retry logic to the API client"          # Codex
+ph wrap --auto-evolve codex exec "Add retry logic to the API client"          # Codex
 ph wrap --auto-evolve hermes chat -q "Refactor the DB connection pool"   # Hermes Agent
-ph wrap --auto-evolve opencode -p "Fix the flaky parser test"            # OpenCode
+ph wrap --auto-evolve opencode run "Fix the flaky parser test"            # OpenCode
 # Local models — wrap the CLI command directly
 ph wrap --auto-evolve ollama run gemma3 "Summarize this document"         # Ollama
@@ -406,9 +406,9 @@ After that, just use your agent as usual:
 ```bash
 claude -p "Refactor auth to JWT"        # automatically becomes: ph wrap --auto-evolve claude -p ...
 claw -p "Write payment tests"            # same — auto-wrapped
-codex "Add retry logic"                  # same
+codex exec "Add retry logic"                  # same
 hermes chat -q "Refactor pool"           # same
-opencode -p "Fix flaky test"             # same
+opencode run "Fix flaky test"             # same
 ```
 How it works: a `preexec` hook in your shell detects `claude`/`claw`/`codex`/`hermes`/`opencode` commands and transparently redirects them through `ph wrap --auto-evolve`. Your output is unchanged.
@@ -499,9 +499,9 @@ The Proposer reads **all of this** before generating the next candidate. It can
 | `openai` | — | OpenAI-compatible API (Ollama, vLLM, LM Studio, etc). Needs `OPENAI_API_KEY` |
 | `claude-code` | `claude -p` | Official Claude Code CLI (Pro/Teams subscription) |
 | `claw-code` | `claw -p` | Open-source Claw Code CLI |
-| `codex` | `codex --quiet` | OpenAI Codex CLI |
+| `codex` | `codex exec` | OpenAI Codex CLI |
 | `hermes` | `hermes chat -q` | Nous Research [Hermes Agent](https://github.com/NousResearch/hermes-agent) CLI |
-| `opencode` | `opencode -p` | OpenCode CLI |
+| `opencode` | `opencode run` | OpenCode CLI |
 | `local` | — | Offline rule-based engine for development & testing |
 `ph doctor` auto-detects all available backends and shows their status.
@@ -576,7 +576,7 @@ proposer:
   backend: api                 # api | openai | claude-code | claw-code | codex | hermes | opencode | local
   ensemble: []                 # If non-empty, pick among these backends per iteration via a UCB bandit
   bandit_c: 1.41421356         # UCB exploration constant (higher = more exploration)
-  model: claude-sonnet-4-20250514  # Model name (for api/openai backends)
+  model: claude-sonnet-4-6  # Model name (for api/openai backends)
   base_url: null               # Custom API endpoint (for openai backend)
   api_key: null                # API key override (null = use env var)
   max_tokens: 16384            # Max output tokens per proposer turn
@@ -805,9 +805,9 @@ polyharness/
 │   │   └── adapters/            # Per-agent CLI adapters
 │   │       ├── claude_code.py   # claude -p
 │   │       ├── claw_code.py     # claw -p
-│   │       ├── codex.py         # codex --quiet --auto-edit
+│   │       ├── codex.py         # codex exec
 │   │       ├── hermes.py        # hermes chat -q
-│   │       └── opencode.py      # opencode -p
+│   │       └── opencode.py      # opencode run
 │   └── templates/               # 5 built-in task templates
 │       ├── text-classification/
 │       ├── math-word-problems/

{polyharness-0.2.2 → polyharness-0.2.3}/README.md RENAMED Viewed

@@ -303,9 +303,9 @@ Just add `ph wrap --auto-evolve` in front of your agent command (pick the one ma
 # CLI agent backends — wrap the agent you already use
 ph wrap --auto-evolve claude -p "Refactor the auth module to use JWT"   # Claude Code
 ph wrap --auto-evolve claw -p "Write integration tests for payments"     # Claw Code
-ph wrap --auto-evolve codex "Add retry logic to the API client"          # Codex
+ph wrap --auto-evolve codex exec "Add retry logic to the API client"          # Codex
 ph wrap --auto-evolve hermes chat -q "Refactor the DB connection pool"   # Hermes Agent
-ph wrap --auto-evolve opencode -p "Fix the flaky parser test"            # OpenCode
+ph wrap --auto-evolve opencode run "Fix the flaky parser test"            # OpenCode
 # Local models — wrap the CLI command directly
 ph wrap --auto-evolve ollama run gemma3 "Summarize this document"         # Ollama
@@ -373,9 +373,9 @@ After that, just use your agent as usual:
 ```bash
 claude -p "Refactor auth to JWT"        # automatically becomes: ph wrap --auto-evolve claude -p ...
 claw -p "Write payment tests"            # same — auto-wrapped
-codex "Add retry logic"                  # same
+codex exec "Add retry logic"                  # same
 hermes chat -q "Refactor pool"           # same
-opencode -p "Fix flaky test"             # same
+opencode run "Fix flaky test"             # same
 ```
 How it works: a `preexec` hook in your shell detects `claude`/`claw`/`codex`/`hermes`/`opencode` commands and transparently redirects them through `ph wrap --auto-evolve`. Your output is unchanged.
@@ -466,9 +466,9 @@ The Proposer reads **all of this** before generating the next candidate. It can
 | `openai` | — | OpenAI-compatible API (Ollama, vLLM, LM Studio, etc). Needs `OPENAI_API_KEY` |
 | `claude-code` | `claude -p` | Official Claude Code CLI (Pro/Teams subscription) |
 | `claw-code` | `claw -p` | Open-source Claw Code CLI |
-| `codex` | `codex --quiet` | OpenAI Codex CLI |
+| `codex` | `codex exec` | OpenAI Codex CLI |
 | `hermes` | `hermes chat -q` | Nous Research [Hermes Agent](https://github.com/NousResearch/hermes-agent) CLI |
-| `opencode` | `opencode -p` | OpenCode CLI |
+| `opencode` | `opencode run` | OpenCode CLI |
 | `local` | — | Offline rule-based engine for development & testing |
 `ph doctor` auto-detects all available backends and shows their status.
@@ -543,7 +543,7 @@ proposer:
   backend: api                 # api | openai | claude-code | claw-code | codex | hermes | opencode | local
   ensemble: []                 # If non-empty, pick among these backends per iteration via a UCB bandit
   bandit_c: 1.41421356         # UCB exploration constant (higher = more exploration)
-  model: claude-sonnet-4-20250514  # Model name (for api/openai backends)
+  model: claude-sonnet-4-6  # Model name (for api/openai backends)
   base_url: null               # Custom API endpoint (for openai backend)
   api_key: null                # API key override (null = use env var)
   max_tokens: 16384            # Max output tokens per proposer turn
@@ -772,9 +772,9 @@ polyharness/
 │   │   └── adapters/            # Per-agent CLI adapters
 │   │       ├── claude_code.py   # claude -p
 │   │       ├── claw_code.py     # claw -p
-│   │       ├── codex.py         # codex --quiet --auto-edit
+│   │       ├── codex.py         # codex exec
 │   │       ├── hermes.py        # hermes chat -q
-│   │       └── opencode.py      # opencode -p
+│   │       └── opencode.py      # opencode run
 │   └── templates/               # 5 built-in task templates
 │       ├── text-classification/
 │       ├── math-word-problems/

{polyharness-0.2.2 → polyharness-0.2.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "polyharness"
-version = "0.2.2"
+version = "0.2.3"
 description = "Automated harness optimization for AI agents — make your agent evolve."
 readme = "README.md"
 license = "MIT"

{polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """PolyHarness — Automated harness optimization for AI agents."""
-__version__ = "0.2.2"
+__version__ = "0.2.3"

{polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/config.py RENAMED Viewed

@@ -84,7 +84,8 @@ class ProposerConfig(BaseModel):
         description="UCB exploration constant for ensemble selection. Higher = more exploration.",
     )
     model: str = Field(
-        default="claude-sonnet-4-20250514", description="Model for the Proposer agent."
+        default="claude-sonnet-4-6",
+        description="Model for the Proposer agent (api/openai backends; CLI backends use their own).",
     )
     base_url: str | None = Field(
         default=None, description="Optional base URL for the API (useful for local models)."

polyharness-0.2.3/src/polyharness/proposer/adapters/claude_code.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""Claude Code CLI adapter.
+Invokes the official `claude` CLI in print mode (-p).
+Requires an active Claude Code subscription.
+Verified against Claude Code (May 2026):
+- `-p` headless mode and `--output-format text` are current.
+- `--permission-mode acceptEdits` is REQUIRED for the agent to write files
+  non-interactively (auto-approves Read/Edit/Write); without it, headless edits
+  are blocked. `acceptEdits` still gates arbitrary Bash/network (least-privilege,
+  appropriate for the isolated workspace).
+- `--model claude-opus-4-7` pins to Opus 4.7 (full name for reproducibility).
+"""
+from __future__ import annotations
+from polyharness.proposer.adapters.base import CLIAdapter
+# Pinned Proposer model for the Claude Code backend (highest-capability).
+CLAUDE_CODE_MODEL = "claude-opus-4-7"
+class ClaudeCodeAdapter(CLIAdapter):
+    """Adapter for the Claude Code CLI (`claude`)."""
+    @property
+    def name(self) -> str:
+        return "claude-code"
+    @property
+    def default_binary(self) -> str:
+        return "claude"
+    def build_command(self, prompt: str, *, cli_path: str | None = None) -> list[str]:
+        binary = cli_path or self.default_binary
+        return [
+            binary,
+            "-p",                                # print mode (non-interactive)
+            prompt,
+            "--model", CLAUDE_CODE_MODEL,        # pin to Opus 4.7
+            "--permission-mode", "acceptEdits",  # auto-approve file edits (headless)
+            "--output-format", "text",
+        ]

{polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/codex.py RENAMED Viewed

@@ -1,6 +1,8 @@
 """Codex CLI adapter.
-Invokes OpenAI's `codex` CLI agent in quiet/non-interactive mode.
+Invokes OpenAI's `codex` CLI agent in headless/non-interactive mode via
+`codex exec` (the old `--quiet`/`--auto-edit` flags were removed upstream).
+See: developers.openai.com/codex/noninteractive
 """
 from __future__ import annotations
@@ -23,7 +25,8 @@ class CodexAdapter(CLIAdapter):
         binary = cli_path or self.default_binary
         return [
             binary,
-            "--quiet",
-            "--auto-edit",       # allow file edits without confirmation
+            "exec",                          # headless, non-interactive mode
+            "--skip-git-repo-check",         # the workspace is not a git repo
+            "--sandbox", "workspace-write",  # allow edits within the workspace cwd
             prompt,
         ]

{polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/adapters/opencode.py RENAMED Viewed

@@ -1,6 +1,8 @@
 """OpenCode CLI adapter.
-Invokes the open-source `opencode` CLI agent.
+Invokes the open-source `opencode` CLI agent in non-interactive mode via the
+`run` subcommand (the old top-level `-p` flag is no longer supported upstream).
+See: opencode.ai/docs/cli
 """
 from __future__ import annotations
@@ -23,6 +25,6 @@ class OpenCodeAdapter(CLIAdapter):
         binary = cli_path or self.default_binary
         return [
             binary,
-            "-p",                # prompt mode
+            "run",               # non-interactive mode (replaces old -p)
             prompt,
         ]

{polyharness-0.2.2 → polyharness-0.2.3}/src/polyharness/proposer/api_proposer.py RENAMED Viewed

@@ -123,7 +123,7 @@ class APIProposer(BaseProposer):
     def __init__(
         self,
-        model: str = "claude-sonnet-4-20250514",
+        model: str = "claude-sonnet-4-6",
         max_tokens: int = 16384,
         temperature: float = 0.7,
     ):

{polyharness-0.2.2 → polyharness-0.2.3/src/polyharness.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: polyharness
-Version: 0.2.2
+Version: 0.2.3
 Summary: Automated harness optimization for AI agents — make your agent evolve.
 Author: weijt606
 License-Expression: MIT
@@ -336,9 +336,9 @@ Just add `ph wrap --auto-evolve` in front of your agent command (pick the one ma
 # CLI agent backends — wrap the agent you already use
 ph wrap --auto-evolve claude -p "Refactor the auth module to use JWT"   # Claude Code
 ph wrap --auto-evolve claw -p "Write integration tests for payments"     # Claw Code
-ph wrap --auto-evolve codex "Add retry logic to the API client"          # Codex
+ph wrap --auto-evolve codex exec "Add retry logic to the API client"          # Codex
 ph wrap --auto-evolve hermes chat -q "Refactor the DB connection pool"   # Hermes Agent
-ph wrap --auto-evolve opencode -p "Fix the flaky parser test"            # OpenCode
+ph wrap --auto-evolve opencode run "Fix the flaky parser test"            # OpenCode
 # Local models — wrap the CLI command directly
 ph wrap --auto-evolve ollama run gemma3 "Summarize this document"         # Ollama
@@ -406,9 +406,9 @@ After that, just use your agent as usual:
 ```bash
 claude -p "Refactor auth to JWT"        # automatically becomes: ph wrap --auto-evolve claude -p ...
 claw -p "Write payment tests"            # same — auto-wrapped
-codex "Add retry logic"                  # same
+codex exec "Add retry logic"                  # same
 hermes chat -q "Refactor pool"           # same
-opencode -p "Fix flaky test"             # same
+opencode run "Fix flaky test"             # same
 ```
 How it works: a `preexec` hook in your shell detects `claude`/`claw`/`codex`/`hermes`/`opencode` commands and transparently redirects them through `ph wrap --auto-evolve`. Your output is unchanged.
@@ -499,9 +499,9 @@ The Proposer reads **all of this** before generating the next candidate. It can
 | `openai` | — | OpenAI-compatible API (Ollama, vLLM, LM Studio, etc). Needs `OPENAI_API_KEY` |
 | `claude-code` | `claude -p` | Official Claude Code CLI (Pro/Teams subscription) |
 | `claw-code` | `claw -p` | Open-source Claw Code CLI |
-| `codex` | `codex --quiet` | OpenAI Codex CLI |
+| `codex` | `codex exec` | OpenAI Codex CLI |
 | `hermes` | `hermes chat -q` | Nous Research [Hermes Agent](https://github.com/NousResearch/hermes-agent) CLI |
-| `opencode` | `opencode -p` | OpenCode CLI |
+| `opencode` | `opencode run` | OpenCode CLI |
 | `local` | — | Offline rule-based engine for development & testing |
 `ph doctor` auto-detects all available backends and shows their status.
@@ -576,7 +576,7 @@ proposer:
   backend: api                 # api | openai | claude-code | claw-code | codex | hermes | opencode | local
   ensemble: []                 # If non-empty, pick among these backends per iteration via a UCB bandit
   bandit_c: 1.41421356         # UCB exploration constant (higher = more exploration)
-  model: claude-sonnet-4-20250514  # Model name (for api/openai backends)
+  model: claude-sonnet-4-6  # Model name (for api/openai backends)
   base_url: null               # Custom API endpoint (for openai backend)
   api_key: null                # API key override (null = use env var)
   max_tokens: 16384            # Max output tokens per proposer turn
@@ -805,9 +805,9 @@ polyharness/
 │   │   └── adapters/            # Per-agent CLI adapters
 │   │       ├── claude_code.py   # claude -p
 │   │       ├── claw_code.py     # claw -p
-│   │       ├── codex.py         # codex --quiet --auto-edit
+│   │       ├── codex.py         # codex exec
 │   │       ├── hermes.py        # hermes chat -q
-│   │       └── opencode.py      # opencode -p
+│   │       └── opencode.py      # opencode run
 │   └── templates/               # 5 built-in task templates
 │       ├── text-classification/
 │       ├── math-word-problems/

{polyharness-0.2.2 → polyharness-0.2.3}/tests/test_cli_adapters.py RENAMED Viewed

@@ -50,6 +50,14 @@ def test_claude_code_command():
     assert cmd[0] == "claude"
     assert "-p" in cmd
     assert "do stuff" in cmd
+    # Pinned to Opus 4.7
+    assert "--model" in cmd
+    assert "claude-opus-4-7" in cmd
+    # Headless edits must be auto-approved or the agent can't write candidates
+    assert "--permission-mode" in cmd
+    assert "acceptEdits" in cmd
+    # --verbose is noise in print mode; should be gone
+    assert "--verbose" not in cmd
 def test_claude_code_custom_path():
@@ -70,7 +78,9 @@ def test_codex_command():
     adapter = CodexAdapter()
     cmd = adapter.build_command("fix it")
     assert cmd[0] == "codex"
-    assert "--quiet" in cmd
+    assert "exec" in cmd                    # headless mode (replaces old --quiet)
+    assert "--skip-git-repo-check" in cmd   # workspace isn't a git repo
+    assert "--quiet" not in cmd             # removed upstream
     assert "fix it" in cmd
@@ -78,6 +88,8 @@ def test_opencode_command():
     adapter = OpenCodeAdapter()
     cmd = adapter.build_command("optimize")
     assert cmd[0] == "opencode"
+    assert "run" in cmd          # non-interactive subcommand (replaces old -p)
+    assert "-p" not in cmd       # no longer supported upstream
     assert "optimize" in cmd

polyharness-0.2.2/src/polyharness/proposer/adapters/claude_code.py DELETED Viewed

@@ -1,31 +0,0 @@
-"""Claude Code CLI adapter.
-Invokes the official `claude` CLI in print mode (-p).
-Requires an active Claude Code subscription.
-"""
-from __future__ import annotations
-from polyharness.proposer.adapters.base import CLIAdapter
-class ClaudeCodeAdapter(CLIAdapter):
-    """Adapter for the Claude Code CLI (`claude`)."""
-    @property
-    def name(self) -> str:
-        return "claude-code"
-    @property
-    def default_binary(self) -> str:
-        return "claude"
-    def build_command(self, prompt: str, *, cli_path: str | None = None) -> list[str]:
-        binary = cli_path or self.default_binary
-        return [
-            binary,
-            "-p",                # print mode (non-interactive, stdout output)
-            prompt,
-            "--output-format", "text",
-            "--verbose",
-        ]