cih-agent 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cih_agent-0.1.0/LICENSE +21 -0
- cih_agent-0.1.0/PKG-INFO +148 -0
- cih_agent-0.1.0/README.md +121 -0
- cih_agent-0.1.0/cih/__init__.py +1 -0
- cih_agent-0.1.0/cih/agents.py +57 -0
- cih_agent-0.1.0/cih/attempts.py +61 -0
- cih_agent-0.1.0/cih/config.py +81 -0
- cih_agent-0.1.0/cih/contracts.py +32 -0
- cih_agent-0.1.0/cih/integration.py +192 -0
- cih_agent-0.1.0/cih/ledger.py +102 -0
- cih_agent-0.1.0/cih/merge_queue.py +37 -0
- cih_agent-0.1.0/cih/orchestrator.py +233 -0
- cih_agent-0.1.0/cih/progress.py +16 -0
- cih_agent-0.1.0/cih/report.py +176 -0
- cih_agent-0.1.0/cih/roles.py +59 -0
- cih_agent-0.1.0/cih/runner.py +80 -0
- cih_agent-0.1.0/cih/safety.py +68 -0
- cih_agent-0.1.0/cih/staging.py +65 -0
- cih_agent-0.1.0/cih/state.py +46 -0
- cih_agent-0.1.0/cih/tdd_verifier.py +131 -0
- cih_agent-0.1.0/cih/team.py +78 -0
- cih_agent-0.1.0/cih/transitions.py +32 -0
- cih_agent-0.1.0/cih/worktree.py +39 -0
- cih_agent-0.1.0/cih_agent.egg-info/PKG-INFO +148 -0
- cih_agent-0.1.0/cih_agent.egg-info/SOURCES.txt +54 -0
- cih_agent-0.1.0/cih_agent.egg-info/dependency_links.txt +1 -0
- cih_agent-0.1.0/cih_agent.egg-info/entry_points.txt +2 -0
- cih_agent-0.1.0/cih_agent.egg-info/requires.txt +6 -0
- cih_agent-0.1.0/cih_agent.egg-info/top_level.txt +1 -0
- cih_agent-0.1.0/pyproject.toml +41 -0
- cih_agent-0.1.0/setup.cfg +4 -0
- cih_agent-0.1.0/tests/test_agents.py +28 -0
- cih_agent-0.1.0/tests/test_attempts.py +32 -0
- cih_agent-0.1.0/tests/test_claude_cli_runner.py +53 -0
- cih_agent-0.1.0/tests/test_config.py +77 -0
- cih_agent-0.1.0/tests/test_conformance.py +29 -0
- cih_agent-0.1.0/tests/test_contracts.py +40 -0
- cih_agent-0.1.0/tests/test_e2e_smoke.py +80 -0
- cih_agent-0.1.0/tests/test_integration.py +286 -0
- cih_agent-0.1.0/tests/test_ledger.py +68 -0
- cih_agent-0.1.0/tests/test_merge_queue.py +36 -0
- cih_agent-0.1.0/tests/test_orchestrator.py +299 -0
- cih_agent-0.1.0/tests/test_progress.py +30 -0
- cih_agent-0.1.0/tests/test_report.py +177 -0
- cih_agent-0.1.0/tests/test_resume.py +114 -0
- cih_agent-0.1.0/tests/test_roles.py +30 -0
- cih_agent-0.1.0/tests/test_runner_cli.py +162 -0
- cih_agent-0.1.0/tests/test_safety.py +90 -0
- cih_agent-0.1.0/tests/test_scaffold.py +5 -0
- cih_agent-0.1.0/tests/test_skill_doc.py +13 -0
- cih_agent-0.1.0/tests/test_staging.py +48 -0
- cih_agent-0.1.0/tests/test_state.py +36 -0
- cih_agent-0.1.0/tests/test_tdd_verifier.py +119 -0
- cih_agent-0.1.0/tests/test_team.py +87 -0
- cih_agent-0.1.0/tests/test_transitions.py +32 -0
- cih_agent-0.1.0/tests/test_worktree.py +39 -0
cih_agent-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Huijo Kim
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
cih_agent-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cih-agent
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A hierarchical multi-agent harness that autonomously audits a codebase, finds high-value improvements, and applies them in TDD-gated iterations.
|
|
5
|
+
Author-email: Huijo Kim <huijo.kim@voids.ai>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ccomkhj/continuous-improvement-harness
|
|
8
|
+
Project-URL: Repository, https://github.com/ccomkhj/continuous-improvement-harness
|
|
9
|
+
Project-URL: Issues, https://github.com/ccomkhj/continuous-improvement-harness/issues
|
|
10
|
+
Keywords: agents,tdd,code-improvement,claude,automation
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: jsonschema>=4.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
24
|
+
Requires-Dist: build>=1.0; extra == "dev"
|
|
25
|
+
Requires-Dist: twine>=5.0; extra == "dev"
|
|
26
|
+
Dynamic: license-file
|
|
27
|
+
|
|
28
|
+
# Continuous Improvement Harness (CIH)
|
|
29
|
+
|
|
30
|
+
A hierarchical multi-agent harness that autonomously **audits a target codebase, finds
|
|
31
|
+
high-value improvements, and applies them in TDD-gated iterations** — runnable both as a
|
|
32
|
+
headless Python runner and as an interactive Claude Code skill, over one shared on-disk JSON
|
|
33
|
+
state format.
|
|
34
|
+
|
|
35
|
+
The target repo is always a **separate parameter** from the harness itself. CIH never pushes,
|
|
36
|
+
never stages files implicitly, and does all work in disposable per-team git worktrees.
|
|
37
|
+
|
|
38
|
+
## How it works
|
|
39
|
+
|
|
40
|
+
Each iteration, a **high-planner** audits the target and decomposes the work into
|
|
41
|
+
non-overlapping **team charters**. Every charter runs in its own isolated worktree through a
|
|
42
|
+
four-agent pipeline, gated by a mechanical pytest verifier and a skeptical reviewer. Passing
|
|
43
|
+
teams are integrated one at a time through a **bounded merge queue** that re-runs the full suite
|
|
44
|
+
before advancing the integration head. An **opportunity ledger** tracks what's been tried and
|
|
45
|
+
drives convergence.
|
|
46
|
+
|
|
47
|
+
```mermaid
|
|
48
|
+
flowchart TB
|
|
49
|
+
subgraph scope["scoping (skill only, once)"]
|
|
50
|
+
QA["Q&A interview<br/>--depth low/medium/high<br/>→ fills run.json"]
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
QA --> ORCH
|
|
54
|
+
|
|
55
|
+
subgraph loop["per iteration"]
|
|
56
|
+
ORCH["orchestrator<br/><i>pure control flow + state</i>"]
|
|
57
|
+
HP["high-planner<br/>audit → ledger → charters"]
|
|
58
|
+
ORCH --> HP
|
|
59
|
+
|
|
60
|
+
subgraph teams["parallel teams · one git worktree each"]
|
|
61
|
+
direction TB
|
|
62
|
+
T1["planner → plan-reviewer →<br/>executor → tdd_verifier (pytest) →<br/>execution-reviewer"]
|
|
63
|
+
T2["team-02 …"]
|
|
64
|
+
T3["team-NN …"]
|
|
65
|
+
end
|
|
66
|
+
HP --> T1 & T2 & T3
|
|
67
|
+
|
|
68
|
+
MQ["merge queue<br/>rebase → re-verify → fast-forward<br/><i>(bounded retries)</i>"]
|
|
69
|
+
T1 & T2 & T3 --> MQ
|
|
70
|
+
MQ --> DEC{"ledger dry?<br/>/ N reached?"}
|
|
71
|
+
DEC -->|no| ORCH
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
DEC -->|yes| DONE["stop · final report.html"]
|
|
75
|
+
|
|
76
|
+
LED[("opportunity<br/>ledger")]
|
|
77
|
+
HP <-.-> LED
|
|
78
|
+
MQ -.-> LED
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
**Termination** is either `fixed-N` (exactly N iterations) or `until-converged` (stop once the
|
|
82
|
+
ledger has no open opportunity above the value threshold for `convergence_dry_streak`
|
|
83
|
+
iterations). Both are hard-bounded by `--max-iterations` and a budget cap.
|
|
84
|
+
|
|
85
|
+
## Run (headless)
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
python -m cih.runner --mode fixed-N --iterations 3 \
|
|
89
|
+
--target-repo /abs/path/to/target --state-dir /abs/path/to/state \
|
|
90
|
+
--focus tests --focus performance
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
`until-converged` runs until the ledger is dry, bounded by `--max-iterations`:
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
python -m cih.runner --mode until-converged \
|
|
97
|
+
--target-repo /abs/path/to/target --state-dir /abs/path/to/state \
|
|
98
|
+
--max-iterations 25
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Run (interactive)
|
|
102
|
+
|
|
103
|
+
Invoke the `cih` skill in Claude Code (`.claude/skills/cih/SKILL.md`) with the target repo and
|
|
104
|
+
state dir. The skill renders the same agent contracts and orchestration steps, delegating to the
|
|
105
|
+
Agent/Task tools instead of `claude -p`.
|
|
106
|
+
|
|
107
|
+
Before the loop starts, the skill runs a short **Q&A scoping interview** to fill `run.json`. A
|
|
108
|
+
`--depth` flag caps how many questions it asks:
|
|
109
|
+
|
|
110
|
+
| `--depth` | question budget |
|
|
111
|
+
|-----------|-----------------|
|
|
112
|
+
| `low` | up to 3 |
|
|
113
|
+
| `medium` | up to 6 (default) |
|
|
114
|
+
| `high` | up to 10 |
|
|
115
|
+
|
|
116
|
+
It asks one question at a time about *intent only* (`focus_areas`, `mode` + caps,
|
|
117
|
+
`value_threshold`), stops early once it understands the goal, shows a summary for a single
|
|
118
|
+
confirmation, then runs **fully autonomously** with no further interruptions. `--depth` itself
|
|
119
|
+
is never written to `run.json`.
|
|
120
|
+
|
|
121
|
+
## Visual report
|
|
122
|
+
|
|
123
|
+
Generate a self-contained HTML view of a run's state:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
python -m cih.report --state-dir /abs/path/to/state # writes <state_dir>/report.html
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Or pass `--report` to the runner to (re)write `report.html` after every iteration; open it in a
|
|
130
|
+
browser — it auto-refreshes while the run is `in_progress` and stops once it's `done`/`failed`.
|
|
131
|
+
The page is fully self-contained (inline CSS, no network) and read-only over the state directory.
|
|
132
|
+
|
|
133
|
+
## Safety
|
|
134
|
+
|
|
135
|
+
- The harness **never pushes** and **never uses `git add -A`** — staging is explicit-only and
|
|
136
|
+
the bypass is structurally unreachable, not merely discouraged.
|
|
137
|
+
- `target_repo` and `state_dir` are absolute, distinct, and non-nested; state lives **outside**
|
|
138
|
+
the target repo, so agents can never stage harness artifacts.
|
|
139
|
+
- All work happens in disposable per-team worktrees; the target's working tree is never dirtied.
|
|
140
|
+
- Every git command is logged.
|
|
141
|
+
|
|
142
|
+
## Tests
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
python -m pytest -q
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
> Design specs and implementation plans live locally under `docs/superpowers/` (untracked).
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Continuous Improvement Harness (CIH)
|
|
2
|
+
|
|
3
|
+
A hierarchical multi-agent harness that autonomously **audits a target codebase, finds
|
|
4
|
+
high-value improvements, and applies them in TDD-gated iterations** — runnable both as a
|
|
5
|
+
headless Python runner and as an interactive Claude Code skill, over one shared on-disk JSON
|
|
6
|
+
state format.
|
|
7
|
+
|
|
8
|
+
The target repo is always a **separate parameter** from the harness itself. CIH never pushes,
|
|
9
|
+
never stages files implicitly, and does all work in disposable per-team git worktrees.
|
|
10
|
+
|
|
11
|
+
## How it works
|
|
12
|
+
|
|
13
|
+
Each iteration, a **high-planner** audits the target and decomposes the work into
|
|
14
|
+
non-overlapping **team charters**. Every charter runs in its own isolated worktree through a
|
|
15
|
+
four-agent pipeline, gated by a mechanical pytest verifier and a skeptical reviewer. Passing
|
|
16
|
+
teams are integrated one at a time through a **bounded merge queue** that re-runs the full suite
|
|
17
|
+
before advancing the integration head. An **opportunity ledger** tracks what's been tried and
|
|
18
|
+
drives convergence.
|
|
19
|
+
|
|
20
|
+
```mermaid
|
|
21
|
+
flowchart TB
|
|
22
|
+
subgraph scope["scoping (skill only, once)"]
|
|
23
|
+
QA["Q&A interview<br/>--depth low/medium/high<br/>→ fills run.json"]
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
QA --> ORCH
|
|
27
|
+
|
|
28
|
+
subgraph loop["per iteration"]
|
|
29
|
+
ORCH["orchestrator<br/><i>pure control flow + state</i>"]
|
|
30
|
+
HP["high-planner<br/>audit → ledger → charters"]
|
|
31
|
+
ORCH --> HP
|
|
32
|
+
|
|
33
|
+
subgraph teams["parallel teams · one git worktree each"]
|
|
34
|
+
direction TB
|
|
35
|
+
T1["planner → plan-reviewer →<br/>executor → tdd_verifier (pytest) →<br/>execution-reviewer"]
|
|
36
|
+
T2["team-02 …"]
|
|
37
|
+
T3["team-NN …"]
|
|
38
|
+
end
|
|
39
|
+
HP --> T1 & T2 & T3
|
|
40
|
+
|
|
41
|
+
MQ["merge queue<br/>rebase → re-verify → fast-forward<br/><i>(bounded retries)</i>"]
|
|
42
|
+
T1 & T2 & T3 --> MQ
|
|
43
|
+
MQ --> DEC{"ledger dry?<br/>/ N reached?"}
|
|
44
|
+
DEC -->|no| ORCH
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
DEC -->|yes| DONE["stop · final report.html"]
|
|
48
|
+
|
|
49
|
+
LED[("opportunity<br/>ledger")]
|
|
50
|
+
HP <-.-> LED
|
|
51
|
+
MQ -.-> LED
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**Termination** is either `fixed-N` (exactly N iterations) or `until-converged` (stop once the
|
|
55
|
+
ledger has no open opportunity above the value threshold for `convergence_dry_streak`
|
|
56
|
+
iterations). Both are hard-bounded by `--max-iterations` and a budget cap.
|
|
57
|
+
|
|
58
|
+
## Run (headless)
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
python -m cih.runner --mode fixed-N --iterations 3 \
|
|
62
|
+
--target-repo /abs/path/to/target --state-dir /abs/path/to/state \
|
|
63
|
+
--focus tests --focus performance
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
`until-converged` runs until the ledger is dry, bounded by `--max-iterations`:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
python -m cih.runner --mode until-converged \
|
|
70
|
+
--target-repo /abs/path/to/target --state-dir /abs/path/to/state \
|
|
71
|
+
--max-iterations 25
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Run (interactive)
|
|
75
|
+
|
|
76
|
+
Invoke the `cih` skill in Claude Code (`.claude/skills/cih/SKILL.md`) with the target repo and
|
|
77
|
+
state dir. The skill renders the same agent contracts and orchestration steps, delegating to the
|
|
78
|
+
Agent/Task tools instead of `claude -p`.
|
|
79
|
+
|
|
80
|
+
Before the loop starts, the skill runs a short **Q&A scoping interview** to fill `run.json`. A
|
|
81
|
+
`--depth` flag caps how many questions it asks:
|
|
82
|
+
|
|
83
|
+
| `--depth` | question budget |
|
|
84
|
+
|-----------|-----------------|
|
|
85
|
+
| `low` | up to 3 |
|
|
86
|
+
| `medium` | up to 6 (default) |
|
|
87
|
+
| `high` | up to 10 |
|
|
88
|
+
|
|
89
|
+
It asks one question at a time about *intent only* (`focus_areas`, `mode` + caps,
|
|
90
|
+
`value_threshold`), stops early once it understands the goal, shows a summary for a single
|
|
91
|
+
confirmation, then runs **fully autonomously** with no further interruptions. `--depth` itself
|
|
92
|
+
is never written to `run.json`.
|
|
93
|
+
|
|
94
|
+
## Visual report
|
|
95
|
+
|
|
96
|
+
Generate a self-contained HTML view of a run's state:
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
python -m cih.report --state-dir /abs/path/to/state # writes <state_dir>/report.html
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Or pass `--report` to the runner to (re)write `report.html` after every iteration; open it in a
|
|
103
|
+
browser — it auto-refreshes while the run is `in_progress` and stops once it's `done`/`failed`.
|
|
104
|
+
The page is fully self-contained (inline CSS, no network) and read-only over the state directory.
|
|
105
|
+
|
|
106
|
+
## Safety
|
|
107
|
+
|
|
108
|
+
- The harness **never pushes** and **never uses `git add -A`** — staging is explicit-only and
|
|
109
|
+
the bypass is structurally unreachable, not merely discouraged.
|
|
110
|
+
- `target_repo` and `state_dir` are absolute, distinct, and non-nested; state lives **outside**
|
|
111
|
+
the target repo, so agents can never stage harness artifacts.
|
|
112
|
+
- All work happens in disposable per-team worktrees; the target's working tree is never dirtied.
|
|
113
|
+
- Every git command is logged.
|
|
114
|
+
|
|
115
|
+
## Tests
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
python -m pytest -q
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
> Design specs and implementation plans live locally under `docs/superpowers/` (untracked).
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Version string of the cih package (the PKG-INFO of this release also reports 0.1.0).
__version__ = "0.1.0"
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# cih/agents.py
|
|
2
|
+
import json
|
|
3
|
+
import subprocess
|
|
4
|
+
from typing import Protocol
|
|
5
|
+
from cih.contracts import AgentContract
|
|
6
|
+
|
|
7
|
+
class AgentRunner(Protocol):
    """Structural interface for objects that can execute an agent contract.

    An implementation takes the contract and an input payload and returns
    the agent's output as a dict.
    """

    def run(self, contract: AgentContract, input_data: dict) -> dict:
        ...
|
|
9
|
+
|
|
10
|
+
class StubRunner:
    """Test double that replays canned outputs looked up by contract role.

    Each call is appended to ``calls`` (role plus the input it received)
    before the lookup, so calls for unknown roles are recorded as well.
    """

    def __init__(self, responses: dict):
        self.calls: list[dict] = []
        self.responses = responses

    def run(self, contract: AgentContract, input_data: dict) -> dict:
        """Record the call and return the canned response for the role.

        Raises:
            KeyError: if no response was registered for ``contract.role``.
        """
        role = contract.role
        self.calls.append({"role": role, "input": input_data})
        if role in self.responses:
            return self.responses[role]
        raise KeyError(f"no stub response for role {role}")
|
|
21
|
+
|
|
22
|
+
class ClaudeCliRunner:
    """Headless adapter: drives `claude -p --append-system-prompt`.

    Flags precede the prompt; output is expected as JSON on stdout.
    """

    def __init__(self, cwd: str, extra_args: list[str] | None = None):
        """Remember the working directory and any extra CLI flags.

        Args:
            cwd: directory the ``claude`` subprocess is started in.
            extra_args: additional arguments inserted before the ``--``
                separator; ``None`` means no extra arguments.
        """
        self.cwd = cwd
        self.extra_args = extra_args or []

    def run(self, contract: AgentContract, input_data: dict) -> dict:
        """Invoke the CLI once for *contract* and return the decoded result.

        The input payload is serialized to JSON and passed as the prompt; the
        contract's ``role_prompt`` is appended to the system prompt.

        Returns:
            The envelope's ``result`` field: returned as-is when it is already
            a dict, otherwise decoded from JSON text.

        Raises:
            RuntimeError: if the process exits non-zero, stdout is not JSON,
                stdout is JSON but not an object, or the envelope has a truthy
                ``is_error``.
            OutputValidationError: if ``result`` is neither a dict nor
                JSON-decodable text.
        """
        prompt = json.dumps(input_data)
        # NOTE(review): the "--" presumably keeps a prompt that starts with
        # "-" from being parsed as a flag; confirm against the CLI's parser.
        cmd = ["claude", "-p", "--output-format", "json",
               "--append-system-prompt", contract.role_prompt,
               *self.extra_args, "--", prompt]
        proc = subprocess.run(cmd, cwd=self.cwd, capture_output=True, text=True)
        if proc.returncode != 0:
            raise RuntimeError(f"claude failed for {contract.role}: {proc.stderr}")
        try:
            envelope = json.loads(proc.stdout)
        except json.JSONDecodeError as e:
            raise RuntimeError(f"{contract.role}: non-JSON stdout from claude -p: {proc.stdout[:500]!r}") from e
        # Valid JSON that is not an object (a list, null, a bare string, ...)
        # has no .get(); report it like any other malformed stdout instead of
        # leaking an AttributeError.
        if not isinstance(envelope, dict):
            raise RuntimeError(
                f"{contract.role}: unexpected JSON envelope from claude -p: {proc.stdout[:500]!r}")
        if envelope.get("is_error"):
            raise RuntimeError(f"{contract.role}: claude reported error: {envelope.get('result')}")
        result = envelope.get("result")
        if isinstance(result, dict):
            return result
        try:
            return json.loads(result)
        except (TypeError, json.JSONDecodeError) as e:
            # Imported here, as in the original, to keep the file's top-level
            # import list unchanged.
            from cih.contracts import OutputValidationError
            raise OutputValidationError(f"{contract.role}: result was not JSON: {result!r}") from e
|
|
53
|
+
|
|
54
|
+
def invoke(runner: AgentRunner, contract: AgentContract, input_data: dict) -> dict:
    """Run *contract* through *runner* and return the checked output.

    The runner's output is handed to ``contract.validate_output`` before it is
    returned, so any exception raised by that check propagates to the caller.
    """
    result = runner.run(contract, input_data)
    contract.validate_output(result)
    return result
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from dataclasses import dataclass, asdict
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
class AttemptKind(str, Enum):
    """Kinds of attempt; members are ``str`` and compare equal to their values."""

    PLAN = "plan_retry"
    EXECUTION = "execution_retry"
    INTEGRATION = "integration_retry"
    FINAL_REJECT = "final_reject"
|
|
10
|
+
|
|
11
|
+
class AttemptCapExceeded(Exception):
    """Raised when starting another attempt would exceed the attempt cap."""
|
|
13
|
+
|
|
14
|
+
@dataclass
class Attempt:
    """Record of a single attempt: its id, kind, git coordinates and feedback."""

    attempt_id: str
    kind: str
    base_sha: str
    branch: str
    worktree_path: str
    feedback_input: str
    # Id of the attempt this one follows, when there is one.
    parent_attempt_id: Optional[str] = None
    # New attempts start out as the current one.
    is_current: bool = True
|
|
24
|
+
|
|
25
|
+
class AttemptLog:
    """Ordered, capped list of the attempts recorded for one team."""

    def __init__(self, team_id: str, cap: int):
        self.team_id = team_id
        self.cap = cap
        self._attempts: list[Attempt] = []

    def start(self, kind: AttemptKind, base_sha: str, branch: str,
              worktree_path: str, feedback: str,
              parent: Optional[str] = None) -> Attempt:
        """Append a new attempt and make it the current one.

        Ids are ``attempt-NN`` with NN counting from 01. ``kind`` may be an
        ``AttemptKind`` (its value is stored) or any other value, which is
        stored unchanged.

        Raises:
            AttemptCapExceeded: if ``cap`` attempts have already been recorded.
        """
        recorded = len(self._attempts)
        if recorded >= self.cap:
            raise AttemptCapExceeded(
                f"{self.team_id}: attempt cap {self.cap} reached")
        # Only the attempt created below keeps is_current=True.
        for earlier in self._attempts:
            earlier.is_current = False
        stored_kind = kind.value if isinstance(kind, AttemptKind) else kind
        fresh = Attempt(
            attempt_id=f"attempt-{recorded+1:02d}",
            kind=stored_kind,
            base_sha=base_sha,
            branch=branch,
            worktree_path=worktree_path,
            feedback_input=feedback,
            parent_attempt_id=parent,
        )
        self._attempts.append(fresh)
        return fresh

    def current(self) -> Optional[Attempt]:
        """Return the most recently recorded attempt, or None if there is none."""
        if not self._attempts:
            return None
        return self._attempts[-1]

    def all(self) -> list[Attempt]:
        """Return a shallow copy of the recorded attempts, oldest first."""
        return self._attempts.copy()

    def to_dict(self) -> dict:
        """Serialize the log (team id, cap and every attempt) to plain dicts."""
        serialized = [asdict(item) for item in self._attempts]
        return {"team_id": self.team_id, "cap": self.cap,
                "attempts": serialized}

    @classmethod
    def from_dict(cls, d: dict) -> "AttemptLog":
        """Rebuild a log from the structure produced by ``to_dict``."""
        restored = cls(team_id=d["team_id"], cap=d["cap"])
        restored._attempts = [Attempt(**raw) for raw in d["attempts"]]
        return restored
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dataclasses import dataclass, field, asdict
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
class ConfigError(Exception):
    """Raised when run configuration values fail validation."""
|
|
8
|
+
|
|
9
|
+
# Run modes accepted by the configuration.
_MODES = {"fixed-N", "until-converged"}

# Question budget (upper bound) for each --depth name.
DEPTH_BUDGET = dict(low=3, medium=6, high=10)
DEFAULT_DEPTH = "medium"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def depth_budget(name: Optional[str] = None) -> int:
    """Return the question budget (upper bound) for a --depth name.

    ``None`` selects ``DEFAULT_DEPTH``.

    Raises:
        ConfigError: if the name is not a key of ``DEPTH_BUDGET``; the message
            lists the valid names ordered by their budget.
    """
    chosen = DEFAULT_DEPTH if name is None else name
    if chosen in DEPTH_BUDGET:
        return DEPTH_BUDGET[chosen]
    raise ConfigError(
        f"depth must be one of {sorted(DEPTH_BUDGET, key=DEPTH_BUDGET.__getitem__)} (got {chosen!r})"
    )
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class RunConfig:
    """Settings for one harness run.

    Build instances through :meth:`create` or :meth:`from_dict`, which check
    the mode/iterations combination and both paths. Calling the dataclass
    constructor directly performs no validation.
    """

    mode: str  # one of the names in _MODES
    target_repo: str
    state_dir: str
    iterations: Optional[int] = None  # required for fixed-N, forbidden for until-converged
    max_iterations: int = 25
    budget_cap: Optional[int] = None
    focus_areas: list[str] = field(default_factory=list)
    value_threshold: float = 0.5
    convergence_dry_streak: int = 2
    plan_review_retries: int = 2
    exec_review_retries: int = 2
    max_teams_per_iteration: int = 4
    integration_retries: int = 2
    per_team_attempt_cap: int = 4
    cooldown_iterations: int = 2
    opportunity_max_attempts: int = 3
    tdd_adapter: str = "pytest"

    @staticmethod
    def _validate_paths(target_repo: str, state_dir: str) -> None:
        """Check that both paths are absolute, distinct, non-nested directories.

        Raises:
            ConfigError: if either value is not a path, is not absolute, the
                two resolve to the same or nested locations, or either is not
                an existing directory.
        """
        for label, p in (("target_repo", target_repo), ("state_dir", state_dir)):
            # A non-path value such as None would make os.path.isabs raise
            # TypeError; report it as a configuration problem instead.
            if not isinstance(p, (str, os.PathLike)):
                raise ConfigError(f"{label} must be an absolute path: {p!r}")
            if not os.path.isabs(p):
                raise ConfigError(f"{label} must be an absolute path: {p}")
        # Compare resolved paths so symlinks and ".." segments cannot hide nesting.
        t = Path(target_repo).resolve()
        s = Path(state_dir).resolve()
        if t == s:
            raise ConfigError("target_repo and state_dir must be distinct")
        if t in s.parents or s in t.parents:
            raise ConfigError("state_dir must not be nested inside target_repo (or vice versa)")
        for label, p in (("target_repo", t), ("state_dir", s)):
            if not p.is_dir():
                raise ConfigError(f"{label} must be an existing directory: {p}")

    @classmethod
    def create(cls, **kwargs) -> "RunConfig":
        """Validate *kwargs* and build a RunConfig from them.

        Raises:
            ConfigError: for an unknown mode, an iterations value that does
                not fit the mode, a missing ``target_repo``/``state_dir``, or
                paths rejected by :meth:`_validate_paths`.
        """
        mode = kwargs.get("mode")
        # The isinstance guard keeps an unhashable value (e.g. a list) from
        # raising TypeError in the set membership test.
        if not isinstance(mode, str) or mode not in _MODES:
            # sorted() gives a stable message; set ordering is not deterministic.
            raise ConfigError(f"mode must be one of {sorted(_MODES)}")
        iterations = kwargs.get("iterations")
        if mode == "fixed-N":
            # bool is a subclass of int, so True would otherwise count as 1.
            if (isinstance(iterations, bool) or not isinstance(iterations, int)
                    or iterations <= 0):
                raise ConfigError("fixed-N mode requires iterations to be a positive int")
        elif mode == "until-converged":
            if iterations is not None:
                raise ConfigError("until-converged mode must not set iterations")
        missing = [key for key in ("target_repo", "state_dir") if key not in kwargs]
        if missing:
            raise ConfigError(f"missing required setting(s): {', '.join(missing)}")
        cls._validate_paths(kwargs["target_repo"], kwargs["state_dir"])
        return cls(**kwargs)

    def to_dict(self) -> dict:
        """Return all fields as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d: dict) -> "RunConfig":
        """Rebuild a config from a dict, re-running the checks in :meth:`create`."""
        return cls.create(**d)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# cih/contracts.py
|
|
2
|
+
import hashlib
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from jsonschema import validate, ValidationError
|
|
6
|
+
|
|
7
|
+
class OutputValidationError(Exception):
    """Raised when an agent's output does not satisfy its contract."""
|
|
9
|
+
|
|
10
|
+
@dataclass
class AgentContract:
    """Description of one agent role: its prompt, I/O schemas and settings."""

    role: str
    agent_version: str
    role_prompt: str
    input_schema: dict
    output_schema: dict
    allowed_tools: list = field(default_factory=list)
    runtime_adapter_settings: dict = field(default_factory=dict)

    def validate_output(self, output: dict) -> None:
        """Check *output* against ``output_schema`` using jsonschema.

        Raises:
            OutputValidationError: if jsonschema reports a ValidationError;
                the original error is chained as the cause.
        """
        try:
            validate(instance=output, schema=self.output_schema)
        except ValidationError as err:
            raise OutputValidationError(f"{self.role} output invalid: {err.message}") from err

    def prompt_hash(self) -> str:
        """Return a 16-hex-character fingerprint of the contract.

        The digest covers the prompt, both schemas, the agent version, the
        allowed tools and the adapter settings; ``role`` is not part of it.
        """
        fingerprint = {
            "prompt": self.role_prompt,
            "in": self.input_schema,
            "out": self.output_schema,
            "v": self.agent_version,
            "tools": self.allowed_tools,
            "adapter": self.runtime_adapter_settings,
        }
        # sort_keys makes the serialization independent of dict insertion order.
        encoded = json.dumps(fingerprint, sort_keys=True).encode()
        return hashlib.sha256(encoded).hexdigest()[:16]
|