cih-agent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. cih_agent-0.1.0/LICENSE +21 -0
  2. cih_agent-0.1.0/PKG-INFO +148 -0
  3. cih_agent-0.1.0/README.md +121 -0
  4. cih_agent-0.1.0/cih/__init__.py +1 -0
  5. cih_agent-0.1.0/cih/agents.py +57 -0
  6. cih_agent-0.1.0/cih/attempts.py +61 -0
  7. cih_agent-0.1.0/cih/config.py +81 -0
  8. cih_agent-0.1.0/cih/contracts.py +32 -0
  9. cih_agent-0.1.0/cih/integration.py +192 -0
  10. cih_agent-0.1.0/cih/ledger.py +102 -0
  11. cih_agent-0.1.0/cih/merge_queue.py +37 -0
  12. cih_agent-0.1.0/cih/orchestrator.py +233 -0
  13. cih_agent-0.1.0/cih/progress.py +16 -0
  14. cih_agent-0.1.0/cih/report.py +176 -0
  15. cih_agent-0.1.0/cih/roles.py +59 -0
  16. cih_agent-0.1.0/cih/runner.py +80 -0
  17. cih_agent-0.1.0/cih/safety.py +68 -0
  18. cih_agent-0.1.0/cih/staging.py +65 -0
  19. cih_agent-0.1.0/cih/state.py +46 -0
  20. cih_agent-0.1.0/cih/tdd_verifier.py +131 -0
  21. cih_agent-0.1.0/cih/team.py +78 -0
  22. cih_agent-0.1.0/cih/transitions.py +32 -0
  23. cih_agent-0.1.0/cih/worktree.py +39 -0
  24. cih_agent-0.1.0/cih_agent.egg-info/PKG-INFO +148 -0
  25. cih_agent-0.1.0/cih_agent.egg-info/SOURCES.txt +54 -0
  26. cih_agent-0.1.0/cih_agent.egg-info/dependency_links.txt +1 -0
  27. cih_agent-0.1.0/cih_agent.egg-info/entry_points.txt +2 -0
  28. cih_agent-0.1.0/cih_agent.egg-info/requires.txt +6 -0
  29. cih_agent-0.1.0/cih_agent.egg-info/top_level.txt +1 -0
  30. cih_agent-0.1.0/pyproject.toml +41 -0
  31. cih_agent-0.1.0/setup.cfg +4 -0
  32. cih_agent-0.1.0/tests/test_agents.py +28 -0
  33. cih_agent-0.1.0/tests/test_attempts.py +32 -0
  34. cih_agent-0.1.0/tests/test_claude_cli_runner.py +53 -0
  35. cih_agent-0.1.0/tests/test_config.py +77 -0
  36. cih_agent-0.1.0/tests/test_conformance.py +29 -0
  37. cih_agent-0.1.0/tests/test_contracts.py +40 -0
  38. cih_agent-0.1.0/tests/test_e2e_smoke.py +80 -0
  39. cih_agent-0.1.0/tests/test_integration.py +286 -0
  40. cih_agent-0.1.0/tests/test_ledger.py +68 -0
  41. cih_agent-0.1.0/tests/test_merge_queue.py +36 -0
  42. cih_agent-0.1.0/tests/test_orchestrator.py +299 -0
  43. cih_agent-0.1.0/tests/test_progress.py +30 -0
  44. cih_agent-0.1.0/tests/test_report.py +177 -0
  45. cih_agent-0.1.0/tests/test_resume.py +114 -0
  46. cih_agent-0.1.0/tests/test_roles.py +30 -0
  47. cih_agent-0.1.0/tests/test_runner_cli.py +162 -0
  48. cih_agent-0.1.0/tests/test_safety.py +90 -0
  49. cih_agent-0.1.0/tests/test_scaffold.py +5 -0
  50. cih_agent-0.1.0/tests/test_skill_doc.py +13 -0
  51. cih_agent-0.1.0/tests/test_staging.py +48 -0
  52. cih_agent-0.1.0/tests/test_state.py +36 -0
  53. cih_agent-0.1.0/tests/test_tdd_verifier.py +119 -0
  54. cih_agent-0.1.0/tests/test_team.py +87 -0
  55. cih_agent-0.1.0/tests/test_transitions.py +32 -0
  56. cih_agent-0.1.0/tests/test_worktree.py +39 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Huijo Kim
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,148 @@
1
+ Metadata-Version: 2.4
2
+ Name: cih-agent
3
+ Version: 0.1.0
4
+ Summary: A hierarchical multi-agent harness that autonomously audits a codebase, finds high-value improvements, and applies them in TDD-gated iterations.
5
+ Author-email: Huijo Kim <huijo.kim@voids.ai>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/ccomkhj/continuous-improvement-harness
8
+ Project-URL: Repository, https://github.com/ccomkhj/continuous-improvement-harness
9
+ Project-URL: Issues, https://github.com/ccomkhj/continuous-improvement-harness/issues
10
+ Keywords: agents,tdd,code-improvement,claude,automation
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development :: Quality Assurance
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: jsonschema>=4.0
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=8.0; extra == "dev"
24
+ Requires-Dist: build>=1.0; extra == "dev"
25
+ Requires-Dist: twine>=5.0; extra == "dev"
26
+ Dynamic: license-file
27
+
28
+ # Continuous Improvement Harness (CIH)
29
+
30
+ A hierarchical multi-agent harness that autonomously **audits a target codebase, finds
31
+ high-value improvements, and applies them in TDD-gated iterations** — runnable both as a
32
+ headless Python runner and as an interactive Claude Code skill, over one shared on-disk JSON
33
+ state format.
34
+
35
+ The target repo is always a **separate parameter** from the harness itself. CIH never pushes,
36
+ never stages files implicitly, and does all work in disposable per-team git worktrees.
37
+
38
+ ## How it works
39
+
40
+ Each iteration, a **high-planner** audits the target and decomposes the work into
41
+ non-overlapping **team charters**. Every charter runs in its own isolated worktree through a
42
+ four-agent pipeline, gated by a mechanical pytest verifier and a skeptical reviewer. Passing
43
+ teams are integrated one at a time through a **bounded merge queue** that re-runs the full suite
44
+ before advancing the integration head. An **opportunity ledger** tracks what's been tried and
45
+ drives convergence.
46
+
47
+ ```mermaid
48
+ flowchart TB
49
+ subgraph scope["scoping (skill only, once)"]
50
+ QA["Q&A interview<br/>--depth low/med/high<br/>→ fills run.json"]
51
+ end
52
+
53
+ QA --> ORCH
54
+
55
+ subgraph loop["per iteration"]
56
+ ORCH["orchestrator<br/><i>pure control flow + state</i>"]
57
+ HP["high-planner<br/>audit → ledger → charters"]
58
+ ORCH --> HP
59
+
60
+ subgraph teams["parallel teams · one git worktree each"]
61
+ direction TB
62
+ T1["planner → plan-reviewer →<br/>executor → tdd_verifier (pytest) →<br/>execution-reviewer"]
63
+ T2["team-02 …"]
64
+ T3["team-NN …"]
65
+ end
66
+ HP --> T1 & T2 & T3
67
+
68
+ MQ["merge queue<br/>rebase → re-verify → fast-forward<br/><i>(bounded retries)</i>"]
69
+ T1 & T2 & T3 --> MQ
70
+ MQ --> DEC{"ledger dry?<br/>/ N reached?"}
71
+ DEC -->|no| ORCH
72
+ end
73
+
74
+ DEC -->|yes| DONE["stop · final report.html"]
75
+
76
+ LED[("opportunity<br/>ledger")]
77
+ HP <-.-> LED
78
+ MQ -.-> LED
79
+ ```
80
+
81
+ **Termination** is either `fixed-N` (exactly N iterations) or `until-converged` (stop once the
82
+ ledger has no open opportunity above the value threshold for `convergence_dry_streak`
83
+ iterations). Both are hard-bounded by `--max-iterations` and a budget cap.
84
+
85
+ ## Run (headless)
86
+
87
+ ```bash
88
+ python -m cih.runner --mode fixed-N --iterations 3 \
89
+ --target-repo /abs/path/to/target --state-dir /abs/path/to/state \
90
+ --focus tests --focus performance
91
+ ```
92
+
93
+ `until-converged` runs until the ledger is dry, bounded by `--max-iterations`:
94
+
95
+ ```bash
96
+ python -m cih.runner --mode until-converged \
97
+ --target-repo /abs/path/to/target --state-dir /abs/path/to/state \
98
+ --max-iterations 25
99
+ ```
100
+
101
+ ## Run (interactive)
102
+
103
+ Invoke the `cih` skill in Claude Code (`.claude/skills/cih/SKILL.md`) with the target repo and
104
+ state dir. The skill renders the same agent contracts and orchestration steps, delegating to the
105
+ Agent/Task tools instead of `claude -p`.
106
+
107
+ Before the loop starts, the skill runs a short **Q&A scoping interview** to fill `run.json`. A
108
+ `--depth` flag caps how many questions it asks:
109
+
110
+ | `--depth` | question budget |
111
+ |-----------|-----------------|
112
+ | `low` | up to 3 |
113
+ | `medium` | up to 6 (default) |
114
+ | `high` | up to 10 |
115
+
116
+ It asks one question at a time about *intent only* (`focus_areas`, `mode` + caps,
117
+ `value_threshold`), stops early once it understands the goal, shows a summary for a single
118
+ confirmation, then runs **fully autonomously** with no further interruptions. `--depth` itself
119
+ is never written to `run.json`.
120
+
121
+ ## Visual report
122
+
123
+ Generate a self-contained HTML view of a run's state:
124
+
125
+ ```bash
126
+ python -m cih.report --state-dir /abs/path/to/state # writes <state_dir>/report.html
127
+ ```
128
+
129
+ Or pass `--report` to the runner to (re)write `report.html` after every iteration; open it in a
130
+ browser — it auto-refreshes while the run is `in_progress` and stops once it's `done`/`failed`.
131
+ The page is fully self-contained (inline CSS, no network) and read-only over the state directory.
132
+
133
+ ## Safety
134
+
135
+ - The harness **never pushes** and **never uses `git add -A`** — staging is explicit-only and
136
+ the bypass is structurally unreachable, not merely discouraged.
137
+ - `target_repo` and `state_dir` are absolute, distinct, and non-nested; state lives **outside**
138
+ the target repo, so agents can never stage harness artifacts.
139
+ - All work happens in disposable per-team worktrees; the target's working tree is never dirtied.
140
+ - Every git command is logged.
141
+
142
+ ## Tests
143
+
144
+ ```bash
145
+ python -m pytest -q
146
+ ```
147
+
148
+ > Design specs and implementation plans live locally under `docs/superpowers/` (untracked).
@@ -0,0 +1,121 @@
1
+ # Continuous Improvement Harness (CIH)
2
+
3
+ A hierarchical multi-agent harness that autonomously **audits a target codebase, finds
4
+ high-value improvements, and applies them in TDD-gated iterations** — runnable both as a
5
+ headless Python runner and as an interactive Claude Code skill, over one shared on-disk JSON
6
+ state format.
7
+
8
+ The target repo is always a **separate parameter** from the harness itself. CIH never pushes,
9
+ never stages files implicitly, and does all work in disposable per-team git worktrees.
10
+
11
+ ## How it works
12
+
13
+ Each iteration, a **high-planner** audits the target and decomposes the work into
14
+ non-overlapping **team charters**. Every charter runs in its own isolated worktree through a
15
+ four-agent pipeline, gated by a mechanical pytest verifier and a skeptical reviewer. Passing
16
+ teams are integrated one at a time through a **bounded merge queue** that re-runs the full suite
17
+ before advancing the integration head. An **opportunity ledger** tracks what's been tried and
18
+ drives convergence.
19
+
20
+ ```mermaid
21
+ flowchart TB
22
+ subgraph scope["scoping (skill only, once)"]
23
+ QA["Q&A interview<br/>--depth low/med/high<br/>→ fills run.json"]
24
+ end
25
+
26
+ QA --> ORCH
27
+
28
+ subgraph loop["per iteration"]
29
+ ORCH["orchestrator<br/><i>pure control flow + state</i>"]
30
+ HP["high-planner<br/>audit → ledger → charters"]
31
+ ORCH --> HP
32
+
33
+ subgraph teams["parallel teams · one git worktree each"]
34
+ direction TB
35
+ T1["planner → plan-reviewer →<br/>executor → tdd_verifier (pytest) →<br/>execution-reviewer"]
36
+ T2["team-02 …"]
37
+ T3["team-NN …"]
38
+ end
39
+ HP --> T1 & T2 & T3
40
+
41
+ MQ["merge queue<br/>rebase → re-verify → fast-forward<br/><i>(bounded retries)</i>"]
42
+ T1 & T2 & T3 --> MQ
43
+ MQ --> DEC{"ledger dry?<br/>/ N reached?"}
44
+ DEC -->|no| ORCH
45
+ end
46
+
47
+ DEC -->|yes| DONE["stop · final report.html"]
48
+
49
+ LED[("opportunity<br/>ledger")]
50
+ HP <-.-> LED
51
+ MQ -.-> LED
52
+ ```
53
+
54
+ **Termination** is either `fixed-N` (exactly N iterations) or `until-converged` (stop once the
55
+ ledger has no open opportunity above the value threshold for `convergence_dry_streak`
56
+ iterations). Both are hard-bounded by `--max-iterations` and a budget cap.
57
+
58
+ ## Run (headless)
59
+
60
+ ```bash
61
+ python -m cih.runner --mode fixed-N --iterations 3 \
62
+ --target-repo /abs/path/to/target --state-dir /abs/path/to/state \
63
+ --focus tests --focus performance
64
+ ```
65
+
66
+ `until-converged` runs until the ledger is dry, bounded by `--max-iterations`:
67
+
68
+ ```bash
69
+ python -m cih.runner --mode until-converged \
70
+ --target-repo /abs/path/to/target --state-dir /abs/path/to/state \
71
+ --max-iterations 25
72
+ ```
73
+
74
+ ## Run (interactive)
75
+
76
+ Invoke the `cih` skill in Claude Code (`.claude/skills/cih/SKILL.md`) with the target repo and
77
+ state dir. The skill renders the same agent contracts and orchestration steps, delegating to the
78
+ Agent/Task tools instead of `claude -p`.
79
+
80
+ Before the loop starts, the skill runs a short **Q&A scoping interview** to fill `run.json`. A
81
+ `--depth` flag caps how many questions it asks:
82
+
83
+ | `--depth` | question budget |
84
+ |-----------|-----------------|
85
+ | `low` | up to 3 |
86
+ | `medium` | up to 6 (default) |
87
+ | `high` | up to 10 |
88
+
89
+ It asks one question at a time about *intent only* (`focus_areas`, `mode` + caps,
90
+ `value_threshold`), stops early once it understands the goal, shows a summary for a single
91
+ confirmation, then runs **fully autonomously** with no further interruptions. `--depth` itself
92
+ is never written to `run.json`.
93
+
94
+ ## Visual report
95
+
96
+ Generate a self-contained HTML view of a run's state:
97
+
98
+ ```bash
99
+ python -m cih.report --state-dir /abs/path/to/state # writes <state_dir>/report.html
100
+ ```
101
+
102
+ Or pass `--report` to the runner to (re)write `report.html` after every iteration; open it in a
103
+ browser — it auto-refreshes while the run is `in_progress` and stops once it's `done`/`failed`.
104
+ The page is fully self-contained (inline CSS, no network) and read-only over the state directory.
105
+
106
+ ## Safety
107
+
108
+ - The harness **never pushes** and **never uses `git add -A`** — staging is explicit-only and
109
+ the bypass is structurally unreachable, not merely discouraged.
110
+ - `target_repo` and `state_dir` are absolute, distinct, and non-nested; state lives **outside**
111
+ the target repo, so agents can never stage harness artifacts.
112
+ - All work happens in disposable per-team worktrees; the target's working tree is never dirtied.
113
+ - Every git command is logged.
114
+
115
+ ## Tests
116
+
117
+ ```bash
118
+ python -m pytest -q
119
+ ```
120
+
121
+ > Design specs and implementation plans live locally under `docs/superpowers/` (untracked).
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
@@ -0,0 +1,57 @@
1
+ # cih/agents.py
2
+ import json
3
+ import subprocess
4
+ from typing import Protocol
5
+ from cih.contracts import AgentContract
6
+
7
class AgentRunner(Protocol):
    """Structural interface for anything that can execute an agent contract."""

    def run(self, contract: AgentContract, input_data: dict) -> dict:
        """Run the agent described by *contract* on *input_data*; return its output dict."""
        ...
9
+
10
class StubRunner:
    """Test double that answers each call with a canned response chosen by role.

    Every invocation is recorded in ``calls`` (role + input), including the
    ones that fail because no response was registered for the role.
    """

    def __init__(self, responses: dict):
        self.responses = responses
        self.calls: list[dict] = []

    def run(self, contract: AgentContract, input_data: dict) -> dict:
        """Record the call, then return the canned response for ``contract.role``.

        Raises:
            KeyError: if no response was registered for the role.
        """
        role = contract.role
        # Record first so that failed lookups are still visible to the test.
        self.calls.append({"role": role, "input": input_data})
        if role in self.responses:
            return self.responses[role]
        raise KeyError(f"no stub response for role {role}")
21
+
22
class ClaudeCliRunner:
    """Headless adapter: drives `claude -p --append-system-prompt`.

    Flags precede the prompt; output is expected as JSON on stdout.
    """

    def __init__(self, cwd: str, extra_args: list[str] | None = None):
        # cwd is the working directory the `claude` subprocess is started in;
        # extra_args are spliced into the command line before the `--` separator.
        self.cwd = cwd
        self.extra_args = extra_args or []

    def run(self, contract: AgentContract, input_data: dict) -> dict:
        """Invoke `claude -p` for *contract* and return the agent's result as a dict.

        ``input_data`` is serialised to JSON and passed as the prompt; the
        contract's ``role_prompt`` is appended to the system prompt.

        Raises:
            RuntimeError: the process exited non-zero, stdout was not JSON,
                stdout was JSON but not an object, or the envelope has a
                truthy ``is_error``.
            OutputValidationError: the envelope's ``result`` is neither a dict
                nor a string containing JSON.
        """
        prompt = json.dumps(input_data)
        # `--` ends option parsing so a prompt starting with `-` is never
        # mistaken for a flag.
        cmd = ["claude", "-p", "--output-format", "json",
               "--append-system-prompt", contract.role_prompt,
               *self.extra_args, "--", prompt]
        proc = subprocess.run(cmd, cwd=self.cwd, capture_output=True, text=True)
        if proc.returncode != 0:
            raise RuntimeError(f"claude failed for {contract.role}: {proc.stderr}")
        try:
            envelope = json.loads(proc.stdout)
        except json.JSONDecodeError as e:
            raise RuntimeError(f"{contract.role}: non-JSON stdout from claude -p: {proc.stdout[:500]!r}") from e
        # Valid JSON is not necessarily an object: an array or scalar would make
        # the `.get` calls below fail with an opaque AttributeError.
        if not isinstance(envelope, dict):
            raise RuntimeError(
                f"{contract.role}: unexpected JSON from claude -p "
                f"(expected an object): {proc.stdout[:500]!r}")
        if envelope.get("is_error"):
            raise RuntimeError(f"{contract.role}: claude reported error: {envelope.get('result')}")
        result = envelope.get("result")
        if isinstance(result, dict):
            return result
        try:
            # A string result is expected to carry the agent's JSON payload.
            return json.loads(result)
        except (TypeError, json.JSONDecodeError) as e:
            from cih.contracts import OutputValidationError
            raise OutputValidationError(f"{contract.role}: result was not JSON: {result!r}") from e
53
+
54
def invoke(runner: AgentRunner, contract: AgentContract, input_data: dict) -> dict:
    """Run *contract* through *runner* and return the output once it validates.

    The runner's output is handed to ``contract.validate_output`` before being
    returned, so any exception that call raises propagates to the caller.
    """
    result = runner.run(contract, input_data)
    contract.validate_output(result)
    return result
@@ -0,0 +1,61 @@
1
+ from dataclasses import dataclass, asdict
2
+ from enum import Enum
3
+ from typing import Optional
4
+
5
class AttemptKind(str, Enum):
    """Kinds of attempt a team can record; each member doubles as its string value."""

    PLAN = "plan_retry"
    EXECUTION = "execution_retry"
    INTEGRATION = "integration_retry"
    FINAL_REJECT = "final_reject"
10
+
11
class AttemptCapExceeded(Exception):
    """Raised when a team tries to start an attempt beyond its configured cap."""
13
+
14
@dataclass
class Attempt:
    """One recorded attempt by a team, as stored in an ``AttemptLog``."""

    # Sequential identifier assigned by the log, e.g. "attempt-01".
    attempt_id: str
    # String value of an AttemptKind member (or a raw string passed through).
    kind: str
    base_sha: str
    branch: str
    worktree_path: str
    # Feedback text supplied when the attempt was started.
    feedback_input: str
    # Identifier of the attempt this one follows up on, if any.
    parent_attempt_id: Optional[str] = None
    # True for the newest attempt; the log clears it on earlier ones.
    is_current: bool = True
24
+
25
class AttemptLog:
    """Ordered, capped record of the attempts made by a single team."""

    def __init__(self, team_id: str, cap: int):
        self.team_id = team_id
        self.cap = cap
        self._attempts: list[Attempt] = []

    def start(self, kind: AttemptKind, base_sha: str, branch: str,
              worktree_path: str, feedback: str,
              parent: Optional[str] = None) -> Attempt:
        """Append a new attempt, mark it current, and return it.

        Raises:
            AttemptCapExceeded: if the log already holds ``cap`` attempts.
        """
        recorded = len(self._attempts)
        if recorded >= self.cap:
            raise AttemptCapExceeded(
                f"{self.team_id}: attempt cap {self.cap} reached")

        # Only the newest attempt may be flagged as current.
        for earlier in self._attempts:
            earlier.is_current = False

        # Accept either an AttemptKind member or an already-plain string.
        if isinstance(kind, AttemptKind):
            kind_value = kind.value
        else:
            kind_value = kind

        sequence = recorded + 1
        new_attempt = Attempt(
            attempt_id=f"attempt-{sequence:02d}",
            kind=kind_value,
            base_sha=base_sha,
            branch=branch,
            worktree_path=worktree_path,
            feedback_input=feedback,
            parent_attempt_id=parent,
        )
        self._attempts.append(new_attempt)
        return new_attempt

    def current(self) -> Optional[Attempt]:
        """Return the most recently started attempt, or None when the log is empty."""
        if not self._attempts:
            return None
        return self._attempts[-1]

    def all(self) -> list[Attempt]:
        """Return a shallow copy of the attempts in start order."""
        return self._attempts[:]

    def to_dict(self) -> dict:
        """Serialise the log (team id, cap, and every attempt) to plain dicts."""
        serialised = [asdict(item) for item in self._attempts]
        return {"team_id": self.team_id, "cap": self.cap, "attempts": serialised}

    @classmethod
    def from_dict(cls, d: dict) -> "AttemptLog":
        """Rebuild a log from the structure produced by ``to_dict``."""
        restored = cls(team_id=d["team_id"], cap=d["cap"])
        restored._attempts = [Attempt(**raw) for raw in d["attempts"]]
        return restored
@@ -0,0 +1,81 @@
1
+ import os
2
+ from dataclasses import dataclass, field, asdict
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
class ConfigError(Exception):
    """Raised when run configuration values fail validation."""
8
+
9
# Termination modes accepted by RunConfig.create.
_MODES = {"fixed-N", "until-converged"}

# Upper bound on the number of scoping questions for each --depth name.
DEPTH_BUDGET = dict(low=3, medium=6, high=10)
DEFAULT_DEPTH = "medium"
13
+
14
+
15
def depth_budget(name: Optional[str] = None) -> int:
    """Return the question budget (upper bound) for a --depth name.

    ``None`` selects the default depth.

    Raises:
        ConfigError: if the name is not a known depth.
    """
    key = DEFAULT_DEPTH if name is None else name
    if key in DEPTH_BUDGET:
        return DEPTH_BUDGET[key]
    # List the valid names ordered by budget (smallest first) in the message.
    raise ConfigError(
        f"depth must be one of {sorted(DEPTH_BUDGET, key=DEPTH_BUDGET.__getitem__)} (got {key!r})"
    )
24
+
25
+
26
@dataclass
class RunConfig:
    """Settings for one harness run.

    Build instances with ``RunConfig.create`` (or ``from_dict``) so that the
    mode, the iteration count, and the two paths are validated; calling the
    dataclass constructor directly performs no validation.
    """

    # Termination mode: "fixed-N" or "until-converged".
    mode: str
    # Both paths must be absolute, existing, distinct, non-nested directories.
    target_repo: str
    state_dir: str
    # Required positive int in fixed-N mode; must stay None in until-converged.
    iterations: Optional[int] = None
    max_iterations: int = 25
    budget_cap: Optional[int] = None
    focus_areas: list[str] = field(default_factory=list)
    value_threshold: float = 0.5
    convergence_dry_streak: int = 2
    plan_review_retries: int = 2
    exec_review_retries: int = 2
    max_teams_per_iteration: int = 4
    integration_retries: int = 2
    per_team_attempt_cap: int = 4
    cooldown_iterations: int = 2
    opportunity_max_attempts: int = 3
    tdd_adapter: str = "pytest"

    @staticmethod
    def _validate_paths(target_repo: str, state_dir: str) -> None:
        """Check that both paths are absolute, distinct, non-nested directories.

        Raises:
            ConfigError: on the first rule that is violated.
        """
        for label, p in (("target_repo", target_repo), ("state_dir", state_dir)):
            if not os.path.isabs(p):
                raise ConfigError(f"{label} must be an absolute path: {p}")
        # Resolve before comparing so symlinks and ".." segments cannot hide
        # an overlap between the two locations.
        t = Path(target_repo).resolve()
        s = Path(state_dir).resolve()
        if t == s:
            raise ConfigError("target_repo and state_dir must be distinct")
        if t in s.parents or s in t.parents:
            raise ConfigError("state_dir must not be nested inside target_repo (or vice versa)")
        for label, p in (("target_repo", t), ("state_dir", s)):
            if not p.is_dir():
                raise ConfigError(f"{label} must be an existing directory: {p}")

    @classmethod
    def create(cls, **kwargs) -> "RunConfig":
        """Validate *kwargs* and build a RunConfig from them.

        Raises:
            ConfigError: unknown mode, an iterations value inconsistent with
                the mode, or invalid target_repo / state_dir paths.
        """
        mode = kwargs.get("mode")
        if mode not in _MODES:
            raise ConfigError(f"mode must be one of {_MODES}")
        iterations = kwargs.get("iterations")
        if mode == "fixed-N":
            # bool is a subclass of int, so reject it explicitly:
            # `iterations=True` is not an iteration count.
            if (isinstance(iterations, bool)
                    or not isinstance(iterations, int)
                    or iterations <= 0):
                raise ConfigError("fixed-N mode requires iterations to be a positive int")
        elif mode == "until-converged":
            if iterations is not None:
                raise ConfigError("until-converged mode must not set iterations")
        cls._validate_paths(kwargs["target_repo"], kwargs["state_dir"])
        return cls(**kwargs)

    def to_dict(self) -> dict:
        """Return all fields as a plain dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d: dict) -> "RunConfig":
        """Rebuild a config from ``to_dict`` output, re-running all validation."""
        return cls.create(**d)
@@ -0,0 +1,32 @@
1
+ # cih/contracts.py
2
+ import hashlib
3
+ import json
4
+ from dataclasses import dataclass, field
5
+ from jsonschema import validate, ValidationError
6
+
7
class OutputValidationError(Exception):
    """Raised when an agent's output does not satisfy its contract."""
9
+
10
@dataclass
class AgentContract:
    """Declarative description of one agent role: prompt, schemas, and tooling."""

    role: str
    agent_version: str
    role_prompt: str
    input_schema: dict
    output_schema: dict
    allowed_tools: list = field(default_factory=list)
    runtime_adapter_settings: dict = field(default_factory=dict)

    def validate_output(self, output: dict) -> None:
        """Check *output* against ``output_schema``.

        Raises:
            OutputValidationError: if jsonschema reports a validation error.
        """
        try:
            validate(instance=output, schema=self.output_schema)
        except ValidationError as err:
            raise OutputValidationError(f"{self.role} output invalid: {err.message}") from err

    def prompt_hash(self) -> str:
        """Return a 16-hex-character fingerprint of the contract's content.

        The hash covers the prompt, both schemas, the version, the allowed
        tools, and the adapter settings; the role name is not included.
        """
        fingerprint = {
            "prompt": self.role_prompt,
            "in": self.input_schema,
            "out": self.output_schema,
            "v": self.agent_version,
            "tools": self.allowed_tools,
            "adapter": self.runtime_adapter_settings,
        }
        # sort_keys makes the serialisation independent of dict insertion order.
        encoded = json.dumps(fingerprint, sort_keys=True).encode()
        digest = hashlib.sha256(encoded).hexdigest()
        return digest[:16]