multi-forge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311) hide show
  1. forge/__init__.py +3 -0
  2. forge/_extensions/agents/.gitkeep +0 -0
  3. forge/_extensions/commands/.gitkeep +0 -0
  4. forge/_extensions/skills/analyze/SKILL.md +87 -0
  5. forge/_extensions/skills/challenge/SKILL.md +91 -0
  6. forge/_extensions/skills/consensus/SKILL.md +120 -0
  7. forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
  8. forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
  9. forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
  10. forge/_extensions/skills/debate/SKILL.md +116 -0
  11. forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
  12. forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
  13. forge/_extensions/skills/panel/SKILL.md +141 -0
  14. forge/_extensions/skills/panel/resources/synthesis.md +103 -0
  15. forge/_extensions/skills/qa/SKILL.md +704 -0
  16. forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
  17. forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
  18. forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
  19. forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
  20. forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
  21. forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
  22. forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
  23. forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
  24. forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
  25. forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
  26. forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
  27. forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
  28. forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
  29. forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
  30. forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
  31. forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
  32. forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
  33. forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
  34. forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
  35. forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
  36. forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
  37. forge/_extensions/skills/qa/resources/checklist.md +103 -0
  38. forge/_extensions/skills/qa/resources/report-template.md +62 -0
  39. forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
  40. forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
  41. forge/_extensions/skills/review/SKILL.md +125 -0
  42. forge/_extensions/skills/review/references/claude-4.6.md +474 -0
  43. forge/_extensions/skills/review/references/claude-4.7.md +710 -0
  44. forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
  45. forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
  46. forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
  47. forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
  48. forge/_extensions/skills/review/resources/code-gemini.md +184 -0
  49. forge/_extensions/skills/review/resources/code-openai.md +203 -0
  50. forge/_extensions/skills/review/resources/code.md +160 -0
  51. forge/_extensions/skills/review-docs/SKILL.md +121 -0
  52. forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
  53. forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
  54. forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
  55. forge/_extensions/skills/review-docs/resources/docs.md +170 -0
  56. forge/_extensions/skills/smoke-test/SKILL.md +27 -0
  57. forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
  58. forge/_extensions/skills/understand/SKILL.md +148 -0
  59. forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
  60. forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
  61. forge/_extensions/skills/understand/resources/code-openai.md +181 -0
  62. forge/_extensions/skills/understand/resources/code.md +163 -0
  63. forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
  64. forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
  65. forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
  66. forge/_extensions/skills/understand/resources/docs.md +177 -0
  67. forge/_extensions/skills/walkthrough/SKILL.md +599 -0
  68. forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
  69. forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
  70. forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
  71. forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
  72. forge/backend/__init__.py +174 -0
  73. forge/backend/adapters/__init__.py +38 -0
  74. forge/backend/adapters/litellm.py +158 -0
  75. forge/backend/creation.py +89 -0
  76. forge/backend/registry.py +178 -0
  77. forge/cli/__init__.py +16 -0
  78. forge/cli/auth.py +483 -0
  79. forge/cli/backend.py +298 -0
  80. forge/cli/claude.py +411 -0
  81. forge/cli/config_cmd.py +303 -0
  82. forge/cli/extensions.py +1001 -0
  83. forge/cli/gc.py +165 -0
  84. forge/cli/guard.py +1018 -0
  85. forge/cli/guards.py +106 -0
  86. forge/cli/handoff.py +110 -0
  87. forge/cli/hooks/__init__.py +36 -0
  88. forge/cli/hooks/_group.py +20 -0
  89. forge/cli/hooks/_helpers.py +149 -0
  90. forge/cli/hooks/commands.py +1677 -0
  91. forge/cli/hooks/direct_commands.py +1304 -0
  92. forge/cli/hooks/install.py +232 -0
  93. forge/cli/hooks/policy.py +151 -0
  94. forge/cli/hooks/read_hygiene.py +74 -0
  95. forge/cli/hooks/verification.py +370 -0
  96. forge/cli/logs.py +406 -0
  97. forge/cli/main.py +292 -0
  98. forge/cli/proxy.py +1821 -0
  99. forge/cli/proxy_costs.py +313 -0
  100. forge/cli/search.py +416 -0
  101. forge/cli/session.py +892 -0
  102. forge/cli/session_addendum.py +81 -0
  103. forge/cli/session_fork.py +750 -0
  104. forge/cli/session_handoff.py +141 -0
  105. forge/cli/session_lifecycle.py +2053 -0
  106. forge/cli/session_manage.py +1336 -0
  107. forge/cli/session_memory.py +201 -0
  108. forge/cli/status_line.py +1398 -0
  109. forge/cli/workflow.py +1964 -0
  110. forge/config/__init__.py +110 -0
  111. forge/config/dataclass_utils.py +88 -0
  112. forge/config/defaults/__init__.py +0 -0
  113. forge/config/defaults/backends/__init__.py +0 -0
  114. forge/config/defaults/backends/litellm.yaml +196 -0
  115. forge/config/defaults/templates/__init__.py +0 -0
  116. forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
  117. forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
  118. forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
  119. forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
  120. forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
  121. forge/config/defaults/templates/litellm-gemini.yaml +21 -0
  122. forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
  123. forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
  124. forge/config/defaults/templates/litellm-openai.yaml +28 -0
  125. forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
  126. forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
  127. forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
  128. forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
  129. forge/config/defaults/templates/openrouter-glm.yaml +23 -0
  130. forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
  131. forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
  132. forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
  133. forge/config/defaults/templates/openrouter-openai.yaml +28 -0
  134. forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
  135. forge/config/loader.py +675 -0
  136. forge/config/schema.py +448 -0
  137. forge/core/__init__.py +5 -0
  138. forge/core/auth/__init__.py +67 -0
  139. forge/core/auth/capabilities.py +219 -0
  140. forge/core/auth/credentials_file.py +244 -0
  141. forge/core/auth/protocols.py +18 -0
  142. forge/core/auth/secrets.py +243 -0
  143. forge/core/auth/template_secrets.py +112 -0
  144. forge/core/data/__init__.py +5 -0
  145. forge/core/data/model_catalog.yaml +1522 -0
  146. forge/core/data/pricing.yaml +140 -0
  147. forge/core/data/system_prompt_addendums/__init__.py +0 -0
  148. forge/core/data/system_prompt_addendums/gemini.md +330 -0
  149. forge/core/data/system_prompt_addendums/openai.md +328 -0
  150. forge/core/llm/__init__.py +231 -0
  151. forge/core/llm/clients/__init__.py +14 -0
  152. forge/core/llm/clients/base.py +115 -0
  153. forge/core/llm/clients/litellm.py +619 -0
  154. forge/core/llm/clients/openai_compat.py +244 -0
  155. forge/core/llm/clients/openrouter.py +234 -0
  156. forge/core/llm/credentials.py +439 -0
  157. forge/core/llm/detection.py +86 -0
  158. forge/core/llm/errors.py +44 -0
  159. forge/core/llm/protocols.py +80 -0
  160. forge/core/llm/types.py +176 -0
  161. forge/core/logging.py +146 -0
  162. forge/core/models/__init__.py +91 -0
  163. forge/core/models/catalog.py +467 -0
  164. forge/core/models/pricing.py +165 -0
  165. forge/core/models/types.py +167 -0
  166. forge/core/naming.py +212 -0
  167. forge/core/ops/__init__.py +73 -0
  168. forge/core/ops/context.py +141 -0
  169. forge/core/ops/gc.py +802 -0
  170. forge/core/ops/proxy.py +146 -0
  171. forge/core/ops/resolution.py +135 -0
  172. forge/core/ops/session.py +344 -0
  173. forge/core/ops/session_context.py +548 -0
  174. forge/core/paths.py +38 -0
  175. forge/core/process.py +54 -0
  176. forge/core/reactive/__init__.py +38 -0
  177. forge/core/reactive/cost_tracking.py +300 -0
  178. forge/core/reactive/env.py +180 -0
  179. forge/core/reactive/proxy.py +78 -0
  180. forge/core/reactive/routing.py +622 -0
  181. forge/core/reactive/session_runner.py +185 -0
  182. forge/core/reactive/structured_output.py +62 -0
  183. forge/core/reactive/tagger.py +94 -0
  184. forge/core/reactive/throttle.py +132 -0
  185. forge/core/state/__init__.py +59 -0
  186. forge/core/state/exceptions.py +59 -0
  187. forge/core/state/io.py +140 -0
  188. forge/core/state/lock.py +99 -0
  189. forge/core/state/timestamps.py +60 -0
  190. forge/core/transcript.py +78 -0
  191. forge/core/typing_helpers.py +24 -0
  192. forge/core/workqueue/__init__.py +67 -0
  193. forge/core/workqueue/queue.py +552 -0
  194. forge/core/workqueue/types.py +63 -0
  195. forge/guard/__init__.py +26 -0
  196. forge/guard/deterministic/__init__.py +26 -0
  197. forge/guard/deterministic/base.py +158 -0
  198. forge/guard/deterministic/coding_standards.py +256 -0
  199. forge/guard/deterministic/registry.py +148 -0
  200. forge/guard/deterministic/tdd.py +171 -0
  201. forge/guard/engine.py +216 -0
  202. forge/guard/protocols.py +91 -0
  203. forge/guard/queries.py +96 -0
  204. forge/guard/semantic/__init__.py +34 -0
  205. forge/guard/semantic/promotion.py +18 -0
  206. forge/guard/semantic/supervisor.py +813 -0
  207. forge/guard/semantic/verdict.py +183 -0
  208. forge/guard/store.py +124 -0
  209. forge/guard/team/__init__.py +6 -0
  210. forge/guard/team/config.py +24 -0
  211. forge/guard/team/handlers.py +209 -0
  212. forge/guard/team/prompts.py +41 -0
  213. forge/guard/types.py +125 -0
  214. forge/guard/workflow/__init__.py +17 -0
  215. forge/guard/workflow/branches.py +67 -0
  216. forge/guard/workflow/config.py +63 -0
  217. forge/guard/workflow/divergence.py +113 -0
  218. forge/guard/workflow/policy.py +87 -0
  219. forge/guard/workflow/stages.py +205 -0
  220. forge/install/__init__.py +55 -0
  221. forge/install/cli.py +281 -0
  222. forge/install/exceptions.py +163 -0
  223. forge/install/hooks.py +109 -0
  224. forge/install/installer.py +1037 -0
  225. forge/install/models.py +321 -0
  226. forge/install/preset.py +272 -0
  227. forge/install/settings_merge.py +831 -0
  228. forge/install/tracking.py +238 -0
  229. forge/install/version.py +141 -0
  230. forge/proxy/__init__.py +0 -0
  231. forge/proxy/base_client.py +181 -0
  232. forge/proxy/client_adapter.py +476 -0
  233. forge/proxy/client_factory.py +531 -0
  234. forge/proxy/converters.py +1206 -0
  235. forge/proxy/cost_logger.py +132 -0
  236. forge/proxy/cost_tracker.py +242 -0
  237. forge/proxy/data_models.py +338 -0
  238. forge/proxy/error_hints.py +92 -0
  239. forge/proxy/metrics.py +222 -0
  240. forge/proxy/model_spec.py +158 -0
  241. forge/proxy/proxies.py +333 -0
  242. forge/proxy/proxy_identity.py +134 -0
  243. forge/proxy/proxy_orchestrator.py +1018 -0
  244. forge/proxy/proxy_startup.py +54 -0
  245. forge/proxy/server.py +1561 -0
  246. forge/proxy/utils.py +537 -0
  247. forge/review/__init__.py +6 -0
  248. forge/review/adversarial.py +111 -0
  249. forge/review/consensus.py +236 -0
  250. forge/review/engine.py +356 -0
  251. forge/review/models.py +437 -0
  252. forge/review/resources/__init__.py +5 -0
  253. forge/review/resources/codereview-performance.md +85 -0
  254. forge/review/resources/codereview-quick.md +75 -0
  255. forge/review/resources/codereview-security.md +92 -0
  256. forge/review/resources/codereview.md +85 -0
  257. forge/review/resources/docreview-quick.md +75 -0
  258. forge/review/resources/docreview.md +86 -0
  259. forge/review/resources/thinkdeep.md +89 -0
  260. forge/review/routing.py +368 -0
  261. forge/review/synthesis.py +73 -0
  262. forge/runtime_config.py +438 -0
  263. forge/search/__init__.py +55 -0
  264. forge/search/bm25_store.py +264 -0
  265. forge/search/content_store.py +197 -0
  266. forge/search/engine.py +352 -0
  267. forge/search/exceptions.py +51 -0
  268. forge/search/extractor.py +234 -0
  269. forge/search/index_state.py +295 -0
  270. forge/search/store.py +215 -0
  271. forge/search/tokenizer.py +24 -0
  272. forge/session/__init__.py +130 -0
  273. forge/session/active.py +339 -0
  274. forge/session/artifacts.py +202 -0
  275. forge/session/claude/__init__.py +50 -0
  276. forge/session/claude/cleanup.py +105 -0
  277. forge/session/claude/invoke.py +236 -0
  278. forge/session/claude/paths.py +200 -0
  279. forge/session/cleanup.py +216 -0
  280. forge/session/config.py +34 -0
  281. forge/session/direct_model.py +107 -0
  282. forge/session/effective.py +169 -0
  283. forge/session/exceptions.py +255 -0
  284. forge/session/handoff.py +881 -0
  285. forge/session/handoff_agent.py +544 -0
  286. forge/session/hooks/__init__.py +35 -0
  287. forge/session/hooks/models.py +73 -0
  288. forge/session/hooks/session_start.py +507 -0
  289. forge/session/identity.py +84 -0
  290. forge/session/index.py +553 -0
  291. forge/session/manager.py +1506 -0
  292. forge/session/models.py +572 -0
  293. forge/session/overrides.py +344 -0
  294. forge/session/plan_resolution.py +286 -0
  295. forge/session/prev_sessions.py +128 -0
  296. forge/session/store.py +431 -0
  297. forge/session/validation.py +47 -0
  298. forge/session/worktree/__init__.py +65 -0
  299. forge/session/worktree/cleanup.py +262 -0
  300. forge/session/worktree/config_copy.py +203 -0
  301. forge/session/worktree/create.py +332 -0
  302. forge/sidecar/__init__.py +29 -0
  303. forge/sidecar/container.py +161 -0
  304. forge/sidecar/docker.py +86 -0
  305. forge/sidecar/secrets.py +19 -0
  306. multi_forge-0.2.0.dist-info/METADATA +242 -0
  307. multi_forge-0.2.0.dist-info/RECORD +311 -0
  308. multi_forge-0.2.0.dist-info/WHEEL +4 -0
  309. multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
  310. multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
  311. multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,183 @@
1
+ """Supervisor verdict parsing and conversion.
2
+
3
+ Parses structured JSON responses from the semantic supervisor and
4
+ converts them to PolicyDecision objects.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from dataclasses import dataclass, field
11
+ from typing import Any, Literal
12
+
13
+ from forge.core.reactive.structured_output import extract_json_from_response
14
+ from forge.guard.types import PolicyDecision, Severity, Violation
15
+
16
+ _log = logging.getLogger(__name__)
17
+
18
+ # Confidence threshold for blocking (require high confidence + citations)
19
+ CONFIDENCE_THRESHOLD = 0.8
20
+
21
+
22
@dataclass
class SupervisorVerdict:
    """Structured outcome reported by the semantic supervisor.

    Attributes:
        verdict: Either "aligned" (the action matches the plan) or
            "divergent" (the action deviates from it).
        confidence: How sure the supervisor is of the verdict, in 0.0-1.0.
        violations: Detail dicts describing each violation; populated for
            divergent verdicts.
    """

    verdict: Literal["aligned", "divergent"]
    confidence: float = 1.0
    # default_factory gives every instance its own list.
    violations: list[dict[str, Any]] = field(default_factory=list)
35
+
36
+
37
def _warn_verdict(evidence: str, suggested_fix: str) -> SupervisorVerdict:
    """Build a zero-confidence divergent verdict (maps to warn, not deny).

    Args:
        evidence: Text stored under the violation's "evidence" key.
        suggested_fix: Text stored under the violation's "suggested_fix" key.

    Returns:
        A divergent SupervisorVerdict with confidence 0.0 and a single
        low-severity violation that carries no citations.
    """
    fallback_violation: dict[str, Any] = {
        "severity": "low",
        "evidence": evidence,
        "suggested_fix": suggested_fix,
        "citations": [],
    }
    return SupervisorVerdict(
        verdict="divergent",
        confidence=0.0,
        violations=[fallback_violation],
    )
51
+
52
+
53
def parse_supervisor_verdict(response: str) -> SupervisorVerdict:
    """Extract a JSON verdict from the supervisor's raw response.

    Uses ``extract_json_from_response`` for code-fence/raw JSON extraction,
    then validates the verdict structure. Responses that are empty, cannot
    be parsed, or parse to something other than a JSON object return a
    divergent verdict with 0.0 confidence (maps to "warn", not deny or
    silent allow).

    Args:
        response: Raw text response from the supervisor.

    Returns:
        Parsed SupervisorVerdict.
    """
    if not response:
        _log.warning("Empty supervisor response, failing open with warning")
        return _warn_verdict(
            "Supervisor response was empty — check supervisor session health",
            "Verify supervisor resume_id and proxy connectivity",
        )

    data = extract_json_from_response(response)
    # Anything that is not a dict (None, or a JSON array/scalar if the
    # extractor hands one back) has no verdict fields to read; treat it as
    # unparseable instead of letting ``.get`` raise further down.
    if not isinstance(data, dict):
        _log.warning("Could not parse supervisor verdict, failing open with warning")
        return _warn_verdict(
            "Supervisor verdict could not be parsed — check supervisor response format",
            "Verify supervisor session responds with valid JSON verdict",
        )

    return _parse_verdict_data(data)
83
+
84
+
85
def _parse_verdict_data(data: dict[str, Any]) -> SupervisorVerdict:
    """Validate decoded verdict JSON and build a SupervisorVerdict.

    Args:
        data: Decoded JSON object from the supervisor response.

    Returns:
        SupervisorVerdict where:
        - ``verdict`` is "aligned" unless the payload says "divergent"
          (missing or unknown values are treated as aligned);
        - ``confidence`` is clamped to [0.0, 1.0], defaulting to 1.0 when
          missing or non-numeric;
        - ``violations`` contains only dict entries (anything else is
          dropped so consumers can safely call ``.get`` on each entry).
    """
    verdict = data.get("verdict", "aligned")
    if verdict not in ("aligned", "divergent"):
        _log.warning("Unknown verdict '%s', treating as aligned", verdict)
        verdict = "aligned"

    confidence = data.get("confidence", 1.0)
    if not isinstance(confidence, (int, float)):
        confidence = 1.0
    confidence = max(0.0, min(1.0, float(confidence)))

    raw_violations = data.get("violations", [])
    if not isinstance(raw_violations, list):
        raw_violations = []
    # The dataclass promises list[dict]; a stray string/number/null entry
    # would otherwise blow up when the verdict is converted to a decision.
    violations = [entry for entry in raw_violations if isinstance(entry, dict)]
    dropped = len(raw_violations) - len(violations)
    if dropped:
        _log.warning("Dropped %d malformed violation entries from supervisor verdict", dropped)

    return SupervisorVerdict(
        verdict=verdict,  # type: ignore[arg-type]  # mypy doesn't track narrowing from reassignment
        confidence=confidence,
        violations=violations,
    )
106
+
107
+
108
def verdict_to_decision(verdict: SupervisorVerdict, *, intent: str | None = None) -> PolicyDecision:
    """Convert a SupervisorVerdict to a PolicyDecision.

    Blocking rules:
    - Aligned verdicts always allow
    - Divergent verdicts only block if:
        - Confidence >= CONFIDENCE_THRESHOLD
        - At least one violation has a non-empty list of citations
    - Low confidence or no citations → warn only

    Violation entries that are not dicts are ignored (with a log warning).
    A ``citations`` value that is not a list counts as "no citations", so
    the blocking decision always agrees with the citations recorded on the
    resulting Violation.

    Args:
        verdict: Parsed supervisor verdict.
        intent: Policy intent to attach to deny decisions.

    Returns:
        PolicyDecision (allow, deny, or warn).
    """
    policy_id = "semantic.supervisor"

    # Aligned = allow
    if verdict.verdict == "aligned":
        return PolicyDecision(
            decision="allow",
            policy_id=policy_id,
        )

    # Divergent: check confidence and citations.
    # Confidence is per-verdict, so evaluate the threshold once.
    confident = verdict.confidence >= CONFIDENCE_THRESHOLD
    blocking_violations: list[Violation] = []
    warnings: list[str] = []

    for v in verdict.violations:
        if not isinstance(v, dict):
            _log.warning("Ignoring malformed supervisor violation entry: %r", v)
            continue

        # Normalize before the blocking check so a non-list value cannot
        # trigger a deny while the stored Violation shows no citations.
        raw_citations = v.get("citations", [])
        citations = raw_citations if isinstance(raw_citations, list) else []

        severity_str = v.get("severity", "medium")
        severity: Severity = (
            severity_str if severity_str in ("critical", "high", "medium", "low") else "medium"
        )  # type: ignore[assignment]  # membership check narrows str to Literal at runtime

        violation = Violation(
            rule_id=f"{policy_id}.alignment",
            # ``or`` also covers an explicit null/empty evidence value.
            message=v.get("evidence") or "Divergent from plan",
            severity=severity,
            evidence=v.get("evidence"),
            suggested_fix=v.get("suggested_fix"),
            citations=citations,
        )

        # Only block on high-confidence violations with citations
        if confident and citations:
            blocking_violations.append(violation)
        else:
            # Low confidence or no citations → warning only
            warnings.append(f"Possible divergence: {violation.message} (confidence: {verdict.confidence:.0%})")

    if blocking_violations:
        return PolicyDecision(
            decision="deny",
            policy_id=policy_id,
            violations=blocking_violations,
            warnings=warnings,
            intent=intent,
        )

    # No blocking violations (low confidence or no citations)
    if warnings:
        return PolicyDecision(
            decision="warn",
            policy_id=policy_id,
            warnings=warnings,
        )

    # No usable violations at all (shouldn't happen for divergent, but handle gracefully)
    return PolicyDecision(
        decision="warn",
        policy_id=policy_id,
        warnings=[f"Divergent verdict with no specific violations (confidence: {verdict.confidence:.0%})"],
    )
forge/guard/store.py ADDED
@@ -0,0 +1,124 @@
1
+ """Helpers for reading/writing policy state to the session manifest.
2
+
3
+ Policy state is persisted to confirmed.policy in the session manifest.
4
+ This enables stateful policies (like TDD) to track state across hook
5
+ invocations, since hooks are short-lived processes.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from typing import Any
12
+
13
+ from forge.core.state import now_iso
14
+ from forge.guard.types import CompositeDecision, PolicyDecision, Violation
15
+
16
+ _log = logging.getLogger(__name__)
17
+
18
+ # Maximum number of decisions to keep in the log
19
+ MAX_DECISION_LOG = 100
20
+
21
+
22
def serialize_decision(decision: PolicyDecision) -> dict[str, Any]:
    """Flatten a PolicyDecision into a plain dict for persistence.

    Args:
        decision: The policy decision to convert.

    Returns:
        Dict with the decision outcome, policy id, serialized violations,
        warnings, cached flag and evaluation timestamp.
    """
    serialized_violations = list(map(_serialize_violation, decision.violations))
    payload: dict[str, Any] = {
        "decision": decision.decision,
        "policy_id": decision.policy_id,
        "violations": serialized_violations,
        "warnings": decision.warnings,
        "cached": decision.cached,
        "evaluated_at": decision.evaluated_at,
    }
    return payload
39
+
40
+
41
def _serialize_violation(violation: Violation) -> dict[str, Any]:
    """Copy a Violation's fields into a plain dict for persistence.

    Args:
        violation: The violation to convert.

    Returns:
        Dict keyed by rule_id, message, severity, evidence, suggested_fix
        and citations (in that order), each holding the attribute's value.
    """
    persisted_fields = (
        "rule_id",
        "message",
        "severity",
        "evidence",
        "suggested_fix",
        "citations",
    )
    return {name: getattr(violation, name) for name in persisted_fields}
51
+
52
+
53
def serialize_composite_decision(
    composite: CompositeDecision,
    context_summary: str | None = None,
) -> dict[str, Any]:
    """Turn a CompositeDecision into one decision-log entry.

    Args:
        composite: The composite decision to convert.
        context_summary: Optional summary of the action context.

    Returns:
        Dict with the final decision, the context summary, serialized
        blocking violations, all warnings, a timestamp taken from
        ``now_iso()`` at serialization time, and each per-policy decision.
    """
    blocking = [_serialize_violation(item) for item in composite.blocking_violations]
    per_policy = [serialize_decision(item) for item in composite.decisions]
    entry: dict[str, Any] = {
        "final_decision": composite.final_decision,
        "context_summary": context_summary,
        "blocking_violations": blocking,
        "warnings": composite.all_warnings,
        "evaluated_at": now_iso(),
        "decisions": per_policy,
    }
    return entry
74
+
75
+
76
def build_policy_state_update(
    result: CompositeDecision,
    engine_state: dict[str, dict[str, Any]],
    existing_state: dict[str, Any] | None,
    *,
    forge_version: str | None = None,
    bundles: list[str] | None = None,
    rules_active: list[str] | None = None,
    context_summary: str | None = None,
) -> dict[str, Any]:
    """Produce the next ``confirmed.policy`` value for the session manifest.

    The new decision is appended to the (size-bounded) decision log, and the
    engine's collected policy states are layered on top of the stored ones.
    States for policies missing from ``engine_state`` are carried over
    unchanged; only policies present in ``engine_state`` are overwritten.

    Args:
        result: The composite decision from evaluation.
        engine_state: Collected state from evaluated stateful policies,
            keyed by policy_id.
        existing_state: Current confirmed.policy state (may be None).
        forge_version: Forge version for provenance; a falsy value keeps
            the stored one.
        bundles: Active bundle names for provenance; a falsy value keeps
            the stored list.
        rules_active: Active rule IDs for provenance; a falsy value keeps
            the stored list.
        context_summary: Summary of the action for logging.

    Returns:
        Dict to write to confirmed.policy.
    """
    prior = existing_state or {}

    # Extend the history, then keep only the newest MAX_DECISION_LOG entries.
    history = [
        *prior.get("decisions", []),
        serialize_composite_decision(result, context_summary),
    ]
    if len(history) > MAX_DECISION_LOG:
        history = history[-MAX_DECISION_LOG:]

    # Later keys win: freshly evaluated policies replace their stored state,
    # everything else is carried over as-is.
    merged_states = {**prior.get("policy_states", {}), **engine_state}

    version = forge_version if forge_version else prior.get("forge_version")
    active_bundles = bundles if bundles else prior.get("bundles", [])
    active_rules = rules_active if rules_active else prior.get("rules_active", [])

    return {
        "forge_version": version,
        "bundles": active_bundles,
        "rules_active": active_rules,
        "decisions": history,
        "policy_states": merged_states,
    }
@@ -0,0 +1,6 @@
1
+ """Team hook handlers for Claude Code Agent Teams.
2
+
3
+ Quality gates at TeammateIdle and TaskCompleted boundaries using
4
+ the shared reactive library (SyncAdapter, run_claude_session,
5
+ extract_json_from_response).
6
+ """
@@ -0,0 +1,24 @@
1
+ """Configuration for team quality gate hooks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+
8
@dataclass
class TeamSupervisorConfig:
    """Settings for the team quality gate hooks.

    Lives on ``PolicyIntent.team_supervisor``. When ``None``, team hooks
    are no-ops (allow everything, fail-open).
    """

    # Master switch; presumably checked by the hook entry point.
    enabled: bool = False
    # Model used for the event classification call.
    tagger_model: str = "gemini/gemini-2.0-flash"
    # Supervisor session to resume for reviews.
    resume_id: str | None = None
    # Proxy name used to look up a base URL.
    proxy: str | None = None
    # When true, no base URL is resolved for the supervisor session.
    direct: bool = False
    # Explicit base URL; takes precedence over the proxy lookup.
    base_url: str | None = None
    # Timeout handed to the supervisor session.
    timeout_seconds: int = 45
    # Window during which a cached result is reused.
    throttle_seconds: int = 60
    # Escape hatch: auto-allow a task after this many blocks.
    max_blocks_per_task: int = 3
@@ -0,0 +1,209 @@
1
+ """Team hook handler logic for TeammateIdle and TaskCompleted.
2
+
3
+ Handlers return ``(exit_code, stderr_message)``:
4
+ - ``(0, "")`` = allow (teammate goes idle / task marked completed)
5
+ - ``(2, "feedback")`` = block (teammate continues / task stays open)
6
+
7
+ All errors fail-open (return 0). Uses file-backed cache at
8
+ ``~/.forge/team-hooks/<session_id>.json`` for throttle + escape hatch.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import logging
14
+ from datetime import datetime, timezone
15
+ from typing import Any
16
+
17
+ from forge.core.reactive.proxy import lookup_proxy_base_url
18
+ from forge.core.reactive.session_runner import run_claude_session
19
+ from forge.core.reactive.structured_output import extract_json_from_response
20
+ from forge.core.state import now_iso
21
+ from forge.guard.team.config import TeamSupervisorConfig
22
+ from forge.guard.team.prompts import (
23
+ IDLE_TAGGER_PROMPT,
24
+ TASK_TAGGER_PROMPT,
25
+ TEAM_SUPERVISOR_PROMPT,
26
+ )
27
+
28
+ _log = logging.getLogger(__name__)
29
+
30
+
31
def handle_teammate_idle(
    data: dict[str, Any],
    config: TeamSupervisorConfig,
    cache: dict[str, Any],
) -> tuple[int, str]:
    """Decide whether a teammate may go idle.

    Flow: reuse a cached result that is still inside the throttle window,
    otherwise classify the event; only events tagged "needs-review" are
    escalated to the supervisor, and only when a resume id is configured.

    Args:
        data: Raw hook event payload from Claude Code.
        config: Team supervisor configuration.
        cache: File-backed dict (loaded/saved by caller); updated in place.

    Returns:
        ``(exit_code, stderr_feedback)``.
    """
    teammate = data.get("teammate_name") or "unknown"
    team = data.get("team_name") or "unknown"
    cache_key = f"{teammate}:idle"

    previous = cache.get(cache_key)
    if previous and _is_fresh(previous, config.throttle_seconds):
        return previous.get("exit_code", 0), previous.get("feedback", "")

    needs_review = _classify_event(config.tagger_model, IDLE_TAGGER_PROMPT, teammate, team) == "needs-review"
    if not needs_review:
        # Remember the allow so repeat events inside the window skip the tagger.
        cache[cache_key] = {"checked_at": now_iso(), "exit_code": 0, "feedback": ""}
        return 0, ""

    # Without a supervisor session to resume there is nothing to escalate to.
    if not config.resume_id:
        return 0, ""

    exit_code, feedback = _run_supervisor(config, teammate, team, "idle", "")
    cache[cache_key] = {
        "checked_at": now_iso(),
        "exit_code": exit_code,
        "feedback": feedback,
    }
    return exit_code, feedback
69
+
70
+
71
def handle_task_completed(
    data: dict[str, Any],
    config: TeamSupervisorConfig,
    cache: dict[str, Any],
) -> tuple[int, str]:
    """Decide whether a task may be marked completed.

    Flow: auto-allow once the task has been blocked
    ``config.max_blocks_per_task`` times, then reuse a cached result that is
    still inside the throttle window, otherwise classify the event; only
    events tagged "needs-review" are escalated to the supervisor, and only
    when a resume id is configured.

    Args:
        data: Raw hook event payload from Claude Code.
        config: Team supervisor configuration.
        cache: File-backed dict (loaded/saved by caller); updated in place.

    Returns:
        ``(exit_code, stderr_feedback)``.
    """
    teammate = data.get("teammate_name") or "unknown"
    team = data.get("team_name") or "unknown"
    task_id = data.get("task_id") or "unknown"
    task_subject = data.get("task_subject")
    cache_key = f"{teammate}:{task_id}"

    cached = cache.get(cache_key, {})
    prior_blocks = cached.get("block_count", 0)

    # Escape hatch: auto-allow after max_blocks_per_task
    if prior_blocks >= config.max_blocks_per_task:
        _log.info(
            "Escape hatch: auto-allowing %s after %d blocks",
            cache_key,
            config.max_blocks_per_task,
        )
        return 0, ""

    if _is_fresh(cached, config.throttle_seconds):
        return cached.get("exit_code", 0), cached.get("feedback", "")

    tag = _classify_event(config.tagger_model, TASK_TAGGER_PROMPT, teammate, team, task_subject)
    if tag != "needs-review":
        cache[cache_key] = {"checked_at": now_iso(), "exit_code": 0, "feedback": ""}
        return 0, ""

    # Without a supervisor session to resume there is nothing to escalate to.
    if not config.resume_id:
        return 0, ""

    task_context = f"Task: {task_subject or 'unknown'} (id: {task_id})"
    exit_code, feedback = _run_supervisor(config, teammate, team, "task-completed", task_context)

    # Only an actual block (exit code 2) counts toward the escape hatch.
    was_blocked = exit_code == 2
    cache[cache_key] = {
        "checked_at": now_iso(),
        "exit_code": exit_code,
        "feedback": feedback,
        "block_count": prior_blocks + (1 if was_blocked else 0),
    }
    return exit_code, feedback
125
+
126
+
127
def _is_fresh(entry: dict[str, Any], throttle_seconds: int) -> bool:
    """Return True if the cache entry is within the throttle window.

    Args:
        entry: Cache entry; its "checked_at" value is expected to be an
            ISO-8601 timestamp string (a trailing "Z" is accepted).
        throttle_seconds: Maximum age, in seconds, for an entry to count
            as fresh.

    Returns:
        True when the entry is younger than ``throttle_seconds``. False when
        the timestamp is missing, not a string, unparseable, or lacks a UTC
        offset — a damaged cache entry is simply treated as stale.
    """
    checked_at = entry.get("checked_at")
    # The cache is loaded from a file, so the value may be any JSON type;
    # a non-string would otherwise raise AttributeError on ``.replace``.
    if not checked_at or not isinstance(checked_at, str):
        return False
    try:
        # Older ``fromisoformat`` versions reject the "Z" suffix.
        checked_time = datetime.fromisoformat(checked_at.replace("Z", "+00:00"))
        # Subtracting a naive datetime from an aware one raises TypeError.
        age = (datetime.now(timezone.utc) - checked_time).total_seconds()
    except (ValueError, TypeError):
        return False
    return age < throttle_seconds
138
+
139
+
140
def _classify_event(
    model: str,
    prompt_template: str,
    teammate: str,
    team: str,
    task_subject: str | None = None,
) -> str:
    """Tag an event with a single LLM call.

    Args:
        model: Model identifier passed to ``get_client``.
        prompt_template: Template formatted with ``teammate_name``,
            ``team_name`` and ``task_subject``.
        teammate: Teammate name substituted into the prompt.
        team: Team name substituted into the prompt.
        task_subject: Task subject substituted into the prompt; ``None``
            becomes an empty string.

    Returns:
        The first whitespace-separated word of the lower-cased response, or
        "routine" when the response is blank or any step raises.
    """
    try:
        # Imported lazily, inside the try, so an import failure is also
        # reported as "routine".
        from forge.core.llm import SyncAdapter, get_client

        rendered = prompt_template.format(
            teammate_name=teammate,
            team_name=team,
            task_subject=task_subject or "",
        )
        reply = SyncAdapter(get_client(model)).ask(rendered)
        tokens = reply.strip().lower().split()
        if not tokens:
            return "routine"
        return tokens[0]
    except Exception as e:
        _log.warning("Team tagger failed: %s", e)
        return "routine"
163
+
164
+
165
def _run_supervisor(
    config: TeamSupervisorConfig,
    teammate: str,
    team: str,
    event_type: str,
    task_context: str,
) -> tuple[int, str]:
    """Run cross-team supervisor. Returns ``(exit_code, feedback)``.

    Fail-open (``(0, "")``) on: FORGE_DEPTH limit, proxy lookup failure,
    unsuccessful session result, parse failure, non-dict extraction, or a
    verdict other than "divergent".

    Args:
        config: Team supervisor configuration (resume id, proxy/base URL,
            timeout).
        teammate: Teammate name substituted into the supervisor prompt.
        team: Team name substituted into the supervisor prompt.
        event_type: Event label substituted into the supervisor prompt.
        task_context: Extra context line for the prompt (may be empty).

    Returns:
        ``(2, feedback)`` when the supervisor reports "divergent", where
        ``feedback`` is always a non-empty string (a default message is used
        if the supervisor supplied none); ``(0, "")`` otherwise.
    """
    from forge.core.reactive.env import should_spawn_subprocesses

    if not should_spawn_subprocesses():
        _log.debug("Skipping team supervisor at FORGE_DEPTH limit")
        return 0, ""

    try:
        # Direct mode uses no base URL; otherwise an explicit base_url wins
        # over the proxy lookup.
        base_url = None if config.direct else (config.base_url or lookup_proxy_base_url(config.proxy))
    except Exception as e:
        _log.warning("Team supervisor proxy '%s' not found: %s", config.proxy, e)
        return 0, ""
    prompt = TEAM_SUPERVISOR_PROMPT.format(
        teammate_name=teammate,
        team_name=team,
        event_type=event_type,
        task_context=task_context,
    )
    result = run_claude_session(
        prompt,
        resume_id=config.resume_id,
        base_url=base_url,
        timeout_seconds=config.timeout_seconds,
    )
    if not result.success:
        _log.warning("Team supervisor failed: %s", result.error)
        return 0, ""

    verdict = extract_json_from_response(result.stdout)
    if not isinstance(verdict, dict) or verdict.get("verdict") != "divergent":
        return 0, ""

    # The supervisor may send "feedback": null, a non-string, or a blank
    # string; a block with no usable message gives the teammate nothing to
    # act on, so fall back to the default text in those cases.
    feedback = verdict.get("feedback")
    if not isinstance(feedback, str) or not feedback.strip():
        feedback = "Supervisor flagged work as divergent"
    return 2, feedback
@@ -0,0 +1,41 @@
1
+ """Prompt templates for team hook handlers."""
2
+
3
+ IDLE_TAGGER_PROMPT = """\
4
+ A teammate went idle. Classify why (respond with just the tag):
5
+
6
+ - needs-review: work may need verification before proceeding
7
+ - routine: normal idle (thinking, waiting for dependency)
8
+ - trivial: brief pause, no action needed
9
+
10
+ Teammate: {teammate_name}, Team: {team_name}
11
+ Tag:"""
12
+
13
+ TASK_TAGGER_PROMPT = """\
14
+ A teammate completed a task. Classify the result (respond with just the tag):
15
+
16
+ - needs-review: completed work should be verified for quality/alignment
17
+ - routine: standard task completion, no concerns
18
+ - trivial: minor task, no review needed
19
+
20
+ Teammate: {teammate_name}, Team: {team_name}
21
+ Task: {task_subject}
22
+ Tag:"""
23
+
24
+ TEAM_SUPERVISOR_PROMPT = """\
25
+ You are a team supervisor reviewing teammate work against the approved plan.
26
+
27
+ Teammate: {teammate_name} ({team_name})
28
+ Event: {event_type}
29
+ {task_context}
30
+
31
+ Evaluate whether this work aligns with the approved plan.
32
+ Focus on: correct approach, right files modified, tests included.
33
+
34
+ Respond with JSON in a code fence:
35
+ ```json
36
+ {{
37
+ "verdict": "aligned" | "divergent",
38
+ "confidence": 0.0-1.0,
39
+ "feedback": "Brief feedback message for the teammate"
40
+ }}
41
+ ```"""