multi-forge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forge/__init__.py +3 -0
- forge/_extensions/agents/.gitkeep +0 -0
- forge/_extensions/commands/.gitkeep +0 -0
- forge/_extensions/skills/analyze/SKILL.md +87 -0
- forge/_extensions/skills/challenge/SKILL.md +91 -0
- forge/_extensions/skills/consensus/SKILL.md +120 -0
- forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
- forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
- forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
- forge/_extensions/skills/debate/SKILL.md +116 -0
- forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
- forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
- forge/_extensions/skills/panel/SKILL.md +141 -0
- forge/_extensions/skills/panel/resources/synthesis.md +103 -0
- forge/_extensions/skills/qa/SKILL.md +704 -0
- forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
- forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
- forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
- forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
- forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
- forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
- forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
- forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
- forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
- forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
- forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
- forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
- forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
- forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
- forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
- forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
- forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
- forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
- forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
- forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
- forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
- forge/_extensions/skills/qa/resources/checklist.md +103 -0
- forge/_extensions/skills/qa/resources/report-template.md +62 -0
- forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
- forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
- forge/_extensions/skills/review/SKILL.md +125 -0
- forge/_extensions/skills/review/references/claude-4.6.md +474 -0
- forge/_extensions/skills/review/references/claude-4.7.md +710 -0
- forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
- forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
- forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
- forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
- forge/_extensions/skills/review/resources/code-gemini.md +184 -0
- forge/_extensions/skills/review/resources/code-openai.md +203 -0
- forge/_extensions/skills/review/resources/code.md +160 -0
- forge/_extensions/skills/review-docs/SKILL.md +121 -0
- forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
- forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
- forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
- forge/_extensions/skills/review-docs/resources/docs.md +170 -0
- forge/_extensions/skills/smoke-test/SKILL.md +27 -0
- forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
- forge/_extensions/skills/understand/SKILL.md +148 -0
- forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
- forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
- forge/_extensions/skills/understand/resources/code-openai.md +181 -0
- forge/_extensions/skills/understand/resources/code.md +163 -0
- forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
- forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
- forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
- forge/_extensions/skills/understand/resources/docs.md +177 -0
- forge/_extensions/skills/walkthrough/SKILL.md +599 -0
- forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
- forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
- forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
- forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
- forge/backend/__init__.py +174 -0
- forge/backend/adapters/__init__.py +38 -0
- forge/backend/adapters/litellm.py +158 -0
- forge/backend/creation.py +89 -0
- forge/backend/registry.py +178 -0
- forge/cli/__init__.py +16 -0
- forge/cli/auth.py +483 -0
- forge/cli/backend.py +298 -0
- forge/cli/claude.py +411 -0
- forge/cli/config_cmd.py +303 -0
- forge/cli/extensions.py +1001 -0
- forge/cli/gc.py +165 -0
- forge/cli/guard.py +1018 -0
- forge/cli/guards.py +106 -0
- forge/cli/handoff.py +110 -0
- forge/cli/hooks/__init__.py +36 -0
- forge/cli/hooks/_group.py +20 -0
- forge/cli/hooks/_helpers.py +149 -0
- forge/cli/hooks/commands.py +1677 -0
- forge/cli/hooks/direct_commands.py +1304 -0
- forge/cli/hooks/install.py +232 -0
- forge/cli/hooks/policy.py +151 -0
- forge/cli/hooks/read_hygiene.py +74 -0
- forge/cli/hooks/verification.py +370 -0
- forge/cli/logs.py +406 -0
- forge/cli/main.py +292 -0
- forge/cli/proxy.py +1821 -0
- forge/cli/proxy_costs.py +313 -0
- forge/cli/search.py +416 -0
- forge/cli/session.py +892 -0
- forge/cli/session_addendum.py +81 -0
- forge/cli/session_fork.py +750 -0
- forge/cli/session_handoff.py +141 -0
- forge/cli/session_lifecycle.py +2053 -0
- forge/cli/session_manage.py +1336 -0
- forge/cli/session_memory.py +201 -0
- forge/cli/status_line.py +1398 -0
- forge/cli/workflow.py +1964 -0
- forge/config/__init__.py +110 -0
- forge/config/dataclass_utils.py +88 -0
- forge/config/defaults/__init__.py +0 -0
- forge/config/defaults/backends/__init__.py +0 -0
- forge/config/defaults/backends/litellm.yaml +196 -0
- forge/config/defaults/templates/__init__.py +0 -0
- forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
- forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
- forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
- forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
- forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
- forge/config/defaults/templates/litellm-gemini.yaml +21 -0
- forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
- forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
- forge/config/defaults/templates/litellm-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
- forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
- forge/config/defaults/templates/openrouter-glm.yaml +23 -0
- forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
- forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
- forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
- forge/config/defaults/templates/openrouter-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
- forge/config/loader.py +675 -0
- forge/config/schema.py +448 -0
- forge/core/__init__.py +5 -0
- forge/core/auth/__init__.py +67 -0
- forge/core/auth/capabilities.py +219 -0
- forge/core/auth/credentials_file.py +244 -0
- forge/core/auth/protocols.py +18 -0
- forge/core/auth/secrets.py +243 -0
- forge/core/auth/template_secrets.py +112 -0
- forge/core/data/__init__.py +5 -0
- forge/core/data/model_catalog.yaml +1522 -0
- forge/core/data/pricing.yaml +140 -0
- forge/core/data/system_prompt_addendums/__init__.py +0 -0
- forge/core/data/system_prompt_addendums/gemini.md +330 -0
- forge/core/data/system_prompt_addendums/openai.md +328 -0
- forge/core/llm/__init__.py +231 -0
- forge/core/llm/clients/__init__.py +14 -0
- forge/core/llm/clients/base.py +115 -0
- forge/core/llm/clients/litellm.py +619 -0
- forge/core/llm/clients/openai_compat.py +244 -0
- forge/core/llm/clients/openrouter.py +234 -0
- forge/core/llm/credentials.py +439 -0
- forge/core/llm/detection.py +86 -0
- forge/core/llm/errors.py +44 -0
- forge/core/llm/protocols.py +80 -0
- forge/core/llm/types.py +176 -0
- forge/core/logging.py +146 -0
- forge/core/models/__init__.py +91 -0
- forge/core/models/catalog.py +467 -0
- forge/core/models/pricing.py +165 -0
- forge/core/models/types.py +167 -0
- forge/core/naming.py +212 -0
- forge/core/ops/__init__.py +73 -0
- forge/core/ops/context.py +141 -0
- forge/core/ops/gc.py +802 -0
- forge/core/ops/proxy.py +146 -0
- forge/core/ops/resolution.py +135 -0
- forge/core/ops/session.py +344 -0
- forge/core/ops/session_context.py +548 -0
- forge/core/paths.py +38 -0
- forge/core/process.py +54 -0
- forge/core/reactive/__init__.py +38 -0
- forge/core/reactive/cost_tracking.py +300 -0
- forge/core/reactive/env.py +180 -0
- forge/core/reactive/proxy.py +78 -0
- forge/core/reactive/routing.py +622 -0
- forge/core/reactive/session_runner.py +185 -0
- forge/core/reactive/structured_output.py +62 -0
- forge/core/reactive/tagger.py +94 -0
- forge/core/reactive/throttle.py +132 -0
- forge/core/state/__init__.py +59 -0
- forge/core/state/exceptions.py +59 -0
- forge/core/state/io.py +140 -0
- forge/core/state/lock.py +99 -0
- forge/core/state/timestamps.py +60 -0
- forge/core/transcript.py +78 -0
- forge/core/typing_helpers.py +24 -0
- forge/core/workqueue/__init__.py +67 -0
- forge/core/workqueue/queue.py +552 -0
- forge/core/workqueue/types.py +63 -0
- forge/guard/__init__.py +26 -0
- forge/guard/deterministic/__init__.py +26 -0
- forge/guard/deterministic/base.py +158 -0
- forge/guard/deterministic/coding_standards.py +256 -0
- forge/guard/deterministic/registry.py +148 -0
- forge/guard/deterministic/tdd.py +171 -0
- forge/guard/engine.py +216 -0
- forge/guard/protocols.py +91 -0
- forge/guard/queries.py +96 -0
- forge/guard/semantic/__init__.py +34 -0
- forge/guard/semantic/promotion.py +18 -0
- forge/guard/semantic/supervisor.py +813 -0
- forge/guard/semantic/verdict.py +183 -0
- forge/guard/store.py +124 -0
- forge/guard/team/__init__.py +6 -0
- forge/guard/team/config.py +24 -0
- forge/guard/team/handlers.py +209 -0
- forge/guard/team/prompts.py +41 -0
- forge/guard/types.py +125 -0
- forge/guard/workflow/__init__.py +17 -0
- forge/guard/workflow/branches.py +67 -0
- forge/guard/workflow/config.py +63 -0
- forge/guard/workflow/divergence.py +113 -0
- forge/guard/workflow/policy.py +87 -0
- forge/guard/workflow/stages.py +205 -0
- forge/install/__init__.py +55 -0
- forge/install/cli.py +281 -0
- forge/install/exceptions.py +163 -0
- forge/install/hooks.py +109 -0
- forge/install/installer.py +1037 -0
- forge/install/models.py +321 -0
- forge/install/preset.py +272 -0
- forge/install/settings_merge.py +831 -0
- forge/install/tracking.py +238 -0
- forge/install/version.py +141 -0
- forge/proxy/__init__.py +0 -0
- forge/proxy/base_client.py +181 -0
- forge/proxy/client_adapter.py +476 -0
- forge/proxy/client_factory.py +531 -0
- forge/proxy/converters.py +1206 -0
- forge/proxy/cost_logger.py +132 -0
- forge/proxy/cost_tracker.py +242 -0
- forge/proxy/data_models.py +338 -0
- forge/proxy/error_hints.py +92 -0
- forge/proxy/metrics.py +222 -0
- forge/proxy/model_spec.py +158 -0
- forge/proxy/proxies.py +333 -0
- forge/proxy/proxy_identity.py +134 -0
- forge/proxy/proxy_orchestrator.py +1018 -0
- forge/proxy/proxy_startup.py +54 -0
- forge/proxy/server.py +1561 -0
- forge/proxy/utils.py +537 -0
- forge/review/__init__.py +6 -0
- forge/review/adversarial.py +111 -0
- forge/review/consensus.py +236 -0
- forge/review/engine.py +356 -0
- forge/review/models.py +437 -0
- forge/review/resources/__init__.py +5 -0
- forge/review/resources/codereview-performance.md +85 -0
- forge/review/resources/codereview-quick.md +75 -0
- forge/review/resources/codereview-security.md +92 -0
- forge/review/resources/codereview.md +85 -0
- forge/review/resources/docreview-quick.md +75 -0
- forge/review/resources/docreview.md +86 -0
- forge/review/resources/thinkdeep.md +89 -0
- forge/review/routing.py +368 -0
- forge/review/synthesis.py +73 -0
- forge/runtime_config.py +438 -0
- forge/search/__init__.py +55 -0
- forge/search/bm25_store.py +264 -0
- forge/search/content_store.py +197 -0
- forge/search/engine.py +352 -0
- forge/search/exceptions.py +51 -0
- forge/search/extractor.py +234 -0
- forge/search/index_state.py +295 -0
- forge/search/store.py +215 -0
- forge/search/tokenizer.py +24 -0
- forge/session/__init__.py +130 -0
- forge/session/active.py +339 -0
- forge/session/artifacts.py +202 -0
- forge/session/claude/__init__.py +50 -0
- forge/session/claude/cleanup.py +105 -0
- forge/session/claude/invoke.py +236 -0
- forge/session/claude/paths.py +200 -0
- forge/session/cleanup.py +216 -0
- forge/session/config.py +34 -0
- forge/session/direct_model.py +107 -0
- forge/session/effective.py +169 -0
- forge/session/exceptions.py +255 -0
- forge/session/handoff.py +881 -0
- forge/session/handoff_agent.py +544 -0
- forge/session/hooks/__init__.py +35 -0
- forge/session/hooks/models.py +73 -0
- forge/session/hooks/session_start.py +507 -0
- forge/session/identity.py +84 -0
- forge/session/index.py +553 -0
- forge/session/manager.py +1506 -0
- forge/session/models.py +572 -0
- forge/session/overrides.py +344 -0
- forge/session/plan_resolution.py +286 -0
- forge/session/prev_sessions.py +128 -0
- forge/session/store.py +431 -0
- forge/session/validation.py +47 -0
- forge/session/worktree/__init__.py +65 -0
- forge/session/worktree/cleanup.py +262 -0
- forge/session/worktree/config_copy.py +203 -0
- forge/session/worktree/create.py +332 -0
- forge/sidecar/__init__.py +29 -0
- forge/sidecar/container.py +161 -0
- forge/sidecar/docker.py +86 -0
- forge/sidecar/secrets.py +19 -0
- multi_forge-0.2.0.dist-info/METADATA +242 -0
- multi_forge-0.2.0.dist-info/RECORD +311 -0
- multi_forge-0.2.0.dist-info/WHEEL +4 -0
- multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
- multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
- multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""Supervisor verdict parsing and conversion.
|
|
2
|
+
|
|
3
|
+
Parses structured JSON responses from the semantic supervisor and
|
|
4
|
+
converts them to PolicyDecision objects.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from typing import Any, Literal
|
|
12
|
+
|
|
13
|
+
from forge.core.reactive.structured_output import extract_json_from_response
|
|
14
|
+
from forge.guard.types import PolicyDecision, Severity, Violation
|
|
15
|
+
|
|
16
|
+
_log = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
# Confidence threshold for blocking (require high confidence + citations)
|
|
19
|
+
CONFIDENCE_THRESHOLD = 0.8
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class SupervisorVerdict:
    """Structured result parsed from a semantic supervisor response.

    Attributes:
        verdict: ``"aligned"`` when the action matches the plan,
            ``"divergent"`` when it deviates from it.
        confidence: Confidence in the verdict, from 0.0 to 1.0.
        violations: Violation details that accompany a divergent verdict.
    """

    # Only these two literal values are valid verdicts.
    verdict: Literal["aligned", "divergent"]
    # Defaults to full confidence when no value is supplied.
    confidence: float = 1.0
    # default_factory gives every instance its own list.
    violations: list[dict[str, Any]] = field(default_factory=list)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _warn_verdict(evidence: str, suggested_fix: str) -> SupervisorVerdict:
    """Build a zero-confidence divergent verdict holding one low-severity violation.

    The confidence is 0.0 and the violation carries no citations, so
    ``verdict_to_decision`` reports it as a warning rather than a deny.

    Args:
        evidence: Description of what went wrong.
        suggested_fix: Hint for resolving the problem.

    Returns:
        A divergent SupervisorVerdict with confidence 0.0.
    """
    placeholder: dict[str, Any] = {
        "severity": "low",
        "evidence": evidence,
        "suggested_fix": suggested_fix,
        "citations": [],
    }
    return SupervisorVerdict(verdict="divergent", confidence=0.0, violations=[placeholder])
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def parse_supervisor_verdict(response: str) -> SupervisorVerdict:
    """Turn raw supervisor text into a SupervisorVerdict.

    JSON is pulled out of the text with ``extract_json_from_response`` and
    then validated by ``_parse_verdict_data``. An empty response, or one from
    which no JSON could be extracted, yields a divergent verdict with 0.0
    confidence so the outcome is a warning rather than a deny or a silent
    allow.

    Args:
        response: Raw text response from the supervisor.

    Returns:
        The parsed SupervisorVerdict, or a warning verdict on failure.
    """
    if response:
        payload = extract_json_from_response(response)
        if payload is not None:
            return _parse_verdict_data(payload)

        # Text was present but no JSON could be extracted from it.
        _log.warning("Could not parse supervisor verdict, failing open with warning")
        return _warn_verdict(
            "Supervisor verdict could not be parsed — check supervisor response format",
            "Verify supervisor session responds with valid JSON verdict",
        )

    # Nothing came back at all.
    _log.warning("Empty supervisor response, failing open with warning")
    return _warn_verdict(
        "Supervisor response was empty — check supervisor session health",
        "Verify supervisor resume_id and proxy connectivity",
    )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _parse_verdict_data(data: dict[str, Any]) -> SupervisorVerdict:
    """Validate decoded verdict JSON and build a SupervisorVerdict.

    Normalisation rules:
        - A missing or unrecognised ``verdict`` becomes ``"aligned"``.
        - A missing or non-numeric ``confidence`` becomes 1.0; numeric values
          are clamped to the range 0.0-1.0.
        - A non-list ``violations`` value becomes an empty list, and entries
          that are not dicts are dropped so downstream code can rely on the
          declared ``list[dict[str, Any]]`` type.

    Args:
        data: Decoded JSON object from the supervisor response.

    Returns:
        A SupervisorVerdict with validated fields.
    """
    verdict = data.get("verdict", "aligned")
    if verdict not in ("aligned", "divergent"):
        _log.warning("Unknown verdict '%s', treating as aligned", verdict)
        verdict = "aligned"

    confidence = data.get("confidence", 1.0)
    if not isinstance(confidence, (int, float)):
        confidence = 1.0
    confidence = max(0.0, min(1.0, float(confidence)))

    raw_violations = data.get("violations", [])
    if not isinstance(raw_violations, list):
        raw_violations = []

    # Model output is untrusted: a list of strings or nulls here would make
    # verdict_to_decision fail on ``v.get``, so keep only mapping entries.
    violations = [entry for entry in raw_violations if isinstance(entry, dict)]
    dropped = len(raw_violations) - len(violations)
    if dropped:
        _log.warning("Ignoring %d malformed violation entries in supervisor verdict", dropped)

    return SupervisorVerdict(
        verdict=verdict,  # type: ignore[arg-type]  # mypy doesn't track narrowing from reassignment
        confidence=confidence,
        violations=violations,
    )
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def verdict_to_decision(verdict: SupervisorVerdict, *, intent: str | None = None) -> PolicyDecision:
    """Convert a SupervisorVerdict to a PolicyDecision.

    Decision rules:
        - An aligned verdict always allows.
        - A divergent verdict denies only when its confidence is at least
          ``CONFIDENCE_THRESHOLD`` and at least one violation carries a
          non-empty list of citations.
        - Every other divergent verdict produces a warning.

    Args:
        verdict: Parsed supervisor verdict.
        intent: Policy intent to attach to deny decisions.

    Returns:
        PolicyDecision with decision ``"allow"``, ``"deny"`` or ``"warn"``.
    """
    policy_id = "semantic.supervisor"

    if verdict.verdict == "aligned":
        return PolicyDecision(
            decision="allow",
            policy_id=policy_id,
        )

    # Confidence is a property of the whole verdict, so evaluate it once.
    confident = verdict.confidence >= CONFIDENCE_THRESHOLD
    blocking_violations: list[Violation] = []
    warnings: list[str] = []

    for v in verdict.violations:
        if not isinstance(v, dict):
            # Malformed entry from model output; skip it instead of failing on .get().
            _log.warning("Skipping malformed violation entry: %r", v)
            continue

        # Normalise citations BEFORE the blocking check so the decision and
        # the recorded Violation agree on whether citations exist.
        raw_citations = v.get("citations", [])
        citations = raw_citations if isinstance(raw_citations, list) else []

        severity_str = v.get("severity", "medium")
        severity: Severity = (
            severity_str if severity_str in ("critical", "high", "medium", "low") else "medium"
        )  # type: ignore[assignment]  # membership check narrows str to Literal at runtime

        violation = Violation(
            rule_id=f"{policy_id}.alignment",
            # ``or`` also covers an explicit null/empty evidence value,
            # which dict.get's default would not replace.
            message=v.get("evidence") or "Divergent from plan",
            severity=severity,
            evidence=v.get("evidence"),
            suggested_fix=v.get("suggested_fix"),
            citations=citations,
        )

        if confident and citations:
            blocking_violations.append(violation)
        else:
            # Low confidence or no citations: warning only.
            warnings.append(f"Possible divergence: {violation.message} (confidence: {verdict.confidence:.0%})")

    if blocking_violations:
        return PolicyDecision(
            decision="deny",
            policy_id=policy_id,
            violations=blocking_violations,
            warnings=warnings,
            intent=intent,
        )

    if not warnings:
        # Divergent verdict with no usable violations: still surface it.
        warnings = [f"Divergent verdict with no specific violations (confidence: {verdict.confidence:.0%})"]

    return PolicyDecision(
        decision="warn",
        policy_id=policy_id,
        warnings=warnings,
    )
|
forge/guard/store.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Helpers for reading/writing policy state to the session manifest.
|
|
2
|
+
|
|
3
|
+
Policy state is persisted to confirmed.policy in the session manifest.
|
|
4
|
+
This enables stateful policies (like TDD) to track state across hook
|
|
5
|
+
invocations, since hooks are short-lived processes.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from forge.core.state import now_iso
|
|
14
|
+
from forge.guard.types import CompositeDecision, PolicyDecision, Violation
|
|
15
|
+
|
|
16
|
+
_log = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
# Maximum number of decisions to keep in the log
|
|
19
|
+
MAX_DECISION_LOG = 100
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def serialize_decision(decision: PolicyDecision) -> dict[str, Any]:
    """Convert a PolicyDecision into a plain dict for persistence.

    Args:
        decision: The decision to serialize.

    Returns:
        Dict with the decision's fields; each violation is itself
        converted to a dict.
    """
    payload: dict[str, Any] = {}
    payload["decision"] = decision.decision
    payload["policy_id"] = decision.policy_id
    payload["violations"] = [_serialize_violation(item) for item in decision.violations]
    payload["warnings"] = decision.warnings
    payload["cached"] = decision.cached
    payload["evaluated_at"] = decision.evaluated_at
    return payload
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _serialize_violation(violation: Violation) -> dict[str, Any]:
    """Convert a Violation into a plain dict for persistence.

    Args:
        violation: The violation to serialize.

    Returns:
        Dict mapping each persisted field name to the violation's value.
    """
    # Tuple order fixes the key order of the resulting dict.
    persisted_fields = (
        "rule_id",
        "message",
        "severity",
        "evidence",
        "suggested_fix",
        "citations",
    )
    return {name: getattr(violation, name) for name in persisted_fields}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def serialize_composite_decision(
    composite: CompositeDecision,
    context_summary: str | None = None,
) -> dict[str, Any]:
    """Convert a CompositeDecision into a decision-log entry.

    The entry is stamped with ``now_iso()`` at serialization time.

    Args:
        composite: The composite decision to serialize.
        context_summary: Optional summary of the action context.

    Returns:
        Dict holding the final decision, the context summary, the blocking
        violations, the combined warnings, the timestamp, and each
        individual policy decision.
    """
    entry: dict[str, Any] = {}
    entry["final_decision"] = composite.final_decision
    entry["context_summary"] = context_summary
    entry["blocking_violations"] = [_serialize_violation(item) for item in composite.blocking_violations]
    entry["warnings"] = composite.all_warnings
    entry["evaluated_at"] = now_iso()
    entry["decisions"] = [serialize_decision(item) for item in composite.decisions]
    return entry
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def build_policy_state_update(
    result: CompositeDecision,
    engine_state: dict[str, dict[str, Any]],
    existing_state: dict[str, Any] | None,
    *,
    forge_version: str | None = None,
    bundles: list[str] | None = None,
    rules_active: list[str] | None = None,
    context_summary: str | None = None,
) -> dict[str, Any]:
    """Build the policy state update for the session manifest.

    Appends the serialized ``result`` to the decision log, keeping at most
    ``MAX_DECISION_LOG`` most recent entries, and overlays ``engine_state``
    on the previously stored policy states. Stored states for policy IDs
    absent from ``engine_state`` are kept unchanged.

    Keys that are missing from ``existing_state`` and keys stored as
    ``None`` are both treated as empty, so a manifest containing explicit
    nulls does not raise.

    Args:
        result: The composite decision from evaluation.
        engine_state: Collected state from evaluated stateful policies,
            keyed by policy ID.
        existing_state: Current confirmed.policy state (may be None).
        forge_version: Forge version for provenance; falls back to the
            stored value when falsy.
        bundles: Active bundle names for provenance; falls back to the
            stored value when falsy.
        rules_active: Active rule IDs for provenance; falls back to the
            stored value when falsy.
        context_summary: Summary of the action for logging.

    Returns:
        Dict to write to confirmed.policy.
    """
    existing = existing_state or {}

    # ``.get(key) or default`` rather than ``.get(key, default)``: the latter
    # returns a stored None as-is, which list()/dict() cannot consume.
    decisions_log = list(existing.get("decisions") or [])
    decisions_log.append(serialize_composite_decision(result, context_summary))
    if len(decisions_log) > MAX_DECISION_LOG:
        # Keep only the newest entries.
        decisions_log = decisions_log[-MAX_DECISION_LOG:]

    # Overlay freshly collected states; untouched policy IDs keep prior state.
    merged_states = dict(existing.get("policy_states") or {})
    merged_states.update(engine_state)

    return {
        "forge_version": forge_version or existing.get("forge_version"),
        "bundles": bundles or existing.get("bundles") or [],
        "rules_active": rules_active or existing.get("rules_active") or [],
        "decisions": decisions_log,
        "policy_states": merged_states,
    }
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Configuration for team quality gate hooks."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class TeamSupervisorConfig:
    """Settings for the team quality gate hooks.

    Stored on ``PolicyIntent.team_supervisor``. When that attribute is
    ``None`` the team hooks are no-ops (allow everything, fail-open).
    """

    # Master switch; off by default.
    enabled: bool = False
    # Model identifier used for the tagger step.
    tagger_model: str = "gemini/gemini-2.0-flash"
    # Supervisor session to resume; unset by default.
    resume_id: str | None = None
    # NOTE(review): presumably a proxy name to route through -- confirm against the caller.
    proxy: str | None = None
    # NOTE(review): presumably selects direct (non-proxied) access -- confirm.
    direct: bool = False
    # NOTE(review): presumably an explicit endpoint override -- confirm.
    base_url: str | None = None
    # Timeout, in seconds.
    timeout_seconds: int = 45
    # Throttle window, in seconds.
    throttle_seconds: int = 60
    # Upper bound on blocks for a single task.
    max_blocks_per_task: int = 3
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""Team hook handler logic for TeammateIdle and TaskCompleted.
|
|
2
|
+
|
|
3
|
+
Handlers return ``(exit_code, stderr_message)``:
|
|
4
|
+
- ``(0, "")`` = allow (teammate goes idle / task marked completed)
|
|
5
|
+
- ``(2, "feedback")`` = block (teammate continues / task stays open)
|
|
6
|
+
|
|
7
|
+
All errors fail-open (return 0). Uses file-backed cache at
|
|
8
|
+
``~/.forge/team-hooks/<session_id>.json`` for throttle + escape hatch.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from forge.core.reactive.proxy import lookup_proxy_base_url
|
|
18
|
+
from forge.core.reactive.session_runner import run_claude_session
|
|
19
|
+
from forge.core.reactive.structured_output import extract_json_from_response
|
|
20
|
+
from forge.core.state import now_iso
|
|
21
|
+
from forge.guard.team.config import TeamSupervisorConfig
|
|
22
|
+
from forge.guard.team.prompts import (
|
|
23
|
+
IDLE_TAGGER_PROMPT,
|
|
24
|
+
TASK_TAGGER_PROMPT,
|
|
25
|
+
TEAM_SUPERVISOR_PROMPT,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
_log = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def handle_teammate_idle(
    data: dict[str, Any],
    config: TeamSupervisorConfig,
    cache: dict[str, Any],
) -> tuple[int, str]:
    """Decide whether a teammate may go idle.

    Flow:
        1. Reuse the cached result for this teammate while it is still
           fresh according to ``config.throttle_seconds``.
        2. Otherwise classify the event; any tag other than
           ``"needs-review"`` is cached as an allow.
        3. If review is needed but no ``config.resume_id`` is set, allow
           without caching.
        4. Otherwise run the supervisor and cache its result.

    Args:
        data: Raw hook event payload from Claude Code.
        config: Team supervisor configuration.
        cache: File-backed dict (loaded/saved by caller); updated in place.

    Returns:
        ``(exit_code, stderr_feedback)``.
    """
    # Missing or empty names collapse to "unknown".
    teammate = data.get("teammate_name") or "unknown"
    team = data.get("team_name") or "unknown"
    cache_key = f"{teammate}:idle"

    previous = cache.get(cache_key)
    if previous and _is_fresh(previous, config.throttle_seconds):
        return previous.get("exit_code", 0), previous.get("feedback", "")

    tag = _classify_event(config.tagger_model, IDLE_TAGGER_PROMPT, teammate, team)
    if tag != "needs-review":
        allow_entry = {"checked_at": now_iso(), "exit_code": 0, "feedback": ""}
        cache[cache_key] = allow_entry
        return 0, ""

    if not config.resume_id:
        # No supervisor session configured: allow, leaving the cache untouched.
        return 0, ""

    exit_code, feedback = _run_supervisor(config, teammate, team, "idle", "")
    supervisor_entry = {
        "checked_at": now_iso(),
        "exit_code": exit_code,
        "feedback": feedback,
    }
    cache[cache_key] = supervisor_entry
    return exit_code, feedback
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def handle_task_completed(
    data: dict[str, Any],
    config: TeamSupervisorConfig,
    cache: dict[str, Any],
) -> tuple[int, str]:
    """Handle TaskCompleted event.

    Decision order:

    1. Escape hatch: once a task has been blocked ``config.max_blocks_per_task``
       times it is always allowed.
    2. A cached decision inside the throttle window is replayed unchanged.
    3. The cheap tagger classifies the event; anything other than
       ``needs-review`` is allowed.
    4. Otherwise the supervisor session is consulted (only when
       ``config.resume_id`` is set) and its result is cached.

    The per-task ``block_count`` is carried forward on every cache write so the
    escape hatch keeps counting across allowed and blocked checks alike.

    Args:
        data: Raw hook event payload from Claude Code.
        config: Team supervisor configuration.
        cache: File-backed dict (loaded/saved by caller); updated in place.

    Returns:
        ``(exit_code, stderr_feedback)``.
    """
    teammate = data.get("teammate_name") or "unknown"
    team = data.get("team_name") or "unknown"
    task_id = data.get("task_id") or "unknown"
    task_subject = data.get("task_subject")
    cache_key = f"{teammate}:{task_id}"

    cached = cache.get(cache_key, {})
    prior_blocks = cached.get("block_count", 0)

    # Escape hatch: auto-allow after max_blocks_per_task
    if prior_blocks >= config.max_blocks_per_task:
        _log.info(
            "Escape hatch: auto-allowing %s after %d blocks",
            cache_key,
            config.max_blocks_per_task,
        )
        return 0, ""

    if _is_fresh(cached, config.throttle_seconds):
        return cached.get("exit_code", 0), cached.get("feedback", "")

    tag = _classify_event(config.tagger_model, TASK_TAGGER_PROMPT, teammate, team, task_subject)
    if tag != "needs-review":
        # Keep the accumulated block count; dropping it here would reset the
        # escape hatch whenever a later check is classified as routine.
        cache[cache_key] = {
            "checked_at": now_iso(),
            "exit_code": 0,
            "feedback": "",
            "block_count": prior_blocks,
        }
        return 0, ""

    if not config.resume_id:
        # No supervisor session to resume: allow without caching.
        return 0, ""

    task_context = f"Task: {task_subject or 'unknown'} (id: {task_id})"
    exit_code, feedback = _run_supervisor(config, teammate, team, "task-completed", task_context)

    # Exit code 2 is what this handler counts as a block.
    block_count = prior_blocks + (1 if exit_code == 2 else 0)
    cache[cache_key] = {
        "checked_at": now_iso(),
        "exit_code": exit_code,
        "feedback": feedback,
        "block_count": block_count,
    }
    return exit_code, feedback
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _is_fresh(entry: dict[str, Any], throttle_seconds: int) -> bool:
    """Return True if the cache entry is within the throttle window.

    Args:
        entry: Cache record; its ``"checked_at"`` value is expected to be an
            ISO-8601 timestamp string (a trailing ``Z`` is accepted).
        throttle_seconds: Maximum age, in seconds, for the entry to count as fresh.

    Returns:
        ``True`` only when ``checked_at`` parses and is younger than
        ``throttle_seconds``. A missing, non-string, unparseable, or
        timezone-naive timestamp yields ``False`` instead of raising.
    """
    checked_at = entry.get("checked_at")
    # The cache is file-backed, so the value may not be a string at all;
    # calling ``.replace`` on e.g. a number would raise AttributeError.
    if not isinstance(checked_at, str) or not checked_at:
        return False
    try:
        # ``fromisoformat`` on older Pythons rejects a literal "Z" suffix.
        checked_time = datetime.fromisoformat(checked_at.replace("Z", "+00:00"))
        # Subtracting a naive datetime from an aware one raises TypeError.
        age = (datetime.now(timezone.utc) - checked_time).total_seconds()
        return age < throttle_seconds
    except (ValueError, TypeError):
        return False
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _classify_event(
    model: str,
    prompt_template: str,
    teammate: str,
    team: str,
    task_subject: str | None = None,
) -> str:
    """Classify event via cheap LLM call. Returns single tag string.

    Args:
        model: Model identifier passed to ``get_client``.
        prompt_template: ``str.format`` template; receives ``teammate_name``,
            ``team_name`` and ``task_subject`` (unused fields are ignored).
        teammate: Teammate name substituted into the prompt.
        team: Team name substituted into the prompt.
        task_subject: Optional task subject; an empty string is substituted
            when it is ``None`` or empty.

    Returns:
        The first word of the model's reply, lower-cased and stripped of
        surrounding punctuation/markdown. Falls back to ``"routine"`` when the
        reply is empty or any step fails.
    """
    try:
        # Imported lazily, as in the original, so a missing or broken LLM
        # client falls through to the "routine" fallback below.
        from forge.core.llm import SyncAdapter, get_client

        prompt = prompt_template.format(
            teammate_name=teammate,
            team_name=team,
            task_subject=task_subject or "",
        )
        adapter = SyncAdapter(get_client(model))
        response = adapter.ask(prompt)
        words = response.strip().lower().split()
        if not words:
            return "routine"
        # Models frequently decorate the tag ("needs-review.", "`needs-review`",
        # "**needs-review**"); strip that so callers' exact comparison matches.
        tag = words[0].strip("`'\"*.,:;!")
        return tag or "routine"
    except Exception as e:
        # Deliberate best-effort: a tagger failure must never block a teammate.
        _log.warning("Team tagger failed: %s", e)
        return "routine"
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _run_supervisor(
    config: TeamSupervisorConfig,
    teammate: str,
    team: str,
    event_type: str,
    task_context: str,
) -> tuple[int, str]:
    """Run cross-team supervisor. Returns ``(exit_code, feedback)``.

    Fail-open on: subprocess failure, parse failure, missing "verdict",
    non-dict extraction, verdict != "divergent", proxy lookup failure, or
    FORGE_DEPTH limit. In all of those cases the result is ``(0, "")``.

    Args:
        config: Team supervisor configuration (session to resume, proxy/base
            URL selection, timeout).
        teammate: Teammate name substituted into the supervisor prompt.
        team: Team name substituted into the supervisor prompt.
        event_type: Event label substituted into the prompt.
        task_context: Extra context line for the prompt; may be empty.

    Returns:
        ``(2, feedback)`` when the supervisor's verdict is ``"divergent"``,
        with a non-empty feedback string; ``(0, "")`` otherwise.
    """
    from forge.core.reactive.env import should_spawn_subprocesses

    if not should_spawn_subprocesses():
        _log.debug("Skipping team supervisor at FORGE_DEPTH limit")
        return 0, ""

    try:
        # Direct mode bypasses any proxy; otherwise an explicit base_url wins
        # over looking the proxy up by name.
        base_url = None if config.direct else (config.base_url or lookup_proxy_base_url(config.proxy))
    except Exception as e:
        _log.warning("Team supervisor proxy '%s' not found: %s", config.proxy, e)
        return 0, ""
    prompt = TEAM_SUPERVISOR_PROMPT.format(
        teammate_name=teammate,
        team_name=team,
        event_type=event_type,
        task_context=task_context,
    )
    result = run_claude_session(
        prompt,
        resume_id=config.resume_id,
        base_url=base_url,
        timeout_seconds=config.timeout_seconds,
    )
    if not result.success:
        _log.warning("Team supervisor failed: %s", result.error)
        return 0, ""

    verdict = extract_json_from_response(result.stdout)
    if not isinstance(verdict, dict) or verdict.get("verdict") != "divergent":
        return 0, ""

    # The model may return "feedback": null, an empty string, or a non-string;
    # a block must always carry a usable message for the teammate.
    feedback = verdict.get("feedback")
    if not isinstance(feedback, str) or not feedback.strip():
        feedback = "Supervisor flagged work as divergent"
    return 2, feedback
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Prompt templates for team hook handlers."""
|
|
2
|
+
|
|
3
|
+
# Tagger prompt for idle events. A ``str.format`` template with the
# placeholders ``{teammate_name}`` and ``{team_name}``; it asks the model to
# answer with exactly one of the listed tags.
IDLE_TAGGER_PROMPT = """\
A teammate went idle. Classify why (respond with just the tag):

- needs-review: work may need verification before proceeding
- routine: normal idle (thinking, waiting for dependency)
- trivial: brief pause, no action needed

Teammate: {teammate_name}, Team: {team_name}
Tag:"""

# Tagger prompt for task-completion events. Same tag vocabulary as the idle
# prompt, with an additional ``{task_subject}`` placeholder.
TASK_TAGGER_PROMPT = """\
A teammate completed a task. Classify the result (respond with just the tag):

- needs-review: completed work should be verified for quality/alignment
- routine: standard task completion, no concerns
- trivial: minor task, no review needed

Teammate: {teammate_name}, Team: {team_name}
Task: {task_subject}
Tag:"""

# Supervisor prompt. Placeholders: ``{teammate_name}``, ``{team_name}``,
# ``{event_type}``, ``{task_context}``. The doubled braces (``{{`` / ``}}``)
# are ``str.format`` escapes that render as literal braces in the JSON example.
TEAM_SUPERVISOR_PROMPT = """\
You are a team supervisor reviewing teammate work against the approved plan.

Teammate: {teammate_name} ({team_name})
Event: {event_type}
{task_context}

Evaluate whether this work aligns with the approved plan.
Focus on: correct approach, right files modified, tests included.

Respond with JSON in a code fence:
```json
{{
  "verdict": "aligned" | "divergent",
  "confidence": 0.0-1.0,
  "feedback": "Brief feedback message for the teammate"
}}
```"""
|