multi-forge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forge/__init__.py +3 -0
- forge/_extensions/agents/.gitkeep +0 -0
- forge/_extensions/commands/.gitkeep +0 -0
- forge/_extensions/skills/analyze/SKILL.md +87 -0
- forge/_extensions/skills/challenge/SKILL.md +91 -0
- forge/_extensions/skills/consensus/SKILL.md +120 -0
- forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
- forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
- forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
- forge/_extensions/skills/debate/SKILL.md +116 -0
- forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
- forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
- forge/_extensions/skills/panel/SKILL.md +141 -0
- forge/_extensions/skills/panel/resources/synthesis.md +103 -0
- forge/_extensions/skills/qa/SKILL.md +704 -0
- forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
- forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
- forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
- forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
- forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
- forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
- forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
- forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
- forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
- forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
- forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
- forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
- forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
- forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
- forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
- forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
- forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
- forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
- forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
- forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
- forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
- forge/_extensions/skills/qa/resources/checklist.md +103 -0
- forge/_extensions/skills/qa/resources/report-template.md +62 -0
- forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
- forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
- forge/_extensions/skills/review/SKILL.md +125 -0
- forge/_extensions/skills/review/references/claude-4.6.md +474 -0
- forge/_extensions/skills/review/references/claude-4.7.md +710 -0
- forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
- forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
- forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
- forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
- forge/_extensions/skills/review/resources/code-gemini.md +184 -0
- forge/_extensions/skills/review/resources/code-openai.md +203 -0
- forge/_extensions/skills/review/resources/code.md +160 -0
- forge/_extensions/skills/review-docs/SKILL.md +121 -0
- forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
- forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
- forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
- forge/_extensions/skills/review-docs/resources/docs.md +170 -0
- forge/_extensions/skills/smoke-test/SKILL.md +27 -0
- forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
- forge/_extensions/skills/understand/SKILL.md +148 -0
- forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
- forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
- forge/_extensions/skills/understand/resources/code-openai.md +181 -0
- forge/_extensions/skills/understand/resources/code.md +163 -0
- forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
- forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
- forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
- forge/_extensions/skills/understand/resources/docs.md +177 -0
- forge/_extensions/skills/walkthrough/SKILL.md +599 -0
- forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
- forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
- forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
- forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
- forge/backend/__init__.py +174 -0
- forge/backend/adapters/__init__.py +38 -0
- forge/backend/adapters/litellm.py +158 -0
- forge/backend/creation.py +89 -0
- forge/backend/registry.py +178 -0
- forge/cli/__init__.py +16 -0
- forge/cli/auth.py +483 -0
- forge/cli/backend.py +298 -0
- forge/cli/claude.py +411 -0
- forge/cli/config_cmd.py +303 -0
- forge/cli/extensions.py +1001 -0
- forge/cli/gc.py +165 -0
- forge/cli/guard.py +1018 -0
- forge/cli/guards.py +106 -0
- forge/cli/handoff.py +110 -0
- forge/cli/hooks/__init__.py +36 -0
- forge/cli/hooks/_group.py +20 -0
- forge/cli/hooks/_helpers.py +149 -0
- forge/cli/hooks/commands.py +1677 -0
- forge/cli/hooks/direct_commands.py +1304 -0
- forge/cli/hooks/install.py +232 -0
- forge/cli/hooks/policy.py +151 -0
- forge/cli/hooks/read_hygiene.py +74 -0
- forge/cli/hooks/verification.py +370 -0
- forge/cli/logs.py +406 -0
- forge/cli/main.py +292 -0
- forge/cli/proxy.py +1821 -0
- forge/cli/proxy_costs.py +313 -0
- forge/cli/search.py +416 -0
- forge/cli/session.py +892 -0
- forge/cli/session_addendum.py +81 -0
- forge/cli/session_fork.py +750 -0
- forge/cli/session_handoff.py +141 -0
- forge/cli/session_lifecycle.py +2053 -0
- forge/cli/session_manage.py +1336 -0
- forge/cli/session_memory.py +201 -0
- forge/cli/status_line.py +1398 -0
- forge/cli/workflow.py +1964 -0
- forge/config/__init__.py +110 -0
- forge/config/dataclass_utils.py +88 -0
- forge/config/defaults/__init__.py +0 -0
- forge/config/defaults/backends/__init__.py +0 -0
- forge/config/defaults/backends/litellm.yaml +196 -0
- forge/config/defaults/templates/__init__.py +0 -0
- forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
- forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
- forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
- forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
- forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
- forge/config/defaults/templates/litellm-gemini.yaml +21 -0
- forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
- forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
- forge/config/defaults/templates/litellm-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
- forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
- forge/config/defaults/templates/openrouter-glm.yaml +23 -0
- forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
- forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
- forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
- forge/config/defaults/templates/openrouter-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
- forge/config/loader.py +675 -0
- forge/config/schema.py +448 -0
- forge/core/__init__.py +5 -0
- forge/core/auth/__init__.py +67 -0
- forge/core/auth/capabilities.py +219 -0
- forge/core/auth/credentials_file.py +244 -0
- forge/core/auth/protocols.py +18 -0
- forge/core/auth/secrets.py +243 -0
- forge/core/auth/template_secrets.py +112 -0
- forge/core/data/__init__.py +5 -0
- forge/core/data/model_catalog.yaml +1522 -0
- forge/core/data/pricing.yaml +140 -0
- forge/core/data/system_prompt_addendums/__init__.py +0 -0
- forge/core/data/system_prompt_addendums/gemini.md +330 -0
- forge/core/data/system_prompt_addendums/openai.md +328 -0
- forge/core/llm/__init__.py +231 -0
- forge/core/llm/clients/__init__.py +14 -0
- forge/core/llm/clients/base.py +115 -0
- forge/core/llm/clients/litellm.py +619 -0
- forge/core/llm/clients/openai_compat.py +244 -0
- forge/core/llm/clients/openrouter.py +234 -0
- forge/core/llm/credentials.py +439 -0
- forge/core/llm/detection.py +86 -0
- forge/core/llm/errors.py +44 -0
- forge/core/llm/protocols.py +80 -0
- forge/core/llm/types.py +176 -0
- forge/core/logging.py +146 -0
- forge/core/models/__init__.py +91 -0
- forge/core/models/catalog.py +467 -0
- forge/core/models/pricing.py +165 -0
- forge/core/models/types.py +167 -0
- forge/core/naming.py +212 -0
- forge/core/ops/__init__.py +73 -0
- forge/core/ops/context.py +141 -0
- forge/core/ops/gc.py +802 -0
- forge/core/ops/proxy.py +146 -0
- forge/core/ops/resolution.py +135 -0
- forge/core/ops/session.py +344 -0
- forge/core/ops/session_context.py +548 -0
- forge/core/paths.py +38 -0
- forge/core/process.py +54 -0
- forge/core/reactive/__init__.py +38 -0
- forge/core/reactive/cost_tracking.py +300 -0
- forge/core/reactive/env.py +180 -0
- forge/core/reactive/proxy.py +78 -0
- forge/core/reactive/routing.py +622 -0
- forge/core/reactive/session_runner.py +185 -0
- forge/core/reactive/structured_output.py +62 -0
- forge/core/reactive/tagger.py +94 -0
- forge/core/reactive/throttle.py +132 -0
- forge/core/state/__init__.py +59 -0
- forge/core/state/exceptions.py +59 -0
- forge/core/state/io.py +140 -0
- forge/core/state/lock.py +99 -0
- forge/core/state/timestamps.py +60 -0
- forge/core/transcript.py +78 -0
- forge/core/typing_helpers.py +24 -0
- forge/core/workqueue/__init__.py +67 -0
- forge/core/workqueue/queue.py +552 -0
- forge/core/workqueue/types.py +63 -0
- forge/guard/__init__.py +26 -0
- forge/guard/deterministic/__init__.py +26 -0
- forge/guard/deterministic/base.py +158 -0
- forge/guard/deterministic/coding_standards.py +256 -0
- forge/guard/deterministic/registry.py +148 -0
- forge/guard/deterministic/tdd.py +171 -0
- forge/guard/engine.py +216 -0
- forge/guard/protocols.py +91 -0
- forge/guard/queries.py +96 -0
- forge/guard/semantic/__init__.py +34 -0
- forge/guard/semantic/promotion.py +18 -0
- forge/guard/semantic/supervisor.py +813 -0
- forge/guard/semantic/verdict.py +183 -0
- forge/guard/store.py +124 -0
- forge/guard/team/__init__.py +6 -0
- forge/guard/team/config.py +24 -0
- forge/guard/team/handlers.py +209 -0
- forge/guard/team/prompts.py +41 -0
- forge/guard/types.py +125 -0
- forge/guard/workflow/__init__.py +17 -0
- forge/guard/workflow/branches.py +67 -0
- forge/guard/workflow/config.py +63 -0
- forge/guard/workflow/divergence.py +113 -0
- forge/guard/workflow/policy.py +87 -0
- forge/guard/workflow/stages.py +205 -0
- forge/install/__init__.py +55 -0
- forge/install/cli.py +281 -0
- forge/install/exceptions.py +163 -0
- forge/install/hooks.py +109 -0
- forge/install/installer.py +1037 -0
- forge/install/models.py +321 -0
- forge/install/preset.py +272 -0
- forge/install/settings_merge.py +831 -0
- forge/install/tracking.py +238 -0
- forge/install/version.py +141 -0
- forge/proxy/__init__.py +0 -0
- forge/proxy/base_client.py +181 -0
- forge/proxy/client_adapter.py +476 -0
- forge/proxy/client_factory.py +531 -0
- forge/proxy/converters.py +1206 -0
- forge/proxy/cost_logger.py +132 -0
- forge/proxy/cost_tracker.py +242 -0
- forge/proxy/data_models.py +338 -0
- forge/proxy/error_hints.py +92 -0
- forge/proxy/metrics.py +222 -0
- forge/proxy/model_spec.py +158 -0
- forge/proxy/proxies.py +333 -0
- forge/proxy/proxy_identity.py +134 -0
- forge/proxy/proxy_orchestrator.py +1018 -0
- forge/proxy/proxy_startup.py +54 -0
- forge/proxy/server.py +1561 -0
- forge/proxy/utils.py +537 -0
- forge/review/__init__.py +6 -0
- forge/review/adversarial.py +111 -0
- forge/review/consensus.py +236 -0
- forge/review/engine.py +356 -0
- forge/review/models.py +437 -0
- forge/review/resources/__init__.py +5 -0
- forge/review/resources/codereview-performance.md +85 -0
- forge/review/resources/codereview-quick.md +75 -0
- forge/review/resources/codereview-security.md +92 -0
- forge/review/resources/codereview.md +85 -0
- forge/review/resources/docreview-quick.md +75 -0
- forge/review/resources/docreview.md +86 -0
- forge/review/resources/thinkdeep.md +89 -0
- forge/review/routing.py +368 -0
- forge/review/synthesis.py +73 -0
- forge/runtime_config.py +438 -0
- forge/search/__init__.py +55 -0
- forge/search/bm25_store.py +264 -0
- forge/search/content_store.py +197 -0
- forge/search/engine.py +352 -0
- forge/search/exceptions.py +51 -0
- forge/search/extractor.py +234 -0
- forge/search/index_state.py +295 -0
- forge/search/store.py +215 -0
- forge/search/tokenizer.py +24 -0
- forge/session/__init__.py +130 -0
- forge/session/active.py +339 -0
- forge/session/artifacts.py +202 -0
- forge/session/claude/__init__.py +50 -0
- forge/session/claude/cleanup.py +105 -0
- forge/session/claude/invoke.py +236 -0
- forge/session/claude/paths.py +200 -0
- forge/session/cleanup.py +216 -0
- forge/session/config.py +34 -0
- forge/session/direct_model.py +107 -0
- forge/session/effective.py +169 -0
- forge/session/exceptions.py +255 -0
- forge/session/handoff.py +881 -0
- forge/session/handoff_agent.py +544 -0
- forge/session/hooks/__init__.py +35 -0
- forge/session/hooks/models.py +73 -0
- forge/session/hooks/session_start.py +507 -0
- forge/session/identity.py +84 -0
- forge/session/index.py +553 -0
- forge/session/manager.py +1506 -0
- forge/session/models.py +572 -0
- forge/session/overrides.py +344 -0
- forge/session/plan_resolution.py +286 -0
- forge/session/prev_sessions.py +128 -0
- forge/session/store.py +431 -0
- forge/session/validation.py +47 -0
- forge/session/worktree/__init__.py +65 -0
- forge/session/worktree/cleanup.py +262 -0
- forge/session/worktree/config_copy.py +203 -0
- forge/session/worktree/create.py +332 -0
- forge/sidecar/__init__.py +29 -0
- forge/sidecar/container.py +161 -0
- forge/sidecar/docker.py +86 -0
- forge/sidecar/secrets.py +19 -0
- multi_forge-0.2.0.dist-info/METADATA +242 -0
- multi_forge-0.2.0.dist-info/RECORD +311 -0
- multi_forge-0.2.0.dist-info/WHEEL +4 -0
- multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
- multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
- multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
forge/guard/types.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""Type definitions for the Policy Engine.
|
|
2
|
+
|
|
3
|
+
All types are dataclasses for easy serialization and dacite compatibility.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Any, Literal
|
|
10
|
+
|
|
11
|
+
# Type aliases for clarity
|
|
12
|
+
DecisionType = Literal["allow", "deny", "warn", "needs_review"]
|
|
13
|
+
Severity = Literal["critical", "high", "medium", "low"]
|
|
14
|
+
FailMode = Literal["open", "closed"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def extract_added_lines(diff_chunk: str) -> str:
    """Extract only the added lines from a unified diff chunk.

    Strips diff headers, context lines, and removed lines, returning only
    the content of ``+`` lines (with the ``+`` prefix removed). This makes
    on-demand diff content semantically consistent with hook-provided content
    (i.e., "what's being introduced", not "what's being removed").

    Hunk headers (``@@ -a,b +c,d @@``) are used to tell hunk bodies apart
    from file headers. Inside a hunk body every ``+`` line is content, even
    when the added text itself begins with ``++`` (e.g. ``++i;`` shows up as
    ``+++i;``). Outside a hunk body, ``+++`` lines are treated as file
    headers and skipped, so chunks without hunk headers behave as before.

    Args:
        diff_chunk: Unified diff text; may be empty or lack hunk headers.

    Returns:
        The added lines joined with newlines (empty string if there are none).
    """
    import re  # local import: the module-level import block stays untouched

    hunk_header = re.compile(r"^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@")
    added: list[str] = []
    # Remaining old-side / new-side lines promised by the current hunk header.
    old_left = 0
    new_left = 0
    for line in diff_chunk.splitlines():
        header = hunk_header.match(line)
        if header:
            # An omitted count in a hunk header means exactly one line.
            old_left = int(header.group(1)) if header.group(1) is not None else 1
            new_left = int(header.group(2)) if header.group(2) is not None else 1
            continue
        if line.startswith("diff "):
            # A new file section always ends the current hunk, even when the
            # previous header's counts were inaccurate or the hunk truncated.
            old_left = 0
            new_left = 0
            continue
        if old_left > 0 or new_left > 0:
            if line.startswith("+"):
                added.append(line[1:])
                new_left -= 1
            elif line.startswith("-"):
                old_left -= 1
            elif not line.startswith("\\"):
                # Context line: counts against both sides. Lines starting
                # with a backslash ("\ No newline at end of file") count
                # against neither.
                old_left -= 1
                new_left -= 1
            continue
        # Outside any hunk body: keep the original heuristic.
        if line.startswith("+") and not line.startswith("+++"):
            added.append(line[1:])
    return "\n".join(added)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
class ActionContext:
    """Immutable, normalized description of a pending Claude Code action.

    Built from the hook payload and handed to every policy evaluation as
    its input.
    """

    # Hook event type, e.g. "PreToolUse.Write".
    event: str
    # Tool being invoked, e.g. "Write" or "Edit".
    tool_name: str
    # Raw tool input arguments as received from Claude Code.
    tool_args: dict[str, Any]
    # Absolute path to the repository root.
    repo_root: str
    # Name of the current Forge session.
    session_name: str
    # Normalized path of the file being modified, when applicable.
    target_path: str | None = None
    # Content being introduced: new file content (Write), new_string (Edit),
    # or the added lines extracted from a unified diff (on-demand check).
    # Regex policies match against this field.
    new_content: str | None = None
    # Full unified diff chunk, set for on-demand checks only; gives
    # LLM-based policies richer context. None for hook-triggered evaluations.
    raw_diff: str | None = None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class Violation:
    """One policy violation reported by a policy evaluation."""

    # Unique rule identifier, e.g. "tdd.tests-before-impl".
    rule_id: str
    # Human-readable explanation of the violation.
    message: str
    # How serious the violation is.
    severity: Severity
    # What triggered the violation (code snippet, etc.).
    evidence: str | None = None
    # How to resolve the violation.
    suggested_fix: str | None = None
    # For semantic policies: quoted plan sections that were violated.
    citations: list[str] = field(default_factory=list)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class PolicyDecision:
    """Outcome produced by evaluating a single policy."""

    # The policy's verdict. ``needs_review`` must be resolved by semantic
    # supervision before a hook allows the action.
    decision: DecisionType
    # Identifier of the policy that made this decision.
    policy_id: str
    # Violations found (for deny/warn decisions).
    violations: list[Violation] = field(default_factory=list)
    # Non-blocking warnings to display.
    warnings: list[str] = field(default_factory=list)
    # Why the policy exists; shown on deny to help models understand the
    # goal and surface conflicts instead of working around them.
    intent: str | None = None
    # Whether this was a cached verdict (for debugging).
    cached: bool = False
    # ISO8601 timestamp of the evaluation (for logging).
    evaluated_at: str | None = None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@dataclass
class CompositeDecision:
    """Combined outcome of several policy evaluations.

    The PolicyEngine evaluates all applicable policies and composes their
    decisions using the "any deny blocks" rule.
    """

    # Composed result (any deny → deny).
    final_decision: DecisionType
    # Individual policy decisions, kept for debugging.
    decisions: list[PolicyDecision] = field(default_factory=list)
    # Violations that caused the deny.
    blocking_violations: list[Violation] = field(default_factory=list)
    # Accumulated warnings from all policies.
    all_warnings: list[str] = field(default_factory=list)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""WorkflowPolicy — composable tagger → branch → stage pipeline.
|
|
2
|
+
|
|
3
|
+
Provides a configurable policy that classifies actions via cheap LLM triage,
|
|
4
|
+
routes them through matching branches, and escalates through filter → checker →
|
|
5
|
+
reviewer stages. Plugs into the existing PolicyEngine via bundle registration.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .config import BranchConfig, WorkflowConfig
|
|
9
|
+
from .divergence import build_divergence_config
|
|
10
|
+
from .policy import WorkflowPolicy
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"BranchConfig",
|
|
14
|
+
"WorkflowConfig",
|
|
15
|
+
"WorkflowPolicy",
|
|
16
|
+
"build_divergence_config",
|
|
17
|
+
]
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Branch routing for WorkflowPolicy.
|
|
2
|
+
|
|
3
|
+
A branch is a routing target selected by tag match. It contains
|
|
4
|
+
optional stages (filter → checker → reviewer) that execute in order.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
|
|
11
|
+
from forge.guard.types import ActionContext, PolicyDecision
|
|
12
|
+
from forge.guard.workflow.config import BranchConfig
|
|
13
|
+
from forge.guard.workflow.stages import CheckerStage, FilterStage, ReviewerStage
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class Branch:
    """Routing target that is selected when an action's tags match."""

    # Branch name.
    name: str
    # Tags this branch is matched against.
    match_tags: list[str]
    # "all" requires every entry of match_tags; any other value means "any".
    match_mode: str
    # Optional stages, executed in this order when present.
    filter: FilterStage | None
    checker: CheckerStage | None
    reviewer: ReviewerStage | None

    @classmethod
    def from_config(cls, config: BranchConfig) -> Branch:
        """Build a branch, instantiating a stage for each configured section."""
        filter_stage = FilterStage(config.filter) if config.filter else None
        checker_stage = CheckerStage(config.checker) if config.checker else None
        reviewer_stage = ReviewerStage(config.reviewer) if config.reviewer else None
        return cls(
            name=config.name,
            match_tags=config.match_tags,
            match_mode=config.match_mode,
            filter=filter_stage,
            checker=checker_stage,
            reviewer=reviewer_stage,
        )

    def matches(self, tags: list[str]) -> bool:
        """Tell whether ``tags`` select this branch.

        A branch without ``match_tags`` never matches. In "all" mode every
        configured tag must be present; otherwise one is enough.
        """
        wanted = self.match_tags
        if not wanted:
            return False
        quantifier = all if self.match_mode == "all" else any
        return quantifier(tag in tags for tag in wanted)

    def execute(self, context: ActionContext, tags: list[str], policy_id: str) -> PolicyDecision:
        """Run the configured stages: filter → checker → reviewer.

        - the filter rejects the action (``passes()`` is False) → allow
        - the checker returns a decision → that decision is returned as-is
        - the checker returns None → continue to the reviewer
        - the reviewer's decision is final
        - nothing decided (or no stages configured) → allow
        """
        gate = self.filter
        if gate and not gate.passes(context):
            return PolicyDecision(decision="allow", policy_id=policy_id)

        cheap = self.checker
        outcome = cheap.check(context, tags, policy_id) if cheap else None
        if outcome is not None:
            return outcome

        deep = self.reviewer
        if deep:
            return deep.review(context, tags, policy_id)

        return PolicyDecision(decision="allow", policy_id=policy_id)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Configuration dataclasses for WorkflowPolicy.
|
|
2
|
+
|
|
3
|
+
Deserialized from ``bundle_config["workflow"]["workflows"]`` dicts
|
|
4
|
+
via ``dacite.from_dict(WorkflowConfig, data)``.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class FilterConfig:
    """Settings for FilterStage, the deterministic gating stage."""

    # Path patterns; empty by default.
    path_patterns: list[str] = field(default_factory=list)
    # Exclusion patterns; empty by default.
    exclude_patterns: list[str] = field(default_factory=list)
    # Optional content-length limit; None by default.
    max_content_length: int | None = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class CheckerConfig:
    """Settings for CheckerStage, the cheap LLM check."""

    # Model identifier used for the check.
    model: str = "gemini/gemini-2.0-flash"
    # Prompt template; empty by default.
    prompt_template: str = ""
    # Optional system prompt.
    system_prompt: str | None = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class ReviewerConfig:
    """Settings for ReviewerStage, the deep LLM review."""

    # Model identifier used for the review.
    model: str = "gemini/gemini-2.0-flash"
    # Prompt template; empty by default.
    prompt_template: str = ""
    # Optional system prompt.
    system_prompt: str | None = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class BranchConfig:
    """Settings describing one routing branch."""

    # Branch name.
    name: str
    # Tags that route an action to this branch.
    match_tags: list[str]
    # Tag matching mode; defaults to "any".
    match_mode: str = "any"
    # Optional per-stage settings; a stage left as None is not configured.
    filter: FilterConfig | None = None
    checker: CheckerConfig | None = None
    reviewer: ReviewerConfig | None = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class WorkflowConfig:
    """Top-level settings for one WorkflowPolicy instance."""

    # Workflow name.
    name: str
    # Human-readable description of the workflow.
    description: str
    # Why the workflow exists; empty by default.
    intent: str = ""
    # Tool names the workflow is configured for.
    tool_names: list[str] = field(default_factory=lambda: ["Write", "Edit"])
    # Model identifier and prompt used for tagging.
    tagger_model: str = "gemini/gemini-2.0-flash"
    tagger_prompt: str = ""
    # Routing branches; empty by default.
    branches: list[BranchConfig] = field(default_factory=list)
    # Throttle interval in seconds and cache size limit.
    throttle_seconds: int = 30
    max_cache_entries: int = 50
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Divergence-from-mean workflow — first concrete WorkflowPolicy instance.
|
|
2
|
+
|
|
3
|
+
Cost model: tagger (~$0.001) filters 80% → checker (~$0.001) short-circuits 80%
|
|
4
|
+
→ only ~4% reach reviewer (~$0.05). Total: ~$0.32/100 changes vs $5.00 reviewing
|
|
5
|
+
everything.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from forge.guard.workflow.config import (
|
|
13
|
+
BranchConfig,
|
|
14
|
+
CheckerConfig,
|
|
15
|
+
FilterConfig,
|
|
16
|
+
ReviewerConfig,
|
|
17
|
+
WorkflowConfig,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
DIVERGENCE_TAGGER_PROMPT = """\
|
|
21
|
+
Classify this code change into exactly one category (respond with just the tag):
|
|
22
|
+
|
|
23
|
+
- architectural: changes to module structure, public APIs, cross-cutting patterns
|
|
24
|
+
- migration: database schema, data migration scripts
|
|
25
|
+
- config: configuration files, environment setup
|
|
26
|
+
- routine: standard implementation, bug fixes, test updates
|
|
27
|
+
- trivial: whitespace, comments, import reordering
|
|
28
|
+
|
|
29
|
+
Tool: {tool_name}
|
|
30
|
+
File: {target_path}
|
|
31
|
+
Content (truncated):
|
|
32
|
+
{content}
|
|
33
|
+
|
|
34
|
+
Tag:"""
|
|
35
|
+
|
|
36
|
+
DIVERGENCE_CHECKER_PROMPT = """\
|
|
37
|
+
Does this code change follow the project's established patterns?
|
|
38
|
+
Tool: {tool_name}, File: {target_path}, Tags: {tags}
|
|
39
|
+
|
|
40
|
+
Content:
|
|
41
|
+
{content}
|
|
42
|
+
|
|
43
|
+
Respond with JSON: {{"aligned": true/false, "reason": "one sentence"}}"""
|
|
44
|
+
|
|
45
|
+
DIVERGENCE_REVIEWER_PROMPT = """\
|
|
46
|
+
Review this code change for architectural consistency.
|
|
47
|
+
Tool: {tool_name}, File: {target_path}, Tags: {tags}
|
|
48
|
+
|
|
49
|
+
Content:
|
|
50
|
+
{content}
|
|
51
|
+
|
|
52
|
+
Evaluate whether this change aligns with the project's established patterns.
|
|
53
|
+
If divergent, cite specific evidence and suggest corrections.
|
|
54
|
+
|
|
55
|
+
Respond with JSON in a code fence:
|
|
56
|
+
```json
|
|
57
|
+
{{
|
|
58
|
+
"verdict": "aligned" | "divergent",
|
|
59
|
+
"confidence": 0.0-1.0,
|
|
60
|
+
"violations": [
|
|
61
|
+
{{
|
|
62
|
+
"severity": "high" | "medium" | "low",
|
|
63
|
+
"evidence": "what diverges from established patterns",
|
|
64
|
+
"suggested_fix": "what should be done instead",
|
|
65
|
+
"citations": ["specific pattern or convention being violated"]
|
|
66
|
+
}}
|
|
67
|
+
]
|
|
68
|
+
}}
|
|
69
|
+
```"""
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def build_divergence_config(**overrides: Any) -> WorkflowConfig:
    """Assemble the divergence-from-mean workflow configuration.

    A single "needs-review" branch is configured: it matches the
    architectural/migration tags, carries exclude patterns for test paths,
    and has both a checker and a reviewer prompt. Actions tagged routine or
    trivial match no branch.

    Args:
        **overrides: Replacement values for any WorkflowConfig field
            (e.g., tagger_model); they take precedence over the defaults.

    Returns:
        WorkflowConfig ready for WorkflowPolicy instantiation.
    """
    needs_review = BranchConfig(
        name="needs-review",
        match_tags=["architectural", "migration"],
        match_mode="any",
        filter=FilterConfig(exclude_patterns=[r"^tests/", r"^test_"]),
        checker=CheckerConfig(prompt_template=DIVERGENCE_CHECKER_PROMPT),
        reviewer=ReviewerConfig(prompt_template=DIVERGENCE_REVIEWER_PROMPT),
    )
    intent_text = (
        "Catch architectural drift early. Code changes that deviate from established "
        "patterns need review to ensure they are intentional improvements, not accidental "
        "divergence from project conventions."
    )
    settings: dict[str, Any] = {
        "name": "divergence",
        "description": "Flag code changes that diverge from established project patterns",
        "intent": intent_text,
        "tagger_model": "gemini/gemini-2.0-flash",
        "tagger_prompt": DIVERGENCE_TAGGER_PROMPT,
        "branches": [needs_review],
    }
    # Caller-supplied values win over the defaults above.
    return WorkflowConfig(**{**settings, **overrides})
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""WorkflowPolicy — composable tagger → branch → stage pipeline.
|
|
2
|
+
|
|
3
|
+
Plugs into the existing PolicyEngine via bundle registration.
|
|
4
|
+
Zero engine changes required.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from forge.core.reactive import ThrottleCache, compute_cache_key, tag_action
|
|
13
|
+
from forge.guard.deterministic.base import StatefulDeterministicPolicy
|
|
14
|
+
from forge.guard.types import ActionContext, PolicyDecision
|
|
15
|
+
from forge.guard.workflow.branches import Branch
|
|
16
|
+
from forge.guard.workflow.config import WorkflowConfig
|
|
17
|
+
|
|
18
|
+
_log = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class WorkflowPolicy(StatefulDeterministicPolicy):
    """Composable tagger → branch → stage pipeline.

    Evaluation steps:
        1. Look the action up in the ThrottleCache and reuse a recent verdict
        2. Classify the action with ``tag_action`` (cheap LLM)
        3. Pick the first branch whose ``matches`` accepts the tags
        4. Run that branch (filter → checker → reviewer)
        5. Cache the result when it is a clean allow
    """

    def __init__(self, config: WorkflowConfig) -> None:
        """Create the cache and instantiate one Branch per configured branch."""
        self._config = config
        self._cache = ThrottleCache(
            ttl_seconds=config.throttle_seconds,
            max_entries=config.max_cache_entries,
        )
        self._branches = [Branch.from_config(branch_cfg) for branch_cfg in config.branches]

    @property
    def policy_id(self) -> str:
        """Policy identifier: ``workflow.`` followed by the configured name."""
        return f"workflow.{self._config.name}"

    @property
    def description(self) -> str:
        """Description taken from the workflow config."""
        return self._config.description

    @property
    def intent(self) -> str:
        """Intent text taken from the workflow config."""
        return self._config.intent

    def applies_to(self, context: ActionContext) -> bool:
        """Return True when the action's tool is one of the configured tool names."""
        return context.tool_name in self._config.tool_names

    def _evaluate(self, context: ActionContext) -> PolicyDecision:
        """Run the cache → tag → route → execute pipeline for one action."""
        key = compute_cache_key(context.tool_name, context.target_path, context.new_content)

        # Only clean allows are ever stored, so a cache hit is an allow.
        if self._cache.check(key) is not None:
            hit = self._allow()
            hit.cached = True
            return hit

        tags = tag_action(
            context,
            model=self._config.tagger_model,
            prompt_template=self._config.tagger_prompt,
        )

        # First-match routing: later branches are not consulted.
        matched = next((b for b in self._branches if b.matches(tags)), None)
        if matched is None:
            # No branch matched → allow (not cached: tagger may have transiently
            # failed, and caching would suppress re-evaluation after recovery)
            return self._allow()

        verdict = matched.execute(context, tags, self.policy_id)
        if verdict.decision == "deny":
            verdict.intent = self.intent
        elif verdict.decision == "allow" and not verdict.warnings:
            self._cache.update(key, decision="allow", tags=tags)
        return verdict

    def get_state(self) -> dict[str, Any]:
        """Return the policy state: the cache's state under the "cache" key."""
        return {"cache": self._cache.get_state()}

    def set_state(self, state: dict[str, Any]) -> None:
        """Restore the cache from ``state["cache"]`` (empty dict when absent)."""
        self._cache.set_state(state.get("cache", {}))
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""Stage implementations for WorkflowPolicy branches.
|
|
2
|
+
|
|
3
|
+
Stages are plain classes (not policies). They produce PolicyDecision objects
|
|
4
|
+
but don't implement the Policy protocol — WorkflowPolicy owns that.
|
|
5
|
+
|
|
6
|
+
UX constraint: Non-blocking findings go in ``PolicyDecision.warnings`` (printed
|
|
7
|
+
by the hook), not ``violations`` (only shown on deny). Stages must return a
|
|
8
|
+
resolved allow/warn/deny decision rather than emitting ``needs_review``.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import re
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from forge.core.reactive.structured_output import extract_json_from_response
|
|
18
|
+
from forge.guard.types import ActionContext, PolicyDecision, Severity, Violation
|
|
19
|
+
from forge.guard.workflow.config import CheckerConfig, FilterConfig, ReviewerConfig
|
|
20
|
+
|
|
21
|
+
_log = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
# Confidence threshold for blocking (same as supervisor)
|
|
24
|
+
CONFIDENCE_THRESHOLD = 0.8
|
|
25
|
+
|
|
26
|
+
_VALID_SEVERITIES: set[str] = {"critical", "high", "medium", "low"}


def _normalize_severity(raw: str) -> Severity:
    """Coerce an arbitrary severity value to a valid Severity literal.

    Args:
        raw: Severity as reported by the reviewer. Expected to be a string,
            but the value comes from parsed LLM JSON and may be ``None``,
            a number, or any other type.

    Returns:
        The lower-cased, stripped value when it is one of
        ``_VALID_SEVERITIES``; otherwise ``"medium"``.
    """
    if not isinstance(raw, str):
        # A null/numeric severity must not raise: an exception here would
        # abort verdict mapping and discard an otherwise valid decision.
        return "medium"
    normalized = raw.lower().strip()
    if normalized in _VALID_SEVERITIES:
        return normalized  # type: ignore[return-value]
    return "medium"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class FilterStage:
    """Regex/size gate evaluated before any LLM stage (makes no LLM calls).

    All patterns are compiled in the constructor, so a malformed regex
    raises when the stage is built instead of being discovered (or silently
    ignored) while evaluating an action.
    """

    def __init__(self, config: FilterConfig) -> None:
        """Compile the include/exclude patterns and record the size limit."""
        self._path_patterns = list(map(re.compile, config.path_patterns))
        self._exclude_patterns = list(map(re.compile, config.exclude_patterns))
        self._max_content_length = config.max_content_length

    def passes(self, context: ActionContext) -> bool:
        """Decide whether the action should continue to the later stages.

        Checks, in order:
          1. Any exclude pattern found in the path → False.
          2. Include patterns configured but none found in the path → False.
          3. A size limit configured and the new content is longer → False.
        Otherwise True. A missing path or missing content is treated as "".
        """
        target = context.target_path or ""

        if any(rx.search(target) for rx in self._exclude_patterns):
            return False

        # An empty include list means "no path restriction".
        if self._path_patterns and not any(rx.search(target) for rx in self._path_patterns):
            return False

        limit = self._max_content_length
        if limit is None:
            return True
        return len(context.new_content or "") <= limit
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class CheckerStage:
    """Intermediate LLM check issued through ``SyncAdapter.ask()``."""

    def __init__(self, config: CheckerConfig) -> None:
        """Keep the checker configuration (model, prompts) for later calls."""
        self._config = config

    def check(self, context: ActionContext, tags: list[str], policy_id: str) -> PolicyDecision | None:
        """Ask the checker model whether the action is aligned.

        Returns:
            An allow ``PolicyDecision`` when the parsed reply has
            ``"aligned": true`` (the reviewer is then unnecessary), or
            ``None`` in every other case — unparseable reply, any other
            ``aligned`` value, or any exception — so the caller escalates
            to the reviewer stage.
        """
        try:
            from forge.core.llm import SyncAdapter, get_client

            # Only the first 2000 characters of the diff/content are sent.
            fields = {
                "tool_name": context.tool_name,
                "target_path": context.target_path or "N/A",
                "content": (context.raw_diff or context.new_content or "")[:2000],
                "tags": ", ".join(tags),
            }
            rendered = self._config.prompt_template.format(**fields)

            reply = SyncAdapter(get_client(self._config.model)).ask(
                rendered, system=self._config.system_prompt
            )

            parsed = extract_json_from_response(reply)
            if parsed is None:
                _log.debug("Checker could not parse response, escalating to reviewer")
            elif parsed.get("aligned") is True:
                return PolicyDecision(decision="allow", policy_id=policy_id)

        except Exception as exc:
            # Any failure escalates rather than blocks or allows.
            _log.warning("CheckerStage failed: %s", exc)

        return None
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class ReviewerStage:
    """Full LLM review issued through ``SyncAdapter.ask()``."""

    def __init__(self, config: ReviewerConfig) -> None:
        """Keep the reviewer configuration (model, prompts) for later calls."""
        self._config = config

    def review(self, context: ActionContext, tags: list[str], policy_id: str) -> PolicyDecision:
        """Run the review and always return a resolved decision.

        The parsed reply is handed to ``_map_verdict``, which yields
        allow, deny, or warn. Two situations are answered here directly,
        both as a non-blocking warn (fail-open):

        - the reply cannot be parsed as JSON
        - any exception is raised while prompting, calling, or mapping
        """
        try:
            from forge.core.llm import SyncAdapter, get_client

            # Only the first 4000 characters of the diff/content are sent.
            fields = {
                "tool_name": context.tool_name,
                "target_path": context.target_path or "N/A",
                "content": (context.raw_diff or context.new_content or "")[:4000],
                "tags": ", ".join(tags),
            }
            rendered = self._config.prompt_template.format(**fields)

            reply = SyncAdapter(get_client(self._config.model)).ask(
                rendered, system=self._config.system_prompt
            )

            parsed = extract_json_from_response(reply)
            if parsed is not None:
                return _map_verdict(parsed, policy_id)

            return PolicyDecision(
                decision="warn",
                policy_id=policy_id,
                warnings=["Reviewer could not parse LLM response"],
            )

        except Exception as exc:
            _log.warning("ReviewerStage failed: %s", exc)
            return PolicyDecision(
                decision="warn",
                policy_id=policy_id,
                warnings=[f"Reviewer error: {exc}, failing open"],
            )
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _map_verdict(data: dict[str, Any], policy_id: str) -> PolicyDecision:
    """Map a JSON verdict dict to a PolicyDecision.

    Mapping:
    - verdict "aligned" (or verdict missing) → allow
    - otherwise, confidence ≥ CONFIDENCE_THRESHOLD and at least one violation
      with citations → deny, carrying ``violations``
    - otherwise → warn, carrying ``warnings`` (visible in hook UX)

    The dict is parsed LLM output, so malformed fields are tolerated rather
    than raised on: a missing, null, or non-numeric ``confidence`` counts as
    0.0 (which can never deny), a ``violations`` value that is not a list
    counts as empty, and non-dict entries in it are ignored.

    Args:
        data: Parsed verdict with optional ``verdict``, ``confidence`` and
            ``violations`` keys.
        policy_id: Identifier stamped on the decision and used as the
            prefix of each violation's ``rule_id``.

    Returns:
        The resolved allow / deny / warn decision.
    """
    # Decide "aligned" first so a malformed confidence cannot turn an
    # aligned verdict into an error.
    if data.get("verdict", "aligned") == "aligned":
        return PolicyDecision(decision="allow", policy_id=policy_id)

    try:
        confidence = float(data.get("confidence", 0.0))
    except (TypeError, ValueError):
        # null / non-numeric confidence: treat as no confidence (warn path).
        confidence = 0.0

    raw_violations = data.get("violations")
    if not isinstance(raw_violations, (list, tuple)):
        # e.g. JSON null or a bare string — nothing usable to iterate.
        raw_violations = []
    findings = [v for v in raw_violations if isinstance(v, dict)]

    has_citations = any(v.get("citations") for v in findings)

    if confidence >= CONFIDENCE_THRESHOLD and has_citations:
        violations = [
            Violation(
                rule_id=f"{policy_id}.reviewer",
                # str() guards against null/numeric severities from the LLM.
                severity=_normalize_severity(str(v.get("severity") or "medium")),
                message=v.get("evidence") or "Divergent change detected",
                evidence=v.get("evidence"),
                suggested_fix=v.get("suggested_fix"),
                citations=v.get("citations") or [],
            )
            for v in findings
        ]
        return PolicyDecision(
            decision="deny",
            policy_id=policy_id,
            violations=violations,
        )

    # Low confidence or no citations → warn (visible in hook UX)
    reasons = [str(v.get("evidence") or v) for v in findings]
    warnings = reasons if reasons else [f"Reviewer flagged as divergent (confidence={confidence:.2f})"]
    return PolicyDecision(
        decision="warn",
        policy_id=policy_id,
        warnings=warnings,
    )
|