multi-forge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forge/__init__.py +3 -0
- forge/_extensions/agents/.gitkeep +0 -0
- forge/_extensions/commands/.gitkeep +0 -0
- forge/_extensions/skills/analyze/SKILL.md +87 -0
- forge/_extensions/skills/challenge/SKILL.md +91 -0
- forge/_extensions/skills/consensus/SKILL.md +120 -0
- forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
- forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
- forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
- forge/_extensions/skills/debate/SKILL.md +116 -0
- forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
- forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
- forge/_extensions/skills/panel/SKILL.md +141 -0
- forge/_extensions/skills/panel/resources/synthesis.md +103 -0
- forge/_extensions/skills/qa/SKILL.md +704 -0
- forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
- forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
- forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
- forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
- forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
- forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
- forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
- forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
- forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
- forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
- forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
- forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
- forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
- forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
- forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
- forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
- forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
- forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
- forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
- forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
- forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
- forge/_extensions/skills/qa/resources/checklist.md +103 -0
- forge/_extensions/skills/qa/resources/report-template.md +62 -0
- forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
- forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
- forge/_extensions/skills/review/SKILL.md +125 -0
- forge/_extensions/skills/review/references/claude-4.6.md +474 -0
- forge/_extensions/skills/review/references/claude-4.7.md +710 -0
- forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
- forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
- forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
- forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
- forge/_extensions/skills/review/resources/code-gemini.md +184 -0
- forge/_extensions/skills/review/resources/code-openai.md +203 -0
- forge/_extensions/skills/review/resources/code.md +160 -0
- forge/_extensions/skills/review-docs/SKILL.md +121 -0
- forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
- forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
- forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
- forge/_extensions/skills/review-docs/resources/docs.md +170 -0
- forge/_extensions/skills/smoke-test/SKILL.md +27 -0
- forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
- forge/_extensions/skills/understand/SKILL.md +148 -0
- forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
- forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
- forge/_extensions/skills/understand/resources/code-openai.md +181 -0
- forge/_extensions/skills/understand/resources/code.md +163 -0
- forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
- forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
- forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
- forge/_extensions/skills/understand/resources/docs.md +177 -0
- forge/_extensions/skills/walkthrough/SKILL.md +599 -0
- forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
- forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
- forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
- forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
- forge/backend/__init__.py +174 -0
- forge/backend/adapters/__init__.py +38 -0
- forge/backend/adapters/litellm.py +158 -0
- forge/backend/creation.py +89 -0
- forge/backend/registry.py +178 -0
- forge/cli/__init__.py +16 -0
- forge/cli/auth.py +483 -0
- forge/cli/backend.py +298 -0
- forge/cli/claude.py +411 -0
- forge/cli/config_cmd.py +303 -0
- forge/cli/extensions.py +1001 -0
- forge/cli/gc.py +165 -0
- forge/cli/guard.py +1018 -0
- forge/cli/guards.py +106 -0
- forge/cli/handoff.py +110 -0
- forge/cli/hooks/__init__.py +36 -0
- forge/cli/hooks/_group.py +20 -0
- forge/cli/hooks/_helpers.py +149 -0
- forge/cli/hooks/commands.py +1677 -0
- forge/cli/hooks/direct_commands.py +1304 -0
- forge/cli/hooks/install.py +232 -0
- forge/cli/hooks/policy.py +151 -0
- forge/cli/hooks/read_hygiene.py +74 -0
- forge/cli/hooks/verification.py +370 -0
- forge/cli/logs.py +406 -0
- forge/cli/main.py +292 -0
- forge/cli/proxy.py +1821 -0
- forge/cli/proxy_costs.py +313 -0
- forge/cli/search.py +416 -0
- forge/cli/session.py +892 -0
- forge/cli/session_addendum.py +81 -0
- forge/cli/session_fork.py +750 -0
- forge/cli/session_handoff.py +141 -0
- forge/cli/session_lifecycle.py +2053 -0
- forge/cli/session_manage.py +1336 -0
- forge/cli/session_memory.py +201 -0
- forge/cli/status_line.py +1398 -0
- forge/cli/workflow.py +1964 -0
- forge/config/__init__.py +110 -0
- forge/config/dataclass_utils.py +88 -0
- forge/config/defaults/__init__.py +0 -0
- forge/config/defaults/backends/__init__.py +0 -0
- forge/config/defaults/backends/litellm.yaml +196 -0
- forge/config/defaults/templates/__init__.py +0 -0
- forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
- forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
- forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
- forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
- forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
- forge/config/defaults/templates/litellm-gemini.yaml +21 -0
- forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
- forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
- forge/config/defaults/templates/litellm-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
- forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
- forge/config/defaults/templates/openrouter-glm.yaml +23 -0
- forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
- forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
- forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
- forge/config/defaults/templates/openrouter-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
- forge/config/loader.py +675 -0
- forge/config/schema.py +448 -0
- forge/core/__init__.py +5 -0
- forge/core/auth/__init__.py +67 -0
- forge/core/auth/capabilities.py +219 -0
- forge/core/auth/credentials_file.py +244 -0
- forge/core/auth/protocols.py +18 -0
- forge/core/auth/secrets.py +243 -0
- forge/core/auth/template_secrets.py +112 -0
- forge/core/data/__init__.py +5 -0
- forge/core/data/model_catalog.yaml +1522 -0
- forge/core/data/pricing.yaml +140 -0
- forge/core/data/system_prompt_addendums/__init__.py +0 -0
- forge/core/data/system_prompt_addendums/gemini.md +330 -0
- forge/core/data/system_prompt_addendums/openai.md +328 -0
- forge/core/llm/__init__.py +231 -0
- forge/core/llm/clients/__init__.py +14 -0
- forge/core/llm/clients/base.py +115 -0
- forge/core/llm/clients/litellm.py +619 -0
- forge/core/llm/clients/openai_compat.py +244 -0
- forge/core/llm/clients/openrouter.py +234 -0
- forge/core/llm/credentials.py +439 -0
- forge/core/llm/detection.py +86 -0
- forge/core/llm/errors.py +44 -0
- forge/core/llm/protocols.py +80 -0
- forge/core/llm/types.py +176 -0
- forge/core/logging.py +146 -0
- forge/core/models/__init__.py +91 -0
- forge/core/models/catalog.py +467 -0
- forge/core/models/pricing.py +165 -0
- forge/core/models/types.py +167 -0
- forge/core/naming.py +212 -0
- forge/core/ops/__init__.py +73 -0
- forge/core/ops/context.py +141 -0
- forge/core/ops/gc.py +802 -0
- forge/core/ops/proxy.py +146 -0
- forge/core/ops/resolution.py +135 -0
- forge/core/ops/session.py +344 -0
- forge/core/ops/session_context.py +548 -0
- forge/core/paths.py +38 -0
- forge/core/process.py +54 -0
- forge/core/reactive/__init__.py +38 -0
- forge/core/reactive/cost_tracking.py +300 -0
- forge/core/reactive/env.py +180 -0
- forge/core/reactive/proxy.py +78 -0
- forge/core/reactive/routing.py +622 -0
- forge/core/reactive/session_runner.py +185 -0
- forge/core/reactive/structured_output.py +62 -0
- forge/core/reactive/tagger.py +94 -0
- forge/core/reactive/throttle.py +132 -0
- forge/core/state/__init__.py +59 -0
- forge/core/state/exceptions.py +59 -0
- forge/core/state/io.py +140 -0
- forge/core/state/lock.py +99 -0
- forge/core/state/timestamps.py +60 -0
- forge/core/transcript.py +78 -0
- forge/core/typing_helpers.py +24 -0
- forge/core/workqueue/__init__.py +67 -0
- forge/core/workqueue/queue.py +552 -0
- forge/core/workqueue/types.py +63 -0
- forge/guard/__init__.py +26 -0
- forge/guard/deterministic/__init__.py +26 -0
- forge/guard/deterministic/base.py +158 -0
- forge/guard/deterministic/coding_standards.py +256 -0
- forge/guard/deterministic/registry.py +148 -0
- forge/guard/deterministic/tdd.py +171 -0
- forge/guard/engine.py +216 -0
- forge/guard/protocols.py +91 -0
- forge/guard/queries.py +96 -0
- forge/guard/semantic/__init__.py +34 -0
- forge/guard/semantic/promotion.py +18 -0
- forge/guard/semantic/supervisor.py +813 -0
- forge/guard/semantic/verdict.py +183 -0
- forge/guard/store.py +124 -0
- forge/guard/team/__init__.py +6 -0
- forge/guard/team/config.py +24 -0
- forge/guard/team/handlers.py +209 -0
- forge/guard/team/prompts.py +41 -0
- forge/guard/types.py +125 -0
- forge/guard/workflow/__init__.py +17 -0
- forge/guard/workflow/branches.py +67 -0
- forge/guard/workflow/config.py +63 -0
- forge/guard/workflow/divergence.py +113 -0
- forge/guard/workflow/policy.py +87 -0
- forge/guard/workflow/stages.py +205 -0
- forge/install/__init__.py +55 -0
- forge/install/cli.py +281 -0
- forge/install/exceptions.py +163 -0
- forge/install/hooks.py +109 -0
- forge/install/installer.py +1037 -0
- forge/install/models.py +321 -0
- forge/install/preset.py +272 -0
- forge/install/settings_merge.py +831 -0
- forge/install/tracking.py +238 -0
- forge/install/version.py +141 -0
- forge/proxy/__init__.py +0 -0
- forge/proxy/base_client.py +181 -0
- forge/proxy/client_adapter.py +476 -0
- forge/proxy/client_factory.py +531 -0
- forge/proxy/converters.py +1206 -0
- forge/proxy/cost_logger.py +132 -0
- forge/proxy/cost_tracker.py +242 -0
- forge/proxy/data_models.py +338 -0
- forge/proxy/error_hints.py +92 -0
- forge/proxy/metrics.py +222 -0
- forge/proxy/model_spec.py +158 -0
- forge/proxy/proxies.py +333 -0
- forge/proxy/proxy_identity.py +134 -0
- forge/proxy/proxy_orchestrator.py +1018 -0
- forge/proxy/proxy_startup.py +54 -0
- forge/proxy/server.py +1561 -0
- forge/proxy/utils.py +537 -0
- forge/review/__init__.py +6 -0
- forge/review/adversarial.py +111 -0
- forge/review/consensus.py +236 -0
- forge/review/engine.py +356 -0
- forge/review/models.py +437 -0
- forge/review/resources/__init__.py +5 -0
- forge/review/resources/codereview-performance.md +85 -0
- forge/review/resources/codereview-quick.md +75 -0
- forge/review/resources/codereview-security.md +92 -0
- forge/review/resources/codereview.md +85 -0
- forge/review/resources/docreview-quick.md +75 -0
- forge/review/resources/docreview.md +86 -0
- forge/review/resources/thinkdeep.md +89 -0
- forge/review/routing.py +368 -0
- forge/review/synthesis.py +73 -0
- forge/runtime_config.py +438 -0
- forge/search/__init__.py +55 -0
- forge/search/bm25_store.py +264 -0
- forge/search/content_store.py +197 -0
- forge/search/engine.py +352 -0
- forge/search/exceptions.py +51 -0
- forge/search/extractor.py +234 -0
- forge/search/index_state.py +295 -0
- forge/search/store.py +215 -0
- forge/search/tokenizer.py +24 -0
- forge/session/__init__.py +130 -0
- forge/session/active.py +339 -0
- forge/session/artifacts.py +202 -0
- forge/session/claude/__init__.py +50 -0
- forge/session/claude/cleanup.py +105 -0
- forge/session/claude/invoke.py +236 -0
- forge/session/claude/paths.py +200 -0
- forge/session/cleanup.py +216 -0
- forge/session/config.py +34 -0
- forge/session/direct_model.py +107 -0
- forge/session/effective.py +169 -0
- forge/session/exceptions.py +255 -0
- forge/session/handoff.py +881 -0
- forge/session/handoff_agent.py +544 -0
- forge/session/hooks/__init__.py +35 -0
- forge/session/hooks/models.py +73 -0
- forge/session/hooks/session_start.py +507 -0
- forge/session/identity.py +84 -0
- forge/session/index.py +553 -0
- forge/session/manager.py +1506 -0
- forge/session/models.py +572 -0
- forge/session/overrides.py +344 -0
- forge/session/plan_resolution.py +286 -0
- forge/session/prev_sessions.py +128 -0
- forge/session/store.py +431 -0
- forge/session/validation.py +47 -0
- forge/session/worktree/__init__.py +65 -0
- forge/session/worktree/cleanup.py +262 -0
- forge/session/worktree/config_copy.py +203 -0
- forge/session/worktree/create.py +332 -0
- forge/sidecar/__init__.py +29 -0
- forge/sidecar/container.py +161 -0
- forge/sidecar/docker.py +86 -0
- forge/sidecar/secrets.py +19 -0
- multi_forge-0.2.0.dist-info/METADATA +242 -0
- multi_forge-0.2.0.dist-info/RECORD +311 -0
- multi_forge-0.2.0.dist-info/WHEEL +4 -0
- multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
- multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
- multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""Two-round consensus workflow with role-assigned workers.
|
|
2
|
+
|
|
3
|
+
Round 1: Each worker evaluates the subject from their assigned role.
|
|
4
|
+
Blinded (``resume_id=None``). Workers don't see each other.
|
|
5
|
+
|
|
6
|
+
Round 2: Each worker receives the reconciliation brief (all Round 1
|
|
7
|
+
positions) and produces a reconciled recommendation.
|
|
8
|
+
Still blinded (no conversation context).
|
|
9
|
+
|
|
10
|
+
Both rounds delegate to ``run_multi_review()`` for parallel fan-out.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
from forge.core.reactive.structured_output import extract_json_from_response
|
|
19
|
+
|
|
20
|
+
from .engine import run_multi_review
|
|
21
|
+
from .models import ConsensusOutput, ModelSpec, RoleSpec
|
|
22
|
+
from .routing import WorkerRoutingPlan
|
|
23
|
+
|
|
24
|
+
# Placeholder that a resource template must contain; run_consensus() replaces
# it with the worker's role prompt followed by CONSENSUS_GUARDRAIL.
ROLE_MARKER = "{role_prompt}"

# Honesty/evidence instruction appended to every Round 1 role prompt.
CONSENSUS_GUARDRAIL = (
    "\n\nIMPORTANT: You are participating in a structured consensus-building exercise. "
    "Provide your honest expert assessment from your assigned perspective. "
    "Support claims with evidence and reasoning. Do not fabricate evidence "
    "or misrepresent trade-offs. When you lack certainty, say so explicitly."
)

# Upper bound (in characters) on the raw worker output quoted in the
# reconciliation brief when no JSON payload could be extracted.
_MAX_EXCERPT_LEN = 1500

# Output contract appended to the end of the reconciliation brief.
# The example's opening fence carries the ``json`` tag so that the example
# agrees with the instruction sentence directly above it; previously the
# instruction asked for a ```json fence while the example showed a bare one.
# The body is a schema sketch (alternatives separated by ``|``), not literal JSON.
_ROUND2_OUTPUT_CONTRACT = (
    "\n## Required Output Format\n\n"
    "Respond with your reconciled assessment in JSON wrapped in a ```json code fence:\n\n"
    "```json\n"
    "{\n"
    '  "position": "SUPPORT" | "SUPPORT_WITH_CONDITIONS" | "OPPOSE",\n'
    '  "confidence": "LOW" | "MEDIUM" | "HIGH",\n'
    '  "agreements": ["point of agreement 1", ...],\n'
    '  "disagreements": ["unresolved point 1", ...],\n'
    '  "recommendation": "1-2 sentence reconciled recommendation",\n'
    '  "conditions": ["condition 1", ...]\n'
    "}\n"
    "```\n"
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def validate_resource(resource_path: str) -> str:
    """Load a resource template and verify it contains the role marker.

    The file is decoded explicitly as UTF-8 rather than with the platform's
    locale encoding, so the same template yields the same text on every
    host (a bare ``read_text()`` would use e.g. cp1252 on some Windows
    setups and mis-decode or reject non-ASCII characters).

    Args:
        resource_path: Filesystem path of the template to load.

    Returns:
        The full text of the resource.

    Raises:
        ValueError: If the text does not contain ``ROLE_MARKER``. Note that
            ``UnicodeDecodeError`` (raised for input that is not valid
            UTF-8) is itself a ``ValueError`` subclass.
        OSError: If the file cannot be opened or read.
    """
    content = Path(resource_path).read_text(encoding="utf-8")
    if ROLE_MARKER not in content:
        raise ValueError(f"Resource {resource_path} must contain '{ROLE_MARKER}' marker for role injection.")
    return content
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _build_reconciliation_brief(
    round1_results: list,
    role_map: dict[str, str],
    original_subject: str = "",
) -> str:
    """Assemble the Round 2 brief from the Round 1 worker results.

    The brief consists of, in order: the original subject (only when
    non-empty), one section per Round 1 result, the reconciliation task
    instructions, and the Round 2 output contract. Sections are joined
    with newlines.

    Each result section is headed by the worker's role, looked up in
    ``role_map`` by ``result.model_name`` ("unknown" when absent), so that
    positions are attributed to roles rather than to model names. Failed
    results report their error. Successful results show the JSON extracted
    from the worker's stdout, or, when none can be extracted, the first
    ``_MAX_EXCERPT_LEN`` characters of stdout followed by ``...`` if it
    was cut.
    """

    def _render_position(result) -> str:
        """Format a single Round 1 result as a role-labelled section."""
        role_label = role_map.get(result.model_name, "unknown")
        heading = f"## {role_label} perspective\n\n"

        if not result.success:
            return heading + f"Status: failed ({result.error})\n"

        # Structured payload first; bounded raw excerpt as the fallback.
        payload = extract_json_from_response(result.stdout)
        if payload is not None:
            position_text = json.dumps(payload, indent=2)
        else:
            raw_output = result.stdout
            was_cut = len(raw_output) > _MAX_EXCERPT_LEN
            position_text = raw_output[:_MAX_EXCERPT_LEN] + ("..." if was_cut else "")

        return heading + "Status: success\n" + f"Position: {position_text}\n"

    reconciliation_task = (
        "\n---\n\n"
        "# Reconciliation Task\n\n"
        "You have seen all initial positions above. Now:\n\n"
        "1. Identify the specific points of AGREEMENT across perspectives.\n"
        "2. Identify the specific points of DISAGREEMENT.\n"
        "3. For each disagreement, assess which position has stronger evidence.\n"
        "4. Propose a RECONCILED RECOMMENDATION that incorporates the strongest "
        "points from each perspective.\n"
        "5. If genuine consensus is not possible on a point, explicitly state "
        "'NO CONSENSUS' for that point and explain why.\n\n"
        "Maintain your assigned role perspective but be willing to update your "
        "position based on compelling evidence from other perspectives.\n"
    )

    parts: list[str] = []
    if original_subject:
        parts.append(f"# Original Subject\n\n{original_subject}\n")
    parts.append("# Round 1 Positions\n")
    parts.extend(_render_position(outcome) for outcome in round1_results)
    parts.append(reconciliation_task)
    parts.append(_ROUND2_OUTPUT_CONTRACT)

    return "\n".join(parts)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def run_consensus(
    resource_path: str,
    roles: list[RoleSpec],
    *,
    timeout_seconds: int = 600,
    cwd: str | None = None,
    original_subject: str = "",
    via: str | None = None,
    routing_plan: WorkerRoutingPlan | None = None,
) -> ConsensusOutput:
    """Run the two-round, role-based consensus workflow.

    Round 1 sends every worker the resource template with its role prompt
    (plus ``CONSENSUS_GUARDRAIL``) substituted for ``ROLE_MARKER``. Round 2
    sends every worker its role prompt followed by a reconciliation brief
    built from all Round 1 results. Both rounds are dispatched through
    ``run_multi_review()`` with ``resume_id=None``.

    Args:
        resource_path: Path of the template; must contain ``ROLE_MARKER``.
        roles: One entry per worker, giving its model and role prompt.
        timeout_seconds: Timeout forwarded to ``run_multi_review()``.
        cwd: Working directory forwarded to ``run_multi_review()``.
        original_subject: The raw subject/target text (before template
            wrapping). Placed in the reconciliation brief, and used as the
            output's ``subject`` (falling back to ``resource_path`` when
            empty).
        via: Proxy passed to routing resolution. Ignored when
            ``routing_plan`` is provided.
        routing_plan: Pre-resolved routing plan. When provided it is used
            as-is for both rounds; otherwise routing is resolved separately
            for each round's specs.

    Returns:
        A ``ConsensusOutput`` carrying both rounds' results, the role
        labels, the worker-id -> role map and the reconciliation brief.

    Raises:
        ValueError: If the resource lacks the role marker.
    """
    from forge.review.routing import resolve_invocation_routing

    template = validate_resource(resource_path)

    def _dispatch(round_specs: list[ModelSpec]):
        """Fan one round out to its workers, resolving routes if no plan was given."""
        plan = routing_plan
        if plan is None:
            plan = resolve_invocation_routing(round_specs, via=via)
        return run_multi_review(
            prompt="",
            models=round_specs,
            routing_plan=plan,
            timeout_seconds=timeout_seconds,
            cwd=cwd,
            resume_id=None,
        )

    # --- Round 1 specs: one per role, each with a unique worker id ---
    first_round_specs: list[ModelSpec] = []
    id_uses: dict[str, int] = {}
    for role_spec in roles:
        role_filled_prompt = template.replace(
            ROLE_MARKER,
            role_spec.role_prompt + CONSENSUS_GUARDRAIL,
        )
        label = role_spec.effective_label

        # Repeated model/label pairs get a numeric suffix from the 2nd use on.
        base_id = f"{role_spec.model.name}-{label}"
        prior_uses = id_uses.get(base_id, 0)
        id_uses[base_id] = prior_uses + 1
        worker_id = f"{base_id}-{prior_uses}" if prior_uses != 0 else base_id

        first_round_specs.append(
            ModelSpec(
                name=role_spec.model.name,
                model_id=role_spec.model.model_id,
                family=role_spec.model.family,
                provider_refs=role_spec.model.provider_refs,
                description=f"{label} role via {role_spec.model.name}",
                preferred_proxy=role_spec.model.preferred_proxy,
                prompt=role_filled_prompt,
                worker_id=worker_id,
            )
        )

    role_map: dict[str, str] = {}
    for first_spec, role_spec in zip(first_round_specs, roles):
        role_map[first_spec.effective_worker_id] = role_spec.effective_label

    # --- Round 1: independent positions ---
    round1_output = _dispatch(first_round_specs)

    # --- Reconciliation brief shared by every Round 2 worker ---
    brief = _build_reconciliation_brief(
        round1_output.results,
        role_map,
        original_subject=original_subject,
    )

    # --- Round 2 specs: same models and worker ids, new prompt ---
    second_round_specs: list[ModelSpec] = []
    for first_spec, role_spec in zip(first_round_specs, roles):
        reconciliation_prompt = f"[ROLE: {role_spec.effective_label}]\n{role_spec.role_prompt}\n\n{brief}"
        second_round_specs.append(
            ModelSpec(
                name=first_spec.name,
                model_id=first_spec.model_id,
                family=first_spec.family,
                provider_refs=first_spec.provider_refs,
                description=f"{role_spec.effective_label} reconciliation via {first_spec.name}",
                preferred_proxy=first_spec.preferred_proxy,
                prompt=reconciliation_prompt,
                worker_id=first_spec.effective_worker_id,
            )
        )

    # --- Round 2: reconciliation ---
    round2_output = _dispatch(second_round_specs)

    role_labels = [role_spec.effective_label for role_spec in roles]
    return ConsensusOutput(
        subject=original_subject or resource_path,
        roles=role_labels,
        round1_results=round1_output.results,
        round2_results=round2_output.results,
        role_map=role_map,
        reconciliation_brief=brief,
    )
|
forge/review/engine.py
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
"""Multi-model review engine with parallel fan-out.
|
|
2
|
+
|
|
3
|
+
Spawns N ``claude -p`` subprocesses in parallel via ThreadPoolExecutor,
|
|
4
|
+
one per model backend. Each subprocess runs in its own process group
|
|
5
|
+
(``start_new_session=True``) so that cleanup via ``os.killpg`` can
|
|
6
|
+
terminate orphaned children if the parent is interrupted.
|
|
7
|
+
|
|
8
|
+
Routing is pre-resolved: the engine receives a ``WorkerRoutingPlan``
|
|
9
|
+
and passes each worker its ``RoutingResult``. No per-worker registry
|
|
10
|
+
lookups during fan-out.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
import os
|
|
17
|
+
import shutil
|
|
18
|
+
import signal
|
|
19
|
+
import subprocess
|
|
20
|
+
import threading
|
|
21
|
+
import time
|
|
22
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
23
|
+
|
|
24
|
+
from forge.core.auth.capabilities import CREDENTIALS, format_missing_credential_error
|
|
25
|
+
from forge.core.auth.template_secrets import resolve_env_or_credential
|
|
26
|
+
from forge.core.reactive.env import (
|
|
27
|
+
build_claude_env,
|
|
28
|
+
can_use_bare,
|
|
29
|
+
should_spawn_subprocesses,
|
|
30
|
+
)
|
|
31
|
+
from forge.core.reactive.routing import RoutingResult
|
|
32
|
+
from forge.review.routing import (
|
|
33
|
+
WorkerRoutingPlan,
|
|
34
|
+
resolve_invocation_routing,
|
|
35
|
+
resolve_model_flag,
|
|
36
|
+
)
|
|
37
|
+
from forge.session.direct_model import direct_model_env
|
|
38
|
+
|
|
39
|
+
from .models import (
|
|
40
|
+
DEFAULT_MODELS,
|
|
41
|
+
ModelSpec,
|
|
42
|
+
MultiReviewOutput,
|
|
43
|
+
ReviewResult,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
_log = logging.getLogger(__name__)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def preflight_check(
    specs: list[ModelSpec],
    routing_plan: WorkerRoutingPlan | None = None,
) -> list[str]:
    """Collect the problems that would prevent workers from running.

    Checks performed:

    * When subprocess spawning is enabled, the ``claude`` executable must
      be discoverable on PATH.
    * With a ``routing_plan``: specs are paired positionally with
      ``routing_plan.routes`` (``zip`` stops at the shorter of the two);
      each pair must have a route, and routed pairs are additionally run
      through ``_credential_preflight_error``.
    * Without a ``routing_plan``: every entry returned by
      ``check_model_availability(specs)`` must have status ``"ready"``.

    Returns:
        A list of human-readable error strings; empty when nothing failed.
    """
    problems: list[str] = []

    claude_missing = should_spawn_subprocesses() and shutil.which("claude") is None
    if claude_missing:
        problems.append(
            "claude CLI not found in PATH. `forge workflow` workers run through local `claude -p`, "
            "even for proxy-routed models; install Claude Code or expose `claude` on PATH in the "
            "environment running `forge workflow`."
        )

    if routing_plan is None:
        # No pre-resolved routes: fall back to the availability probe.
        from .models import check_model_availability

        for entry in check_model_availability(specs):
            if entry.status == "ready":
                continue
            proxy_name = entry.spec.preferred_proxy
            hint = (
                f" Run 'forge proxy create {proxy_name}' to set it up."
                if proxy_name
                else " Run 'forge auth login -c anthropic-api' or use --models to select only proxy-backed models."
            )
            problems.append(f"{entry.spec.name}: {entry.reason}.{hint}")
        return problems

    for spec, routed in zip(specs, routing_plan.routes):
        if routed.route is None:
            why = routed.warning or "No compatible route found"
            problems.append(f"{spec.name}: {why}")
            continue

        missing_credential = _credential_preflight_error(spec, routed)
        if missing_credential:
            problems.append(missing_credential)
    return problems
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _credential_preflight_error(spec: ModelSpec, result: RoutingResult) -> str | None:
    """Describe credentials missing for a directly-routed workflow model.

    Only routes whose provider is ``"direct"`` are inspected. The route's
    credential is looked up in ``CREDENTIALS``; each of its required
    environment variables must resolve to a truthy value through
    ``resolve_env_or_credential``.

    Returns:
        The message produced by ``format_missing_credential_error`` when
        at least one required variable is unresolved; ``None`` when there
        is no route, the route is not direct, the credential is not in
        ``CREDENTIALS``, or nothing is missing.
    """
    direct_route = result.route
    is_direct = direct_route is not None and direct_route.provider == "direct"
    if not is_direct:
        return None

    credential = CREDENTIALS.get(direct_route.credential)
    if credential is None:
        return None

    unresolved: list[str] = []
    for env_var in credential.env_vars:
        # Optional variables are never reported (and never resolved).
        if not env_var.required:
            continue
        if resolve_env_or_credential(env_var.name):
            continue
        unresolved.append(env_var.name)

    if unresolved:
        return format_missing_credential_error(
            credential,
            missing_vars=unresolved,
            context=f"Workflow model '{spec.name}'",
        )
    return None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def run_multi_review(
|
|
121
|
+
prompt: str,
|
|
122
|
+
*,
|
|
123
|
+
models: list[ModelSpec] | None = None,
|
|
124
|
+
routing_plan: WorkerRoutingPlan | None = None,
|
|
125
|
+
timeout_seconds: int = 600,
|
|
126
|
+
cwd: str | None = None,
|
|
127
|
+
resume_id: str | None = None,
|
|
128
|
+
) -> MultiReviewOutput:
|
|
129
|
+
"""Fan out a review prompt to multiple models in parallel.
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
prompt: The review prompt to send to each model.
|
|
133
|
+
models: Model specs to use. Defaults to DEFAULT_MODELS values.
|
|
134
|
+
routing_plan: Pre-resolved routing for all workers. When None,
|
|
135
|
+
resolves routing once at the top before the thread pool.
|
|
136
|
+
timeout_seconds: Per-model timeout in seconds.
|
|
137
|
+
cwd: Working directory for each subprocess.
|
|
138
|
+
resume_id: If set, adds ``--resume <id>`` to each subprocess.
|
|
139
|
+
|
|
140
|
+
Returns:
|
|
141
|
+
MultiReviewOutput with per-model results in input order.
|
|
142
|
+
Returns empty results if FORGE_DEPTH limit reached.
|
|
143
|
+
"""
|
|
144
|
+
if not should_spawn_subprocesses():
|
|
145
|
+
_log.debug("Skipping ensemble review at FORGE_DEPTH limit")
|
|
146
|
+
return MultiReviewOutput(prompt=prompt)
|
|
147
|
+
|
|
148
|
+
specs = models if models is not None else list(DEFAULT_MODELS.values())
|
|
149
|
+
|
|
150
|
+
if not specs:
|
|
151
|
+
return MultiReviewOutput(prompt=prompt)
|
|
152
|
+
|
|
153
|
+
# Resolve routing once if not provided by caller
|
|
154
|
+
if routing_plan is None:
|
|
155
|
+
try:
|
|
156
|
+
routing_plan = resolve_invocation_routing(specs)
|
|
157
|
+
except Exception as e:
|
|
158
|
+
_log.warning("Routing resolution failed: %s", e)
|
|
159
|
+
return MultiReviewOutput(
|
|
160
|
+
prompt=prompt,
|
|
161
|
+
results=[
|
|
162
|
+
ReviewResult(
|
|
163
|
+
model_name=s.effective_worker_id,
|
|
164
|
+
stdout="",
|
|
165
|
+
stderr="",
|
|
166
|
+
success=False,
|
|
167
|
+
duration_seconds=0.0,
|
|
168
|
+
error=str(e),
|
|
169
|
+
)
|
|
170
|
+
for s in specs
|
|
171
|
+
],
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
# Thread-safe list for tracking child processes
|
|
175
|
+
children: list[subprocess.Popen[str]] = []
|
|
176
|
+
children_lock = threading.Lock()
|
|
177
|
+
|
|
178
|
+
def _cleanup() -> None:
    """Stop every tracked child process: SIGTERM, wait, then SIGKILL.

    Works on the ``children`` list while holding ``children_lock``. Signals
    are sent to each child's process group (via ``os.killpg``), not just to
    the child itself. All signalling and reaping errors are deliberately
    ignored: this is best-effort teardown.
    """
    with children_lock:
        # Pass 1: ask every still-running process group to exit.
        for child in children:
            if child.poll() is not None:
                continue  # already exited, nothing to signal
            try:
                os.killpg(os.getpgid(child.pid), signal.SIGTERM)
            except OSError:
                # Covers ProcessLookupError (an OSError subclass): the
                # process disappeared between poll() and killpg().
                pass

        # Pass 2: reap each child, force-killing any that outlive the
        # 5 second grace period.
        for child in children:
            try:
                child.wait(timeout=5)
            except subprocess.TimeoutExpired:
                try:
                    os.killpg(os.getpgid(child.pid), signal.SIGKILL)
                    child.wait(timeout=2)
                except (OSError, subprocess.TimeoutExpired):
                    pass
            except OSError:
                pass
|
|
198
|
+
|
|
199
|
+
def _run_single(spec: ModelSpec, routing_result: RoutingResult) -> ReviewResult:
    """Run one model review in a ``claude -p`` subprocess with pre-resolved routing.

    Args:
        spec: Worker to run. ``spec.prompt`` and ``spec.prompt_mode`` decide
            which prompt is written to the subprocess's stdin, and
            ``spec.effective_worker_id`` labels the returned result.
        routing_result: Routing already resolved for this worker. When its
            ``route`` is None a failed result is returned without spawning
            a subprocess.

    Returns:
        A ReviewResult. ``success`` is True only when the subprocess exits
        with code 0. The handled failure paths (no route, invalid direct
        model reference, non-zero exit, timeout, spawn/OS error) are
        reported through ``error`` rather than raised.
    """
    start = time.monotonic()

    def _failed(
        error: str,
        *,
        duration: float | None = None,
        stdout: str = "",
        stderr: str = "",
    ) -> ReviewResult:
        """Build a failed ReviewResult for this worker."""
        if duration is None:
            duration = time.monotonic() - start
        return ReviewResult(
            model_name=spec.effective_worker_id,
            stdout=stdout,
            stderr=stderr,
            success=False,
            duration_seconds=duration,
            error=error,
        )

    def _stop_process_group(proc: subprocess.Popen[str]) -> None:
        """Terminate a timed-out child's process group and reap it.

        ``Popen.communicate`` does not kill the child when its timeout
        expires, so the caller has to. SIGTERM is tried first; if the
        child is still alive after 5 seconds it is sent SIGKILL.
        ``communicate()`` is used instead of ``wait()`` so the pipes are
        drained and closed once the child exits.
        """
        try:
            os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
            proc.communicate(timeout=5)
        except subprocess.TimeoutExpired:
            try:
                os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
                proc.communicate(timeout=2)
            except (OSError, subprocess.TimeoutExpired):
                pass
        except OSError:
            # Process (group) already gone, or signalling not permitted.
            pass

    # Pick the prompt: the shared prompt, the worker prompt prefixed to the
    # shared prompt, or the worker prompt alone.
    if spec.prompt is None:
        worker_prompt = prompt
    elif spec.prompt_mode == "prefix":
        worker_prompt = f"{spec.prompt}\n\n{prompt}" if prompt else spec.prompt
    else:
        worker_prompt = spec.prompt

    # Supply ANTHROPIC_API_KEY from the credential lookup only when the
    # current environment does not already provide a non-empty value.
    extra_env: dict[str, str] = {}
    if not os.environ.get("ANTHROPIC_API_KEY"):
        ak = resolve_env_or_credential("ANTHROPIC_API_KEY")
        if ak:
            extra_env["ANTHROPIC_API_KEY"] = ak

    route = routing_result.route
    if route is None:
        return _failed(f"No route resolved for '{spec.name}'")

    if route.provider == "direct":
        try:
            extra_env.update(direct_model_env(route.model_ref))
        except ValueError as e:
            return _failed(str(e))
        env = build_claude_env(direct=True, extra_vars=extra_env or None)
    else:
        env = build_claude_env(base_url=routing_result.base_url, extra_vars=extra_env or None)

    cmd = ["claude", "-p"]
    if can_use_bare(env):
        cmd.append("--bare")
    if resume_id:
        cmd.extend(["--resume", resume_id])

    model_flag = resolve_model_flag(route)
    if model_flag:
        cmd.extend(["--model", model_flag])

    try:
        proc = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            cwd=cwd,
            env=env,
            # Own session => own process group, so killpg() reaches the
            # child and anything it spawned in that group.
            start_new_session=True,
        )
        # Register the child so the enclosing cleanup can terminate it.
        with children_lock:
            children.append(proc)

        stdout, stderr = proc.communicate(input=worker_prompt, timeout=timeout_seconds)
        duration = time.monotonic() - start

        if proc.returncode != 0:
            error_msg = stderr.strip() or f"Exit code {proc.returncode}"
            return _failed(error_msg, duration=duration, stdout=stdout, stderr=stderr)

        return ReviewResult(
            model_name=spec.effective_worker_id,
            stdout=stdout.strip(),
            stderr=stderr,
            success=True,
            duration_seconds=duration,
        )

    except subprocess.TimeoutExpired:
        # Only communicate() can raise this, so ``proc`` is bound here.
        _stop_process_group(proc)
        return _failed(
            f"Timeout after {timeout_seconds}s",
            duration=float(timeout_seconds),
        )

    except FileNotFoundError:
        # Popen raises FileNotFoundError both for a missing executable and
        # for a missing working directory; report whichever applies.
        if cwd is not None and not os.path.isdir(cwd):
            return _failed(f"Working directory not found: {cwd}")
        return _failed("claude CLI not found in PATH")

    except (OSError, subprocess.SubprocessError) as e:
        return _failed(str(e))
|
|
326
|
+
|
|
327
|
+
# Fan out with ThreadPoolExecutor, preserving input order and duplicate workers.
|
|
328
|
+
result_map: dict[int, ReviewResult] = {}
|
|
329
|
+
max_workers = min(len(specs), 5)
|
|
330
|
+
|
|
331
|
+
try:
|
|
332
|
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
333
|
+
future_to_item = {
|
|
334
|
+
executor.submit(_run_single, spec, routing_plan.routes[idx]): (idx, spec)
|
|
335
|
+
for idx, spec in enumerate(specs)
|
|
336
|
+
}
|
|
337
|
+
for future in as_completed(future_to_item):
|
|
338
|
+
idx, spec = future_to_item[future]
|
|
339
|
+
wid = spec.effective_worker_id
|
|
340
|
+
try:
|
|
341
|
+
result_map[idx] = future.result()
|
|
342
|
+
except Exception as e:
|
|
343
|
+
result_map[idx] = ReviewResult(
|
|
344
|
+
model_name=wid,
|
|
345
|
+
stdout="",
|
|
346
|
+
stderr="",
|
|
347
|
+
success=False,
|
|
348
|
+
duration_seconds=0.0,
|
|
349
|
+
error=f"Thread error: {e}",
|
|
350
|
+
)
|
|
351
|
+
finally:
|
|
352
|
+
_cleanup()
|
|
353
|
+
|
|
354
|
+
# Return in deterministic input order
|
|
355
|
+
ordered = [result_map[idx] for idx in range(len(specs)) if idx in result_map]
|
|
356
|
+
return MultiReviewOutput(prompt=prompt, results=ordered)
|