multi-forge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311) hide show
  1. forge/__init__.py +3 -0
  2. forge/_extensions/agents/.gitkeep +0 -0
  3. forge/_extensions/commands/.gitkeep +0 -0
  4. forge/_extensions/skills/analyze/SKILL.md +87 -0
  5. forge/_extensions/skills/challenge/SKILL.md +91 -0
  6. forge/_extensions/skills/consensus/SKILL.md +120 -0
  7. forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
  8. forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
  9. forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
  10. forge/_extensions/skills/debate/SKILL.md +116 -0
  11. forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
  12. forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
  13. forge/_extensions/skills/panel/SKILL.md +141 -0
  14. forge/_extensions/skills/panel/resources/synthesis.md +103 -0
  15. forge/_extensions/skills/qa/SKILL.md +704 -0
  16. forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
  17. forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
  18. forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
  19. forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
  20. forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
  21. forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
  22. forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
  23. forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
  24. forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
  25. forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
  26. forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
  27. forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
  28. forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
  29. forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
  30. forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
  31. forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
  32. forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
  33. forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
  34. forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
  35. forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
  36. forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
  37. forge/_extensions/skills/qa/resources/checklist.md +103 -0
  38. forge/_extensions/skills/qa/resources/report-template.md +62 -0
  39. forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
  40. forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
  41. forge/_extensions/skills/review/SKILL.md +125 -0
  42. forge/_extensions/skills/review/references/claude-4.6.md +474 -0
  43. forge/_extensions/skills/review/references/claude-4.7.md +710 -0
  44. forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
  45. forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
  46. forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
  47. forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
  48. forge/_extensions/skills/review/resources/code-gemini.md +184 -0
  49. forge/_extensions/skills/review/resources/code-openai.md +203 -0
  50. forge/_extensions/skills/review/resources/code.md +160 -0
  51. forge/_extensions/skills/review-docs/SKILL.md +121 -0
  52. forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
  53. forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
  54. forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
  55. forge/_extensions/skills/review-docs/resources/docs.md +170 -0
  56. forge/_extensions/skills/smoke-test/SKILL.md +27 -0
  57. forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
  58. forge/_extensions/skills/understand/SKILL.md +148 -0
  59. forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
  60. forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
  61. forge/_extensions/skills/understand/resources/code-openai.md +181 -0
  62. forge/_extensions/skills/understand/resources/code.md +163 -0
  63. forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
  64. forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
  65. forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
  66. forge/_extensions/skills/understand/resources/docs.md +177 -0
  67. forge/_extensions/skills/walkthrough/SKILL.md +599 -0
  68. forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
  69. forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
  70. forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
  71. forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
  72. forge/backend/__init__.py +174 -0
  73. forge/backend/adapters/__init__.py +38 -0
  74. forge/backend/adapters/litellm.py +158 -0
  75. forge/backend/creation.py +89 -0
  76. forge/backend/registry.py +178 -0
  77. forge/cli/__init__.py +16 -0
  78. forge/cli/auth.py +483 -0
  79. forge/cli/backend.py +298 -0
  80. forge/cli/claude.py +411 -0
  81. forge/cli/config_cmd.py +303 -0
  82. forge/cli/extensions.py +1001 -0
  83. forge/cli/gc.py +165 -0
  84. forge/cli/guard.py +1018 -0
  85. forge/cli/guards.py +106 -0
  86. forge/cli/handoff.py +110 -0
  87. forge/cli/hooks/__init__.py +36 -0
  88. forge/cli/hooks/_group.py +20 -0
  89. forge/cli/hooks/_helpers.py +149 -0
  90. forge/cli/hooks/commands.py +1677 -0
  91. forge/cli/hooks/direct_commands.py +1304 -0
  92. forge/cli/hooks/install.py +232 -0
  93. forge/cli/hooks/policy.py +151 -0
  94. forge/cli/hooks/read_hygiene.py +74 -0
  95. forge/cli/hooks/verification.py +370 -0
  96. forge/cli/logs.py +406 -0
  97. forge/cli/main.py +292 -0
  98. forge/cli/proxy.py +1821 -0
  99. forge/cli/proxy_costs.py +313 -0
  100. forge/cli/search.py +416 -0
  101. forge/cli/session.py +892 -0
  102. forge/cli/session_addendum.py +81 -0
  103. forge/cli/session_fork.py +750 -0
  104. forge/cli/session_handoff.py +141 -0
  105. forge/cli/session_lifecycle.py +2053 -0
  106. forge/cli/session_manage.py +1336 -0
  107. forge/cli/session_memory.py +201 -0
  108. forge/cli/status_line.py +1398 -0
  109. forge/cli/workflow.py +1964 -0
  110. forge/config/__init__.py +110 -0
  111. forge/config/dataclass_utils.py +88 -0
  112. forge/config/defaults/__init__.py +0 -0
  113. forge/config/defaults/backends/__init__.py +0 -0
  114. forge/config/defaults/backends/litellm.yaml +196 -0
  115. forge/config/defaults/templates/__init__.py +0 -0
  116. forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
  117. forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
  118. forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
  119. forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
  120. forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
  121. forge/config/defaults/templates/litellm-gemini.yaml +21 -0
  122. forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
  123. forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
  124. forge/config/defaults/templates/litellm-openai.yaml +28 -0
  125. forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
  126. forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
  127. forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
  128. forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
  129. forge/config/defaults/templates/openrouter-glm.yaml +23 -0
  130. forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
  131. forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
  132. forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
  133. forge/config/defaults/templates/openrouter-openai.yaml +28 -0
  134. forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
  135. forge/config/loader.py +675 -0
  136. forge/config/schema.py +448 -0
  137. forge/core/__init__.py +5 -0
  138. forge/core/auth/__init__.py +67 -0
  139. forge/core/auth/capabilities.py +219 -0
  140. forge/core/auth/credentials_file.py +244 -0
  141. forge/core/auth/protocols.py +18 -0
  142. forge/core/auth/secrets.py +243 -0
  143. forge/core/auth/template_secrets.py +112 -0
  144. forge/core/data/__init__.py +5 -0
  145. forge/core/data/model_catalog.yaml +1522 -0
  146. forge/core/data/pricing.yaml +140 -0
  147. forge/core/data/system_prompt_addendums/__init__.py +0 -0
  148. forge/core/data/system_prompt_addendums/gemini.md +330 -0
  149. forge/core/data/system_prompt_addendums/openai.md +328 -0
  150. forge/core/llm/__init__.py +231 -0
  151. forge/core/llm/clients/__init__.py +14 -0
  152. forge/core/llm/clients/base.py +115 -0
  153. forge/core/llm/clients/litellm.py +619 -0
  154. forge/core/llm/clients/openai_compat.py +244 -0
  155. forge/core/llm/clients/openrouter.py +234 -0
  156. forge/core/llm/credentials.py +439 -0
  157. forge/core/llm/detection.py +86 -0
  158. forge/core/llm/errors.py +44 -0
  159. forge/core/llm/protocols.py +80 -0
  160. forge/core/llm/types.py +176 -0
  161. forge/core/logging.py +146 -0
  162. forge/core/models/__init__.py +91 -0
  163. forge/core/models/catalog.py +467 -0
  164. forge/core/models/pricing.py +165 -0
  165. forge/core/models/types.py +167 -0
  166. forge/core/naming.py +212 -0
  167. forge/core/ops/__init__.py +73 -0
  168. forge/core/ops/context.py +141 -0
  169. forge/core/ops/gc.py +802 -0
  170. forge/core/ops/proxy.py +146 -0
  171. forge/core/ops/resolution.py +135 -0
  172. forge/core/ops/session.py +344 -0
  173. forge/core/ops/session_context.py +548 -0
  174. forge/core/paths.py +38 -0
  175. forge/core/process.py +54 -0
  176. forge/core/reactive/__init__.py +38 -0
  177. forge/core/reactive/cost_tracking.py +300 -0
  178. forge/core/reactive/env.py +180 -0
  179. forge/core/reactive/proxy.py +78 -0
  180. forge/core/reactive/routing.py +622 -0
  181. forge/core/reactive/session_runner.py +185 -0
  182. forge/core/reactive/structured_output.py +62 -0
  183. forge/core/reactive/tagger.py +94 -0
  184. forge/core/reactive/throttle.py +132 -0
  185. forge/core/state/__init__.py +59 -0
  186. forge/core/state/exceptions.py +59 -0
  187. forge/core/state/io.py +140 -0
  188. forge/core/state/lock.py +99 -0
  189. forge/core/state/timestamps.py +60 -0
  190. forge/core/transcript.py +78 -0
  191. forge/core/typing_helpers.py +24 -0
  192. forge/core/workqueue/__init__.py +67 -0
  193. forge/core/workqueue/queue.py +552 -0
  194. forge/core/workqueue/types.py +63 -0
  195. forge/guard/__init__.py +26 -0
  196. forge/guard/deterministic/__init__.py +26 -0
  197. forge/guard/deterministic/base.py +158 -0
  198. forge/guard/deterministic/coding_standards.py +256 -0
  199. forge/guard/deterministic/registry.py +148 -0
  200. forge/guard/deterministic/tdd.py +171 -0
  201. forge/guard/engine.py +216 -0
  202. forge/guard/protocols.py +91 -0
  203. forge/guard/queries.py +96 -0
  204. forge/guard/semantic/__init__.py +34 -0
  205. forge/guard/semantic/promotion.py +18 -0
  206. forge/guard/semantic/supervisor.py +813 -0
  207. forge/guard/semantic/verdict.py +183 -0
  208. forge/guard/store.py +124 -0
  209. forge/guard/team/__init__.py +6 -0
  210. forge/guard/team/config.py +24 -0
  211. forge/guard/team/handlers.py +209 -0
  212. forge/guard/team/prompts.py +41 -0
  213. forge/guard/types.py +125 -0
  214. forge/guard/workflow/__init__.py +17 -0
  215. forge/guard/workflow/branches.py +67 -0
  216. forge/guard/workflow/config.py +63 -0
  217. forge/guard/workflow/divergence.py +113 -0
  218. forge/guard/workflow/policy.py +87 -0
  219. forge/guard/workflow/stages.py +205 -0
  220. forge/install/__init__.py +55 -0
  221. forge/install/cli.py +281 -0
  222. forge/install/exceptions.py +163 -0
  223. forge/install/hooks.py +109 -0
  224. forge/install/installer.py +1037 -0
  225. forge/install/models.py +321 -0
  226. forge/install/preset.py +272 -0
  227. forge/install/settings_merge.py +831 -0
  228. forge/install/tracking.py +238 -0
  229. forge/install/version.py +141 -0
  230. forge/proxy/__init__.py +0 -0
  231. forge/proxy/base_client.py +181 -0
  232. forge/proxy/client_adapter.py +476 -0
  233. forge/proxy/client_factory.py +531 -0
  234. forge/proxy/converters.py +1206 -0
  235. forge/proxy/cost_logger.py +132 -0
  236. forge/proxy/cost_tracker.py +242 -0
  237. forge/proxy/data_models.py +338 -0
  238. forge/proxy/error_hints.py +92 -0
  239. forge/proxy/metrics.py +222 -0
  240. forge/proxy/model_spec.py +158 -0
  241. forge/proxy/proxies.py +333 -0
  242. forge/proxy/proxy_identity.py +134 -0
  243. forge/proxy/proxy_orchestrator.py +1018 -0
  244. forge/proxy/proxy_startup.py +54 -0
  245. forge/proxy/server.py +1561 -0
  246. forge/proxy/utils.py +537 -0
  247. forge/review/__init__.py +6 -0
  248. forge/review/adversarial.py +111 -0
  249. forge/review/consensus.py +236 -0
  250. forge/review/engine.py +356 -0
  251. forge/review/models.py +437 -0
  252. forge/review/resources/__init__.py +5 -0
  253. forge/review/resources/codereview-performance.md +85 -0
  254. forge/review/resources/codereview-quick.md +75 -0
  255. forge/review/resources/codereview-security.md +92 -0
  256. forge/review/resources/codereview.md +85 -0
  257. forge/review/resources/docreview-quick.md +75 -0
  258. forge/review/resources/docreview.md +86 -0
  259. forge/review/resources/thinkdeep.md +89 -0
  260. forge/review/routing.py +368 -0
  261. forge/review/synthesis.py +73 -0
  262. forge/runtime_config.py +438 -0
  263. forge/search/__init__.py +55 -0
  264. forge/search/bm25_store.py +264 -0
  265. forge/search/content_store.py +197 -0
  266. forge/search/engine.py +352 -0
  267. forge/search/exceptions.py +51 -0
  268. forge/search/extractor.py +234 -0
  269. forge/search/index_state.py +295 -0
  270. forge/search/store.py +215 -0
  271. forge/search/tokenizer.py +24 -0
  272. forge/session/__init__.py +130 -0
  273. forge/session/active.py +339 -0
  274. forge/session/artifacts.py +202 -0
  275. forge/session/claude/__init__.py +50 -0
  276. forge/session/claude/cleanup.py +105 -0
  277. forge/session/claude/invoke.py +236 -0
  278. forge/session/claude/paths.py +200 -0
  279. forge/session/cleanup.py +216 -0
  280. forge/session/config.py +34 -0
  281. forge/session/direct_model.py +107 -0
  282. forge/session/effective.py +169 -0
  283. forge/session/exceptions.py +255 -0
  284. forge/session/handoff.py +881 -0
  285. forge/session/handoff_agent.py +544 -0
  286. forge/session/hooks/__init__.py +35 -0
  287. forge/session/hooks/models.py +73 -0
  288. forge/session/hooks/session_start.py +507 -0
  289. forge/session/identity.py +84 -0
  290. forge/session/index.py +553 -0
  291. forge/session/manager.py +1506 -0
  292. forge/session/models.py +572 -0
  293. forge/session/overrides.py +344 -0
  294. forge/session/plan_resolution.py +286 -0
  295. forge/session/prev_sessions.py +128 -0
  296. forge/session/store.py +431 -0
  297. forge/session/validation.py +47 -0
  298. forge/session/worktree/__init__.py +65 -0
  299. forge/session/worktree/cleanup.py +262 -0
  300. forge/session/worktree/config_copy.py +203 -0
  301. forge/session/worktree/create.py +332 -0
  302. forge/sidecar/__init__.py +29 -0
  303. forge/sidecar/container.py +161 -0
  304. forge/sidecar/docker.py +86 -0
  305. forge/sidecar/secrets.py +19 -0
  306. multi_forge-0.2.0.dist-info/METADATA +242 -0
  307. multi_forge-0.2.0.dist-info/RECORD +311 -0
  308. multi_forge-0.2.0.dist-info/WHEEL +4 -0
  309. multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
  310. multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
  311. multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
forge/cli/workflow.py ADDED
@@ -0,0 +1,1964 @@
1
+ """Workflow runner CLI commands.
2
+
3
+ Provides:
4
+ - forge workflow panel: Fan out review with check gating
5
+ - forge workflow analyze: Deep single-model analysis
6
+ - forge workflow debate: Adversarial evaluation with stance injection
7
+ - forge workflow consensus: Two-round multi-model consensus building
8
+ - forge workflow list-models: Show available model backends
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import sys
15
+ import tempfile
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
+ import click
20
+ from rich.console import Console
21
+
22
+ from forge.proxy.proxies import ProxyResolutionError
23
+ from forge.review.models import (
24
+ NAMED_ROLES,
25
+ AdversarialOutput,
26
+ ConsensusOutput,
27
+ ModelSpec,
28
+ MultiReviewOutput,
29
+ ReviewResult,
30
+ RoleSpec,
31
+ StanceSpec,
32
+ resolve_model_specs,
33
+ )
34
+
35
# Verdict strings treated as "pass" by --check gating.
# ACCEPT/ACCEPT_WITH_CONDITIONS from debate resources;
# PASS/PASSED/TRUE as general-purpose aliases for other resources.
# SUPPORT/SUPPORT_WITH_CONDITIONS are accepted as well (presumably emitted by
# the consensus resources -- TODO confirm against those resource files).
# NOTE(review): every entry is upper-case, so the lookup site presumably
# upper-cases the model's verdict before testing membership -- confirm.
_ACCEPTING_VERDICTS = frozenset(
    {
        "ACCEPT",
        "ACCEPT_WITH_CONDITIONS",
        "PASS",
        "PASSED",
        "TRUE",
        "SUPPORT",
        "SUPPORT_WITH_CONDITIONS",
    }
)
49
+
50
+
51
+ def _coerce_passed(val: Any) -> bool:
52
+ """Coerce a 'passed' field to bool, handling string 'false' correctly.
53
+
54
+ Without this, ``bool("false")`` is ``True`` in Python -- a real CI bug
55
+ when models emit ``{"passed": "false"}`` as a string.
56
+ """
57
+ if isinstance(val, bool):
58
+ return val
59
+ if isinstance(val, str):
60
+ return val.lower() in ("true", "1", "yes")
61
+ return bool(val)
62
+
63
+
64
# Module-wide Rich console. The helpers and commands in this module print
# their styled, human-readable messages through it; JSON payloads are written
# with click.echo instead.
console = Console()
65
+
66
+
67
def _run_preflight(
    specs: list[ModelSpec],
    *,
    json_output: bool = False,
    routing_plan: Any | None = None,
) -> None:
    """Validate routing/auth for the requested models before any worker starts.

    Returns normally when ``preflight_check`` reports no errors; in that case
    routing warnings are printed to the console unless ``json_output`` is set.
    When errors are reported they are emitted (as a JSON object or as console
    text with troubleshooting tips) and the process exits with status 1.
    """
    from forge.review.engine import preflight_check

    failures = preflight_check(specs, routing_plan=routing_plan)
    route_warnings = _routing_plan_warnings(specs, routing_plan)

    # Success path: surface warnings for humans only, then carry on.
    if not failures:
        if not json_output:
            for note in route_warnings:
                console.print(f"[yellow]Routing warning:[/yellow] {note}")
        return

    if not json_output:
        console.print("[red]Error:[/red] Workflow preflight failed:")
        for failure in failures:
            console.print(f" - {failure}")
        console.print(
            "\n[dim]Tip: Check model availability with 'forge workflow list-models'.\n"
            "Check proxy status: 'forge proxy list'\n"
            "Check auth status: 'forge auth status'\n"
            "Create a proxy: 'forge proxy create <template>'\n"
            "Check worker runtime: 'command -v claude'[/dim]"
        )
    else:
        # Machine-readable report; warnings are attached only when present.
        payload: dict[str, Any] = {"preflight_errors": failures}
        if route_warnings:
            payload["routing_warnings"] = route_warnings
        click.echo(json.dumps(payload))
    sys.exit(1)
100
+
101
+
102
+ def _routing_plan_warnings(specs: list[ModelSpec], routing_plan: Any | None) -> list[str]:
103
+ """Return deduped route warnings for human-facing workflow output."""
104
+ if routing_plan is None:
105
+ return []
106
+
107
+ warnings: list[str] = []
108
+ seen: set[str] = set()
109
+ for spec, result in zip(specs, routing_plan.routes):
110
+ if not result.warning:
111
+ continue
112
+ message = f"{spec.name}: {result.warning}"
113
+ if message in seen:
114
+ continue
115
+ seen.add(message)
116
+ warnings.append(message)
117
+ return warnings
118
+
119
+
120
+ def _resolved_models_summary(
121
+ specs: list[ModelSpec],
122
+ routing_plan: Any | None,
123
+ *,
124
+ worker_ids: list[str] | None = None,
125
+ roles: dict[str, str] | None = None,
126
+ role_field: str = "role",
127
+ ) -> dict[str, dict[str, Any]]:
128
+ """Return user-facing model routing metadata for workflow output."""
129
+ if routing_plan is None:
130
+ return {}
131
+
132
+ summary: dict[str, dict[str, Any]] = {}
133
+ for idx, (spec, result) in enumerate(zip(specs, routing_plan.routes)):
134
+ route = result.route
135
+ worker_id = worker_ids[idx] if worker_ids and idx < len(worker_ids) else spec.effective_worker_id
136
+ entry: dict[str, Any] = {
137
+ "requested_model": spec.name,
138
+ "model_id": spec.model_id,
139
+ "resolved_model": route.model_ref if route else None,
140
+ "provider": route.provider if route else None,
141
+ "source": result.source,
142
+ "proxy": result.proxy_id,
143
+ "template": result.template or (route.template_id if route else None),
144
+ }
145
+ if roles and worker_id in roles:
146
+ entry[role_field] = roles[worker_id]
147
+ if result.warning:
148
+ entry["warning"] = result.warning
149
+ summary[worker_id] = entry
150
+ return summary
151
+
152
+
153
+ def _format_resolved_models(summary: dict[str, dict[str, Any]]) -> str:
154
+ """Format resolved model metadata for non-JSON workflow output."""
155
+ if not summary:
156
+ return ""
157
+
158
+ lines = ["Resolved models:"]
159
+ for worker_id, item in summary.items():
160
+ resolved = item.get("resolved_model") or "(unresolved)"
161
+ provider = item.get("provider") or "unknown"
162
+ proxy = item.get("proxy") or "(direct)"
163
+ template = item.get("template") or "(direct)"
164
+ requested = item.get("requested_model") or worker_id
165
+ role = f", role={item['role']}" if item.get("role") else ""
166
+ stance = f", stance={item['stance']}" if item.get("stance") else ""
167
+ lines.append(
168
+ f"- {worker_id}: requested={requested}, resolved={resolved}, "
169
+ f"provider={provider}, proxy={proxy}, template={template}{role}{stance}"
170
+ )
171
+ return "\n".join(lines) + "\n\n"
172
+
173
+
174
def _handle_routing_error(error: Exception, *, json_output: bool = False) -> None:
    """Report a routing resolution failure and terminate with exit status 1.

    The message is ``str(error)``. It is emitted as a ``{"routing_error": ...}``
    JSON object when ``json_output`` is set, otherwise as a styled console line.
    This function never returns normally.
    """
    detail = str(error)
    if not json_output:
        console.print(f"[red]Error:[/red] Routing failed: {detail}")
    else:
        click.echo(json.dumps({"routing_error": detail}))
    sys.exit(1)
182
+
183
+
184
# Exception types caught around routing resolution (see the ``except`` clause
# in ``panel``) and reported through _handle_routing_error.
_ROUTING_ERRORS = (RuntimeError, ValueError, ProxyResolutionError)
185
+
186
+
187
def _load_workflow_resource(name: str) -> str:
    """Return the UTF-8 text of a resource bundled in ``forge.review.resources``.

    ``name`` is the resource file name inside that package; it is located with
    ``importlib.resources`` rather than a filesystem path.
    """
    from importlib import resources

    package_root = resources.files("forge.review.resources")
    return package_root.joinpath(name).read_text(encoding="utf-8")
193
+
194
+
195
# Root ``workflow`` command group. Subcommands in this module attach to it via
# ``@workflow_cmd.command(...)``; both -h and --help show the help text.
# The docstring below is user-facing CLI help, so its wording is left as-is.
@click.group(context_settings={"help_option_names": ["-h", "--help"]})
def workflow_cmd() -> None:
    """Run multi-model workflows.

    \b
    Examples:
    forge workflow panel docs/design.md # Multi-model doc review
    forge workflow analyze "Should we use X?" # Deep single-model analysis
    forge workflow debate "Proposal" --code # Adversarial code eval
    """
205
+
206
+
207
@workflow_cmd.command(name="list-models")
@click.option("--json", "json_output", is_flag=True, help="Output as JSON")
@click.option("--available", "available_only", is_flag=True, help="Show only ready models")
def list_models(json_output: bool, available_only: bool) -> None:
    """Show available model backends for workflow runners."""
    # Availability is computed for every known model spec, optionally narrowed
    # to those whose status is "ready". JSON mode dumps one object per model;
    # otherwise models are printed grouped by credential, or a hint is shown
    # when nothing is left to list.
    from forge.review.models import available_model_specs, check_model_availability

    entries = check_model_availability(available_model_specs())
    if available_only:
        entries = [entry for entry in entries if entry.status == "ready"]

    if json_output:
        payload = []
        for entry in entries:
            spec = entry.spec
            payload.append(
                {
                    "name": spec.name,
                    "model_id": spec.model_id,
                    "family": spec.family,
                    "provider_refs": list(spec.provider_refs),
                    "preferred_proxy": spec.preferred_proxy,
                    "description": spec.description,
                    "status": entry.status,
                    "reason": entry.reason,
                }
            )
        click.echo(json.dumps(payload, indent=2))
        return

    if entries:
        _print_grouped_models(entries)
        return

    console.print(
        "[yellow]No models are currently ready.[/yellow]\n"
        "[dim]Tip: Check 'forge proxy list' and 'forge auth status'.[/dim]"
    )
244
+
245
+
246
def _primary_credential(spec: ModelSpec) -> str:
    """Return the credential name of the first route derived for ``spec``.

    Routes come from ``derive_model_routes()``; the first one's ``credential``
    is used. Falls back to ``"unknown"`` when no routes are derived.
    """
    from forge.review.routing import derive_model_routes

    candidates = derive_model_routes(spec)
    return candidates[0].credential if candidates else "unknown"
258
+
259
+
260
def _credential_env_var(credential_name: str) -> str:
    """Return the display env var for a credential, or ``""`` if there is none.

    Looks the credential up in ``CREDENTIALS`` and picks the first of its
    ``env_vars`` that is both ``required`` and ``secret``.
    """
    from forge.core.auth.capabilities import CREDENTIALS

    definition = CREDENTIALS.get(credential_name)
    if not definition:
        return ""
    required_secrets = (var.name for var in definition.env_vars if var.required and var.secret)
    return next(required_secrets, "")
270
+
271
+
272
def _credential_configured(credential_name: str) -> bool:
    """Tell whether the credential's primary secret can currently be resolved.

    Returns ``False`` when the credential has no primary env var; otherwise
    ``True`` exactly when ``resolve_env_or_credential`` yields a non-``None``
    value for that env var.
    """
    primary_var = _credential_env_var(credential_name)
    if primary_var:
        from forge.core.auth.template_secrets import resolve_env_or_credential

        return resolve_env_or_credential(primary_var) is not None
    return False
280
+
281
+
282
def _print_grouped_models(availabilities: list) -> None:
    """Print models grouped by primary credential.

    Each group starts with a header naming the credential, its primary env var
    (when one is known) and whether that credential is configured. One line
    per model follows, showing name, description (plus the availability reason
    when set) and status; a blank line closes each group.

    Args:
        availabilities: Availability records exposing ``spec`` (with ``name``
            and ``description``), ``status`` and ``reason``.
    """
    # Plain dicts preserve insertion order, so groups appear in first-seen order.
    groups: dict[str, list] = {}
    for a in availabilities:
        groups.setdefault(_primary_credential(a.spec), []).append(a)

    status_styles = {"ready": "green", "unavailable": "yellow", "error": "red"}

    console.print("\n[bold]Available Models[/bold]\n")

    for cred_name, items in groups.items():
        env_var = _credential_env_var(cred_name)
        configured = _credential_configured(cred_name)
        config_tag = "[green]configured[/green]" if configured else "[yellow]not configured[/yellow]"
        env_display = f" ({env_var})" if env_var else ""
        console.print(f" [bold]{cred_name}[/bold]{env_display} [{config_tag}]")

        for a in items:
            style = status_styles.get(a.status)
            # A status without a mapped style must be printed bare: wrapping it
            # in empty tags would emit "[]...[/]", and Rich rejects a "[/]"
            # that has no open tag to close (MarkupError).
            status_markup = f"[{style}]{a.status}[/{style}]" if style else f"{a.status}"
            desc = a.spec.description
            if a.reason:
                desc += f" [dim]({a.reason})[/dim]"
            console.print(f" [cyan]{a.spec.name:<24}[/cyan] {desc:<50} {status_markup}")
        console.print()
309
+
310
+
311
# ``panel`` subcommand. Flow: validate --context, resolve the base prompt,
# validate --review-type against --code, append the severity suffix, resolve
# model specs, apply reviewer roles, resolve routing, run preflight, run the
# multi-model review under cost tracking, then hand the output to
# _handle_review_output. Usage errors exit with status 2.
@workflow_cmd.command(name="panel")
@click.argument("target", nargs=-1)
@click.option("-p", "--prompt", type=str, default=None, help="Review prompt")
@click.option(
    "--code",
    "code_mode",
    is_flag=True,
    help="Use code review framework (default: document review)",
)
@click.option(
    "--context",
    "context_mode",
    type=str,
    default="blind",
    help='Context mode: "blind" (default) or "resume:<uuid>"',
)
@click.option(
    "--models",
    "-m",
    type=str,
    default=None,
    help="Comma-separated model names (default: all)",
)
@click.option("--timeout", "-t", type=int, default=600, help="Per-model timeout in seconds")
@click.option("--json", "json_output", is_flag=True, help="Output structured JSON")
@click.option(
    "--check",
    "check_mode",
    is_flag=True,
    help="Gate on results: exit 0 if passed, exit 1 if failed",
)
@click.option(
    "--roles",
    type=str,
    default=None,
    help=f"Comma-separated reviewer roles ({','.join(sorted(NAMED_ROLES))})",
)
@click.option(
    "--review-type",
    type=click.Choice(["full", "security", "performance", "quick"]),
    default="full",
    help="Review focus area (security/performance require --code)",
)
@click.option(
    "--severity",
    type=click.Choice(["high", "critical"]),
    default=None,
    help="Minimum severity to report",
)
@click.option("--proxy", "via", type=str, default=None, help="Route proxy-backed workers through this proxy")
@click.option("--cwd", type=click.Path(exists=True), default=None, help="Working directory")
@click.pass_context
def panel(
    ctx: click.Context,
    target: tuple[str, ...],
    prompt: str | None,
    code_mode: bool,
    context_mode: str,
    models: str | None,
    timeout: int,
    json_output: bool,
    check_mode: bool,
    roles: str | None,
    review_type: str,
    severity: str | None,
    via: str | None,
    cwd: str | None,
) -> None:
    """Fan out a review to multiple models.

    \b
    Examples:
    forge workflow panel docs/design.md # docs review (default)
    forge workflow panel src/forge/cli/ --code # code review
    forge workflow panel -p "Review the error handling" # custom prompt
    forge workflow panel src/ --code --roles security,architecture
    forge workflow panel src/ --code --review-type security --severity high
    """
    # --context is either "blind" (no resume id) or "resume:<uuid>", where the
    # text after the prefix becomes resume_id and must be non-empty.
    resume_id: str | None = None
    if context_mode == "blind":
        pass
    elif context_mode.startswith("resume:"):
        resume_id = context_mode[len("resume:") :]
        if not resume_id:
            console.print("[red]Error:[/red] --context resume:<uuid> requires a UUID.")
            ctx.exit(2)
            return
    else:
        console.print(f'[red]Error:[/red] Invalid --context "{context_mode}".' ' Use "blind" or "resume:<uuid>".')
        ctx.exit(2)
        return

    # Prompt composition: (1) resolve base prompt/resource
    resolved_prompt = _resolve_panel_prompt(target, prompt, code_mode, review_type)
    if resolved_prompt is None:
        console.print("[red]Error:[/red] No prompt provided. Use target argument, -p, or stdin.")
        ctx.exit(2)
        return

    # Validate review-type/code-mode interaction.
    # Only applies when a review resource is loaded (target-based prompt).
    # Skip when -p or stdin provided a custom prompt (review_type is ignored).
    uses_resource = not prompt and bool(target)
    if uses_resource and review_type in ("security", "performance") and not code_mode:
        console.print(f"[red]Error:[/red] --review-type {review_type} requires --code.")
        ctx.exit(2)
        return

    # Prompt composition: (2) append severity suffix
    if severity:
        resolved_prompt += (
            f"\n\nIMPORTANT: Report only {severity}-severity findings or above. "
            f"Skip lower-severity issues. If no findings meet the {severity} threshold, "
            f"explicitly state: 'No findings at or above {severity} severity.'"
        )

    # An invalid --models value surfaces as ValueError and is a usage error.
    try:
        specs = resolve_model_specs(models)
    except ValueError as e:
        console.print(f"[red]Error:[/red] {e}")
        ctx.exit(2)
        return

    # Prompt composition: (3) prepend per-worker role prefix
    if roles:
        try:
            role_list = _parse_roles(roles)
        except ValueError as e:
            console.print(f"[red]Error:[/red] {e}")
            ctx.exit(2)
            return
        specs = _apply_panel_roles(specs, role_list, resolved_prompt)

    # Imported here rather than at module top; only needed once a run starts.
    from forge.core.reactive.cost_tracking import (
        resolve_proxy_urls_from_plan,
        track_verb_cost,
    )
    from forge.review.engine import run_multi_review
    from forge.review.routing import resolve_invocation_routing

    # Routing failures are reported by _handle_routing_error, which exits 1.
    try:
        routing_plan = resolve_invocation_routing(specs, via=via)
    except _ROUTING_ERRORS as e:
        _handle_routing_error(e, json_output=json_output)
        return

    # Exits with status 1 if preflight reports errors.
    _run_preflight(specs, json_output=json_output, routing_plan=routing_plan)

    # The review runs inside the "panel" cost-tracking context; the working
    # directory defaults to the current directory when --cwd is not given.
    with track_verb_cost("panel", resolve_proxy_urls_from_plan(routing_plan)):
        output = run_multi_review(
            resolved_prompt,
            models=specs,
            routing_plan=routing_plan,
            timeout_seconds=timeout,
            cwd=cwd or str(Path.cwd()),
            resume_id=resume_id,
        )

    _handle_review_output(
        ctx,
        output,
        check_mode=check_mode,
        json_output=json_output,
        resolved_models=_resolved_models_summary(specs, routing_plan),
        routing_warnings=_routing_plan_warnings(specs, routing_plan),
    )
477
+
478
+
479
def _resolve_panel_prompt(
    target: tuple[str, ...],
    prompt: str | None,
    code_mode: bool,
    review_type: str = "full",
) -> str | None:
    """Pick the prompt text for the panel command.

    Sources are tried in this order and the first non-empty one wins:

    1. ``prompt`` (``-p``), returned verbatim; ``review_type`` and
       ``code_mode`` are not consulted in this case.
    2. ``target`` words joined with spaces and appended under a
       "Review Target" heading to the review framework resource selected by
       ``code_mode`` / ``review_type``.
    3. Piped stdin, stripped of surrounding whitespace.

    Returns ``None`` when none of the sources yields any text.
    """
    if prompt:
        return prompt

    joined_target = " ".join(target or ())
    if joined_target:
        framework = _load_workflow_resource(_load_review_resource_name(code_mode, review_type))
        return f"{framework}\n\n---\n\n## Review Target\n\n{joined_target}\n"

    # Only read stdin when something is piped in; never block on a terminal.
    if sys.stdin.isatty():
        return None
    piped = sys.stdin.read().strip()
    return piped or None
502
+
503
+
504
# Review resource file per review type, one table per review mode.
_CODE_REVIEW_RESOURCES = dict(
    full="codereview.md",
    security="codereview-security.md",
    performance="codereview-performance.md",
    quick="codereview-quick.md",
)

_DOC_REVIEW_RESOURCES = dict(
    full="docreview.md",
    quick="docreview-quick.md",
)


def _load_review_resource_name(code_mode: bool, review_type: str) -> str:
    """Return the resource file name for a review mode and review type.

    ``code_mode`` selects the code-review table, otherwise the document-review
    table is used. A ``review_type`` missing from the selected table resolves
    to that table's ``"full"`` entry.
    """
    if code_mode:
        table = _CODE_REVIEW_RESOURCES
    else:
        table = _DOC_REVIEW_RESOURCES
    try:
        return table[review_type]
    except KeyError:
        return table["full"]
525
+
526
+
527
def _parse_roles(roles_str: str) -> list[str]:
    """Split a comma-separated role string into validated role names.

    Entries are whitespace-trimmed and blank entries are dropped. Order and
    duplicates are preserved.

    Raises:
        ValueError: if no role remains after trimming, or if any role is not
            a key of ``NAMED_ROLES``.
    """
    names: list[str] = []
    for chunk in roles_str.split(","):
        name = chunk.strip()
        if name:
            names.append(name)

    if not names:
        raise ValueError("No roles specified. Provide comma-separated role names.")

    unknown = [name for name in names if name not in NAMED_ROLES]
    if unknown:
        available = sorted(NAMED_ROLES.keys())
        raise ValueError(f"Unknown roles: {unknown}. Available: {available}")
    return names
540
+
541
+
542
def _apply_panel_roles(
    specs: list[ModelSpec],
    roles: list[str],
    base_prompt: str,
) -> list[ModelSpec]:
    """Return copies of ``specs`` carrying a role-prefixed prompt and worker id.

    Roles are assigned round-robin, so they repeat when there are fewer roles
    than specs. Each copy gets ``prompt`` set to a ``[ROLE: name]`` header, the
    role text from ``NAMED_ROLES`` and then ``base_prompt``. ``worker_id`` is
    ``"<spec.name>-<role>"``; a repeated pair gets ``-1``, ``-2``, ...
    appended. The input specs are not modified (``dataclasses.replace``).
    """
    import dataclasses

    id_uses: dict[str, int] = {}
    assigned: list[ModelSpec] = []
    for index, spec in enumerate(specs):
        role_name = roles[index % len(roles)]
        role_text = NAMED_ROLES[role_name]

        stem = f"{spec.name}-{role_name}"
        prior_uses = id_uses.get(stem, 0)
        id_uses[stem] = prior_uses + 1
        # First occurrence keeps the bare id; later ones get a numeric suffix.
        worker_id = f"{stem}-{prior_uses}" if prior_uses else stem

        assigned.append(
            dataclasses.replace(
                spec,
                prompt=f"[ROLE: {role_name}]\n{role_text}\n\n{base_prompt}",
                worker_id=worker_id,
            )
        )
    return assigned
572
+
573
+
574
def _evaluate_verdicts(results: list[ReviewResult]) -> tuple[bool, str]:
    """Decide the ``--check`` gate from worker results (fail-closed).

    The gate passes only when every worker succeeded and its stdout contains a
    JSON object with an accepting verdict. The verdict is read from the first
    of these keys that is present: ``passed`` (via ``_coerce_passed``), then
    ``verdict``, then ``position`` (upper-cased and looked up in
    ``_ACCEPTING_VERDICTS``). A failed worker, output without a JSON object,
    or JSON without any of those keys counts as a failure. An empty result
    list fails with reason ``"no results"``.

    Used by ``_handle_review_output`` and by the ``debate`` command.

    Returns:
        ``(passed, reason)``; on failure ``reason`` describes the first
        failing worker in result order.
    """
    from forge.core.reactive.structured_output import extract_json_from_response

    if not results:
        return False, "no results"

    outcomes: list[tuple[bool, str]] = []
    for result in results:
        who = f"worker {result.model_name}"

        if not result.success:
            outcomes.append((False, f"{who} failed"))
            continue

        payload = extract_json_from_response(result.stdout)
        if not isinstance(payload, dict):
            outcomes.append((False, f"{who} emitted no verdict"))
            continue

        if "passed" in payload:
            accepted = _coerce_passed(payload["passed"])
        else:
            field = next((key for key in ("verdict", "position") if key in payload), None)
            if field is None:
                outcomes.append((False, f"{who} emitted JSON without verdict fields"))
                continue
            accepted = str(payload[field]).upper() in _ACCEPTING_VERDICTS

        outcomes.append((accepted, f"{who} {'accepted' if accepted else 'rejected'}"))

    first_failure = next((reason for ok, reason in outcomes if not ok), None)
    if first_failure is None:
        return True, f"all {len(outcomes)} verdicts accepting"
    return False, first_failure
633
+
634
+
635
# Positions that count as agreement for the consensus --check gate.
_CONSENSUS_ACCEPTING = frozenset(("SUPPORT", "SUPPORT_WITH_CONDITIONS"))


def _evaluate_consensus_positions(results: list[ReviewResult]) -> tuple[bool, str]:
    """Decide the consensus ``--check`` gate from worker results (schema-strict).

    Stricter than ``_evaluate_verdicts``: only the ``position`` key is read
    (``passed`` / ``verdict`` are not accepted as substitutes), and only
    SUPPORT / SUPPORT_WITH_CONDITIONS (compared upper-cased) count as
    agreement. A failed worker, output without a JSON object, or JSON without
    ``position`` counts as a failure. An empty result list fails with reason
    ``"no results"``.

    Returns:
        ``(passed, reason)``; on failure ``reason`` describes the first
        failing worker in result order.
    """
    from forge.core.reactive.structured_output import extract_json_from_response

    if not results:
        return False, "no results"

    outcomes: list[tuple[bool, str]] = []
    for result in results:
        who = f"worker {result.model_name}"

        if not result.success:
            outcomes.append((False, f"{who} failed"))
            continue

        payload = extract_json_from_response(result.stdout)
        if not isinstance(payload, dict):
            outcomes.append((False, f"{who} emitted no position"))
            continue

        if "position" not in payload:
            outcomes.append((False, f"{who} emitted JSON without position field"))
            continue

        supports = str(payload["position"]).upper() in _CONSENSUS_ACCEPTING
        outcomes.append((supports, f"{who} {'supporting' if supports else 'opposing'}"))

    first_failure = next((reason for ok, reason in outcomes if not ok), None)
    if first_failure is None:
        return True, f"all {len(outcomes)} positions supporting"
    return False, first_failure
681
+
682
+
683
def _build_check_json(
    output: MultiReviewOutput,
    passed: bool,
    reason: str,
    resolved_models: dict[str, dict[str, Any]] | None = None,
    routing_warnings: list[str] | None = None,
) -> dict[str, Any]:
    """Assemble the ``--check`` JSON payload.

    Starts from ``build_json_dict(output)`` and adds the gating fields
    ``passed``, ``check_mode`` (always ``"verdict"``) and ``reason``.
    ``resolved_models`` and ``routing_warnings`` are added only when non-empty.
    """
    from forge.review.synthesis import build_json_dict

    payload = build_json_dict(output)
    payload.update(passed=passed, check_mode="verdict", reason=reason)

    optional_fields = (
        ("resolved_models", resolved_models),
        ("routing_warnings", routing_warnings),
    )
    for key, value in optional_fields:
        if value:
            payload[key] = value
    return payload
702
+
703
+
704
def _handle_review_output(
    ctx: click.Context,
    output: MultiReviewOutput,
    *,
    check_mode: bool,
    json_output: bool,
    resolved_models: dict[str, dict[str, Any]] | None = None,
    routing_warnings: list[str] | None = None,
) -> None:
    """Emit the result of a multi-review run in the requested format.

    * ``check_mode``: print the gate JSON from ``_build_check_json`` and exit
      via ``ctx.exit`` with 0 when the gate passed, 1 otherwise. Takes
      precedence over ``json_output``.
    * ``json_output``: print ``build_json_dict(output)`` as JSON, adding
      ``resolved_models`` / ``routing_warnings`` when non-empty.
    * otherwise: print the resolved-models summary followed by the synthesis
      prompt as plain text.
    """
    from forge.review.synthesis import build_json_dict, format_synthesis_prompt

    if check_mode:
        passed, reason = _evaluate_verdicts(output.results)
        report = _build_check_json(
            output,
            passed,
            reason,
            resolved_models=resolved_models,
            routing_warnings=routing_warnings,
        )
        click.echo(json.dumps(report, indent=2))
        ctx.exit(0 if passed else 1)
        return

    if not json_output:
        click.echo(_format_resolved_models(resolved_models or {}) + format_synthesis_prompt(output))
        return

    report = build_json_dict(output)
    if resolved_models:
        report["resolved_models"] = resolved_models
    if routing_warnings:
        report["routing_warnings"] = routing_warnings
    click.echo(json.dumps(report, indent=2))
738
+
739
+
740
+ # --- Analyze subcommand ---
741
+
742
+
743
@workflow_cmd.command(name="analyze")
@click.argument("topic", nargs=-1)
@click.option(
    "-p",
    "--prompt",
    "prompt_text",
    type=str,
    default=None,
    help="Topic to analyze (alternative to positional)",
)
@click.option(
    "--models",
    "-m",
    type=str,
    default="claude-opus",
    help="Comma-separated model names (default: claude-opus)",
)
@click.option("--timeout", "-t", type=int, default=600, help="Per-model timeout in seconds")
@click.option("--json", "json_output", is_flag=True, help="Output structured JSON")
@click.option(
    "--check",
    "check_mode",
    is_flag=True,
    help="Gate on verdict: exit 0 if passed, exit 1 if failed",
)
@click.option("--proxy", "via", type=str, default=None, help="Route proxy-backed workers through this proxy")
@click.option("--cwd", type=click.Path(exists=True), default=None, help="Working directory")
@click.pass_context
def analyze(
    ctx: click.Context,
    topic: tuple[str, ...],
    prompt_text: str | None,
    models: str,
    timeout: int,
    json_output: bool,
    check_mode: bool,
    via: str | None,
    cwd: str | None,
) -> None:
    """Deep structured analysis on a topic (single-model).

    \b
    Examples:
      forge workflow analyze "Should we use event sourcing?"
      forge workflow analyze -p "Evaluate migration strategy" --json
      forge workflow analyze "Architecture review" --check
    """
    # NOTE: the docstring above is the Click help text; keep it user-facing.
    # Positional words win over -p when both are given.
    subject_text = " ".join(topic) if topic else prompt_text
    if not subject_text:
        console.print("[red]Error:[/red] No topic provided. Pass as argument or use -p.")
        ctx.exit(2)
        return

    try:
        specs = resolve_model_specs(models)
    except ValueError as err:
        console.print(f"[red]Error:[/red] {err}")
        ctx.exit(2)
        return

    # Wrap the topic in the "thinkdeep" analysis framework.
    framework = _load_workflow_resource("thinkdeep.md")
    combined_prompt = f"{framework}\n\n---\n\n## Topic to Analyze\n\n{subject_text}\n"

    from forge.core.reactive.cost_tracking import (
        resolve_proxy_urls_from_plan,
        track_verb_cost,
    )
    from forge.review.engine import run_multi_review
    from forge.review.routing import resolve_invocation_routing

    try:
        plan = resolve_invocation_routing(specs, via=via)
    except _ROUTING_ERRORS as err:
        _handle_routing_error(err, json_output=json_output)
        return

    _run_preflight(specs, json_output=json_output, routing_plan=plan)

    proxy_urls = resolve_proxy_urls_from_plan(plan)
    with track_verb_cost("analyze", proxy_urls):
        workdir = cwd or str(Path.cwd())
        output = run_multi_review(
            combined_prompt,
            models=specs,
            routing_plan=plan,
            timeout_seconds=timeout,
            cwd=workdir,
        )

    models_summary = _resolved_models_summary(specs, plan)
    plan_warnings = _routing_plan_warnings(specs, plan)
    _handle_review_output(
        ctx,
        output,
        check_mode=check_mode,
        json_output=json_output,
        resolved_models=models_summary,
        routing_warnings=plan_warnings,
    )
838
+
839
+
840
+ # --- Debate subcommand ---
841
+
842
# Stock stance prompts for proposal debates, keyed by stance name.
# _build_stances and _parse_worker_specs use this table when code_mode is off.
_DEFAULT_PROPOSAL_STANCE_PROMPTS = {
    "for": (
        "You are evaluating this proposal as a SUPPORTER. "
        "Identify strengths, viable implementation paths, and reasons to proceed. "
        "Acknowledge genuine weaknesses but focus on how they can be addressed."
    ),
    "against": (
        "You are evaluating this proposal as a CRITIC. "
        "Attack on these specific vectors: "
        "(1) correctness -- are there logical gaps, incorrect assumptions, or unstated prerequisites? "
        "(2) feasibility -- can this actually be done with the stated constraints and resources? "
        "(3) internal contradictions -- does the proposal contradict itself across sections? "
        "(4) unstated assumptions -- what is being taken for granted without evidence? "
        "(5) alternatives -- are there simpler or better-established approaches being ignored? "
        "Acknowledge genuine strengths but focus relentlessly on potential problems."
    ),
    "neutral": (
        "You are evaluating this proposal as a NEUTRAL ANALYST. "
        "Weigh strengths against weaknesses objectively. "
        "Provide a balanced assessment without advocating for or against."
    ),
}

# Stock stance prompts for code debates (same keys as the proposal table).
# _build_stances and _parse_worker_specs use this table when code_mode is on.
_DEFAULT_CODE_STANCE_PROMPTS = {
    "for": (
        "You are evaluating this code as a SUPPORTER. "
        "Identify good design, correct implementations, and production readiness. "
        "Acknowledge genuine issues but focus on what works well and why."
    ),
    "against": (
        "You are evaluating this code as a CRITIC. "
        "Attack on these specific vectors: "
        "(1) correctness -- logic errors, edge cases, off-by-one, null handling? "
        "(2) security -- injection, validation gaps, secrets, auth boundaries? "
        "(3) performance -- unnecessary allocations, N+1 patterns, blocking in async? "
        "(4) architecture -- coupling violations, wrong abstraction level, unstable contracts? "
        "(5) test coverage -- are critical paths tested? are failure modes covered? "
        "Acknowledge genuine strengths but focus relentlessly on potential problems."
    ),
    "neutral": (
        "You are evaluating this code as a NEUTRAL ANALYST. "
        "Weigh quality, security, performance, and architecture objectively. "
        "Provide a balanced assessment with specific file:line evidence."
    ),
}

# Order in which _build_stances hands out stances round-robin across models.
_STANCE_CYCLE = ["for", "against", "neutral"]
889
+
890
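+ # Debate evaluation template (canonical copy in src/skills/debate/resources/debate_evaluation.md).
891
+ # Embedded here so the CLI doesn't depend on skill installation. _resolve_debate_prompt fills {proposal} via str.replace; {stance_prompt} is left in place for the adversarial runner.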
892
+ _DEBATE_EVALUATION_TEMPLATE = """\
893
+ # Structured Evaluation
894
+
895
+ ```xml
896
+ <role>
897
+ You are a technical evaluator performing a structured assessment.
898
+ {stance_prompt}
899
+ </role>
900
+
901
+ <behavior>
902
+ - Evaluate strictly on technical merits
903
+ - Support every claim with evidence or reasoning
904
+ - Be specific: cite exact trade-offs, not vague concerns
905
+ - Provide a clear verdict with confidence level
906
+ </behavior>
907
+ ```
908
+
909
+ ---
910
+
911
+ ## Proposal Under Evaluation
912
+
913
+ {proposal}
914
+
915
+ ---
916
+
917
+ ## Evaluation Framework
918
+
919
+ ### 1. Feasibility
920
+
921
+ - Can this be implemented with the available technology and resources?
922
+ - What are the key technical dependencies?
923
+ - Are there proven precedents or is this novel?
924
+
925
+ ### 2. Correctness
926
+
927
+ - Does the proposal solve the stated problem?
928
+ - Are there logical gaps or incorrect assumptions?
929
+ - Does it handle edge cases and failure modes?
930
+
931
+ ### 3. Trade-offs
932
+
933
+ - What does this approach gain vs alternatives?
934
+ - What does it cost (complexity, performance, maintenance)?
935
+ - Are the trade-offs appropriate for the context?
936
+
937
+ ### 4. Risks
938
+
939
+ - What could go wrong in implementation?
940
+ - What could go wrong in production?
941
+ - What is the blast radius of failure?
942
+
943
+ ### 5. Completeness
944
+
945
+ - Are all requirements addressed?
946
+ - Are there missing considerations?
947
+ - What would need to be added before this is production-ready?
948
+
949
+ ### 6. Alternatives
950
+
951
+ - What other approaches could solve this problem?
952
+ - Why might they be better or worse?
953
+
954
+ ### 7. Recommendation
955
+
956
+ - Overall verdict: ACCEPT, ACCEPT_WITH_CONDITIONS, or REJECT
957
+ - Confidence level: LOW, MEDIUM, HIGH
958
+ - Key conditions (if ACCEPT_WITH_CONDITIONS)
959
+
960
+ ---
961
+
962
+ ## Output Format
963
+
964
+ ````xml
965
+ <output_format>
966
+ Respond with a structured evaluation in JSON:
967
+
968
+ {
969
+ "verdict": "ACCEPT" | "ACCEPT_WITH_CONDITIONS" | "REJECT",
970
+ "confidence": "LOW" | "MEDIUM" | "HIGH",
971
+ "key_findings": [
972
+ {"category": "feasibility|correctness|trade-offs|risks|completeness",
973
+ "finding": "specific finding",
974
+ "severity": "critical|high|medium|low"}
975
+ ],
976
+ "recommendation": "1-2 sentence summary of your recommendation",
977
+ "conditions": ["condition 1", "condition 2"]
978
+ }
979
+
980
+ Wrap the JSON in a ```json code fence.
981
+ </output_format>
982
+ ````
983
+ """
984
+
985
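+ # Code debate evaluation template (canonical copy in src/skills/debate/resources/code_debate_evaluation.md).
986
+ # Embedded here so the CLI doesn't depend on skill installation. _resolve_debate_prompt fills {target} via str.replace; {stance_prompt} is left in place for the adversarial runner.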
987
+ _CODE_DEBATE_EVALUATION_TEMPLATE = """\
988
+ # Adversarial Code Evaluation
989
+
990
+ ```xml
991
+ <role>
992
+ You are a senior code evaluator performing a structured adversarial assessment.
993
+ {stance_prompt}
994
+ You identify bugs, design issues, security concerns, and performance problems.
995
+ You provide actionable feedback with specific code references.
996
+ </role>
997
+
998
+ <behavior>
999
+ - Read all code in scope before forming opinions
1000
+ - Cite specific file:line references for every finding
1001
+ - Evaluate strictly on technical merits
1002
+ - Support every claim with evidence or reasoning
1003
+ - Cover ALL files in ONE pass -- do not present partial results
1004
+ - Be specific: "potential null dereference at auth.py:45" not "might have issues"
1005
+ - Provide a clear verdict with confidence level
1006
+ </behavior>
1007
+
1008
+ <scope_constraints>
1009
+ - Review only what's in scope
1010
+ - Do not expand to adjacent code unless directly affected
1011
+ - If tests exist for reviewed code, check them for coverage gaps
1012
+ </scope_constraints>
1013
+ ```
1014
+
1015
+ ---
1016
+
1017
+ ## Code Under Evaluation
1018
+
1019
+ {target}
1020
+
1021
+ ---
1022
+
1023
+ ## Evaluation Framework
1024
+
1025
+ ### 1. Quality
1026
+
1027
+ - Logic errors and edge cases
1028
+ - Error handling: are errors caught, propagated, and surfaced correctly?
1029
+ - Type safety: do type annotations match runtime behavior?
1030
+ - Test coverage: are critical paths tested?
1031
+
1032
+ ### 2. Security
1033
+
1034
+ - Input validation at trust boundaries
1035
+ - Injection vectors (command, SQL, path traversal)
1036
+ - Secrets in code or logs
1037
+ - Authentication and authorization gaps
1038
+
1039
+ ### 3. Performance
1040
+
1041
+ - Unnecessary allocations or copies in hot paths
1042
+ - N+1 query patterns
1043
+ - Missing caching where data is reused
1044
+ - Blocking calls in async contexts
1045
+
1046
+ ### 4. Architecture
1047
+
1048
+ - Component boundaries: is coupling appropriate?
1049
+ - Dependency direction: do imports flow the right way?
1050
+ - Abstraction level: is complexity in the right place?
1051
+ - Interface contracts: are public APIs stable and well-defined?
1052
+
1053
+ ### 5. Risks
1054
+
1055
+ - What could go wrong in production?
1056
+ - What is the blast radius of failure?
1057
+ - Missing error recovery or graceful degradation?
1058
+ - Deployment or migration risks?
1059
+
1060
+ ### 6. Recommendation
1061
+
1062
+ - Overall verdict: ACCEPT, ACCEPT_WITH_CONDITIONS, or REJECT
1063
+ - Confidence level: LOW, MEDIUM, HIGH
1064
+ - Key conditions (if ACCEPT_WITH_CONDITIONS)
1065
+
1066
+ ---
1067
+
1068
+ ## Output Format
1069
+
1070
+ ````xml
1071
+ <output_format>
1072
+ Respond with a structured evaluation in JSON:
1073
+
1074
+ {
1075
+ "verdict": "ACCEPT" | "ACCEPT_WITH_CONDITIONS" | "REJECT",
1076
+ "confidence": "LOW" | "MEDIUM" | "HIGH",
1077
+ "key_findings": [
1078
+ {"category": "quality|security|performance|architecture|risks",
1079
+ "finding": "specific finding with file:line reference",
1080
+ "severity": "critical|high|medium|low"}
1081
+ ],
1082
+ "recommendation": "1-2 sentence summary of your recommendation",
1083
+ "conditions": ["condition 1", "condition 2"]
1084
+ }
1085
+
1086
+ Wrap the JSON in a ```json code fence.
1087
+ </output_format>
1088
+ ````
1089
+ """
1090
+
1091
+
1092
def _resolve_debate_prompt(
    subject: tuple[str, ...],
    prompt: str | None,
    code_mode: bool,
) -> str | None:
    """Build the evaluation resource text for the debate command.

    The subject text is taken from ``prompt`` (``-p``), else the positional
    ``subject`` words joined with spaces, else piped stdin (stripped). Unlike
    the panel command, every source is wrapped in an evaluation template,
    because the adversarial runner needs the ``{stance_prompt}`` placeholder
    that the templates carry.

    Returns:
        The code template with ``{target}`` filled in when ``code_mode`` is
        set, otherwise the proposal template with ``{proposal}`` filled in;
        ``None`` when no subject text is available.
    """
    text = prompt
    if not text:
        text = " ".join(subject) if subject else None
    # Fall back to stdin only when it is piped; never block on a terminal.
    if not text and not sys.stdin.isatty():
        text = sys.stdin.read().strip()
    if not text:
        return None

    if code_mode:
        template, placeholder = _CODE_DEBATE_EVALUATION_TEMPLATE, "{target}"
    else:
        template, placeholder = _DEBATE_EVALUATION_TEMPLATE, "{proposal}"
    # str.replace (not str.format) so the template's literal JSON braces survive.
    return template.replace(placeholder, text)
1112
+
1113
+
1114
@workflow_cmd.command(name="debate")
@click.argument("subject", nargs=-1)
@click.option(
    "-p",
    "--prompt",
    "prompt_text",
    type=str,
    default=None,
    help="Subject to evaluate (alternative to positional)",
)
@click.option(
    "--code",
    "code_mode",
    is_flag=True,
    help="Use code evaluation framework (default: proposal evaluation)",
)
@click.option(
    "--models",
    "-m",
    type=str,
    default=None,
    help="Comma-separated model names (default: all)",
)
@click.option("--timeout", "-t", type=int, default=600, help="Per-model timeout in seconds")
@click.option("--json", "json_output", is_flag=True, help="Output structured JSON")
@click.option("--check", "check_mode", is_flag=True, help="Gate on verdicts: any REJECT exits 1")
@click.option(
    "--worker",
    "workers",
    multiple=True,
    type=str,
    help='Worker spec: model:stance or model:"custom prompt" (repeatable)',
)
@click.option("--proxy", "via", type=str, default=None, help="Route proxy-backed workers through this proxy")
@click.option("--cwd", type=click.Path(exists=True), default=None, help="Working directory")
@click.pass_context
def debate(
    ctx: click.Context,
    subject: tuple[str, ...],
    prompt_text: str | None,
    code_mode: bool,
    models: str | None,
    timeout: int,
    json_output: bool,
    check_mode: bool,
    workers: tuple[str, ...],
    via: str | None,
    cwd: str | None,
) -> None:
    """Adversarial evaluation with stance-injected workers.

    Each model receives the evaluation template with its assigned stance prompt
    injected via {stance_prompt} replacement. Models are assigned stances
    cyclically: for, against, neutral.

    Use --worker for explicit model:stance mapping or custom prompts.

    Blinding is mandatory -- workers never see conversation context.

    \b
    Examples:
      forge workflow debate "Should we use event sourcing?" --json
      forge workflow debate src/forge/cli/ --code --check
      forge workflow debate --worker gpt-5.5:for --worker "claude-opus:Focus on security" "proposal"
    """
    # NOTE: the docstring above is the Click help text; keep it user-facing.
    # Usage errors below exit with status 2 via ctx.exit.
    from forge.review.adversarial import run_adversarial, validate_resource

    if workers and models:
        console.print("[red]Error:[/red] --worker and --models are mutually exclusive.")
        ctx.exit(2)
        return

    resolved = _resolve_debate_prompt(subject, prompt_text, code_mode)
    if not resolved:
        label = "target" if code_mode else "subject"
        console.print(f"[red]Error:[/red] No {label} provided. Pass as argument or use -p.")
        ctx.exit(2)
        return

    # The adversarial runner takes a resource *path*, so the filled template is
    # written to a temp file that is removed again in the ``finally`` below.
    resource_path: str | None = None
    try:
        # The ``with`` block closes the handle even when write() fails.
        # delete=False keeps the file on disk after closing so the runner can
        # reopen it by name.
        # NOTE(review): the file is written with the platform default encoding;
        # confirm which encoding the runner reads with before pinning one here.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as tmp_file:
            resource_path = tmp_file.name
            tmp_file.write(resolved)

        try:
            validate_resource(resource_path)
        except ValueError as e:
            console.print(f"[red]Error:[/red] {e}")
            ctx.exit(2)
            return

        # Explicit --worker specs, or cyclic stances over the resolved models.
        if workers:
            try:
                stances = _parse_worker_specs(workers, code_mode=code_mode)
            except ValueError as e:
                console.print(f"[red]Error:[/red] {e}")
                ctx.exit(2)
                return
        else:
            try:
                specs = resolve_model_specs(models)
            except ValueError as e:
                console.print(f"[red]Error:[/red] {e}")
                ctx.exit(2)
                return
            stances = _build_stances(specs, code_mode=code_mode)

        from forge.core.reactive.cost_tracking import (
            resolve_proxy_urls_from_plan,
            track_verb_cost,
        )
        from forge.review.routing import resolve_invocation_routing

        stance_models = [s.model for s in stances]
        try:
            routing_plan = resolve_invocation_routing(stance_models, via=via)
        except _ROUTING_ERRORS as e:
            _handle_routing_error(e, json_output=json_output)
            return

        _run_preflight(stance_models, json_output=json_output, routing_plan=routing_plan)

        with track_verb_cost("debate", resolve_proxy_urls_from_plan(routing_plan)):
            output = run_adversarial(
                resource_path,
                stances,
                timeout_seconds=timeout,
                cwd=cwd or str(Path.cwd()),
                routing_plan=routing_plan,
            )
    finally:
        # Only clean up when the temp file was actually created.
        if resource_path is not None:
            Path(resource_path).unlink(missing_ok=True)

    debate_warnings = _routing_plan_warnings(stance_models, routing_plan)
    debate_resolved_models = _resolved_models_summary(
        stance_models,
        routing_plan,
        worker_ids=[result.model_name for result in output.results],
        roles=output.stance_map,
        role_field="stance",
    )

    # --check takes precedence over --json: always JSON, exit 0 on pass, 1 otherwise.
    if check_mode:
        passed, reason = _evaluate_verdicts(output.results)
        data = _build_adversarial_json(
            output,
            passed=passed,
            check_mode_str="verdict",
            reason=reason,
            resolved_models=debate_resolved_models,
            routing_warnings=debate_warnings,
        )
        click.echo(json.dumps(data, indent=2))
        ctx.exit(0 if passed else 1)
        return

    if json_output:
        data = _build_adversarial_json(
            output,
            resolved_models=debate_resolved_models,
            routing_warnings=debate_warnings,
        )
        click.echo(json.dumps(data, indent=2))
    else:
        _print_debate_text(output, debate_resolved_models)
1283
+
1284
+
1285
def _build_stances(specs: list[ModelSpec], *, code_mode: bool = False) -> list[StanceSpec]:
    """Pair each model spec with a stock stance, round-robin over ``_STANCE_CYCLE``.

    The stance prompt text comes from the code stance table when ``code_mode``
    is set, otherwise from the proposal stance table.
    """
    table = _DEFAULT_CODE_STANCE_PROMPTS if code_mode else _DEFAULT_PROPOSAL_STANCE_PROMPTS

    def stance_at(index: int) -> str:
        return _STANCE_CYCLE[index % len(_STANCE_CYCLE)]

    return [
        StanceSpec(
            stance=stance_at(index),
            stance_prompt=table[stance_at(index)],
            model=spec,
        )
        for index, spec in enumerate(specs)
    ]
1299
+
1300
+
1301
def _parse_worker_specs(worker_args: tuple[str, ...] | list[str], *, code_mode: bool = False) -> list[StanceSpec]:
    """Parse --worker arguments into StanceSpec list.

    Formats:
        model:stance — stock stance (for/against/neutral, any letter case)
        model:custom text — custom prompt (anything not a known stance)

    Shells strip quotes before Click sees them, so ``model:"Focus on X"``
    arrives as ``model:Focus on X``. The parser treats any RHS that is not
    a known stance name as a custom prompt — no quote detection needed.
    Quotes that do survive are removed, and the text inside them is trimmed.

    Stance names are matched case-insensitively so that ``model:FOR`` selects
    the stock "for" stance instead of becoming a one-word custom prompt.

    Args:
        worker_args: Raw ``--worker`` values.
        code_mode: Use the code stance prompts instead of the proposal ones.

    Returns:
        One StanceSpec per argument, in input order. Custom prompts get
        ``stance="custom"`` and a ``display_label`` truncated to 30 characters.

    Raises:
        ValueError: for a missing colon, an unknown model, or an empty
            stance/prompt (including a quoted, whitespace-only one).
    """
    from forge.review.models import AVAILABLE_MODELS

    prompts = _DEFAULT_CODE_STANCE_PROMPTS if code_mode else _DEFAULT_PROPOSAL_STANCE_PROMPTS
    stances: list[StanceSpec] = []
    for arg in worker_args:
        # Split on the first colon only; the prompt itself may contain colons.
        model_name, colon, rest = arg.partition(":")
        if not colon:
            raise ValueError(f"Invalid --worker '{arg}'. Expected model:stance or model:custom prompt.")

        model_name = model_name.strip()
        if model_name not in AVAILABLE_MODELS:
            available = list(AVAILABLE_MODELS.keys())
            raise ValueError(f"Unknown model '{model_name}'. Available: {available}")

        spec = AVAILABLE_MODELS[model_name]
        rest = rest.strip()

        # Strip optional surrounding quotes (may survive in some shell contexts),
        # then trim again so '"  "' is caught by the empty check below.
        if len(rest) >= 2 and rest[0] in ('"', "'") and rest[-1] == rest[0]:
            rest = rest[1:-1].strip()

        if not rest:
            raise ValueError(f"Empty stance/prompt for model '{model_name}'.")

        stance_key = rest.lower()
        if stance_key in prompts:
            stances.append(
                StanceSpec(
                    stance=stance_key,
                    stance_prompt=prompts[stance_key],
                    model=spec,
                )
            )
        else:
            # Anything not a known stance is a custom prompt
            label = rest[:30] + ("..." if len(rest) > 30 else "")
            stances.append(
                StanceSpec(
                    stance="custom",
                    stance_prompt=rest,
                    model=spec,
                    display_label=label,
                )
            )

    return stances
1360
+
1361
+
1362
def _build_adversarial_json(
    output: AdversarialOutput,
    *,
    passed: bool | None = None,
    check_mode_str: str | None = None,
    reason: str | None = None,
    resolved_models: dict[str, dict[str, Any]] | None = None,
    routing_warnings: list[str] | None = None,
) -> dict[str, Any]:
    """Assemble the JSON payload for an adversarial (debate) run.

    Always present: ``resource_path`` (the fixed string ``"(generated)"``),
    ``stances``, per-worker ``results`` keyed by ``model_name``,
    ``successful`` and ``failed``. A worker's ``response`` is its stdout when
    it succeeded and ``None`` otherwise; its stance is looked up in
    ``output.stance_map`` and defaults to ``"unknown"``.

    Optional: ``resolved_models`` and ``routing_warnings`` are added when
    non-empty; ``passed``, ``check_mode`` and ``reason`` are added when the
    matching argument is not ``None``.
    """
    data: dict[str, Any] = {
        "resource_path": "(generated)",
        "stances": output.stances,
    }

    per_worker: dict[str, Any] = {}
    for worker in output.results:
        per_worker[worker.model_name] = {
            "stance": output.stance_map.get(worker.model_name, "unknown"),
            "response": worker.stdout if worker.success else None,
            "error": worker.error,
            "duration_seconds": round(worker.duration_seconds, 2),
            "success": worker.success,
        }
    data["results"] = per_worker
    data["successful"] = output.successful
    data["failed"] = output.failed

    if resolved_models:
        data["resolved_models"] = resolved_models
    # Gating fields use an explicit None test so that passed=False is emitted.
    gating_fields = (("passed", passed), ("check_mode", check_mode_str), ("reason", reason))
    for key, value in gating_fields:
        if value is not None:
            data[key] = value
    if routing_warnings:
        data["routing_warnings"] = routing_warnings
    return data
1399
+
1400
+
1401
def _print_debate_text(output: AdversarialOutput, resolved_models: dict[str, dict[str, Any]] | None = None) -> None:
    """Render adversarial evaluation results to the console as readable text.

    Prints a heading with the worker count, the resolved-model summary when one
    is given, then one section per worker: its output on success, or its error
    under a FAILED header otherwise.
    """
    console.print(f"\n[bold]Adversarial Evaluation[/bold] ({len(output.results)} workers)\n")
    if resolved_models:
        console.print(_format_resolved_models(resolved_models).rstrip())
        console.print()

    for position, worker in enumerate(output.results):
        # Stances are matched to results by position; a result with no
        # corresponding stance entry is labelled "unknown".
        if position < len(output.stances):
            stance = output.stances[position]
        else:
            stance = "unknown"
        header = f"[cyan]{worker.model_name}[/cyan] ([dim]{stance}[/dim])"

        if not worker.success:
            console.print(f"--- {header} [red]FAILED[/red] ---")
            console.print(f"[red]{worker.error}[/red]\n")
            continue

        console.print(f"--- {header} ---")
        console.print(worker.stdout)
        console.print()
1418
+
1419
+
1420
# --- Consensus subcommand ---

# Default role rotations for the consensus command. _build_consensus_roles
# picks one of these lists (code mode vs. proposal mode) and assigns roles to
# models by index modulo the list length, so a fourth model wraps around to the
# first role. Each name is looked up in NAMED_ROLES to obtain its prompt.
_PROPOSAL_ROLE_CYCLE = ["architecture", "security", "correctness"]
_CODE_ROLE_CYCLE = ["architecture", "security", "maintainability"]
1424
+
1425
# Prompt template for proposal-mode consensus evaluation.
# Placeholders are filled with plain str.replace rather than str.format,
# because the literal JSON braces in the output-format section would make
# str.format fail:
#   {subject}     - replaced with the subject text by _resolve_consensus_prompt
#   {role_prompt} - left in place by _resolve_consensus_prompt as a marker;
#                   presumably filled per worker downstream (not visible here)
_CONSENSUS_EVALUATION_TEMPLATE = """\
# Consensus Evaluation

```xml
<role>
You are a technical expert participating in a multi-perspective consensus process.
{role_prompt}
</role>

<behavior>
- Evaluate from your assigned perspective
- Support every claim with evidence or reasoning
- Be specific about trade-offs and constraints
- Identify both strengths and weaknesses from your viewpoint
- Provide a clear position with confidence level
</behavior>
```

---

## Subject Under Evaluation

{subject}

---

## Evaluation Framework

### 1. Assessment from Your Perspective

- What are the key considerations from your assigned viewpoint?
- What risks or opportunities do you see that others might miss?

### 2. Strengths

- What aspects of this proposal align well with your area of focus?

### 3. Concerns

- What issues or risks do you identify from your perspective?
- How severe are they? What is the mitigation path?

### 4. Recommendation

- Your position: SUPPORT, SUPPORT_WITH_CONDITIONS, or OPPOSE
- Confidence level: LOW, MEDIUM, HIGH
- Key conditions (if SUPPORT_WITH_CONDITIONS)

---

## Output Format

````xml
<output_format>
Respond with your assessment in JSON:

{
"position": "SUPPORT" | "SUPPORT_WITH_CONDITIONS" | "OPPOSE",
"confidence": "LOW" | "MEDIUM" | "HIGH",
"key_points": [
{"category": "strength|concern|risk|opportunity",
"point": "specific finding from your perspective",
"severity": "critical|high|medium|low"}
],
"recommendation": "1-2 sentence summary from your perspective",
"conditions": ["condition 1", "condition 2"]
}

Wrap the JSON in a ```json code fence.
</output_format>
````
"""
1497
+
1498
# Prompt template for code-mode (--code) consensus evaluation.
# Placeholders are filled with plain str.replace rather than str.format,
# because the literal JSON braces in the output-format section would make
# str.format fail:
#   {target}      - replaced with the code target by _resolve_consensus_prompt
#   {role_prompt} - left in place by _resolve_consensus_prompt as a marker;
#                   presumably filled per worker downstream (not visible here)
_CODE_CONSENSUS_EVALUATION_TEMPLATE = """\
# Code Consensus Evaluation

```xml
<role>
You are a senior code evaluator participating in a multi-perspective consensus process.
{role_prompt}
You identify issues and opportunities from your assigned perspective.
You provide actionable feedback with specific code references.
</role>

<behavior>
- Read all code in scope before forming opinions
- Cite specific file:line references for every finding
- Evaluate from your assigned perspective
- Support every claim with evidence or reasoning
- Cover ALL files in ONE pass -- do not present partial results
- Be specific: "potential null dereference at auth.py:45" not "might have issues"
- Provide a clear position with confidence level
</behavior>

<scope_constraints>
- Review only what's in scope
- Do not expand to adjacent code unless directly affected
- If tests exist for reviewed code, check them for coverage gaps
</scope_constraints>
```

---

## Code Under Evaluation

{target}

---

## Evaluation Framework

### 1. Quality

- Logic errors and edge cases
- Error handling: are errors caught, propagated, and surfaced correctly?
- Type safety: do type annotations match runtime behavior?
- Test coverage: are critical paths tested?

### 2. Security

- Input validation at trust boundaries
- Injection vectors (command, SQL, path traversal)
- Secrets in code or logs
- Authentication and authorization gaps

### 3. Performance

- Unnecessary allocations or copies in hot paths
- N+1 query patterns
- Missing caching where data is reused
- Blocking calls in async contexts

### 4. Architecture

- Component boundaries: is coupling appropriate?
- Dependency direction: do imports flow the right way?
- Abstraction level: is complexity in the right place?
- Interface contracts: are public APIs stable and well-defined?

### 5. Recommendation

- Your position: SUPPORT, SUPPORT_WITH_CONDITIONS, or OPPOSE
- Confidence level: LOW, MEDIUM, HIGH
- Key conditions (if SUPPORT_WITH_CONDITIONS)

---

## Output Format

````xml
<output_format>
Respond with your assessment in JSON:

{
"position": "SUPPORT" | "SUPPORT_WITH_CONDITIONS" | "OPPOSE",
"confidence": "LOW" | "MEDIUM" | "HIGH",
"key_points": [
{"category": "quality|security|performance|architecture|maintainability",
"point": "specific finding with file:line reference",
"severity": "critical|high|medium|low"}
],
"recommendation": "1-2 sentence summary from your perspective",
"conditions": ["condition 1", "condition 2"]
}

Wrap the JSON in a ```json code fence.
</output_format>
````
"""
1594
+
1595
+
1596
def _resolve_consensus_prompt(
    subject: tuple[str, ...],
    prompt: str | None,
    code_mode: bool,
) -> str | None:
    """Pick the consensus subject text and embed it in the evaluation template.

    The text is taken from ``prompt`` when non-empty, otherwise from the
    space-joined ``subject`` words, otherwise from stdin when stdin is not a
    terminal. Returns ``None`` when no text is found. The returned prompt still
    contains the ``{role_prompt}`` marker.
    """
    text = prompt
    if not text:
        text = " ".join(subject) if subject else None
    if not text and not sys.stdin.isatty():
        # Piped input is the last resort; blank input counts as "nothing given".
        text = sys.stdin.read().strip() or None
    if not text:
        return None

    if code_mode:
        template, marker = _CODE_CONSENSUS_EVALUATION_TEMPLATE, "{target}"
    else:
        template, marker = _CONSENSUS_EVALUATION_TEMPLATE, "{subject}"
    return template.replace(marker, text)
1612
+
1613
+
1614
def _build_consensus_roles(
    specs: list[ModelSpec],
    code_mode: bool,
) -> list[RoleSpec]:
    """Pair each model spec with a default role, wrapping around the role cycle.

    The code cycle is used when ``code_mode`` is true, the proposal cycle
    otherwise. The role prompt for each assigned name comes from NAMED_ROLES.
    """
    role_names = _PROPOSAL_ROLE_CYCLE
    if code_mode:
        role_names = _CODE_ROLE_CYCLE

    # The i-th model gets the (i mod cycle length)-th role name.
    assigned = (role_names[position % len(role_names)] for position in range(len(specs)))
    return [
        RoleSpec(
            role=name,
            role_prompt=NAMED_ROLES[name],
            model=model_spec,
        )
        for model_spec, name in zip(specs, assigned)
    ]
1631
+
1632
+
1633
def _parse_consensus_worker_specs(
    worker_args: tuple[str, ...] | list[str],
) -> list[RoleSpec]:
    """Parse ``--worker`` arguments into a list of RoleSpec.

    Each argument is split on its first colon. Accepted formats:
        model:role         -- a role named in NAMED_ROLES
        model:custom text  -- anything else is used as a custom role prompt

    Surrounding whitespace and one pair of matching quotes around the
    role/prompt part are removed before it is interpreted. Custom prompts get
    a display label made of their first 30 characters (with "..." appended
    when longer).

    Raises:
        ValueError: if an argument has no colon, names a model that is not in
            AVAILABLE_MODELS, or has an empty or whitespace-only role/prompt.
    """
    from forge.review.models import AVAILABLE_MODELS

    role_specs: list[RoleSpec] = []
    for arg in worker_args:
        model_name, separator, rest = arg.partition(":")
        if not separator:
            raise ValueError(f"Invalid --worker '{arg}'. Expected model:role or model:custom prompt.")

        model_name = model_name.strip()
        if model_name not in AVAILABLE_MODELS:
            available = list(AVAILABLE_MODELS.keys())
            raise ValueError(f"Unknown model '{model_name}'. Available: {available}")

        spec = AVAILABLE_MODELS[model_name]
        rest = rest.strip()

        # Strip optional surrounding quotes (may survive in some shell contexts)
        if len(rest) >= 2 and rest[0] in ('"', "'") and rest[-1] == rest[0]:
            # Strip again after unquoting: otherwise a quoted blank such as
            # '"  "' would pass the emptiness check below as a whitespace-only
            # custom prompt, and '" security "' would miss the named role.
            rest = rest[1:-1].strip()

        if not rest:
            raise ValueError(f"Empty role/prompt for model '{model_name}'.")

        if rest in NAMED_ROLES:
            role_specs.append(RoleSpec(role=rest, role_prompt=NAMED_ROLES[rest], model=spec))
        else:
            # Anything that is not a named role is treated as a custom prompt.
            label = rest[:30] + ("..." if len(rest) > 30 else "")
            role_specs.append(
                RoleSpec(
                    role="custom",
                    role_prompt=rest,
                    model=spec,
                    display_label=label,
                )
            )

    return role_specs
1682
+
1683
+
1684
def _build_consensus_json(
    output: ConsensusOutput,
    *,
    passed: bool | None = None,
    check_mode_str: str | None = None,
    reason: str | None = None,
    resolved_models: dict[str, dict[str, Any]] | None = None,
    routing_warnings: list[str] | None = None,
) -> dict[str, Any]:
    """Assemble the JSON-serialisable payload for a consensus workflow run.

    Both rounds are reported as mappings keyed by model name. The optional
    keyword arguments are added only when supplied: ``passed``,
    ``check_mode_str`` (emitted as ``check_mode``) and ``reason`` when they are
    not ``None``; ``resolved_models`` and ``routing_warnings`` when non-empty.
    """

    def round_entries(results: Any) -> dict[str, Any]:
        # One entry per worker; the captured output of a failed worker is not reported.
        entries: dict[str, Any] = {}
        for worker in results:
            entries[worker.model_name] = {
                "role": output.role_map.get(worker.model_name, "unknown"),
                "response": worker.stdout if worker.success else None,
                "error": worker.error,
                "duration_seconds": round(worker.duration_seconds, 2),
                "success": worker.success,
            }
        return entries

    payload: dict[str, Any] = {
        "subject": output.subject,
        "roles": output.roles,
        "role_map": output.role_map,
        "round1": round_entries(output.round1_results),
        "round2": round_entries(output.round2_results),
        "reconciliation_brief": output.reconciliation_brief,
        "successful": output.successful,
        "failed": output.failed,
    }

    if resolved_models:
        payload["resolved_models"] = resolved_models
    # These three are included whenever a value was passed, even a falsy one.
    for key, value in (
        ("passed", passed),
        ("check_mode", check_mode_str),
        ("reason", reason),
    ):
        if value is not None:
            payload[key] = value
    if routing_warnings:
        payload["routing_warnings"] = routing_warnings
    return payload
1733
+
1734
+
1735
def _print_consensus_text(output: ConsensusOutput, resolved_models: dict[str, dict[str, Any]] | None = None) -> None:
    """Print consensus results as structured human-readable text.

    Sections, in order: a heading with the Round 2 worker count, the
    resolved-model summary (when given), Round 1 positions truncated to 500
    characters each, the reconciliation brief truncated to 300 characters,
    the full Round 2 output, and a completion status line. Failed workers are
    shown with a FAILED header and their error. An ellipsis is appended to an
    excerpt only when text was actually cut off.
    """
    console.print(f"\n[bold]Consensus Workflow[/bold] ({len(output.round2_results)} workers, 2 rounds)\n")
    if resolved_models:
        console.print(_format_resolved_models(resolved_models).rstrip())
        console.print()

    # Round 1 positions (truncated)
    console.print("[dim]Round 1: Initial Positions[/dim]\n")
    for result in output.round1_results:
        role = output.role_map.get(result.model_name, "unknown")
        header = f"[cyan]{result.model_name}[/cyan] ([dim]{role}[/dim])"
        if result.success:
            console.print(f"--- {header} ---")
            excerpt = result.stdout[:500]
            if len(result.stdout) > 500:
                excerpt += "..."
            console.print(excerpt)
            console.print()
        else:
            console.print(f"--- {header} [red]FAILED[/red] ---")
            console.print(f"[red]{result.error}[/red]\n")

    # Reconciliation brief (dimmed). The ellipsis marks real truncation only,
    # matching the Round 1 excerpts; a short brief is shown as-is.
    brief = output.reconciliation_brief
    brief_excerpt = brief[:300]
    if len(brief) > 300:
        brief_excerpt += "..."
    console.print("[dim]--- Reconciliation Brief ---[/dim]")
    console.print(f"[dim]{brief_excerpt}[/dim]\n")

    # Round 2 recommendations (full)
    console.print("[dim]Round 2: Reconciliation[/dim]\n")
    for result in output.round2_results:
        role = output.role_map.get(result.model_name, "unknown")
        header = f"[cyan]{result.model_name}[/cyan] ([dim]{role}[/dim])"
        if result.success:
            console.print(f"--- {header} ---")
            console.print(result.stdout)
            console.print()
        else:
            console.print(f"--- {header} [red]FAILED[/red] ---")
            console.print(f"[red]{result.error}[/red]\n")

    # Status line (execution status only; actual convergence is in the synthesis)
    completed = sum(1 for r in output.round2_results if r.success)
    total = len(output.round2_results)
    console.print(f"[bold]Completed: {completed}/{total} workers finished reconciliation[/bold]")
1779
+
1780
+
1781
@workflow_cmd.command(name="consensus")
@click.argument("subject", nargs=-1)
@click.option(
    "-p",
    "--prompt",
    "prompt_text",
    type=str,
    default=None,
    help="Subject to build consensus on (alternative to positional)",
)
@click.option(
    "--code",
    "code_mode",
    is_flag=True,
    help="Use code evaluation framework (default: proposal evaluation)",
)
@click.option(
    "--models",
    "-m",
    type=str,
    default=None,
    help="Comma-separated model names (default: all)",
)
@click.option(
    "--timeout",
    "-t",
    type=int,
    default=600,
    help="Per-round timeout in seconds (total wall time ~2x for two rounds)",
)
@click.option("--json", "json_output", is_flag=True, help="Output structured JSON")
@click.option(
    "--check",
    "check_mode",
    is_flag=True,
    help="Gate on positions: exit 0 if all supporting, exit 1 otherwise",
)
@click.option(
    "--worker",
    "workers",
    multiple=True,
    type=str,
    help='Worker spec: model:role or model:"custom prompt" (repeatable)',
)
@click.option("--proxy", "via", type=str, default=None, help="Route proxy-backed workers through this proxy")
@click.option("--cwd", type=click.Path(exists=True), default=None, help="Working directory")
@click.pass_context
def consensus(
    ctx: click.Context,
    subject: tuple[str, ...],
    prompt_text: str | None,
    code_mode: bool,
    models: str | None,
    timeout: int,
    json_output: bool,
    check_mode: bool,
    workers: tuple[str, ...],
    via: str | None,
    cwd: str | None,
) -> None:
    """Two-round consensus building with role-assigned workers.

    Round 1: Each model evaluates the subject from an assigned role
    (architecture, security, etc.) independently.
    Round 2: Each model receives all Round 1 positions and produces
    a reconciled recommendation.

    Default roles: architecture, security, correctness (proposals)
    or architecture, security, maintainability (code).

    \b
    Examples:
    forge workflow consensus "Should we use event sourcing?" --json
    forge workflow consensus src/forge/cli/ --code --check
    forge workflow consensus --worker gpt-5.5:security --worker "claude-opus:Focus on DX" "proposal"
    """
    from forge.review.consensus import run_consensus, validate_resource

    # Explicit worker specs and a plain model list are two alternative ways of
    # choosing workers; usage errors exit with status 2.
    if workers and models:
        console.print("[red]Error:[/red] --worker and --models are mutually exclusive.")
        ctx.exit(2)
        return

    # Resolve raw subject once (-p > positional > stdin) so the unwrapped text
    # is also available as original_subject for run_consensus below.
    raw_subject = prompt_text or (" ".join(subject) if subject else None)
    if not raw_subject and not sys.stdin.isatty():
        raw_subject = sys.stdin.read().strip() or None

    # Wrap the subject in the evaluation template; the positional tuple is
    # passed empty because raw_subject already holds the chosen text.
    resolved = _resolve_consensus_prompt((), raw_subject, code_mode)
    if not resolved:
        label = "target" if code_mode else "subject"
        console.print(f"[red]Error:[/red] No {label} provided. Pass as argument or use -p.")
        ctx.exit(2)
        return

    # The wrapped prompt is handed to validate_resource / run_consensus as a
    # file path, so it is written to a temporary .md file. delete=False keeps
    # the file on disk after close(); the finally clause removes it.
    tmp_file = None
    try:
        tmp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False)
        tmp_file.write(resolved)
        tmp_file.close()
        resource_path = tmp_file.name

        try:
            validate_resource(resource_path)
        except ValueError as e:
            console.print(f"[red]Error:[/red] {e}")
            ctx.exit(2)
            return

        # Roles come either from explicit --worker specs or from the default
        # role cycle applied to the selected models.
        if workers:
            try:
                role_specs = _parse_consensus_worker_specs(workers)
            except ValueError as e:
                console.print(f"[red]Error:[/red] {e}")
                ctx.exit(2)
                return
        else:
            try:
                specs = resolve_model_specs(models)
            except ValueError as e:
                console.print(f"[red]Error:[/red] {e}")
                ctx.exit(2)
                return
            role_specs = _build_consensus_roles(specs, code_mode)

        from forge.core.reactive.cost_tracking import (
            resolve_proxy_urls_from_plan,
            track_verb_cost,
        )
        from forge.review.routing import resolve_invocation_routing

        role_models = [r.model for r in role_specs]
        try:
            routing_plan = resolve_invocation_routing(role_models, via=via)
        except _ROUTING_ERRORS as e:
            _handle_routing_error(e, json_output=json_output)
            return

        _run_preflight(role_models, json_output=json_output, routing_plan=routing_plan)

        with track_verb_cost("consensus", resolve_proxy_urls_from_plan(routing_plan)):
            output = run_consensus(
                resource_path,
                role_specs,
                timeout_seconds=timeout,
                cwd=cwd or str(Path.cwd()),
                original_subject=raw_subject or "",
                routing_plan=routing_plan,
            )
    finally:
        # Remove the temporary prompt file on every exit path.
        if tmp_file is not None:
            Path(tmp_file.name).unlink(missing_ok=True)

    consensus_warnings = _routing_plan_warnings(role_models, routing_plan)
    consensus_resolved_models = _resolved_models_summary(
        role_models,
        routing_plan,
        worker_ids=[result.model_name for result in output.round1_results],
        roles=output.role_map,
    )

    # --check always emits JSON (whether or not --json was given) and turns the
    # Round 2 position gate into the process exit status.
    if check_mode:
        passed, reason = _evaluate_consensus_positions(output.round2_results)
        data = _build_consensus_json(
            output,
            passed=passed,
            check_mode_str="position",
            reason=reason,
            resolved_models=consensus_resolved_models,
            routing_warnings=consensus_warnings,
        )
        click.echo(json.dumps(data, indent=2))
        ctx.exit(0 if passed else 1)
        return

    if json_output:
        data = _build_consensus_json(
            output,
            resolved_models=consensus_resolved_models,
            routing_warnings=consensus_warnings,
        )
        click.echo(json.dumps(data, indent=2))
    else:
        _print_consensus_text(output, consensus_resolved_models)