multi-forge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311) hide show
  1. forge/__init__.py +3 -0
  2. forge/_extensions/agents/.gitkeep +0 -0
  3. forge/_extensions/commands/.gitkeep +0 -0
  4. forge/_extensions/skills/analyze/SKILL.md +87 -0
  5. forge/_extensions/skills/challenge/SKILL.md +91 -0
  6. forge/_extensions/skills/consensus/SKILL.md +120 -0
  7. forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
  8. forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
  9. forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
  10. forge/_extensions/skills/debate/SKILL.md +116 -0
  11. forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
  12. forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
  13. forge/_extensions/skills/panel/SKILL.md +141 -0
  14. forge/_extensions/skills/panel/resources/synthesis.md +103 -0
  15. forge/_extensions/skills/qa/SKILL.md +704 -0
  16. forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
  17. forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
  18. forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
  19. forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
  20. forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
  21. forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
  22. forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
  23. forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
  24. forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
  25. forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
  26. forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
  27. forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
  28. forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
  29. forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
  30. forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
  31. forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
  32. forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
  33. forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
  34. forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
  35. forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
  36. forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
  37. forge/_extensions/skills/qa/resources/checklist.md +103 -0
  38. forge/_extensions/skills/qa/resources/report-template.md +62 -0
  39. forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
  40. forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
  41. forge/_extensions/skills/review/SKILL.md +125 -0
  42. forge/_extensions/skills/review/references/claude-4.6.md +474 -0
  43. forge/_extensions/skills/review/references/claude-4.7.md +710 -0
  44. forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
  45. forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
  46. forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
  47. forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
  48. forge/_extensions/skills/review/resources/code-gemini.md +184 -0
  49. forge/_extensions/skills/review/resources/code-openai.md +203 -0
  50. forge/_extensions/skills/review/resources/code.md +160 -0
  51. forge/_extensions/skills/review-docs/SKILL.md +121 -0
  52. forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
  53. forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
  54. forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
  55. forge/_extensions/skills/review-docs/resources/docs.md +170 -0
  56. forge/_extensions/skills/smoke-test/SKILL.md +27 -0
  57. forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
  58. forge/_extensions/skills/understand/SKILL.md +148 -0
  59. forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
  60. forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
  61. forge/_extensions/skills/understand/resources/code-openai.md +181 -0
  62. forge/_extensions/skills/understand/resources/code.md +163 -0
  63. forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
  64. forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
  65. forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
  66. forge/_extensions/skills/understand/resources/docs.md +177 -0
  67. forge/_extensions/skills/walkthrough/SKILL.md +599 -0
  68. forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
  69. forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
  70. forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
  71. forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
  72. forge/backend/__init__.py +174 -0
  73. forge/backend/adapters/__init__.py +38 -0
  74. forge/backend/adapters/litellm.py +158 -0
  75. forge/backend/creation.py +89 -0
  76. forge/backend/registry.py +178 -0
  77. forge/cli/__init__.py +16 -0
  78. forge/cli/auth.py +483 -0
  79. forge/cli/backend.py +298 -0
  80. forge/cli/claude.py +411 -0
  81. forge/cli/config_cmd.py +303 -0
  82. forge/cli/extensions.py +1001 -0
  83. forge/cli/gc.py +165 -0
  84. forge/cli/guard.py +1018 -0
  85. forge/cli/guards.py +106 -0
  86. forge/cli/handoff.py +110 -0
  87. forge/cli/hooks/__init__.py +36 -0
  88. forge/cli/hooks/_group.py +20 -0
  89. forge/cli/hooks/_helpers.py +149 -0
  90. forge/cli/hooks/commands.py +1677 -0
  91. forge/cli/hooks/direct_commands.py +1304 -0
  92. forge/cli/hooks/install.py +232 -0
  93. forge/cli/hooks/policy.py +151 -0
  94. forge/cli/hooks/read_hygiene.py +74 -0
  95. forge/cli/hooks/verification.py +370 -0
  96. forge/cli/logs.py +406 -0
  97. forge/cli/main.py +292 -0
  98. forge/cli/proxy.py +1821 -0
  99. forge/cli/proxy_costs.py +313 -0
  100. forge/cli/search.py +416 -0
  101. forge/cli/session.py +892 -0
  102. forge/cli/session_addendum.py +81 -0
  103. forge/cli/session_fork.py +750 -0
  104. forge/cli/session_handoff.py +141 -0
  105. forge/cli/session_lifecycle.py +2053 -0
  106. forge/cli/session_manage.py +1336 -0
  107. forge/cli/session_memory.py +201 -0
  108. forge/cli/status_line.py +1398 -0
  109. forge/cli/workflow.py +1964 -0
  110. forge/config/__init__.py +110 -0
  111. forge/config/dataclass_utils.py +88 -0
  112. forge/config/defaults/__init__.py +0 -0
  113. forge/config/defaults/backends/__init__.py +0 -0
  114. forge/config/defaults/backends/litellm.yaml +196 -0
  115. forge/config/defaults/templates/__init__.py +0 -0
  116. forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
  117. forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
  118. forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
  119. forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
  120. forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
  121. forge/config/defaults/templates/litellm-gemini.yaml +21 -0
  122. forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
  123. forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
  124. forge/config/defaults/templates/litellm-openai.yaml +28 -0
  125. forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
  126. forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
  127. forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
  128. forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
  129. forge/config/defaults/templates/openrouter-glm.yaml +23 -0
  130. forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
  131. forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
  132. forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
  133. forge/config/defaults/templates/openrouter-openai.yaml +28 -0
  134. forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
  135. forge/config/loader.py +675 -0
  136. forge/config/schema.py +448 -0
  137. forge/core/__init__.py +5 -0
  138. forge/core/auth/__init__.py +67 -0
  139. forge/core/auth/capabilities.py +219 -0
  140. forge/core/auth/credentials_file.py +244 -0
  141. forge/core/auth/protocols.py +18 -0
  142. forge/core/auth/secrets.py +243 -0
  143. forge/core/auth/template_secrets.py +112 -0
  144. forge/core/data/__init__.py +5 -0
  145. forge/core/data/model_catalog.yaml +1522 -0
  146. forge/core/data/pricing.yaml +140 -0
  147. forge/core/data/system_prompt_addendums/__init__.py +0 -0
  148. forge/core/data/system_prompt_addendums/gemini.md +330 -0
  149. forge/core/data/system_prompt_addendums/openai.md +328 -0
  150. forge/core/llm/__init__.py +231 -0
  151. forge/core/llm/clients/__init__.py +14 -0
  152. forge/core/llm/clients/base.py +115 -0
  153. forge/core/llm/clients/litellm.py +619 -0
  154. forge/core/llm/clients/openai_compat.py +244 -0
  155. forge/core/llm/clients/openrouter.py +234 -0
  156. forge/core/llm/credentials.py +439 -0
  157. forge/core/llm/detection.py +86 -0
  158. forge/core/llm/errors.py +44 -0
  159. forge/core/llm/protocols.py +80 -0
  160. forge/core/llm/types.py +176 -0
  161. forge/core/logging.py +146 -0
  162. forge/core/models/__init__.py +91 -0
  163. forge/core/models/catalog.py +467 -0
  164. forge/core/models/pricing.py +165 -0
  165. forge/core/models/types.py +167 -0
  166. forge/core/naming.py +212 -0
  167. forge/core/ops/__init__.py +73 -0
  168. forge/core/ops/context.py +141 -0
  169. forge/core/ops/gc.py +802 -0
  170. forge/core/ops/proxy.py +146 -0
  171. forge/core/ops/resolution.py +135 -0
  172. forge/core/ops/session.py +344 -0
  173. forge/core/ops/session_context.py +548 -0
  174. forge/core/paths.py +38 -0
  175. forge/core/process.py +54 -0
  176. forge/core/reactive/__init__.py +38 -0
  177. forge/core/reactive/cost_tracking.py +300 -0
  178. forge/core/reactive/env.py +180 -0
  179. forge/core/reactive/proxy.py +78 -0
  180. forge/core/reactive/routing.py +622 -0
  181. forge/core/reactive/session_runner.py +185 -0
  182. forge/core/reactive/structured_output.py +62 -0
  183. forge/core/reactive/tagger.py +94 -0
  184. forge/core/reactive/throttle.py +132 -0
  185. forge/core/state/__init__.py +59 -0
  186. forge/core/state/exceptions.py +59 -0
  187. forge/core/state/io.py +140 -0
  188. forge/core/state/lock.py +99 -0
  189. forge/core/state/timestamps.py +60 -0
  190. forge/core/transcript.py +78 -0
  191. forge/core/typing_helpers.py +24 -0
  192. forge/core/workqueue/__init__.py +67 -0
  193. forge/core/workqueue/queue.py +552 -0
  194. forge/core/workqueue/types.py +63 -0
  195. forge/guard/__init__.py +26 -0
  196. forge/guard/deterministic/__init__.py +26 -0
  197. forge/guard/deterministic/base.py +158 -0
  198. forge/guard/deterministic/coding_standards.py +256 -0
  199. forge/guard/deterministic/registry.py +148 -0
  200. forge/guard/deterministic/tdd.py +171 -0
  201. forge/guard/engine.py +216 -0
  202. forge/guard/protocols.py +91 -0
  203. forge/guard/queries.py +96 -0
  204. forge/guard/semantic/__init__.py +34 -0
  205. forge/guard/semantic/promotion.py +18 -0
  206. forge/guard/semantic/supervisor.py +813 -0
  207. forge/guard/semantic/verdict.py +183 -0
  208. forge/guard/store.py +124 -0
  209. forge/guard/team/__init__.py +6 -0
  210. forge/guard/team/config.py +24 -0
  211. forge/guard/team/handlers.py +209 -0
  212. forge/guard/team/prompts.py +41 -0
  213. forge/guard/types.py +125 -0
  214. forge/guard/workflow/__init__.py +17 -0
  215. forge/guard/workflow/branches.py +67 -0
  216. forge/guard/workflow/config.py +63 -0
  217. forge/guard/workflow/divergence.py +113 -0
  218. forge/guard/workflow/policy.py +87 -0
  219. forge/guard/workflow/stages.py +205 -0
  220. forge/install/__init__.py +55 -0
  221. forge/install/cli.py +281 -0
  222. forge/install/exceptions.py +163 -0
  223. forge/install/hooks.py +109 -0
  224. forge/install/installer.py +1037 -0
  225. forge/install/models.py +321 -0
  226. forge/install/preset.py +272 -0
  227. forge/install/settings_merge.py +831 -0
  228. forge/install/tracking.py +238 -0
  229. forge/install/version.py +141 -0
  230. forge/proxy/__init__.py +0 -0
  231. forge/proxy/base_client.py +181 -0
  232. forge/proxy/client_adapter.py +476 -0
  233. forge/proxy/client_factory.py +531 -0
  234. forge/proxy/converters.py +1206 -0
  235. forge/proxy/cost_logger.py +132 -0
  236. forge/proxy/cost_tracker.py +242 -0
  237. forge/proxy/data_models.py +338 -0
  238. forge/proxy/error_hints.py +92 -0
  239. forge/proxy/metrics.py +222 -0
  240. forge/proxy/model_spec.py +158 -0
  241. forge/proxy/proxies.py +333 -0
  242. forge/proxy/proxy_identity.py +134 -0
  243. forge/proxy/proxy_orchestrator.py +1018 -0
  244. forge/proxy/proxy_startup.py +54 -0
  245. forge/proxy/server.py +1561 -0
  246. forge/proxy/utils.py +537 -0
  247. forge/review/__init__.py +6 -0
  248. forge/review/adversarial.py +111 -0
  249. forge/review/consensus.py +236 -0
  250. forge/review/engine.py +356 -0
  251. forge/review/models.py +437 -0
  252. forge/review/resources/__init__.py +5 -0
  253. forge/review/resources/codereview-performance.md +85 -0
  254. forge/review/resources/codereview-quick.md +75 -0
  255. forge/review/resources/codereview-security.md +92 -0
  256. forge/review/resources/codereview.md +85 -0
  257. forge/review/resources/docreview-quick.md +75 -0
  258. forge/review/resources/docreview.md +86 -0
  259. forge/review/resources/thinkdeep.md +89 -0
  260. forge/review/routing.py +368 -0
  261. forge/review/synthesis.py +73 -0
  262. forge/runtime_config.py +438 -0
  263. forge/search/__init__.py +55 -0
  264. forge/search/bm25_store.py +264 -0
  265. forge/search/content_store.py +197 -0
  266. forge/search/engine.py +352 -0
  267. forge/search/exceptions.py +51 -0
  268. forge/search/extractor.py +234 -0
  269. forge/search/index_state.py +295 -0
  270. forge/search/store.py +215 -0
  271. forge/search/tokenizer.py +24 -0
  272. forge/session/__init__.py +130 -0
  273. forge/session/active.py +339 -0
  274. forge/session/artifacts.py +202 -0
  275. forge/session/claude/__init__.py +50 -0
  276. forge/session/claude/cleanup.py +105 -0
  277. forge/session/claude/invoke.py +236 -0
  278. forge/session/claude/paths.py +200 -0
  279. forge/session/cleanup.py +216 -0
  280. forge/session/config.py +34 -0
  281. forge/session/direct_model.py +107 -0
  282. forge/session/effective.py +169 -0
  283. forge/session/exceptions.py +255 -0
  284. forge/session/handoff.py +881 -0
  285. forge/session/handoff_agent.py +544 -0
  286. forge/session/hooks/__init__.py +35 -0
  287. forge/session/hooks/models.py +73 -0
  288. forge/session/hooks/session_start.py +507 -0
  289. forge/session/identity.py +84 -0
  290. forge/session/index.py +553 -0
  291. forge/session/manager.py +1506 -0
  292. forge/session/models.py +572 -0
  293. forge/session/overrides.py +344 -0
  294. forge/session/plan_resolution.py +286 -0
  295. forge/session/prev_sessions.py +128 -0
  296. forge/session/store.py +431 -0
  297. forge/session/validation.py +47 -0
  298. forge/session/worktree/__init__.py +65 -0
  299. forge/session/worktree/cleanup.py +262 -0
  300. forge/session/worktree/config_copy.py +203 -0
  301. forge/session/worktree/create.py +332 -0
  302. forge/sidecar/__init__.py +29 -0
  303. forge/sidecar/container.py +161 -0
  304. forge/sidecar/docker.py +86 -0
  305. forge/sidecar/secrets.py +19 -0
  306. multi_forge-0.2.0.dist-info/METADATA +242 -0
  307. multi_forge-0.2.0.dist-info/RECORD +311 -0
  308. multi_forge-0.2.0.dist-info/WHEEL +4 -0
  309. multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
  310. multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
  311. multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
forge/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """Multi-Forge - Multi-runtime agent toolkit."""
2
+
3
+ __version__ = "0.2.0"
File without changes
File without changes
@@ -0,0 +1,87 @@
1
+ ---
2
+ name: forge:analyze
3
+ description: Deep single-model analysis of a topic, question, or architectural decision.
4
+ disable-model-invocation: false
5
+ argument-hint: '[topic: path or question or instruction] [--output path] [--models model]'
6
+ context: fork
7
+ effort: high
8
+ allowed-tools: Bash, Read
9
+ ---
10
+
11
+ # Deep Analysis
12
+
13
+ Deep analysis of a topic, question, or architectural decision using a dedicated model worker.
14
+
15
+ ## Usage
16
+
17
+ ```
18
+ /forge:analyze [topic] [--output path] [--models model]
19
+ ```
20
+
21
+ ## Arguments
22
+
23
+ | Argument | Required | Description |
24
+ | ---------- | -------- | -------------------------------------------------------------------------------------- |
25
+ | `topic` | Optional | Question, file path, directory, or instruction on what to analyze (defaults to asking) |
26
+ | `--models` | Optional | Comma-separated model list (default: claude-opus) |
27
+ | `--output` | Optional | Write result to file instead of conversation (e.g., `analysis.md`) |
28
+
29
+ **Available models:** !`forge workflow list-models`
30
+
31
+ Only use models with status **ready** in the table above. If the default set includes unavailable models, pass
32
+ `--models <ready models>` explicitly. If the user explicitly requested an unavailable model, stop and tell them what
33
+ proxy or credential is missing rather than silently substituting. If no models are ready, tell the user what's missing
34
+ and stop.
35
+
36
+ ---
37
+
38
+ ## Execution
39
+
40
+ ### Step 1: Resolve Topic and Flags
41
+
42
+ Parse `$ARGUMENTS` into a positional topic and optional flags. The topic is everything that is not a recognized flag
43
+ (question, file path, directory, or free-form instruction). Strip any leading `@` prefix on the topic. If no topic is
44
+ found, ask the user what they want to analyze.
45
+
46
+ Recognized flags (extract from `$ARGUMENTS` if present):
47
+
48
+ - `--models <value>` — comma-separated model list (default: claude-opus)
49
+ - `--output <path>` — write result to file instead of conversation
50
+
51
+ Never ask the user to clarify if a topic was provided. If `$ARGUMENTS` contains anything, proceed immediately.
52
+
53
+ ### Step 2: Run Deep Analysis
54
+
55
+ ```bash
56
+ forge workflow analyze "<topic>" [--models <models>] --json
57
+ ```
58
+
59
+ Omit `--models` if the user didn't specify (defaults to claude-opus).
60
+
61
+ If the command exits with a non-zero code or returns invalid JSON, report the error to the user and stop. Do not attempt
62
+ to parse partial output or fabricate a response.
63
+
64
+ ### Step 3: Present Analysis
65
+
66
+ Format the model's deep analysis as a structured response:
67
+
68
+ 0. Resolved model used: from `resolved_models`, include requested model, resolved model ref, provider, proxy, and
69
+ template
70
+ 1. Problem decomposition
71
+ 2. Key evidence and considerations
72
+ 3. Analysis and trade-offs
73
+ 4. Recommendations with rationale
74
+
75
+ If the model failed, report the error and suggest retrying.
76
+
77
+ **Output routing:** If `--output` was specified, write the complete analysis to that path using the Write tool (create
78
+ parent directories if needed). Print a one-line confirmation: `Wrote analysis to {path}`. Do not also print the full
79
+ result in the conversation. If `--output` was not specified, print the result in the conversation as usual.
80
+
81
+ ---
82
+
83
+ ## Requirements
84
+
85
+ - **Forge CLI**: `forge` must be on PATH
86
+ - **Claude CLI**: workflow workers run through local `claude -p`; `claude` must be on PATH in this Bash environment
87
+ - **Claude Opus**: Uses Anthropic directly (no proxy needed)
@@ -0,0 +1,91 @@
1
+ ---
2
+ name: forge:challenge
3
+ description: Pressure-test a claim, recommendation, or assumption. Defaults to skepticism.
4
+ argument-hint: '[claim or objection]'
5
+ effort: high
6
+ allowed-tools: Read, Grep, Glob, Bash, Agent
7
+ ---
8
+
9
+ # Challenge
10
+
11
+ Pressure-test a claim, recommendation, or assumption with adversarial skepticism.
12
+
13
+ ## Usage
14
+
15
+ ```
16
+ /forge:challenge [claim]
17
+ ```
18
+
19
+ ## Arguments
20
+
21
+ | Argument | Required | Description |
22
+ | -------- | -------- | ---------------------------------------------------------------------- |
23
+ | `claim` | Optional | Statement, objection, or question to pressure-test (inferred if empty) |
24
+
25
+ ---
26
+
27
+ ## Execution
28
+
29
+ ### Step 1: Resolve Claim
30
+
31
+ `$ARGUMENTS` is the claim to challenge. It should be a statement, objection, question, or instruction -- not a bare file
32
+ path. If it starts with `@`, strip the prefix (Claude Code file reference syntax).
33
+
34
+ If `$ARGUMENTS` is empty, infer the claim from the immediately preceding conversation context: the last recommendation,
35
+ decision, assertion, or proposed change. Only ask the user what to challenge if no prior claim is identifiable from
36
+ context.
37
+
38
+ Never ask the user to clarify if a claim was provided. If `$ARGUMENTS` contains anything, proceed immediately.
39
+
40
+ ### Step 2: Challenge
41
+
42
+ This skill defaults to **skepticism, not balance**. The starting posture is adversarial: assume the claim may be wrong
43
+ and try to prove that. Only soften to a balanced conclusion if the skeptical case genuinely fails.
44
+
45
+ If the challenge starts from a neutral or symmetrical frame, it provides no value over a standard analysis. The entire
46
+ point is targeted pressure-testing.
47
+
48
+ Execute these steps:
49
+
50
+ 1. **Restate the claim precisely.** What exactly is being asserted? Remove ambiguity.
51
+
52
+ 2. **Assume it is wrong.** Actively search for:
53
+
54
+ - Flaws in reasoning or hidden assumptions
55
+ - Counterexamples from the codebase or known constraints
56
+ - Missing edge cases or failure modes
57
+ - Simpler alternatives that would invalidate the complexity
58
+ - Contradictions with existing architecture or decisions
59
+
60
+ 3. **Investigate the repo.** Use Read, Grep, and Glob to find evidence. Check whether the claim holds against actual
61
+ code, tests, configuration, and documented decisions. Do not reason from first principles alone when evidence is
62
+ available.
63
+
64
+ 4. **Test the skeptical case.** Is the counterargument strong, or does it fall apart under scrutiny?
65
+
66
+ 5. **If the skeptical case fails,** explain clearly why the original claim survives. This is a valid and useful outcome
67
+ -- the claim is stronger for having been tested.
68
+
69
+ 6. **Return a verdict:**
70
+
71
+ - **Concern validated** -- the skeptical case holds; the claim has real problems
72
+ - **Partially validated** -- some aspects hold, others don't; specific issues identified
73
+ - **Concern not supported** -- the skeptical case failed; the claim survives scrutiny
74
+ - **Insufficient evidence** -- cannot determine either way from available information
75
+
76
+ ### Step 3: Format Output
77
+
78
+ Present the challenge as:
79
+
80
+ ```
81
+ ## Challenge: [restated claim]
82
+
83
+ ### Skeptical Case
84
+ [The strongest argument against the claim, with evidence]
85
+
86
+ ### Counter-Evidence
87
+ [What supports the claim, why the skeptical case fails (if it does)]
88
+
89
+ ### Verdict: [verdict]
90
+ [1-2 sentence summary of the conclusion]
91
+ ```
@@ -0,0 +1,120 @@
1
+ ---
2
+ name: forge:consensus
3
+ description: Multi-model consensus workflow. Role-assigned models converge toward a shared recommendation through two rounds of evaluation and reconciliation.
4
+ disable-model-invocation: true
5
+ argument-hint: '[subject: path or proposal or instruction] [--output path] [--code] [--models m1,m2] [--worker model:role]'
6
+ context: fork
7
+ effort: high
8
+ allowed-tools: Bash, Read
9
+ ---
10
+
11
+ # Consensus Workflow
12
+
13
+ Run a multi-model consensus workflow where role-assigned models build a shared recommendation through two rounds of
14
+ evaluation and reconciliation.
15
+
16
+ When invoked from Claude Code, execute the workflow now. Do not just restate these instructions, say "Command
17
+ completed", or ask the user to run the commands manually unless a real prerequisite is missing.
18
+
19
+ ## Usage
20
+
21
+ ```
22
+ /forge:consensus [subject] [--output path] [--code] [--models model1,model2] [--worker model:role]
23
+ ```
24
+
25
+ ## Arguments
26
+
27
+ | Argument | Required | Description |
28
+ | ---------- | -------- | ---------------------------------------------------------------------------------- |
29
+ | `subject` | Optional | File, directory, proposal, or instruction on what to evaluate (defaults to cwd) |
30
+ | `--code` | Optional | Switch: use code evaluation framework (default: proposal) |
31
+ | `--models` | Optional | Comma-separated model list (default: Forge workflow defaults) |
32
+ | `--worker` | Optional | Repeatable: model:role or model:"custom prompt" (mutually exclusive with --models) |
33
+ | `--output` | Optional | Write result to file instead of conversation (e.g., `consensus.md`) |
34
+
35
+ **Available models:** !`forge workflow list-models`
36
+
37
+ Only use models with status **ready** in the table above. If the default set includes unavailable models, pass
38
+ `--models <ready models>` explicitly. If the user explicitly requested an unavailable model, stop and tell them what
39
+ proxy or credential is missing rather than silently substituting. If no models are ready, tell the user what's missing
40
+ and stop.
41
+
42
+ ---
43
+
44
+ ## Execution
45
+
46
+ ### Step 1: Resolve Subject and Flags
47
+
48
+ Parse `$ARGUMENTS` into a positional subject and optional flags. The subject is everything that is not a recognized flag
49
+ (file path, directory, proposal text, or free-form instruction). Strip any leading `@` prefix on the subject. If no
50
+ subject is found, default to the current working directory.
51
+
52
+ Recognized flags (extract from `$ARGUMENTS` if present):
53
+
54
+ - `--code` -- switch
55
+ - `--models <value>` -- comma-separated model list (mutually exclusive with --worker)
56
+ - `--worker <value>` -- repeatable: model:role or model:"custom prompt" (mutually exclusive with --models)
57
+ - `--output <path>` -- write result to file instead of conversation
58
+
59
+ Never ask the user to clarify. If `$ARGUMENTS` contains anything, proceed immediately.
60
+
61
+ ### Step 2: Run Consensus Workflow
62
+
63
+ ```bash
64
+ forge workflow consensus "<subject>" [--code] [--models <models>] [--worker <spec>]... --json
65
+ ```
66
+
67
+ Omit any flag the user didn't specify. Do not pass both `--models` and `--worker`.
68
+
69
+ Parse the JSON output. The workflow runs two rounds:
70
+
71
+ - **Round 1**: Each model independently evaluates the subject from their assigned role
72
+ - **Round 2**: Each model receives all Round 1 positions and produces a reconciled recommendation
73
+ - **Resolved models**: The `resolved_models` object records the requested model, actual routed model ref, provider,
74
+ proxy, template, and role for each worker
75
+
76
+ If the command fails, surface the real error and stop; do not claim success.
77
+
78
+ ### Step 3: Synthesize
79
+
80
+ Read `${CLAUDE_SKILL_DIR}/resources/synthesis.md` for synthesis instructions.
81
+
82
+ Apply the synthesis rules to produce a unified consensus report from both rounds of results. Start the report with a
83
+ "Resolved Models Used" section listing each worker from `resolved_models`, including requested model, resolved model
84
+ ref, provider, proxy, template, and role.
85
+
86
+ **Output routing:** If `--output` was specified, write the complete synthesis to that path using the Write tool (create
87
+ parent directories if needed). Print a one-line confirmation: `Wrote synthesis to {path}`. Do not also print the full
88
+ result in the conversation. If `--output` was not specified, print the result in the conversation as usual.
89
+
90
+ ---
91
+
92
+ ## Models and Roles
93
+
94
+ Models are assigned roles cyclically. Default roles differ by mode:
95
+
96
+ **Proposal mode** (default):
97
+
98
+ | Order | Default Model | Role | Focus |
99
+ | ----- | ---------------------- | ------------ | -------------------------------------------- |
100
+ | 1st | gpt-5.5 | architecture | Structural alignment, coupling, abstractions |
101
+ | 2nd | gemini-3.1-pro-preview | security | Vulnerabilities, trust boundaries, risks |
102
+ | 3rd | claude-opus | correctness | Logic errors, edge cases, invariants |
103
+
104
+ **Code mode** (`--code`):
105
+
106
+ | Order | Default Model | Role | Focus |
107
+ | ----- | ---------------------- | --------------- | -------------------------------------- |
108
+ | 1st | gpt-5.5 | architecture | Component boundaries, dependency flow |
109
+ | 2nd | gemini-3.1-pro-preview | security | Injection, auth, secrets, trust |
110
+ | 3rd | claude-opus | maintainability | Readability, complexity, test coverage |
111
+
112
+ Use `--models` to control which models participate. Use `--worker` for explicit model:role mapping.
113
+
114
+ **Available named roles:** architecture, security, correctness, maintainability, performance
115
+
116
+ ## Requirements
117
+
118
+ - **Forge CLI**: `forge` must be on PATH
119
+ - **Claude CLI**: workflow workers run through local `claude -p`; `claude` must be on PATH in this Bash environment
120
+ - **Proxies**: GPT-5.5 and Gemini each require an active proxy (e.g., `forge proxy create openrouter-openai`)
@@ -0,0 +1,94 @@
1
+ # Code Consensus Evaluation
2
+
3
+ ```xml
4
+ <role>
5
+ You are a senior code evaluator participating in a multi-perspective consensus process.
6
+ {role_prompt}
7
+ You identify issues and opportunities from your assigned perspective.
8
+ You provide actionable feedback with specific code references.
9
+ </role>
10
+
11
+ <behavior>
12
+ - Read all code in scope before forming opinions
13
+ - Cite specific file:line references for every finding
14
+ - Evaluate from your assigned perspective
15
+ - Support every claim with evidence or reasoning
16
+ - Cover ALL files in ONE pass -- do not present partial results
17
+ - Be specific: "potential null dereference at auth.py:45" not "might have issues"
18
+ - Provide a clear position with confidence level
19
+ </behavior>
20
+
21
+ <scope_constraints>
22
+ - Review only what's in scope
23
+ - Do not expand to adjacent code unless directly affected
24
+ - If tests exist for reviewed code, check them for coverage gaps
25
+ </scope_constraints>
26
+ ```
27
+
28
+ ---
29
+
30
+ ## Code Under Evaluation
31
+
32
+ {target}
33
+
34
+ ---
35
+
36
+ ## Evaluation Framework
37
+
38
+ ### 1. Quality
39
+
40
+ - Logic errors and edge cases
41
+ - Error handling: are errors caught, propagated, and surfaced correctly?
42
+ - Type safety: do type annotations match runtime behavior?
43
+ - Test coverage: are critical paths tested?
44
+
45
+ ### 2. Security
46
+
47
+ - Input validation at trust boundaries
48
+ - Injection vectors (command, SQL, path traversal)
49
+ - Secrets in code or logs
50
+ - Authentication and authorization gaps
51
+
52
+ ### 3. Performance
53
+
54
+ - Unnecessary allocations or copies in hot paths
55
+ - N+1 query patterns
56
+ - Missing caching where data is reused
57
+ - Blocking calls in async contexts
58
+
59
+ ### 4. Architecture
60
+
61
+ - Component boundaries: is coupling appropriate?
62
+ - Dependency direction: do imports flow the right way?
63
+ - Abstraction level: is complexity in the right place?
64
+ - Interface contracts: are public APIs stable and well-defined?
65
+
66
+ ### 5. Recommendation
67
+
68
+ - Your position: SUPPORT, SUPPORT_WITH_CONDITIONS, or OPPOSE
69
+ - Confidence level: LOW, MEDIUM, HIGH
70
+ - Key conditions (if SUPPORT_WITH_CONDITIONS)
71
+
72
+ ---
73
+
74
+ ## Output Format
75
+
76
+ ````xml
77
+ <output_format>
78
+ Respond with your assessment in JSON:
79
+
80
+ {
81
+ "position": "SUPPORT" | "SUPPORT_WITH_CONDITIONS" | "OPPOSE",
82
+ "confidence": "LOW" | "MEDIUM" | "HIGH",
83
+ "key_points": [
84
+ {"category": "quality|security|performance|architecture|maintainability",
85
+ "point": "specific finding with file:line reference",
86
+ "severity": "critical|high|medium|low"}
87
+ ],
88
+ "recommendation": "1-2 sentence summary from your perspective",
89
+ "conditions": ["condition 1", "condition 2"]
90
+ }
91
+
92
+ Wrap the JSON in a ```json code fence.
93
+ </output_format>
94
+ ````
@@ -0,0 +1,70 @@
1
+ # Consensus Evaluation
2
+
3
+ ```xml
4
+ <role>
5
+ You are a technical expert participating in a multi-perspective consensus process.
6
+ {role_prompt}
7
+ </role>
8
+
9
+ <behavior>
10
+ - Evaluate from your assigned perspective
11
+ - Support every claim with evidence or reasoning
12
+ - Be specific about trade-offs and constraints
13
+ - Identify both strengths and weaknesses from your viewpoint
14
+ - Provide a clear position with confidence level
15
+ </behavior>
16
+ ```
17
+
18
+ ---
19
+
20
+ ## Subject Under Evaluation
21
+
22
+ {subject}
23
+
24
+ ---
25
+
26
+ ## Evaluation Framework
27
+
28
+ ### 1. Assessment from Your Perspective
29
+
30
+ - What are the key considerations from your assigned viewpoint?
31
+ - What risks or opportunities do you see that others might miss?
32
+
33
+ ### 2. Strengths
34
+
35
+ - What aspects of this proposal align well with your area of focus?
36
+
37
+ ### 3. Concerns
38
+
39
+ - What issues or risks do you identify from your perspective?
40
+ - How severe are they? What is the mitigation path?
41
+
42
+ ### 4. Recommendation
43
+
44
+ - Your position: SUPPORT, SUPPORT_WITH_CONDITIONS, or OPPOSE
45
+ - Confidence level: LOW, MEDIUM, HIGH
46
+ - Key conditions (if SUPPORT_WITH_CONDITIONS)
47
+
48
+ ---
49
+
50
+ ## Output Format
51
+
52
+ ````xml
53
+ <output_format>
54
+ Respond with your assessment in JSON:
55
+
56
+ {
57
+ "position": "SUPPORT" | "SUPPORT_WITH_CONDITIONS" | "OPPOSE",
58
+ "confidence": "LOW" | "MEDIUM" | "HIGH",
59
+ "key_points": [
60
+ {"category": "strength|concern|risk|opportunity",
61
+ "point": "specific finding from your perspective",
62
+ "severity": "critical|high|medium|low"}
63
+ ],
64
+ "recommendation": "1-2 sentence summary from your perspective",
65
+ "conditions": ["condition 1", "condition 2"]
66
+ }
67
+
68
+ Wrap the JSON in a ```json code fence.
69
+ </output_format>
70
+ ````
@@ -0,0 +1,101 @@
1
+ # Consensus Synthesis Instructions
2
+
3
+ You have received results from a two-round consensus workflow. Round 1 contains independent positions from role-assigned
4
+ models. Round 2 contains reconciled recommendations after each model reviewed all Round 1 positions.
5
+
6
+ Your task is to synthesize these into a unified consensus report.
7
+
8
+ **Key principle**: The reconciliation process -- how and whether models converged -- is as valuable as the final
9
+ positions. Surface the dynamics, not just the outcomes.
10
+
11
+ ## Synthesis Framework
12
+
13
+ ### 1. Identify Points of Agreement
14
+
15
+ Recommendations that ALL perspectives converged on during reconciliation (Round 2). These are high-confidence findings.
16
+
17
+ ```markdown
18
+ ## Agreed Recommendations (High Confidence)
19
+
20
+ - **[Recommendation]** (all perspectives agree)
21
+ - Evidence: [supporting reasoning from multiple roles]
22
+ ```
23
+
24
+ ### 2. Identify Partial Agreement
25
+
26
+ Recommendations where MOST perspectives agree but with different emphasis or conditions:
27
+
28
+ ```markdown
29
+ ## Partially Agreed (Moderate Confidence)
30
+
31
+ - **[Recommendation]**
32
+ - Agreeing: [roles that support]
33
+ - Dissenting: [role] -- [reason for different emphasis]
34
+ - Conditions: [if applicable]
35
+ ```
36
+
37
+ ### 3. Identify Remaining Disagreements
38
+
39
+ Points where consensus was NOT reached after reconciliation. This should be the most detailed section -- remaining
40
+ disagreements expose genuine analytical uncertainty and are often the most valuable findings for the reader.
41
+
42
+ For each unresolved point:
43
+
44
+ - Which perspectives disagree and why
45
+ - How positions shifted (or hardened) between Round 1 and Round 2
46
+ - Which position has stronger evidence
47
+ - Whether the disagreement is fundamental or a matter of emphasis
48
+ - What the disagreement reveals about the underlying problem's complexity
49
+
50
+ ```markdown
51
+ ## No Consensus
52
+
53
+ - **[Point of disagreement]**
54
+ - [Role A]: [position and reasoning]
55
+ - [Role B]: [position and reasoning]
56
+ - Assessment: [which has stronger evidence, or why this is genuinely unresolvable]
57
+ ```
58
+
59
+ **Convergence dynamics**: For each recommendation in sections 1-3, briefly note how positions shifted between rounds.
60
+ Did Round 2 reconciliation move perspectives closer, or did it sharpen the disagreement? The trajectory matters as much
61
+ as the final position.
62
+
63
+ ### 4. Final Recommendation
64
+
65
+ Based on the synthesis above:
66
+
67
+ - If full consensus: State the shared recommendation with confidence level
68
+ - If partial consensus: State what was agreed, flag what was not, recommend which disputed position to follow and why
69
+ - If no consensus: Explicitly state "NO CONSENSUS" and explain the fundamental disagreements that prevented convergence
70
+
71
+ ### 5. Confidence Assessment
72
+
73
+ - Consensus strength: strong | moderate | weak | none
74
+ - What would strengthen the consensus?
75
+ - What caveats apply?
76
+
77
+ ## Output Format
78
+
79
+ ```markdown
80
+ # Consensus Report: [Subject]
81
+
82
+ ## Summary
83
+ - Models consulted: N
84
+ - Roles: [list]
85
+ - Consensus strength: strong|moderate|weak|none
86
+
87
+ ## Agreed Recommendations (High Confidence)
88
+ [...]
89
+
90
+ ## Partially Agreed (Moderate Confidence)
91
+ [...]
92
+
93
+ ## No Consensus
94
+ [...]
95
+
96
+ ## Overall Recommendation
97
+ [...]
98
+
99
+ ## Confidence and Caveats
100
+ [...]
101
+ ```
@@ -0,0 +1,116 @@
1
+ ---
2
+ name: forge:debate
3
+ description: Adversarial multi-model evaluation. Models argue for, against, and neutrally about a subject.
4
+ disable-model-invocation: true
5
+ argument-hint: '[subject: path or proposal or instruction] [--output path] [--code] [--models m1,m2] [--worker model:stance]'
6
+ context: fork
7
+ effort: high
8
+ allowed-tools: Bash, Read
9
+ ---
10
+
11
+ # Debate Evaluation
12
+
13
+ Run an adversarial multi-model evaluation in which models argue for and against a subject while a neutral analyst gives a balanced view.
14
+
15
+ When invoked from Claude Code, execute the workflow now. Do not just restate these instructions, say "Command
16
+ completed", or ask the user to run the commands manually unless a real prerequisite is missing.
17
+
18
+ ## Usage
19
+
20
+ ```
21
+ /forge:debate [subject] [--code] [--models model1,model2] [--worker model:stance]... [--output path]
22
+ ```
23
+
24
+ ## Arguments
25
+
26
+ | Argument | Required | Description |
27
+ | ---------- | -------- | ------------------------------------------------------------------------------------ |
28
+ | `subject` | Optional | File, directory, proposal, or instruction on what to evaluate (defaults to cwd) |
29
+ | `--code` | Optional | Switch: use code evaluation framework (default: proposal) |
30
+ | `--models` | Optional | Comma-separated model list (default: Forge workflow defaults) |
31
+ | `--worker` | Optional | Repeatable: model:stance or model:"custom prompt" (mutually exclusive with --models) |
32
+ | `--output` | Optional | Write result to file instead of conversation (e.g., `debate.md`) |
33
+
34
+ **Available models:** !`forge workflow list-models`
35
+
36
+ Only use models with status **ready** in the table above. If the default set includes unavailable models, pass
37
+ `--models <ready models>` explicitly. If the user explicitly requested an unavailable model, stop and tell them what
38
+ proxy or credential is missing rather than silently substituting. If no models are ready, tell the user what's missing
39
+ and stop.
40
+
41
+ ---
42
+
43
+ ## Execution
44
+
45
+ ### Step 1: Resolve Subject and Flags
46
+
47
+ Parse `$ARGUMENTS` into a positional subject and optional flags. The subject is everything that is not a recognized flag
48
+ (file path, directory, proposal text, or free-form instruction). Strip any leading `@` prefix on the subject. If no
49
+ subject is found, default to the current working directory.
50
+
51
+ Recognized flags (extract from `$ARGUMENTS` if present):
52
+
53
+ - `--code` — switch
54
+ - `--models <value>` — comma-separated model list (mutually exclusive with --worker)
55
+ - `--worker <value>` — repeatable: model:stance or model:"custom prompt" (mutually exclusive with --models)
56
+ - `--output <path>` — write result to file instead of conversation
57
+
58
+ Never ask the user to clarify. If `$ARGUMENTS` is empty, evaluate the current working directory; otherwise proceed immediately.
59
+
60
+ ### Step 2: Run Adversarial Evaluation
61
+
62
+ ```bash
63
+ forge workflow debate "<subject>" [--code] [--models <models>] [--worker <spec>]... --json
64
+ ```
65
+
66
+ Omit any flag the user didn't specify. Do not pass both `--models` and `--worker`.
67
+
68
+ Parse the JSON output. Each model is assigned a stance (for/against/neutral) and evaluates the subject from that
69
+ perspective. The `resolved_models` object records the requested model, actual routed model ref, provider, proxy,
70
+ template, and stance for each worker. If the command fails, surface the real error and stop; do not claim success.
71
+
72
+ ### Step 3: Synthesize
73
+
74
+ Combine the perspectives:
75
+
76
+ 0. **Resolved models used**: one line per worker from `resolved_models`, including requested model, resolved model ref,
77
+ provider, proxy, template, and stance
78
+ 1. **Points of agreement** across all stances
79
+ 2. **Key disagreements** and which stance has stronger evidence
80
+ 3. **Risk assessment** from the critic's perspective
81
+ 4. **Viability assessment** from the supporter's perspective
82
+ 5. **Overall recommendation** with confidence level
83
+
84
+ Make it clear which parts came from agreement across stances versus which parts remain disputed.
85
+
86
+ **Output routing:** If `--output` was specified, write the complete synthesis to that path using the Write tool (create
87
+ parent directories if needed). Print a one-line confirmation: `Wrote synthesis to {path}`. Do not also print the full
88
+ result in the conversation. If `--output` was not specified, print the result in the conversation as usual.
89
+
90
+ ---
91
+
92
+ ## Models and Roles
93
+
94
+ Models are assigned stances cyclically. Default models:
95
+
96
+ | Order | Default Model | Stance | Role |
97
+ | ----- | ---------------------- | ------- | ------------------------ |
98
+ | 1st | gpt-5.5 | FOR | Supporter -- strengths |
99
+ | 2nd | gemini-3.1-pro-preview | AGAINST | Critic -- risks |
100
+ | 3rd | claude-opus | NEUTRAL | Analyst -- balanced view |
101
+
102
+ Use `--models` to control which models participate. Stances cycle through for/against/neutral in order.
103
+
104
+ ## Code Mode
105
+
106
+ When `--code` is specified, models evaluate the target code from adversarial perspectives:
107
+
108
+ - **FOR** stance: Identifies good design, correct implementations, production readiness
109
+ - **AGAINST** stance: Identifies bugs, security issues, performance problems, architectural flaws
110
+ - **NEUTRAL** stance: Balanced assessment of code quality with file:line evidence
111
+
112
+ ## Requirements
113
+
114
+ - **Forge CLI**: `forge` must be on PATH
115
+ - **Claude CLI**: workflow workers run through local `claude -p`; `claude` must be on PATH in this Bash environment
116
+ - **Proxies**: GPT-5.5 and Gemini require active proxies (e.g., `forge proxy create openrouter-openai`)