multi-forge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311) hide show
  1. forge/__init__.py +3 -0
  2. forge/_extensions/agents/.gitkeep +0 -0
  3. forge/_extensions/commands/.gitkeep +0 -0
  4. forge/_extensions/skills/analyze/SKILL.md +87 -0
  5. forge/_extensions/skills/challenge/SKILL.md +91 -0
  6. forge/_extensions/skills/consensus/SKILL.md +120 -0
  7. forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
  8. forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
  9. forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
  10. forge/_extensions/skills/debate/SKILL.md +116 -0
  11. forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
  12. forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
  13. forge/_extensions/skills/panel/SKILL.md +141 -0
  14. forge/_extensions/skills/panel/resources/synthesis.md +103 -0
  15. forge/_extensions/skills/qa/SKILL.md +704 -0
  16. forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
  17. forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
  18. forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
  19. forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
  20. forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
  21. forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
  22. forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
  23. forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
  24. forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
  25. forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
  26. forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
  27. forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
  28. forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
  29. forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
  30. forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
  31. forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
  32. forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
  33. forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
  34. forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
  35. forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
  36. forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
  37. forge/_extensions/skills/qa/resources/checklist.md +103 -0
  38. forge/_extensions/skills/qa/resources/report-template.md +62 -0
  39. forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
  40. forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
  41. forge/_extensions/skills/review/SKILL.md +125 -0
  42. forge/_extensions/skills/review/references/claude-4.6.md +474 -0
  43. forge/_extensions/skills/review/references/claude-4.7.md +710 -0
  44. forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
  45. forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
  46. forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
  47. forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
  48. forge/_extensions/skills/review/resources/code-gemini.md +184 -0
  49. forge/_extensions/skills/review/resources/code-openai.md +203 -0
  50. forge/_extensions/skills/review/resources/code.md +160 -0
  51. forge/_extensions/skills/review-docs/SKILL.md +121 -0
  52. forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
  53. forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
  54. forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
  55. forge/_extensions/skills/review-docs/resources/docs.md +170 -0
  56. forge/_extensions/skills/smoke-test/SKILL.md +27 -0
  57. forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
  58. forge/_extensions/skills/understand/SKILL.md +148 -0
  59. forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
  60. forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
  61. forge/_extensions/skills/understand/resources/code-openai.md +181 -0
  62. forge/_extensions/skills/understand/resources/code.md +163 -0
  63. forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
  64. forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
  65. forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
  66. forge/_extensions/skills/understand/resources/docs.md +177 -0
  67. forge/_extensions/skills/walkthrough/SKILL.md +599 -0
  68. forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
  69. forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
  70. forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
  71. forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
  72. forge/backend/__init__.py +174 -0
  73. forge/backend/adapters/__init__.py +38 -0
  74. forge/backend/adapters/litellm.py +158 -0
  75. forge/backend/creation.py +89 -0
  76. forge/backend/registry.py +178 -0
  77. forge/cli/__init__.py +16 -0
  78. forge/cli/auth.py +483 -0
  79. forge/cli/backend.py +298 -0
  80. forge/cli/claude.py +411 -0
  81. forge/cli/config_cmd.py +303 -0
  82. forge/cli/extensions.py +1001 -0
  83. forge/cli/gc.py +165 -0
  84. forge/cli/guard.py +1018 -0
  85. forge/cli/guards.py +106 -0
  86. forge/cli/handoff.py +110 -0
  87. forge/cli/hooks/__init__.py +36 -0
  88. forge/cli/hooks/_group.py +20 -0
  89. forge/cli/hooks/_helpers.py +149 -0
  90. forge/cli/hooks/commands.py +1677 -0
  91. forge/cli/hooks/direct_commands.py +1304 -0
  92. forge/cli/hooks/install.py +232 -0
  93. forge/cli/hooks/policy.py +151 -0
  94. forge/cli/hooks/read_hygiene.py +74 -0
  95. forge/cli/hooks/verification.py +370 -0
  96. forge/cli/logs.py +406 -0
  97. forge/cli/main.py +292 -0
  98. forge/cli/proxy.py +1821 -0
  99. forge/cli/proxy_costs.py +313 -0
  100. forge/cli/search.py +416 -0
  101. forge/cli/session.py +892 -0
  102. forge/cli/session_addendum.py +81 -0
  103. forge/cli/session_fork.py +750 -0
  104. forge/cli/session_handoff.py +141 -0
  105. forge/cli/session_lifecycle.py +2053 -0
  106. forge/cli/session_manage.py +1336 -0
  107. forge/cli/session_memory.py +201 -0
  108. forge/cli/status_line.py +1398 -0
  109. forge/cli/workflow.py +1964 -0
  110. forge/config/__init__.py +110 -0
  111. forge/config/dataclass_utils.py +88 -0
  112. forge/config/defaults/__init__.py +0 -0
  113. forge/config/defaults/backends/__init__.py +0 -0
  114. forge/config/defaults/backends/litellm.yaml +196 -0
  115. forge/config/defaults/templates/__init__.py +0 -0
  116. forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
  117. forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
  118. forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
  119. forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
  120. forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
  121. forge/config/defaults/templates/litellm-gemini.yaml +21 -0
  122. forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
  123. forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
  124. forge/config/defaults/templates/litellm-openai.yaml +28 -0
  125. forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
  126. forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
  127. forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
  128. forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
  129. forge/config/defaults/templates/openrouter-glm.yaml +23 -0
  130. forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
  131. forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
  132. forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
  133. forge/config/defaults/templates/openrouter-openai.yaml +28 -0
  134. forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
  135. forge/config/loader.py +675 -0
  136. forge/config/schema.py +448 -0
  137. forge/core/__init__.py +5 -0
  138. forge/core/auth/__init__.py +67 -0
  139. forge/core/auth/capabilities.py +219 -0
  140. forge/core/auth/credentials_file.py +244 -0
  141. forge/core/auth/protocols.py +18 -0
  142. forge/core/auth/secrets.py +243 -0
  143. forge/core/auth/template_secrets.py +112 -0
  144. forge/core/data/__init__.py +5 -0
  145. forge/core/data/model_catalog.yaml +1522 -0
  146. forge/core/data/pricing.yaml +140 -0
  147. forge/core/data/system_prompt_addendums/__init__.py +0 -0
  148. forge/core/data/system_prompt_addendums/gemini.md +330 -0
  149. forge/core/data/system_prompt_addendums/openai.md +328 -0
  150. forge/core/llm/__init__.py +231 -0
  151. forge/core/llm/clients/__init__.py +14 -0
  152. forge/core/llm/clients/base.py +115 -0
  153. forge/core/llm/clients/litellm.py +619 -0
  154. forge/core/llm/clients/openai_compat.py +244 -0
  155. forge/core/llm/clients/openrouter.py +234 -0
  156. forge/core/llm/credentials.py +439 -0
  157. forge/core/llm/detection.py +86 -0
  158. forge/core/llm/errors.py +44 -0
  159. forge/core/llm/protocols.py +80 -0
  160. forge/core/llm/types.py +176 -0
  161. forge/core/logging.py +146 -0
  162. forge/core/models/__init__.py +91 -0
  163. forge/core/models/catalog.py +467 -0
  164. forge/core/models/pricing.py +165 -0
  165. forge/core/models/types.py +167 -0
  166. forge/core/naming.py +212 -0
  167. forge/core/ops/__init__.py +73 -0
  168. forge/core/ops/context.py +141 -0
  169. forge/core/ops/gc.py +802 -0
  170. forge/core/ops/proxy.py +146 -0
  171. forge/core/ops/resolution.py +135 -0
  172. forge/core/ops/session.py +344 -0
  173. forge/core/ops/session_context.py +548 -0
  174. forge/core/paths.py +38 -0
  175. forge/core/process.py +54 -0
  176. forge/core/reactive/__init__.py +38 -0
  177. forge/core/reactive/cost_tracking.py +300 -0
  178. forge/core/reactive/env.py +180 -0
  179. forge/core/reactive/proxy.py +78 -0
  180. forge/core/reactive/routing.py +622 -0
  181. forge/core/reactive/session_runner.py +185 -0
  182. forge/core/reactive/structured_output.py +62 -0
  183. forge/core/reactive/tagger.py +94 -0
  184. forge/core/reactive/throttle.py +132 -0
  185. forge/core/state/__init__.py +59 -0
  186. forge/core/state/exceptions.py +59 -0
  187. forge/core/state/io.py +140 -0
  188. forge/core/state/lock.py +99 -0
  189. forge/core/state/timestamps.py +60 -0
  190. forge/core/transcript.py +78 -0
  191. forge/core/typing_helpers.py +24 -0
  192. forge/core/workqueue/__init__.py +67 -0
  193. forge/core/workqueue/queue.py +552 -0
  194. forge/core/workqueue/types.py +63 -0
  195. forge/guard/__init__.py +26 -0
  196. forge/guard/deterministic/__init__.py +26 -0
  197. forge/guard/deterministic/base.py +158 -0
  198. forge/guard/deterministic/coding_standards.py +256 -0
  199. forge/guard/deterministic/registry.py +148 -0
  200. forge/guard/deterministic/tdd.py +171 -0
  201. forge/guard/engine.py +216 -0
  202. forge/guard/protocols.py +91 -0
  203. forge/guard/queries.py +96 -0
  204. forge/guard/semantic/__init__.py +34 -0
  205. forge/guard/semantic/promotion.py +18 -0
  206. forge/guard/semantic/supervisor.py +813 -0
  207. forge/guard/semantic/verdict.py +183 -0
  208. forge/guard/store.py +124 -0
  209. forge/guard/team/__init__.py +6 -0
  210. forge/guard/team/config.py +24 -0
  211. forge/guard/team/handlers.py +209 -0
  212. forge/guard/team/prompts.py +41 -0
  213. forge/guard/types.py +125 -0
  214. forge/guard/workflow/__init__.py +17 -0
  215. forge/guard/workflow/branches.py +67 -0
  216. forge/guard/workflow/config.py +63 -0
  217. forge/guard/workflow/divergence.py +113 -0
  218. forge/guard/workflow/policy.py +87 -0
  219. forge/guard/workflow/stages.py +205 -0
  220. forge/install/__init__.py +55 -0
  221. forge/install/cli.py +281 -0
  222. forge/install/exceptions.py +163 -0
  223. forge/install/hooks.py +109 -0
  224. forge/install/installer.py +1037 -0
  225. forge/install/models.py +321 -0
  226. forge/install/preset.py +272 -0
  227. forge/install/settings_merge.py +831 -0
  228. forge/install/tracking.py +238 -0
  229. forge/install/version.py +141 -0
  230. forge/proxy/__init__.py +0 -0
  231. forge/proxy/base_client.py +181 -0
  232. forge/proxy/client_adapter.py +476 -0
  233. forge/proxy/client_factory.py +531 -0
  234. forge/proxy/converters.py +1206 -0
  235. forge/proxy/cost_logger.py +132 -0
  236. forge/proxy/cost_tracker.py +242 -0
  237. forge/proxy/data_models.py +338 -0
  238. forge/proxy/error_hints.py +92 -0
  239. forge/proxy/metrics.py +222 -0
  240. forge/proxy/model_spec.py +158 -0
  241. forge/proxy/proxies.py +333 -0
  242. forge/proxy/proxy_identity.py +134 -0
  243. forge/proxy/proxy_orchestrator.py +1018 -0
  244. forge/proxy/proxy_startup.py +54 -0
  245. forge/proxy/server.py +1561 -0
  246. forge/proxy/utils.py +537 -0
  247. forge/review/__init__.py +6 -0
  248. forge/review/adversarial.py +111 -0
  249. forge/review/consensus.py +236 -0
  250. forge/review/engine.py +356 -0
  251. forge/review/models.py +437 -0
  252. forge/review/resources/__init__.py +5 -0
  253. forge/review/resources/codereview-performance.md +85 -0
  254. forge/review/resources/codereview-quick.md +75 -0
  255. forge/review/resources/codereview-security.md +92 -0
  256. forge/review/resources/codereview.md +85 -0
  257. forge/review/resources/docreview-quick.md +75 -0
  258. forge/review/resources/docreview.md +86 -0
  259. forge/review/resources/thinkdeep.md +89 -0
  260. forge/review/routing.py +368 -0
  261. forge/review/synthesis.py +73 -0
  262. forge/runtime_config.py +438 -0
  263. forge/search/__init__.py +55 -0
  264. forge/search/bm25_store.py +264 -0
  265. forge/search/content_store.py +197 -0
  266. forge/search/engine.py +352 -0
  267. forge/search/exceptions.py +51 -0
  268. forge/search/extractor.py +234 -0
  269. forge/search/index_state.py +295 -0
  270. forge/search/store.py +215 -0
  271. forge/search/tokenizer.py +24 -0
  272. forge/session/__init__.py +130 -0
  273. forge/session/active.py +339 -0
  274. forge/session/artifacts.py +202 -0
  275. forge/session/claude/__init__.py +50 -0
  276. forge/session/claude/cleanup.py +105 -0
  277. forge/session/claude/invoke.py +236 -0
  278. forge/session/claude/paths.py +200 -0
  279. forge/session/cleanup.py +216 -0
  280. forge/session/config.py +34 -0
  281. forge/session/direct_model.py +107 -0
  282. forge/session/effective.py +169 -0
  283. forge/session/exceptions.py +255 -0
  284. forge/session/handoff.py +881 -0
  285. forge/session/handoff_agent.py +544 -0
  286. forge/session/hooks/__init__.py +35 -0
  287. forge/session/hooks/models.py +73 -0
  288. forge/session/hooks/session_start.py +507 -0
  289. forge/session/identity.py +84 -0
  290. forge/session/index.py +553 -0
  291. forge/session/manager.py +1506 -0
  292. forge/session/models.py +572 -0
  293. forge/session/overrides.py +344 -0
  294. forge/session/plan_resolution.py +286 -0
  295. forge/session/prev_sessions.py +128 -0
  296. forge/session/store.py +431 -0
  297. forge/session/validation.py +47 -0
  298. forge/session/worktree/__init__.py +65 -0
  299. forge/session/worktree/cleanup.py +262 -0
  300. forge/session/worktree/config_copy.py +203 -0
  301. forge/session/worktree/create.py +332 -0
  302. forge/sidecar/__init__.py +29 -0
  303. forge/sidecar/container.py +161 -0
  304. forge/sidecar/docker.py +86 -0
  305. forge/sidecar/secrets.py +19 -0
  306. multi_forge-0.2.0.dist-info/METADATA +242 -0
  307. multi_forge-0.2.0.dist-info/RECORD +311 -0
  308. multi_forge-0.2.0.dist-info/WHEEL +4 -0
  309. multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
  310. multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
  311. multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,125 @@
1
+ ---
2
+ name: forge:review
3
+ description: Review code for conformance, correctness, and architecture alignment.
4
+ disable-model-invocation: false
5
+ argument-hint: '[target: path or instruction] [--output path]'
6
+ allowed-tools: Read, Grep, Glob, Bash, Agent
7
+ ---
8
+
9
+ # Code Review
10
+
11
+ Review code for conformance, correctness, and architecture alignment.
12
+
13
+ ## Usage
14
+
15
+ ```
16
+ /forge:review [target] [--output <path>]
17
+ ```
18
+
19
+ ## Arguments
20
+
21
+ | Argument | Required | Description |
22
+ | ---------- | -------- | ------------------------------------------------------------------- |
23
+ | `target` | Optional | File, directory, or instruction on what to review (defaults to cwd) |
24
+ | `--output` | Optional | Write result to file instead of conversation (e.g., `review.md`) |
25
+
26
+ ## Execution
27
+
28
+ Follow these steps in order. Do not skip steps.
29
+
30
+ ### Step 1: Resolve Target
31
+
32
+ `$ARGUMENTS` is the target. It may be a file path, directory, or free-form instruction. If it starts with `@`, strip the
33
+ prefix (Claude Code file reference syntax). If `$ARGUMENTS` is empty, default to the current working directory.
34
+
35
+ Recognized flags (extract from `$ARGUMENTS` if present):
36
+
37
+ - `--output <path>` — write result to file instead of conversation
38
+
39
+ Never ask the user to clarify. If `$ARGUMENTS` contains anything, proceed immediately.
40
+
41
+ ### Step 2: Load Instruction File
42
+
43
+ **Do NOT start the review until this step is complete.**
44
+
45
+ Model family: !`forge session context --field model_family 2>/dev/null || true`
46
+ Main model: !`forge session context --field main_model 2>/dev/null || true`
47
+
48
+ Resolve session context from `$FORGE_SESSION` or the local environment. Do not force `$CLAUDE_SESSION_ID`: unmanaged
49
+ direct Claude sessions are not in Forge's session index, but may still expose direct-model environment metadata.
50
+
51
+ Pick **one** instruction file (first match wins, read only one):
52
+
53
+ 1. If model family is `openai` or `gemini`: `${CLAUDE_SKILL_DIR}/resources/code-{family}.md`
54
+ 2. Otherwise: `${CLAUDE_SKILL_DIR}/resources/code.md`
55
+
56
+ If model family lookup returns empty output, `anthropic`, or errors, treat it as the default family and immediately
57
+ select `${CLAUDE_SKILL_DIR}/resources/code.md`. Do not probe multiple variants.
58
+
59
+ In v1, direct-session model pins such as `claude-opus-4-7` do not change this single-model resource selection: a 4.7
60
+ direct session still uses the Anthropic/default review resource. Use `/forge:panel --code` with `claude-opus-4.7` in the
61
+ model list when you want the 4.7 bounded-review worker hint.
62
+
63
+ ### Tool-call hygiene (normative)
64
+
65
+ When reading the selected instruction file, call `Read` with exactly one argument:
66
+
67
+ ```json
68
+ {"file_path":"/absolute/path/to/instruction-file.md"}
69
+ ```
70
+
71
+ Rules:
72
+
73
+ - Do NOT send empty-string values for optional fields
74
+ - Do NOT include assistant-generated commentary or repair text in tool arguments
75
+
76
+ A PreToolUse hook may strip extra Read parameters (`offset`, `limit`, `pages`) for skill instruction files, but callers
77
+ must still send `Read` with only `file_path`.
78
+
79
+ Read only that one file, using the Read tool with just the `file_path` parameter. Do not read any other variant. If the chosen file is
80
+ missing, report the path and stop.
81
+
82
+ **After loading, tell the user in one message:**
83
+
84
+ ```
85
+ Reviewing {target} in code mode.
86
+ model_family: {family or "anthropic"}
87
+ model: {main_model or "Claude Code default (exact model not exposed to Forge)"}
88
+ instruction: {instruction_file_name}
89
+ ```
90
+
91
+ Do not read target files or begin review until after you have:
92
+
93
+ 1. Resolved the target
94
+ 2. Selected and read the instruction file
95
+ 3. Emitted the preflight summary message
96
+
97
+ ### Step 3: Execute Review
98
+
99
+ If the selected instruction file refers to an Explore subagent, use the `Agent` tool with `subagent_type: "Explore"`. Do
100
+ not interpret `Task` in resource files as a separate tool.
101
+
102
+ If the selected instruction file mentions disallowed or unavailable tools, stop and report the mismatch instead of
103
+ substituting another tool.
104
+
105
+ Execute the review following the loaded instructions. The instruction file defines the rubric, structure, and output
106
+ format. Do not invent your own review format -- follow what the instruction file says.
107
+
108
+ Do not call `mcp__zen__*` tools from this skill.
109
+
110
+ When a resource file contains tool guidance that conflicts with this SKILL.md file, this SKILL.md file wins. Do not
111
+ improvise around the conflict.
112
+
113
+ **Output routing:** If `--output` was specified, write the complete review to that path using the Write tool (create
114
+ parent directories if needed). Print a one-line confirmation: `Wrote review to {path}`. Do not also print the full
115
+ result in the conversation. If `--output` was not specified, print the result in the conversation as usual.
116
+
117
+ ## Multi-Model Mode (optional)
118
+
119
+ For a multi-model perspective, use `forge workflow panel --code` to get independent code reviews from multiple backends:
120
+
121
+ ```bash
122
+ forge workflow panel [target] --code --json
123
+ ```
124
+
125
+ Or invoke `/forge:panel --code` for the full multi-model code review workflow.
@@ -0,0 +1,474 @@
1
+ # Claude 4.6 Prompting Guide (Opus 4.6 / Sonnet 4.6)
2
+
3
+ > Synthesized from
4
+ > [Anthropic Claude Docs](https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/claude-prompting-best-practices),
5
+ > [What's New in Claude 4.6](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-6),
6
+ > [Anthropic Engineering Blog](https://www.anthropic.com/engineering), and web research. February 2026.
7
+
8
+ ## Overview
9
+
10
+ Claude 4.6 models (Opus 4.6, Sonnet 4.6) are Anthropic's frontier models, released February 2026. Key advances over
11
+ Claude 4.5:
12
+
13
+ - **1M token context window** at standard pricing (up from 200K standard / 1M beta)
14
+ - **Adaptive thinking** — Claude dynamically decides when and how much to think
15
+ - **Effort parameter** — `low` / `medium` / `high` / `max` (Opus only) replaces `budget_tokens`
16
+ - **Context compaction** — server-side automatic summarization (beta)
17
+ - **76% on 8-needle MRCR v2** (vs 18.5% for Sonnet 4.5) — qualitative leap in long-context reasoning
18
+ - **Prefilling removed** — assistant message prefilling returns 400 error on 4.6 models
19
+
20
+ **Key mindset shift:** Claude 4.6 models follow instructions precisely and **think adaptively**. The effort parameter
21
+ replaces prompt-level workarounds ("think carefully", "be thorough") which can now cause overthinking loops. Use the
22
+ effort parameter as the primary lever for reasoning depth.
23
+
24
+ ### Model Selection
25
+
26
+ | Model | Best For |
27
+ | -------------- | ------------------------------------------------------------------------------------------ |
28
+ | **Opus 4.6** | Hardest problems: large-scale migrations, deep research, extended autonomous work |
29
+ | **Sonnet 4.6** | 80%+ of tasks: fast turnaround, cost-efficient, 98% of Opus coding quality at 1/5 the cost |
30
+ | **Haiku 4.5** | Fast, cost-effective. Straightforward tools. (No 4.6 version yet) |
31
+
32
+ **Rule of thumb:** Use Sonnet 4.6 by default. Reach for Opus only for the deepest reasoning or work across many interrelated
33
+ files.
34
+
35
+ ---
36
+
37
+ ## Core API Parameters
38
+
39
+ ### Adaptive Thinking & Effort
40
+
41
+ **Adaptive thinking** (`thinking: {type: "adaptive"}`) is the recommended thinking mode for 4.6 models. Claude
42
+ dynamically decides when and how much to think based on problem complexity.
43
+
44
+ **Effort levels** control the depth of reasoning:
45
+
46
+ | Level | Behavior | When to use |
47
+ | -------- | ------------------------------------------------------------------------------ | ------------------------------------------------ |
48
+ | `low` | Skips thinking for simple requests. Minimal tool calls. Short responses. | Renaming, typo fixes, boilerplate, simple Q&A |
49
+ | `medium` | **Recommended default for Sonnet 4.6.** Balanced speed, cost, and performance. | Agentic coding, tool-heavy workflows, code gen |
50
+ | `high` | Almost always engages thinking. **Default if effort is not set.** | Complex multi-step tasks, detailed analysis |
51
+ | `max` | Maximum reasoning depth. **Opus 4.6 only.** Burns tokens fast. | System design, deeply nested bugs, complex algos |
52
+
53
+ ```python
54
+ response = client.messages.create(
55
+ model="claude-sonnet-4-6",
56
+ max_tokens=16000,
57
+ thinking={"type": "adaptive"},
58
+ output_config={"effort": "medium"},
59
+ messages=[{"role": "user", "content": "..."}],
60
+ )
61
+ ```
62
+
63
+ **Critical best practices:**
64
+
65
+ - **Set `max_tokens` to at least 16K** (32K recommended). Thinking and output share the same budget — low limits cause
66
+ mid-reasoning cutoff with no graceful degradation.
67
+ - **Set effort explicitly on Sonnet 4.6.** It defaults to `high`, which may cause higher latency than Sonnet 4.5. Start
68
+ with `medium` and adjust.
69
+ - **Remove old "think carefully" prompts.** These workarounds amplify 4.6's already-proactive behavior and can cause
70
+ overthinking loops. The effort parameter is the better lever.
71
+ - If `stop_reason: "max_tokens"` appears, increase `max_tokens` or lower effort.
72
+
73
+ **Deprecated:** `thinking: {type: "enabled"}` and `budget_tokens` still work on 4.6 but will be removed in a future
74
+ release. Migrate to adaptive thinking + effort.
75
+
76
+ **In Claude Code:** Type `/effort` to cycle through Low/Medium/High/Max. Type "ultrathink" in any prompt to temporarily
77
+ boost to High for that response.
78
+
79
+ ### Thinking Display Control
80
+
81
+ Omit thinking content from responses for faster streaming while preserving multi-turn continuity:
82
+
83
+ ```python
84
+ thinking={"type": "adaptive", "display": "omitted"}
85
+ ```
86
+
87
+ Billing is unchanged — you still pay for thinking tokens even when they are omitted from the response.
88
+
89
+ ### Temperature
90
+
91
+ | Setting | Use Case |
92
+ | --------- | -------------------------------------------- |
93
+ | 0.0 - 0.4 | Analytical, multiple choice, code generation |
94
+ | 0.7 - 1.0 | Creative, generative tasks |
95
+ | **1.0** | Default |
96
+
97
+ **Breaking change (Claude 4+):** You cannot specify both `temperature` and `top_p` in the same request.
98
+
99
+ ### Context Window & Output
100
+
101
+ | Model | Context Window | Max Output |
102
+ | ---------- | -------------- | ---------- |
103
+ | Opus 4.6 | 1M tokens | 128K |
104
+ | Sonnet 4.6 | 1M tokens | 64K |
105
+ | Haiku 4.5 | 200K tokens | — |
106
+
107
+ - 1M context is GA at standard pricing (no beta header, no premium rates)
108
+ - Media limit raised to 600 images/PDF pages per request (up from 100) at 1M context
109
+ - Requests over 200K work automatically for 4.6 models
110
+
111
+ ### Knowledge Cutoff
112
+
113
+ **May 2025** (Opus 4.6 and Sonnet 4.6).
114
+
115
+ ---
116
+
117
+ ## Key Behavioral Differences from Claude 4.5
118
+
119
+ | Aspect | Claude 4.6 Behavior |
120
+ | ---------------------- | -------------------------------------------------------------------------------- |
121
+ | Thinking | Adaptive by default; effort parameter replaces budget_tokens |
122
+ | Long-context reasoning | 76% on 8-needle MRCR v2 (vs 18.5% Sonnet 4.5) — qualitative leap |
123
+ | Context window | 1M GA at standard pricing (vs 200K standard / 1M beta) |
124
+ | Instruction following | Stronger; fewer false claims of success; fewer hallucinations |
125
+ | Overengineering | Significantly reduced; less "laziness" |
126
+ | Coding preference | Sonnet 4.6 preferred over Sonnet 4.5 ~70% of the time in Claude Code testing |
127
+ | Safety | Better prompt injection resistance; lowest over-refusal rate |
128
+ | Prefilling | **Removed.** Returns 400 error. Use structured outputs instead. |
129
+ | Context compaction | **New.** Server-side automatic summarization (beta) |
130
+ | Web search filtering | **New.** Dynamic code-based filtering of search results before context injection |
131
+
132
+ **Sonnet 4.6 vs Opus 4.5:** Users even preferred Sonnet 4.6 to Opus 4.5 59% of the time — Sonnet 4.6 is not just a
133
+ cheaper Opus; it is a meaningfully better model than last generation's flagship.
134
+
135
+ ---
136
+
137
+ ## XML Tags
138
+
139
+ Claude remains optimized for XML-style tags. Use descriptive tag names that match their content.
140
+
141
+ ### Common Tag Patterns
142
+
143
+ ```xml
144
+ <role>
145
+ You are an expert software architect specializing in distributed systems.
146
+ </role>
147
+
148
+ <instructions>
149
+ 1. Analyze the provided code
150
+ 2. Identify architectural issues
151
+ 3. Suggest improvements with examples
152
+ </instructions>
153
+
154
+ <constraints>
155
+ - Keep suggestions actionable
156
+ - Focus on the top 3 most impactful changes
157
+ - Provide code examples for each suggestion
158
+ </constraints>
159
+
160
+ <context>
161
+ [Your documents/code here]
162
+ </context>
163
+
164
+ <output_format>
165
+ Structure your response as:
166
+ 1. Executive Summary (2-3 sentences)
167
+ 2. Issues Found (bulleted list)
168
+ 3. Recommendations (numbered, with code)
169
+ </output_format>
170
+ ```
171
+
172
+ ---
173
+
174
+ ## Tool Use & Parallel Execution
175
+
176
+ Claude 4.6 models excel at parallel tool execution.
177
+
178
+ ### Key Capabilities
179
+
180
+ - **Parallel tool calls** — Sonnet 4.6 is particularly aggressive at firing multiple operations simultaneously
181
+ - **Interleaved thinking** — adaptive thinking automatically enables thinking between tool calls
182
+ - **Token-efficient tool use** — built into Claude 4 models (no beta header needed)
183
+ - **Programmatic tool calling** — Claude writes code that calls multiple tools, processes outputs, and controls context
184
+
185
+ ### Boosting Parallel Execution
186
+
187
+ ```xml
188
+ <tool_usage>
189
+ - Prioritize calling tools simultaneously when actions can be done in parallel
190
+ - When reading multiple files, run parallel tool calls to read all files at once
191
+ - For independent searches, fire them off simultaneously rather than sequentially
192
+ </tool_usage>
193
+ ```
194
+
195
+ ### Web Search & Dynamic Filtering (New)
196
+
197
+ Web search and web fetch tools now support **dynamic filtering** — Claude writes and executes code to filter search
198
+ results before they enter the context window. This improves accuracy while reducing token consumption. Code execution is
199
+ free when used with web search/fetch.
200
+
201
+ ### Chain of Thought for Tool Use
202
+
203
+ For Sonnet/Haiku, use CoT prompting to improve tool selection:
204
+
205
+ ```
206
+ Before calling any tool:
207
+ 1. Analyze which tool is relevant to the query
208
+ 2. Check each required parameter - has the user provided enough information?
209
+ 3. Only proceed if all required parameters are present
210
+ 4. Otherwise, ask for the missing parameters
211
+ ```
212
+
213
+ ---
214
+
215
+ ## Preventing Overengineering
216
+
217
+ Claude 4.6 is significantly less prone to overengineering than 4.5, but explicit constraints still help:
218
+
219
+ ```xml
220
+ <scope_constraints>
221
+ - Avoid over-engineering. Only make changes that are directly requested or clearly necessary.
222
+ - Do not create extra files unless explicitly needed
223
+ - Do not add abstractions or flexibility beyond requirements
224
+ - Choose the simplest valid interpretation of ambiguous instructions
225
+ - Keep solutions minimal and focused
226
+ </scope_constraints>
227
+ ```
228
+
229
+ ---
230
+
231
+ ## System Prompt Best Practices
232
+
233
+ ### Dial Back Aggressive Language
234
+
235
+ Claude 4.6's stronger instruction following means aggressive prompts now **overtrigger**:
236
+
237
+ ```
238
+ # TOO AGGRESSIVE (causes overtriggering)
239
+ CRITICAL: You MUST use this tool when the user asks about data.
240
+
241
+ # BETTER (normal prompting)
242
+ Use this tool when the user asks about data.
243
+ ```
244
+
245
+ ### Use Decision Rules Instead of Prohibitions
246
+
247
+ Claude 4.6 evaluates the logical necessity of an instruction rather than following it literally, so state conditional rules instead of blanket prohibitions:
248
+
249
+ ```xml
250
+ <decision_rules>
251
+ IF message is about: debugging, how-it-works questions, system testing
252
+ THEN: engage directly, skip enforcement
253
+
254
+ IF verified_data_available
255
+ THEN: use_precise_figures
256
+ ELSE: provide_ranges_labeled_as_estimates
257
+ </decision_rules>
258
+ ```
259
+
260
+ ### Output Formatting
261
+
262
+ Match the prompt style to the desired output style. Reduce Markdown in the prompt to reduce it in the output.
263
+
264
+ ---
265
+
266
+ ## Long Context Best Practices
267
+
268
+ ### Document Placement
269
+
270
+ **Put long documents at the TOP**, queries at the END — up to **30% improvement** on complex, multi-document inputs:
271
+
272
+ ```
273
+ [Long documents - 20K+ tokens]
274
+
275
+ Based on the documents above, answer the following:
276
+ [Your query]
277
+ ```
278
+
279
+ ### Structure Multiple Documents
280
+
281
+ ```xml
282
+ <documents>
283
+ <document index="1">
284
+ <source>quarterly_report.pdf</source>
285
+ <document_content>[content here]</document_content>
286
+ </document>
287
+ <document index="2">
288
+ <source>market_analysis.pdf</source>
289
+ <document_content>[content here]</document_content>
290
+ </document>
291
+ </documents>
292
+
293
+ Based on the documents above, [your query]
294
+ ```
295
+
296
+ ### Ground Responses in Quotes
297
+
298
+ ```
299
+ Before answering, quote the specific passages from the documents that support your response.
300
+ Then provide your analysis based on those quotes.
301
+ ```
302
+
303
+ ### Context Management
304
+
305
+ - Use the `/compact` command in Claude Code to summarize long conversations
306
+ - **Context compaction** (beta) provides automatic server-side summarization for 4.6 models
307
+ - Be surgical with context — precise file references over entire folders
308
+ - Claude 4.6 tracks its remaining context window throughout the conversation (context awareness)
309
+
310
+ ---
311
+
312
+ ## Context Compaction (New, Beta)
313
+
314
+ Server-side automatic context summarization for effectively infinite conversations. When context approaches the window
315
+ limit, the API automatically summarizes earlier conversation parts.
316
+
317
+ Available in beta for Opus 4.6 and Sonnet 4.6.
318
+
319
+ ---
320
+
321
+ ## Structured Outputs (Replaces Prefilling)
322
+
323
+ **Breaking change:** Assistant message prefilling returns a 400 error on Claude 4.6 models.
324
+
325
+ ### Alternatives
326
+
327
+ | Previous Pattern (4.5) | New Pattern (4.6) |
328
+ | --------------------------- | -------------------------------------------------- |
329
+ | Prefill `{` for JSON output | Use `output_config.format` or structured outputs |
330
+ | Prefill to skip preamble | System prompt: "Respond directly without preamble" |
331
+ | Prefill for classification | Use tools with enum fields |
332
+
333
+ For guaranteed JSON schema compliance, use **Structured Outputs**:
334
+
335
+ - `output_config.format` for JSON responses
336
+ - `strict: true` for tool input validation
337
+
338
+ ---
339
+
340
+ ## Migration from Claude 4.5
341
+
342
+ ### Breaking Changes
343
+
344
+ | Change | Impact |
345
+ | -------------------------------- | ------------------------------------------------- |
346
+ | Prefilling removed | Returns 400 error; use structured outputs |
347
+ | `budget_tokens` deprecated | Use adaptive thinking + effort parameter |
348
+ | Sonnet effort defaults to `high` | May cause higher latency than 4.5; set explicitly |
349
+ | `temperature` + `top_p` | Still cannot use both (same as 4.5) |
350
+
351
+ ### What Changed
352
+
353
+ | Aspect | Claude 4.5 | Claude 4.6 |
354
+ | ----------------------- | -------------------------- | ------------------------------------ |
355
+ | Thinking | Extended thinking + budget | Adaptive thinking + effort parameter |
356
+ | Context window | 200K (1M beta) | 1M GA at standard pricing |
357
+ | Max output (Opus) | 64K | 128K |
358
+ | Instruction following | Precise | Stronger; fewer false claims |
359
+ | Prefilling | Supported | Removed (400 error) |
360
+ | Default effort (Sonnet) | N/A (no effort param) | `high` (set explicitly) |
361
+ | Context compaction | Manual (`/compact`) | Server-side automatic (beta) |
362
+
363
+ ### Migration Checklist
364
+
365
+ 1. **Remove assistant message prefilling** — use structured outputs or `output_config.format`
366
+ 2. **Set effort explicitly on Sonnet 4.6** — start with `medium` to match 4.5 latency
367
+ 3. **Remove "think carefully" prompts** — these cause overthinking on 4.6; use effort parameter
368
+ 4. **Switch to adaptive thinking** — replace `{type: "enabled", budget_tokens: N}` with `{type: "adaptive"}`
369
+ 5. **Increase `max_tokens`** — set to at least 16K (32K recommended) for thinking headroom
370
+ 6. **Test for latency changes** — default `high` effort may be slower than expected
371
+
372
+ ---
373
+
374
+ ## Complete Example: Coding Assistant System Prompt
375
+
376
+ ```xml
377
+ <role>
378
+ You are an expert software engineer. You write clean, maintainable code
379
+ and provide clear explanations.
380
+ </role>
381
+
382
+ <behavior>
383
+ - Follow instructions precisely
384
+ - Ask clarifying questions only when critical information is missing
385
+ - Provide working code, not pseudocode, unless requested otherwise
386
+ </behavior>
387
+
388
+ <scope_constraints>
389
+ - Avoid over-engineering. Only make changes directly requested or clearly necessary.
390
+ - Do not create extra files unless explicitly needed
391
+ - Do not add abstractions beyond requirements
392
+ - Choose the simplest valid interpretation of ambiguous instructions
393
+ </scope_constraints>
394
+
395
+ <output_format>
396
+ For code changes:
397
+ 1. Brief explanation of approach (1-2 sentences)
398
+ 2. The code
399
+ 3. Usage example if applicable
400
+
401
+ For questions:
402
+ - Direct answer first
403
+ - Supporting explanation if helpful
404
+ </output_format>
405
+
406
+ <tool_usage>
407
+ - Prioritize parallel tool calls when actions are independent
408
+ - Read multiple files simultaneously to build context faster
409
+ - After modifications, verify changes work as expected
410
+ </tool_usage>
411
+ ```
412
+
413
+ ---
414
+
415
+ ## Key Differences: Claude 4.6 vs GPT-5.5 vs Gemini 3.1 Pro
416
+
417
+ | Aspect | Claude 4.6 | GPT-5.5 | Gemini 3.1 Pro |
418
+ | ------------------------- | ------------------------------- | ---------------------------------- | ------------------------------- |
419
+ | Default reasoning | Adaptive (effort: high default) | `none` | `high` (dynamic, 3 tiers) |
420
+ | Thinking control | Effort: low/medium/high/max | reasoning_effort: none to xhigh | thinking_level: low/medium/high |
421
+ | Tag preference | XML strongly preferred | XML preferred | XML or Markdown (not both) |
422
+ | System prompt sensitivity | High (dial back aggressive) | Moderate | Moderate |
423
+ | Temperature | Use only temp OR top_p | Flexible | Must stay at 1.0 |
424
+ | Context window | 1M (GA, standard pricing) | 1M (2x pricing above 272K) | 1M |
425
+ | Max output | 128K (Opus) / 64K (Sonnet) | 128K | 65K |
426
+ | Context extension | Compaction (beta) + `/compact` | Native compaction (server-side) | Thought signatures |
427
+ | Tool Search | No | **Yes (47% savings)** | No |
428
+ | Custom tools endpoint | No | No | **Yes** |
429
+ | Multimodal | Images + PDFs | Native | Native (text/image/video/audio) |
430
+ | Prefilling | **Removed (400 error)** | Supported | Supported |
431
+ | Knowledge cutoff | May 2025 | August 2025 | January 2025 |
432
+ | Best for | Coding, long-running work | Agentic, coding, professional work | Reasoning, multimodal, agentic |
433
+
434
+ ---
435
+
436
+ ## Pro Tips
437
+
438
+ 1. **Use Sonnet 4.6 for 80%+ of tasks** — 98% of Opus coding quality at 1/5 the cost
439
+
440
+ 2. **Set effort explicitly** — Sonnet defaults to `high`; start with `medium` for balanced latency/quality
441
+
442
+ 3. **Replace "think carefully" with effort parameter** — old prompt workarounds cause overthinking on 4.6
443
+
444
+ 4. **Remove prefilling** — use structured outputs or system prompt instructions for format control
445
+
446
+ 5. **Set `max_tokens` to 32K** — thinking and output share the budget; low limits cause mid-reasoning cutoff
447
+
448
+ 6. **Dial back aggressive language** — "Use this tool when..." not "CRITICAL: You MUST use..."
449
+
450
+ 7. **Use decision rules, not prohibitions** — Claude 4.6 reasons about logical necessity
451
+
452
+ 8. **Documents at top, query at end** — up to 30% improvement on long-context tasks
453
+
454
+ 9. **Constrain overengineering explicitly** — still worth including even though 4.6 is better at this
455
+
456
+ ---
457
+
458
+ ## Sources
459
+
460
+ - [Anthropic: Prompting Best Practices](https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/claude-prompting-best-practices)
461
+ - [Anthropic: What's New in Claude 4.6](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-6)
462
+ - [Anthropic: Introducing Claude Opus 4.6](https://www.anthropic.com/news/claude-opus-4-6)
463
+ - [Anthropic: Introducing Claude Sonnet 4.6](https://www.anthropic.com/news/claude-sonnet-4-6)
464
+ - [Anthropic: Claude Opus 4.6](https://www.anthropic.com/claude/opus)
465
+ - [Anthropic: Migration Guide](https://platform.claude.com/docs/en/about-claude/models/migration-guide)
466
+ - [Anthropic: Adaptive Thinking](https://platform.claude.com/docs/en/build-with-claude/adaptive-thinking)
467
+ - [Anthropic: Effort Parameter](https://platform.claude.com/docs/en/build-with-claude/effort)
468
+ - [Anthropic: Extended Thinking](https://platform.claude.com/docs/en/build-with-claude/extended-thinking)
469
+ - [Anthropic: Extended Thinking Tips](https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/extended-thinking-tips)
470
+ - [Anthropic: Context Windows](https://platform.claude.com/docs/en/build-with-claude/context-windows)
471
+ - [Anthropic: Models Overview](https://platform.claude.com/docs/en/about-claude/models/overview)
472
+ - [Resolve AI: Testing Sonnet 4.6 Adaptive Thinking](https://resolve.ai/blog/Our-early-impressions-of-Claude-Sonnet-4.6)
473
+ - [NxCode: Sonnet 4.6 vs 4.5 Migration Guide](https://www.nxcode.io/resources/news/claude-sonnet-4-6-vs-4-5-upgrade-guide-2026)
474
+ - [NxCode: Sonnet 4.6 vs Opus 4.6 Comparison](https://www.nxcode.io/resources/news/claude-sonnet-4-6-vs-opus-4-6-complete-comparison-2026)