@bastani/atomic 0.8.30 → 0.8.31-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. package/CHANGELOG.md +29 -0
  2. package/README.md +12 -10
  3. package/dist/builtin/cursor/CHANGELOG.md +4 -0
  4. package/dist/builtin/cursor/package.json +2 -2
  5. package/dist/builtin/intercom/CHANGELOG.md +4 -0
  6. package/dist/builtin/intercom/package.json +2 -2
  7. package/dist/builtin/mcp/CHANGELOG.md +4 -0
  8. package/dist/builtin/mcp/package.json +3 -3
  9. package/dist/builtin/subagents/CHANGELOG.md +13 -0
  10. package/dist/builtin/subagents/agents/codebase-online-researcher.md +8 -8
  11. package/dist/builtin/subagents/agents/debugger.md +6 -6
  12. package/dist/builtin/subagents/package.json +4 -4
  13. package/dist/builtin/subagents/skills/effective-liteparse/SKILL.md +118 -0
  14. package/dist/builtin/subagents/skills/effective-liteparse/scripts/search.py +128 -0
  15. package/dist/builtin/subagents/skills/playwright-cli/SKILL.md +404 -0
  16. package/dist/builtin/subagents/skills/playwright-cli/references/element-attributes.md +23 -0
  17. package/dist/builtin/subagents/skills/playwright-cli/references/playwright-tests.md +39 -0
  18. package/dist/builtin/subagents/skills/playwright-cli/references/request-mocking.md +87 -0
  19. package/dist/builtin/subagents/skills/playwright-cli/references/running-code.md +241 -0
  20. package/dist/builtin/subagents/skills/playwright-cli/references/session-management.md +225 -0
  21. package/dist/builtin/subagents/skills/playwright-cli/references/spec-driven-testing.md +305 -0
  22. package/dist/builtin/subagents/skills/playwright-cli/references/storage-state.md +275 -0
  23. package/dist/builtin/subagents/skills/playwright-cli/references/test-generation.md +134 -0
  24. package/dist/builtin/subagents/skills/playwright-cli/references/tracing.md +139 -0
  25. package/dist/builtin/subagents/skills/playwright-cli/references/video-recording.md +143 -0
  26. package/dist/builtin/web-access/CHANGELOG.md +4 -0
  27. package/dist/builtin/web-access/package.json +2 -2
  28. package/dist/builtin/workflows/CHANGELOG.md +16 -0
  29. package/dist/builtin/workflows/README.md +4 -4
  30. package/dist/builtin/workflows/builtin/deep-research-codebase.ts +1 -1
  31. package/dist/builtin/workflows/builtin/goal.ts +2 -2
  32. package/dist/builtin/workflows/builtin/open-claude-design.ts +60 -57
  33. package/dist/builtin/workflows/builtin/ralph.ts +117 -14
  34. package/dist/builtin/workflows/builtin/shared-prompts.ts +1 -1
  35. package/dist/builtin/workflows/package.json +2 -2
  36. package/dist/builtin/workflows/skills/research-codebase/SKILL.md +1 -1
  37. package/dist/builtin/workflows/src/extension/workflow-schema.ts +3 -1
  38. package/dist/builtin/workflows/src/runs/foreground/stage-runner.ts +5 -0
  39. package/dist/builtin/workflows/src/runs/shared/model-fallback.ts +95 -8
  40. package/dist/builtin/workflows/src/shared/authoring-contract.d.ts +11 -0
  41. package/dist/cli/args.d.ts +1 -0
  42. package/dist/cli/args.d.ts.map +1 -1
  43. package/dist/cli/args.js +21 -1
  44. package/dist/cli/args.js.map +1 -1
  45. package/dist/cli/list-models.d.ts.map +1 -1
  46. package/dist/cli/list-models.js +2 -1
  47. package/dist/cli/list-models.js.map +1 -1
  48. package/dist/core/agent-session-services.d.ts +2 -0
  49. package/dist/core/agent-session-services.d.ts.map +1 -1
  50. package/dist/core/agent-session-services.js +2 -0
  51. package/dist/core/agent-session-services.js.map +1 -1
  52. package/dist/core/agent-session.d.ts +18 -0
  53. package/dist/core/agent-session.d.ts.map +1 -1
  54. package/dist/core/agent-session.js +182 -19
  55. package/dist/core/agent-session.js.map +1 -1
  56. package/dist/core/compaction/branch-summarization.d.ts.map +1 -1
  57. package/dist/core/compaction/branch-summarization.js +20 -5
  58. package/dist/core/compaction/branch-summarization.js.map +1 -1
  59. package/dist/core/compaction/context-compaction.d.ts.map +1 -1
  60. package/dist/core/compaction/context-compaction.js +14 -3
  61. package/dist/core/compaction/context-compaction.js.map +1 -1
  62. package/dist/core/context-window.d.ts +39 -0
  63. package/dist/core/context-window.d.ts.map +1 -0
  64. package/dist/core/context-window.js +99 -0
  65. package/dist/core/context-window.js.map +1 -0
  66. package/dist/core/copilot-errors.d.ts +9 -0
  67. package/dist/core/copilot-errors.d.ts.map +1 -0
  68. package/dist/core/copilot-errors.js +32 -0
  69. package/dist/core/copilot-errors.js.map +1 -0
  70. package/dist/core/copilot-model-catalog.d.ts +135 -0
  71. package/dist/core/copilot-model-catalog.d.ts.map +1 -0
  72. package/dist/core/copilot-model-catalog.js +257 -0
  73. package/dist/core/copilot-model-catalog.js.map +1 -0
  74. package/dist/core/export-html/template.js +10 -1
  75. package/dist/core/extensions/types.d.ts +3 -1
  76. package/dist/core/extensions/types.d.ts.map +1 -1
  77. package/dist/core/extensions/types.js.map +1 -1
  78. package/dist/core/model-registry.d.ts +10 -0
  79. package/dist/core/model-registry.d.ts.map +1 -1
  80. package/dist/core/model-registry.js +107 -4
  81. package/dist/core/model-registry.js.map +1 -1
  82. package/dist/core/model-resolver.d.ts.map +1 -1
  83. package/dist/core/model-resolver.js +4 -0
  84. package/dist/core/model-resolver.js.map +1 -1
  85. package/dist/core/project-trust.d.ts.map +1 -1
  86. package/dist/core/project-trust.js +2 -1
  87. package/dist/core/project-trust.js.map +1 -1
  88. package/dist/core/provider-attribution.d.ts.map +1 -1
  89. package/dist/core/provider-attribution.js +17 -7
  90. package/dist/core/provider-attribution.js.map +1 -1
  91. package/dist/core/sdk.d.ts +8 -0
  92. package/dist/core/sdk.d.ts.map +1 -1
  93. package/dist/core/sdk.js +58 -0
  94. package/dist/core/sdk.js.map +1 -1
  95. package/dist/core/session-manager.d.ts +8 -1
  96. package/dist/core/session-manager.d.ts.map +1 -1
  97. package/dist/core/session-manager.js +19 -3
  98. package/dist/core/session-manager.js.map +1 -1
  99. package/dist/core/settings-manager.d.ts +15 -0
  100. package/dist/core/settings-manager.d.ts.map +1 -1
  101. package/dist/core/settings-manager.js +124 -1
  102. package/dist/core/settings-manager.js.map +1 -1
  103. package/dist/core/system-prompt.d.ts.map +1 -1
  104. package/dist/core/system-prompt.js +1 -0
  105. package/dist/core/system-prompt.js.map +1 -1
  106. package/dist/core/tools/edit-diff.d.ts +1 -2
  107. package/dist/core/tools/edit-diff.d.ts.map +1 -1
  108. package/dist/core/tools/edit-diff.js +1 -2
  109. package/dist/core/tools/edit-diff.js.map +1 -1
  110. package/dist/index.d.ts +3 -1
  111. package/dist/index.d.ts.map +1 -1
  112. package/dist/index.js +2 -0
  113. package/dist/index.js.map +1 -1
  114. package/dist/main.d.ts.map +1 -1
  115. package/dist/main.js +24 -1
  116. package/dist/main.js.map +1 -1
  117. package/dist/modes/index.d.ts +1 -1
  118. package/dist/modes/index.d.ts.map +1 -1
  119. package/dist/modes/index.js.map +1 -1
  120. package/dist/modes/interactive/components/config-selector.d.ts.map +1 -1
  121. package/dist/modes/interactive/components/config-selector.js +5 -7
  122. package/dist/modes/interactive/components/config-selector.js.map +1 -1
  123. package/dist/modes/interactive/components/context-window-selector.d.ts +53 -0
  124. package/dist/modes/interactive/components/context-window-selector.d.ts.map +1 -0
  125. package/dist/modes/interactive/components/context-window-selector.js +136 -0
  126. package/dist/modes/interactive/components/context-window-selector.js.map +1 -0
  127. package/dist/modes/interactive/components/model-selector.d.ts.map +1 -1
  128. package/dist/modes/interactive/components/model-selector.js +2 -1
  129. package/dist/modes/interactive/components/model-selector.js.map +1 -1
  130. package/dist/modes/interactive/components/scoped-models-selector.d.ts.map +1 -1
  131. package/dist/modes/interactive/components/scoped-models-selector.js +4 -1
  132. package/dist/modes/interactive/components/scoped-models-selector.js.map +1 -1
  133. package/dist/modes/interactive/components/settings-selector.d.ts +2 -0
  134. package/dist/modes/interactive/components/settings-selector.d.ts.map +1 -1
  135. package/dist/modes/interactive/components/settings-selector.js +165 -15
  136. package/dist/modes/interactive/components/settings-selector.js.map +1 -1
  137. package/dist/modes/interactive/components/tree-selector.d.ts.map +1 -1
  138. package/dist/modes/interactive/components/tree-selector.js +51 -4
  139. package/dist/modes/interactive/components/tree-selector.js.map +1 -1
  140. package/dist/modes/interactive/interactive-mode.d.ts +6 -1
  141. package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
  142. package/dist/modes/interactive/interactive-mode.js +115 -55
  143. package/dist/modes/interactive/interactive-mode.js.map +1 -1
  144. package/dist/modes/interactive/model-search.d.ts +7 -0
  145. package/dist/modes/interactive/model-search.d.ts.map +1 -0
  146. package/dist/modes/interactive/model-search.js +6 -0
  147. package/dist/modes/interactive/model-search.js.map +1 -0
  148. package/dist/modes/interactive/theme/theme-controller.d.ts +30 -0
  149. package/dist/modes/interactive/theme/theme-controller.d.ts.map +1 -0
  150. package/dist/modes/interactive/theme/theme-controller.js +108 -0
  151. package/dist/modes/interactive/theme/theme-controller.js.map +1 -0
  152. package/dist/modes/interactive/theme/theme-schema.json +2 -1
  153. package/dist/modes/interactive/theme/theme.d.ts +5 -0
  154. package/dist/modes/interactive/theme/theme.d.ts.map +1 -1
  155. package/dist/modes/interactive/theme/theme.js +70 -29
  156. package/dist/modes/interactive/theme/theme.js.map +1 -1
  157. package/dist/modes/rpc/rpc-client.d.ts +14 -2
  158. package/dist/modes/rpc/rpc-client.d.ts.map +1 -1
  159. package/dist/modes/rpc/rpc-client.js +23 -3
  160. package/dist/modes/rpc/rpc-client.js.map +1 -1
  161. package/dist/modes/rpc/rpc-mode.d.ts +1 -1
  162. package/dist/modes/rpc/rpc-mode.d.ts.map +1 -1
  163. package/dist/modes/rpc/rpc-mode.js +31 -2
  164. package/dist/modes/rpc/rpc-mode.js.map +1 -1
  165. package/dist/modes/rpc/rpc-types.d.ts +23 -0
  166. package/dist/modes/rpc/rpc-types.d.ts.map +1 -1
  167. package/dist/modes/rpc/rpc-types.js.map +1 -1
  168. package/dist/package-manager-cli.d.ts.map +1 -1
  169. package/dist/package-manager-cli.js +39 -9
  170. package/dist/package-manager-cli.js.map +1 -1
  171. package/docs/custom-provider.md +4 -1
  172. package/docs/extensions.md +21 -0
  173. package/docs/json.md +3 -1
  174. package/docs/models.md +78 -2
  175. package/docs/packages.md +13 -9
  176. package/docs/providers.md +3 -0
  177. package/docs/quickstart.md +14 -0
  178. package/docs/rpc.md +80 -1
  179. package/docs/sdk.md +35 -11
  180. package/docs/session-format.md +15 -1
  181. package/docs/sessions.md +1 -1
  182. package/docs/settings.md +12 -2
  183. package/docs/themes.md +3 -1
  184. package/docs/tui.md +1 -1
  185. package/docs/usage.md +12 -9
  186. package/docs/workflows.md +34 -10
  187. package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
  188. package/examples/extensions/custom-provider-anthropic/package.json +1 -1
  189. package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
  190. package/examples/extensions/gondolin/package-lock.json +2 -2
  191. package/examples/extensions/gondolin/package.json +1 -1
  192. package/examples/extensions/preset.ts +10 -4
  193. package/examples/extensions/provider-payload.ts +5 -5
  194. package/examples/extensions/sandbox/index.ts +2 -2
  195. package/examples/extensions/sandbox/package-lock.json +3 -3
  196. package/examples/extensions/sandbox/package.json +2 -2
  197. package/examples/extensions/subagent/agents.ts +2 -2
  198. package/examples/extensions/subagent/index.ts +4 -2
  199. package/examples/extensions/with-deps/package-lock.json +2 -2
  200. package/examples/extensions/with-deps/package.json +1 -1
  201. package/package.json +5 -5
  202. package/dist/builtin/subagents/skills/browser/EXAMPLES.md +0 -151
  203. package/dist/builtin/subagents/skills/browser/LICENSE.txt +0 -21
  204. package/dist/builtin/subagents/skills/browser/REFERENCE.md +0 -451
  205. package/dist/builtin/subagents/skills/browser/SKILL.md +0 -170
package/docs/rpc.md CHANGED
@@ -13,6 +13,7 @@ atomic --mode rpc [options]
13
13
  Common options:
14
14
  - `--provider <name>`: Set the LLM provider (anthropic, openai, google, etc.)
15
15
  - `--model <pattern>`: Model pattern or ID (supports `provider/id` and optional `:<thinking>`)
16
+ - `--context-window <tokens>`: Select a supported context-window size for the startup model (`400k`, `1m`, or raw tokens)
16
17
  - `--name <name>` / `-n <name>`: Set the session display name at startup
17
18
  - `--no-session`: Disable session persistence
18
19
  - `--session-dir <path>`: Custom session storage directory
@@ -190,7 +191,7 @@ Response:
190
191
  }
191
192
  ```
192
193
 
193
- The `model` field is a full [Model](#model) object or `null`. The `sessionName` field is the display name set via `set_session_name`, or omitted if not set.
194
+ The `model` field is a full [Model](#model) object or `null`. Its `contextWindow` is the active/effective token budget; selectable models may also include `defaultContextWindow` and `contextWindowOptions`. The `sessionName` field is the display name set via `set_session_name`, or omitted if not set.
194
195
 
195
196
  #### get_messages
196
197
 
@@ -276,6 +277,66 @@ Response contains an array of full [Model](#model) objects:
276
277
  }
277
278
  ```
278
279
 
280
+ ### Context Window
281
+
282
+ #### get_available_context_windows
283
+
284
+ List the context-window token budgets supported by the current model and read the active/effective runtime selection.
285
+
286
+ ```json
287
+ {"type": "get_available_context_windows"}
288
+ ```
289
+
290
+ Response:
291
+ ```json
292
+ {
293
+ "type": "response",
294
+ "command": "get_available_context_windows",
295
+ "success": true,
296
+ "data": {
297
+ "contextWindows": [400000, 1000000],
298
+ "currentContextWindow": 400000,
299
+ "supportsSelection": true
300
+ }
301
+ }
302
+ ```
303
+
304
+ - `contextWindows`: supported token budgets for the active model, sorted ascending.
305
+ - `currentContextWindow`: the active/effective token budget on `model.contextWindow`; omitted when no model is selected.
306
+ - `supportsSelection`: `true` when the active model exposes more than one supported budget.
307
+
308
+ #### set_context_window
309
+
310
+ Set the active context-window token budget for the current model at runtime.
311
+
312
+ ```json
313
+ {"type": "set_context_window", "contextWindow": 1000000}
314
+ ```
315
+
316
+ Compact string values are also accepted:
317
+ ```json
318
+ {"type": "set_context_window", "contextWindow": "1m"}
319
+ ```
320
+
321
+ Response:
322
+ ```json
323
+ {"type": "response", "command": "set_context_window", "success": true}
324
+ ```
325
+
326
+ This command calls `AgentSession.setContextWindow(...)` without `{ persistDefault: true }`: it updates the active model, appends a `context_window_change` session entry and emits `context_window_changed` when the budget changes, but it does **not** write context-window defaults to settings. Use startup `--context-window` or an interactive context-window selection when you intentionally want the effective selection persisted under `defaultContextWindows["provider/modelId"]`.
327
+
328
+ Unsupported or malformed selections return the standard RPC error response:
329
+ ```json
330
+ {
331
+ "type": "response",
332
+ "command": "set_context_window",
333
+ "success": false,
334
+ "error": "Context window 2m is not supported by custom/selectable-context. Supported values: 400k, 1m."
335
+ }
336
+ ```
337
+
338
+ Larger provider context windows may consume more credits/cost. For allowlisted GitHub Copilot long-context models (including `github-copilot/gpt-5.5` and `github-copilot/gemini-3.1-pro-preview`), selecting `1m` raises Atomic's local budget and sends `X-GitHub-Api-Version: 2026-06-01`; GitHub applies the long-context billing tier server-side by prompt token count. That tier consumes more Copilot AI credits and requires Copilot long-context/usage-based billing entitlement; without it, requests over GitHub's server cap are rejected with a friendly hint.
339
+
279
340
  ### Thinking
280
341
 
281
342
  #### set_thinking_level
@@ -760,6 +821,7 @@ Events are streamed to stdout as JSON lines during agent operation. Events do NO
760
821
  | `tool_execution_update` | Tool execution progress (streaming output) |
761
822
  | `tool_execution_end` | Tool completes |
762
823
  | `queue_update` | Pending steering/follow-up queue changed |
824
+ | `context_window_changed` | Active context-window token budget changed |
763
825
  | `compaction_start` | Default Verbatim Compaction begins |
764
826
  | `compaction_end` | Default Verbatim Compaction completes |
765
827
  | `context_compaction_start` | Compatibility `context_compact` RPC begins |
@@ -911,6 +973,19 @@ Emitted whenever the pending steering or follow-up queue changes.
911
973
  }
912
974
  ```
913
975
 
976
+ ### context_window_changed
977
+
978
+ Emitted when the active context-window token budget changes through RPC `set_context_window`, `AgentSession.setContextWindow()` in an SDK-backed runtime, or because in-place tree navigation replayed a branch-scoped `context_window_change` entry. Navigation replay updates the active model for accurate budgeting and compaction but does not append another session entry or write context-window defaults to settings.
979
+
980
+ ```json
981
+ {
982
+ "type": "context_window_changed",
983
+ "contextWindow": 1000000
984
+ }
985
+ ```
986
+
987
+ Larger provider context windows may consume more credits/cost. Prefer the model default unless the additional repository/session context is useful for the current task. For allowlisted GitHub Copilot long-context models such as `github-copilot/gpt-5.5` and `github-copilot/gemini-3.1-pro-preview`, a `1m` selection raises Atomic's local budget and sends `X-GitHub-Api-Version: 2026-06-01`; GitHub applies the long-context billing tier server-side by prompt size. That tier consumes more Copilot AI credits and requires long-context/usage-based billing entitlement.
988
+
914
989
  ### compaction_start / compaction_end
915
990
 
916
991
  Emitted when default Verbatim Compaction runs, whether manual or automatic. The result records deletion targets and stats rather than a generated summary.
@@ -1234,6 +1309,8 @@ Source files and installed definitions:
1234
1309
  "reasoning": true,
1235
1310
  "input": ["text", "image"],
1236
1311
  "contextWindow": 200000,
1312
+ "defaultContextWindow": 200000,
1313
+ "contextWindowOptions": [200000, 1000000],
1237
1314
  "maxTokens": 16384,
1238
1315
  "cost": {
1239
1316
  "input": 3.0,
@@ -1244,6 +1321,8 @@ Source files and installed definitions:
1244
1321
  }
1245
1322
  ```
1246
1323
 
1324
+ `contextWindow` is the active/effective token budget used by Atomic's local budgeting, footer/stats, and compaction logic. `defaultContextWindow` is the model's scalar default before a session/runtime override, and `contextWindowOptions` lists selectable token budgets when the model supports more than one size. RPC clients can read/select the active runtime budget with `get_available_context_windows` and `set_context_window`; the runtime command does not persist context-window defaults to settings.
1325
+
1247
1326
  ### UserMessage
1248
1327
 
1249
1328
  ```json
package/docs/sdk.md CHANGED
@@ -107,11 +107,14 @@ interface AgentSession {
107
107
  sessionFile: string | undefined;
108
108
  sessionId: string;
109
109
 
110
- // Model control
110
+ // Model, thinking, and context-window control
111
111
  setModel(model: Model): Promise<void>;
112
112
  setThinkingLevel(level: ThinkingLevel): void;
113
+ setContextWindow(contextWindow: number, options?: { persistDefault?: boolean }): void;
113
114
  cycleModel(): Promise<ModelCycleResult | undefined>;
114
115
  cycleThinkingLevel(): ThinkingLevel | undefined;
116
+ getAvailableContextWindows(): number[];
117
+ supportsContextWindowSelection(): boolean;
115
118
 
116
119
  // State access
117
120
  agent: Agent;
@@ -121,7 +124,7 @@ interface AgentSession {
121
124
  isStreaming: boolean;
122
125
 
123
126
  // In-place tree navigation within the current session file
124
- navigateTree(targetId: string, options?: { summarize?: boolean; customInstructions?: string; replaceInstructions?: boolean; label?: string }): Promise<{ editorText?: string; cancelled: boolean }>;
127
+ navigateTree(targetId: string, options?: { summarize?: boolean; customInstructions?: string; replaceInstructions?: boolean; label?: string }): Promise<{ editorText?: string; cancelled: boolean; aborted?: boolean; summaryEntry?: BranchSummaryEntry }>;
125
128
 
126
129
  // Verbatim Compaction (deletion-only Context Compaction)
127
130
  compact(): Promise<ContextCompactionResult>;
@@ -337,10 +340,13 @@ session.subscribe((event) => {
337
340
  // event.toolResults: tool results from this turn
338
341
  break;
339
342
 
340
- // Session events (queue, compaction, retry)
343
+ // Session events (queue, context-window, compaction, retry)
341
344
  case "queue_update":
342
345
  console.log(event.steering, event.followUp);
343
346
  break;
347
+ case "context_window_changed":
348
+ console.log(`Context window: ${event.contextWindow}`);
349
+ break;
344
350
  case "compaction_start":
345
351
  case "compaction_end":
346
352
  case "auto_retry_start":
@@ -412,6 +418,8 @@ const available = await modelRegistry.getAvailable();
412
418
  const { session } = await createAgentSession({
413
419
  model: opus,
414
420
  thinkingLevel: "medium", // off, minimal, low, medium, high, xhigh
421
+ contextWindow: 1_000_000, // optional; must be supported by the selected model unless non-strict fallback is acceptable
422
+ contextWindowStrict: true, // optional; return contextWindowError instead of warning/fallback when unsupported
415
423
 
416
424
  // Models for cycling (CTRL+P in interactive mode)
417
425
  scopedModels: [
@@ -429,6 +437,12 @@ If no model is provided:
429
437
  2. Uses default from settings
430
438
  3. Falls back to first available model
431
439
 
440
+ Context-window selection is independent from `thinkingLevel`. `contextWindow` accepts a raw token count such as `400_000` or `1_000_000`; for most providers the value must be present in the model's supported context windows (`model.contextWindowOptions` plus the scalar default). GitHub Copilot is the only provider with rounded long-context budget handling: when a tiered Copilot model advertises a long tier below the branded request (for example `936_000` for a `1_000_000` request), Atomic selects the largest advertised Copilot long tier at or below the request instead of falling back to the short tier. Settings lookup first checks the selected model's `defaultContextWindows["provider/modelId"]` entry, then the optional global `defaultContextWindow` fallback; unsupported model-specific settings keep the model default and return `contextWindowWarning`, while unsupported global fallback values are ignored silently as not applicable to the active model. When you pass `contextWindowStrict: true`, an unsupported explicit selection is reported as `contextWindowError` so callers can fail before prompting. A successful explicit `contextWindow` startup option is journaled as a `context_window_change` entry even when it equals the scalar model default, so the user's explicit budget choice survives future settings changes and resume.
441
+
442
+ At runtime, use `session.getAvailableContextWindows()` to inspect supported values, `session.supportsContextWindowSelection()` to check whether more than one value is selectable, and `session.setContextWindow(tokens, { persistDefault })` to change the active model budget. `setContextWindow()` journals a `context_window_change` entry only when the active value changes. Passing `{ persistDefault: true }` also writes the effective selected budget to `defaultContextWindows["provider/modelId"]` in settings instead of the global fallback, so a Copilot prompt cap such as `936k` does not leak into Anthropic, Cursor, or other providers. Tree navigation replays the target branch's `context_window_change` state into the active model without adding another journal entry or changing settings. Larger provider context windows may consume more credits/cost, so opt into larger values deliberately. For allowlisted GitHub Copilot long-context models (including `github-copilot/gpt-5.5` and `github-copilot/gemini-3.1-pro-preview`), selecting `1m` raises Atomic's local budget to the model's advertised `922k`/`936k` tier and sends `X-GitHub-Api-Version: 2026-06-01`; GitHub applies the long-context tier server-side by prompt token count. That tier consumes more Copilot AI credits and requires long-context/usage-based billing entitlement.
443
+
444
+ The package root exports the same context-window helpers and types used by the runtime: `parseContextWindowValue()`, `formatContextWindow()`, `validateContextWindowValue()`, `normalizeContextWindowOptions()`, `getModelDefaultContextWindow()`, `getSupportedContextWindows()`, `withContextWindowOptions()`, `selectContextWindow()`, `ContextWindowParseResult`, `ContextWindowSelection`, `ContextWindowSelectionError`, and `ContextWindowSelectionOptions`. Importing from `@bastani/atomic` also includes the `@earendil-works/pi-ai` `Model<Api>` augmentation for `contextWindowOptions` and `defaultContextWindow`, so SDK consumers can use the helper types without importing internal source paths.
445
+
432
446
  > See [examples/sdk/02-custom-model.ts](https://github.com/bastani-inc/atomic/blob/main/packages/coding-agent/examples/sdk/02-custom-model.ts)
433
447
 
434
448
  ### API Keys and OAuth
@@ -532,11 +546,11 @@ const { session } = await createAgentSession({
532
546
  ```typescript
533
547
  import { createAgentSession, type BashCommandPolicy } from "@bastani/atomic";
534
548
 
535
- const browseOnly: BashCommandPolicy = {
549
+ const playwrightCliOnly: BashCommandPolicy = {
536
550
  default: "deny",
537
551
  allow: [
538
- "which browse",
539
- { prefix: "browse " },
552
+ "which playwright-cli",
553
+ { prefix: "playwright-cli " },
540
554
  { prefix: "grep " },
541
555
  { glob: "bun test test/unit/*.test.ts" },
542
556
  { regex: "^rg\\b" },
@@ -547,19 +561,19 @@ const browseOnly: BashCommandPolicy = {
547
561
 
548
562
  const { session } = await createAgentSession({
549
563
  tools: ["read", "bash"],
550
- bashPolicy: browseOnly,
564
+ bashPolicy: playwrightCliOnly,
551
565
  });
552
566
  ```
553
567
 
554
568
  Rules match exact command strings, prefixes, command-string globs, or JavaScript regular expressions. `default` defaults to `"allow"` for backward compatibility; set `default: "deny"` for an allowlist-only shell. Omitting `bashPolicy`, passing `{}`, or passing a default-allow policy with no `allow`/`deny` rules is a compatibility no-op and does not parse the command. Empty `allow`/`deny` arrays and match-only default-allow policies are treated the same; malformed policy objects still fail closed.
555
569
 
556
- Glob rules match command target strings, not filesystem path segments. `*` and `?` can match `/`, so `{ glob: "browse *" }` matches `browse http://localhost:3000`, `browse docs/index.html`, and `browse ./preview/output.html`, while still matching the whole target so `echo browse docs/index.html` does not match unless the pattern includes leading wildcards. Backslash escapes the next glob character when you need a literal `*`, `?`, or bracket; inside bracket classes, escaped metacharacters such as `\-`, `\^`, `\]`, `\[`, and `\\` stay literal instead of becoming regex ranges, negation markers, class delimiters, or backslash escapes. Malformed glob bracket classes or ranges, such as `{ glob: "echo [z-a]" }`, fail closed as `invalid-policy` rather than surfacing raw regular-expression errors.
570
+ Glob rules match command target strings, not filesystem path segments. `*` and `?` can match `/`, so `{ glob: "playwright-cli *" }` matches `playwright-cli http://localhost:3000`, `playwright-cli docs/index.html`, and `playwright-cli ./preview/output.html`, while still matching the whole target so `echo playwright-cli docs/index.html` does not match unless the pattern includes leading wildcards. Backslash escapes the next glob character when you need a literal `*`, `?`, or bracket; inside bracket classes, escaped metacharacters such as `\-`, `\^`, `\]`, `\[`, and `\\` stay literal instead of becoming regex ranges, negation markers, class delimiters, or backslash escapes. Malformed glob bracket classes or ranges, such as `{ glob: "echo [z-a]" }`, fail closed as `invalid-policy` rather than surfacing raw regular-expression errors.
557
571
 
558
572
  Runtime policy validation is part of enforcement for JavaScript/JSON callers: a provided policy must be a non-null object with only the top-level keys `default`, `allow`, `deny`, and `match`; typoed or extra keys such as `denny` or `extra` are rejected as `invalid-policy` even when the policy otherwise looks like default-allow. `allow`/`deny` must be arrays when present, rules must be non-empty strings or one-variant objects with string values, regex flags must be strings, and invalid regexes, invalid globs, or stateful `g`/`y` flags are rejected as `invalid-policy` before shell execution.
559
573
 
560
- By default, `match: "segments"` parses shell separators and substitutions and requires every executable segment to pass. Separators include pipes, `&&`, `||`, `;`, background `&`, and unquoted line terminators: LF, CRLF, and bare CR are command separators rather than ordinary whitespace. Bash noclobber redirection `>|` is treated as redirection syntax rather than a pipeline separator after a command head, so `echo ok >|/tmp/out` remains one `echo` segment. For example, `browse snapshot | grep title` must satisfy both the `browse` rule and the `grep` rule, and `browse snapshot; rm -rf /` or `browse snapshot\nrm -rf /` is blocked when `rm` is denied or when `default: "deny"` has no matching allow rule. Segment mode also checks command substitutions (`$(...)`, backticks) and process substitutions (`<(...)`, `>(...)`). Syntax Atomic cannot safely segment is rejected before a shell process starts.
574
+ By default, `match: "segments"` parses shell separators and substitutions and requires every executable segment to pass. Separators include pipes, `&&`, `||`, `;`, background `&`, and unquoted line terminators: LF, CRLF, and bare CR are command separators rather than ordinary whitespace. Bash noclobber redirection `>|` is treated as redirection syntax rather than a pipeline separator after a command head, so `echo ok >|/tmp/out` remains one `echo` segment. For example, `playwright-cli snapshot | grep title` must satisfy both the `playwright-cli` rule and the `grep` rule, and `playwright-cli snapshot; rm -rf /` or `playwright-cli snapshot\nrm -rf /` is blocked when `rm` is denied or when `default: "deny"` has no matching allow rule. Segment mode also checks command substitutions (`$(...)`, backticks) and process substitutions (`<(...)`, `>(...)`). Syntax Atomic cannot safely segment is rejected before a shell process starts.
561
575
 
562
- Segment mode requires each command head to be a statically identifiable literal word. Literal names such as `grep`, `./script`, `/usr/bin/env`, `bun`, `browse`, and names containing hyphens, underscores, dots, or slashes are accepted when they contain no shell expansion syntax. Atomic conservatively rejects Bash reserved words and compound introducers (`coproc`, `if`, `for`, `while`, `case`, `{`, `}`, `!`), leading redirection syntax (`>file cmd`, `2>file cmd`, `<file cmd`, `&>file cmd`, `>|file cmd`, `<&0 cmd`, `>&2 cmd`), redirection operators attached to the command-head word (`cmd>file`, `cmd>>file`, `cmd>|file`, `cmd2>file`, `cmd>&2`, `cmd</tmp/in`), leading environment assignment words (`PATH=/tmp:$PATH browse snapshot`, `LD_PRELOAD=/tmp/x browse snapshot`, `FOO=bar`), variable or parameter-expanded heads (`$cmd`, `${cmd}`), quote- or escape-constructed heads (`r''m`, `"rm"`, `r\m`), tilde/glob/brace-expanded heads (`~/bin/rm`, `r*m`, `{rm,echo}`), and command/process substitutions or backticks embedded in the head. Substitutions in argument positions are still parsed so nested commands must also pass the policy.
576
+ Segment mode requires each command head to be a statically identifiable literal word. Literal names such as `grep`, `./script`, `/usr/bin/env`, `bun`, `playwright-cli`, and names containing hyphens, underscores, dots, or slashes are accepted when they contain no shell expansion syntax. Atomic conservatively rejects Bash reserved words and compound introducers (`coproc`, `if`, `for`, `while`, `case`, `{`, `}`, `!`), leading redirection syntax (`>file cmd`, `2>file cmd`, `<file cmd`, `&>file cmd`, `>|file cmd`, `<&0 cmd`, `>&2 cmd`), redirection operators attached to the command-head word (`cmd>file`, `cmd>>file`, `cmd>|file`, `cmd2>file`, `cmd>&2`, `cmd</tmp/in`), leading environment assignment words (`PATH=/tmp:$PATH playwright-cli snapshot`, `LD_PRELOAD=/tmp/x playwright-cli snapshot`, `FOO=bar`), variable or parameter-expanded heads (`$cmd`, `${cmd}`), quote- or escape-constructed heads (`r''m`, `"rm"`, `r\m`), tilde/glob/brace-expanded heads (`~/bin/rm`, `r*m`, `{rm,echo}`), and command/process substitutions or backticks embedded in the head. Substitutions in argument positions are still parsed so nested commands must also pass the policy.
563
577
 
564
578
  Use `match: "whole"` only when you intentionally want rules to match the raw command string as-is. Whole-command prefix rules can allow shell operators inside the same raw string.
565
579
 
@@ -991,6 +1005,12 @@ interface CreateAgentSessionResult {
991
1005
 
992
1006
  // Warning if session model couldn't be restored
993
1007
  modelFallbackMessage?: string;
1008
+
1009
+ // Warning if a saved/default context window could not be applied to the selected model
1010
+ contextWindowWarning?: string;
1011
+
1012
+ // Error if an explicit strict context-window selection is unsupported
1013
+ contextWindowError?: string;
994
1014
  }
995
1015
 
996
1016
  interface LoadExtensionsResult {
@@ -1236,7 +1256,8 @@ DefaultResourceLoader
1236
1256
  type ResourceLoader
1237
1257
  createEventBus
1238
1258
 
1239
- // Helpers
1259
+ // Constants and helpers
1260
+ CONFIG_DIR_NAME
1240
1261
  defineTool
1241
1262
  STRUCTURED_OUTPUT_TOOL_NAME
1242
1263
  createStructuredOutputTool
@@ -1246,6 +1267,9 @@ getPackageDir
1246
1267
  getReadmePath
1247
1268
  getDocsPath
1248
1269
  getExamplesPath
1270
+ generateDiffString
1271
+ generateUnifiedPatch
1272
+ type EditDiffResult
1249
1273
 
1250
1274
  // Session management
1251
1275
  SessionManager
@@ -216,6 +216,16 @@ Emitted when the user changes the thinking/reasoning level.
216
216
  {"type":"thinking_level_change","id":"e5f6g7h8","parentId":"d4e5f6g7","timestamp":"2024-12-03T14:06:00.000Z","thinkingLevel":"high"}
217
217
  ```
218
218
 
219
+ ### ContextWindowChangeEntry
220
+
221
+ Emitted when the user selects a supported context-window size for the active model. The value is a token count, independent of thinking/reasoning level. Explicit startup selections are journaled even when they equal the model's scalar default so the user's budget choice survives later settings changes and resume.
222
+
223
+ ```json
224
+ {"type":"context_window_change","id":"f6g7h8i9","parentId":"e5f6g7h8","timestamp":"2024-12-03T14:07:00.000Z","contextWindow":1000000}
225
+ ```
226
+
227
+ `buildSessionContext()` replays the latest `context_window_change` on the active branch. In-place tree navigation also applies the branch's replayed context window to the active model without appending another `context_window_change` entry or writing context-window defaults to settings. If a historical value is no longer supported by the current model, session creation/navigation falls back to the model default the same way other context-window restore paths do.
228
+
219
229
  ### CompactionEntry
220
230
 
221
231
  Retired summary-compaction entry. Atomic no longer produces this entry type, does not treat it as an active compaction boundary, and does not inject its generated summary into active LLM context. Historical JSONL files may still contain these lines for audit/export compatibility.
@@ -312,7 +322,7 @@ Entries form a tree:
312
322
  `buildSessionContext()` walks from the current leaf to the root, producing the message list for the LLM:
313
323
 
314
324
  1. Collects all entries on the active branch path
315
- 2. Extracts current model and thinking level settings
325
+ 2. Extracts current model, thinking level, and context-window settings
316
326
  3. Applies every `ContextCompactionEntry` logical deletion on that path, filtering targeted entries/content blocks from active context while leaving retained content unchanged
317
327
  4. Converts `BranchSummaryEntry` and `CustomMessageEntry` to appropriate message formats
318
328
  5. Ignores retired `CompactionEntry` lines for active LLM context; they remain archival JSONL data only
@@ -358,6 +368,9 @@ for (const line of lines) {
358
368
  case "thinking_level_change":
359
369
  console.log(`[${entry.id}] Thinking: ${entry.thinkingLevel}`);
360
370
  break;
371
+ case "context_window_change":
372
+ console.log(`[${entry.id}] Context window: ${entry.contextWindow}`);
373
+ break;
361
374
  }
362
375
  }
363
376
  ```
@@ -385,6 +398,7 @@ Key methods for working with sessions programmatically.
385
398
  ### Instance Methods - Appending (all return entry ID)
386
399
  - `appendMessage(message)` - Add message
387
400
  - `appendThinkingLevelChange(level)` - Record thinking change
401
+ - `appendContextWindowChange(contextWindow)` - Record context-window selection in tokens
388
402
  - `appendModelChange(provider, modelId)` - Record model change
389
403
  - `appendContextCompaction(deletedTargets, protectedEntryIds, stats, backupPath?)` - Add logical deletion compaction
390
404
  - `appendCustomEntry(customType, data?)` - Extension state (not in context)
package/docs/sessions.md CHANGED
@@ -142,6 +142,6 @@ See [Compaction](/compaction) for Verbatim Compaction, branch summarization inte
142
142
 
143
143
  ## Session Format
144
144
 
145
- Session files are JSONL and contain message entries, model changes, thinking-level changes, labels, context compactions, branch summaries, extension entries, and retired legacy `type:"compaction"` records from older sessions.
145
+ Session files are JSONL and contain message entries, model changes, thinking-level changes, context-window changes, labels, context compactions, branch summaries, extension entries, and retired legacy `type:"compaction"` records from older sessions.
146
146
 
147
147
  For parsers, extensions, SDK usage, and the full SessionManager API, see [Session Format](/session-format).
package/docs/settings.md CHANGED
@@ -227,18 +227,27 @@ Normally the package manager's global modules location is queried using `root -g
227
227
 
228
228
  When multiple sources specify a session directory, precedence is `--session-dir`, `ATOMIC_CODING_AGENT_SESSION_DIR`, then `sessionDir` in settings.json.
229
229
 
230
- ### Model Cycling
230
+ ### Models
231
231
 
232
232
  | Setting | Type | Default | Description |
233
233
  |---------|------|---------|-------------|
234
234
  | `enabledModels` | string[] | - | Model patterns for CTRL+P cycling (same format as `--models` CLI flag) |
235
+ | `defaultContextWindow` | number \| string | model default | Optional global fallback context window for models that expose selectable context windows. Accepts raw token counts or compact labels such as `400k` and `1m`. If the active model does not support the value, it is ignored and the model's own default is used. |
236
+ | `defaultContextWindows` | object | `{}` | Per-model preferred context windows keyed as `provider/modelId`. The interactive `/model` context picker writes this setting so a Copilot-specific prompt cap such as `936k` does not leak into Anthropic, Cursor, or other providers. |
235
237
 
236
238
  ```json
237
239
  {
238
- "enabledModels": ["claude-*", "gpt-4o", "gemini-2*"]
240
+ "enabledModels": ["claude-*", "gpt-4o", "gemini-2*"],
241
+ "defaultContextWindow": "1m",
242
+ "defaultContextWindows": {
243
+ "github-copilot/claude-opus-4.8": "936k",
244
+ "github-copilot/gpt-5.5": "922k"
245
+ }
239
246
  }
240
247
  ```
241
248
 
249
+ Context-window settings are independent of `defaultThinkingLevel`: selecting a larger context window does not change reasoning effort. Interactive users can change the active model's budget through the `/model` selection flow, which prompts for a context window whenever the chosen model supports more than one window and persists the effective selection under `defaultContextWindows["provider/modelId"]`. Atomic treats `defaultContextWindow` as a broad fallback only: if the active model does not support that value, the model's own default is used without a startup warning; targeted `defaultContextWindows` entries still warn when they become unsupported for their exact model. Larger provider context windows can carry higher usage cost. For GitHub Copilot allowlisted long-context models (including `github-copilot/gpt-5.5` and `github-copilot/gemini-3.1-pro-preview`), selecting `1m` raises Atomic's local prompt budget to the largest advertised long-context tier at or below that rounded request (for example `922k` or `936k`) and sends `X-GitHub-Api-Version: 2026-06-01`; GitHub then applies the long-context tier server-side by prompt token count. That tier consumes more Copilot AI credits and requires a Copilot long-context/usage-based billing entitlement; without it, requests over the server cap are rejected with a friendly hint. Custom providers and explicit model overrides can still declare their own selectable `contextWindowOptions`.
250
+
242
251
  ### Markdown
243
252
 
244
253
  | Setting | Type | Default | Description |
@@ -297,6 +306,7 @@ See [Atomic packages](/packages) for package management details.
297
306
  "defaultProvider": "anthropic",
298
307
  "defaultModel": "claude-sonnet-4-20250514",
299
308
  "defaultThinkingLevel": "medium",
309
+ "defaultContextWindow": "400k",
300
310
  "theme": "dark",
301
311
  "compaction": {
302
312
  "enabled": true,
package/docs/themes.md CHANGED
@@ -37,6 +37,8 @@ Select a theme via `/settings` or in `settings.json`:
37
37
  }
38
38
  ```
39
39
 
40
+ Use `"theme": "light-theme/dark-theme"` for automatic mode. Atomic chooses the first theme when the terminal reports a light color scheme and the second theme for dark terminals, and it follows terminal color-scheme changes when supported.
41
+
40
42
  On first run, Atomic detects your terminal background and defaults to `dark` or `light`.
41
43
 
42
44
  ## Creating a Custom Theme
@@ -137,7 +139,7 @@ vim ~/.atomic/agent/themes/my-theme.json
137
139
  }
138
140
  ```
139
141
 
140
- - `name` is required and must be unique.
142
+ - `name` is required, must be unique, and must not contain `/`.
141
143
  - `vars` is optional. Define reusable colors here, then reference them in `colors`.
142
144
  - `colors` must define all 51 required tokens.
143
145
 
package/docs/tui.md CHANGED
@@ -250,7 +250,7 @@ md.setText("Updated markdown");
250
250
 
251
251
  ### Image
252
252
 
253
- Renders images in supported terminals (Kitty, iTerm2, Ghostty, WezTerm).
253
+ Renders images in supported terminals (Kitty, iTerm2, Ghostty, WezTerm, Warp).
254
254
 
255
255
  ```typescript
256
256
  const image = new Image(
package/docs/usage.md CHANGED
@@ -131,17 +131,20 @@ atomic [options] [@files...] [messages...]
131
131
  ### Package Commands
132
132
 
133
133
  ```bash
134
- atomic install <source> [-l] # Install package, -l for project-local
135
- atomic remove <source> [-l] # Remove package
136
- atomic uninstall <source> [-l] # Alias for remove
137
- atomic update [source|self|atomic] # Update Atomic and packages; skips pinned packages
138
- atomic update --extensions # Update packages only
139
- atomic update --self # Update Atomic only
140
- atomic update --extension <src> # Update one package
141
- atomic list # List installed packages
142
- atomic config # Enable/disable package resources
134
+ atomic install <source> [-l] # Install package, -l for project-local
135
+ atomic remove <source> [-l] # Remove package
136
+ atomic uninstall <source> [-l] # Alias for remove
137
+ atomic update [source|self|atomic] # Update Atomic only, or one package source
138
+ atomic update --all # Update Atomic and packages; reconcile pinned git refs
139
+ atomic update --extensions # Update packages only; reconcile pinned git refs
140
+ atomic update --self # Update Atomic only
141
+ atomic update --extension <src> # Update one package
142
+ atomic list # List installed packages
143
+ atomic config # Enable/disable package resources
143
144
  ```
144
145
 
146
+ These commands manage Atomic packages, and `atomic update` can also update the Atomic CLI installation itself. To uninstall Atomic itself, see [Quickstart](/quickstart#uninstall). `atomic config` and project package commands accept `--approve`/`--no-approve` to trust or ignore project-local settings for one command. `atomic update` never prompts for project trust.
147
+
145
148
  See [Atomic Packages](/packages) for package sources and security notes.
146
149
 
147
150
  ### Modes
package/docs/workflows.md CHANGED
@@ -153,7 +153,7 @@ For the builtin result tables below, `deep-research-codebase`, `goal`, and `ralp
153
153
  |---|---|---|
154
154
  | `deep-research-codebase` | Scout + research-history chain → parallel specialist waves → aggregator. Indexes the whole repo and synthesizes findings. | Broad or cross-cutting research before you decide what to change. Prefer `/skill:research-codebase` for one subsystem. |
155
155
  | `goal` | Persisted goal ledger → bounded worker turns → receipts → three-reviewer gate → deterministic reducer → final report. | Small-to-medium scope changes when you can identify the work surface, state the exact outcome, and name the validation that proves it is done — for example tests, lint/typecheck, docs builds, or observable behavior. |
156
- | `ralph` | Prompt-engineering → codebase/online research → sub-agent orchestration → parallel review → optional final-stage PR handoff. | Larger migrations, broad refactors, and multi-package changes where you want Atomic to transform the prompt into a research question, research the codebase before implementing, delegate through sub-agents, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`. |
156
+ | `ralph` | Prompt-engineering → codebase/online research → sub-agent orchestration → multi-model parallel review → optional final-stage PR handoff. | Larger migrations, broad refactors, and multi-package changes where you want Atomic to transform the prompt into a research question, research the codebase before implementing, delegate through sub-agents, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`. |
157
157
  | `open-claude-design` | Design-system onboarding → reference import → HTML generation → impeccable-driven refinement → quality gate → rich HTML handoff. Renders a live `preview.html` you can iterate against (opens through `browser` when available). | UI, page, component, theme, or design-token work that benefits from generation + critique loops. |
158
158
 
159
159
  ### `deep-research-codebase`
@@ -224,7 +224,7 @@ Run examples:
224
224
 
225
225
  Write the `objective` like a compact acceptance spec. Say what should exist when the run is done, how you want testing handled, which command(s) or manual checks matter, and what outcome proves completion. The workflow is intentionally lean: it does not first generate an RFC or migration plan, so the developer-supplied objective is where scope, validation, and completion criteria belong.
226
226
 
227
- The worker may claim readiness, but it cannot finalize completion. Workers and reviewers are prompted to verify user-visible behavior end-to-end when practical, using browser-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. Three reviewers independently inspect the ledger, worker receipt, repository state, and diff against `base_branch`; each returns structured JSON with findings, evidence, verification still remaining, and an optional blocker. A TypeScript reducer marks the goal complete only when reviewer quorum approves, marks blocked only when the same dependency/tool blocker repeats for the blocker threshold, continues when evidence is missing, and returns `needs_human` when `max_turns` is exhausted or worker execution fails.
227
+ The worker may claim readiness, but it cannot finalize completion. Workers and reviewers are prompted to verify user-visible behavior end-to-end when practical, using `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. Three reviewers independently inspect the ledger, worker receipt, repository state, and diff against `base_branch`; each returns structured JSON with findings, evidence, verification still remaining, and an optional blocker. A TypeScript reducer marks the goal complete only when reviewer quorum approves, marks blocked only when the same dependency/tool blocker repeats for the blocker threshold, continues when evidence is missing, and returns `needs_human` when `max_turns` is exhausted or worker execution fails.
228
228
 
229
229
  Result fields:
230
230
 
@@ -262,7 +262,7 @@ Run examples:
262
262
  /workflow ralph prompt="Safely implement the API refactor" git_worktree_dir=../atomic-ralph-api-wt base_branch=main
263
263
  ```
264
264
 
265
- Each `ralph` iteration starts by prompt-engineering the user prompt with `/skill:prompt-engineer Transform the following user prompt to a codebase and online research question which can be thoroughly explored: ...`, then researches that transformed question with `/skill:research-codebase ...` and writes the findings under `research/`. The orchestrator treats that research artifact as its primary implementation context, initializes/updates an OS-temp implementation notes file, delegates implementation through sub-agents, and asks two reviewers to inspect the patch directly against `base_branch`. Ralph's orchestrator and reviewers are prompted to verify user-visible behavior end-to-end when practical, using browser-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. If reviewers find issues, the next prompt-engineering and research stages receive the review artifact path so follow-up research can address unresolved findings, and research stages fork from prior research session data when available. The loop stops when every reviewer approves or `max_loops` is reached. By default Ralph does not start the final `pull-request` stage, and `pr_report` is omitted. Prompt text alone does not opt in. Pass `create_pr=true` only when you explicitly want the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation, such as GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling; Ralph's own PR-creation instructions live in that final stage.
265
+ Each `ralph` iteration starts by prompt-engineering the user prompt with `/skill:prompt-engineer Transform the following user prompt to a codebase and online research question which can be thoroughly explored: ...`, then researches that transformed question with `/skill:research-codebase ...` and writes the findings under `research/`. The orchestrator treats that research artifact as its primary implementation context, initializes/updates an OS-temp implementation notes file while generating verifiable evidence for any claims it records in the notes and reviewer artifacts, delegates implementation through sub-agents, and asks three independent reviewers to inspect the patch directly against `base_branch`. The reviewer fan-out runs each reviewer on a different primary model family (with shared fallbacks) so the adversarial review gets cross-model coverage instead of three passes from one model. Ralph's orchestrator and reviewers are prompted to verify user-visible behavior end-to-end when practical, using `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. For UI-applicable or full-stack changes, the orchestrator runs a `playwright-cli` end-to-end QA pass and records a reviewable proof video (referenced in the implementation notes and surfaced as `qa_video_path`); when `create_pr=true`, the final `pull-request` stage attaches or links that video to the created PR/MR/review. If reviewers find issues, the next prompt-engineering and research stages receive the review artifact path so follow-up research can address unresolved findings, and research stages fork from prior research session data when available. The loop stops only when all three reviewers independently approve (each finds no issues) or `max_loops` is reached, so a P0–P3 finding from any single reviewer keeps Ralph iterating instead of being out-voted by a majority quorum. 
By default Ralph does not start the final `pull-request` stage, and `pr_report` is omitted. Prompt text alone does not opt in. Pass `create_pr=true` only when you explicitly want the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation, such as GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling; Ralph's own PR-creation instructions live in that final stage.
266
266
 
267
267
  Set `git_worktree_dir` when you want Ralph's worker stages isolated in a reusable Git worktree. Relative paths resolve from the invoking repository root, existing same-repository worktree roots are reused, and missing paths are created from `base_branch`. Ralph preserves the invoking repo-relative cwd inside the worktree, so launching from `repo/packages/api` with `git_worktree_dir=../repo-wt` runs stages from `../repo-wt/packages/api`.
268
268
 
@@ -276,6 +276,7 @@ Result fields:
276
276
  | `research` | Latest research report text or artifact reference. |
277
277
  | `research_path` | Path to the latest generated research artifact under `research/`. |
278
278
  | `implementation_notes_path` | OS-temp notes file containing decisions, deviations, blockers, and validation notes. |
279
+ | `qa_video_path` | Absolute path to the reviewable QA end-to-end proof video recorded with `playwright-cli` for UI-applicable changes, when one was produced. |
279
280
  | `pr_report` | Pull-request report emitted only when `create_pr=true` and the final `pull-request` stage runs. |
280
281
  | `approved` | Whether the reviewer loop approved before completion or optional final handoff. |
281
282
  | `iterations_completed` | Number of research/orchestrate/review loops completed. |
@@ -313,6 +314,7 @@ Result fields:
313
314
  | `preview_file_url` | `file://` URL for the generated `preview.html` file. |
314
315
  | `spec_path` | Absolute path to the generated `spec.html` file. |
315
316
  | `spec_file_url` | `file://` URL for the generated `spec.html` file. |
317
+ | `playwright_cli_status` | Outcome of the initial deterministic step that ensures the `playwright-cli` skill's `playwright-cli` command is installed. |
316
318
 
317
319
  `open-claude-design` has no `result` output; it exposes only the declared fields listed above. Use the declared `artifact` and `handoff` fields for generated content.
318
320
 
@@ -969,7 +971,7 @@ workflow({
969
971
  })
970
972
  ```
971
973
 
972
- Direct mode supports top-level/default options and per-task options such as `context`, `forkFromSessionFile`, `model`, `fallbackModels`, `thinkingLevel`, `tools`, `noTools`, `customTools`, `bashPolicy`, `mcp`, `output`, `outputMode`, `reads`, `worktree`, `gitWorktreeDir`, `baseBranch`, `maxOutput`, `artifacts`, `sessionDir`, `cwd`, and `agentDir`. Direct chains also support `chainName`, `chainDir`, and `failFast`.
974
+ Direct mode supports top-level/default options and per-task options such as `context`, `forkFromSessionFile`, `model`, `fallbackModels`, `thinkingLevel`, `contextWindow`, `tools`, `noTools`, `customTools`, `bashPolicy`, `mcp`, `output`, `outputMode`, `reads`, `worktree`, `gitWorktreeDir`, `baseBranch`, `maxOutput`, `artifacts`, `sessionDir`, `cwd`, and `agentDir`. Direct chains also support `chainName`, `chainDir`, and `failFast`.
973
975
 
974
976
  For large fan-outs, prefer `outputMode: "file-only"` so the parent result contains compact file references instead of full output. Treat intercom payloads from async direct runs as user-visible workflow output.
975
977
 
@@ -1477,7 +1479,8 @@ Common task/stage options include:
1477
1479
  - `prompt` or `task`
1478
1480
  - `previous` for small handoff context; use artifact paths plus `reads` for large outputs, logs, research bundles, or reviewer payloads
1479
1481
  - `context: "fresh" | "fork"`, `forkFromSessionFile`
1480
- - `model`, `fallbackModels`, `thinkingLevel`, `scopedModels`, `modelRegistry` — `model` and each `fallbackModels` entry accept a `model_name:thinking_effort` reasoning suffix; the standalone `thinkingLevel` is deprecated (see [Reasoning levels](#reasoning-levels))
1482
+ - `model`, `fallbackModels`, `thinkingLevel`, `scopedModels`, `modelRegistry` — `model` and each `fallbackModels` entry accept a `model_name:thinking_effort` reasoning suffix and an optional parenthesized context-window token such as `model (1m)` (see [Reasoning levels](#reasoning-levels) and [Context windows](#context-windows)); the standalone `thinkingLevel` is deprecated
1483
+ - `contextWindow`, `contextWindowStrict` — stage-wide context-window budget mapped to the SDK `createAgentSession` options of the same name (non-strict by default)
1481
1484
  - `tools`, `noTools`, `customTools`, `mcp: { allow?: string[], deny?: string[] }`, `bashPolicy`
1482
1485
  - `schema` for a structured final answer from this workflow item
1483
1486
  - `output`, `outputMode`, `reads`, `worktree`, `gitWorktreeDir`, `baseBranch`, `maxOutput`, `artifacts`, `sessionDir`, `cwd`, `agentDir`
@@ -1495,18 +1498,18 @@ await ctx.task("browser-preview", {
1495
1498
  bashPolicy: {
1496
1499
  default: "deny",
1497
1500
  allow: [
1498
- "which browse",
1499
- { prefix: "browse open " },
1500
- { prefix: "browse snapshot" },
1501
+ "which playwright-cli",
1502
+ { prefix: "playwright-cli open " },
1503
+ { prefix: "playwright-cli snapshot" },
1501
1504
  { prefix: "grep " },
1502
1505
  ],
1503
1506
  deny: [{ regex: "\\brm\\b" }],
1504
1507
  },
1505
- prompt: "Open the preview with browse, then summarize the visible state.",
1508
+ prompt: "Open the preview with playwright-cli, then summarize the visible state.",
1506
1509
  });
1507
1510
  ```
1508
1511
 
1509
- A command such as `browse snapshot | grep title` passes only when both segments are allowed, and `browse snapshot\nrm -rf /tmp/proof` cannot be hidden behind a `{ prefix: "browse " }` rule because the newline starts a new segment. Glob rules match command strings rather than filesystem path segments: `*` and `?` may span `/`, so `{ glob: "browse *" }` matches URLs and slash-bearing paths such as `browse http://localhost:3000`, `browse docs/index.html`, and `browse ./preview/output.html` while still matching the whole target rather than `echo browse ...`; escaped bracket-class metacharacters such as `\-`, `\^`, `\]`, `\[`, and `\\` stay literal, while malformed glob ranges such as `{ glob: "echo [z-a]" }` become `invalid-policy` denials. Segment mode accepts literal heads such as `grep`, `./script`, `/usr/bin/env`, `bun`, and `browse`, and treats non-leading `>|` as redirection syntax so `echo ok >|/tmp/out` stays one segment, but conservatively rejects reserved or compound heads (`coproc`, `if`, `for`, `while`, `case`, `{`, `}`, `!`), leading redirections (`>file cmd`, `2>file cmd`, `<file cmd`, `&>file cmd`, `&>>file cmd`, `>|file cmd`, `<&0 cmd`, `>&2 cmd`), redirections attached to the command-head word (`cmd>file`, `cmd>>file`, `cmd>|file`, `cmd2>file`, `cmd>&2`, `cmd</tmp/in`), leading environment assignments (`PATH=/tmp:$PATH browse snapshot`, `LD_PRELOAD=/tmp/x browse snapshot`, `FOO=bar`), dynamic heads such as `$cmd`, `${cmd}`, `r''m`, `r\m`, `~/bin/rm`, `r*m`, `{rm,echo}`, `r$(printf m)`, or backtick-built command names. A single denied, redirection-prefixed, attached-redirection, assignment-prefixed, dynamic, or unrecognized segment blocks the whole command with a model-readable tool error and no UI prompt, so the behavior works in headless workflow runs. Use `match: "whole"` only when raw-command matching is intentional.
1512
+ A command such as `playwright-cli snapshot | grep title` passes only when both segments are allowed, and `playwright-cli snapshot\nrm -rf /tmp/proof` cannot be hidden behind a `{ prefix: "playwright-cli " }` rule because the newline starts a new segment. Glob rules match command strings rather than filesystem path segments: `*` and `?` may span `/`, so `{ glob: "playwright-cli *" }` matches URLs and slash-bearing paths such as `playwright-cli http://localhost:3000`, `playwright-cli docs/index.html`, and `playwright-cli ./preview/output.html` while still matching the whole target rather than `echo playwright-cli ...`; escaped bracket-class metacharacters such as `\-`, `\^`, `\]`, `\[`, and `\\` stay literal, while malformed glob ranges such as `{ glob: "echo [z-a]" }` become `invalid-policy` denials. Segment mode accepts literal heads such as `grep`, `./script`, `/usr/bin/env`, `bun`, and `playwright-cli`, and treats non-leading `>|` as redirection syntax so `echo ok >|/tmp/out` stays one segment, but conservatively rejects reserved or compound heads (`coproc`, `if`, `for`, `while`, `case`, `{`, `}`, `!`), leading redirections (`>file cmd`, `2>file cmd`, `<file cmd`, `&>file cmd`, `&>>file cmd`, `>|file cmd`, `<&0 cmd`, `>&2 cmd`), redirections attached to the command-head word (`cmd>file`, `cmd>>file`, `cmd>|file`, `cmd2>file`, `cmd>&2`, `cmd</tmp/in`), leading environment assignments (`PATH=/tmp:$PATH playwright-cli snapshot`, `LD_PRELOAD=/tmp/x playwright-cli snapshot`, `FOO=bar`), dynamic heads such as `$cmd`, `${cmd}`, `r''m`, `r\m`, `~/bin/rm`, `r*m`, `{rm,echo}`, `r$(printf m)`, or backtick-built command names. A single denied, redirection-prefixed, attached-redirection, assignment-prefixed, dynamic, or unrecognized segment blocks the whole command with a model-readable tool error and no UI prompt, so the behavior works in headless workflow runs. Use `match: "whole"` only when raw-command matching is intentional.
1510
1513
 
1511
1514
  `gitWorktreeDir` selects a reusable Git worktree root for `ctx.stage`, `ctx.task`, `ctx.chain`, and `ctx.parallel`. If the path is missing, Atomic creates it with `git worktree add --detach <path> <baseBranch>`; if it exists, it must be a same-repository worktree root. The default stage cwd becomes the matching cwd inside the worktree and preserves the invoking repo-relative subdirectory. Explicit `cwd` still wins; relative `cwd` values resolve from the worktree cwd, while absolute `cwd` values are used as provided. `gitWorktreeDir` is mutually exclusive with `worktree: true`: use `gitWorktreeDir` for named/reusable worktrees and `worktree: true` for temporary direct-mode worktrees that are cleaned up after the run.
1512
1515
 
@@ -1554,6 +1557,27 @@ The standalone `thinkingLevel` stage option is deprecated. It still applies as a
1554
1557
 
1555
1558
  This applies everywhere a stage accepts a model: direct `ctx.task`/`ctx.chain`/`ctx.parallel` options, `ctx.stage` options, builtin workflow stage definitions, and workflow parameters. `fallbackThinkingLevels` is an optional compatibility helper aligned by index to `fallbackModels`; it applies only to fallback entries that do not already carry a suffix. Each `WorkflowModelAttempt` reports the resolved model and the effective reasoning effort used for that attempt.
1556
1559
 
1560
+ ### Context windows
1561
+
1562
+ A `model`/`fallbackModels` entry may also request a context-window budget with a parenthesized size token in the model-name portion — placed *before* the optional `:reasoning` suffix so it never collides with the reasoning level. This mirrors GitHub Copilot's `Claude Opus 4.8 (1M context)` model-name convention:
1563
+
1564
+ ```ts
1565
+ await ctx.task("review", {
1566
+ task: "Review the diff",
1567
+ model: "anthropic/claude-fable-5:xhigh",
1568
+ // The Copilot Opus fallback runs at its largest advertised (long-context) window.
1569
+ fallbackModels: ["github-copilot/claude-opus-4.8 (1m):xhigh", "anthropic/claude-opus-4-8:xhigh"],
1570
+ });
1571
+ ```
1572
+
1573
+ The token accepts the same compact sizes as the `--context-window` flag (`1m`, `936k`, `400k`, or a raw token count) and is resolved against that specific candidate model's advertised windows:
1574
+
1575
+ - an exact supported window is used as-is;
1576
+ - otherwise the largest supported window not exceeding the request is selected, so `(1m)` lands on a model's ~936K long-context tier;
1577
+ - when the model exposes no larger tier (or is unavailable), the request is dropped and the session keeps the model's default (short) window — a non-strict, automatic fallback.
1578
+
1579
+ The budget applies only to the candidate that carries the token; other primary and fallback models in the same chain are unaffected. A parenthesized token that is not a valid size (for example `(preview)`) is left attached to the model id rather than being treated as a context window. For stage-wide selection you can instead set the `contextWindow` (and `contextWindowStrict`) stage option, which maps to the SDK `createAgentSession` options of the same name.
1580
+
1557
1581
  ## Programmatic Usage
1558
1582
 
1559
1583
  `@bastani/workflows` is an Atomic package extension. It registers:
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "atomic-extension-custom-provider-anthropic",
3
- "version": "0.79.4",
3
+ "version": "0.79.7",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "atomic-extension-custom-provider-anthropic",
9
- "version": "0.79.4",
9
+ "version": "0.79.7",
10
10
  "dependencies": {
11
11
  "@anthropic-ai/sdk": "^0.52.0"
12
12
  }
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "atomic-extension-custom-provider-anthropic",
3
3
  "private": true,
4
- "version": "0.79.4",
4
+ "version": "0.79.7",
5
5
  "type": "module",
6
6
  "scripts": {
7
7
  "clean": "echo 'nothing to clean'",
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "atomic-extension-custom-provider-gitlab-duo",
3
3
  "private": true,
4
- "version": "0.79.4",
4
+ "version": "0.79.7",
5
5
  "type": "module",
6
6
  "scripts": {
7
7
  "clean": "echo 'nothing to clean'",
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "atomic-extension-gondolin",
3
- "version": "0.79.4",
3
+ "version": "0.79.7",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "atomic-extension-gondolin",
9
- "version": "0.79.4",
9
+ "version": "0.79.7",
10
10
  "dependencies": {
11
11
  "@earendil-works/gondolin": "0.12.0"
12
12
  }
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "atomic-extension-gondolin",
3
3
  "private": true,
4
- "version": "0.79.4",
4
+ "version": "0.79.7",
5
5
  "type": "module",
6
6
  "scripts": {
7
7
  "clean": "echo 'nothing to clean'",