@bastani/atomic 0.8.13 → 0.8.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. package/CHANGELOG.md +23 -0
  2. package/dist/builtin/intercom/package.json +1 -1
  3. package/dist/builtin/mcp/host-html-template.ts +1 -1
  4. package/dist/builtin/mcp/init.ts +15 -2
  5. package/dist/builtin/mcp/mcp-callback-server.ts +10 -9
  6. package/dist/builtin/mcp/package.json +1 -1
  7. package/dist/builtin/mcp/ui-session.ts +9 -6
  8. package/dist/builtin/subagents/CHANGELOG.md +8 -1
  9. package/dist/builtin/subagents/README.md +39 -32
  10. package/dist/builtin/subagents/package.json +1 -1
  11. package/dist/builtin/subagents/skills/subagent/SKILL.md +11 -11
  12. package/dist/builtin/subagents/src/agents/agent-management.ts +6 -1
  13. package/dist/builtin/subagents/src/agents/agent-serializer.ts +2 -0
  14. package/dist/builtin/subagents/src/agents/agents.ts +44 -19
  15. package/dist/builtin/subagents/src/extension/config.ts +16 -0
  16. package/dist/builtin/subagents/src/extension/fanout-child.ts +246 -0
  17. package/dist/builtin/subagents/src/extension/index.ts +466 -603
  18. package/dist/builtin/subagents/src/intercom/intercom-bridge.ts +6 -4
  19. package/dist/builtin/subagents/src/intercom/result-intercom.ts +109 -1
  20. package/dist/builtin/subagents/src/runs/background/async-execution.ts +124 -19
  21. package/dist/builtin/subagents/src/runs/background/async-job-tracker.ts +41 -6
  22. package/dist/builtin/subagents/src/runs/background/async-resume.ts +28 -15
  23. package/dist/builtin/subagents/src/runs/background/async-status.ts +60 -30
  24. package/dist/builtin/subagents/src/runs/background/result-watcher.ts +111 -54
  25. package/dist/builtin/subagents/src/runs/background/run-id-resolver.ts +83 -0
  26. package/dist/builtin/subagents/src/runs/background/run-status.ts +79 -3
  27. package/dist/builtin/subagents/src/runs/background/stale-run-reconciler.ts +46 -1
  28. package/dist/builtin/subagents/src/runs/background/subagent-runner.ts +66 -14
  29. package/dist/builtin/subagents/src/runs/foreground/chain-execution.ts +10 -3
  30. package/dist/builtin/subagents/src/runs/foreground/execution.ts +14 -2
  31. package/dist/builtin/subagents/src/runs/foreground/subagent-executor.ts +320 -23
  32. package/dist/builtin/subagents/src/runs/shared/completion-guard.ts +23 -1
  33. package/dist/builtin/subagents/src/runs/shared/mcp-direct-tool-allowlist.ts +369 -0
  34. package/dist/builtin/subagents/src/runs/shared/nested-events.ts +935 -0
  35. package/dist/builtin/subagents/src/runs/shared/nested-path.ts +52 -0
  36. package/dist/builtin/subagents/src/runs/shared/nested-render.ts +115 -0
  37. package/dist/builtin/subagents/src/runs/shared/parallel-utils.ts +1 -0
  38. package/dist/builtin/subagents/src/runs/shared/pi-args.ts +82 -9
  39. package/dist/builtin/subagents/src/runs/shared/pi-spawn.ts +1 -1
  40. package/dist/builtin/subagents/src/runs/shared/single-output.ts +12 -2
  41. package/dist/builtin/subagents/src/runs/shared/subagent-prompt-runtime.ts +32 -10
  42. package/dist/builtin/subagents/src/runs/shared/worktree.ts +3 -2
  43. package/dist/builtin/subagents/src/shared/artifacts.ts +0 -1
  44. package/dist/builtin/subagents/src/shared/types.ts +96 -1
  45. package/dist/builtin/subagents/src/shared/utils.ts +10 -2
  46. package/dist/builtin/subagents/src/slash/slash-commands.ts +468 -625
  47. package/dist/builtin/subagents/src/tui/render.ts +1227 -2093
  48. package/dist/builtin/web-access/package.json +1 -1
  49. package/dist/builtin/workflows/CHANGELOG.md +24 -0
  50. package/dist/builtin/workflows/README.md +28 -11
  51. package/dist/builtin/workflows/builtin/deep-research-codebase.ts +323 -40
  52. package/dist/builtin/workflows/builtin/ralph.ts +362 -176
  53. package/dist/builtin/workflows/package.json +2 -5
  54. package/dist/builtin/workflows/skills/research-codebase/SKILL.md +1 -1
  55. package/dist/builtin/workflows/skills/skill-creator/LICENSE.txt +202 -0
  56. package/dist/builtin/workflows/skills/skill-creator/SKILL.md +489 -0
  57. package/dist/builtin/workflows/skills/skill-creator/agents/analyzer.md +274 -0
  58. package/dist/builtin/workflows/skills/skill-creator/agents/comparator.md +202 -0
  59. package/dist/builtin/workflows/skills/skill-creator/agents/grader.md +223 -0
  60. package/dist/builtin/workflows/skills/skill-creator/assets/eval_review.html +146 -0
  61. package/dist/builtin/workflows/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  62. package/dist/builtin/workflows/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  63. package/dist/builtin/workflows/skills/skill-creator/references/schemas.md +430 -0
  64. package/dist/builtin/workflows/skills/skill-creator/scripts/__init__.py +0 -0
  65. package/dist/builtin/workflows/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  66. package/dist/builtin/workflows/skills/skill-creator/scripts/generate_report.py +326 -0
  67. package/dist/builtin/workflows/skills/skill-creator/scripts/improve_description.py +247 -0
  68. package/dist/builtin/workflows/skills/skill-creator/scripts/package_skill.py +136 -0
  69. package/dist/builtin/workflows/skills/skill-creator/scripts/quick_validate.py +103 -0
  70. package/dist/builtin/workflows/skills/skill-creator/scripts/run_eval.py +310 -0
  71. package/dist/builtin/workflows/skills/skill-creator/scripts/run_loop.py +328 -0
  72. package/dist/builtin/workflows/skills/skill-creator/scripts/utils.py +47 -0
  73. package/dist/builtin/workflows/src/extension/index.ts +869 -93
  74. package/dist/builtin/workflows/src/extension/render-call.ts +34 -1
  75. package/dist/builtin/workflows/src/extension/render-result.ts +126 -21
  76. package/dist/builtin/workflows/src/extension/runtime.ts +91 -3
  77. package/dist/builtin/workflows/src/extension/wiring.ts +38 -12
  78. package/dist/builtin/workflows/src/extension/workflow-schema.ts +62 -5
  79. package/dist/builtin/workflows/src/runs/background/runner.ts +3 -3
  80. package/dist/builtin/workflows/src/runs/background/status.ts +42 -8
  81. package/dist/builtin/workflows/src/runs/foreground/executor.ts +410 -95
  82. package/dist/builtin/workflows/src/runs/foreground/stage-control-registry.ts +5 -2
  83. package/dist/builtin/workflows/src/runs/foreground/stage-runner.ts +8 -0
  84. package/dist/builtin/workflows/src/runs/shared/model-fallback.ts +6 -4
  85. package/dist/builtin/workflows/src/runs/shared/worktree.ts +3 -2
  86. package/dist/builtin/workflows/src/shared/persistence-restore.ts +138 -5
  87. package/dist/builtin/workflows/src/shared/persistence-session-entries.ts +30 -0
  88. package/dist/builtin/workflows/src/shared/render-inputs-schema.ts +78 -120
  89. package/dist/builtin/workflows/src/shared/stage-ui-broker.ts +193 -0
  90. package/dist/builtin/workflows/src/shared/store-types.ts +26 -1
  91. package/dist/builtin/workflows/src/shared/store.ts +145 -17
  92. package/dist/builtin/workflows/src/shared/timing.ts +6 -2
  93. package/dist/builtin/workflows/src/shared/workflow-failures.ts +375 -0
  94. package/dist/builtin/workflows/src/tui/chat-surface.ts +68 -17
  95. package/dist/builtin/workflows/src/tui/connectors.ts +2 -2
  96. package/dist/builtin/workflows/src/tui/dispatch-confirm.ts +24 -26
  97. package/dist/builtin/workflows/src/tui/graph-canvas.ts +4 -8
  98. package/dist/builtin/workflows/src/tui/graph-view.ts +17 -14
  99. package/dist/builtin/workflows/src/tui/header.ts +38 -0
  100. package/dist/builtin/workflows/src/tui/inline-form-card.ts +161 -238
  101. package/dist/builtin/workflows/src/tui/inline-form-editor.ts +68 -73
  102. package/dist/builtin/workflows/src/tui/inline-form-overlay.ts +2 -3
  103. package/dist/builtin/workflows/src/tui/inline-form-store.ts +2 -1
  104. package/dist/builtin/workflows/src/tui/inputs-overlay.ts +1 -3
  105. package/dist/builtin/workflows/src/tui/inputs-picker.ts +286 -399
  106. package/dist/builtin/workflows/src/tui/keybindings-adapter.ts +11 -0
  107. package/dist/builtin/workflows/src/tui/node-card.ts +2 -1
  108. package/dist/builtin/workflows/src/tui/overlay-adapter.ts +9 -1
  109. package/dist/builtin/workflows/src/tui/prompt-card.ts +46 -19
  110. package/dist/builtin/workflows/src/tui/run-detail.ts +63 -80
  111. package/dist/builtin/workflows/src/tui/session-confirm.ts +9 -3
  112. package/dist/builtin/workflows/src/tui/session-picker.ts +19 -16
  113. package/dist/builtin/workflows/src/tui/stage-chat-layout.ts +88 -0
  114. package/dist/builtin/workflows/src/tui/stage-chat-view.ts +368 -879
  115. package/dist/builtin/workflows/src/tui/status-helpers.ts +4 -0
  116. package/dist/builtin/workflows/src/tui/status-list.ts +67 -75
  117. package/dist/builtin/workflows/src/tui/store-widget-installer.ts +50 -12
  118. package/dist/builtin/workflows/src/tui/submit-pane.ts +164 -0
  119. package/dist/builtin/workflows/src/tui/switcher.ts +27 -4
  120. package/dist/builtin/workflows/src/tui/text-helpers.ts +98 -4
  121. package/dist/builtin/workflows/src/tui/widget.ts +90 -68
  122. package/dist/builtin/workflows/src/tui/workflow-attach-pane.ts +23 -2
  123. package/dist/builtin/workflows/src/tui/workflow-list.ts +44 -68
  124. package/dist/cli/file-processor.d.ts.map +1 -1
  125. package/dist/cli/file-processor.js +2 -3
  126. package/dist/cli/file-processor.js.map +1 -1
  127. package/dist/config.d.ts.map +1 -1
  128. package/dist/config.js +3 -10
  129. package/dist/config.js.map +1 -1
  130. package/dist/core/agent-session-runtime.d.ts.map +1 -1
  131. package/dist/core/agent-session-runtime.js +2 -1
  132. package/dist/core/agent-session-runtime.js.map +1 -1
  133. package/dist/core/agent-session-services.d.ts.map +1 -1
  134. package/dist/core/agent-session-services.js +3 -2
  135. package/dist/core/agent-session-services.js.map +1 -1
  136. package/dist/core/agent-session.d.ts +6 -0
  137. package/dist/core/agent-session.d.ts.map +1 -1
  138. package/dist/core/agent-session.js +16 -2
  139. package/dist/core/agent-session.js.map +1 -1
  140. package/dist/core/atomic-guide-command.d.ts.map +1 -1
  141. package/dist/core/atomic-guide-command.js +8 -9
  142. package/dist/core/atomic-guide-command.js.map +1 -1
  143. package/dist/core/auth-storage.d.ts.map +1 -1
  144. package/dist/core/auth-storage.js +3 -2
  145. package/dist/core/auth-storage.js.map +1 -1
  146. package/dist/core/bash-executor.d.ts.map +1 -1
  147. package/dist/core/bash-executor.js +2 -1
  148. package/dist/core/bash-executor.js.map +1 -1
  149. package/dist/core/export-html/index.d.ts.map +1 -1
  150. package/dist/core/export-html/index.js +8 -6
  151. package/dist/core/export-html/index.js.map +1 -1
  152. package/dist/core/export-html/template.js +6 -3
  153. package/dist/core/extensions/loader.d.ts.map +1 -1
  154. package/dist/core/extensions/loader.js +12 -29
  155. package/dist/core/extensions/loader.js.map +1 -1
  156. package/dist/core/model-registry.d.ts.map +1 -1
  157. package/dist/core/model-registry.js +5 -1
  158. package/dist/core/model-registry.js.map +1 -1
  159. package/dist/core/package-manager.d.ts +8 -0
  160. package/dist/core/package-manager.d.ts.map +1 -1
  161. package/dist/core/package-manager.js +145 -58
  162. package/dist/core/package-manager.js.map +1 -1
  163. package/dist/core/prompt-templates.d.ts.map +1 -1
  164. package/dist/core/prompt-templates.js +6 -20
  165. package/dist/core/prompt-templates.js.map +1 -1
  166. package/dist/core/resource-loader.d.ts.map +1 -1
  167. package/dist/core/resource-loader.js +38 -31
  168. package/dist/core/resource-loader.js.map +1 -1
  169. package/dist/core/sdk.d.ts.map +1 -1
  170. package/dist/core/sdk.js +9 -4
  171. package/dist/core/sdk.js.map +1 -1
  172. package/dist/core/session-manager.d.ts.map +1 -1
  173. package/dist/core/session-manager.js +32 -24
  174. package/dist/core/session-manager.js.map +1 -1
  175. package/dist/core/settings-manager.d.ts.map +1 -1
  176. package/dist/core/settings-manager.js +8 -15
  177. package/dist/core/settings-manager.js.map +1 -1
  178. package/dist/core/skills.d.ts.map +1 -1
  179. package/dist/core/skills.js +8 -22
  180. package/dist/core/skills.js.map +1 -1
  181. package/dist/core/tools/ask-user-question/state/questionnaire-session.d.ts +5 -4
  182. package/dist/core/tools/ask-user-question/state/questionnaire-session.d.ts.map +1 -1
  183. package/dist/core/tools/ask-user-question/state/questionnaire-session.js +34 -11
  184. package/dist/core/tools/ask-user-question/state/questionnaire-session.js.map +1 -1
  185. package/dist/core/tools/ask-user-question/state/selectors/contract.d.ts +1 -0
  186. package/dist/core/tools/ask-user-question/state/selectors/contract.d.ts.map +1 -1
  187. package/dist/core/tools/ask-user-question/state/selectors/contract.js.map +1 -1
  188. package/dist/core/tools/ask-user-question/state/selectors/projections.d.ts.map +1 -1
  189. package/dist/core/tools/ask-user-question/state/selectors/projections.js +1 -0
  190. package/dist/core/tools/ask-user-question/state/selectors/projections.js.map +1 -1
  191. package/dist/core/tools/ask-user-question/state/state-reducer.d.ts +1 -2
  192. package/dist/core/tools/ask-user-question/state/state-reducer.d.ts.map +1 -1
  193. package/dist/core/tools/ask-user-question/state/state-reducer.js +26 -9
  194. package/dist/core/tools/ask-user-question/state/state-reducer.js.map +1 -1
  195. package/dist/core/tools/ask-user-question/state/state.d.ts +4 -0
  196. package/dist/core/tools/ask-user-question/state/state.d.ts.map +1 -1
  197. package/dist/core/tools/ask-user-question/state/state.js.map +1 -1
  198. package/dist/core/tools/ask-user-question/view/components/option-list-view.d.ts +1 -0
  199. package/dist/core/tools/ask-user-question/view/components/option-list-view.d.ts.map +1 -1
  200. package/dist/core/tools/ask-user-question/view/components/option-list-view.js +1 -0
  201. package/dist/core/tools/ask-user-question/view/components/option-list-view.js.map +1 -1
  202. package/dist/core/tools/ask-user-question/view/components/wrapping-select.d.ts +9 -6
  203. package/dist/core/tools/ask-user-question/view/components/wrapping-select.d.ts.map +1 -1
  204. package/dist/core/tools/ask-user-question/view/components/wrapping-select.js +28 -7
  205. package/dist/core/tools/ask-user-question/view/components/wrapping-select.js.map +1 -1
  206. package/dist/core/tools/ask-user-question/view/props-adapter.d.ts.map +1 -1
  207. package/dist/core/tools/ask-user-question/view/props-adapter.js +4 -1
  208. package/dist/core/tools/ask-user-question/view/props-adapter.js.map +1 -1
  209. package/dist/core/tools/bash.d.ts.map +1 -1
  210. package/dist/core/tools/bash.js +56 -53
  211. package/dist/core/tools/bash.js.map +1 -1
  212. package/dist/core/tools/edit-diff.d.ts +3 -1
  213. package/dist/core/tools/edit-diff.d.ts.map +1 -1
  214. package/dist/core/tools/edit-diff.js +8 -1
  215. package/dist/core/tools/edit-diff.js.map +1 -1
  216. package/dist/core/tools/edit.d.ts +3 -1
  217. package/dist/core/tools/edit.d.ts.map +1 -1
  218. package/dist/core/tools/edit.js +44 -81
  219. package/dist/core/tools/edit.js.map +1 -1
  220. package/dist/core/tools/file-mutation-queue.d.ts.map +1 -1
  221. package/dist/core/tools/file-mutation-queue.js +27 -12
  222. package/dist/core/tools/file-mutation-queue.js.map +1 -1
  223. package/dist/core/tools/find.d.ts.map +1 -1
  224. package/dist/core/tools/find.js +2 -3
  225. package/dist/core/tools/find.js.map +1 -1
  226. package/dist/core/tools/grep.d.ts.map +1 -1
  227. package/dist/core/tools/grep.js +3 -3
  228. package/dist/core/tools/grep.js.map +1 -1
  229. package/dist/core/tools/ls.d.ts.map +1 -1
  230. package/dist/core/tools/ls.js +5 -5
  231. package/dist/core/tools/ls.js.map +1 -1
  232. package/dist/core/tools/output-accumulator.d.ts +2 -0
  233. package/dist/core/tools/output-accumulator.d.ts.map +1 -1
  234. package/dist/core/tools/output-accumulator.js +11 -4
  235. package/dist/core/tools/output-accumulator.js.map +1 -1
  236. package/dist/core/tools/path-utils.d.ts +2 -0
  237. package/dist/core/tools/path-utils.d.ts.map +1 -1
  238. package/dist/core/tools/path-utils.js +39 -21
  239. package/dist/core/tools/path-utils.js.map +1 -1
  240. package/dist/core/tools/read.d.ts.map +1 -1
  241. package/dist/core/tools/read.js +9 -8
  242. package/dist/core/tools/read.js.map +1 -1
  243. package/dist/core/tools/truncate.d.ts.map +1 -1
  244. package/dist/core/tools/truncate.js +12 -2
  245. package/dist/core/tools/truncate.js.map +1 -1
  246. package/dist/core/tools/write.d.ts.map +1 -1
  247. package/dist/core/tools/write.js +20 -35
  248. package/dist/core/tools/write.js.map +1 -1
  249. package/dist/index.d.ts +2 -1
  250. package/dist/index.d.ts.map +1 -1
  251. package/dist/index.js +4 -1
  252. package/dist/index.js.map +1 -1
  253. package/dist/main.d.ts.map +1 -1
  254. package/dist/main.js +5 -6
  255. package/dist/main.js.map +1 -1
  256. package/dist/modes/interactive/chat-input-actions.d.ts +24 -0
  257. package/dist/modes/interactive/chat-input-actions.d.ts.map +1 -0
  258. package/dist/modes/interactive/chat-input-actions.js +179 -0
  259. package/dist/modes/interactive/chat-input-actions.js.map +1 -0
  260. package/dist/modes/interactive/components/chat-message-renderer.d.ts +1 -0
  261. package/dist/modes/interactive/components/chat-message-renderer.d.ts.map +1 -1
  262. package/dist/modes/interactive/components/chat-message-renderer.js +14 -3
  263. package/dist/modes/interactive/components/chat-message-renderer.js.map +1 -1
  264. package/dist/modes/interactive/components/chat-session-host.d.ts +157 -0
  265. package/dist/modes/interactive/components/chat-session-host.d.ts.map +1 -0
  266. package/dist/modes/interactive/components/chat-session-host.js +1007 -0
  267. package/dist/modes/interactive/components/chat-session-host.js.map +1 -0
  268. package/dist/modes/interactive/components/config-selector.d.ts.map +1 -1
  269. package/dist/modes/interactive/components/config-selector.js +1 -1
  270. package/dist/modes/interactive/components/config-selector.js.map +1 -1
  271. package/dist/modes/interactive/components/footer.d.ts +1 -0
  272. package/dist/modes/interactive/components/footer.d.ts.map +1 -1
  273. package/dist/modes/interactive/components/footer.js +14 -5
  274. package/dist/modes/interactive/components/footer.js.map +1 -1
  275. package/dist/modes/interactive/components/index.d.ts +1 -0
  276. package/dist/modes/interactive/components/index.d.ts.map +1 -1
  277. package/dist/modes/interactive/components/index.js +1 -0
  278. package/dist/modes/interactive/components/index.js.map +1 -1
  279. package/dist/modes/interactive/components/login-dialog.d.ts +9 -1
  280. package/dist/modes/interactive/components/login-dialog.d.ts.map +1 -1
  281. package/dist/modes/interactive/components/login-dialog.js +29 -4
  282. package/dist/modes/interactive/components/login-dialog.js.map +1 -1
  283. package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
  284. package/dist/modes/interactive/interactive-mode.js +18 -67
  285. package/dist/modes/interactive/interactive-mode.js.map +1 -1
  286. package/dist/utils/child-process.d.ts +1 -0
  287. package/dist/utils/child-process.d.ts.map +1 -1
  288. package/dist/utils/child-process.js +8 -0
  289. package/dist/utils/child-process.js.map +1 -1
  290. package/dist/utils/clipboard-native.d.ts +3 -1
  291. package/dist/utils/clipboard-native.d.ts.map +1 -1
  292. package/dist/utils/clipboard-native.js +14 -8
  293. package/dist/utils/clipboard-native.js.map +1 -1
  294. package/dist/utils/image-resize-core.d.ts +30 -0
  295. package/dist/utils/image-resize-core.d.ts.map +1 -0
  296. package/dist/utils/image-resize-core.js +124 -0
  297. package/dist/utils/image-resize-core.js.map +1 -0
  298. package/dist/utils/image-resize-worker.d.ts +2 -0
  299. package/dist/utils/image-resize-worker.d.ts.map +1 -0
  300. package/dist/utils/image-resize-worker.js +31 -0
  301. package/dist/utils/image-resize-worker.js.map +1 -0
  302. package/dist/utils/image-resize.d.ts +7 -27
  303. package/dist/utils/image-resize.d.ts.map +1 -1
  304. package/dist/utils/image-resize.js +75 -115
  305. package/dist/utils/image-resize.js.map +1 -1
  306. package/dist/utils/paths.d.ts +16 -1
  307. package/dist/utils/paths.d.ts.map +1 -1
  308. package/dist/utils/paths.js +49 -7
  309. package/dist/utils/paths.js.map +1 -1
  310. package/docs/changelog.mdx +29 -0
  311. package/docs/compaction.md +1 -1
  312. package/docs/custom-provider.md +2 -2
  313. package/docs/development.md +1 -1
  314. package/docs/docs.json +98 -143
  315. package/docs/extensions.md +29 -16
  316. package/docs/favicon.svg +29 -0
  317. package/docs/images/interactive-mode.png +0 -0
  318. package/docs/images/tree-view.png +0 -0
  319. package/docs/images/workflow-command.png +0 -0
  320. package/docs/images/workflow-graph.png +0 -0
  321. package/docs/images/workflow-input-picker.png +0 -0
  322. package/docs/images/workflow-list.png +0 -0
  323. package/docs/index.md +10 -1
  324. package/docs/logo.svg +59 -0
  325. package/docs/packages.md +3 -3
  326. package/docs/providers.md +1 -1
  327. package/docs/quickstart.md +98 -2
  328. package/docs/rpc.md +8 -8
  329. package/docs/sdk.md +23 -12
  330. package/docs/sessions.md +1 -1
  331. package/docs/skills.md +15 -1
  332. package/docs/termux.md +11 -1
  333. package/docs/themes.md +6 -6
  334. package/docs/tui.md +18 -18
  335. package/docs/usage.md +1 -1
  336. package/docs/workflows.md +172 -2
  337. package/examples/extensions/subagent/index.ts +2 -1
  338. package/package.json +6 -6
  339. /package/dist/builtin/{workflows → subagents}/skills/playwright-cli/SKILL.md +0 -0
  340. /package/dist/builtin/{workflows → subagents}/skills/playwright-cli/references/element-attributes.md +0 -0
  341. /package/dist/builtin/{workflows → subagents}/skills/playwright-cli/references/playwright-tests.md +0 -0
  342. /package/dist/builtin/{workflows → subagents}/skills/playwright-cli/references/request-mocking.md +0 -0
  343. /package/dist/builtin/{workflows → subagents}/skills/playwright-cli/references/running-code.md +0 -0
  344. /package/dist/builtin/{workflows → subagents}/skills/playwright-cli/references/session-management.md +0 -0
  345. /package/dist/builtin/{workflows → subagents}/skills/playwright-cli/references/spec-driven-testing.md +0 -0
  346. /package/dist/builtin/{workflows → subagents}/skills/playwright-cli/references/storage-state.md +0 -0
  347. /package/dist/builtin/{workflows → subagents}/skills/playwright-cli/references/test-generation.md +0 -0
  348. /package/dist/builtin/{workflows → subagents}/skills/playwright-cli/references/tracing.md +0 -0
  349. /package/dist/builtin/{workflows → subagents}/skills/playwright-cli/references/video-recording.md +0 -0
  350. /package/dist/builtin/{workflows → subagents}/skills/tdd/SKILL.md +0 -0
  351. /package/dist/builtin/{workflows → subagents}/skills/tdd/deep-modules.md +0 -0
  352. /package/dist/builtin/{workflows → subagents}/skills/tdd/interface-design.md +0 -0
  353. /package/dist/builtin/{workflows → subagents}/skills/tdd/mocking.md +0 -0
  354. /package/dist/builtin/{workflows → subagents}/skills/tdd/refactoring.md +0 -0
  355. /package/dist/builtin/{workflows → subagents}/skills/tdd/tests.md +0 -0
@@ -7,10 +7,16 @@
7
7
  * iteration feeds review findings into the next planner with ctx.task().
8
8
  */
9
9
 
10
+ import { mkdir, mkdtemp, writeFile } from "node:fs/promises";
11
+ import { tmpdir } from "node:os";
12
+ import { dirname, extname, join } from "node:path";
10
13
  import { defineWorkflow } from "../src/index.js";
11
14
  import type { WorkflowTaskResult } from "../src/shared/types.js";
12
15
 
13
16
  const DEFAULT_MAX_LOOPS = 10;
17
+ const DEFAULT_SPEC_DIR = "specs";
18
+ const IMPLEMENTATION_NOTES_FILENAME = "implementation-notes.md";
19
+ const MAX_SPEC_SLUG_LENGTH = 80;
14
20
 
15
21
  type ReviewFinding = {
16
22
  readonly title: string;
@@ -225,6 +231,82 @@ function positiveInteger(value: number | undefined, fallback: number): number {
225
231
  : fallback;
226
232
  }
227
233
 
234
+ function normalizeBranchInput(
235
+ value: string | undefined,
236
+ fallback: string,
237
+ ): string {
238
+ const trimmed = value?.trim();
239
+ if (!trimmed) return fallback;
240
+
241
+ const looksLikeSafeGitRef =
242
+ /^(?!-)(?!.*(?:\.\.|@\{|\/\/|\.lock(?:\/|$)))[A-Za-z0-9][A-Za-z0-9._/@+-]*$/.test(
243
+ trimmed,
244
+ );
245
+ return looksLikeSafeGitRef ? trimmed : fallback;
246
+ }
247
+
248
+ function slugifySpecTopic(prompt: string): string {
249
+ const slug = prompt
250
+ .toLowerCase()
251
+ .replace(/[^a-z0-9]+/g, "-")
252
+ .replace(/^-+|-+$/g, "")
253
+ .slice(0, MAX_SPEC_SLUG_LENGTH)
254
+ .replace(/-+$/g, "");
255
+ return slug.length > 0 ? slug : "plan";
256
+ }
257
+
258
+ function defaultSpecPath(prompt: string, now = new Date()): string {
259
+ const date = now.toISOString().slice(0, 10);
260
+ return join(DEFAULT_SPEC_DIR, `${date}-${slugifySpecTopic(prompt)}.md`);
261
+ }
262
+
263
+ function suffixedPath(path: string, suffix: number): string {
264
+ const extension = extname(path);
265
+ const stem = extension.length === 0 ? path : path.slice(0, -extension.length);
266
+ return `${stem}-${suffix}${extension}`;
267
+ }
268
+
269
+ function isFileExistsError(error: unknown): boolean {
270
+ return error instanceof Error && (error as { readonly code?: string }).code === "EEXIST";
271
+ }
272
+
273
+ async function writeSpecFile(path: string, content: string): Promise<string> {
274
+ await mkdir(dirname(path), { recursive: true });
275
+
276
+ for (let suffix = 0; ; suffix += 1) {
277
+ const candidate = suffix === 0 ? path : suffixedPath(path, suffix + 1);
278
+ try {
279
+ await writeFile(candidate, content.endsWith("\n") ? content : `${content}\n`, {
280
+ encoding: "utf8",
281
+ flag: "wx",
282
+ });
283
+ return candidate;
284
+ } catch (error) {
285
+ if (isFileExistsError(error)) continue;
286
+ throw error;
287
+ }
288
+ }
289
+ }
290
+
291
+ async function createImplementationNotesFile(prompt: string): Promise<string> {
292
+ const notesDir = await mkdtemp(join(tmpdir(), "atomic-ralph-notes-"));
293
+ const notesPath = join(notesDir, IMPLEMENTATION_NOTES_FILENAME);
294
+ const initialNotes = [
295
+ "# Implementation Notes",
296
+ "",
297
+ `Task: ${prompt || "(empty prompt)"}`,
298
+ "",
299
+ "## Running Notes",
300
+ "",
301
+ "- Record implementation decisions, deviations from the spec, tradeoffs, blockers, validation notes, and anything else the user should know.",
302
+ ].join("\n");
303
+ await writeFile(notesPath, `${initialNotes}\n`, {
304
+ encoding: "utf8",
305
+ flag: "wx",
306
+ });
307
+ return notesPath;
308
+ }
309
+
228
310
  function parseReviewDecision(text: string): ReviewDecision | undefined {
229
311
  try {
230
312
  const parsed = JSON.parse(text) as Partial<ReviewDecision>;
@@ -274,15 +356,22 @@ function reviewerErrorResult(
274
356
  },
275
357
  };
276
358
  return {
277
- name: `reviewer-${iteration}-error`,
278
- stageName: `reviewer-${iteration}-error`,
359
+ name: "reviewer-error",
360
+ stageName: "reviewer-error",
279
361
  text: JSON.stringify(decision, null, 2),
280
362
  };
281
363
  }
282
364
 
365
+ function discoveryContextLabel(name: string | undefined): string {
366
+ if (name?.startsWith("infra-locate-")) return "Infrastructure locator";
367
+ if (name?.startsWith("infra-analyze-")) return "Infrastructure analyzer";
368
+ if (name?.startsWith("infra-patterns-")) return "Infrastructure pattern finder";
369
+ return "Infrastructure discovery";
370
+ }
371
+
283
372
  function formatDiscovery(results: readonly WorkflowTaskResult[]): string {
284
373
  return results
285
- .map((result) => `### ${result.name}\n\n${result.text}`)
374
+ .map((result) => `### ${discoveryContextLabel(result.name)}\n\n${result.text}`)
286
375
  .join("\n\n---\n\n");
287
376
  }
288
377
 
@@ -306,17 +395,28 @@ export default defineWorkflow("ralph")
306
395
  default: DEFAULT_MAX_LOOPS,
307
396
  description: `Maximum plan/orchestrate/review iterations (default ${DEFAULT_MAX_LOOPS}).`,
308
397
  })
398
+ .input("base_branch", {
399
+ type: "string",
400
+ default: "origin/main",
401
+ description:
402
+ "Branch reviewers compare the current code delta against (default origin/main).",
403
+ })
309
404
  .run(async (ctx) => {
310
405
  const inputs = ctx.inputs as {
311
406
  prompt?: string;
312
407
  max_loops?: number;
408
+ base_branch?: string;
313
409
  };
314
410
  const prompt = inputs.prompt ?? "";
315
411
  const maxLoops = positiveInteger(inputs.max_loops, DEFAULT_MAX_LOOPS);
412
+ const comparisonBaseBranch = normalizeBranchInput(inputs.base_branch, "origin/main");
316
413
 
317
414
  let reviewReport = "";
318
415
  let finalPlan = "";
416
+ let finalPlanPath = "";
319
417
  let finalResult = "";
418
+ let finalPrReport = "";
419
+ const implementationNotesPath = await createImplementationNotesFile(prompt);
320
420
  let approved = false;
321
421
  let iterationsCompleted = 0;
322
422
 
@@ -398,172 +498,188 @@ export default defineWorkflow("ralph")
398
498
  for (let iteration = 1; iteration <= maxLoops; iteration += 1) {
399
499
  iterationsCompleted = iteration;
400
500
 
401
- const planAndExecute = await ctx.chain(
402
- [
403
- {
404
- name: `planner-${iteration}`,
405
- task: taggedPrompt([
406
- [
407
- "role",
408
- "You are a technical architect. Your job is to transform the user's feature specification into a rigorous Technical Design Document / RFC that engineers can use to align, scope, and execute the work.",
409
- ],
410
- [
411
- "critical_deliverable",
412
- [
413
- "Your final output is a filled-in RFC rendered as markdown text.",
414
- "Render the RFC Template in this prompt with every section populated by feature-specific content drawn from the user's specification and your codebase investigation.",
415
- "Do not implement code changes in this stage; this stage only investigates and authors the RFC.",
416
- ].join("\n"),
417
- ],
418
- [
419
- "task",
420
- `Plan iteration ${iteration}/${maxLoops} for this user specification:\n${prompt}`,
421
- ],
422
- [
423
- "previous_review_findings",
424
- reviewReport
425
- ? "Previous review findings:\n{previous}"
426
- : "No prior review findings; this is the first iteration.",
427
- ],
428
- [
429
- "short_circuit",
430
- [
431
- "If the user specification is a file path instead of raw prose, and it explicitly asks you to forward or use that path rather than author an RFC, output only the absolute path and stop.",
432
- "Otherwise, author the RFC normally.",
433
- ].join("\n"),
434
- ],
435
- [
436
- "investigation_phase",
437
- [
438
- "Before drafting, read the specification carefully and identify the concrete problem, success criteria, hard constraints, and non-goals.",
439
- "Survey the codebase using file/search tools such as read plus grep/rg/find/glob-style shell commands to ground the RFC in current architecture.",
440
- "Name concrete services, modules, files, tests, data models, APIs, CLIs, config files, and external integrations this work will touch.",
441
- "Capture metadata with bash: `git config user.name` for Author(s), and `date '+%Y-%m-%d'` for Created / Last Updated.",
442
- "Look for prior art: existing RFCs, ADRs, README files, specs, docs, tests, or code comments that explain why the current state exists.",
443
- ].join("\n"),
444
- ],
445
- [
446
- "authoring_principles",
447
- [
448
- "Be specific: `src/server/auth.ts:42` beats `the auth layer`.",
449
- "Trade-offs over conclusions: Alternatives Considered must include at least two real alternatives with honest pros, cons, and rejection reasons.",
450
- "Non-goals matter: explicitly exclude work that is out of scope to prevent scope creep.",
451
- "Diagrams are load-bearing: Section 4.1 must include a Mermaid system architecture diagram grounded in real components.",
452
- "Surface open questions in Section 9 with owner placeholders such as `[OWNER: infra team]`; do not paper over uncertainty.",
453
- "Match depth to stakes: a small refactor can be concise, but every template section header must remain present.",
454
- "If prior review findings are present, explicitly address each finding or explain why it is obsolete.",
455
- ].join("\n"),
456
- ],
457
- [
458
- "stage_contract",
459
- [
460
- "This stage is investigation-first RFC authoring. The RFC is only valid if it is grounded in repository inspection performed during this stage.",
461
- "Do not fill the template from generic architecture guesses. Before writing the final RFC, inspect relevant code, docs, tests, configs, and prior design material.",
462
- "Treat the output format as the report after investigation, not a substitute for investigation.",
463
- ].join("\n"),
464
- ],
465
- [
466
- "evidence_expectations",
467
- [
468
- "Every major design claim should be traceable to concrete evidence: file paths, symbols, commands, docs, tests, configs, or prior RFCs.",
469
- "Include those concrete references inside the RFC sections where they support the design.",
470
- "If expected evidence cannot be found, say so in the relevant RFC section or Open Questions rather than papering over the gap.",
471
- ].join("\n"),
472
- ],
473
- [
474
- "output_discipline",
475
- [
476
- "Render the RFC Template exactly as the final document structure: preserve every header and the metadata table.",
477
- "Replace instructional placeholders with real, feature-specific content; do not leave template guidance in the final RFC.",
478
- "Output nothing after the RFC: no meta-commentary, no summary of what you wrote, no implementation log.",
479
- ].join("\n"),
480
- ],
481
- ["rfc_template", PLANNER_RFC_TEMPLATE],
482
- ]),
483
- ...(reviewReport
484
- ? { previous: { name: "review-report", text: reviewReport } }
485
- : {}),
486
- ...plannerModelConfig,
487
- },
488
- {
489
- name: `orchestrator-${iteration}`,
490
- task: taggedPrompt([
491
- [
492
- "role",
493
- "You are a sub-agent orchestrator with many tools available. Your primary implementation tool is the `subagent` tool.",
494
- ],
495
- [
496
- "objective",
497
- `Implement iteration ${iteration}/${maxLoops} for the task: ${prompt}`,
498
- ],
499
- ["planner_notes", "{previous}"],
500
- [
501
- "delegation_policy",
502
- [
503
- "You are not the implementer. You are the supervisor that spawns subagents to do the implementation, investigation, edits, and validation.",
504
- "All non-trivial operations must be delegated to subagents via the `subagent` tool before you claim progress.",
505
- "Delegate codebase understanding, impact analysis, and implementation research to codebase-locator, codebase-analyzer, and pattern-finder style subagents when available.",
506
- "Delegate shell-heavy work — especially commands likely to produce lots of output, log digging, CLI investigation, and broad grep/find exploration — to subagents that can run those commands rather than doing it in this orchestrator context.",
507
- "Delegate implementation edits to a focused subagent with clear files, constraints, and validation expectations; do not merely describe the edits yourself.",
508
- "Use separate subagents for separate tasks, and launch independent subagents in parallel when useful.",
509
- "Do not split highly overlapping tasks across multiple subagents; consolidate overlapping work into one focused delegation to avoid duplicate effort.",
510
- "If a subagent takes a long time, do not attempt to do its assigned job yourself while waiting. Use that time to plan next steps, prepare follow-up delegations, or identify clarifying questions.",
511
- ].join("\n"),
512
- ],
513
- [
514
- "execution_contract",
515
- [
516
- "The required output format is a completion report, not the task itself.",
517
- "Do not jump straight to the report. First spawn the necessary subagents, wait for their results, coordinate any follow-up subagents, and only then write the report.",
518
- "A valid response must be grounded in actual subagent work: name the delegated work, summarize what each subagent did, and distinguish completed changes from recommendations or blockers.",
519
- "If you cannot spawn or use subagents, treat that as a blocker and report it honestly instead of pretending the requested work was done.",
520
- ].join("\n"),
521
- ],
522
- [
523
- "subagent_tracking",
524
- [
525
- "Use the `todo` tool as your active control ledger for subagent work.",
526
- "Before launching subagents, create todo items for each delegated task with enough detail to identify owner, purpose, and expected output.",
527
- "Mark todo items in_progress when the corresponding subagent starts, append progress/results as subagents report back, and close them only after you have incorporated or explicitly rejected their result.",
528
- "Keep pending, in_progress, blocked, and completed work accurate so you do not lose track of parallel subagents or unresolved follow-ups.",
529
- "Before writing the final report, review the todo list and resolve every pending/in_progress item as completed, blocked, or deferred with an explanation.",
530
- ].join("\n"),
531
- ],
532
- [
533
- "instructions",
534
- [
535
- "Start from the planner notes and decompose the work into delegated subagent tasks.",
536
- "Pass each subagent the relevant task, constraints, files, validation expectations, and any prior review findings.",
537
- "Coordinate subagent results into the smallest coherent set of changes that satisfies the planner notes.",
538
- "Preserve existing architecture and repository conventions unless the plan explicitly justifies a change.",
539
- "Run or delegate the most relevant validation commands available in the repository.",
540
- "If blocked, describe the blocker and the safest partial state instead of inventing success.",
541
- "Do not hide failures; reviewers need accurate status.",
542
- ].join("\n"),
543
- ],
544
- [
545
- "output_format",
546
- [
547
- "After subagents have done the work, return Markdown with headings:",
548
- "1. Delegations performed — subagents spawned and what each completed",
549
- "2. Changes made — concrete changes from subagent work, not intentions",
550
- "3. Files touched",
551
- "4. Validation run / recommended",
552
- "5. Deferred work or blockers",
553
- ].join("\n"),
554
- ],
555
- ]),
556
- ...orchestratorModelConfig,
557
- },
558
- ],
559
- { task: prompt },
560
- );
561
- const planner = planAndExecute[0]!;
562
- const orchestrator = planAndExecute[1]!;
501
+ const planner = await ctx.task(`planner-${iteration}`, {
502
+ prompt: taggedPrompt([
503
+ [
504
+ "role",
505
+ "You are a technical architect. Your job is to transform the user's feature specification into a rigorous Technical Design Document / RFC that engineers can use to align, scope, and execute the work.",
506
+ ],
507
+ [
508
+ "critical_deliverable",
509
+ [
510
+ "Your final output is a filled-in RFC rendered as markdown text.",
511
+ "Render the RFC Template in this prompt with every section populated by feature-specific content drawn from the user's specification and your codebase investigation.",
512
+ "Do not implement code changes in this stage; this stage only investigates and authors the RFC.",
513
+ ].join("\n"),
514
+ ],
515
+ [
516
+ "task",
517
+ `Plan iteration ${iteration}/${maxLoops} for this user specification:\n${prompt}`,
518
+ ],
519
+ [
520
+ "previous_review_findings",
521
+ reviewReport
522
+ ? "Previous review findings:\n{previous}"
523
+ : "No prior review findings; this is the first iteration.",
524
+ ],
525
+ [
526
+ "input_spec_files",
527
+ [
528
+ "If the user specification is a file path instead of raw prose, read that file and use it as source material for the RFC.",
529
+ "Still author the RFC normally; do not output only a forwarded path.",
530
+ ].join("\n"),
531
+ ],
532
+ [
533
+ "investigation_phase",
534
+ [
535
+ "Before drafting, read the specification carefully and identify the concrete problem, success criteria, hard constraints, and non-goals.",
536
+ "Survey the codebase using file/search tools such as read plus grep/rg/find/glob-style shell commands to ground the RFC in current architecture.",
537
+ "Name concrete services, modules, files, tests, data models, APIs, CLIs, config files, and external integrations this work will touch.",
538
+ "Capture metadata with bash: `git config user.name` for Author(s), and `date '+%Y-%m-%d'` for Created / Last Updated.",
539
+ "Look for prior art: existing RFCs, ADRs, README files, specs, docs, tests, or code comments that explain why the current state exists.",
540
+ ].join("\n"),
541
+ ],
542
+ [
543
+ "authoring_principles",
544
+ [
545
+ "Be specific: `src/server/auth.ts:42` beats `the auth layer`.",
546
+ "Trade-offs over conclusions: Alternatives Considered must include at least two real alternatives with honest pros, cons, and rejection reasons.",
547
+ "Non-goals matter: explicitly exclude work that is out of scope to prevent scope creep.",
548
+ "Diagrams are load-bearing: Section 4.1 must include a Mermaid system architecture diagram grounded in real components.",
549
+ "Surface open questions in Section 9 with owner placeholders such as `[OWNER: infra team]`; do not paper over uncertainty.",
550
+ "Match depth to stakes: a small refactor can be concise, but every template section header must remain present.",
551
+ "If prior review findings are present, explicitly address each finding or explain why it is obsolete.",
552
+ ].join("\n"),
553
+ ],
554
+ [
555
+ "stage_contract",
556
+ [
557
+ "This stage is investigation-first RFC authoring. The RFC is only valid if it is grounded in repository inspection performed during this stage.",
558
+ "Do not fill the template from generic architecture guesses. Before writing the final RFC, inspect relevant code, docs, tests, configs, and prior design material.",
559
+ "Treat the output format as the report after investigation, not a substitute for investigation.",
560
+ ].join("\n"),
561
+ ],
562
+ [
563
+ "evidence_expectations",
564
+ [
565
+ "Every major design claim should be traceable to concrete evidence: file paths, symbols, commands, docs, tests, configs, or prior RFCs.",
566
+ "Include those concrete references inside the RFC sections where they support the design.",
567
+ "If expected evidence cannot be found, say so in the relevant RFC section or Open Questions rather than papering over the gap.",
568
+ ].join("\n"),
569
+ ],
570
+ [
571
+ "output_discipline",
572
+ [
573
+ "Render the RFC Template exactly as the final document structure: preserve every header and the metadata table.",
574
+ "Replace instructional placeholders with real, feature-specific content; do not leave template guidance in the final RFC.",
575
+ "Output nothing after the RFC: no meta-commentary, no summary of what you wrote, no implementation log.",
576
+ ].join("\n"),
577
+ ],
578
+ ["rfc_template", PLANNER_RFC_TEMPLATE],
579
+ ]),
580
+ ...(reviewReport
581
+ ? { previous: { name: "review-report", text: reviewReport } }
582
+ : {}),
583
+ ...plannerModelConfig,
584
+ });
563
585
  finalPlan = planner.text;
586
+ const specPath = await writeSpecFile(defaultSpecPath(prompt), planner.text);
587
+ finalPlanPath = specPath;
588
+
589
+ const orchestrator = await ctx.task(`orchestrator-${iteration}`, {
590
+ prompt: taggedPrompt([
591
+ [
592
+ "role",
593
+ "You are a sub-agent orchestrator with many tools available. Your primary implementation tool is the `subagent` tool.",
594
+ ],
595
+ [
596
+ "objective",
597
+ `Implement iteration ${iteration}/${maxLoops} for the task: ${prompt}`,
598
+ ],
599
+ [
600
+ "spec_file",
601
+ [
602
+ `The technical specification for this iteration was written to: ${specPath}`,
603
+ "Read this file before delegating or implementing anything.",
604
+ "Do not rely on an inline planner transcript; the spec file is the authoritative plan for this iteration.",
605
+ ].join("\n"),
606
+ ],
607
+ [
608
+ "implementation_notes",
609
+ [
610
+ `Keep a running Markdown implementation notes file at this OS temp directory path: ${implementationNotesPath}`,
611
+ "The file has already been initialized for this workflow run; update it while you implement the spec.",
612
+ "Record decisions you had to make that were not in the spec, things you had to change from the spec, tradeoffs you had to make, blockers, validation outcomes, and anything else the user should know.",
613
+ "Ask delegated subagents to report any notes-worthy decisions or tradeoffs back to you, then consolidate them into this file before your final report.",
614
+ "Do not include secrets, credentials, tokens, or unrelated environment details in the notes file.",
615
+ ].join("\n"),
616
+ ],
617
+ [
618
+ "delegation_policy",
619
+ [
620
+ "You are not the implementer. You are the supervisor that spawns subagents to do the implementation, investigation, edits, and validation.",
621
+ "All non-trivial operations must be delegated to subagents via the `subagent` tool before you claim progress.",
622
+ "Delegate codebase understanding, impact analysis, and implementation research to codebase-locator, codebase-analyzer, and pattern-finder style subagents when available.",
623
+ "Delegate shell-heavy work — especially commands likely to produce lots of output, log digging, CLI investigation, and broad grep/find exploration — to subagents that can run those commands rather than doing it in this orchestrator context.",
624
+ "Delegate implementation edits to a focused subagent with clear files, constraints, and validation expectations; do not merely describe the edits yourself.",
625
+ "Use separate subagents for separate tasks, and launch independent subagents in parallel when useful.",
626
+ "Do not split highly overlapping tasks across multiple subagents; consolidate overlapping work into one focused delegation to avoid duplicate effort.",
627
+ "If a subagent takes a long time, do not attempt to do its assigned job yourself while waiting. Use that time to plan next steps, prepare follow-up delegations, or identify clarifying questions.",
628
+ ].join("\n"),
629
+ ],
630
+ [
631
+ "execution_contract",
632
+ [
633
+ "The required output format is a completion report, not the task itself.",
634
+ "Do not jump straight to the report. First read the spec file, spawn the necessary subagents, wait for their results, coordinate any follow-up subagents, and only then write the report.",
635
+ "A valid response must be grounded in actual subagent work: name the delegated work, summarize what each subagent did, and distinguish completed changes from recommendations or blockers.",
636
+ "If you cannot read the spec file, spawn subagents, or use subagents, treat that as a blocker and report it honestly instead of pretending the requested work was done.",
637
+ ].join("\n"),
638
+ ],
639
+ [
640
+ "subagent_tracking",
641
+ [
642
+ "Use the `todo` tool as your active control ledger for subagent work.",
643
+ "Before launching subagents, create todo items for each delegated task with enough detail to identify owner, purpose, and expected output.",
644
+ "Mark todo items in_progress when the corresponding subagent starts, append progress/results as subagents report back, and close them only after you have incorporated or explicitly rejected their result.",
645
+ "Keep pending, in_progress, blocked, and completed work accurate so you do not lose track of parallel subagents or unresolved follow-ups.",
646
+ "Before writing the final report, review the todo list and resolve every pending/in_progress item as completed, blocked, or deferred with an explanation.",
647
+ ].join("\n"),
648
+ ],
649
+ [
650
+ "instructions",
651
+ [
652
+ `Start by reading the spec file at ${specPath}.`,
653
+ "Decompose the work into delegated subagent tasks based on that spec file.",
654
+ "Pass each subagent the relevant task, constraints, files, validation expectations, any prior review findings from the spec, and instructions to report implementation-note-worthy decisions or tradeoffs.",
655
+ "Coordinate subagent results into the smallest coherent set of changes that satisfies the spec.",
656
+ "Preserve existing architecture and repository conventions unless the spec explicitly justifies a change.",
657
+ "Run or delegate the most relevant validation commands available in the repository.",
658
+ `Before your final report, update the running implementation notes file at ${implementationNotesPath} with decisions, spec deviations, tradeoffs, blockers, and validation outcomes from this iteration.`,
659
+ "If blocked, describe the blocker and the safest partial state instead of inventing success.",
660
+ "Do not hide failures; reviewers need accurate status.",
661
+ ].join("\n"),
662
+ ],
663
+ [
664
+ "output_format",
665
+ [
666
+ "After subagents have done the work, return Markdown with headings:",
667
+ "1. Spec file — the path you read",
668
+ "2. Delegations performed — subagents spawned and what each completed",
669
+ "3. Changes made — concrete changes from subagent work, not intentions",
670
+ "4. Files touched",
671
+ "5. Validation run / recommended",
672
+ "6. Deferred work or blockers",
673
+ "7. Implementation notes — confirm the OS temp notes path was updated",
674
+ ].join("\n"),
675
+ ],
676
+ ]),
677
+ reads: [specPath, implementationNotesPath],
678
+ ...orchestratorModelConfig,
679
+ });
564
680
  finalResult = orchestrator.text;
565
681
 
566
- const simplifier = await ctx.task(`code-simplifier-${iteration}`, {
682
+ await ctx.task(`code-simplifier-${iteration}`, {
567
683
  prompt: taggedPrompt([
568
684
  [
569
685
  "role",
@@ -795,10 +911,16 @@ export default defineWorkflow("ralph")
795
911
  ],
796
912
  [
797
913
  "objective",
798
- `Review iteration ${iteration}/${maxLoops} for the task: ${prompt}`,
914
+ `Review the current code delta for the task: ${prompt}`,
915
+ ],
916
+ [
917
+ "comparison_baseline",
918
+ [
919
+ `The baseline branch for comparison is \`${comparisonBaseBranch}\`.`,
920
+ "Compare the current working tree against this baseline branch, not against previous workflow reasoning or expected loop progress.",
921
+ `Start with \`git status --short\`, then use working-tree-aware commands such as \`git diff ${comparisonBaseBranch}\` and \`git diff --cached ${comparisonBaseBranch}\` to identify changed tracked files; inspect untracked files from status directly.`,
922
+ ].join("\n"),
799
923
  ],
800
- ["latest_orchestrator_result", orchestrator.text],
801
- ["latest_simplifier_result", simplifier.text],
802
924
  ["infrastructure_discovery", discoveryContext],
803
925
  [
804
926
  "project_guidance",
@@ -826,7 +948,7 @@ export default defineWorkflow("ralph")
826
948
  "A finding should meaningfully impact accuracy, performance, security, or maintainability.",
827
949
  "A finding must be discrete and actionable, not a broad complaint about the whole codebase or a pile of related concerns.",
828
950
  "Do not demand rigor inconsistent with the rest of the repository; match the seriousness of existing code and project norms.",
829
- "Flag only bugs introduced by this iteration's patch; do not flag pre-existing issues unless the patch makes them worse in a concrete way.",
951
+ "Flag only bugs introduced by the current patch; do not flag pre-existing issues unless the patch makes them worse in a concrete way.",
830
952
  "Do not rely on unstated assumptions about author intent or codebase behavior.",
831
953
  "Speculation is insufficient: identify the code path, scenario, environment, or input that is provably affected.",
832
954
  "Do not flag intentional behavior changes as bugs unless they clearly violate the task or documented contract.",
@@ -858,8 +980,8 @@ export default defineWorkflow("ralph")
858
980
  [
859
981
  "review_stage_contract",
860
982
  [
861
- "The structured review decision is only valid after you inspect the actual repository state for this iteration.",
862
- "Do not approve based solely on orchestrator, simplifier, or discovery summaries.",
983
+ "The structured review decision is only valid after you inspect the actual repository state and compare it against the stated baseline branch.",
984
+ "Do not approve based solely on workflow stage summaries or prior agent reasoning.",
863
985
  "The tool call is the final verdict after review work, not a shortcut around review work.",
864
986
  ].join("\n"),
865
987
  ],
@@ -916,15 +1038,13 @@ export default defineWorkflow("ralph")
916
1038
  reviews = await ctx.parallel(
917
1039
  [
918
1040
  {
919
- name: `reviewer-${iteration}-a`,
1041
+ name: "reviewer-a",
920
1042
  task: reviewPrompt,
921
- previous: [orchestrator, simplifier, ...discovery],
922
1043
  ...reviewerModelConfig,
923
1044
  },
924
1045
  {
925
- name: `reviewer-${iteration}-b`,
1046
+ name: "reviewer-b",
926
1047
  task: reviewPrompt,
927
- previous: [orchestrator, simplifier, ...discovery],
928
1048
  ...reviewerModelConfig,
929
1049
  },
930
1050
  ],
@@ -942,9 +1062,75 @@ export default defineWorkflow("ralph")
942
1062
  if (approved) break;
943
1063
  }
944
1064
 
1065
+ const prResult = await ctx.task("pull-request", {
1066
+ prompt: taggedPrompt([
1067
+ [
1068
+ "role",
1069
+ "You are a careful release engineer preparing a pull request from the current workspace state.",
1070
+ ],
1071
+ [
1072
+ "objective",
1073
+ `Review the changes since the base branch \`${comparisonBaseBranch}\` and create a pull request if possible and credentials are available.`,
1074
+ ],
1075
+ [
1076
+ "workflow_context",
1077
+ [
1078
+ `Original task: ${prompt}`,
1079
+ `Review loop approved: ${approved ? "yes" : "no"}`,
1080
+ finalPlanPath
1081
+ ? `Planner spec path: ${finalPlanPath}`
1082
+ : "Planner spec path: unavailable",
1083
+ `Implementation notes path: ${implementationNotesPath}`,
1084
+ ].join("\n"),
1085
+ ],
1086
+ [
1087
+ "required_checks",
1088
+ [
1089
+ "Start by inspecting `git status --short` so unstaged, staged, and untracked changes are all visible.",
1090
+ `Review the patch against \`${comparisonBaseBranch}\` with working-tree-aware commands such as \`git diff ${comparisonBaseBranch}\` and \`git diff --cached ${comparisonBaseBranch}\`.`,
1091
+ "If untracked files are present, inspect them directly before deciding whether they belong in the PR.",
1092
+ "Read the implementation notes file and use its full contents as the body of a PR comment after the pull request exists.",
1093
+ "Check the local Git identity with `git config user.name` and `git config user.email` so you can prefer the matching GitHub account when multiple accounts are logged in.",
1094
+ "Check whether GitHub credentials are available with non-destructive commands such as `gh auth status` and `gh auth status --show-token-scopes` before attempting PR creation.",
1095
+ "If multiple GitHub accounts or hosts are logged in, use the git config username/email as a heuristic to choose the most likely identity, but try each available credential/account and use the first one that can read the repository and create the PR.",
1096
+ ].join("\n"),
1097
+ ],
1098
+ [
1099
+ "pr_policy",
1100
+ [
1101
+ "Create a PR only if there are meaningful changes, a remote/branch target is available, credentials are available, and the current state is suitable for review.",
1102
+ "If no logged-in account can access the repository or create the PR, do not fake success; report each credential/account tried, what failed, and provide the command the user can run later.",
1103
+ "When you successfully create or update the PR, create a PR comment containing the implementation notes file contents as the last action of this workflow stage.",
1104
+ "If PR creation is not possible, do not create a standalone comment elsewhere; include the implementation notes path and summary in your report instead.",
1105
+ "If the review loop did not approve, prefer reporting the remaining blockers over creating a PR unless the changes are still intentionally ready for human review.",
1106
+ "Do not make unrelated code edits in this phase. Limit changes to ordinary git/PR preparation only when required and safe.",
1107
+ ].join("\n"),
1108
+ ],
1109
+ [
1110
+ "output_format",
1111
+ [
1112
+ "Return Markdown with headings:",
1113
+ "1. Change review — summary of files and diff scope inspected",
1114
+ "2. PR status — created PR URL, or why no PR was created",
1115
+ "3. Implementation notes comment — whether the PR comment was created as the last action, or why it could not be created",
1116
+ "4. Commands run — include exit status or clear outcome",
1117
+ "5. Follow-up for the user — exact next steps if credentials or repository state blocked PR creation",
1118
+ ].join("\n"),
1119
+ ],
1120
+ ]),
1121
+ reads: finalPlanPath
1122
+ ? [finalPlanPath, implementationNotesPath]
1123
+ : [implementationNotesPath],
1124
+ ...orchestratorModelConfig,
1125
+ });
1126
+ finalPrReport = prResult.text;
1127
+
945
1128
  return {
946
1129
  result: finalResult,
947
1130
  plan: finalPlan,
1131
+ plan_path: finalPlanPath,
1132
+ implementation_notes_path: implementationNotesPath,
1133
+ pr_report: finalPrReport,
948
1134
  approved,
949
1135
  iterations_completed: iterationsCompleted,
950
1136
  review_report: reviewReport,