create-walle 0.9.21 → 0.9.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (500)
  1. package/README.md +27 -5
  2. package/package.json +2 -2
  3. package/template/CLAUDE.md +2 -2
  4. package/template/LICENSE +1 -1
  5. package/template/bin/ctm-dev-cleanup.js +24 -3
  6. package/template/bin/ctm-launch.sh +13 -0
  7. package/template/bin/dev.sh +156 -18
  8. package/template/bin/node-bin.sh +84 -0
  9. package/template/bin/pin-node.sh +51 -0
  10. package/template/claude-task-manager/api-prompts.js +1203 -182
  11. package/template/claude-task-manager/api-reviews.js +109 -15
  12. package/template/claude-task-manager/approval-agent.js +1360 -280
  13. package/template/claude-task-manager/bin/restart-ctm.sh +64 -23
  14. package/template/claude-task-manager/bin/storage-migration-supervisor.js +338 -0
  15. package/template/claude-task-manager/db.js +4417 -295
  16. package/template/claude-task-manager/docs/app-update-refresh-protocol.md +69 -0
  17. package/template/claude-task-manager/docs/approval-ai-refinement.md +138 -0
  18. package/template/claude-task-manager/docs/approval-rescue-loop.md +74 -0
  19. package/template/claude-task-manager/docs/codex-operational-warning-health.md +107 -0
  20. package/template/claude-task-manager/docs/codex-resume-state-guard-design.md +17 -12
  21. package/template/claude-task-manager/docs/codex-terminal-render-controller-handoff.md +311 -0
  22. package/template/claude-task-manager/docs/coding-agent-hooks-architecture.md +418 -0
  23. package/template/claude-task-manager/docs/conversation-import-freshness.md +20 -0
  24. package/template/claude-task-manager/docs/google-workspace-auth-health.md +77 -0
  25. package/template/claude-task-manager/docs/image-paste-ux.md +13 -0
  26. package/template/claude-task-manager/docs/ipad-web-preview.md +88 -0
  27. package/template/claude-task-manager/docs/main-loop-offload-architecture.md +66 -0
  28. package/template/claude-task-manager/docs/microsoft-dev-tunnel-phone-access-design.md +274 -519
  29. package/template/claude-task-manager/docs/mobile-live-streaming.md +27 -5
  30. package/template/claude-task-manager/docs/mobile-remote-submission-lifecycle.md +69 -0
  31. package/template/claude-task-manager/docs/phone-access-design.md +53 -15
  32. package/template/claude-task-manager/docs/phone-passkey-identity.md +122 -0
  33. package/template/claude-task-manager/docs/phone-setup.md +3 -0
  34. package/template/claude-task-manager/docs/prompt-editing-tree-design.md +25 -1
  35. package/template/claude-task-manager/docs/remote-desktop-access-design.md +268 -0
  36. package/template/claude-task-manager/docs/restart-lifecycle-architecture.md +95 -0
  37. package/template/claude-task-manager/docs/runtime-work-control-plane.md +53 -0
  38. package/template/claude-task-manager/docs/session-interactive-wait-surfaces.md +38 -0
  39. package/template/claude-task-manager/docs/session-needs-you-dismissal.md +84 -0
  40. package/template/claude-task-manager/docs/session-render-state-management-design.md +91 -3
  41. package/template/claude-task-manager/docs/session-standup-command-center-design.md +25 -1
  42. package/template/claude-task-manager/docs/session-title-authority.md +32 -0
  43. package/template/claude-task-manager/docs/session-workspace-binding.md +33 -0
  44. package/template/claude-task-manager/docs/skill-intent-resolution-design.md +72 -0
  45. package/template/claude-task-manager/docs/walle-mcp-supervisor-health.md +86 -0
  46. package/template/claude-task-manager/docs/walle-relay-phone-access-design.md +24 -15
  47. package/template/claude-task-manager/docs/walle-session-history-hydration.md +114 -0
  48. package/template/claude-task-manager/docs/walle-session-input-queue.md +104 -0
  49. package/template/claude-task-manager/docs/walle-session-model-catalog.md +90 -0
  50. package/template/claude-task-manager/docs/walle-session-model-preferences.md +15 -6
  51. package/template/claude-task-manager/git-utils.js +897 -27
  52. package/template/claude-task-manager/lib/agent-capabilities.js +33 -0
  53. package/template/claude-task-manager/lib/agent-cli-cache.js +37 -7
  54. package/template/claude-task-manager/lib/agent-hooks-installer.js +26 -2
  55. package/template/claude-task-manager/lib/agent-presets.js +17 -1
  56. package/template/claude-task-manager/lib/all-sessions-query.js +108 -0
  57. package/template/claude-task-manager/lib/approval-ai-refinement.js +488 -0
  58. package/template/claude-task-manager/lib/approval-self-adapt.js +168 -0
  59. package/template/claude-task-manager/lib/async-semaphore.js +44 -0
  60. package/template/claude-task-manager/lib/auth-context.js +5 -0
  61. package/template/claude-task-manager/lib/auth-rate-limit.js +47 -4
  62. package/template/claude-task-manager/lib/auth-rules.js +29 -2
  63. package/template/claude-task-manager/lib/auto-approval-verifier.js +129 -16
  64. package/template/claude-task-manager/lib/background-llm.js +144 -17
  65. package/template/claude-task-manager/lib/branch-inventory.js +212 -0
  66. package/template/claude-task-manager/lib/claude-desktop-sessions.js +15 -3
  67. package/template/claude-task-manager/lib/coalesce-sync-frames.js +151 -0
  68. package/template/claude-task-manager/lib/codex-launch-health.js +762 -0
  69. package/template/claude-task-manager/lib/codex-transcript-pager.js +51 -0
  70. package/template/claude-task-manager/lib/codex-zst.js +124 -0
  71. package/template/claude-task-manager/lib/coding-agent-models.js +233 -30
  72. package/template/claude-task-manager/lib/connection-health.js +232 -0
  73. package/template/claude-task-manager/lib/conversation-blob-parser.js +42 -0
  74. package/template/claude-task-manager/lib/conversation-tail-merge.js +89 -26
  75. package/template/claude-task-manager/lib/ctm-session-context-api.js +39 -10
  76. package/template/claude-task-manager/lib/cursor-conversation-store.js +354 -0
  77. package/template/claude-task-manager/lib/db-owner-worker-client.js +315 -0
  78. package/template/claude-task-manager/lib/document-review.js +141 -6
  79. package/template/claude-task-manager/lib/escalation-review.js +152 -0
  80. package/template/claude-task-manager/lib/graceful-shutdown.js +159 -0
  81. package/template/claude-task-manager/lib/headless-term-service.js +678 -0
  82. package/template/claude-task-manager/lib/heavy-worker-fallback.js +38 -0
  83. package/template/claude-task-manager/lib/jsonl-conversation-parser.js +542 -0
  84. package/template/claude-task-manager/lib/jsonl-range-reader.js +112 -0
  85. package/template/claude-task-manager/lib/main-db-census.js +216 -0
  86. package/template/claude-task-manager/lib/message-pagination.js +106 -4
  87. package/template/claude-task-manager/lib/microsoft-dev-tunnel-setup.js +750 -26
  88. package/template/claude-task-manager/lib/mobile-auth-api.js +274 -7
  89. package/template/claude-task-manager/lib/mobile-auth-store.js +592 -10
  90. package/template/claude-task-manager/lib/mobile-notification-dispatcher.js +15 -0
  91. package/template/claude-task-manager/lib/model-overview-brain-fallback.js +311 -0
  92. package/template/claude-task-manager/lib/model-overview-cache.js +141 -0
  93. package/template/claude-task-manager/lib/models-health-routing-notice.js +126 -0
  94. package/template/claude-task-manager/lib/node-pin-guard.js +93 -0
  95. package/template/claude-task-manager/lib/perf-tracker.js +242 -6
  96. package/template/claude-task-manager/lib/permission-match.js +76 -0
  97. package/template/claude-task-manager/lib/permission-sync.js +133 -20
  98. package/template/claude-task-manager/lib/process-title.js +35 -0
  99. package/template/claude-task-manager/lib/prompt-executions-query.js +25 -0
  100. package/template/claude-task-manager/lib/prompt-index-disk-cache.js +44 -0
  101. package/template/claude-task-manager/lib/prompt-intent.js +132 -0
  102. package/template/claude-task-manager/lib/provider-user-context.js +34 -0
  103. package/template/claude-task-manager/lib/read-pool-client.js +313 -0
  104. package/template/claude-task-manager/lib/readpool-breaker.js +31 -0
  105. package/template/claude-task-manager/lib/recent-sessions-breaker.js +12 -0
  106. package/template/claude-task-manager/lib/remote-feedback-client.js +72 -0
  107. package/template/claude-task-manager/lib/remote-relay-protocol.js +37 -4
  108. package/template/claude-task-manager/lib/remote-relay-store.js +159 -0
  109. package/template/claude-task-manager/lib/remote-submission-observer.js +278 -0
  110. package/template/claude-task-manager/lib/restart-guard.js +109 -0
  111. package/template/claude-task-manager/lib/restore-interruption-detector.js +439 -0
  112. package/template/claude-task-manager/lib/restore-policy.js +13 -0
  113. package/template/claude-task-manager/lib/restore-resume-batch.js +74 -0
  114. package/template/claude-task-manager/lib/restore-runtime.js +68 -0
  115. package/template/claude-task-manager/lib/restore-storm.js +34 -0
  116. package/template/claude-task-manager/lib/resume-cwd.js +36 -0
  117. package/template/claude-task-manager/lib/resume-preflight.js +313 -0
  118. package/template/claude-task-manager/lib/runtime-work-registry.js +444 -0
  119. package/template/claude-task-manager/lib/sanitize-openai-auth.js +31 -0
  120. package/template/claude-task-manager/lib/scheduler.js +21 -1
  121. package/template/claude-task-manager/lib/scrollback-snapshot-store.js +159 -0
  122. package/template/claude-task-manager/lib/serial-task-queue.js +64 -0
  123. package/template/claude-task-manager/lib/server-listeners.js +239 -0
  124. package/template/claude-task-manager/lib/session-capture.js +42 -7
  125. package/template/claude-task-manager/lib/session-content-backfill.js +131 -0
  126. package/template/claude-task-manager/lib/session-history.js +388 -43
  127. package/template/claude-task-manager/lib/session-host-manager.js +287 -0
  128. package/template/claude-task-manager/lib/session-image-refs.js +209 -0
  129. package/template/claude-task-manager/lib/session-jobs.js +399 -59
  130. package/template/claude-task-manager/lib/session-prompt-index.js +137 -0
  131. package/template/claude-task-manager/lib/session-restore.js +53 -0
  132. package/template/claude-task-manager/lib/session-standup.js +123 -23
  133. package/template/claude-task-manager/lib/session-state-bus.js +14 -0
  134. package/template/claude-task-manager/lib/session-stream.js +64 -16
  135. package/template/claude-task-manager/lib/session-timeline-summary.js +260 -0
  136. package/template/claude-task-manager/lib/session-token-usage.js +494 -0
  137. package/template/claude-task-manager/lib/session-workspace-binding.js +356 -0
  138. package/template/claude-task-manager/lib/setup-network-config.js +9 -0
  139. package/template/claude-task-manager/lib/size-cap.js +45 -0
  140. package/template/claude-task-manager/lib/size-cap.test.js +62 -0
  141. package/template/claude-task-manager/lib/skill-autocomplete.js +180 -1
  142. package/template/claude-task-manager/lib/skill-intent-resolver.js +304 -0
  143. package/template/claude-task-manager/lib/sqlite-driver.js +19 -3
  144. package/template/claude-task-manager/lib/standup-attention.js +7 -3
  145. package/template/claude-task-manager/lib/status-authority.js +39 -0
  146. package/template/claude-task-manager/lib/status-hooks.js +4 -0
  147. package/template/claude-task-manager/lib/storage-migration.js +235 -0
  148. package/template/claude-task-manager/lib/structured-capture.js +298 -0
  149. package/template/claude-task-manager/lib/sync-io-census.js +163 -0
  150. package/template/claude-task-manager/lib/tailscale-setup.js +6 -0
  151. package/template/claude-task-manager/lib/terminal-activity-evidence.js +33 -0
  152. package/template/claude-task-manager/lib/terminal-choice.js +364 -0
  153. package/template/claude-task-manager/lib/terminal-control-sanitize.js +17 -0
  154. package/template/claude-task-manager/lib/terminal-fingerprint.js +48 -0
  155. package/template/claude-task-manager/lib/terminal-output-flush.js +84 -0
  156. package/template/claude-task-manager/lib/timeline-order.js +122 -0
  157. package/template/claude-task-manager/lib/transcript-store.js +348 -43
  158. package/template/claude-task-manager/lib/transport-security.js +84 -1
  159. package/template/claude-task-manager/lib/wait-state.js +184 -0
  160. package/template/claude-task-manager/lib/walle-client.js +47 -5
  161. package/template/claude-task-manager/lib/walle-ctm-history.js +564 -4
  162. package/template/claude-task-manager/lib/walle-external-actions.js +135 -16
  163. package/template/claude-task-manager/lib/walle-history-hydration.js +46 -0
  164. package/template/claude-task-manager/lib/walle-native-health.js +403 -0
  165. package/template/claude-task-manager/lib/walle-repair.js +701 -0
  166. package/template/claude-task-manager/lib/walle-session-cache.js +109 -0
  167. package/template/claude-task-manager/lib/walle-session-context.js +57 -21
  168. package/template/claude-task-manager/lib/walle-session-model-catalog.js +34 -0
  169. package/template/claude-task-manager/lib/walle-supervisor.js +539 -63
  170. package/template/claude-task-manager/lib/walle-transcript.js +52 -0
  171. package/template/claude-task-manager/lib/worktree-active-sync.js +11 -7
  172. package/template/claude-task-manager/lib/worktree-cwd.js +32 -1
  173. package/template/claude-task-manager/package.json +1 -1
  174. package/template/claude-task-manager/prompt-harvest.js +89 -66
  175. package/template/claude-task-manager/providers/claude-code.js +51 -3
  176. package/template/claude-task-manager/providers/cursor.js +140 -45
  177. package/template/claude-task-manager/public/css/reviews.css +551 -61
  178. package/template/claude-task-manager/public/css/setup.css +191 -0
  179. package/template/claude-task-manager/public/css/walle-session.css +865 -10
  180. package/template/claude-task-manager/public/css/walle.css +154 -0
  181. package/template/claude-task-manager/public/designs/ai-providers-consolidation-v2.html +830 -0
  182. package/template/claude-task-manager/public/index.html +18516 -2058
  183. package/template/claude-task-manager/public/ipad.html +363 -0
  184. package/template/claude-task-manager/public/js/document-review-links.js +301 -0
  185. package/template/claude-task-manager/public/js/image-normalize.js +69 -36
  186. package/template/claude-task-manager/public/js/message-renderer.js +1265 -77
  187. package/template/claude-task-manager/public/js/prompts.js +66 -29
  188. package/template/claude-task-manager/public/js/reviews.js +901 -133
  189. package/template/claude-task-manager/public/js/session-activity-utils.js +11 -1
  190. package/template/claude-task-manager/public/js/session-search-utils.js +94 -10
  191. package/template/claude-task-manager/public/js/session-status-precedence.js +23 -5
  192. package/template/claude-task-manager/public/js/setup.js +1273 -176
  193. package/template/claude-task-manager/public/js/stream-view.js +691 -73
  194. package/template/claude-task-manager/public/js/terminal-reconciler.js +210 -0
  195. package/template/claude-task-manager/public/js/walle-session.js +2455 -158
  196. package/template/claude-task-manager/public/js/walle.js +455 -28
  197. package/template/claude-task-manager/public/m/app.css +2909 -262
  198. package/template/claude-task-manager/public/m/app.js +6601 -398
  199. package/template/claude-task-manager/public/m/claim.html +224 -17
  200. package/template/claude-task-manager/public/m/index.html +117 -21
  201. package/template/claude-task-manager/public/m/sw.js +3 -1
  202. package/template/claude-task-manager/public/manifest.json +2 -2
  203. package/template/claude-task-manager/public/prompts.html +30 -14
  204. package/template/claude-task-manager/queue-engine.js +507 -28
  205. package/template/claude-task-manager/scripts/repair-claude-session-images.js +27 -8
  206. package/template/claude-task-manager/server.js +14341 -2197
  207. package/template/claude-task-manager/session-integrity.js +160 -18
  208. package/template/claude-task-manager/session-search-ranking.js +1 -0
  209. package/template/claude-task-manager/session-utils.js +25 -5
  210. package/template/claude-task-manager/workers/approval-blocklist.js +96 -6
  211. package/template/claude-task-manager/workers/approval-widget-validator.js +14 -8
  212. package/template/claude-task-manager/workers/conversation-import-worker.js +11 -50
  213. package/template/claude-task-manager/workers/db-owner-worker.js +386 -0
  214. package/template/claude-task-manager/workers/harvest-worker.js +9 -55
  215. package/template/claude-task-manager/workers/headless-term-worker.js +9 -530
  216. package/template/claude-task-manager/workers/read-pool-worker.js +387 -0
  217. package/template/claude-task-manager/workers/scrollback-worker.js +11 -72
  218. package/template/claude-task-manager/workers/session-host-process.js +146 -0
  219. package/template/claude-task-manager/workers/session-integrity-worker.js +10 -54
  220. package/template/claude-task-manager/workers/state-detectors/base.js +18 -1
  221. package/template/claude-task-manager/workers/state-detectors/claude-code.js +182 -9
  222. package/template/claude-task-manager/workers/state-detectors/codex.js +150 -2
  223. package/template/claude-task-manager/workers/state-detectors/cursor.js +127 -0
  224. package/template/claude-task-manager/workers/state-detectors/gemini.js +21 -0
  225. package/template/claude-task-manager/workers/state-detectors/index.js +29 -0
  226. package/template/claude-task-manager/workers/state-detectors/opencode.js +103 -0
  227. package/template/docs/design/markdown-review-pane.md +206 -0
  228. package/template/docs/designs/2026-05-17-portkey-gateway-provider-ux.md +129 -38
  229. package/template/docs/designs/2026-05-20-mobile-worktree-finish-command.md +27 -0
  230. package/template/docs/designs/2026-05-22-ai-configuration-consolidation.md +248 -0
  231. package/template/docs/designs/ai-configuration-consolidation-mock.html +812 -0
  232. package/template/docs/private-memory-and-pii-policy.md +69 -0
  233. package/template/package.json +2 -1
  234. package/template/scripts/check-private-data.js +201 -0
  235. package/template/shared/sqlite-owner-guard.js +30 -0
  236. package/template/shared/sqlite-owner-write-queue.js +225 -0
  237. package/template/shared/sqlite-storage-policy.js +111 -0
  238. package/template/shared/sqlite-write-lock.js +428 -0
  239. package/template/wall-e/agent-runners/claude-code.js +5 -0
  240. package/template/wall-e/agent.js +166 -22
  241. package/template/wall-e/api-walle.js +524 -70
  242. package/template/wall-e/auth/provider-flows.js +11 -1
  243. package/template/wall-e/bin/walle-mcp-stdio.js +341 -17
  244. package/template/wall-e/brain.js +1614 -141
  245. package/template/wall-e/chat/attachment-blocks.js +96 -0
  246. package/template/wall-e/chat/attachments.js +2 -1
  247. package/template/wall-e/chat/capability-resolver.js +7 -7
  248. package/template/wall-e/chat/context-messages.js +28 -0
  249. package/template/wall-e/chat/conversation-frame.js +630 -0
  250. package/template/wall-e/chat/provider-messages.js +125 -0
  251. package/template/wall-e/chat.js +1002 -233
  252. package/template/wall-e/coding/acceptance-contract.js +170 -0
  253. package/template/wall-e/coding/acp-adapter.js +1 -1
  254. package/template/wall-e/coding/agent-catalog.js +3 -0
  255. package/template/wall-e/coding/artifact-store.js +93 -0
  256. package/template/wall-e/coding/capability-router.js +120 -0
  257. package/template/wall-e/coding/coding-run-controller.js +423 -0
  258. package/template/wall-e/coding/compaction-service.js +157 -12
  259. package/template/wall-e/coding/frontend-verification.js +258 -0
  260. package/template/wall-e/coding/lifecycle-hooks.js +75 -0
  261. package/template/wall-e/coding/local-preview-contract.js +157 -0
  262. package/template/wall-e/coding/permission-service.js +57 -13
  263. package/template/wall-e/coding/prompt-bundle.js +19 -1
  264. package/template/wall-e/coding/prompt-section-registry.js +227 -0
  265. package/template/wall-e/coding/provider-compat.js +15 -0
  266. package/template/wall-e/coding/runtime-events.js +224 -0
  267. package/template/wall-e/coding/runtime-mode.js +3 -0
  268. package/template/wall-e/coding/side-git-snapshot.js +160 -4
  269. package/template/wall-e/coding/snapshot-service.js +143 -1
  270. package/template/wall-e/coding/stream-processor.js +388 -34
  271. package/template/wall-e/coding/task-tool.js +141 -4
  272. package/template/wall-e/coding/tool-execution-controller.js +365 -0
  273. package/template/wall-e/coding/tool-registry.js +43 -5
  274. package/template/wall-e/coding/user-hooks.js +217 -0
  275. package/template/wall-e/coding-orchestrator.js +1330 -221
  276. package/template/wall-e/coding-prompts.js +20 -4
  277. package/template/wall-e/context/context-builder.js +15 -2
  278. package/template/wall-e/decision/confidence.js +1 -1
  279. package/template/wall-e/docs/coding-acceptance-contract.md +41 -0
  280. package/template/wall-e/docs/external-action-controller.md +26 -6
  281. package/template/wall-e/docs/telemetry-lifecycle.md +8 -2
  282. package/template/wall-e/embeddings.js +591 -53
  283. package/template/wall-e/external-action-controller.js +12 -0
  284. package/template/wall-e/http/auth.js +1 -0
  285. package/template/wall-e/http/chat-api.js +46 -11
  286. package/template/wall-e/http/model-admin.js +836 -34
  287. package/template/wall-e/lib/boot-profile.js +88 -0
  288. package/template/wall-e/lib/event-loop-monitor.js +93 -0
  289. package/template/wall-e/lib/service-health.js +194 -0
  290. package/template/wall-e/llm/anthropic.js +130 -5
  291. package/template/wall-e/llm/client.js +266 -63
  292. package/template/wall-e/llm/default-fallback.js +382 -0
  293. package/template/wall-e/llm/health.js +19 -0
  294. package/template/wall-e/llm/message-guard.js +78 -0
  295. package/template/wall-e/llm/model-catalog.js +252 -1
  296. package/template/wall-e/llm/openai.js +26 -4
  297. package/template/wall-e/llm/portkey-sync.js +654 -0
  298. package/template/wall-e/llm/provider-error.js +30 -2
  299. package/template/wall-e/llm/registry.js +5 -1
  300. package/template/wall-e/llm/request-compat.js +67 -0
  301. package/template/wall-e/loops/backfill.js +79 -23
  302. package/template/wall-e/loops/brain-optimize.js +67 -0
  303. package/template/wall-e/loops/ingest.js +25 -10
  304. package/template/wall-e/loops/question-digest.js +160 -0
  305. package/template/wall-e/loops/reflect.js +6 -4
  306. package/template/wall-e/loops/think.js +39 -12
  307. package/template/wall-e/mcp-server.js +318 -36
  308. package/template/wall-e/memory/ctm-context-client.js +52 -14
  309. package/template/wall-e/memory/ctm-operational-context.js +237 -0
  310. package/template/wall-e/memory/ctm-prompt-executions-client.js +128 -0
  311. package/template/wall-e/memory/ctm-session-context.js +111 -63
  312. package/template/wall-e/prompts/coding/deepseek.txt +3 -0
  313. package/template/wall-e/prompts/coding/gemini.txt +6 -0
  314. package/template/wall-e/prompts/coding/gpt.txt +6 -0
  315. package/template/wall-e/prompts/coding/local.txt +7 -0
  316. package/template/wall-e/runtime/decision-hooks.js +115 -0
  317. package/template/wall-e/runtime/devbox-gateway.js +82 -8
  318. package/template/wall-e/runtime/prompt-manifest.js +86 -0
  319. package/template/wall-e/runtime/tool-executor.js +269 -0
  320. package/template/wall-e/runtime/tool-result-envelope.js +138 -0
  321. package/template/wall-e/runtime/transcript-projection.js +60 -0
  322. package/template/wall-e/runtime/walle-runtime.js +224 -0
  323. package/template/wall-e/scripts/db-optimize/migrate.js +162 -0
  324. package/template/wall-e/scripts/db-optimize/recall-eval.js +117 -0
  325. package/template/wall-e/server.js +15 -0
  326. package/template/wall-e/session-files.js +9 -0
  327. package/template/wall-e/skills/_bundled/google-calendar/run.js +1 -1
  328. package/template/wall-e/skills/_bundled/gws-workspace/run.js +1 -1
  329. package/template/wall-e/skills/_bundled/slack-mentions/run.js +76 -6
  330. package/template/wall-e/skills/claude-code-reader.js +7 -3
  331. package/template/wall-e/skills/script-skill-runner.js +10 -0
  332. package/template/wall-e/skills/skill-planner.js +38 -0
  333. package/template/wall-e/tools/builtin-middleware.js +19 -9
  334. package/template/wall-e/tools/local-tools.js +1428 -16
  335. package/template/wall-e/tools/permission-checker.js +73 -5
  336. package/template/wall-e/tools/question-manager.js +117 -7
  337. package/template/wall-e/training/harvester.js +12 -28
  338. package/template/wall-e/training/replay.js +25 -80
  339. package/template/website/index.html +10 -10
  340. package/template/wall-e/eval/ab-test.js +0 -203
  341. package/template/wall-e/eval/agent-runner.js +0 -772
  342. package/template/wall-e/eval/agent-scorer.js +0 -461
  343. package/template/wall-e/eval/aggregator.js +0 -414
  344. package/template/wall-e/eval/allowed-test-commands.js +0 -34
  345. package/template/wall-e/eval/benchmark-generator.js +0 -113
  346. package/template/wall-e/eval/benchmarks/chat-eval.json +0 -1662
  347. package/template/wall-e/eval/benchmarks/chat.json +0 -82
  348. package/template/wall-e/eval/benchmarks/coding-agent-real.json +0 -1
  349. package/template/wall-e/eval/benchmarks/coding-agent.json +0 -1581
  350. package/template/wall-e/eval/benchmarks/coding.json +0 -122
  351. package/template/wall-e/eval/benchmarks/memory-retrieval.json +0 -234
  352. package/template/wall-e/eval/benchmarks/reasoning.json +0 -82
  353. package/template/wall-e/eval/benchmarks/swebench-lite-30.json +0 -212
  354. package/template/wall-e/eval/benchmarks.js +0 -669
  355. package/template/wall-e/eval/cc-replay.js +0 -719
  356. package/template/wall-e/eval/chat-eval.js +0 -525
  357. package/template/wall-e/eval/check-keys.js +0 -15
  358. package/template/wall-e/eval/check-providers.js +0 -42
  359. package/template/wall-e/eval/codex-cli-baseline.js +0 -669
  360. package/template/wall-e/eval/coding-agent-real.js +0 -570
  361. package/template/wall-e/eval/context-compactor.js +0 -251
  362. package/template/wall-e/eval/debug-agent003.js +0 -68
  363. package/template/wall-e/eval/diagnostics.js +0 -216
  364. package/template/wall-e/eval/eval-orchestrator.js +0 -642
  365. package/template/wall-e/eval/evaluate.js +0 -202
  366. package/template/wall-e/eval/evaluator.js +0 -373
  367. package/template/wall-e/eval/exporter.js +0 -212
  368. package/template/wall-e/eval/fixtures/express-basic/package.json +0 -9
  369. package/template/wall-e/eval/fixtures/express-basic/server.js +0 -115
  370. package/template/wall-e/eval/fixtures/express-basic/test.js +0 -83
  371. package/template/wall-e/eval/fixtures/express-buggy/package.json +0 -9
  372. package/template/wall-e/eval/fixtures/express-buggy/server.js +0 -113
  373. package/template/wall-e/eval/fixtures/express-buggy/test.js +0 -83
  374. package/template/wall-e/eval/fixtures/express-buggy-items/package.json +0 -9
  375. package/template/wall-e/eval/fixtures/express-buggy-items/server.js +0 -112
  376. package/template/wall-e/eval/fixtures/express-buggy-items/test.js +0 -83
  377. package/template/wall-e/eval/fixtures/express-buggy-search/package.json +0 -9
  378. package/template/wall-e/eval/fixtures/express-buggy-search/server.js +0 -121
  379. package/template/wall-e/eval/fixtures/express-buggy-search/test.js +0 -83
  380. package/template/wall-e/eval/fixtures/express-rename-data/data.js +0 -34
  381. package/template/wall-e/eval/fixtures/express-rename-data/package.json +0 -9
  382. package/template/wall-e/eval/fixtures/express-rename-data/server.js +0 -97
  383. package/template/wall-e/eval/fixtures/express-rename-data/test.js +0 -88
  384. package/template/wall-e/eval/fixtures/express-xss/package.json +0 -12
  385. package/template/wall-e/eval/fixtures/express-xss/server.js +0 -90
  386. package/template/wall-e/eval/fixtures/express-xss/test.js +0 -67
  387. package/template/wall-e/eval/fixtures/express-xss/views/profile.ejs +0 -9
  388. package/template/wall-e/eval/fixtures/fullstack-app/config/default.js +0 -9
  389. package/template/wall-e/eval/fixtures/fullstack-app/config/test.js +0 -13
  390. package/template/wall-e/eval/fixtures/fullstack-app/package.json +0 -11
  391. package/template/wall-e/eval/fixtures/fullstack-app/public/css/style.css +0 -137
  392. package/template/wall-e/eval/fixtures/fullstack-app/public/index.html +0 -46
  393. package/template/wall-e/eval/fixtures/fullstack-app/public/js/app.js +0 -121
  394. package/template/wall-e/eval/fixtures/fullstack-app/public/js/auth.js +0 -71
  395. package/template/wall-e/eval/fixtures/fullstack-app/public/js/items.js +0 -80
  396. package/template/wall-e/eval/fixtures/fullstack-app/public/js/users.js +0 -46
  397. package/template/wall-e/eval/fixtures/fullstack-app/public/login.html +0 -45
  398. package/template/wall-e/eval/fixtures/fullstack-app/public/register.html +0 -38
  399. package/template/wall-e/eval/fixtures/fullstack-app/scripts/migrate.js +0 -23
  400. package/template/wall-e/eval/fixtures/fullstack-app/scripts/seed.js +0 -46
  401. package/template/wall-e/eval/fixtures/fullstack-app/server/db.js +0 -99
  402. package/template/wall-e/eval/fixtures/fullstack-app/server/index.js +0 -94
  403. package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/auth.js +0 -19
  404. package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/logger.js +0 -19
  405. package/template/wall-e/eval/fixtures/fullstack-app/server/router.js +0 -50
  406. package/template/wall-e/eval/fixtures/fullstack-app/server/routes/auth.js +0 -69
  407. package/template/wall-e/eval/fixtures/fullstack-app/server/routes/health.js +0 -23
  408. package/template/wall-e/eval/fixtures/fullstack-app/server/routes/items.js +0 -88
  409. package/template/wall-e/eval/fixtures/fullstack-app/server/routes/users.js +0 -75
  410. package/template/wall-e/eval/fixtures/fullstack-app/server/test.js +0 -198
  411. package/template/wall-e/eval/fixtures/fullstack-app/server/utils/response.js +0 -34
  412. package/template/wall-e/eval/fixtures/fullstack-app/server/utils/validate.js +0 -26
  413. package/template/wall-e/eval/fixtures/fullstack-app/server.js +0 -8
  414. package/template/wall-e/eval/fixtures/fullstack-app/test.js +0 -12
  415. package/template/wall-e/eval/fixtures/monorepo-basic/package.json +0 -8
  416. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/data.js +0 -58
  417. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/middleware.js +0 -46
  418. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/package.json +0 -8
  419. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/routes.js +0 -64
  420. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/server.js +0 -56
  421. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/test.js +0 -116
  422. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/commands.js +0 -61
  423. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/index.js +0 -62
  424. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/output.js +0 -43
  425. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/package.json +0 -11
  426. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/test.js +0 -44
  427. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/formatters.js +0 -43
  428. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/index.js +0 -12
  429. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/package.json +0 -5
  430. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/test.js +0 -55
  431. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/validators.js +0 -29
  432. package/template/wall-e/eval/fixtures/monorepo-basic/test.js +0 -46
  433. package/template/wall-e/eval/fixtures/node-cli/index.js +0 -78
  434. package/template/wall-e/eval/fixtures/node-cli/package.json +0 -10
  435. package/template/wall-e/eval/fixtures/node-cli/test.js +0 -57
  436. package/template/wall-e/eval/fixtures/node-typed/package.json +0 -8
  437. package/template/wall-e/eval/fixtures/node-typed/src/handlers.js +0 -31
  438. package/template/wall-e/eval/fixtures/node-typed/src/utils.js +0 -33
  439. package/template/wall-e/eval/fixtures/node-typed/test.js +0 -36
  440. package/template/wall-e/eval/fixtures/python-flask/app.py +0 -14
  441. package/template/wall-e/eval/fixtures/python-flask/requirements.txt +0 -2
  442. package/template/wall-e/eval/fixtures/python-flask/test_app.py +0 -25
  443. package/template/wall-e/eval/fixtures/wall-e-subset/brain.js +0 -105
  444. package/template/wall-e/eval/fixtures/wall-e-subset/eval/aggregator.js +0 -101
  445. package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/chat.json +0 -20
  446. package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/coding.json +0 -32
  447. package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks.js +0 -64
  448. package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/package.json +0 -6
  449. package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/server.js +0 -31
  450. package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/test.js +0 -18
  451. package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/utils.js +0 -34
  452. package/template/wall-e/eval/fixtures/wall-e-subset/eval/runner.js +0 -104
  453. package/template/wall-e/eval/fixtures/wall-e-subset/eval/scorer.js +0 -73
  454. package/template/wall-e/eval/fixtures/wall-e-subset/eval/test.js +0 -134
  455. package/template/wall-e/eval/fixtures/wall-e-subset/llm/client.js +0 -99
  456. package/template/wall-e/eval/fixtures/wall-e-subset/llm/providers.js +0 -63
  457. package/template/wall-e/eval/fixtures/wall-e-subset/llm/test.js +0 -70
  458. package/template/wall-e/eval/fixtures/wall-e-subset/package.json +0 -10
  459. package/template/wall-e/eval/fixtures/wall-e-subset/test.js +0 -86
  460. package/template/wall-e/eval/harvester.js +0 -685
  461. package/template/wall-e/eval/head-to-head.js +0 -388
  462. package/template/wall-e/eval/humaneval-adapter.js +0 -321
  463. package/template/wall-e/eval/list-models.js +0 -31
  464. package/template/wall-e/eval/livecodebench-adapter.js +0 -291
  465. package/template/wall-e/eval/mail-integration.js +0 -443
  466. package/template/wall-e/eval/manifest.js +0 -186
  467. package/template/wall-e/eval/meta-harness/adapters/coding-agent.js +0 -57
  468. package/template/wall-e/eval/meta-harness/bootstrap-snapshot.js +0 -149
  469. package/template/wall-e/eval/meta-harness/candidate-store.js +0 -117
  470. package/template/wall-e/eval/meta-harness/cli.js +0 -86
  471. package/template/wall-e/eval/meta-harness/domain-spec.js +0 -154
  472. package/template/wall-e/eval/meta-harness/domains/coding-agent.domain.json +0 -84
  473. package/template/wall-e/eval/meta-harness/examples/env-bootstrap-candidate.js +0 -29
  474. package/template/wall-e/eval/meta-harness/experience-store.js +0 -174
  475. package/template/wall-e/eval/meta-harness/frontier.js +0 -96
  476. package/template/wall-e/eval/meta-harness/harness-interface.js +0 -90
  477. package/template/wall-e/eval/meta-harness/leakage-guard.js +0 -80
  478. package/template/wall-e/eval/meta-harness/optimizer.js +0 -207
  479. package/template/wall-e/eval/meta-harness/proposer-runner.js +0 -110
  480. package/template/wall-e/eval/meta-harness/reporting.js +0 -58
  481. package/template/wall-e/eval/meta-harness/telemetry.js +0 -27
  482. package/template/wall-e/eval/meta-harness/validation.js +0 -81
  483. package/template/wall-e/eval/promoter.js +0 -228
  484. package/template/wall-e/eval/provider-normalizer.js +0 -33
  485. package/template/wall-e/eval/replay.js +0 -395
  486. package/template/wall-e/eval/run-agent-benchmarks.js +0 -386
  487. package/template/wall-e/eval/run-codex-cli-baseline.js +0 -177
  488. package/template/wall-e/eval/run-coding-agent-real.js +0 -187
  489. package/template/wall-e/eval/run-eval.js +0 -435
  490. package/template/wall-e/eval/run-model-comparison.js +0 -142
  491. package/template/wall-e/eval/session-evaluator.js +0 -187
  492. package/template/wall-e/eval/session-miner.js +0 -207
  493. package/template/wall-e/eval/session-retrieval-benchmark.js +0 -150
  494. package/template/wall-e/eval/session-transcripts.js +0 -509
  495. package/template/wall-e/eval/shadow.js +0 -161
  496. package/template/wall-e/eval/swebench-adapter.js +0 -345
  497. package/template/wall-e/eval/swebench-docker.js +0 -192
  498. package/template/wall-e/eval/train.py +0 -320
  499. package/template/wall-e/eval/trainer.js +0 -232
  500. package/template/wall-e/eval/weekly-eval-loop.js +0 -241
@@ -15,6 +15,24 @@ const agentRunners = require('./agent-runners');
15
15
  const { ASK_USER_TOOL, QuestionManager } = require('./tools/question-manager');
16
16
  const { detectProject } = require('./tools/project-detector');
17
17
  const { normalizeToolCall } = require('./llm/text-tool-calls');
18
+ const {
19
+ hasVerificationEvidence,
20
+ hasFailedVerificationAttempt,
21
+ callName,
22
+ toolResultSucceeded,
23
+ normalizeToolCallEvidence,
24
+ buildAcceptanceContract,
25
+ collectToolEvidence,
26
+ validatorFailure,
27
+ validatorPass,
28
+ summarizeValidatorFailures,
29
+ } = require('./coding/acceptance-contract');
30
+ const {
31
+ isFrontendFile,
32
+ checkFrontendStaticContracts,
33
+ resolveFrontendEntrypoints,
34
+ } = require('./coding/frontend-verification');
35
+ const { pathToFileURL } = require('node:url');
18
36
 
19
37
  // ─── Progress Streaming (Phase 8) ────────────────────────────────────────────
20
38
  // Global progress emitter — SSE endpoint and chat handler subscribe to this.
@@ -64,6 +82,10 @@ const {
64
82
  resolvePromptCapabilities,
65
83
  loadRequestedSkillInstructions,
66
84
  } = require('./coding/prompt-capabilities');
85
+ const {
86
+ routeArtifactCapabilities,
87
+ hasCapability,
88
+ } = require('./coding/capability-router');
67
89
  const { createCodingTranscript } = require('./coding/transcript-writer');
68
90
  const { createCodingCapabilities } = require('./coding/capability-broker');
69
91
  const {
@@ -72,8 +94,10 @@ const {
72
94
  } = require('./coding/compaction-service');
73
95
  const {
74
96
  emitAgentRunContextWarnings,
75
- resolveAgentRunContext,
76
97
  } = require('./runtime/agent-run-context');
98
+ const {
99
+ resolveWallERuntimeProfile,
100
+ } = require('./runtime/walle-runtime');
77
101
  const { estimateTokens, estimateMessagesTokens } = require('./context/token-counter');
78
102
  const { recoverAllowedTextToolCalls } = require('./llm/text-tool-calls');
79
103
 
@@ -112,17 +136,42 @@ const CODING_TOOLS = [
112
136
  {
113
137
  name: 'run_shell',
114
138
  description: 'Run a shell command. Supports pipes, redirects, and subshells. '
115
- + 'Destructive commands (rm, sudo, etc.) are blocked.',
139
+ + 'Destructive commands (rm, sudo, etc.) are blocked. '
140
+ + 'For dev servers, watchers, or long builds set background:true (never `&`) and poll with bg_output.',
116
141
  input_schema: {
117
142
  type: 'object',
118
143
  properties: {
119
144
  command: { type: 'string', description: 'Shell command to run (e.g., "npm test | tail -20")' },
120
145
  timeout_ms: { type: 'number', description: 'Timeout in ms (default 30000)' },
121
146
  cwd: { type: 'string', description: 'Working directory (optional)' },
147
+ background: { type: 'boolean', description: 'Run detached in the background; returns resource_id immediately. Use for dev servers/watchers/long builds instead of `&`.' },
122
148
  },
123
149
  required: ['command'],
124
150
  },
125
151
  },
152
+ {
153
+ name: 'bg_output',
154
+ description: 'Read the latest output of a background process started with run_shell {background:true}. Returns status (running/exited), exit code, and the log tail.',
155
+ input_schema: {
156
+ type: 'object',
157
+ properties: {
158
+ resource_id: { type: 'string', description: 'resource_id returned by run_shell {background:true}' },
159
+ tail_lines: { type: 'number', description: 'Trailing log lines to return (default 100)' },
160
+ },
161
+ required: ['resource_id'],
162
+ },
163
+ },
164
+ {
165
+ name: 'bg_kill',
166
+ description: 'Stop a background process started with run_shell {background:true}.',
167
+ input_schema: {
168
+ type: 'object',
169
+ properties: {
170
+ resource_id: { type: 'string', description: 'resource_id returned by run_shell {background:true}' },
171
+ },
172
+ required: ['resource_id'],
173
+ },
174
+ },
126
175
  {
127
176
  name: 'glob',
128
177
  description: 'Find files matching a glob pattern (e.g., "**/*.js").',
@@ -174,6 +223,140 @@ const CODING_TOOLS = [
174
223
  required: ['url'],
175
224
  },
176
225
  },
226
+ {
227
+ name: 'browser_smoke_test',
228
+ description: 'Render a URL in headless Chrome, collect JavaScript exceptions, console errors, failed requests, and safely click interactive elements. Use after frontend/UI work; screenshots prove appearance, this proves the page does not break when loaded or clicked.',
229
+ input_schema: {
230
+ type: 'object',
231
+ properties: {
232
+ url: { type: 'string', description: 'URL to validate; supports file:// local HTML files or localhost URLs from start_static_server.' },
233
+ viewport: { type: 'string', enum: ['desktop', 'mobile', 'tablet'], description: 'Viewport preset. Default: desktop.' },
234
+ click_selectors: { type: 'array', items: { type: 'string' }, description: 'Optional selectors to click. Defaults to [onclick], button, [role=button], and hash links.' },
235
+ max_clicks: { type: 'number', description: 'Maximum interactive elements to click. Default: 20.' },
236
+ settle_ms: { type: 'number', description: 'Milliseconds to wait after load/clicks. Default: 750.' },
237
+ timeout_ms: { type: 'number', description: 'Overall timeout. Default: 45000.' },
238
+ },
239
+ required: ['url'],
240
+ },
241
+ },
242
+ {
243
+ name: 'check_url',
244
+ description: 'Fetch an http:// or https:// URL and report whether it returns a 2xx/3xx response. Use this before claiming a local dev/static server is reachable.',
245
+ input_schema: {
246
+ type: 'object',
247
+ properties: {
248
+ url: { type: 'string', description: 'URL to fetch.' },
249
+ timeout_ms: { type: 'number', description: 'Timeout in ms (default 5000).' },
250
+ },
251
+ required: ['url'],
252
+ },
253
+ },
254
+ {
255
+ name: 'web_search',
256
+ description: 'Search the public web and return result titles, URLs, and snippets. Use to find documentation or error-message references when you do not know the URL; then read the page with web_fetch.',
257
+ input_schema: {
258
+ type: 'object',
259
+ properties: {
260
+ query: { type: 'string', description: 'Search query' },
261
+ max_results: { type: 'number', description: 'Max results (default 8)' },
262
+ },
263
+ required: ['query'],
264
+ },
265
+ },
266
+ {
267
+ name: 'web_fetch',
268
+ description: 'Fetch a web page or API endpoint and return extracted text. Use for reading documentation or references found via web_search.',
269
+ input_schema: {
270
+ type: 'object',
271
+ properties: {
272
+ url: { type: 'string', description: 'URL to fetch' },
273
+ extract_text: { type: 'boolean', description: 'Strip HTML tags (default true)' },
274
+ },
275
+ required: ['url'],
276
+ },
277
+ },
278
+ {
279
+ name: 'start_static_server',
280
+ description: 'Start a managed local static file server for a directory, wait for its health URL, and return a verified URL plus resource_id. Prefer this over run_shell background servers.',
281
+ input_schema: {
282
+ type: 'object',
283
+ properties: {
284
+ directory: { type: 'string', description: 'Directory to serve. Defaults to project cwd.' },
285
+ port: { type: 'number', description: 'Port to bind. Use 0 or omit for an available port.' },
286
+ route: { type: 'string', description: 'Route to health-check after start. Default: /index.html.' },
287
+ timeout_ms: { type: 'number', description: 'Startup timeout in ms (default 5000).' },
288
+ },
289
+ },
290
+ },
291
+ {
292
+ name: 'stop_static_server',
293
+ description: 'Stop a static server started by start_static_server using its resource_id.',
294
+ input_schema: {
295
+ type: 'object',
296
+ properties: {
297
+ resource_id: { type: 'string', description: 'resource_id returned by start_static_server.' },
298
+ },
299
+ required: ['resource_id'],
300
+ },
301
+ },
302
+ {
303
+ name: 'pdf_info',
304
+ description: 'Validate a PDF file and return structured metadata such as bytes, page count when available, and hash. Use before reading, summarizing, or claiming a PDF artifact is valid.',
305
+ input_schema: {
306
+ type: 'object',
307
+ properties: {
308
+ file_path: { type: 'string', description: 'Path to the PDF file.' },
309
+ max_bytes: { type: 'number', description: 'Maximum allowed file size in bytes (default 32MB).' },
310
+ },
311
+ required: ['file_path'],
312
+ },
313
+ },
314
+ {
315
+ name: 'pdf_render_pages',
316
+ description: 'Render a bounded PDF page range to image previews using pdftoppm when available. Use page previews to visually inspect generated or input PDFs before claiming success.',
317
+ input_schema: {
318
+ type: 'object',
319
+ properties: {
320
+ file_path: { type: 'string', description: 'Path to the PDF file.' },
321
+ pages: { type: 'string', description: 'Page range like "1", "1-3", or "2-". Defaults to "1". Maximum 20 pages.' },
322
+ output_dir: { type: 'string', description: 'Directory for rendered preview images. Defaults to a temp directory.' },
323
+ dpi: { type: 'number', description: 'Render DPI, 72-200. Default 144.' },
324
+ },
325
+ required: ['file_path'],
326
+ },
327
+ },
328
+ {
329
+ name: 'pdf_read_pages',
330
+ description: 'Read text from a bounded PDF page range using pdftotext when available, and include PDF metadata. Use for PDF analysis before answering from a document.',
331
+ input_schema: {
332
+ type: 'object',
333
+ properties: {
334
+ file_path: { type: 'string', description: 'Path to the PDF file.' },
335
+ pages: { type: 'string', description: 'Page range like "1", "1-3", or "2-". Defaults to "1-5". Maximum 20 pages.' },
336
+ max_chars: { type: 'number', description: 'Maximum text characters to return (default 20000).' },
337
+ },
338
+ required: ['file_path'],
339
+ },
340
+ },
341
+ {
342
+ name: 'make_pdf',
343
+ description: 'Generate a PDF from Markdown or HTML through the configured make-pdf renderer, then validate the output and optionally render page previews. Use for PDF creation instead of claiming a document is done from source text alone.',
344
+ input_schema: {
345
+ type: 'object',
346
+ properties: {
347
+ input_path: { type: 'string', description: 'Markdown or HTML source file to render.' },
348
+ output_path: { type: 'string', description: 'Desired PDF output path. Defaults next to input.' },
349
+ title: { type: 'string', description: 'Optional document title metadata.' },
350
+ page_size: { type: 'string', description: 'Optional page size such as Letter or A4.' },
351
+ margins: { type: 'string', description: 'Optional margin preset/string if supported by renderer.' },
352
+ cover: { type: 'boolean', description: 'Ask the renderer for a cover page when supported.' },
353
+ toc: { type: 'boolean', description: 'Ask the renderer for a table of contents when supported.' },
354
+ watermark: { type: 'string', description: 'Optional watermark text when supported.' },
355
+ render_preview: { type: 'boolean', description: 'Render first-page previews after generation. Default true.' },
356
+ },
357
+ required: ['input_path'],
358
+ },
359
+ },
177
360
  {
178
361
  name: 'edit_file',
179
362
  description: 'Make a targeted edit to a file by replacing a string match. Uses a 9-strategy fuzzy matching chain — tolerates minor whitespace, indentation, and Unicode differences. More efficient than write_file for modifying existing files.',
@@ -332,8 +515,8 @@ const CODING_TOOLS = [
332
515
  // Inspired by OpenCode's agent types (build/plan/explore).
333
516
  // We simplify to tool filtering per phase since Wall-E has its own permission checker.
334
517
 
335
- const READ_ONLY_TOOL_NAMES = new Set(['read_file', 'glob', 'grep_files', 'list_directory', 'lsp_symbols', 'lsp_definition', 'lsp_references', 'lsp_diagnostics', 'lsp_hover', 'lsp_implementation']);
336
- const REVIEW_TOOL_NAMES = new Set(['read_file', 'glob', 'grep_files', 'list_directory', 'lsp_symbols', 'lsp_definition', 'lsp_references', 'lsp_diagnostics', 'lsp_hover', 'lsp_implementation']);
518
+ const READ_ONLY_TOOL_NAMES = new Set(['read_file', 'glob', 'grep_files', 'list_directory', 'pdf_info', 'pdf_read_pages', 'pdf_render_pages', 'lsp_symbols', 'lsp_definition', 'lsp_references', 'lsp_diagnostics', 'lsp_hover', 'lsp_implementation']);
519
+ const REVIEW_TOOL_NAMES = new Set(['read_file', 'glob', 'grep_files', 'list_directory', 'pdf_info', 'pdf_read_pages', 'pdf_render_pages', 'lsp_symbols', 'lsp_definition', 'lsp_references', 'lsp_diagnostics', 'lsp_hover', 'lsp_implementation']);
337
520
  // BUILD uses all CODING_TOOLS (default)
338
521
 
339
522
  const READ_ONLY_TOOLS = CODING_TOOLS.filter(t => READ_ONLY_TOOL_NAMES.has(t.name));
@@ -420,6 +603,7 @@ function toolRequiresPermission(name) {
420
603
  'apply_patch',
421
604
  'multi_edit',
422
605
  'browser_screenshot',
606
+ 'browser_smoke_test',
423
607
  'applescript',
424
608
  'claude_code',
425
609
  'mail_send',
@@ -486,6 +670,78 @@ function parsePlan(output) {
486
670
  throw new Error('Failed to parse plan: no valid JSON with non-empty subtasks array found');
487
671
  }
488
672
 
673
+ function safeBranchSlug(text, fallback = 'task') {
674
+ const slug = String(text || '')
675
+ .toLowerCase()
676
+ .replace(/[^a-z0-9]+/g, '-')
677
+ .replace(/^-+|-+$/g, '')
678
+ .slice(0, 48);
679
+ return slug || fallback;
680
+ }
681
+
682
+ function plannerOutputRequestsClarification(output = '') {
683
+ const text = contentToText(output).toLowerCase();
684
+ if (!text.trim()) return false;
685
+ return /\b(?:please|can you|could you)\s+(?:provide|clarify|tell me|share)\b/.test(text)
686
+ || /\b(?:need|needs|require|required)\s+(?:more|additional)\s+(?:information|context|details)\b/.test(text)
687
+ || /\b(?:which|what)\s+(?:file|directory|project|repo|repository|path)\b[\s\S]{0,120}\?/.test(text);
688
+ }
689
+
690
+ function plannerOutputRefusesTask(output = '') {
691
+ const text = contentToText(output).toLowerCase();
692
+ if (!text.trim()) return false;
693
+ return /\b(?:i\s+)?(?:cannot|can't|unable to|not able to)\b[\s\S]{0,160}\b(?:help|comply|perform|complete|do this task)\b/.test(text)
694
+ || /\b(?:unsafe|not allowed|forbidden|against policy)\b/.test(text);
695
+ }
696
+
697
+ function shouldRecoverPlannerParseFailure({ request, output, cwd } = {}) {
698
+ const requestText = contentToText(request);
699
+ if (!isActionRequiredPrompt(requestText, { mode: 'build' })) return false;
700
+ if (!cwd) return false;
701
+ const outputText = contentToText(output);
702
+ if (plannerOutputRequestsClarification(outputText)) return false;
703
+ if (plannerOutputRefusesTask(outputText)) return false;
704
+ return true;
705
+ }
706
+
707
+ function buildPlannerRecoveryPlan(request, context = {}, parseErr, plannerOutput = '') {
708
+ const filesHint = Object.keys(context.relevantFiles || {}).slice(0, 12);
709
+ const plannerNotes = [
710
+ context.plannerNotes ? `Planner exploration notes:\n${String(context.plannerNotes).slice(0, 2400)}` : '',
711
+ plannerOutput ? `Unstructured planner output excerpt:\n${contentToText(plannerOutput).slice(0, 1600)}` : '',
712
+ ].filter(Boolean).join('\n\n');
713
+ const promptLines = [
714
+ 'The planning model failed to return the strict JSON plan, so this is a recovery build pass.',
715
+ 'Do not stop at analysis, an audit, or another implementation plan.',
716
+ 'Inspect the current workspace, make the concrete code/file changes requested by the user, then run the most relevant verification available.',
717
+ 'If verification is blocked, provide tool-backed evidence of the blocker instead of claiming success.',
718
+ '',
719
+ `User request:\n${contentToText(request).trim()}`,
720
+ ];
721
+ if (plannerNotes) {
722
+ promptLines.push('', plannerNotes);
723
+ }
724
+ if (parseErr?.message) {
725
+ promptLines.push('', `Planner failure: ${parseErr.message}`);
726
+ }
727
+ return {
728
+ branch_name: `walle/direct-${safeBranchSlug(request)}`,
729
+ estimated_scope: 'recovered-single-pass',
730
+ planning_recovery: {
731
+ strategy: 'single_build_subtask',
732
+ reason: parseErr?.message || 'planner did not return valid plan JSON',
733
+ },
734
+ subtasks: [{
735
+ id: '1',
736
+ title: 'Implement request directly',
737
+ prompt: promptLines.join('\n'),
738
+ depends_on: [],
739
+ verify: { test: true, review: true },
740
+ files_hint: filesHint,
741
+ }],
742
+ };
743
+ }
744
+
489
745
  // buildSubtaskPrompt moved to coding-prompts.js (imported above).
490
746
 
491
747
  function contentToText(content) {
@@ -541,13 +797,29 @@ function isLegitimateNoEditResponse(content, toolCallHistory = []) {
541
797
  }
542
798
 
543
799
  const EDIT_TOOL_NAMES = new Set(['edit_file', 'write_file', 'apply_patch', 'multi_edit']);
544
- const MEANINGFUL_ACTION_TOOL_NAMES = new Set([
800
+ const CODING_EXECUTION_TOOL_NAMES = new Set([
545
801
  ...EDIT_TOOL_NAMES,
802
+ 'start_coding',
803
+ 'run_skill',
804
+ ]);
805
+ const MEANINGFUL_ACTION_TOOL_NAMES = new Set([
806
+ ...CODING_EXECUTION_TOOL_NAMES,
546
807
  'run_shell',
547
808
  'browser_screenshot',
809
+ 'browser_smoke_test',
810
+ 'check_url',
811
+ 'url_check',
812
+ 'pdf_info',
813
+ 'pdf_render_pages',
814
+ 'pdf_read_pages',
815
+ 'make_pdf',
548
816
  'mcp_call',
817
+ ]);
818
+ const SETUP_ONLY_TOOL_NAMES = new Set([
549
819
  'load_skill',
550
820
  'skill',
821
+ 'skill_loaded',
822
+ 'skill_load_failed',
551
823
  ]);
552
824
 
553
825
  const ACTION_REQUIRED_PROMPT_RE = /\b(fix|implement|improve|update|change|edit|modify|add|remove|refactor|build|create|write|generate|convert|repair|apply|run|test|verify|make)\b/i;
@@ -562,21 +834,12 @@ const PROSPECTIVE_WORK_RE = new RegExp([
562
834
  ].join(''), 'i');
563
835
 
564
836
  function hasToolCall(toolCallHistory = [], names = new Set()) {
565
- return (toolCallHistory || []).some((call) => names.has(call.name));
837
+ return (toolCallHistory || []).some((call) => names.has(callName(call)));
566
838
  }
567
839
 
568
- function isVerificationToolCall(call = {}) {
569
- const name = String(call.name || '');
570
- const input = String(call.inputHash || JSON.stringify(call.input || {}));
571
- if (name === 'browser_screenshot') return true;
572
- if (name === 'run_shell') {
573
- return /\b(?:test|spec|lint|build|typecheck|tsc|pytest|jest|mocha|vitest|playwright|node\s+--(?:test|check)|npm\s+(?:test|run)|pnpm\s+(?:test|run)|yarn\s+(?:test|run)|git\s+diff\s+--check)\b/i.test(input);
574
- }
575
- return /(?:test|verify|screenshot|diagnostic|lint|build)/i.test(name);
576
- }
577
-
578
- function hasVerificationEvidence(toolCallHistory = []) {
579
- return (toolCallHistory || []).some(isVerificationToolCall);
840
+ function onlySetupToolCalls(toolCallHistory = []) {
841
+ const calls = toolCallHistory || [];
842
+ return calls.length > 0 && calls.every((call) => SETUP_ONLY_TOOL_NAMES.has(callName(call)));
580
843
  }
581
844
 
582
845
  function isVerificationBlockerResponse(content) {
@@ -602,15 +865,60 @@ function isActionRequiredPrompt(prompt, { mode } = {}) {
602
865
  return true;
603
866
  }
604
867
 
868
+ function promptRequiresFileChanges(prompt, { mode } = {}) {
869
+ if (!isActionRequiredPrompt(prompt, { mode })) return false;
870
+ const intentText = stripPathLikeTokens(contentToText(prompt));
871
+ if (!FILE_CHANGE_PROMPT_RE.test(intentText)) return false;
872
+ if (NO_CHANGE_TASK_RE.test(intentText) && !/\b(improve|fix|implement|update|change|edit|modify|apply|make|write|create|build)\b/i.test(intentText)) {
873
+ return false;
874
+ }
875
+ return true;
876
+ }
877
+
878
+ function isReadOnlyCodingIntent(intent = null) {
879
+ if (!intent || typeof intent !== 'object') return false;
880
+ if (intent.readOnly === true || intent.kind === 'read_only') return true;
881
+ return intent.expectsChange === false && intent.reason === 'conversational_update_language';
882
+ }
883
+
605
884
  function isPrematureActionResponse(content) {
606
885
  const text = contentToText(content);
607
886
  if (!text.trim()) return false;
887
+ if (/\btool budget exhausted\b/i.test(text)) return true;
888
+ if (/\bwhat was not completed\b/i.test(text)) return true;
889
+ if (/\bnone of the proposed implementations were written\b/i.test(text)) return true;
890
+ if (/\bno changes were made\b/i.test(text) && /\b(?:not completed|failed|exhausted|recovery path)\b/i.test(text)) return true;
608
891
  if (PROSPECTIVE_WORK_RE.test(text)) return true;
609
892
  if (/\bwhat['’]?s wrong\b[\s\S]{0,400}\bfix:/i.test(text)) return true;
893
+ if (/\b(?:should i|shall i|do you want me to)\s+(?:proceed|continue|apply|implement|make|start|do)\b/i.test(text)) return true;
894
+ if (/\byour call\b[\s\S]{0,220}\b(?:proceed|continue|phase|prioriti[sz]e|pick|choose|apply|implement)\b/i.test(text)) return true;
895
+ if (/\b(?:implementation|fix|improvement)\s+plan\b/i.test(text)
896
+ && /\b(?:next steps?|recommendations?|roadmap|proceed|continue|apply|implement)\b/i.test(text)) return true;
610
897
  return false;
611
898
  }
612
899
 
900
+ // "Screenshot, self-critique, and fix visual issues" is conditional: the
901
+ // agent only writes files if it FINDS a problem in the screenshot. A clean
902
+ // run that finds nothing to fix is the GOOD outcome — but the title contains
903
+ // "fix", which the FILE_CHANGE_PROMPT_RE below would otherwise treat as an
904
+ // edit task and fail with "Subtask ended without file changes".
905
+ //
906
+ // We trigger only when (a) the title contains an explicit visual-verification
907
+ // keyword (screenshot / self-critique / visual review) AND (b) it LEADS with
908
+ // one. "Review and improve UX" is excluded by (a) — the "improve" is a real
909
+ // edit task, not conditional. "Fix issues found in screenshot" is excluded by
910
+ // (b) — the primary verb is "Fix", screenshot is just context.
911
+ const VERIFICATION_KEYWORD_RE = /\b(screenshot|self[-\s]?critique|browser[-\s]?screenshot|visual\s*review)\b/i;
912
+ const VERIFICATION_LEADS_RE = /^\s*(screenshot|self[-\s]?critique|visual\s*review|browser[-\s]?screenshot)\b/i;
913
+
914
+ function isVerificationPrimarySubtask(subtask = {}) {
915
+ const title = String(subtask.title || '');
916
+ if (!VERIFICATION_KEYWORD_RE.test(title)) return false;
917
+ return VERIFICATION_LEADS_RE.test(title);
918
+ }
919
+
613
920
  function subtaskRequiresFileChanges(subtask = {}) {
921
+ if (isVerificationPrimarySubtask(subtask)) return false;
614
922
  const text = stripPathLikeTokens(`${subtask.title || ''}\n${subtask.prompt || ''}`);
615
923
  if (!FILE_CHANGE_PROMPT_RE.test(text)) return false;
616
924
  if (NO_CHANGE_TASK_RE.test(text) && !/\b(improve|fix|implement|update|change|edit|modify|apply|make|write|create|build)\b/i.test(text)) {
@@ -620,19 +928,81 @@ function subtaskRequiresFileChanges(subtask = {}) {
620
928
  }
621
929
 
622
930
  function toolCallHistoryFromLog(log = []) {
623
- return (log || [])
624
- .flatMap((turn) => (turn.toolCalls || []).map((call) => ({
625
- name: call.name,
626
- inputHash: JSON.stringify(call.input || {}).slice(0, 500),
627
- })));
931
+ return (log || []).flatMap((turn) => {
932
+ const results = turn.toolResults || [];
933
+ return (turn.toolCalls || []).map((call, index) => {
934
+ const resultRecord = results[index] || {};
935
+ return normalizeToolCallEvidence(call, resultRecord.result || resultRecord);
936
+ });
937
+ });
628
938
  }
629
939
 
630
- function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode, toolsAvailable, nudges = 0, maxNudges = 2, cwd } = {}) {
940
+ function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode, toolsAvailable, nudges = 0, maxNudges = 2, cwd, codingIntent, intent } = {}) {
941
+ if (isReadOnlyCodingIntent(codingIntent || intent)) return null;
631
942
  if (!isActionRequiredPrompt(prompt, { mode })) return null;
632
943
 
633
944
  const madeEdits = hasToolCall(toolCallHistory, EDIT_TOOL_NAMES);
945
+ const requiresVisualEvidence = /\b(?:website|web\s*page|frontend|ui|ux|visual|responsive|mobile|layout|css|html)\b/i.test(contentToText(prompt));
946
+ const hasSuccessfulScreenshot = (toolCallHistory || []).some((call) => (
947
+ callName(call) === 'browser_screenshot' && toolResultSucceeded(call)
948
+ ));
949
+ const hasSuccessfulBrowserSmoke = (toolCallHistory || []).some((call) => (
950
+ callName(call) === 'browser_smoke_test' && toolResultSucceeded(call)
951
+ ));
952
+ const requiresPdfEvidence = /\b(?:make|generate|create|export|render|design|format|style|polish|typeset|print|convert)\b[\s\S]{0,80}\bpdf\b|\bpdf\b[\s\S]{0,80}\b(?:make|generate|create|export|render|design|format|style|polish|typeset|print|convert)\b/i.test(contentToText(prompt));
953
+ const touchedPdfFlow = madeEdits || hasToolCall(toolCallHistory, new Set(['run_shell', 'pdf_info', 'pdf_read_pages', 'pdf_render_pages']));
954
+ const hasSuccessfulPdfArtifact = (toolCallHistory || []).some((call) => {
955
+ const name = callName(call);
956
+ if (name !== 'make_pdf' && name !== 'pdf_info') return false;
957
+ if (!toolResultSucceeded(call)) return false;
958
+ const result = call.result && typeof call.result === 'object' ? call.result : call;
959
+ return Boolean(result.path || result.artifact?.path || result.bytes || result.sha256);
960
+ });
961
+ if (touchedPdfFlow && requiresPdfEvidence && !hasSuccessfulPdfArtifact && !isVerificationBlockerResponse(content)) {
962
+ const reason = hasFailedVerificationAttempt(toolCallHistory)
963
+ ? 'The assistant worked on PDF/document output but PDF artifact verification failed or produced no successful evidence.'
964
+ : 'The assistant worked on PDF/document output but ended before successful PDF artifact verification.';
965
+ if (!toolsAvailable) return { action: 'fail', reason: `${reason} No tool turns remain.` };
966
+ if (nudges >= maxNudges) return { action: 'fail', reason: `${reason} Verification continuation limit reached.` };
967
+ return {
968
+ action: 'continue',
969
+ reason,
970
+ message: `[SYSTEM] ${reason} This is not complete.\n` +
971
+ `For PDF/document generation, call make_pdf or otherwise validate the generated PDF with pdf_info and render at least one page with pdf_render_pages before claiming success.\n` +
972
+ `If PDF verification is genuinely impossible, state the exact failed tool result and do not claim the PDF is done.\n` +
973
+ `Working directory: ${cwd}`,
974
+ };
975
+ }
976
+ if (madeEdits && requiresVisualEvidence && (!hasSuccessfulScreenshot || !hasSuccessfulBrowserSmoke) && !isVerificationBlockerResponse(content)) {
977
+ const reason = hasFailedVerificationAttempt(toolCallHistory)
978
+ ? 'The assistant made frontend/UI changes but browser verification failed or produced incomplete evidence.'
979
+ : 'The assistant made frontend/UI changes but ended before successful browser screenshot and runtime smoke verification.';
980
+ if (!toolsAvailable) {
981
+ return {
982
+ action: 'fail',
983
+ reason: `${reason} No tool turns remain.`,
984
+ };
985
+ }
986
+ if (nudges >= maxNudges) {
987
+ return {
988
+ action: 'fail',
989
+ reason: `${reason} Verification continuation limit reached.`,
990
+ };
991
+ }
992
+ return {
993
+ action: 'continue',
994
+ reason,
995
+ message: `[SYSTEM] ${reason} This is not complete.\n` +
996
+ `For website/UI/UX/frontend work, capture browser_screenshot and run browser_smoke_test at the relevant file:// or verified local URL before claiming success. If a server is needed, use start_static_server then check_url before browser verification.\n` +
997
+ `If browser verification is genuinely impossible, state that blocker explicitly with the failed tool result and do not claim the website is ready.\n` +
998
+ `Working directory: ${cwd}`,
999
+ };
1000
+ }
1001
+ const failedVerification = hasFailedVerificationAttempt(toolCallHistory);
634
1002
  if (madeEdits && !hasVerificationEvidence(toolCallHistory) && !isVerificationBlockerResponse(content)) {
635
- const reason = 'The assistant made file changes but ended before running verification.';
1003
+ const reason = failedVerification
1004
+ ? 'The assistant made file changes but verification failed or produced no successful evidence.'
1005
+ : 'The assistant made file changes but ended before running verification.';
636
1006
  if (!toolsAvailable) {
637
1007
  return {
638
1008
  action: 'fail',
@@ -649,7 +1019,7 @@ function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode,
649
1019
  action: 'continue',
650
1020
  reason,
651
1021
  message: `[SYSTEM] ${reason} This is not complete.\n` +
652
- `Run the relevant verification now: tests, lint, build, typecheck, browser screenshot, or at minimum git diff --check when no project test exists.\n` +
1022
+ `Run the relevant verification now: tests, lint, build, typecheck, check_url/browser_screenshot for websites, or at minimum git diff --check when no project test exists.\n` +
653
1023
  `Only summarize success after a tool result proves the work. If verification is genuinely impossible, state the blocker with tool-backed evidence.\n` +
654
1024
  `Working directory: ${cwd}`,
655
1025
  };
@@ -657,8 +1027,41 @@ function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode,
657
1027
  if (madeEdits) return null;
658
1028
  if (isLegitimateNoEditResponse(content, toolCallHistory)) return null;
659
1029
 
1030
+ const ranCodingExecution = hasToolCall(toolCallHistory, CODING_EXECUTION_TOOL_NAMES);
660
1031
  const didMeaningfulAction = hasToolCall(toolCallHistory, MEANINGFUL_ACTION_TOOL_NAMES);
661
1032
  const premature = isPrematureActionResponse(content);
1033
+ if (promptRequiresFileChanges(prompt, { mode }) && !ranCodingExecution) {
1034
+ const reason = !toolCallHistory.length
1035
+ ? 'The assistant ended an action-oriented coding turn without using any tools.'
1036
+ : onlySetupToolCalls(toolCallHistory)
1037
+ ? 'The assistant only loaded skills or capability context and did not execute the requested coding change.'
1038
+ : premature
1039
+ ? 'The assistant ended with prospective work instead of executing it.'
1040
+ : 'The assistant inspected or diagnosed the requested change but did not execute a coding change.';
1041
+
1042
+ if (!toolsAvailable) {
1043
+ return {
1044
+ action: 'fail',
1045
+ reason: `${reason} No tool turns remain.`,
1046
+ };
1047
+ }
1048
+
1049
+ if (nudges >= maxNudges) {
1050
+ return {
1051
+ action: 'fail',
1052
+ reason: `${reason} Coding-execution continuation limit reached.`,
1053
+ };
1054
+ }
1055
+
1056
+ return {
1057
+ action: 'continue',
1058
+ reason,
1059
+ message: `[SYSTEM] ${reason} This is not complete.\n` +
1060
+ `Use the available tools now. In Wall-E chat, call start_coding for coding-agent work; in coding mode, inspect files, then edit/write/apply_patch to make the change, and run relevant verification.\n` +
1061
+ `Do not end with a plan, audit, diagnostic report, or "I will..." statement. Finish only after work is executed, or state a concrete blocker/no-change reason backed by tool results.\n` +
1062
+ `Working directory: ${cwd}`,
1063
+ };
1064
+ }
662
1065
  if (didMeaningfulAction && !premature) return null;
663
1066
 
664
1067
  const reason = !toolCallHistory.length
@@ -692,6 +1095,18 @@ function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode,
692
1095
  }
693
1096
 
694
1097
  function changedFilesSince(cwd, baseline = new Set()) {
1098
+ // Structured baseline from captureChangedFilesBaseline() — handles git AND non-git cwds.
1099
+ if (baseline && typeof baseline === 'object' && !(baseline instanceof Set)
1100
+ && !Array.isArray(baseline) && typeof baseline.isGit === 'boolean') {
1101
+ if (!baseline.isGit) {
1102
+ // Non-git working dir: `git status` throws here (caught → empty set), so the agent's
1103
+ // real writes would silently report as []. Detect created/modified files by mtime.
1104
+ return collectFilesModifiedSince(cwd, baseline.startedAtMs || 0);
1105
+ }
1106
+ const before = baseline.dirty instanceof Set ? baseline.dirty : new Set(baseline.dirty || []);
1107
+ return [...getGitChangedFiles(cwd)].filter((rel) => !before.has(rel));
1108
+ }
1109
+ // Legacy: a Set/array of pre-existing dirty git paths.
695
1110
  const before = baseline instanceof Set ? baseline : new Set(baseline || []);
696
1111
  return [...getGitChangedFiles(cwd)].filter((rel) => !before.has(rel));
697
1112
  }
@@ -700,6 +1115,25 @@ function isTimeoutOnlyOutput(output) {
700
1115
  return /^\s*\[Timeout reached\]\s*$/i.test(contentToText(output));
701
1116
  }
702
1117
 
1118
+ // Resolve the run's wall-clock timeout (ms). An explicit timeoutMs always wins.
1119
+ // Otherwise: headless/automated runs keep the 300s safety cap so CI/background work
1120
+ // can't hang; interactive runs (a user is present) get 0 = "no deadline" so they run
1121
+ // until the agent finishes or the user stops them (matches Claude Code / opencode).
1122
+ // Single source of truth for "is a human present this turn?" — used by both the run
1123
+ // deadline (resolveRunTimeoutMs) and the in-loop permission/acceptance behavior, so the
1124
+ // two can never drift. Interactive = no headless/benchmark and not explicitly opted out.
1125
+ function isInteractiveRun(opts = {}) {
1126
+ if (opts.interactive === true) return true;
1127
+ if (opts.interactive === false) return false;
1128
+ return !opts.headless && !opts.benchmark;
1129
+ }
1130
+
1131
+ function resolveRunTimeoutMs(opts = {}) {
1132
+ if (opts.timeoutMs) return opts.timeoutMs;
1133
+ const isInteractive = isInteractiveRun(opts);
1134
+ return isInteractive ? 0 : 300000;
1135
+ }
1136
+
703
1137
  function providerSupportsToolCalls(provider) {
704
1138
  if (!provider) return true;
705
1139
  if (provider.capabilities?.tools === false) return false;
@@ -763,6 +1197,25 @@ function createCodingCompactionService(provider, modelId, opts = {}) {
763
1197
  });
764
1198
  }
765
1199
 
1200
+ // A `stop` user hook can refuse to let the run finish (e.g. "tests must
1201
+ // pass"). Bounded: a flaky or unsatisfiable hook must not loop the agent
1202
+ // forever — after MAX_STOP_HOOK_BOUNCES the honest-failure path proceeds.
1203
+ const MAX_STOP_HOOK_BOUNCES = 3;
1204
+
1205
+ async function evaluateStopGate({ userHooks, log, sessionId, cwd, mode, turn, text }) {
1206
+ if (!userHooks || typeof userHooks.hasHooks !== 'function' || !userHooks.hasHooks('stop')) return null;
1207
+ const bounces = log._stopHookBounces || 0;
1208
+ if (bounces >= MAX_STOP_HOOK_BOUNCES) return null;
1209
+ const verdict = await userHooks.run('stop', { sessionId, cwd, mode, turn, text });
1210
+ if (verdict.decision !== 'deny') return null;
1211
+ log._stopHookBounces = bounces + 1;
1212
+ return {
1213
+ reason: verdict.reason,
1214
+ message: `A stop hook rejected finishing this task (attempt ${bounces + 1}/${MAX_STOP_HOOK_BOUNCES}): ${verdict.reason || 'no reason given'}\n` +
1215
+ 'Address the issue, then finish. If it is genuinely unresolvable, explain the exact blocker in your final summary.',
1216
+ };
1217
+ }
1218
+
766
1219
  async function maybeCompactCodingContext({
767
1220
  messages,
768
1221
  compactionService,
@@ -775,13 +1228,43 @@ async function maybeCompactCodingContext({
775
1228
  mode,
776
1229
  step = -1,
777
1230
  sessionMemory,
1231
+ userHooks = null,
778
1232
  reason = 'context_threshold',
779
1233
  opts = {},
780
1234
  } = {}) {
781
1235
  if (!compactionService || !Array.isArray(messages) || messages.length < 2) return null;
782
1236
  const systemTokens = estimateTokens(systemPrompt || '');
783
1237
  const estimatedInputTokens = systemTokens + estimateMessagesTokens(messages);
784
- if (!compactionService.shouldCompact({ messages, systemTokens })) return null;
1238
+
1239
+ // Cheap layer first: truncate OLD tool outputs before reaching for LLM
1240
+ // summarization. Rewriting old messages resets the prompt-cache prefix,
1241
+ // but pruning fires rarely (threshold crossing) and shrinks input enough
1242
+ // to amortize the one-turn cache miss.
1243
+ let pruneDetail = null;
1244
+ if (typeof compactionService.shouldPrune === 'function'
1245
+ && typeof compactionService.prune === 'function'
1246
+ && compactionService.shouldPrune({ messages, systemTokens })) {
1247
+ const pruneResult = compactionService.prune(messages);
1248
+ if (pruneResult?.pruned && Array.isArray(pruneResult.messages)) {
1249
+ messages.splice(0, messages.length, ...pruneResult.messages);
1250
+ pruneDetail = {
1251
+ prunedBlocks: pruneResult.prunedBlocks,
1252
+ tokensBefore: pruneResult.tokensBefore,
1253
+ tokensAfter: pruneResult.tokensAfter,
1254
+ };
1255
+ events?.emit?.('context.pruned', { sessionId, reason, ...pruneDetail });
1256
+ emitProgress?.({
1257
+ phase: mode || 'executing',
1258
+ step,
1259
+ message: `Pruned ${pruneResult.prunedBlocks} old tool output(s) (~${Math.max(0, pruneResult.tokensBefore - pruneResult.tokensAfter)} tokens)`,
1260
+ detail: pruneDetail,
1261
+ });
1262
+ }
1263
+ }
1264
+
1265
+ if (!compactionService.shouldCompact({ messages, systemTokens })) {
1266
+ return pruneDetail ? { compacted: false, pruned: true, ...pruneDetail } : null;
1267
+ }
785
1268
 
786
1269
  emitProgress?.({
787
1270
  phase: mode || 'executing',
@@ -789,6 +1272,8 @@ async function maybeCompactCodingContext({
789
1272
  message: 'Compacting coding context...',
790
1273
  });
791
1274
 
1275
+ if (userHooks?.runObserved) await userHooks.runObserved('pre_compact', { sessionId, cwd, reason });
1276
+
792
1277
  const result = await compactionService.compact(messages, {
793
1278
  sessionId,
794
1279
  cwd,
@@ -849,6 +1334,16 @@ async function runCliFallback(prompt, opts = {}, { sid, cwd, reason, fromProvide
849
1334
  detail: { reason, fromProvider },
850
1335
  });
851
1336
  }
1337
+ // Forward the run's auto-approval intent to the spawned CLI. The stream-native path
1338
+ // answers tool-permission requests in-process via headlessPolicy; the CLI fallback
1339
+ // spawns a real `claude`, so unless it is told to bypass permissions it silently
1340
+ // stalls in ask-mode and writes nothing. Mirror runAgentLoop's effective policy
1341
+ // (see headlessPolicy default below): an explicit opts.permissionMode wins, else
1342
+ // headlessPolicy:'allow' (or a benchmark run) maps to bypassPermissions; any other
1343
+ // policy leaves the CLI's default ask-mode intact.
1344
+ const effectiveHeadlessPolicy = opts.headlessPolicy || (opts.benchmark ? 'allow' : 'reject');
1345
+ const permissionMode = opts.permissionMode
1346
+ || (effectiveHeadlessPolicy === 'allow' ? 'bypassPermissions' : undefined);
852
1347
  const result = await runHeadless(prompt, {
853
1348
  cwd,
854
1349
  sessionId: sid,
@@ -857,6 +1352,8 @@ async function runCliFallback(prompt, opts = {}, { sid, cwd, reason, fromProvide
857
1352
  runnerId,
858
1353
  model,
859
1354
  mode: opts.mode || 'build',
1355
+ permissionMode,
1356
+ maxTurns: opts.maxTurns,
860
1357
  });
861
1358
  return {
862
1359
  ...result,
@@ -1031,6 +1528,226 @@ function collectEmptyChangedFiles(cwd, changedFiles) {
1031
1528
  return empties;
1032
1529
  }
1033
1530
 
1531
+ function changedFilesTouchFrontend(files = []) {
1532
+ return (files || []).some((file) => isFrontendFile(file));
1533
+ }
1534
+
1535
+ function emitAcceptanceValidatorProgress(onProgress, event = {}) {
1536
+ const payload = {
1537
+ type: 'acceptance_validator',
1538
+ phase: 'validating',
1539
+ step: event.step ?? -1,
1540
+ validator: event.validator || '',
1541
+ status: event.status || '',
1542
+ message: event.message || '',
1543
+ detail: event.detail || {},
1544
+ };
1545
+ try { onProgress?.(payload); } catch {}
1546
+ try {
1547
+ safeTelemetry()?.track?.('coding_acceptance_validator', {
1548
+ validator: payload.validator,
1549
+ status: payload.status,
1550
+ task_kind: event.taskKind || '',
1551
+ failures: event.failures || 0,
1552
+ });
1553
+ } catch {}
1554
+ }
1555
+
1556
+ function screenshotEvidenceExists(screenshots = [], toolCallHistory = []) {
1557
+ if (Array.isArray(screenshots) && screenshots.some((shot) => shot && (shot.path || shot.url))) return true;
1558
+ return (toolCallHistory || []).some((call) => (
1559
+ callName(call) === 'browser_screenshot' && toolResultSucceeded(call)
1560
+ ));
1561
+ }
1562
+
1563
+ async function runAcceptanceValidators({
1564
+ cwd,
1565
+ contract,
1566
+ changedFiles = [],
1567
+ screenshots = [],
1568
+ toolCallHistory = [],
1569
+ autoBrowser = false,
1570
+ requireBrowserRuntime = false,
1571
+ onProgress,
1572
+ step = -1,
1573
+ } = {}) {
1574
+ const validators = [];
1575
+ const concerns = [];
1576
+ const report = {
1577
+ ok: true,
1578
+ validators,
1579
+ concerns,
1580
+ frontend: null,
1581
+ };
1582
+ if (!contract?.requiresFrontendValidation) return report;
1583
+
1584
+ emitAcceptanceValidatorProgress(onProgress, {
1585
+ step,
1586
+ validator: 'frontend.static_contract',
1587
+ status: 'started',
1588
+ taskKind: contract.taskKind,
1589
+ });
1590
+ const staticVerdict = checkFrontendStaticContracts(cwd, changedFiles);
1591
+ report.frontend = { static: staticVerdict };
1592
+ if (!staticVerdict.ok) {
1593
+ const failure = validatorFailure(
1594
+ 'frontend.static_contract',
1595
+ `Frontend static contract failed: ${staticVerdict.concerns.slice(0, 3).join('; ')}`,
1596
+ staticVerdict
1597
+ );
1598
+ validators.push(failure);
1599
+ concerns.push(...staticVerdict.concerns);
1600
+ emitAcceptanceValidatorProgress(onProgress, {
1601
+ step,
1602
+ validator: failure.name,
1603
+ status: 'failed',
1604
+ message: failure.message,
1605
+ taskKind: contract.taskKind,
1606
+ failures: staticVerdict.concerns.length,
1607
+ });
1608
+ report.ok = false;
1609
+ return report;
1610
+ }
1611
+ validators.push(validatorPass('frontend.static_contract', 'Frontend static contract passed', staticVerdict));
1612
+ emitAcceptanceValidatorProgress(onProgress, {
1613
+ step,
1614
+ validator: 'frontend.static_contract',
1615
+ status: 'passed',
1616
+ taskKind: contract.taskKind,
1617
+ });
1618
+
1619
+ const hasScreenshot = screenshotEvidenceExists(screenshots, toolCallHistory);
1620
+ if (!hasScreenshot) {
1621
+ const failure = validatorFailure(
1622
+ 'frontend.screenshot_evidence',
1623
+ 'Frontend verification failed: no successful browser_screenshot evidence captured',
1624
+ { screenshots: screenshots.length }
1625
+ );
1626
+ validators.push(failure);
1627
+ concerns.push('[frontend-visual] No successful browser_screenshot evidence captured for frontend changes');
1628
+ report.ok = false;
1629
+ } else {
1630
+ validators.push(validatorPass('frontend.screenshot_evidence', 'Frontend screenshot evidence present', {
1631
+ screenshots: screenshots.length,
1632
+ }));
1633
+ }
1634
+
1635
+ const smokeEvidence = collectToolEvidence(toolCallHistory, 'browser_smoke_test');
1636
+ const failedSmoke = smokeEvidence.find((item) => !item.ok);
1637
+ if (failedSmoke) {
1638
+ const failure = validatorFailure(
1639
+ 'frontend.browser_runtime',
1640
+ 'Frontend browser runtime smoke test failed',
1641
+ failedSmoke.result
1642
+ );
1643
+ validators.push(failure);
1644
+ concerns.push('[frontend-runtime] Browser runtime smoke test failed');
1645
+ report.ok = false;
1646
+ return report;
1647
+ }
1648
+ if (smokeEvidence.some((item) => item.ok)) {
1649
+ validators.push(validatorPass('frontend.browser_runtime', 'Frontend browser runtime smoke evidence present', {
1650
+ evidence: smokeEvidence.length,
1651
+ }));
1652
+ return report;
1653
+ }
1654
+
1655
+ if (!requireBrowserRuntime) {
1656
+ validators.push(validatorPass('frontend.browser_runtime', 'Frontend browser runtime smoke deferred to final gate', {
1657
+ deferred: true,
1658
+ }));
1659
+ return report;
1660
+ }
1661
+
1662
+ if (!autoBrowser) {
1663
+ const failure = validatorFailure(
1664
+ 'frontend.browser_runtime',
1665
+ 'Frontend verification failed: no successful browser_smoke_test evidence captured',
1666
+ {}
1667
+ );
1668
+ validators.push(failure);
1669
+ concerns.push('[frontend-runtime] No successful browser_smoke_test evidence captured');
1670
+ report.ok = false;
1671
+ return report;
1672
+ }
1673
+
1674
+ const entrypoints = resolveFrontendEntrypoints(cwd, changedFiles);
1675
+ if (entrypoints.length === 0) {
1676
+ validators.push(validatorPass('frontend.browser_runtime', 'Frontend browser runtime smoke skipped: no HTML entrypoint found', {
1677
+ skipped: true,
1678
+ }));
1679
+ return report;
1680
+ }
1681
+
1682
+ const localTools = getLocalTools();
1683
+ if (!localTools.findChromeExecutable()) {
1684
+ const failure = validatorFailure(
1685
+ 'frontend.browser_runtime',
1686
+ 'Frontend browser runtime smoke test blocked: no Chromium-based browser found',
1687
+ { entrypoints }
1688
+ );
1689
+ validators.push(failure);
1690
+ concerns.push('[frontend-runtime] No Chromium-based browser available for browser_smoke_test');
1691
+ report.ok = false;
1692
+ return report;
1693
+ }
1694
+
1695
+ const smokeResults = [];
1696
+ for (const entrypoint of entrypoints.slice(0, 2)) {
1697
+ for (const viewport of ['desktop', 'mobile']) {
1698
+ emitAcceptanceValidatorProgress(onProgress, {
1699
+ step,
1700
+ validator: 'frontend.browser_runtime',
1701
+ status: 'started',
1702
+ taskKind: contract.taskKind,
1703
+ detail: { entrypoint, viewport },
1704
+ });
1705
+ const result = await localTools.browserSmokeTest({
1706
+ url: pathToFileURL(entrypoint).href,
1707
+ viewport,
1708
+ max_clicks: 25,
1709
+ settle_ms: 750,
1710
+ });
1711
+ smokeResults.push(result);
1712
+ if (!result.ok) {
1713
+ const failure = validatorFailure(
1714
+ 'frontend.browser_runtime',
1715
+ `Frontend browser runtime smoke failed for ${path.relative(cwd, entrypoint)} (${viewport})`,
1716
+ result
1717
+ );
1718
+ validators.push(failure);
1719
+ concerns.push(...(result.failures || []).slice(0, 5).map((item) => (
1720
+ `[frontend-runtime] ${item.type || 'failure'} ${item.exception || item.args || item.errorText || item.error || ''}`.trim()
1721
+ )));
1722
+ if (concerns.length === 0) concerns.push(`[frontend-runtime] ${failure.message}`);
1723
+ report.ok = false;
1724
+ report.frontend.browserSmoke = smokeResults;
1725
+ emitAcceptanceValidatorProgress(onProgress, {
1726
+ step,
1727
+ validator: failure.name,
1728
+ status: 'failed',
1729
+ message: failure.message,
1730
+ taskKind: contract.taskKind,
1731
+ failures: result.failures?.length || 1,
1732
+ });
1733
+ return report;
1734
+ }
1735
+ }
1736
+ }
1737
+ report.frontend.browserSmoke = smokeResults;
1738
+ validators.push(validatorPass('frontend.browser_runtime', 'Frontend browser runtime smoke passed', {
1739
+ entrypoints: entrypoints.map((file) => path.relative(cwd, file)),
1740
+ runs: smokeResults.length,
1741
+ }));
1742
+ emitAcceptanceValidatorProgress(onProgress, {
1743
+ step,
1744
+ validator: 'frontend.browser_runtime',
1745
+ status: 'passed',
1746
+ taskKind: contract.taskKind,
1747
+ });
1748
+ return report;
1749
+ }
1750
+
1034
1751
  function collectEditedFilePaths(toolName, args = {}, result = {}) {
1035
1752
  const editTools = new Set(['edit_file', 'write_file', 'apply_patch', 'multi_edit']);
1036
1753
  if (!editTools.has(toolName)) return [];
@@ -1150,10 +1867,11 @@ async function shutdownPostEditMiddleware(state) {
1150
1867
  async function runAgentLoop(prompt, opts = {}) {
1151
1868
  const { cwd, timeoutMs, maxTurns, provider, model, tools, onProgress } = opts;
1152
1869
  const explicitProvider = !!provider;
1153
- const sid = opts._resumeSessionId || crypto.randomUUID();
1870
+ const sid = opts._resumeSessionId || opts.runSessionId || opts.agentRunId || crypto.randomUUID();
1871
+ const codingIntent = opts.codingIntent || opts.intent || null;
1154
1872
 
1155
1873
  // Persist activity start (Phase 2: Activity History)
1156
- const isResume = !!opts._resumeSessionId;
1874
+ const isResume = Boolean(opts._resumeSessionId && opts._resumeMessages);
1157
1875
  try { getActivityLog().log({ session_id: sid, type: isResume ? 'coding_resume' : 'coding_start', title: isResume ? 'Coding session resumed' : 'Coding session started', body: prompt.slice(0, 200) }); } catch {}
1158
1876
 
1159
1877
  // Helper: emit progress both to callback and global emitter
@@ -1164,8 +1882,34 @@ async function runAgentLoop(prompt, opts = {}) {
1164
1882
  // Also forward to per-task event bus if available (A3 unification)
1165
1883
  if (events) events.emit('progress', full);
1166
1884
  }
1167
- const timeout = timeoutMs || 300000;
1168
- const deadline = Date.now() + timeout;
1885
+ const externalSignal = opts.signal || opts.abortSignal || null;
1886
+ const throwIfExternalAbort = () => {
1887
+ if (!externalSignal?.aborted) return;
1888
+ const err = new Error('Cancelled');
1889
+ err.code = 'WALLE_CANCELLED';
1890
+ throw err;
1891
+ };
1892
+ const linkExternalAbort = (controller) => {
1893
+ if (!externalSignal || !controller) return () => {};
1894
+ if (externalSignal.aborted) {
1895
+ try { controller.abort(); } catch {}
1896
+ return () => {};
1897
+ }
1898
+ const onAbort = () => {
1899
+ try { controller.abort(); } catch {}
1900
+ };
1901
+ externalSignal.addEventListener('abort', onAbort, { once: true });
1902
+ return () => {
1903
+ try { externalSignal.removeEventListener('abort', onAbort); } catch {}
1904
+ };
1905
+ };
1906
+ // Interactive sessions (a user is watching and approving) must not be killed by a
1907
+ // wall-clock deadline — they run until the agent finishes or the user stops them,
1908
+ // like Claude Code / opencode. The 300s default is only for headless/automated
1909
+ // runs (so CI/background work can't hang). An explicit timeoutMs always wins.
1910
+ const interactiveRun = isInteractiveRun(opts);
1911
+ const timeout = resolveRunTimeoutMs(opts);
1912
+ const deadline = timeout > 0 ? Date.now() + timeout : Infinity;
1169
1913
  let turns = maxTurns || MAX_AGENT_TURNS;
1170
1914
  const log = []; // training data: every turn logged
1171
1915
 
@@ -1178,8 +1922,10 @@ async function runAgentLoop(prompt, opts = {}) {
1178
1922
  }
1179
1923
  const modelId = resolveModelId(model, llm);
1180
1924
  const resolvedCwd = realpathBestEffort(cwd || process.cwd());
1181
- const preRunDirtyFiles = getGitChangedFiles(resolvedCwd);
1182
- const agentRunContext = resolveAgentRunContext({
1925
+ // Baseline for post-run change detection. Works in non-git cwds too (mtime-based) so
1926
+ // an agent that writes into a plain folder doesn't report changedFiles: [].
1927
+ const preRunFileBaseline = captureChangedFilesBaseline(resolvedCwd);
1928
+ const wallERuntimeProfile = resolveWallERuntimeProfile({
1183
1929
  ...opts,
1184
1930
  channel: opts.channel || 'coding',
1185
1931
  agentMode: opts.agentMode || 'coding',
@@ -1190,16 +1936,30 @@ async function runAgentLoop(prompt, opts = {}) {
1190
1936
  chatSessionId: opts.chatSessionId || opts.session_id || '',
1191
1937
  cwd: resolvedCwd,
1192
1938
  });
1193
- emitAgentRunContextWarnings(agentRunContext, { telemetry: safeTelemetry() });
1939
+ const agentRunContext = wallERuntimeProfile.context;
1940
+ emitAgentRunContextWarnings({ ...agentRunContext, warnings: wallERuntimeProfile.warnings }, { telemetry: safeTelemetry() });
1194
1941
  const promptCapabilityHints = opts.promptCapabilityHints || parsePromptCapabilityHints(prompt);
1195
1942
  const capabilities = resolveCodingCapabilities({ ...opts, promptCapabilityHints }, {
1196
1943
  cwd: resolvedCwd,
1197
1944
  brain: opts.brain || null,
1198
1945
  });
1199
1946
  const taskFileHints = extractTaskFileHints(prompt);
1947
+ const artifactCapabilities = routeArtifactCapabilities({
1948
+ prompt,
1949
+ taskFileHints,
1950
+ projectInfo: null,
1951
+ });
1200
1952
  const runtimeMode = resolveRuntimeMode(opts);
1201
1953
  const baseTools = Array.isArray(tools) ? tools : getToolsForMode(opts.mode || 'build');
1202
1954
  const requestedTools = filterToolsForRuntimeMode(baseTools, runtimeMode);
1955
+ const transcriptMessageOwner = String(opts.transcriptMessageOwner || opts.transcript_message_owner || '').toLowerCase();
1956
+ const externalTranscriptMessages = opts.externalTranscriptMessages === true
1957
+ || opts.external_transcript_messages === true
1958
+ || opts.skipTranscriptMessages === true
1959
+ || opts.skip_transcript_messages === true
1960
+ || transcriptMessageOwner === 'ctm'
1961
+ || transcriptMessageOwner === 'host'
1962
+ || transcriptMessageOwner === 'external';
1203
1963
  const transcript = createCodingTranscript({
1204
1964
  transcript: opts.transcript,
1205
1965
  persistTranscript: opts.persistTranscript,
@@ -1230,7 +1990,7 @@ async function runAgentLoop(prompt, opts = {}) {
1230
1990
  mode: opts.mode || '',
1231
1991
  });
1232
1992
  }
1233
- if (!opts._resumeMessages && transcript?.appendUserMessage) {
1993
+ if (!externalTranscriptMessages && !opts._resumeMessages && transcript?.appendUserMessage) {
1234
1994
  transcript.appendUserMessage(prompt, {
1235
1995
  sessionId: sid,
1236
1996
  cwd: resolvedCwd,
@@ -1288,7 +2048,7 @@ async function runAgentLoop(prompt, opts = {}) {
1288
2048
  }
1289
2049
  } catch {}
1290
2050
 
1291
- if (isFrontendTask(taskFileHints, prompt)
2051
+ if ((hasCapability(artifactCapabilities, 'frontend_design') || isFrontendTask(taskFileHints, prompt))
1292
2052
  && !projectSkills.some((s) => s && s.name === 'frontend-design')) {
1293
2053
  projectSkills = [
1294
2054
  ...projectSkills,
@@ -1322,6 +2082,24 @@ async function runAgentLoop(prompt, opts = {}) {
1322
2082
  }
1323
2083
  promptCapabilities = await loadRequestedSkillInstructions(promptCapabilities, capabilities.skillRunner);
1324
2084
 
2085
+ if (artifactCapabilities.length && transcript?.appendPart) {
2086
+ transcript.appendPart({
2087
+ sessionId: sid,
2088
+ cwd: resolvedCwd,
2089
+ partType: 'capability_routed',
2090
+ data: {
2091
+ type: 'capability_routed',
2092
+ capabilities: artifactCapabilities.map((capability) => ({
2093
+ id: capability.id,
2094
+ label: capability.label,
2095
+ tools: capability.tools,
2096
+ requiredArtifacts: capability.requiredArtifacts,
2097
+ completionGate: capability.completionGate,
2098
+ })),
2099
+ },
2100
+ });
2101
+ }
2102
+
1325
2103
  // Build system prompt with project context.
1326
2104
  const systemPrompt = buildAgentSystemPrompt({
1327
2105
  resolvedCwd,
@@ -1329,9 +2107,13 @@ async function runAgentLoop(prompt, opts = {}) {
1329
2107
  projectSkills,
1330
2108
  taskFileHints,
1331
2109
  runtimeMode,
2110
+ mode: opts.mode,
2111
+ provider: llm.type || '',
2112
+ model: modelId,
1332
2113
  runtimeContext: {
1333
2114
  memoryToolsAvailable: Boolean(capabilities.mcpClient),
1334
2115
  promptCapabilities,
2116
+ artifactCapabilities,
1335
2117
  userTask: prompt,
1336
2118
  },
1337
2119
  });
@@ -1339,6 +2121,7 @@ async function runAgentLoop(prompt, opts = {}) {
1339
2121
  // Resume support: use restored messages if resuming from checkpoint
1340
2122
  const messages = opts._resumeMessages || [{ role: 'user', content: prompt }];
1341
2123
  let finalOutput = '';
2124
+ let finalAnswerDelivered = false;
1342
2125
  let totalInput = 0;
1343
2126
  let totalOutput = 0;
1344
2127
  let consecutiveErrors = 0;
@@ -1372,10 +2155,12 @@ async function runAgentLoop(prompt, opts = {}) {
1372
2155
 
1373
2156
  mw.use('tool.after', screenshotTrackerHook(screenshotsTaken));
1374
2157
  const events = opts.events || new CodingEvents();
1375
- const { PermissionService } = require('./coding/permission-service');
2158
+ const { PermissionService, WAIT_FOR_REPLY } = require('./coding/permission-service');
1376
2159
  const permissionService = opts.permissionService || new PermissionService({
1377
2160
  events,
1378
- timeoutMs: opts.permissionTimeoutMs,
2161
+ // Interactive runs wait for the user to approve (no auto-deny timeout); headless
2162
+ // runs resolve immediately via headlessPolicy, so the timeout never applies there.
2163
+ timeoutMs: opts.permissionTimeoutMs ?? (interactiveRun ? WAIT_FOR_REPLY : undefined),
1379
2164
  headlessPolicy: opts.headlessPolicy || (opts.benchmark ? 'allow' : 'reject'),
1380
2165
  });
1381
2166
  const { AgentCatalog } = require('./coding/agent-catalog');
@@ -1401,7 +2186,7 @@ async function runAgentLoop(prompt, opts = {}) {
1401
2186
  headless: opts.headless,
1402
2187
  benchmark: opts.benchmark,
1403
2188
  headlessPolicy: opts.headlessPolicy,
1404
- _resumeSessionId: taskId,
2189
+ runSessionId: taskId,
1405
2190
  enableTaskTool: false,
1406
2191
  brain: opts.brain || null,
1407
2192
  mcpClient: capabilities.mcpClient,
@@ -1437,6 +2222,102 @@ async function runAgentLoop(prompt, opts = {}) {
1437
2222
  // Inspired by OpenCode Question service (packages/opencode/src/question/index.ts)
1438
2223
  const questionManager = opts.questionManager || new QuestionManager(events);
1439
2224
  const compactionService = createCodingCompactionService(llm, modelId, opts);
2225
+ const { RuntimeEventWriter } = require('./coding/runtime-events');
2226
+ const {
2227
+ appendPromptManifest,
2228
+ buildCodingPromptManifest,
2229
+ } = require('./coding/prompt-section-registry');
2230
+ const runtimeEvents = opts.runtimeEvents || new RuntimeEventWriter({
2231
+ transcript,
2232
+ events,
2233
+ defaults: {
2234
+ sessionId: sid,
2235
+ agentSessionId: agentRunContext.agentSessionId || sid,
2236
+ cwd: resolvedCwd,
2237
+ provider: llm.type || '',
2238
+ model: modelId,
2239
+ actor: agentRunContext.agentKind || 'walle-coding',
2240
+ },
2241
+ });
2242
+ const promptManifest = opts.promptManifest || buildCodingPromptManifest({
2243
+ systemPrompt,
2244
+ userTask: prompt,
2245
+ provider: llm.type || '',
2246
+ model: modelId,
2247
+ runtimeMode: runtimeMode.id,
2248
+ tools: requestedTools,
2249
+ promptCapabilities,
2250
+ metadata: {
2251
+ sessionId: sid,
2252
+ agentKind: agentRunContext.agentKind,
2253
+ agentMode: agentRunContext.agentMode,
2254
+ runtimeProfile: wallERuntimeProfile.profileId,
2255
+ persistenceProfile: wallERuntimeProfile.persistenceProfile,
2256
+ permissionProfile: wallERuntimeProfile.permissionProfile,
2257
+ outputContract: wallERuntimeProfile.outputContract,
2258
+ mode: opts.mode || '',
2259
+ },
2260
+ });
2261
+ appendPromptManifest(transcript, promptManifest, {
2262
+ sessionId: sid,
2263
+ cwd: resolvedCwd,
2264
+ chatSessionId: opts.chatSessionId || '',
2265
+ });
2266
+ runtimeEvents.emit({
2267
+ type: 'prompt_built',
2268
+ payload: {
2269
+ promptManifestId: promptManifest.promptManifestId,
2270
+ stableHash: promptManifest.stableHash,
2271
+ dynamicHash: promptManifest.dynamicHash,
2272
+ stableSectionCount: promptManifest.stableSectionCount,
2273
+ dynamicSectionCount: promptManifest.dynamicSectionCount,
2274
+ tokenEstimate: promptManifest.tokenEstimate,
2275
+ },
2276
+ });
2277
+ const { LifecycleHookBus } = require('./coding/lifecycle-hooks');
2278
+ const { ToolExecutionController } = require('./coding/tool-execution-controller');
2279
+ const lifecycleHooks = opts.lifecycleHooks || new LifecycleHookBus({
2280
+ events,
2281
+ middleware: mw,
2282
+ runtimeEvents,
2283
+ defaults: {
2284
+ sessionId: sid,
2285
+ agentSessionId: agentRunContext.agentSessionId || sid,
2286
+ cwd: resolvedCwd,
2287
+ provider: llm.type || '',
2288
+ model: modelId,
2289
+ actor: agentRunContext.agentKind || 'walle-coding',
2290
+ },
2291
+ });
2292
+ // User-defined lifecycle hooks (.walle/hooks.json). `opts.userHooks` may
2293
+ // inject a prebuilt instance (tests) or `null` to disable.
2294
+ const { createUserHooks } = require('./coding/user-hooks');
2295
+ const userHooks = opts.userHooks !== undefined
2296
+ ? opts.userHooks
2297
+ : createUserHooks({ projectRoot: resolvedCwd, cwd: resolvedCwd });
2298
+ if (userHooks) {
2299
+ emitProgress({ phase: opts.mode || 'executing', step: -1, message: `User hooks active (${userHooks.hooks.length})` });
2300
+ userHooks.runObserved('session_start', { sessionId: sid, cwd: resolvedCwd, mode: opts.mode || 'build' });
2301
+ }
2302
+
2303
+ const toolExecutionController = opts.toolExecutionController || new ToolExecutionController({
2304
+ toolRegistry,
2305
+ middleware: mw,
2306
+ permissionService,
2307
+ questionManager,
2308
+ events,
2309
+ lifecycleHooks,
2310
+ cwd: resolvedCwd,
2311
+ projectRoot: resolvedCwd,
2312
+ sessionId: sid,
2313
+ provider: llm.type || '',
2314
+ model: modelId,
2315
+ mode: opts.mode || '',
2316
+ runtimeMode: runtimeMode.id,
2317
+ headless: Boolean(opts.headless),
2318
+ benchmark: Boolean(opts.benchmark),
2319
+ userHooks,
2320
+ });
1440
2321
 
1441
2322
  // projectInfo already detected above (before system prompt)
1442
2323
  const llmCtxRef = { current: null }; // populated each turn (see llmCtx below)
@@ -1448,47 +2329,87 @@ async function runAgentLoop(prompt, opts = {}) {
1448
2329
  // fall back to the legacy whole-response loop.
1449
2330
  if (shouldUseStreamProcessor(opts)) {
1450
2331
  const { StreamProcessor } = require('./coding/stream-processor');
1451
- const { SnapshotService } = require('./coding/snapshot-service');
2332
+ const { SnapshotService, BoundaryStore } = require('./coding/snapshot-service');
2333
+ const streamToolExecutionController = new ToolExecutionController({
2334
+ toolRegistry,
2335
+ middleware: mw,
2336
+ permissionService: null,
2337
+ questionManager,
2338
+ events,
2339
+ lifecycleHooks,
2340
+ cwd: resolvedCwd,
2341
+ projectRoot: resolvedCwd,
2342
+ sessionId: sid,
2343
+ provider: llm.type || '',
2344
+ model: modelId,
2345
+ mode: opts.mode || '',
2346
+ runtimeMode: runtimeMode.id,
2347
+ headless: Boolean(opts.headless),
2348
+ benchmark: Boolean(opts.benchmark),
2349
+ handlePermissions: false,
2350
+ userHooks,
2351
+ });
1452
2352
  const processor = new StreamProcessor({
1453
2353
  provider: llm,
1454
2354
  model: modelId,
1455
2355
  transcript,
1456
- snapshotService: opts.snapshotService || new SnapshotService({ cwd: resolvedCwd }),
2356
+ snapshotService: opts.snapshotService || new SnapshotService({
2357
+ cwd: resolvedCwd,
2358
+ // Whole-worktree step snapshots + restart-surviving boundaries for
2359
+ // the rewind API. WALLE_WORKTREE_SNAPSHOTS=0 disables.
2360
+ worktreeSnapshots: process.env.WALLE_WORKTREE_SNAPSHOTS !== '0' && !opts.benchmark,
2361
+ boundaryStore: new BoundaryStore(),
2362
+ }),
1457
2363
  permissionService,
1458
2364
  headless: Boolean(opts.headless || opts.benchmark),
1459
2365
  toolExecutor: async (call) => {
1460
- const input = { ...(call.input || {}) };
1461
- if (['read_file', 'write_file', 'edit_file'].includes(call.name)) {
1462
- if (input.file_path && !path.isAbsolute(input.file_path)) input.file_path = path.join(resolvedCwd, input.file_path);
1463
- if (!input.file_path && input.path) input.file_path = path.isAbsolute(input.path) ? input.path : path.join(resolvedCwd, input.path);
1464
- }
1465
- if (call.name === 'list_directory' && input.directory && !path.isAbsolute(input.directory)) {
1466
- input.directory = path.join(resolvedCwd, input.directory);
1467
- }
1468
- if (call.name === 'run_shell' && !input.cwd) {
1469
- input.cwd = resolvedCwd;
1470
- }
1471
- input.sessionId = sid;
1472
- input.projectRoot = resolvedCwd;
1473
- const toolCtx = { sessionId: sid, cwd: resolvedCwd, model: modelId, provider: llm.type, runtimeMode: runtimeMode.id };
1474
- const finalInput = await mw.run('tool.before', toolCtx, call.name, input);
1475
- const result = await toolRegistry.execute(call.name, finalInput, toolCtx);
1476
- return mw.run('tool.after', toolCtx, call.name, finalInput, result);
2366
+ const execution = await streamToolExecutionController.execute(call, {
2367
+ sessionId: sid,
2368
+ cwd: resolvedCwd,
2369
+ projectRoot: resolvedCwd,
2370
+ model: modelId,
2371
+ provider: llm.type,
2372
+ mode: opts.mode || '',
2373
+ runtimeMode: runtimeMode.id,
2374
+ interactive: opts.interactive,
2375
+ onTodos: (todos) => { currentTodos = todos; },
2376
+ });
2377
+ return execution.result;
1477
2378
  },
1478
2379
  });
1479
- processor.on('event', (evt) => emitProgress({
1480
- phase: opts.mode || 'executing',
1481
- step: 0,
1482
- message: evt.type,
1483
- detail: evt,
1484
- }));
2380
+ processor.on('event', (evt) => {
2381
+ // Forward structured runtime events with their top-level `type` intact.
2382
+ // CTM (server.js onEvent: event.type === 'lane_event'/'permission_resolved')
2383
+ // and the browser (walle-session.js: switch(ev.type) → case 'permission_request')
2384
+ // both dispatch on the top-level type, so wrapping these into
2385
+ // {phase,step,message,detail} silently swallowed the live approval card and
2386
+ // the "Needs You" wait state — the request then parked until the user
2387
+ // reloaded (the durable restore card in walle-ctm-history.js still worked).
2388
+ // Keep approval + lane events un-wrapped so a watching client surfaces the
2389
+ // card and waiting state without a reload.
2390
+ if (evt && [
2391
+ 'tool_call', 'tool_result', 'tool_done', 'skill_loaded', 'skill_load_failed',
2392
+ 'permission_request', 'permission_resolved', 'permission_denied', 'lane_event',
2393
+ ].includes(evt.type)) {
2394
+ emitProgress(evt);
2395
+ return;
2396
+ }
2397
+ emitProgress({
2398
+ phase: opts.mode || 'executing',
2399
+ step: 0,
2400
+ message: evt?.type || 'event',
2401
+ detail: evt,
2402
+ });
2403
+ });
1485
2404
 
1486
2405
  let streamStatus = 'finished';
1487
2406
  let streamStopReason = '';
1488
2407
  let streamModel = modelId;
1489
2408
  const streamErrors = [];
2409
+ let streamProviderError = null;
1490
2410
  let streamHadEdit = false;
1491
2411
  for (let turnIndex = opts._resumeTurn || 0; turnIndex < turns; turnIndex++) {
2412
+ throwIfExternalAbort();
1492
2413
  const remaining = deadline - Date.now();
1493
2414
  if (remaining <= 0) {
1494
2415
  streamStatus = 'error';
@@ -1503,6 +2424,7 @@ async function runAgentLoop(prompt, opts = {}) {
1503
2424
  });
1504
2425
  const perTurnCap = opts.perTurnTimeoutMs || (/ollama|mlx/.test(llm.type || '') ? 600000 : 300000);
1505
2426
  const ac = new AbortController();
2427
+ const unlinkExternalAbort = linkExternalAbort(ac);
1506
2428
  const timer = setTimeout(() => ac.abort(), Math.min(remaining, perTurnCap));
1507
2429
  let turn;
1508
2430
  let toolsForTurn = [];
@@ -1518,7 +2440,7 @@ async function runAgentLoop(prompt, opts = {}) {
1518
2440
  });
1519
2441
  const llmCtx = { params: createInitialLlmParams(opts, taskFileHints.length >= 4 ? 8192 : 4096), system: systemPrompt, cwd: resolvedCwd,
1520
2442
  provider: llm.type, model: modelId, mode: opts.mode, runtimeMode: runtimeMode.id, claudeMd: opts.claudeMd, log: {},
1521
- toolsAvailable: toolsForTurn.length > 0, promptCapabilities };
2443
+ toolsAvailable: toolsForTurn.length > 0, promptCapabilities, promptManifest };
1522
2444
  llmCtxRef.current = llmCtx;
1523
2445
  await mw.run('llm.before', llmCtx);
1524
2446
  await maybeCompactCodingContext({
@@ -1533,6 +2455,7 @@ async function runAgentLoop(prompt, opts = {}) {
1533
2455
  mode: opts.mode || 'executing',
1534
2456
  step: turnIndex,
1535
2457
  sessionMemory: opts.sessionMemory,
2458
+ userHooks,
1536
2459
  reason: 'stream_pre_turn',
1537
2460
  opts,
1538
2461
  });
@@ -1549,9 +2472,11 @@ async function runAgentLoop(prompt, opts = {}) {
1549
2472
  thinking: llmCtx.params.thinking,
1550
2473
  reasoningEffort: llmCtx.params.reasoningEffort,
1551
2474
  options: llmCtx.params.options,
2475
+ promptCache: true,
1552
2476
  });
1553
2477
  } finally {
1554
2478
  clearTimeout(timer);
2479
+ unlinkExternalAbort();
1555
2480
  }
1556
2481
 
1557
2482
  totalInput += turn.usage?.input || 0;
@@ -1560,17 +2485,25 @@ async function runAgentLoop(prompt, opts = {}) {
1560
2485
  streamStopReason = turn.stopReason || streamStopReason;
1561
2486
  streamModel = turn.model || streamModel;
1562
2487
  if (turn.errors?.length) streamErrors.push(...turn.errors);
2488
+ if (turn.providerError) streamProviderError = turn.providerError;
1563
2489
  if (turn.text) finalOutput += turn.text;
1564
2490
  const streamToolCalls = (turn.toolCalls || []).map(tc => ({ name: tc.name, input: tc.input }));
1565
- toolCallHistory.push(...streamToolCalls.map(tc => ({
1566
- name: tc.name,
1567
- inputHash: JSON.stringify(tc.input || {}).slice(0, 500),
1568
- })));
2491
+ const streamToolResults = turn.toolResults || [];
2492
+ toolCallHistory.push(...streamToolCalls.map((tc, index) => {
2493
+ const resultRecord = streamToolResults[index] || {};
2494
+ return normalizeToolCallEvidence(tc, resultRecord.result || resultRecord);
2495
+ }));
1569
2496
  log.push({
1570
2497
  turn: turnIndex,
1571
2498
  model: turn.model || modelId,
1572
2499
  provider: turn.provider || llm.type,
1573
2500
  toolCalls: streamToolCalls,
2501
+ toolResults: streamToolResults.map((record) => ({
2502
+ name: record.name,
2503
+ ok: normalizeToolCallEvidence(record, record.result || record).ok === true,
2504
+ error: record.error || record.result?.error || null,
2505
+ result: record.result || null,
2506
+ })),
1574
2507
  content: turn.text,
1575
2508
  stopReason: turn.stopReason,
1576
2509
  });
@@ -1586,6 +2519,7 @@ async function runAgentLoop(prompt, opts = {}) {
1586
2519
  toolsAvailable: toolsForTurn.length > 0,
1587
2520
  nudges: log._noActionNudges || 0,
1588
2521
  cwd: resolvedCwd,
2522
+ codingIntent,
1589
2523
  });
1590
2524
  if (continuation?.action === 'continue') {
1591
2525
  log._noActionNudges = (log._noActionNudges || 0) + 1;
@@ -1600,6 +2534,16 @@ async function runAgentLoop(prompt, opts = {}) {
1600
2534
  emitProgress({ phase: opts.mode || 'executing', step: turnIndex, message: 'Action guard failed incomplete no-tool response', detail: { reason: continuation.reason } });
1601
2535
  break;
1602
2536
  }
2537
+ const stopGate = await evaluateStopGate({
2538
+ userHooks, log, sessionId: sid, cwd: resolvedCwd, mode: opts.mode, turn: turnIndex, text: contentToText(turn.text),
2539
+ });
2540
+ if (stopGate) {
2541
+ if (turn.assistantMessage) messages.push(turn.assistantMessage);
2542
+ messages.push({ role: 'user', content: stopGate.message });
2543
+ emitProgress({ phase: opts.mode || 'executing', step: turnIndex, message: 'Stop hook rejected completion — continuing', detail: { reason: stopGate.reason } });
2544
+ continue;
2545
+ }
2546
+ if (contentToText(turn.text).trim()) finalAnswerDelivered = true;
1603
2547
  }
1604
2548
  if (turn.assistantMessage) messages.push(turn.assistantMessage);
1605
2549
  if (turn.toolResultMessage) messages.push(turn.toolResultMessage);
@@ -1627,10 +2571,15 @@ async function runAgentLoop(prompt, opts = {}) {
1627
2571
  sessionId: sid,
1628
2572
  cwd: resolvedCwd,
1629
2573
  partType: 'error',
1630
- data: { errors: streamErrors },
2574
+ data: streamProviderError
2575
+ // Classified provider failure: surface the friendly, actionable message
2576
+ // (parity with the chat path) instead of a raw "fetch failed". Raw text
2577
+ // is retained in `errors` for debugging.
2578
+ ? { message: streamProviderError.userMessage, providerError: streamProviderError, errors: streamErrors }
2579
+ : { errors: streamErrors },
1631
2580
  });
1632
2581
  }
1633
- if (finalOutput && transcript?.appendAssistantMessage) {
2582
+ if (!externalTranscriptMessages && finalOutput && transcript?.appendAssistantMessage) {
1634
2583
  transcript.appendAssistantMessage(finalOutput, {
1635
2584
  sessionId: sid,
1636
2585
  cwd: resolvedCwd,
@@ -1642,11 +2591,15 @@ async function runAgentLoop(prompt, opts = {}) {
1642
2591
  }
1643
2592
 
1644
2593
  await shutdownPostEditMiddleware(postEditMiddleware);
1645
- const changedFiles = changedFilesSince(resolvedCwd, preRunDirtyFiles);
2594
+ const changedFiles = changedFilesSince(resolvedCwd, preRunFileBaseline);
1646
2595
  return {
1647
2596
  success: streamStatus !== 'error',
1648
2597
  output: finalOutput,
1649
- stderr: streamErrors.join('\n'),
2598
+ // Surface the classified, friendly provider message (e.g. "AI provider network
2599
+ // error: …could not reach the provider endpoint…") to the caller/chat reply
2600
+ // instead of a raw "fetch failed". The raw text stays in `errors` for the
2601
+ // CLI-recoverability pattern match.
2602
+ stderr: (streamProviderError && streamProviderError.userMessage) || streamErrors.join('\n'),
1650
2603
  sessionId: sid,
1651
2604
  exitCode: streamStatus === 'error' ? -1 : 0,
1652
2605
  log,
@@ -1656,6 +2609,7 @@ async function runAgentLoop(prompt, opts = {}) {
1656
2609
  next: 'stop',
1657
2610
  runtimeMode: runtimeMode.id,
1658
2611
  changedFiles,
2612
+ finalAnswerDelivered,
1659
2613
  };
1660
2614
  }
1661
2615
 
@@ -1674,6 +2628,7 @@ async function runAgentLoop(prompt, opts = {}) {
1674
2628
  try {
1675
2629
  const startTurn = opts._resumeTurn || 0;
1676
2630
  for (let turn = startTurn; turn < turns; turn++) {
2631
+ throwIfExternalAbort();
1677
2632
  const remaining = deadline - Date.now();
1678
2633
  if (remaining <= 0) {
1679
2634
  finalOutput += '\n[Timeout reached]';
@@ -1692,13 +2647,14 @@ async function runAgentLoop(prompt, opts = {}) {
1692
2647
  const isLocal = /ollama|mlx/.test(llm.type || '');
1693
2648
  const perTurnCap = opts.perTurnTimeoutMs || (isLocal ? 600000 : 300000);
1694
2649
  const ac = new AbortController();
2650
+ const unlinkExternalAbort = linkExternalAbort(ac);
1695
2651
  const timer = setTimeout(() => ac.abort(), Math.min(remaining, perTurnCap));
1696
2652
 
1697
2653
  // Middleware: prepare LLM call
1698
2654
  const turnsRemaining = turns - turn;
1699
2655
  const llmCtx = { params: createInitialLlmParams(opts, taskFileHints.length >= 4 ? 8192 : 4096), system: systemPrompt, cwd: resolvedCwd,
1700
2656
  provider: llm.type, model: modelId, mode: opts.mode, runtimeMode: runtimeMode.id, claudeMd: opts.claudeMd, log: {},
1701
- toolsAvailable: turnsRemaining > 1, promptCapabilities };
2657
+ toolsAvailable: turnsRemaining > 1, promptCapabilities, promptManifest };
1702
2658
  llmCtxRef.current = llmCtx; // expose to event bridge (A2)
1703
2659
  await mw.run('llm.before', llmCtx);
1704
2660
  let adaptedTools = await toolRegistry.getDefinitions(llmCtx);
@@ -1715,6 +2671,7 @@ async function runAgentLoop(prompt, opts = {}) {
1715
2671
  mode: opts.mode || 'executing',
1716
2672
  step: turn,
1717
2673
  sessionMemory: opts.sessionMemory,
2674
+ userHooks,
1718
2675
  reason: 'legacy_pre_turn',
1719
2676
  opts,
1720
2677
  });
@@ -1759,10 +2716,12 @@ async function runAgentLoop(prompt, opts = {}) {
1759
2716
  thinking: llmCtx.params.thinking,
1760
2717
  reasoningEffort: llmCtx.params.reasoningEffort,
1761
2718
  options: llmCtx.params.options,
2719
+ promptCache: true,
1762
2720
  signal: ac.signal,
1763
2721
  });
1764
2722
  } finally {
1765
2723
  clearTimeout(timer);
2724
+ unlinkExternalAbort();
1766
2725
  }
1767
2726
  response = recoverAllowedTextToolCalls(response, adaptedTools);
1768
2727
  if (response.textToolCallFormat) {
@@ -1794,13 +2753,18 @@ async function runAgentLoop(prompt, opts = {}) {
1794
2753
  if (response.usage) {
1795
2754
  const inputTokens = response.usage.input || 0;
1796
2755
  const outputTokens = response.usage.output || 0;
2756
+ // Cache hits cost 0.1x input price, cache writes 1.25x (Anthropic).
2757
+ const cacheRead = response.usage.cacheRead || 0;
2758
+ const cacheWrite = response.usage.cacheWrite || 0;
2759
+ const effectiveInput = Math.max(0, inputTokens - cacheRead - cacheWrite)
2760
+ + cacheRead * 0.1 + cacheWrite * 1.25;
1797
2761
  // Cost estimate: rough pricing per 1M tokens
1798
2762
  const costPer1M = {
1799
2763
  input: modelId.includes('haiku') ? 0.25 : modelId.includes('sonnet') ? 3.0 : 15.0,
1800
2764
  output: modelId.includes('haiku') ? 1.25 : modelId.includes('sonnet') ? 15.0 : 75.0,
1801
2765
  };
1802
- const turnCost = (inputTokens * costPer1M.input + outputTokens * costPer1M.output) / 1_000_000;
1803
- turnCosts.push({ turn, inputTokens, outputTokens, cost: turnCost });
2766
+ const turnCost = (effectiveInput * costPer1M.input + outputTokens * costPer1M.output) / 1_000_000;
2767
+ turnCosts.push({ turn, inputTokens, outputTokens, cacheRead, cacheWrite, cost: turnCost });
1804
2768
  budgetUsed += turnCost;
1805
2769
  if (opts.budgetUsd && budgetUsed > opts.budgetUsd) {
1806
2770
  finalOutput += '\n[Budget exceeded]';
@@ -1833,6 +2797,7 @@ async function runAgentLoop(prompt, opts = {}) {
1833
2797
  toolsAvailable: adaptedTools.length > 0,
1834
2798
  nudges: log._noActionNudges || 0,
1835
2799
  cwd: resolvedCwd,
2800
+ codingIntent,
1836
2801
  });
1837
2802
  if (continuation?.action === 'continue') {
1838
2803
  log._noActionNudges = (log._noActionNudges || 0) + 1;
@@ -1844,12 +2809,22 @@ async function runAgentLoop(prompt, opts = {}) {
1844
2809
  if (continuation?.action === 'fail') {
1845
2810
  throw new Error(continuation.reason);
1846
2811
  }
2812
+ const stopGate = await evaluateStopGate({
2813
+ userHooks, log, sessionId: sid, cwd: resolvedCwd, mode: opts.mode, turn, text: contentToText(response.content),
2814
+ });
2815
+ if (stopGate) {
2816
+ messages.push({ role: 'assistant', content: assistantHistoryContent(response) });
2817
+ messages.push({ role: 'user', content: stopGate.message });
2818
+ emitProgress({ phase: opts.mode || 'executing', step: turn, message: 'Stop hook rejected completion — continuing', detail: { reason: stopGate.reason } });
2819
+ continue;
2820
+ }
1847
2821
  emitProgress({
1848
2822
  phase: opts.mode || 'executing',
1849
2823
  step: turn,
1850
2824
  message: 'Agent finished',
1851
2825
  });
1852
2826
  finalOutput += (typeof response.content === 'string' ? response.content : '') || '';
2827
+ if (contentToText(response.content).trim()) finalAnswerDelivered = true;
1853
2828
  break;
1854
2829
  }
1855
2830
 
@@ -1866,125 +2841,34 @@ async function runAgentLoop(prompt, opts = {}) {
1866
2841
  detail: { tool: tc.name, input: tc.input },
1867
2842
  });
1868
2843
 
1869
- let result;
1870
- try {
1871
- const input = { ...tc.input };
1872
-
1873
- // Auto-correct missing file_path: resolve relative paths to cwd
1874
- if (['read_file', 'write_file', 'edit_file'].includes(tc.name)) {
1875
- if (input.file_path && !path.isAbsolute(input.file_path)) {
1876
- input.file_path = path.join(resolvedCwd, input.file_path);
1877
- } else if (!input.file_path && tc.name === 'read_file' && input.path) {
1878
- // Some models use 'path' instead of 'file_path'
1879
- input.file_path = path.isAbsolute(input.path) ? input.path : path.join(resolvedCwd, input.path);
1880
- }
1881
- }
1882
-
1883
- // Auto-correct list_directory: resolve relative paths
1884
- if (tc.name === 'list_directory' && input.directory && !path.isAbsolute(input.directory)) {
1885
- input.directory = path.join(resolvedCwd, input.directory);
1886
- }
1887
-
1888
- // Path traversal guard: file tools must stay within cwd
1889
- if (['read_file', 'write_file', 'edit_file'].includes(tc.name) && input.file_path) {
1890
- if (!isWithinDirectory(input.file_path, resolvedCwd)) {
1891
- result = { error: `Path ${input.file_path} is outside allowed directory ${resolvedCwd}` };
1892
- turnHadError = true;
1893
- throw new Error('path_blocked'); // skip to result push
1894
- }
1895
- }
1896
-
1897
- // Override directory for search tools
1898
- if (tc.name === 'glob' && !input.directory) input.directory = resolvedCwd;
1899
- if (tc.name === 'grep_files' && !input.directory) input.directory = resolvedCwd;
1900
- if (tc.name === 'run_shell') {
1901
- input.timeout_ms = input.timeout_ms || 30000;
1902
- input.cwd = input.cwd || resolvedCwd;
1903
- }
1904
-
1905
- if (toolRequiresPermission(tc.name)) {
1906
- const permResult = await permissionService.authorize({
1907
- sessionId: sid,
1908
- tool: tc.name,
1909
- input,
1910
- cwd: input.cwd || resolvedCwd,
1911
- projectRoot: resolvedCwd,
1912
- mode: opts.mode,
1913
- headless: Boolean(opts.headless || opts.benchmark),
1914
- metadata: { toolCallId: tc.id || tc.toolCallId || '' },
1915
- });
1916
- if (permResult.decision !== 'allow') {
1917
- result = { error: `Permission denied: ${permResult.reason || permResult.message || permResult.decision}` };
1918
- turnHadError = true;
1919
- throw new Error('path_blocked');
1920
- }
1921
- }
1922
-
1923
- // Middleware: before tool
1924
- const modifiedInput = await mw.run('tool.before', llmCtx, tc.name, input);
1925
- const finalInput = (modifiedInput && typeof modifiedInput === 'object') ? modifiedInput : input;
1926
- if (['read_file', 'write_file', 'edit_file', 'apply_patch', 'multi_edit', 'glob', 'grep_files', 'list_directory'].includes(tc.name)) {
1927
- finalInput.sessionId = sid;
1928
- finalInput.projectRoot = resolvedCwd;
1929
- }
1930
-
1931
- // In-flight todo tracking (6m)
1932
- if (tc.name === 'update_todos') {
1933
- currentTodos = finalInput.todos || [];
1934
- result = { ok: true, todos: currentTodos };
1935
- } else if (tc.name === 'ask_user') {
1936
- // In headless/benchmark mode, auto-dismiss ask_user to avoid blocking
1937
- if (opts.mode === 'build' && !opts.interactive) {
1938
- result = { dismissed: true, message: 'Running in non-interactive mode. Please proceed with your best judgment based on the code you have read.' };
1939
- } else {
1940
- // Interactive question (B1) — ask the user and wait for answer
1941
- try {
1942
- const answer = await questionManager.ask(sid, {
1943
- question: finalInput.question,
1944
- header: finalInput.header,
1945
- options: finalInput.options,
1946
- multiple: finalInput.multiple,
1947
- });
1948
- result = answer ? { answers: answer } : { dismissed: true, message: 'Question timed out or was dismissed' };
1949
- } catch (e) {
1950
- result = { error: `Question failed: ${e.message}` };
1951
- }
1952
- }
1953
- } else {
1954
- result = await toolRegistry.execute(tc.name, finalInput, {
1955
- sessionId: sid,
1956
- cwd: resolvedCwd,
1957
- model: modelId,
1958
- provider: llm.type,
1959
- llmCtx,
1960
- });
1961
- }
1962
-
1963
- // Middleware: after tool
1964
- result = await mw.run('tool.after', llmCtx, tc.name, finalInput, result) || result;
2844
+ const execution = await toolExecutionController.execute(tc, {
2845
+ sessionId: sid,
2846
+ cwd: resolvedCwd,
2847
+ projectRoot: resolvedCwd,
2848
+ model: modelId,
2849
+ provider: llm.type,
2850
+ mode: opts.mode || '',
2851
+ runtimeMode: runtimeMode.id,
2852
+ llmCtx,
2853
+ interactive: opts.interactive,
2854
+ onTodos: (todos) => { currentTodos = todos; },
2855
+ });
2856
+ const result = execution.result;
2857
+ const evidenceInput = execution.evidenceInput || tc.input || {};
2858
+ if (!execution.ok) turnHadError = true;
1965
2859
 
1966
- // ── Event bus emissions (A1) ──
1967
- // Fire events so middleware and subscribers can react to tool outcomes.
1968
- if (['edit_file', 'write_file', 'multi_edit'].includes(tc.name) && result && !result.error) {
1969
- events.emit('file.edited', { filePath: finalInput.file_path, sessionId: sid });
1970
- }
1971
- if (tc.name === 'apply_patch' && result && !result.error) {
1972
- const patchFiles = [
1973
- ...(result.added || []),
1974
- ...(result.modified || []),
1975
- ];
1976
- for (const filePath of patchFiles) {
1977
- events.emit('file.edited', { filePath, sessionId: sid });
1978
- }
1979
- }
1980
- if (tc.name === 'read_file' && result && !result.error) {
1981
- events.emit('file.read', { filePath: finalInput.file_path, sessionId: sid });
1982
- }
1983
- } catch (err) {
1984
- if (err.message !== 'path_blocked') {
1985
- result = { error: err.message };
1986
- }
1987
- turnHadError = true;
2860
+ const typedArtifacts = storeTypedArtifactsForTranscript(result, {
2861
+ sessionId: sid,
2862
+ cwd: resolvedCwd,
2863
+ toolCallId: tc.id || '',
2864
+ toolName: tc.name,
2865
+ transcript,
2866
+ });
2867
+ if (typedArtifacts.length && log[log.length - 1]) {
2868
+ log[log.length - 1].artifacts = [
2869
+ ...(log[log.length - 1].artifacts || []),
2870
+ ...typedArtifacts,
2871
+ ];
1988
2872
  }
1989
2873
 
1990
2874
  const resultStr = typeof result === 'string' ? result : JSON.stringify(result);
@@ -2003,11 +2887,18 @@ async function runAgentLoop(prompt, opts = {}) {
2003
2887
  toolResults.push({ type: 'tool_result', tool_use_id: tc.id, content: capped });
2004
2888
 
2005
2889
  log[log.length - 1].toolResults = log[log.length - 1].toolResults || [];
2006
- log[log.length - 1].toolResults.push({ name: tc.name, resultLength: resultStr.length, error: turnHadError });
2890
+ const evidence = normalizeToolCallEvidence({ name: tc.name, input: evidenceInput }, result);
2891
+ log[log.length - 1].toolResults.push({
2892
+ name: tc.name,
2893
+ resultLength: resultStr.length,
2894
+ ok: evidence.ok === true,
2895
+ error: result?.error || null,
2896
+ exitCode: result?.exitCode,
2897
+ result,
2898
+ });
2007
2899
 
2008
2900
  // Doom loop detection (6a) -- track tool calls for identical pattern
2009
- const inputHash = JSON.stringify(tc.input);
2010
- toolCallHistory.push({ name: tc.name, inputHash });
2901
+ toolCallHistory.push(evidence);
2011
2902
 
2012
2903
  if (toolCallHistory.length >= DOOM_LOOP_THRESHOLD) {
2013
2904
  const recent = toolCallHistory.slice(-DOOM_LOOP_THRESHOLD);
@@ -2066,7 +2957,20 @@ async function runAgentLoop(prompt, opts = {}) {
2066
2957
  if (response.stopReason === 'end_turn' || response.stopReason === 'max_tokens') break;
2067
2958
  }
2068
2959
  } catch (err) {
2069
- emitProgress({ phase: 'error', step: -1, message: err.message });
2960
+ // Classify provider/LLM failures into a clear, human message (naming the model) so the
2961
+ // coding agent surfaces e.g. "AI provider network error: … could not reach the provider
2962
+ // endpoint…" instead of a raw "fetch failed" — mirrors the stream path
2963
+ // (stream-processor.js), which already decorates. Raw err.message is kept below for the
2964
+ // CLI-recoverability pattern match (which keys on the actual error text).
2965
+ let friendlyError = (err && err.message) || 'Coding session failed';
2966
+ try {
2967
+ const { decorateProviderError } = require('./llm/provider-error');
2968
+ const decorated = decorateProviderError(err, { provider: llm?.type || '', model: modelId || model || '' });
2969
+ if (decorated && decorated.providerError && decorated.providerError.userMessage) {
2970
+ friendlyError = decorated.providerError.userMessage;
2971
+ }
2972
+ } catch {}
2973
+ emitProgress({ phase: 'error', step: -1, message: friendlyError });
2070
2974
 
2071
2975
  // Persist activity error (Phase 2: Activity History)
2072
2976
  try { getActivityLog().log({ session_id: sid, type: 'coding_error', title: 'Coding session failed', body: err.message, detail: JSON.stringify({ turns: log.length }) }); } catch {}
@@ -2078,10 +2982,10 @@ async function runAgentLoop(prompt, opts = {}) {
2078
2982
  sessionId: sid,
2079
2983
  cwd: resolvedCwd,
2080
2984
  partType: 'error',
2081
- data: { message: err.message },
2985
+ data: { message: friendlyError },
2082
2986
  });
2083
2987
  }
2084
- if (finalOutput && transcript?.appendAssistantMessage) {
2988
+ if (!externalTranscriptMessages && finalOutput && transcript?.appendAssistantMessage) {
2085
2989
  transcript.appendAssistantMessage(finalOutput, {
2086
2990
  sessionId: sid,
2087
2991
  cwd: resolvedCwd,
@@ -2093,9 +2997,10 @@ async function runAgentLoop(prompt, opts = {}) {
2093
2997
  }
2094
2998
 
2095
2999
  // Graceful cleanup (6r)
3000
+ try { require('./tools/local-tools').cleanupBackgroundProcesses({ sessionId: sid }); } catch {}
2096
3001
  const cleanup = {
2097
3002
  lastCompletedTurn: log.length - 1,
2098
- error: err.message,
3003
+ error: friendlyError,
2099
3004
  todosAtAbort: currentTodos,
2100
3005
  };
2101
3006
 
@@ -2141,7 +3046,7 @@ async function runAgentLoop(prompt, opts = {}) {
2141
3046
  return {
2142
3047
  success: false,
2143
3048
  output: finalOutput,
2144
- stderr: err.message,
3049
+ stderr: friendlyError,
2145
3050
  sessionId: sid,
2146
3051
  exitCode: -1,
2147
3052
  log,
@@ -2149,10 +3054,11 @@ async function runAgentLoop(prompt, opts = {}) {
2149
3054
  provider: llm?.type,
2150
3055
  model: modelId,
2151
3056
  runtimeMode: runtimeMode.id,
3057
+ finalAnswerDelivered,
2152
3058
  turnCosts,
2153
3059
  budgetUsed,
2154
3060
  cleanup,
2155
- changedFiles: changedFilesSince(resolvedCwd, preRunDirtyFiles),
3061
+ changedFiles: changedFilesSince(resolvedCwd, preRunFileBaseline),
2156
3062
  };
2157
3063
  }
2158
3064
 
@@ -2168,6 +3074,20 @@ async function runAgentLoop(prompt, opts = {}) {
2168
3074
  if (questionManager) questionManager.clear();
2169
3075
  try { require('./tools/file-tracker').clearSession(sid); } catch {}
2170
3076
 
3077
+ // Stop session-scoped background processes (dev servers, watchers).
3078
+ // Persistent ones are reported so the summary can mention them.
3079
+ try {
3080
+ const bg = require('./tools/local-tools').cleanupBackgroundProcesses({ sessionId: sid });
3081
+ if (bg.stopped.length || bg.persisted.length) {
3082
+ emitProgress({
3083
+ phase: 'done',
3084
+ step: -1,
3085
+ message: `Background processes: stopped ${bg.stopped.length}, left running ${bg.persisted.length}`,
3086
+ detail: bg,
3087
+ });
3088
+ }
3089
+ } catch {}
3090
+
2171
3091
  emitProgress({ phase: 'done', step: -1, message: 'Agent loop finished' });
2172
3092
 
2173
3093
  // Delete checkpoint on successful completion (no longer needed)
@@ -2175,7 +3095,7 @@ async function runAgentLoop(prompt, opts = {}) {
2175
3095
 
2176
3096
  // Persist activity completion (Phase 2: Activity History)
2177
3097
  try { getActivityLog().log({ session_id: sid, type: 'coding_complete', title: 'Coding session completed', body: finalOutput.slice(0, 500), detail: JSON.stringify({ turns: log.length, tokens: totalInput + totalOutput }) }); } catch {}
2178
- if (finalOutput && transcript?.appendAssistantMessage) {
3098
+ if (!externalTranscriptMessages && finalOutput && transcript?.appendAssistantMessage) {
2179
3099
  transcript.appendAssistantMessage(finalOutput, {
2180
3100
  sessionId: sid,
2181
3101
  cwd: resolvedCwd,
@@ -2208,7 +3128,8 @@ async function runAgentLoop(prompt, opts = {}) {
2208
3128
  turnCosts,
2209
3129
  budgetUsed,
2210
3130
  screenshots: screenshotsTaken,
2211
- changedFiles: changedFilesSince(resolvedCwd, preRunDirtyFiles),
3131
+ changedFiles: changedFilesSince(resolvedCwd, preRunFileBaseline),
3132
+ finalAnswerDelivered,
2212
3133
  };
2213
3134
  }
2214
3135
 
@@ -2542,6 +3463,63 @@ function getGitChangedFiles(cwd) {
2542
3463
  }
2543
3464
  }
2544
3465
 
3466
+ // Directories we never descend into when scanning a non-git working tree for changes.
3467
+ // Dependency/build/VCS dirs would balloon the walk and never represent the agent's edits.
3468
+ const _CHANGED_SCAN_SKIP_DIRS = new Set([
3469
+ '.git', '.hg', '.svn', 'node_modules', 'bower_components', '.next', '.nuxt',
3470
+ 'dist', 'build', 'out', 'target', 'vendor', '.venv', 'venv', '__pycache__',
3471
+ '.cache', '.turbo', '.gradle', '.idea', '.vscode', 'coverage', '.parcel-cache',
3472
+ ]);
3473
+
3474
+ // Captures a baseline for changedFilesSince() that works in BOTH git and non-git cwds.
3475
+ // Git: the set of already-dirty paths, so the agent's own edits can be isolated from
3476
+ // pre-existing uncommitted changes. Non-git: a wall-clock marker captured before the
3477
+ // agent runs, so files it creates/modifies can be detected by mtime afterward.
3478
+ function captureChangedFilesBaseline(cwd) {
3479
+ if (isGitRepository(cwd)) {
3480
+ return { isGit: true, dirty: getGitChangedFiles(cwd) };
3481
+ }
3482
+ return { isGit: false, startedAtMs: Date.now() };
3483
+ }
3484
+
3485
+ // Walks a non-git working tree and returns relative paths of files created or modified
3486
+ // at/after `sinceMs`. Bounded (skip-dirs + entry/result caps) so a huge tree — e.g. a
3487
+ // folder holding a multi-hundred-MB archive — can't make change detection runaway.
3488
+ function collectFilesModifiedSince(cwd, sinceMs, { maxEntries = 60000, maxResults = 5000 } = {}) {
3489
+ const results = [];
3490
+ let visited = 0;
3491
+ const stack = [''];
3492
+ while (stack.length) {
3493
+ const relDir = stack.pop();
3494
+ let entries;
3495
+ try {
3496
+ entries = fs.readdirSync(path.join(cwd, relDir), { withFileTypes: true });
3497
+ } catch {
3498
+ continue;
3499
+ }
3500
+ for (const ent of entries) {
3501
+ if (visited++ >= maxEntries || results.length >= maxResults) return results;
3502
+ const rel = relDir ? `${relDir}/${ent.name}` : ent.name;
3503
+ if (ent.isDirectory()) {
3504
+ if (_CHANGED_SCAN_SKIP_DIRS.has(ent.name)) continue;
3505
+ stack.push(rel);
3506
+ } else if (ent.isFile()) {
3507
+ if (ent.name === '.DS_Store') continue;
3508
+ let st;
3509
+ try {
3510
+ st = fs.statSync(path.join(cwd, rel));
3511
+ } catch {
3512
+ continue;
3513
+ }
3514
+ if (st.mtimeMs >= sinceMs && _isPathSafeRelative(rel)) results.push(rel);
3515
+ }
3516
+ // Symlinks (and other non-file/non-dir entries) are intentionally skipped to avoid
3517
+ // following them out of the working tree or into cycles.
3518
+ }
3519
+ }
3520
+ return results;
3521
+ }
3522
+
2545
3523
  function getGitTrackedAndUntrackedFiles(cwd) {
2546
3524
  try {
2547
3525
  const stdout = execFileSync('git', ['ls-files', '-z', '--cached', '--others', '--exclude-standard'], {
@@ -2747,20 +3725,34 @@ async function plan(request, cwd, options = {}) {
2747
3725
  if (!result.success) {
2748
3726
  parseErr.message = `Planning failed before producing valid JSON (${result.stderr || 'provider error'}): ${parseErr.message}`;
2749
3727
  }
2750
- if (process.env.WALLE_PLAN_DEBUG) {
2751
- const dumpPath = path.join(
2752
- process.env.WALL_E_DATA_DIR || '/tmp',
2753
- `planner-debug-${Date.now()}.txt`,
2754
- );
2755
- try {
2756
- fs.writeFileSync(
2757
- dumpPath,
2758
- `=== prompt ===\n${prompt}\n\n=== output ===\n${result.output || ''}\n\n=== outputRaw ===\n${result.outputRaw || ''}\n`,
3728
+ if (shouldRecoverPlannerParseFailure({ request, output: result.output, cwd })) {
3729
+ if (onProgress) {
3730
+ onProgress({
3731
+ type: 'planning_recovery',
3732
+ phase: 'planning',
3733
+ step: -1,
3734
+ message: 'Planner returned unstructured output; recovering with a direct implementation subtask.',
3735
+ detail: { reason: parseErr.message },
3736
+ });
3737
+ }
3738
+ planObj = buildPlannerRecoveryPlan(request, context, parseErr, result.output);
3739
+ config._planningRecovery = planObj.planning_recovery;
3740
+ } else {
3741
+ if (process.env.WALLE_PLAN_DEBUG) {
3742
+ const dumpPath = path.join(
3743
+ process.env.WALL_E_DATA_DIR || '/tmp',
3744
+ `planner-debug-${Date.now()}.txt`,
2759
3745
  );
2760
- parseErr.message += ` (planner debug dumped to ${dumpPath})`;
2761
- } catch {}
3746
+ try {
3747
+ fs.writeFileSync(
3748
+ dumpPath,
3749
+ `=== prompt ===\n${prompt}\n\n=== output ===\n${result.output || ''}\n\n=== outputRaw ===\n${result.outputRaw || ''}\n`,
3750
+ );
3751
+ parseErr.message += ` (planner debug dumped to ${dumpPath})`;
3752
+ } catch {}
3753
+ }
3754
+ throw parseErr;
2762
3755
  }
2763
- throw parseErr;
2764
3756
  }
2765
3757
 
2766
3758
  // Enforce max_subtasks
@@ -2919,6 +3911,13 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
2919
3911
  const attemptChangedFiles = changedFilesSince(cwd, attemptStartDirtyFiles);
2920
3912
  const reportedChangedFiles = Array.isArray(result.changedFiles) ? result.changedFiles : [];
2921
3913
  const changedFilesForValidation = [...new Set([...reportedChangedFiles, ...attemptChangedFiles])];
3914
+ const acceptanceContract = buildAcceptanceContract({
3915
+ request: subtask.prompt || subtask.title || '',
3916
+ subtask,
3917
+ changedFiles: changedFilesForValidation,
3918
+ frontend: changedFilesTouchFrontend(changedFilesForValidation),
3919
+ requiresFileChanges,
3920
+ });
2922
3921
 
2923
3922
  if (isTimeoutOnlyOutput(result.output)) {
2924
3923
  lastError = `Subtask timed out before producing a usable result${result.stderr ? `: ${result.stderr}` : ''}`;
@@ -2960,6 +3959,24 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
2960
3959
  continue;
2961
3960
  }
2962
3961
 
3962
+ const acceptanceReport = await runAcceptanceValidators({
3963
+ cwd,
3964
+ contract: acceptanceContract,
3965
+ changedFiles: changedFilesForValidation,
3966
+ screenshots: result.screenshots || [],
3967
+ toolCallHistory: toolCallHistoryFromLog(result.log),
3968
+ autoBrowser: false,
3969
+ requireBrowserRuntime: false,
3970
+ onProgress,
3971
+ step: i,
3972
+ });
3973
+ if (!acceptanceReport.ok) {
3974
+ lastError = `Acceptance validation failed: ${summarizeValidatorFailures(acceptanceReport).join('; ')}`;
3975
+ if (onProgress) onProgress({ type: 'retry', index: i, retry, error: lastError, acceptance: acceptanceReport });
3976
+ if (retry < config.max_retries) restoreSubtaskSnapshot(snapshot, cwd, baselineUntracked);
3977
+ continue;
3978
+ }
3979
+
2963
3980
  // Run tests if configured
2964
3981
  let testsOk = true;
2965
3982
  if (config.test_command) {
@@ -3080,6 +4097,7 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
3080
4097
  failed_subtask: i,
3081
4098
  files_changed: [...agentChangedFiles],
3082
4099
  pre_existing_dirty_files: [...preExistingDirtyFiles],
4100
+ screenshots: allScreenshots,
3083
4101
  };
3084
4102
  }
3085
4103
 
@@ -3095,6 +4113,7 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
3095
4113
  state_path: statePath,
3096
4114
  files_changed: [...agentChangedFiles],
3097
4115
  pre_existing_dirty_files: [...preExistingDirtyFiles],
4116
+ screenshots: allScreenshots,
3098
4117
  };
3099
4118
  }
3100
4119
 
@@ -3104,6 +4123,7 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
3104
4123
  async function complete(request, planData, executeResult, { cwd, brain, onProgress } = {}) {
3105
4124
  const { plan: planObj, config } = planData;
3106
4125
  const agentFiles = (executeResult.files_changed || []).filter(_isPathSafeRelative);
4126
+ const screenshots = Array.isArray(executeResult.screenshots) ? executeResult.screenshots : [];
3107
4127
  const report = {
3108
4128
  success: executeResult.success,
3109
4129
  branch: planObj.branch_name,
@@ -3124,10 +4144,42 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
3124
4144
  return report;
3125
4145
  }
3126
4146
 
4147
+ let diff = '';
4148
+ if (agentFiles.length > 0) {
4149
+ diff = await getGitDiffForFiles(cwd, agentFiles);
4150
+ }
4151
+
4152
+ if (diff && codingReview.diffTouchesFrontend(diff)) {
4153
+ const acceptanceContract = buildAcceptanceContract({
4154
+ request,
4155
+ subtask: { title: 'Final frontend acceptance', prompt: request },
4156
+ changedFiles: agentFiles,
4157
+ frontend: true,
4158
+ requiresFileChanges: config.require_changes !== false,
4159
+ });
4160
+ const acceptanceReport = await runAcceptanceValidators({
4161
+ cwd,
4162
+ contract: acceptanceContract,
4163
+ changedFiles: agentFiles,
4164
+ screenshots,
4165
+ toolCallHistory: [],
4166
+ autoBrowser: config.browser_smoke !== false && config.browserSmoke !== false,
4167
+ requireBrowserRuntime: config.browser_smoke !== false && config.browserSmoke !== false,
4168
+ onProgress,
4169
+ step: -1,
4170
+ });
4171
+ report.frontendVerification = acceptanceReport;
4172
+ if (!acceptanceReport.ok) {
4173
+ report.success = false;
4174
+ report.error = summarizeValidatorFailures(acceptanceReport)[0] || 'Frontend acceptance validation failed';
4175
+ report.concerns.push(...acceptanceReport.concerns.slice(0, 10));
4176
+ return report;
4177
+ }
4178
+ }
4179
+
3127
4180
  // Final review
3128
4181
  if (config.review) {
3129
4182
  if (onProgress) onProgress({ phase: 'reviewing', step: -1, message: 'Final review...' });
3130
- const diff = await getGitDiffForFiles(cwd, agentFiles);
3131
4183
  if (diff) {
3132
4184
  const verdict = config.review_quorum
3133
4185
  ? await codingQuorum.runCodingQuorum({
@@ -3155,8 +4207,8 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
3155
4207
  return report;
3156
4208
  }
3157
4209
 
3158
- if (codingReview.diffTouchesFrontend(diff) && allScreenshots.length > 0) {
3159
- const visualVerdict = await codingReview.reviewVisual(request, allScreenshots, diff, {
4210
+ if (codingReview.diffTouchesFrontend(diff) && screenshots.length > 0) {
4211
+ const visualVerdict = await codingReview.reviewVisual(request, screenshots, diff, {
3160
4212
  cwd,
3161
4213
  reviewer: config.reviewer,
3162
4214
  reviewers: config.reviewers,
@@ -3208,7 +4260,7 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
3208
4260
  if (diffErr.code !== 1) throw diffErr;
3209
4261
  }
3210
4262
  const sanitizedRequest = request.replace(/[\r\n]+/g, ' ').trim().slice(0, 72);
3211
- const commitMsg = `feat: ${sanitizedRequest}\n\nOrchestrated by Wall-E coding agent.\nSubtasks: ${planObj.subtasks.length}\n\nCo-authored-by: Codex <noreply@openai.com>`;
4263
+ const commitMsg = `feat: ${sanitizedRequest}\n\nOrchestrated by Wall-E coding agent.\nSubtasks: ${planObj.subtasks.length}\n\nCo-authored-by: Wall-E <noreply@example.invalid>`;
3212
4264
  const { stdout } = await execFileAsync('git', ['commit', '-m', commitMsg], { cwd });
3213
4265
  // Extract commit hash
3214
4266
  const hashMatch = stdout.match(/\[[\w/.-]+ ([a-f0-9]+)\]/);
@@ -3285,6 +4337,56 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
3285
4337
  return report;
3286
4338
  }
3287
4339
 
4340
+ function storeTypedArtifactsForTranscript(result, { sessionId, cwd, toolCallId, toolName, transcript } = {}) {
4341
+ if (!result || typeof result !== 'object' || !transcript?.appendArtifact) return [];
4342
+ let descriptors = [];
4343
+ try {
4344
+ const { extractTypedArtifactDescriptors } = require('./coding/stream-processor');
4345
+ descriptors = extractTypedArtifactDescriptors(result, { id: toolCallId, name: toolName });
4346
+ } catch {
4347
+ descriptors = [];
4348
+ }
4349
+ if (!descriptors.length) return [];
4350
+ let artifactStore = null;
4351
+ try {
4352
+ const { ArtifactStore } = require('./coding/artifact-store');
4353
+ artifactStore = new ArtifactStore();
4354
+ } catch {}
4355
+ const stored = [];
4356
+ const seen = new Set();
4357
+ for (const descriptor of descriptors) {
4358
+ try {
4359
+ const artifact = artifactStore?.storeArtifact
4360
+ ? artifactStore.storeArtifact({
4361
+ sessionId,
4362
+ toolCallId,
4363
+ toolName,
4364
+ kind: descriptor.kind,
4365
+ sourcePath: descriptor.path || descriptor.sourcePath,
4366
+ content: descriptor.content,
4367
+ mimeType: descriptor.mimeType,
4368
+ bytes: descriptor.bytes,
4369
+ sha256: descriptor.sha256,
4370
+ metadata: descriptor.metadata || {},
4371
+ })
4372
+ : descriptor;
4373
+ const key = artifact.artifactId || `${artifact.kind}:${artifact.path}`;
4374
+ if (seen.has(key)) continue;
4375
+ seen.add(key);
4376
+ transcript.appendArtifact({
4377
+ sessionId,
4378
+ cwd,
4379
+ type: artifact.kind || 'artifact',
4380
+ toolCallId,
4381
+ name: toolName,
4382
+ artifact,
4383
+ });
4384
+ stored.push(artifact);
4385
+ } catch {}
4386
+ }
4387
+ return stored;
4388
+ }
4389
+
3288
4390
  /**
3289
4391
  * Resume a coding session from a saved checkpoint.
3290
4392
  * Loads checkpoint data from brain DB, reconstructs messages, and re-enters runAgentLoop.
@@ -3335,13 +4437,20 @@ module.exports = {
3335
4437
  readCheckpoint,
3336
4438
  formatReport,
3337
4439
  isTimeoutOnlyOutput,
4440
+ resolveRunTimeoutMs,
4441
+ isInteractiveRun,
3338
4442
  isActionRequiredPrompt,
3339
4443
  isPrematureActionResponse,
3340
4444
  getNoActionContinuation,
4445
+ evaluateStopGate,
4446
+ MAX_STOP_HOOK_BOUNCES,
3341
4447
  hasVerificationEvidence,
3342
4448
  subtaskRequiresFileChanges,
3343
4449
  screenshotTrackerHook,
3344
4450
  collectEmptyChangedFiles,
4451
+ changedFilesSince,
4452
+ captureChangedFilesBaseline,
4453
+ collectFilesModifiedSince,
3345
4454
  CODING_TOOLS,
3346
4455
  READ_ONLY_TOOLS,
3347
4456
  BUILD_TOOLS,