create-walle 0.9.21 → 0.9.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (500)
  1. package/README.md +27 -5
  2. package/package.json +2 -2
  3. package/template/CLAUDE.md +2 -2
  4. package/template/LICENSE +1 -1
  5. package/template/bin/ctm-dev-cleanup.js +24 -3
  6. package/template/bin/ctm-launch.sh +13 -0
  7. package/template/bin/dev.sh +156 -18
  8. package/template/bin/node-bin.sh +84 -0
  9. package/template/bin/pin-node.sh +51 -0
  10. package/template/claude-task-manager/api-prompts.js +1203 -182
  11. package/template/claude-task-manager/api-reviews.js +109 -15
  12. package/template/claude-task-manager/approval-agent.js +1360 -280
  13. package/template/claude-task-manager/bin/restart-ctm.sh +64 -23
  14. package/template/claude-task-manager/bin/storage-migration-supervisor.js +338 -0
  15. package/template/claude-task-manager/db.js +4417 -295
  16. package/template/claude-task-manager/docs/app-update-refresh-protocol.md +69 -0
  17. package/template/claude-task-manager/docs/approval-ai-refinement.md +138 -0
  18. package/template/claude-task-manager/docs/approval-rescue-loop.md +74 -0
  19. package/template/claude-task-manager/docs/codex-operational-warning-health.md +107 -0
  20. package/template/claude-task-manager/docs/codex-resume-state-guard-design.md +17 -12
  21. package/template/claude-task-manager/docs/codex-terminal-render-controller-handoff.md +311 -0
  22. package/template/claude-task-manager/docs/coding-agent-hooks-architecture.md +418 -0
  23. package/template/claude-task-manager/docs/conversation-import-freshness.md +20 -0
  24. package/template/claude-task-manager/docs/google-workspace-auth-health.md +77 -0
  25. package/template/claude-task-manager/docs/image-paste-ux.md +13 -0
  26. package/template/claude-task-manager/docs/ipad-web-preview.md +88 -0
  27. package/template/claude-task-manager/docs/main-loop-offload-architecture.md +66 -0
  28. package/template/claude-task-manager/docs/microsoft-dev-tunnel-phone-access-design.md +274 -519
  29. package/template/claude-task-manager/docs/mobile-live-streaming.md +27 -5
  30. package/template/claude-task-manager/docs/mobile-remote-submission-lifecycle.md +69 -0
  31. package/template/claude-task-manager/docs/phone-access-design.md +53 -15
  32. package/template/claude-task-manager/docs/phone-passkey-identity.md +122 -0
  33. package/template/claude-task-manager/docs/phone-setup.md +3 -0
  34. package/template/claude-task-manager/docs/prompt-editing-tree-design.md +25 -1
  35. package/template/claude-task-manager/docs/remote-desktop-access-design.md +268 -0
  36. package/template/claude-task-manager/docs/restart-lifecycle-architecture.md +95 -0
  37. package/template/claude-task-manager/docs/runtime-work-control-plane.md +53 -0
  38. package/template/claude-task-manager/docs/session-interactive-wait-surfaces.md +38 -0
  39. package/template/claude-task-manager/docs/session-needs-you-dismissal.md +84 -0
  40. package/template/claude-task-manager/docs/session-render-state-management-design.md +91 -3
  41. package/template/claude-task-manager/docs/session-standup-command-center-design.md +25 -1
  42. package/template/claude-task-manager/docs/session-title-authority.md +32 -0
  43. package/template/claude-task-manager/docs/session-workspace-binding.md +33 -0
  44. package/template/claude-task-manager/docs/skill-intent-resolution-design.md +72 -0
  45. package/template/claude-task-manager/docs/walle-mcp-supervisor-health.md +86 -0
  46. package/template/claude-task-manager/docs/walle-relay-phone-access-design.md +24 -15
  47. package/template/claude-task-manager/docs/walle-session-history-hydration.md +114 -0
  48. package/template/claude-task-manager/docs/walle-session-input-queue.md +104 -0
  49. package/template/claude-task-manager/docs/walle-session-model-catalog.md +90 -0
  50. package/template/claude-task-manager/docs/walle-session-model-preferences.md +15 -6
  51. package/template/claude-task-manager/git-utils.js +897 -27
  52. package/template/claude-task-manager/lib/agent-capabilities.js +33 -0
  53. package/template/claude-task-manager/lib/agent-cli-cache.js +37 -7
  54. package/template/claude-task-manager/lib/agent-hooks-installer.js +26 -2
  55. package/template/claude-task-manager/lib/agent-presets.js +17 -1
  56. package/template/claude-task-manager/lib/all-sessions-query.js +108 -0
  57. package/template/claude-task-manager/lib/approval-ai-refinement.js +488 -0
  58. package/template/claude-task-manager/lib/approval-self-adapt.js +168 -0
  59. package/template/claude-task-manager/lib/async-semaphore.js +44 -0
  60. package/template/claude-task-manager/lib/auth-context.js +5 -0
  61. package/template/claude-task-manager/lib/auth-rate-limit.js +47 -4
  62. package/template/claude-task-manager/lib/auth-rules.js +29 -2
  63. package/template/claude-task-manager/lib/auto-approval-verifier.js +129 -16
  64. package/template/claude-task-manager/lib/background-llm.js +144 -17
  65. package/template/claude-task-manager/lib/branch-inventory.js +212 -0
  66. package/template/claude-task-manager/lib/claude-desktop-sessions.js +15 -3
  67. package/template/claude-task-manager/lib/coalesce-sync-frames.js +151 -0
  68. package/template/claude-task-manager/lib/codex-launch-health.js +762 -0
  69. package/template/claude-task-manager/lib/codex-transcript-pager.js +51 -0
  70. package/template/claude-task-manager/lib/codex-zst.js +124 -0
  71. package/template/claude-task-manager/lib/coding-agent-models.js +233 -30
  72. package/template/claude-task-manager/lib/connection-health.js +232 -0
  73. package/template/claude-task-manager/lib/conversation-blob-parser.js +42 -0
  74. package/template/claude-task-manager/lib/conversation-tail-merge.js +89 -26
  75. package/template/claude-task-manager/lib/ctm-session-context-api.js +39 -10
  76. package/template/claude-task-manager/lib/cursor-conversation-store.js +354 -0
  77. package/template/claude-task-manager/lib/db-owner-worker-client.js +315 -0
  78. package/template/claude-task-manager/lib/document-review.js +141 -6
  79. package/template/claude-task-manager/lib/escalation-review.js +152 -0
  80. package/template/claude-task-manager/lib/graceful-shutdown.js +159 -0
  81. package/template/claude-task-manager/lib/headless-term-service.js +678 -0
  82. package/template/claude-task-manager/lib/heavy-worker-fallback.js +38 -0
  83. package/template/claude-task-manager/lib/jsonl-conversation-parser.js +542 -0
  84. package/template/claude-task-manager/lib/jsonl-range-reader.js +112 -0
  85. package/template/claude-task-manager/lib/main-db-census.js +216 -0
  86. package/template/claude-task-manager/lib/message-pagination.js +106 -4
  87. package/template/claude-task-manager/lib/microsoft-dev-tunnel-setup.js +750 -26
  88. package/template/claude-task-manager/lib/mobile-auth-api.js +274 -7
  89. package/template/claude-task-manager/lib/mobile-auth-store.js +592 -10
  90. package/template/claude-task-manager/lib/mobile-notification-dispatcher.js +15 -0
  91. package/template/claude-task-manager/lib/model-overview-brain-fallback.js +311 -0
  92. package/template/claude-task-manager/lib/model-overview-cache.js +141 -0
  93. package/template/claude-task-manager/lib/models-health-routing-notice.js +126 -0
  94. package/template/claude-task-manager/lib/node-pin-guard.js +93 -0
  95. package/template/claude-task-manager/lib/perf-tracker.js +242 -6
  96. package/template/claude-task-manager/lib/permission-match.js +76 -0
  97. package/template/claude-task-manager/lib/permission-sync.js +133 -20
  98. package/template/claude-task-manager/lib/process-title.js +35 -0
  99. package/template/claude-task-manager/lib/prompt-executions-query.js +25 -0
  100. package/template/claude-task-manager/lib/prompt-index-disk-cache.js +44 -0
  101. package/template/claude-task-manager/lib/prompt-intent.js +132 -0
  102. package/template/claude-task-manager/lib/provider-user-context.js +34 -0
  103. package/template/claude-task-manager/lib/read-pool-client.js +313 -0
  104. package/template/claude-task-manager/lib/readpool-breaker.js +31 -0
  105. package/template/claude-task-manager/lib/recent-sessions-breaker.js +12 -0
  106. package/template/claude-task-manager/lib/remote-feedback-client.js +72 -0
  107. package/template/claude-task-manager/lib/remote-relay-protocol.js +37 -4
  108. package/template/claude-task-manager/lib/remote-relay-store.js +159 -0
  109. package/template/claude-task-manager/lib/remote-submission-observer.js +278 -0
  110. package/template/claude-task-manager/lib/restart-guard.js +109 -0
  111. package/template/claude-task-manager/lib/restore-interruption-detector.js +439 -0
  112. package/template/claude-task-manager/lib/restore-policy.js +13 -0
  113. package/template/claude-task-manager/lib/restore-resume-batch.js +74 -0
  114. package/template/claude-task-manager/lib/restore-runtime.js +68 -0
  115. package/template/claude-task-manager/lib/restore-storm.js +34 -0
  116. package/template/claude-task-manager/lib/resume-cwd.js +36 -0
  117. package/template/claude-task-manager/lib/resume-preflight.js +313 -0
  118. package/template/claude-task-manager/lib/runtime-work-registry.js +444 -0
  119. package/template/claude-task-manager/lib/sanitize-openai-auth.js +31 -0
  120. package/template/claude-task-manager/lib/scheduler.js +21 -1
  121. package/template/claude-task-manager/lib/scrollback-snapshot-store.js +159 -0
  122. package/template/claude-task-manager/lib/serial-task-queue.js +64 -0
  123. package/template/claude-task-manager/lib/server-listeners.js +239 -0
  124. package/template/claude-task-manager/lib/session-capture.js +42 -7
  125. package/template/claude-task-manager/lib/session-content-backfill.js +131 -0
  126. package/template/claude-task-manager/lib/session-history.js +388 -43
  127. package/template/claude-task-manager/lib/session-host-manager.js +287 -0
  128. package/template/claude-task-manager/lib/session-image-refs.js +209 -0
  129. package/template/claude-task-manager/lib/session-jobs.js +399 -59
  130. package/template/claude-task-manager/lib/session-prompt-index.js +137 -0
  131. package/template/claude-task-manager/lib/session-restore.js +53 -0
  132. package/template/claude-task-manager/lib/session-standup.js +123 -23
  133. package/template/claude-task-manager/lib/session-state-bus.js +14 -0
  134. package/template/claude-task-manager/lib/session-stream.js +64 -16
  135. package/template/claude-task-manager/lib/session-timeline-summary.js +260 -0
  136. package/template/claude-task-manager/lib/session-token-usage.js +494 -0
  137. package/template/claude-task-manager/lib/session-workspace-binding.js +356 -0
  138. package/template/claude-task-manager/lib/setup-network-config.js +9 -0
  139. package/template/claude-task-manager/lib/size-cap.js +45 -0
  140. package/template/claude-task-manager/lib/size-cap.test.js +62 -0
  141. package/template/claude-task-manager/lib/skill-autocomplete.js +180 -1
  142. package/template/claude-task-manager/lib/skill-intent-resolver.js +304 -0
  143. package/template/claude-task-manager/lib/sqlite-driver.js +19 -3
  144. package/template/claude-task-manager/lib/standup-attention.js +7 -3
  145. package/template/claude-task-manager/lib/status-authority.js +39 -0
  146. package/template/claude-task-manager/lib/status-hooks.js +4 -0
  147. package/template/claude-task-manager/lib/storage-migration.js +235 -0
  148. package/template/claude-task-manager/lib/structured-capture.js +298 -0
  149. package/template/claude-task-manager/lib/sync-io-census.js +163 -0
  150. package/template/claude-task-manager/lib/tailscale-setup.js +6 -0
  151. package/template/claude-task-manager/lib/terminal-activity-evidence.js +33 -0
  152. package/template/claude-task-manager/lib/terminal-choice.js +364 -0
  153. package/template/claude-task-manager/lib/terminal-control-sanitize.js +17 -0
  154. package/template/claude-task-manager/lib/terminal-fingerprint.js +48 -0
  155. package/template/claude-task-manager/lib/terminal-output-flush.js +84 -0
  156. package/template/claude-task-manager/lib/timeline-order.js +122 -0
  157. package/template/claude-task-manager/lib/transcript-store.js +348 -43
  158. package/template/claude-task-manager/lib/transport-security.js +84 -1
  159. package/template/claude-task-manager/lib/wait-state.js +184 -0
  160. package/template/claude-task-manager/lib/walle-client.js +47 -5
  161. package/template/claude-task-manager/lib/walle-ctm-history.js +564 -4
  162. package/template/claude-task-manager/lib/walle-external-actions.js +135 -16
  163. package/template/claude-task-manager/lib/walle-history-hydration.js +46 -0
  164. package/template/claude-task-manager/lib/walle-native-health.js +403 -0
  165. package/template/claude-task-manager/lib/walle-repair.js +701 -0
  166. package/template/claude-task-manager/lib/walle-session-cache.js +109 -0
  167. package/template/claude-task-manager/lib/walle-session-context.js +57 -21
  168. package/template/claude-task-manager/lib/walle-session-model-catalog.js +34 -0
  169. package/template/claude-task-manager/lib/walle-supervisor.js +539 -63
  170. package/template/claude-task-manager/lib/walle-transcript.js +52 -0
  171. package/template/claude-task-manager/lib/worktree-active-sync.js +11 -7
  172. package/template/claude-task-manager/lib/worktree-cwd.js +32 -1
  173. package/template/claude-task-manager/package.json +1 -1
  174. package/template/claude-task-manager/prompt-harvest.js +89 -66
  175. package/template/claude-task-manager/providers/claude-code.js +51 -3
  176. package/template/claude-task-manager/providers/cursor.js +140 -45
  177. package/template/claude-task-manager/public/css/reviews.css +551 -61
  178. package/template/claude-task-manager/public/css/setup.css +191 -0
  179. package/template/claude-task-manager/public/css/walle-session.css +865 -10
  180. package/template/claude-task-manager/public/css/walle.css +154 -0
  181. package/template/claude-task-manager/public/designs/ai-providers-consolidation-v2.html +830 -0
  182. package/template/claude-task-manager/public/index.html +18516 -2058
  183. package/template/claude-task-manager/public/ipad.html +363 -0
  184. package/template/claude-task-manager/public/js/document-review-links.js +301 -0
  185. package/template/claude-task-manager/public/js/image-normalize.js +69 -36
  186. package/template/claude-task-manager/public/js/message-renderer.js +1265 -77
  187. package/template/claude-task-manager/public/js/prompts.js +66 -29
  188. package/template/claude-task-manager/public/js/reviews.js +901 -133
  189. package/template/claude-task-manager/public/js/session-activity-utils.js +11 -1
  190. package/template/claude-task-manager/public/js/session-search-utils.js +94 -10
  191. package/template/claude-task-manager/public/js/session-status-precedence.js +23 -5
  192. package/template/claude-task-manager/public/js/setup.js +1273 -176
  193. package/template/claude-task-manager/public/js/stream-view.js +691 -73
  194. package/template/claude-task-manager/public/js/terminal-reconciler.js +210 -0
  195. package/template/claude-task-manager/public/js/walle-session.js +2455 -158
  196. package/template/claude-task-manager/public/js/walle.js +455 -28
  197. package/template/claude-task-manager/public/m/app.css +2909 -262
  198. package/template/claude-task-manager/public/m/app.js +6601 -398
  199. package/template/claude-task-manager/public/m/claim.html +224 -17
  200. package/template/claude-task-manager/public/m/index.html +117 -21
  201. package/template/claude-task-manager/public/m/sw.js +3 -1
  202. package/template/claude-task-manager/public/manifest.json +2 -2
  203. package/template/claude-task-manager/public/prompts.html +30 -14
  204. package/template/claude-task-manager/queue-engine.js +507 -28
  205. package/template/claude-task-manager/scripts/repair-claude-session-images.js +27 -8
  206. package/template/claude-task-manager/server.js +14341 -2197
  207. package/template/claude-task-manager/session-integrity.js +160 -18
  208. package/template/claude-task-manager/session-search-ranking.js +1 -0
  209. package/template/claude-task-manager/session-utils.js +25 -5
  210. package/template/claude-task-manager/workers/approval-blocklist.js +96 -6
  211. package/template/claude-task-manager/workers/approval-widget-validator.js +14 -8
  212. package/template/claude-task-manager/workers/conversation-import-worker.js +11 -50
  213. package/template/claude-task-manager/workers/db-owner-worker.js +386 -0
  214. package/template/claude-task-manager/workers/harvest-worker.js +9 -55
  215. package/template/claude-task-manager/workers/headless-term-worker.js +9 -530
  216. package/template/claude-task-manager/workers/read-pool-worker.js +387 -0
  217. package/template/claude-task-manager/workers/scrollback-worker.js +11 -72
  218. package/template/claude-task-manager/workers/session-host-process.js +146 -0
  219. package/template/claude-task-manager/workers/session-integrity-worker.js +10 -54
  220. package/template/claude-task-manager/workers/state-detectors/base.js +18 -1
  221. package/template/claude-task-manager/workers/state-detectors/claude-code.js +182 -9
  222. package/template/claude-task-manager/workers/state-detectors/codex.js +150 -2
  223. package/template/claude-task-manager/workers/state-detectors/cursor.js +127 -0
  224. package/template/claude-task-manager/workers/state-detectors/gemini.js +21 -0
  225. package/template/claude-task-manager/workers/state-detectors/index.js +29 -0
  226. package/template/claude-task-manager/workers/state-detectors/opencode.js +103 -0
  227. package/template/docs/design/markdown-review-pane.md +206 -0
  228. package/template/docs/designs/2026-05-17-portkey-gateway-provider-ux.md +129 -38
  229. package/template/docs/designs/2026-05-20-mobile-worktree-finish-command.md +27 -0
  230. package/template/docs/designs/2026-05-22-ai-configuration-consolidation.md +248 -0
  231. package/template/docs/designs/ai-configuration-consolidation-mock.html +812 -0
  232. package/template/docs/private-memory-and-pii-policy.md +69 -0
  233. package/template/package.json +2 -1
  234. package/template/scripts/check-private-data.js +201 -0
  235. package/template/shared/sqlite-owner-guard.js +30 -0
  236. package/template/shared/sqlite-owner-write-queue.js +225 -0
  237. package/template/shared/sqlite-storage-policy.js +111 -0
  238. package/template/shared/sqlite-write-lock.js +428 -0
  239. package/template/wall-e/agent-runners/claude-code.js +5 -0
  240. package/template/wall-e/agent.js +166 -22
  241. package/template/wall-e/api-walle.js +524 -70
  242. package/template/wall-e/auth/provider-flows.js +11 -1
  243. package/template/wall-e/bin/walle-mcp-stdio.js +341 -17
  244. package/template/wall-e/brain.js +1614 -141
  245. package/template/wall-e/chat/attachment-blocks.js +96 -0
  246. package/template/wall-e/chat/attachments.js +2 -1
  247. package/template/wall-e/chat/capability-resolver.js +7 -7
  248. package/template/wall-e/chat/context-messages.js +28 -0
  249. package/template/wall-e/chat/conversation-frame.js +630 -0
  250. package/template/wall-e/chat/provider-messages.js +125 -0
  251. package/template/wall-e/chat.js +1002 -233
  252. package/template/wall-e/coding/acceptance-contract.js +170 -0
  253. package/template/wall-e/coding/acp-adapter.js +1 -1
  254. package/template/wall-e/coding/agent-catalog.js +3 -0
  255. package/template/wall-e/coding/artifact-store.js +93 -0
  256. package/template/wall-e/coding/capability-router.js +120 -0
  257. package/template/wall-e/coding/coding-run-controller.js +423 -0
  258. package/template/wall-e/coding/compaction-service.js +157 -12
  259. package/template/wall-e/coding/frontend-verification.js +258 -0
  260. package/template/wall-e/coding/lifecycle-hooks.js +75 -0
  261. package/template/wall-e/coding/local-preview-contract.js +157 -0
  262. package/template/wall-e/coding/permission-service.js +57 -13
  263. package/template/wall-e/coding/prompt-bundle.js +19 -1
  264. package/template/wall-e/coding/prompt-section-registry.js +227 -0
  265. package/template/wall-e/coding/provider-compat.js +15 -0
  266. package/template/wall-e/coding/runtime-events.js +224 -0
  267. package/template/wall-e/coding/runtime-mode.js +3 -0
  268. package/template/wall-e/coding/side-git-snapshot.js +160 -4
  269. package/template/wall-e/coding/snapshot-service.js +143 -1
  270. package/template/wall-e/coding/stream-processor.js +388 -34
  271. package/template/wall-e/coding/task-tool.js +141 -4
  272. package/template/wall-e/coding/tool-execution-controller.js +365 -0
  273. package/template/wall-e/coding/tool-registry.js +43 -5
  274. package/template/wall-e/coding/user-hooks.js +217 -0
  275. package/template/wall-e/coding-orchestrator.js +1330 -221
  276. package/template/wall-e/coding-prompts.js +20 -4
  277. package/template/wall-e/context/context-builder.js +15 -2
  278. package/template/wall-e/decision/confidence.js +1 -1
  279. package/template/wall-e/docs/coding-acceptance-contract.md +41 -0
  280. package/template/wall-e/docs/external-action-controller.md +26 -6
  281. package/template/wall-e/docs/telemetry-lifecycle.md +8 -2
  282. package/template/wall-e/embeddings.js +591 -53
  283. package/template/wall-e/external-action-controller.js +12 -0
  284. package/template/wall-e/http/auth.js +1 -0
  285. package/template/wall-e/http/chat-api.js +46 -11
  286. package/template/wall-e/http/model-admin.js +836 -34
  287. package/template/wall-e/lib/boot-profile.js +88 -0
  288. package/template/wall-e/lib/event-loop-monitor.js +93 -0
  289. package/template/wall-e/lib/service-health.js +194 -0
  290. package/template/wall-e/llm/anthropic.js +130 -5
  291. package/template/wall-e/llm/client.js +266 -63
  292. package/template/wall-e/llm/default-fallback.js +382 -0
  293. package/template/wall-e/llm/health.js +19 -0
  294. package/template/wall-e/llm/message-guard.js +78 -0
  295. package/template/wall-e/llm/model-catalog.js +252 -1
  296. package/template/wall-e/llm/openai.js +26 -4
  297. package/template/wall-e/llm/portkey-sync.js +654 -0
  298. package/template/wall-e/llm/provider-error.js +30 -2
  299. package/template/wall-e/llm/registry.js +5 -1
  300. package/template/wall-e/llm/request-compat.js +67 -0
  301. package/template/wall-e/loops/backfill.js +79 -23
  302. package/template/wall-e/loops/brain-optimize.js +67 -0
  303. package/template/wall-e/loops/ingest.js +25 -10
  304. package/template/wall-e/loops/question-digest.js +160 -0
  305. package/template/wall-e/loops/reflect.js +6 -4
  306. package/template/wall-e/loops/think.js +39 -12
  307. package/template/wall-e/mcp-server.js +318 -36
  308. package/template/wall-e/memory/ctm-context-client.js +52 -14
  309. package/template/wall-e/memory/ctm-operational-context.js +237 -0
  310. package/template/wall-e/memory/ctm-prompt-executions-client.js +128 -0
  311. package/template/wall-e/memory/ctm-session-context.js +111 -63
  312. package/template/wall-e/prompts/coding/deepseek.txt +3 -0
  313. package/template/wall-e/prompts/coding/gemini.txt +6 -0
  314. package/template/wall-e/prompts/coding/gpt.txt +6 -0
  315. package/template/wall-e/prompts/coding/local.txt +7 -0
  316. package/template/wall-e/runtime/decision-hooks.js +115 -0
  317. package/template/wall-e/runtime/devbox-gateway.js +82 -8
  318. package/template/wall-e/runtime/prompt-manifest.js +86 -0
  319. package/template/wall-e/runtime/tool-executor.js +269 -0
  320. package/template/wall-e/runtime/tool-result-envelope.js +138 -0
  321. package/template/wall-e/runtime/transcript-projection.js +60 -0
  322. package/template/wall-e/runtime/walle-runtime.js +224 -0
  323. package/template/wall-e/scripts/db-optimize/migrate.js +162 -0
  324. package/template/wall-e/scripts/db-optimize/recall-eval.js +117 -0
  325. package/template/wall-e/server.js +15 -0
  326. package/template/wall-e/session-files.js +9 -0
  327. package/template/wall-e/skills/_bundled/google-calendar/run.js +1 -1
  328. package/template/wall-e/skills/_bundled/gws-workspace/run.js +1 -1
  329. package/template/wall-e/skills/_bundled/slack-mentions/run.js +76 -6
  330. package/template/wall-e/skills/claude-code-reader.js +7 -3
  331. package/template/wall-e/skills/script-skill-runner.js +10 -0
  332. package/template/wall-e/skills/skill-planner.js +38 -0
  333. package/template/wall-e/tools/builtin-middleware.js +19 -9
  334. package/template/wall-e/tools/local-tools.js +1428 -16
  335. package/template/wall-e/tools/permission-checker.js +73 -5
  336. package/template/wall-e/tools/question-manager.js +117 -7
  337. package/template/wall-e/training/harvester.js +12 -28
  338. package/template/wall-e/training/replay.js +25 -80
  339. package/template/website/index.html +10 -10
  340. package/template/wall-e/eval/ab-test.js +0 -203
  341. package/template/wall-e/eval/agent-runner.js +0 -772
  342. package/template/wall-e/eval/agent-scorer.js +0 -461
  343. package/template/wall-e/eval/aggregator.js +0 -414
  344. package/template/wall-e/eval/allowed-test-commands.js +0 -34
  345. package/template/wall-e/eval/benchmark-generator.js +0 -113
  346. package/template/wall-e/eval/benchmarks/chat-eval.json +0 -1662
  347. package/template/wall-e/eval/benchmarks/chat.json +0 -82
  348. package/template/wall-e/eval/benchmarks/coding-agent-real.json +0 -1
  349. package/template/wall-e/eval/benchmarks/coding-agent.json +0 -1581
  350. package/template/wall-e/eval/benchmarks/coding.json +0 -122
  351. package/template/wall-e/eval/benchmarks/memory-retrieval.json +0 -234
  352. package/template/wall-e/eval/benchmarks/reasoning.json +0 -82
  353. package/template/wall-e/eval/benchmarks/swebench-lite-30.json +0 -212
  354. package/template/wall-e/eval/benchmarks.js +0 -669
  355. package/template/wall-e/eval/cc-replay.js +0 -719
  356. package/template/wall-e/eval/chat-eval.js +0 -525
  357. package/template/wall-e/eval/check-keys.js +0 -15
  358. package/template/wall-e/eval/check-providers.js +0 -42
  359. package/template/wall-e/eval/codex-cli-baseline.js +0 -669
  360. package/template/wall-e/eval/coding-agent-real.js +0 -570
  361. package/template/wall-e/eval/context-compactor.js +0 -251
  362. package/template/wall-e/eval/debug-agent003.js +0 -68
  363. package/template/wall-e/eval/diagnostics.js +0 -216
  364. package/template/wall-e/eval/eval-orchestrator.js +0 -642
  365. package/template/wall-e/eval/evaluate.js +0 -202
  366. package/template/wall-e/eval/evaluator.js +0 -373
  367. package/template/wall-e/eval/exporter.js +0 -212
  368. package/template/wall-e/eval/fixtures/express-basic/package.json +0 -9
  369. package/template/wall-e/eval/fixtures/express-basic/server.js +0 -115
  370. package/template/wall-e/eval/fixtures/express-basic/test.js +0 -83
  371. package/template/wall-e/eval/fixtures/express-buggy/package.json +0 -9
  372. package/template/wall-e/eval/fixtures/express-buggy/server.js +0 -113
  373. package/template/wall-e/eval/fixtures/express-buggy/test.js +0 -83
  374. package/template/wall-e/eval/fixtures/express-buggy-items/package.json +0 -9
  375. package/template/wall-e/eval/fixtures/express-buggy-items/server.js +0 -112
  376. package/template/wall-e/eval/fixtures/express-buggy-items/test.js +0 -83
  377. package/template/wall-e/eval/fixtures/express-buggy-search/package.json +0 -9
  378. package/template/wall-e/eval/fixtures/express-buggy-search/server.js +0 -121
  379. package/template/wall-e/eval/fixtures/express-buggy-search/test.js +0 -83
  380. package/template/wall-e/eval/fixtures/express-rename-data/data.js +0 -34
  381. package/template/wall-e/eval/fixtures/express-rename-data/package.json +0 -9
  382. package/template/wall-e/eval/fixtures/express-rename-data/server.js +0 -97
  383. package/template/wall-e/eval/fixtures/express-rename-data/test.js +0 -88
  384. package/template/wall-e/eval/fixtures/express-xss/package.json +0 -12
  385. package/template/wall-e/eval/fixtures/express-xss/server.js +0 -90
  386. package/template/wall-e/eval/fixtures/express-xss/test.js +0 -67
  387. package/template/wall-e/eval/fixtures/express-xss/views/profile.ejs +0 -9
  388. package/template/wall-e/eval/fixtures/fullstack-app/config/default.js +0 -9
  389. package/template/wall-e/eval/fixtures/fullstack-app/config/test.js +0 -13
  390. package/template/wall-e/eval/fixtures/fullstack-app/package.json +0 -11
  391. package/template/wall-e/eval/fixtures/fullstack-app/public/css/style.css +0 -137
  392. package/template/wall-e/eval/fixtures/fullstack-app/public/index.html +0 -46
  393. package/template/wall-e/eval/fixtures/fullstack-app/public/js/app.js +0 -121
  394. package/template/wall-e/eval/fixtures/fullstack-app/public/js/auth.js +0 -71
  395. package/template/wall-e/eval/fixtures/fullstack-app/public/js/items.js +0 -80
  396. package/template/wall-e/eval/fixtures/fullstack-app/public/js/users.js +0 -46
  397. package/template/wall-e/eval/fixtures/fullstack-app/public/login.html +0 -45
  398. package/template/wall-e/eval/fixtures/fullstack-app/public/register.html +0 -38
  399. package/template/wall-e/eval/fixtures/fullstack-app/scripts/migrate.js +0 -23
  400. package/template/wall-e/eval/fixtures/fullstack-app/scripts/seed.js +0 -46
  401. package/template/wall-e/eval/fixtures/fullstack-app/server/db.js +0 -99
  402. package/template/wall-e/eval/fixtures/fullstack-app/server/index.js +0 -94
  403. package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/auth.js +0 -19
  404. package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/logger.js +0 -19
  405. package/template/wall-e/eval/fixtures/fullstack-app/server/router.js +0 -50
  406. package/template/wall-e/eval/fixtures/fullstack-app/server/routes/auth.js +0 -69
  407. package/template/wall-e/eval/fixtures/fullstack-app/server/routes/health.js +0 -23
  408. package/template/wall-e/eval/fixtures/fullstack-app/server/routes/items.js +0 -88
  409. package/template/wall-e/eval/fixtures/fullstack-app/server/routes/users.js +0 -75
  410. package/template/wall-e/eval/fixtures/fullstack-app/server/test.js +0 -198
  411. package/template/wall-e/eval/fixtures/fullstack-app/server/utils/response.js +0 -34
  412. package/template/wall-e/eval/fixtures/fullstack-app/server/utils/validate.js +0 -26
  413. package/template/wall-e/eval/fixtures/fullstack-app/server.js +0 -8
  414. package/template/wall-e/eval/fixtures/fullstack-app/test.js +0 -12
  415. package/template/wall-e/eval/fixtures/monorepo-basic/package.json +0 -8
  416. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/data.js +0 -58
  417. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/middleware.js +0 -46
  418. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/package.json +0 -8
  419. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/routes.js +0 -64
  420. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/server.js +0 -56
  421. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/test.js +0 -116
  422. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/commands.js +0 -61
  423. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/index.js +0 -62
  424. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/output.js +0 -43
  425. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/package.json +0 -11
  426. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/test.js +0 -44
  427. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/formatters.js +0 -43
  428. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/index.js +0 -12
  429. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/package.json +0 -5
  430. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/test.js +0 -55
  431. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/validators.js +0 -29
  432. package/template/wall-e/eval/fixtures/monorepo-basic/test.js +0 -46
  433. package/template/wall-e/eval/fixtures/node-cli/index.js +0 -78
  434. package/template/wall-e/eval/fixtures/node-cli/package.json +0 -10
  435. package/template/wall-e/eval/fixtures/node-cli/test.js +0 -57
  436. package/template/wall-e/eval/fixtures/node-typed/package.json +0 -8
  437. package/template/wall-e/eval/fixtures/node-typed/src/handlers.js +0 -31
  438. package/template/wall-e/eval/fixtures/node-typed/src/utils.js +0 -33
  439. package/template/wall-e/eval/fixtures/node-typed/test.js +0 -36
  440. package/template/wall-e/eval/fixtures/python-flask/app.py +0 -14
  441. package/template/wall-e/eval/fixtures/python-flask/requirements.txt +0 -2
  442. package/template/wall-e/eval/fixtures/python-flask/test_app.py +0 -25
  443. package/template/wall-e/eval/fixtures/wall-e-subset/brain.js +0 -105
  444. package/template/wall-e/eval/fixtures/wall-e-subset/eval/aggregator.js +0 -101
  445. package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/chat.json +0 -20
  446. package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/coding.json +0 -32
  447. package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks.js +0 -64
  448. package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/package.json +0 -6
  449. package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/server.js +0 -31
  450. package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/test.js +0 -18
  451. package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/utils.js +0 -34
  452. package/template/wall-e/eval/fixtures/wall-e-subset/eval/runner.js +0 -104
  453. package/template/wall-e/eval/fixtures/wall-e-subset/eval/scorer.js +0 -73
  454. package/template/wall-e/eval/fixtures/wall-e-subset/eval/test.js +0 -134
  455. package/template/wall-e/eval/fixtures/wall-e-subset/llm/client.js +0 -99
  456. package/template/wall-e/eval/fixtures/wall-e-subset/llm/providers.js +0 -63
  457. package/template/wall-e/eval/fixtures/wall-e-subset/llm/test.js +0 -70
  458. package/template/wall-e/eval/fixtures/wall-e-subset/package.json +0 -10
  459. package/template/wall-e/eval/fixtures/wall-e-subset/test.js +0 -86
  460. package/template/wall-e/eval/harvester.js +0 -685
  461. package/template/wall-e/eval/head-to-head.js +0 -388
  462. package/template/wall-e/eval/humaneval-adapter.js +0 -321
  463. package/template/wall-e/eval/list-models.js +0 -31
  464. package/template/wall-e/eval/livecodebench-adapter.js +0 -291
  465. package/template/wall-e/eval/mail-integration.js +0 -443
  466. package/template/wall-e/eval/manifest.js +0 -186
  467. package/template/wall-e/eval/meta-harness/adapters/coding-agent.js +0 -57
  468. package/template/wall-e/eval/meta-harness/bootstrap-snapshot.js +0 -149
  469. package/template/wall-e/eval/meta-harness/candidate-store.js +0 -117
  470. package/template/wall-e/eval/meta-harness/cli.js +0 -86
  471. package/template/wall-e/eval/meta-harness/domain-spec.js +0 -154
  472. package/template/wall-e/eval/meta-harness/domains/coding-agent.domain.json +0 -84
  473. package/template/wall-e/eval/meta-harness/examples/env-bootstrap-candidate.js +0 -29
  474. package/template/wall-e/eval/meta-harness/experience-store.js +0 -174
  475. package/template/wall-e/eval/meta-harness/frontier.js +0 -96
  476. package/template/wall-e/eval/meta-harness/harness-interface.js +0 -90
  477. package/template/wall-e/eval/meta-harness/leakage-guard.js +0 -80
  478. package/template/wall-e/eval/meta-harness/optimizer.js +0 -207
  479. package/template/wall-e/eval/meta-harness/proposer-runner.js +0 -110
  480. package/template/wall-e/eval/meta-harness/reporting.js +0 -58
  481. package/template/wall-e/eval/meta-harness/telemetry.js +0 -27
  482. package/template/wall-e/eval/meta-harness/validation.js +0 -81
  483. package/template/wall-e/eval/promoter.js +0 -228
  484. package/template/wall-e/eval/provider-normalizer.js +0 -33
  485. package/template/wall-e/eval/replay.js +0 -395
  486. package/template/wall-e/eval/run-agent-benchmarks.js +0 -386
  487. package/template/wall-e/eval/run-codex-cli-baseline.js +0 -177
  488. package/template/wall-e/eval/run-coding-agent-real.js +0 -187
  489. package/template/wall-e/eval/run-eval.js +0 -435
  490. package/template/wall-e/eval/run-model-comparison.js +0 -142
  491. package/template/wall-e/eval/session-evaluator.js +0 -187
  492. package/template/wall-e/eval/session-miner.js +0 -207
  493. package/template/wall-e/eval/session-retrieval-benchmark.js +0 -150
  494. package/template/wall-e/eval/session-transcripts.js +0 -509
  495. package/template/wall-e/eval/shadow.js +0 -161
  496. package/template/wall-e/eval/swebench-adapter.js +0 -345
  497. package/template/wall-e/eval/swebench-docker.js +0 -192
  498. package/template/wall-e/eval/train.py +0 -320
  499. package/template/wall-e/eval/trainer.js +0 -232
  500. package/template/wall-e/eval/weekly-eval-loop.js +0 -241
@@ -0,0 +1,423 @@
1
+ 'use strict';
2
+
3
+ const crypto = require('crypto');
4
+
5
/**
 * Coerce a value to single-line text.
 *
 * Falsy values become the empty string; every run of whitespace (including
 * newlines and tabs) collapses to one space and both ends are trimmed.
 *
 * @param {*} value - Anything; converted with String().
 * @returns {string} The collapsed, trimmed text.
 */
function normalizeText(value) {
  const raw = String(value || '');
  const collapsed = raw.replace(/\s+/g, ' ');
  return collapsed.trim();
}
8
+
9
/**
 * Report whether at least one pattern matches the text.
 *
 * Patterns are tried in order and evaluation stops at the first match.
 *
 * @param {string} text - Text to probe.
 * @param {RegExp[]} patterns - Objects exposing `test(text)`.
 * @returns {boolean} True when any pattern matches.
 */
function includesAny(text, patterns) {
  for (const pattern of patterns) {
    if (pattern.test(text)) return true;
  }
  return false;
}
12
+
13
// Verbs that signal the user wants the workspace changed. Each pattern matches
// the whole word only, case-insensitively, in its common inflections.
//
// Verbs ending in "e" or "y" are written as stem + alternation so that the
// inflected forms really match ("changing", "modified", "written"); appending
// the suffix to the full verb would only match non-words such as "changeing".
const CHANGE_PATTERNS = [
  /\bfix(?:es|ed|ing)?\b/i,
  /\bimplement(?:s|ed|ing)?\b/i,
  /\bchang(?:e[sd]?|ing)\b/i,
  /\bupdat(?:e[sd]?|ing)\b/i,
  /\bedit(?:s|ed|ing)?\b/i,
  /\bmodif(?:y|ies|ied|ying)\b/i,
  /\bimprov(?:e[sd]?|ing)\b/i,
  /\bpolish(?:es|ed|ing)?\b/i,
  /\benhanc(?:e[sd]?|ing)\b/i,
  /\bupgrad(?:e[sd]?|ing)\b/i,
  /\brefactor(?:s|ed|ing)?\b/i,
  /\bwir(?:e[sd]?|ing)\b/i,
  /\badd(?:s|ed|ing)?\b/i,
  /\bremov(?:e[sd]?|ing)\b/i,
  /\bdelet(?:e[sd]?|ing)\b/i,
  /\bbuild(?:s|ing)?\b/i,
  /\bcreat(?:e[sd]?|ing)\b/i,
  /\bgenerat(?:e[sd]?|ing)\b/i,
  /\bwrit(?:e|es|ing|ten)\b/i,
  /\bdocument(?:s|ed|ing)?\b/i,
  /\btest(?:s|ed|ing)?\b/i,
  /\bcommit(?:s|ted|ting)?\b/i,
  /\bmerg(?:e[sd]?|ing)\b/i,
];
38
+
39
// Phrases that suggest the user wants an answer rather than a workspace edit.
const READ_ONLY_PATTERNS = [
  // Explicit "leave the code alone" instructions.
  /\b(?:don'?t|do not) (?:do|make|change|implement|edit|fix|touch)\b/i,
  /\bno (?:code )?(?:change|changes|edits|implementation)\b/i,

  // Requests to talk instead of act.
  /\bjust (?:answer|respond|reply|explain|discuss)\b/i,
  /\bdiscuss (?:first|before)\b/i,
  /\blet'?s discuss\b/i,

  // Planning and analysis vocabulary.
  /\bproposal\b/i,
  /\bdesign\b/i,
  /\bplan\b/i,
  /\bwhy\b/i,
  /\bexplain\b/i,
  /\banaly[sz]e\b/i,
  /\binvestigate\b/i,
  /\bstudy\b/i,
  /\breview\b/i,
];
55
+
56
// "Update"-style verbs aimed at the conversation itself (an answer, a summary,
// a search) rather than at files in the workspace.
const CONVERSATIONAL_UPDATE_PATTERNS = [
  // e.g. "update your answer", "revise the summary"
  /\b(?:update|refine|revise|correct|expand|clarify|adjust|improve)\s+(?:your|the|that|this|my|above|previous|prior|last)?\s*(?:answer|summary|analysis|response|reply|conclusion|search|result|results|finding|findings|write[- ]?up)\b/i,
  // e.g. "rerun the search", "broaden your search"
  /\b(?:refine|broaden|narrow|redo|rerun)\s+(?:your|the|that|this|my|above|previous|prior|last)?\s*search\b/i,
  // e.g. "revise it based on the logs"
  /\b(?:update|refine|revise|correct|expand|clarify|adjust|improve)\s+(?:it|that|this)\s+(?:above|based on|with|using)\b/i,
];
61
+
62
// Signs that a request names something in the workspace: a generic artifact
// word, a file with a known extension, or a relative/home path.
const WORKSPACE_MUTATION_TARGET_PATTERNS = [
  // Artifact vocabulary ("file", "repo", "tests", "schema", ...).
  /\b(?:file|files|code|repo|repository|worktree|project|app|source|implementation|docs?|documentation|readme|markdown|test|tests|spec|specs|db|database|schema|server|client|frontend|backend|component|module|package|script|config)\b/i,
  // "in|to|under <name>.<ext>", optionally quoted.
  /\b(?:in|to|under)\s+[`'"]?[^`'"\s]+\.(?:js|mjs|cjs|ts|tsx|jsx|md|mdx|json|py|go|rs|swift|html|css|scss|sql|yml|yaml|toml|sh|rb|java|kt|xml)\b/i,
  // Path-looking tokens: "/x", "./x", "../x", "~/x".
  /(?:^|[\s"'`])(?:\.{0,2}\/|~\/)[^\s"'`]+/,
  // Bare file names with a known extension.
  /\b[\w.-]+\.(?:js|mjs|cjs|ts|tsx|jsx|md|mdx|json|py|go|rs|swift|html|css|scss|sql|yml|yaml|toml|sh|rb|java|kt|xml)\b/i,
];
68
+
69
/**
 * Classify a prompt as a change request, a read-only request, or ambiguous.
 *
 * The prompt is whitespace-normalized and then matched against the module's
 * pattern tables. An explicit "don't change / no changes" instruction always
 * wins over change verbs. A conversational update ("update your answer") with
 * no workspace target is treated as read-only even though it contains a change
 * verb.
 *
 * @param {*} prompt - The user's request text.
 * @returns {{kind: string, expectsChange: boolean, readOnly: boolean, explicitNoChange: boolean, reason: string}}
 *   `kind` is 'coding_change', 'read_only' or 'coding_unknown'; `reason` is
 *   'change_language', 'conversational_update_language', 'read_only_language'
 *   or 'ambiguous_coding_turn'.
 */
function classifyCodingTurnIntent(prompt) {
  const text = normalizeText(prompt);
  const matches = (patterns) => includesAny(text, patterns);

  const explicitNoChange = [
    /\b(?:don'?t|do not) (?:do|make|change|implement|edit|fix|touch)\b/i,
    /\bno (?:code )?(?:change|changes|edits|implementation)\b/i,
  ].some((pattern) => pattern.test(text));

  const conversationalUpdateOnly = matches(CONVERSATIONAL_UPDATE_PATTERNS)
    && !matches(WORKSPACE_MUTATION_TARGET_PATTERNS);
  const effectiveAsksChange = matches(CHANGE_PATTERNS) && !conversationalUpdateOnly;
  const expectsChange = effectiveAsksChange && !explicitNoChange;
  const readOnly = explicitNoChange
    || conversationalUpdateOnly
    || (matches(READ_ONLY_PATTERNS) && !effectiveAsksChange);

  let kind;
  let reason;
  if (expectsChange) {
    kind = 'coding_change';
    reason = 'change_language';
  } else if (conversationalUpdateOnly) {
    // conversationalUpdateOnly implies readOnly, so the kind is read_only.
    kind = 'read_only';
    reason = 'conversational_update_language';
  } else if (readOnly) {
    kind = 'read_only';
    reason = 'read_only_language';
  } else {
    kind = 'coding_unknown';
    reason = 'ambiguous_coding_turn';
  }

  return { kind, expectsChange, readOnly, explicitNoChange, reason };
}
92
+
93
/**
 * Flatten the tool calls of every turn in a run log into one list.
 *
 * @param {{log?: Array<{toolCalls?: Array}>}} [result] - Agent run result.
 * @returns {Array} All `toolCalls` entries, in log order.
 */
function collectToolCalls(result = {}) {
  const turns = result.log || [];
  const calls = [];
  [...turns].forEach((turn) => {
    for (const call of turn.toolCalls || []) calls.push(call);
  });
  return calls;
}
100
+
101
/**
 * Flatten the tool results of every turn in a run log into one list.
 *
 * @param {{log?: Array<{toolResults?: Array}>}} [result] - Agent run result.
 * @returns {Array} All `toolResults` entries, in log order.
 */
function collectToolResults(result = {}) {
  const turns = result.log || [];
  const results = [];
  [...turns].forEach((turn) => {
    for (const item of turn.toolResults || []) results.push(item);
  });
  return results;
}
108
+
109
/**
 * Read a tool identifier from a call/result record, lower-cased.
 *
 * The first truthy value among `name`, `tool`, `toolName` and `tool_name`
 * wins; a missing record or missing fields yield ''.
 *
 * @param {object|null|undefined} value - Tool call or result record.
 * @returns {string} Lower-cased tool name, or ''.
 */
function toolName(value) {
  for (const key of ['name', 'tool', 'toolName', 'tool_name']) {
    const candidate = value?.[key];
    if (candidate) return String(candidate).toLowerCase();
  }
  return '';
}
112
+
113
/**
 * Extract the shell command string from a tool call.
 *
 * Arguments are read from `call.input`, falling back to `call.args`; within
 * them the first truthy value of `command`, `cmd` or `script` is used.
 *
 * A `null` call is tolerated (the default parameter only covers `undefined`),
 * matching toolName(), so one malformed log entry cannot throw while the run
 * is being scanned.
 *
 * @param {object|null} [call] - Tool call record.
 * @returns {string} The command text, or '' when none is present.
 */
function commandText(call = {}) {
  const input = call?.input || call?.args || {};
  return String(input.command || input.cmd || input.script || '');
}
117
+
118
/**
 * Report whether the run invoked a file-editing tool.
 *
 * @param {object} [result] - Agent run result with a `log` of turns.
 * @returns {boolean} True when any tool call is named edit_file, write_file,
 *   apply_patch or multi_edit.
 */
function hasEditTool(result = {}) {
  const editToolName = /^(edit_file|write_file|apply_patch|multi_edit)$/;
  for (const call of collectToolCalls(result)) {
    if (editToolName.test(toolName(call))) return true;
  }
  return false;
}
121
+
122
/**
 * Report whether the run executed a verification command that succeeded.
 *
 * A call counts when it is a shell-like tool whose command mentions a
 * test/lint/type-check runner, and the tool result at the same position in
 * the flattened result list reports success (`ok === true`, `result.ok ===
 * true`, or an exit code of 0).
 *
 * @param {object} [result] - Agent run result with a `log` of turns.
 * @returns {boolean} True when at least one verification call succeeded.
 */
function hasVerificationTool(result = {}) {
  const shellToolName = /^(run_shell|shell|exec|bash|terminal)$/;
  const verificationCommand = /\b(test|lint|check|typecheck|tsc|pytest|vitest|jest|playwright|node --test|npm test|npm run test)\b/i;

  const calls = collectToolCalls(result);
  const results = collectToolResults(result);

  for (let index = 0; index < calls.length; index += 1) {
    const call = calls[index];
    const name = toolName(call);
    const cmd = commandText(call);
    if (!shellToolName.test(name) || !verificationCommand.test(cmd)) continue;

    // Calls and results are paired by their position in the flattened lists.
    const paired = results[index] || {};
    const succeeded = paired.ok === true
      || paired.result?.ok === true
      || paired.result?.exitCode === 0
      || paired.result?.exit_code === 0;
    if (succeeded) return true;
  }
  return false;
}
135
+
136
/**
 * Clean a changed-file list: normalize each entry, drop empties and
 * duplicates (first occurrence kept), and cap the list at 200 entries.
 *
 * @param {*} files - Expected to be an array; anything else yields [].
 * @returns {string[]} Unique, non-empty, normalized entries.
 */
function normalizeChangedFiles(files) {
  if (!Array.isArray(files)) return [];
  const unique = new Set();
  for (const file of files) {
    const cleaned = normalizeText(file);
    if (cleaned) unique.add(cleaned);
  }
  return Array.from(unique).slice(0, 200);
}
140
+
141
/**
 * Build the user-facing reply for a coding turn that was blocked because no
 * code changes were made.
 *
 * @param {{output: *, reason: string}} details - `output` is the agent's last
 *   output (normalized and truncated to 1800 chars); `reason` is the outcome
 *   reason code.
 * @returns {string} The reply text.
 */
function makeBlockedReply({ output, reason }) {
  const tail = normalizeText(output).slice(0, 1800);
  const lines = [
    'I did not make code changes, so I am not marking this coding task complete.',
    '',
    `Blocked reason: ${reason}.`,
  ];
  lines.push(tail ? `\nLast agent output:\n\n${tail}` : '');
  return lines.join('\n').trim();
}
150
+
151
/**
 * Decide whether the run ended with an answer for the user.
 *
 * Order of evidence: an explicit boolean `finalAnswerDelivered` flag; then
 * non-empty `output`/`reply`; then the content of the last log turn whose
 * `toolCalls` is an empty array.
 *
 * @param {object} [result] - Agent run result.
 * @returns {boolean} True when an answer was delivered.
 */
function hasDeliveredFinalAnswer(result = {}) {
  if (typeof result.finalAnswerDelivered === 'boolean') return result.finalAnswerDelivered;
  if (normalizeText(result.output || result.reply || '').length > 0) return true;

  const turns = [...(result.log || [])];
  for (let index = turns.length - 1; index >= 0; index -= 1) {
    const turn = turns[index];
    if (Array.isArray(turn.toolCalls) && turn.toolCalls.length === 0) {
      return normalizeText(turn.content || '').length > 0;
    }
  }
  return false;
}
158
+
159
/**
 * Build the reply used when the agent finished without a final answer.
 *
 * @param {{output?: *, stderr?: *}} [details] - Runtime output; `stderr` is
 *   preferred over `output`, normalized and truncated to 1000 chars.
 * @returns {string} The notice, with a "Runtime detail" line when available.
 */
function makeIncompleteReadOnlyReply({ output, stderr } = {}) {
  const detail = normalizeText(stderr || output || '').slice(0, 1000);
  const headline = 'The agent did not produce a final answer for this follow-up.';
  const suffix = detail ? `\nRuntime detail: ${detail}` : '';
  return `${headline}\n${suffix}`.trim();
}
166
+
167
/**
 * Turn a raw agent result into a coding outcome record.
 *
 * Every outcome has the shape
 * `{ status, success, reason, changedFiles, verified, intent }`.
 *
 * Statuses produced here:
 * - 'failed': the runtime failed (`success === false` or non-zero exit code),
 *   or no final answer was delivered where one was required.
 * - 'blocked': non-interactive only; the prompt asked for a change but no
 *   changed files were reported.
 * - 'read_only_answer': an answer was delivered without changed files.
 * - 'completed_with_changes': changed files were reported.
 * - 'completed_no_changes': non-interactive, ambiguous intent, no changes.
 *
 * @param {object} [result] - Agent run result.
 * @param {{prompt?: string, interactive?: boolean}} [options] - The user
 *   prompt (used for intent classification) and the interactive flag.
 * @returns {object} The outcome record.
 */
function deriveCodingOutcome(result = {}, { prompt = '', interactive = false } = {}) {
  const intent = classifyCodingTurnIntent(prompt);
  const changedFiles = normalizeChangedFiles(result.changedFiles);
  const hasChanges = changedFiles.length > 0;
  const edited = hasChanges || hasEditTool(result);
  const verified = hasVerificationTool(result);

  const finish = (status, success, reason) => ({
    status,
    success,
    reason,
    changedFiles,
    verified,
    intent,
  });
  const withChanges = () => finish(
    'completed_with_changes',
    true,
    verified ? 'changed_and_verified' : 'changed_unverified',
  );
  const withoutAnswer = () => finish('failed', false, 'agent_finished_without_answer');

  const baseSuccess = result.success !== false && Number(result.exitCode || 0) === 0;
  if (!baseSuccess) return finish('failed', false, 'agent_runtime_failed');

  // Interactive (opencode parity): never block/fail on "no file changes". The turn is
  // successful as long as the agent delivered a final answer. Reuses existing success
  // statuses (`read_only_answer`, `completed_with_changes`) so no new outcome.status enum
  // is introduced — interactive simply never emits `blocked`.
  if (interactive) {
    if (!hasDeliveredFinalAnswer(result)) return withoutAnswer();
    return hasChanges ? withChanges() : finish('read_only_answer', true, 'answer_delivered');
  }

  if (hasChanges) return withChanges();

  if (intent.expectsChange) {
    return finish(
      'blocked',
      false,
      edited ? 'edit_tool_produced_no_diff' : 'coding_task_finished_without_changes',
    );
  }
  if (intent.readOnly) {
    return hasDeliveredFinalAnswer(result)
      ? finish('read_only_answer', true, intent.reason)
      : withoutAnswer();
  }
  return finish('completed_no_changes', true, 'no_change_required_or_detected');
}
266
+
267
/**
 * Resolve the LLM provider client and model for a coding run.
 *
 * - With a `providerFactory` function, the factory is called with the provider
 *   name (or null) and an empty config; no model routing happens.
 * - With neither model nor provider, nothing is resolved.
 * - Otherwise the model (if given) is routed through `resolveModelSelection`
 *   from '../chat' and a client is created via '../llm/client'.
 *
 * @param {{model?: string, provider?: string, providerFactory?: Function}} [options]
 * @returns {{providerClient: object|null, model: string|null, route: object|null}}
 * @throws {Error} When the model route is ambiguous; the message lists the
 *   candidate routes.
 */
function resolveCodingProvider({ model, provider, providerFactory } = {}) {
  if (typeof providerFactory === 'function') {
    return {
      providerClient: providerFactory(provider || null, {}),
      model: model || null,
      route: null,
    };
  }
  if (!model && !provider) return { providerClient: null, model: model || null, route: null };

  let route = null;
  let resolvedProvider = provider || null;
  let resolvedModel = model || null;
  let providerConfig = {};
  if (model) {
    // Required lazily so the factory/no-op paths do not load the chat module.
    const { resolveModelSelection } = require('../chat');
    route = resolveModelSelection(model, provider || undefined);
    if (route?.ambiguous) {
      const choices = (route.candidates || []).map((candidate) => `${candidate.registryId} (${candidate.label})`).join(', ');
      throw new Error(`Ambiguous model route for "${route.input}". Choose a specific model route: ${choices}`);
    }
    // The ambiguity check above already tolerates a nullish route, so the
    // reads below do too: a missing route keeps the caller-supplied values.
    resolvedProvider = route?.provider || resolvedProvider;
    resolvedModel = route?.model || resolvedModel;
    providerConfig = route?.providerConfig || {};
  }
  if (!resolvedProvider && !providerFactory) return { providerClient: null, model: resolvedModel, route };

  const { createClient, getProviderRuntimeConfig } = require('../llm/client');
  const providerClient = createClient(resolvedProvider, getProviderRuntimeConfig(resolvedProvider, providerConfig));
  return { providerClient, model: resolvedModel, route };
}
296
+
297
/**
 * Runs a single coding turn through the agent loop and reports a normalized
 * response plus progress events.
 */
class CodingRunController {
  /**
   * @param {object} [deps]
   * @param {Function|null} [deps.runAgentLoop] - Agent loop entry point;
   *   defaults to `runAgentLoop` from '../coding-orchestrator'.
   * @param {Function} [deps.now] - Clock returning a number (used for latency).
   * @param {Function} [deps.idFactory] - Generates the suffix of new run ids.
   */
  constructor({ runAgentLoop = null, now = () => Date.now(), idFactory = () => crypto.randomUUID() } = {}) {
    this.runAgentLoop = runAgentLoop || require('../coding-orchestrator').runAgentLoop;
    this.now = now;
    this.idFactory = idFactory;
  }

  /**
   * Execute one coding turn.
   *
   * Emits a `coding_run_started` event before the agent loop and a
   * `coding_outcome` event after it; agent-loop progress events are forwarded
   * to `onEvent` as well.
   *
   * @param {object} [request] - Turn request. The prompt is read from
   *   `message`, `request` or `prompt`; most options accept both camelCase
   *   and snake_case keys.
   * @param {object} [hooks]
   * @param {Function|null} [hooks.onEvent] - Receives progress events.
   * @param {*} [hooks.signal] - Passed to the agent loop as `abortSignal`.
   * @returns {Promise<object>} Response with `ok`, `reply`, `outcome`,
   *   `changedFiles`, usage and log details.
   * @throws {Error} 'message is required' when the prompt is empty.
   */
  async runTurn(request = {}, { onEvent = null, signal = null } = {}) {
    // Keep the user's line breaks and indentation: collapsing whitespace would
    // flatten fenced code and multi-line instructions before the agent sees
    // them. Intent classification normalizes its own copy of the text.
    const rawPrompt = request.message || request.request || request.prompt || '';
    const prompt = typeof rawPrompt === 'string' ? rawPrompt.trim() : normalizeText(rawPrompt);
    if (!prompt) throw new Error('message is required');
    const codingRunId = normalizeText(request.codingRunId || request.coding_run_id || request.agentSessionId || request.agent_session_id || request.session_id)
      || `coding-${this.idFactory()}`;
    const cwd = request.cwd || request.context?.cwd || request.context?.projectPath || process.cwd();
    const startedAt = this.now();
    const progress = (event) => {
      if (typeof onEvent === 'function') onEvent(event);
    };
    const providerRoute = resolveCodingProvider({
      model: request.model,
      provider: request.provider,
      providerFactory: request.providerFactory,
    });
    const codingIntent = classifyCodingTurnIntent(prompt);
    progress({
      type: 'coding_run_started',
      codingRunId,
      sessionId: request.session_id || request.chatSessionId || '',
      ctmSessionId: request.ctmSessionId || request.ctm_session_id || '',
      cwd,
      model: providerRoute.model || request.model || '',
      provider: providerRoute.providerClient?.type || request.provider || providerRoute.route?.provider || '',
      intent: codingIntent,
    });
    const result = await this.runAgentLoop(prompt, {
      cwd,
      model: providerRoute.model || request.model,
      provider: providerRoute.providerClient || undefined,
      codingIntent,
      timeoutMs: request.timeoutMs || request.timeout_ms,
      maxTurns: request.maxTurns || request.max_turns,
      mode: request.mode || 'build',
      channel: request.channel || 'ctm-session',
      agentMode: request.agentMode || request.agent_mode || 'coding',
      agentKind: request.agentKind || request.agent_kind || 'walle-coding',
      taskType: request.taskType || request.task_type || 'coding',
      session_id: request.session_id || request.chatSessionId || codingRunId,
      ctmSessionId: request.ctmSessionId || request.ctm_session_id || '',
      agentSessionId: request.agentSessionId || request.agent_session_id || codingRunId,
      chatSessionId: request.chatSessionId || request.chat_session_id || request.session_id || '',
      persistTranscript: request.persistTranscript,
      transcript: request.transcript,
      contextMessages: request.contextMessages || request.context_messages,
      transcriptMessageOwner: request.transcriptMessageOwner || request.transcript_message_owner,
      externalTranscriptMessages: request.externalTranscriptMessages ?? request.external_transcript_messages,
      skipTranscriptMessages: request.skipTranscriptMessages ?? request.skip_transcript_messages,
      promptCapabilities: request.promptCapabilities,
      promptCapabilityHints: request.promptCapabilityHints,
      permissionService: request.permissionService,
      headlessPolicy: request.headlessPolicy,
      benchmark: request.benchmark,
      headless: request.headless,
      interactive: request.interactive,
      abortSignal: signal,
      onProgress: progress,
      runSessionId: codingRunId,
    });
    const { isInteractiveRun } = require('../coding-orchestrator');
    const interactive = isInteractiveRun(request);
    const outcome = deriveCodingOutcome(result, { prompt, interactive });
    let reply;
    if (interactive) {
      // opencode parity: the model's answer is the reply. Never reframe a delivered
      // answer as a blocked coding task. Fall back to a gentle notice only when the
      // turn produced no output at all.
      reply = (result.output || result.stderr || '')
        || (outcome.reason === 'agent_finished_without_answer'
          ? makeIncompleteReadOnlyReply({ output: result.output, stderr: result.stderr })
          : '');
    } else if (outcome.status === 'blocked') {
      reply = makeBlockedReply({ output: result.output || result.stderr || '', reason: outcome.reason });
    } else if (outcome.reason === 'agent_finished_without_answer') {
      reply = makeIncompleteReadOnlyReply({ output: result.output, stderr: result.stderr });
    } else {
      reply = result.output || result.stderr || '';
    }
    const response = {
      ok: outcome.success,
      success: outcome.success,
      reply,
      output: result.output || '',
      stderr: result.stderr || '',
      model: result.model || providerRoute.model || request.model || '',
      provider: result.provider || providerRoute.providerClient?.type || request.provider || '',
      latencyMs: this.now() - startedAt,
      tokens: result.usage || {},
      cost: result.cost || 0,
      toolCalls: collectToolCalls(result),
      log: result.log || [],
      changedFiles: outcome.changedFiles,
      codingRunId,
      completion: outcome,
      outcome,
      runtimeMode: result.runtimeMode || '',
      sessionId: result.sessionId || codingRunId,
    };
    progress({
      type: 'coding_outcome',
      codingRunId,
      status: outcome.status,
      success: outcome.success,
      reason: outcome.reason,
      changedFiles: outcome.changedFiles,
      verified: outcome.verified,
      latencyMs: response.latencyMs,
    });
    return response;
  }
}
415
+
416
+ module.exports = {
417
+ CodingRunController,
418
+ classifyCodingTurnIntent,
419
+ deriveCodingOutcome,
420
+ resolveCodingProvider,
421
+ collectToolCalls,
422
+ hasDeliveredFinalAnswer,
423
+ };
@@ -9,6 +9,22 @@ const DEFAULT_THRESHOLD = 0.75;
9
9
  const DEFAULT_TAIL_TOKEN_BUDGET = 40000;
10
10
  const DEFAULT_KEEP_RECENT_USER_TURNS = 4;
11
11
 
12
// Pruning is the cheap compaction layer. It kicks in earlier than
// summarization and only truncates OLD tool outputs: no LLM call is made and
// no decisions are lost (the agent can re-run a tool to get the full output).
const DEFAULT_PRUNE_THRESHOLD = 0.6;
const DEFAULT_PRUNE_PROTECT_TOKENS = 40000;
const DEFAULT_PRUNE_MAX_TOOL_OUTPUT_CHARS = 2000;
const DEFAULT_PRUNE_PROTECTED_TOOLS = Object.freeze([
  'skill',
  'update_todos',
  'task',
  'ask_user',
]);
// Start of the marker appended to tool output that has already been pruned.
const PRUNED_MARKER_RE = /\[pruned \d+ chars/;

// Chunked summarization. A single LLM call used to see at most the first 60k
// chars of the head, so everything after that silently vanished from the
// summary. The history is chunked and chained instead; if the head is still
// too big, the oldest chunks (original task framing) plus the newest are kept
// and the gap is noted.
const SUMMARY_CHUNK_CHARS = 60000;
const SUMMARY_MAX_CHUNKS = 4;
27
+
12
28
  class CompactionService {
13
29
  constructor({
14
30
  provider,
@@ -17,6 +33,10 @@ class CompactionService {
17
33
  threshold = DEFAULT_THRESHOLD,
18
34
  tailTokenBudget = DEFAULT_TAIL_TOKEN_BUDGET,
19
35
  keepRecentUserTurns = DEFAULT_KEEP_RECENT_USER_TURNS,
36
+ pruneThreshold = DEFAULT_PRUNE_THRESHOLD,
37
+ pruneProtectTokens = DEFAULT_PRUNE_PROTECT_TOKENS,
38
+ pruneMaxToolOutputChars = DEFAULT_PRUNE_MAX_TOOL_OUTPUT_CHARS,
39
+ pruneProtectedTools = DEFAULT_PRUNE_PROTECTED_TOOLS,
20
40
  now,
21
41
  } = {}) {
22
42
  this.provider = provider || null;
@@ -25,17 +45,98 @@ class CompactionService {
25
45
  this.threshold = threshold;
26
46
  this.tailTokenBudget = tailTokenBudget;
27
47
  this.keepRecentUserTurns = keepRecentUserTurns;
48
+ this.pruneThreshold = pruneThreshold;
49
+ this.pruneProtectTokens = pruneProtectTokens;
50
+ this.pruneMaxToolOutputChars = pruneMaxToolOutputChars;
51
+ this.pruneProtectedTools = new Set(pruneProtectedTools || []);
28
52
  this.now = now || (() => new Date().toISOString());
29
53
  }
30
54
 
31
- shouldCompact({ messages = [], systemTokens = 0, usage = null, contextWindow = this.contextWindow } = {}) {
55
+ _measuredTokens({ messages = [], systemTokens = 0, usage = null } = {}) {
32
56
  const actualInput = Number.isFinite(usage?.input) ? usage.input
33
57
  : Number.isFinite(usage?.inputTokens) ? usage.inputTokens
34
58
  : null;
35
- const tokens = actualInput == null
59
+ return actualInput == null
36
60
  ? systemTokens + estimateMessagesTokens(messages)
37
61
  : actualInput;
38
- return tokens >= contextWindow * this.threshold;
62
+ }
63
+
64
+ shouldCompact({ messages = [], systemTokens = 0, usage = null, contextWindow = this.contextWindow } = {}) {
65
+ return this._measuredTokens({ messages, systemTokens, usage }) >= contextWindow * this.threshold;
66
+ }
67
+
68
+ shouldPrune({ messages = [], systemTokens = 0, usage = null, contextWindow = this.contextWindow } = {}) {
69
+ return this._measuredTokens({ messages, systemTokens, usage }) >= contextWindow * this.pruneThreshold;
70
+ }
71
+
72
+ /**
73
+ * Cheap compaction layer: truncate tool outputs in OLD turns, keeping the
74
+ * recent `protectTokens` of conversation untouched. Pure — returns new
75
+ * message objects for anything modified; idempotent (already-pruned blocks
76
+ * carry a marker and are skipped). No LLM call.
77
+ */
78
+ prune(messages = [], {
79
+ protectTokens = this.pruneProtectTokens,
80
+ maxToolOutputChars = this.pruneMaxToolOutputChars,
81
+ protectedTools = this.pruneProtectedTools,
82
+ } = {}) {
83
+ const tokensBefore = estimateMessagesTokens(messages);
84
+ if (!Array.isArray(messages) || messages.length === 0) {
85
+ return { pruned: false, messages: messages || [], prunedBlocks: 0, tokensBefore, tokensAfter: tokensBefore };
86
+ }
87
+
88
+ const protectedSet = protectedTools instanceof Set ? protectedTools : new Set(protectedTools || []);
89
+ const toolNameById = new Map();
90
+ for (const msg of messages) {
91
+ if (msg?.role !== 'assistant' || !Array.isArray(msg.content)) continue;
92
+ for (const block of msg.content) {
93
+ if (block?.type === 'tool_use' && block.id) toolNameById.set(block.id, block.name || '');
94
+ }
95
+ }
96
+
97
+ // Everything from protectStartIndex onward is untouchable: the most
98
+ // recent messages whose cumulative size fits the protect budget. The
99
+ // message that crosses the budget is old enough to prune.
100
+ let protectStartIndex = messages.length;
101
+ let protectedTokens = 0;
102
+ for (let i = messages.length - 1; i >= 0; i--) {
103
+ protectedTokens += estimateMessagesTokens([messages[i]]);
104
+ if (protectedTokens > protectTokens) break;
105
+ protectStartIndex = i;
106
+ }
107
+
108
+ let prunedBlocks = 0;
109
+ const out = messages.map((msg, index) => {
110
+ if (index >= protectStartIndex) return msg;
111
+ if (!msg || !Array.isArray(msg.content)) return msg;
112
+ let changed = false;
113
+ const content = msg.content.map((block) => {
114
+ if (block?.type !== 'tool_result') return block;
115
+ const toolName = toolNameById.get(block.tool_use_id) || '';
116
+ if (protectedSet.has(toolName)) return block;
117
+ const text = toolResultText(block.content);
118
+ if (text.length <= maxToolOutputChars || PRUNED_MARKER_RE.test(text)) return block;
119
+ changed = true;
120
+ prunedBlocks += 1;
121
+ const kept = text.slice(0, maxToolOutputChars);
122
+ return {
123
+ ...block,
124
+ content: `${kept}\n[pruned ${text.length - kept.length} chars of old ${toolName || 'tool'} output to save context — re-run the tool if the full output is needed]`,
125
+ };
126
+ });
127
+ return changed ? { ...msg, content } : msg;
128
+ });
129
+
130
+ if (prunedBlocks === 0) {
131
+ return { pruned: false, messages, prunedBlocks: 0, tokensBefore, tokensAfter: tokensBefore };
132
+ }
133
+ return {
134
+ pruned: true,
135
+ messages: out,
136
+ prunedBlocks,
137
+ tokensBefore,
138
+ tokensAfter: estimateMessagesTokens(out),
139
+ };
39
140
  }
40
141
 
41
142
  selectTail(messages = [], {
@@ -191,20 +292,59 @@ class CompactionService {
191
292
  const fallback = fallbackSummary(historyText, selection);
192
293
  if (!this.provider || typeof this.provider.chat !== 'function') return fallback;
193
294
 
194
- const prompt = `Summarize the compacted Wall-E coding history. Preserve decisions, file paths, commands, tool findings, failures, and unfinished work. Do not include a preamble.\n\nCOMPACTED HISTORY:\n${historyText.slice(0, 60000)}`;
195
295
  try {
196
- const response = await this.provider.chat({
197
- model: this.model,
198
- messages: [{ role: 'user', content: prompt }],
199
- maxTokens: 1200,
200
- metadata: { purpose: 'compaction' },
201
- });
202
- const content = textFromContent(response?.content || '');
203
- return content.trim() || fallback;
296
+ const { chunks, omittedChars } = selectSummaryChunks(historyText, SUMMARY_CHUNK_CHARS, SUMMARY_MAX_CHUNKS);
297
+ let carry = '';
298
+ for (const chunk of chunks) {
299
+ carry = await this._summarizeChunk(chunk, carry);
300
+ }
301
+ if (!carry.trim()) return fallback;
302
+ return omittedChars > 0
303
+ ? `${carry}\n\n(Note: ${omittedChars} chars of mid-history were omitted from this summary.)`
304
+ : carry;
204
305
  } catch {
205
306
  return fallback;
206
307
  }
207
308
  }
309
+
310
+ async _summarizeChunk(chunkText, carrySummary) {
311
+ const prompt = carrySummary
312
+ ? `You are updating a running summary of a Wall-E coding session. Merge the new history below into the summary. Preserve decisions, file paths, commands, tool findings, failures, and unfinished work. Do not include a preamble.\n\nSUMMARY SO FAR:\n${carrySummary}\n\nNEXT PART OF HISTORY:\n${chunkText}`
313
+ : `Summarize the compacted Wall-E coding history. Preserve decisions, file paths, commands, tool findings, failures, and unfinished work. Do not include a preamble.\n\nCOMPACTED HISTORY:\n${chunkText}`;
314
+ const response = await this.provider.chat({
315
+ model: this.model,
316
+ messages: [{ role: 'user', content: prompt }],
317
+ maxTokens: 1200,
318
+ metadata: { purpose: 'compaction' },
319
+ });
320
+ return textFromContent(response?.content || '').trim();
321
+ }
322
+ }
323
+
324
/**
 * Flatten tool_result content to plain text.
 *
 * Strings pass through unchanged. Arrays are joined with newlines, taking
 * string entries as-is and the `text` field of object entries (entries
 * without text contribute an empty line). null/undefined become ''; any
 * other value is stringified.
 *
 * @param {*} content - The `content` of a tool_result block.
 * @returns {string} The text form.
 */
function toolResultText(content) {
  if (typeof content === 'string') return content;
  if (content == null) return '';
  if (!Array.isArray(content)) return String(content);

  const parts = [];
  for (const block of content) {
    parts.push(typeof block === 'string' ? block : String(block?.text || ''));
  }
  return parts.join('\n');
}
333
+
334
/**
 * Split history text into chunks for chained summarization.
 *
 * Text that fits in one chunk is returned whole. When the text needs more
 * than `maxChunks` chunks, the oldest `maxChunks - 1` chunks (original task
 * framing) and the newest chunk are kept, and `omittedChars` reports how much
 * of the middle was dropped.
 *
 * A `chunkSize` that is not a finite number >= 1 cannot drive the split loop
 * (it would never advance), so the text is returned as a single chunk.
 * Fractional sizes are floored, and `maxChunks` is floored and clamped to at
 * least 1.
 *
 * @param {*} text - History text; falsy values are treated as ''.
 * @param {number} [chunkSize] - Maximum characters per chunk.
 * @param {number} [maxChunks] - Maximum number of chunks to keep.
 * @returns {{chunks: string[], omittedChars: number}}
 */
function selectSummaryChunks(text, chunkSize = SUMMARY_CHUNK_CHARS, maxChunks = SUMMARY_MAX_CHUNKS) {
  const value = String(text || '');
  const size = Math.floor(Number(chunkSize));
  if (!Number.isFinite(size) || size < 1 || value.length <= size) {
    return { chunks: [value], omittedChars: 0 };
  }

  const all = [];
  for (let i = 0; i < value.length; i += size) all.push(value.slice(i, i + size));

  const limit = Math.max(1, Math.floor(Number(maxChunks)) || 1);
  if (all.length <= limit) return { chunks: all, omittedChars: 0 };

  const kept = [...all.slice(0, limit - 1), all[all.length - 1]];
  const keptChars = kept.reduce((sum, chunk) => sum + chunk.length, 0);
  return { chunks: kept, omittedChars: value.length - keptChars };
}
209
349
 
210
350
  function remapTailStartId(compaction, idMap = {}, forkMessages = []) {
@@ -253,9 +393,14 @@ module.exports = {
253
393
  CompactionService,
254
394
  remapTailStartId,
255
395
  getMessageId,
396
+ selectSummaryChunks,
256
397
  DEFAULT_CONTEXT_WINDOW,
257
398
  DEFAULT_THRESHOLD,
258
399
  DEFAULT_TAIL_TOKEN_BUDGET,
259
400
  DEFAULT_KEEP_RECENT_USER_TURNS,
401
+ DEFAULT_PRUNE_THRESHOLD,
402
+ DEFAULT_PRUNE_PROTECT_TOKENS,
403
+ DEFAULT_PRUNE_MAX_TOOL_OUTPUT_CHARS,
404
+ DEFAULT_PRUNE_PROTECTED_TOOLS,
260
405
  estimateTokens,
261
406
  };