create-walle 0.9.21 → 0.9.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (500)
  1. package/README.md +27 -5
  2. package/package.json +2 -2
  3. package/template/CLAUDE.md +2 -2
  4. package/template/LICENSE +1 -1
  5. package/template/bin/ctm-dev-cleanup.js +24 -3
  6. package/template/bin/ctm-launch.sh +13 -0
  7. package/template/bin/dev.sh +156 -18
  8. package/template/bin/node-bin.sh +84 -0
  9. package/template/bin/pin-node.sh +51 -0
  10. package/template/claude-task-manager/api-prompts.js +1203 -182
  11. package/template/claude-task-manager/api-reviews.js +109 -15
  12. package/template/claude-task-manager/approval-agent.js +1360 -280
  13. package/template/claude-task-manager/bin/restart-ctm.sh +64 -23
  14. package/template/claude-task-manager/bin/storage-migration-supervisor.js +338 -0
  15. package/template/claude-task-manager/db.js +4417 -295
  16. package/template/claude-task-manager/docs/app-update-refresh-protocol.md +69 -0
  17. package/template/claude-task-manager/docs/approval-ai-refinement.md +138 -0
  18. package/template/claude-task-manager/docs/approval-rescue-loop.md +74 -0
  19. package/template/claude-task-manager/docs/codex-operational-warning-health.md +107 -0
  20. package/template/claude-task-manager/docs/codex-resume-state-guard-design.md +17 -12
  21. package/template/claude-task-manager/docs/codex-terminal-render-controller-handoff.md +311 -0
  22. package/template/claude-task-manager/docs/coding-agent-hooks-architecture.md +418 -0
  23. package/template/claude-task-manager/docs/conversation-import-freshness.md +20 -0
  24. package/template/claude-task-manager/docs/google-workspace-auth-health.md +77 -0
  25. package/template/claude-task-manager/docs/image-paste-ux.md +13 -0
  26. package/template/claude-task-manager/docs/ipad-web-preview.md +88 -0
  27. package/template/claude-task-manager/docs/main-loop-offload-architecture.md +66 -0
  28. package/template/claude-task-manager/docs/microsoft-dev-tunnel-phone-access-design.md +274 -519
  29. package/template/claude-task-manager/docs/mobile-live-streaming.md +27 -5
  30. package/template/claude-task-manager/docs/mobile-remote-submission-lifecycle.md +69 -0
  31. package/template/claude-task-manager/docs/phone-access-design.md +53 -15
  32. package/template/claude-task-manager/docs/phone-passkey-identity.md +122 -0
  33. package/template/claude-task-manager/docs/phone-setup.md +3 -0
  34. package/template/claude-task-manager/docs/prompt-editing-tree-design.md +25 -1
  35. package/template/claude-task-manager/docs/remote-desktop-access-design.md +268 -0
  36. package/template/claude-task-manager/docs/restart-lifecycle-architecture.md +95 -0
  37. package/template/claude-task-manager/docs/runtime-work-control-plane.md +53 -0
  38. package/template/claude-task-manager/docs/session-interactive-wait-surfaces.md +38 -0
  39. package/template/claude-task-manager/docs/session-needs-you-dismissal.md +84 -0
  40. package/template/claude-task-manager/docs/session-render-state-management-design.md +91 -3
  41. package/template/claude-task-manager/docs/session-standup-command-center-design.md +25 -1
  42. package/template/claude-task-manager/docs/session-title-authority.md +32 -0
  43. package/template/claude-task-manager/docs/session-workspace-binding.md +33 -0
  44. package/template/claude-task-manager/docs/skill-intent-resolution-design.md +72 -0
  45. package/template/claude-task-manager/docs/walle-mcp-supervisor-health.md +86 -0
  46. package/template/claude-task-manager/docs/walle-relay-phone-access-design.md +24 -15
  47. package/template/claude-task-manager/docs/walle-session-history-hydration.md +114 -0
  48. package/template/claude-task-manager/docs/walle-session-input-queue.md +104 -0
  49. package/template/claude-task-manager/docs/walle-session-model-catalog.md +90 -0
  50. package/template/claude-task-manager/docs/walle-session-model-preferences.md +15 -6
  51. package/template/claude-task-manager/git-utils.js +897 -27
  52. package/template/claude-task-manager/lib/agent-capabilities.js +33 -0
  53. package/template/claude-task-manager/lib/agent-cli-cache.js +37 -7
  54. package/template/claude-task-manager/lib/agent-hooks-installer.js +26 -2
  55. package/template/claude-task-manager/lib/agent-presets.js +17 -1
  56. package/template/claude-task-manager/lib/all-sessions-query.js +108 -0
  57. package/template/claude-task-manager/lib/approval-ai-refinement.js +488 -0
  58. package/template/claude-task-manager/lib/approval-self-adapt.js +168 -0
  59. package/template/claude-task-manager/lib/async-semaphore.js +44 -0
  60. package/template/claude-task-manager/lib/auth-context.js +5 -0
  61. package/template/claude-task-manager/lib/auth-rate-limit.js +47 -4
  62. package/template/claude-task-manager/lib/auth-rules.js +29 -2
  63. package/template/claude-task-manager/lib/auto-approval-verifier.js +129 -16
  64. package/template/claude-task-manager/lib/background-llm.js +144 -17
  65. package/template/claude-task-manager/lib/branch-inventory.js +212 -0
  66. package/template/claude-task-manager/lib/claude-desktop-sessions.js +15 -3
  67. package/template/claude-task-manager/lib/coalesce-sync-frames.js +151 -0
  68. package/template/claude-task-manager/lib/codex-launch-health.js +762 -0
  69. package/template/claude-task-manager/lib/codex-transcript-pager.js +51 -0
  70. package/template/claude-task-manager/lib/codex-zst.js +124 -0
  71. package/template/claude-task-manager/lib/coding-agent-models.js +233 -30
  72. package/template/claude-task-manager/lib/connection-health.js +232 -0
  73. package/template/claude-task-manager/lib/conversation-blob-parser.js +42 -0
  74. package/template/claude-task-manager/lib/conversation-tail-merge.js +89 -26
  75. package/template/claude-task-manager/lib/ctm-session-context-api.js +39 -10
  76. package/template/claude-task-manager/lib/cursor-conversation-store.js +354 -0
  77. package/template/claude-task-manager/lib/db-owner-worker-client.js +315 -0
  78. package/template/claude-task-manager/lib/document-review.js +141 -6
  79. package/template/claude-task-manager/lib/escalation-review.js +152 -0
  80. package/template/claude-task-manager/lib/graceful-shutdown.js +159 -0
  81. package/template/claude-task-manager/lib/headless-term-service.js +678 -0
  82. package/template/claude-task-manager/lib/heavy-worker-fallback.js +38 -0
  83. package/template/claude-task-manager/lib/jsonl-conversation-parser.js +542 -0
  84. package/template/claude-task-manager/lib/jsonl-range-reader.js +112 -0
  85. package/template/claude-task-manager/lib/main-db-census.js +216 -0
  86. package/template/claude-task-manager/lib/message-pagination.js +106 -4
  87. package/template/claude-task-manager/lib/microsoft-dev-tunnel-setup.js +750 -26
  88. package/template/claude-task-manager/lib/mobile-auth-api.js +274 -7
  89. package/template/claude-task-manager/lib/mobile-auth-store.js +592 -10
  90. package/template/claude-task-manager/lib/mobile-notification-dispatcher.js +15 -0
  91. package/template/claude-task-manager/lib/model-overview-brain-fallback.js +311 -0
  92. package/template/claude-task-manager/lib/model-overview-cache.js +141 -0
  93. package/template/claude-task-manager/lib/models-health-routing-notice.js +126 -0
  94. package/template/claude-task-manager/lib/node-pin-guard.js +93 -0
  95. package/template/claude-task-manager/lib/perf-tracker.js +242 -6
  96. package/template/claude-task-manager/lib/permission-match.js +76 -0
  97. package/template/claude-task-manager/lib/permission-sync.js +133 -20
  98. package/template/claude-task-manager/lib/process-title.js +35 -0
  99. package/template/claude-task-manager/lib/prompt-executions-query.js +25 -0
  100. package/template/claude-task-manager/lib/prompt-index-disk-cache.js +44 -0
  101. package/template/claude-task-manager/lib/prompt-intent.js +132 -0
  102. package/template/claude-task-manager/lib/provider-user-context.js +34 -0
  103. package/template/claude-task-manager/lib/read-pool-client.js +313 -0
  104. package/template/claude-task-manager/lib/readpool-breaker.js +31 -0
  105. package/template/claude-task-manager/lib/recent-sessions-breaker.js +12 -0
  106. package/template/claude-task-manager/lib/remote-feedback-client.js +72 -0
  107. package/template/claude-task-manager/lib/remote-relay-protocol.js +37 -4
  108. package/template/claude-task-manager/lib/remote-relay-store.js +159 -0
  109. package/template/claude-task-manager/lib/remote-submission-observer.js +278 -0
  110. package/template/claude-task-manager/lib/restart-guard.js +109 -0
  111. package/template/claude-task-manager/lib/restore-interruption-detector.js +439 -0
  112. package/template/claude-task-manager/lib/restore-policy.js +13 -0
  113. package/template/claude-task-manager/lib/restore-resume-batch.js +74 -0
  114. package/template/claude-task-manager/lib/restore-runtime.js +68 -0
  115. package/template/claude-task-manager/lib/restore-storm.js +34 -0
  116. package/template/claude-task-manager/lib/resume-cwd.js +36 -0
  117. package/template/claude-task-manager/lib/resume-preflight.js +313 -0
  118. package/template/claude-task-manager/lib/runtime-work-registry.js +444 -0
  119. package/template/claude-task-manager/lib/sanitize-openai-auth.js +31 -0
  120. package/template/claude-task-manager/lib/scheduler.js +21 -1
  121. package/template/claude-task-manager/lib/scrollback-snapshot-store.js +159 -0
  122. package/template/claude-task-manager/lib/serial-task-queue.js +64 -0
  123. package/template/claude-task-manager/lib/server-listeners.js +239 -0
  124. package/template/claude-task-manager/lib/session-capture.js +42 -7
  125. package/template/claude-task-manager/lib/session-content-backfill.js +131 -0
  126. package/template/claude-task-manager/lib/session-history.js +388 -43
  127. package/template/claude-task-manager/lib/session-host-manager.js +287 -0
  128. package/template/claude-task-manager/lib/session-image-refs.js +209 -0
  129. package/template/claude-task-manager/lib/session-jobs.js +399 -59
  130. package/template/claude-task-manager/lib/session-prompt-index.js +137 -0
  131. package/template/claude-task-manager/lib/session-restore.js +53 -0
  132. package/template/claude-task-manager/lib/session-standup.js +123 -23
  133. package/template/claude-task-manager/lib/session-state-bus.js +14 -0
  134. package/template/claude-task-manager/lib/session-stream.js +64 -16
  135. package/template/claude-task-manager/lib/session-timeline-summary.js +260 -0
  136. package/template/claude-task-manager/lib/session-token-usage.js +494 -0
  137. package/template/claude-task-manager/lib/session-workspace-binding.js +356 -0
  138. package/template/claude-task-manager/lib/setup-network-config.js +9 -0
  139. package/template/claude-task-manager/lib/size-cap.js +45 -0
  140. package/template/claude-task-manager/lib/size-cap.test.js +62 -0
  141. package/template/claude-task-manager/lib/skill-autocomplete.js +180 -1
  142. package/template/claude-task-manager/lib/skill-intent-resolver.js +304 -0
  143. package/template/claude-task-manager/lib/sqlite-driver.js +19 -3
  144. package/template/claude-task-manager/lib/standup-attention.js +7 -3
  145. package/template/claude-task-manager/lib/status-authority.js +39 -0
  146. package/template/claude-task-manager/lib/status-hooks.js +4 -0
  147. package/template/claude-task-manager/lib/storage-migration.js +235 -0
  148. package/template/claude-task-manager/lib/structured-capture.js +298 -0
  149. package/template/claude-task-manager/lib/sync-io-census.js +163 -0
  150. package/template/claude-task-manager/lib/tailscale-setup.js +6 -0
  151. package/template/claude-task-manager/lib/terminal-activity-evidence.js +33 -0
  152. package/template/claude-task-manager/lib/terminal-choice.js +364 -0
  153. package/template/claude-task-manager/lib/terminal-control-sanitize.js +17 -0
  154. package/template/claude-task-manager/lib/terminal-fingerprint.js +48 -0
  155. package/template/claude-task-manager/lib/terminal-output-flush.js +84 -0
  156. package/template/claude-task-manager/lib/timeline-order.js +122 -0
  157. package/template/claude-task-manager/lib/transcript-store.js +348 -43
  158. package/template/claude-task-manager/lib/transport-security.js +84 -1
  159. package/template/claude-task-manager/lib/wait-state.js +184 -0
  160. package/template/claude-task-manager/lib/walle-client.js +47 -5
  161. package/template/claude-task-manager/lib/walle-ctm-history.js +564 -4
  162. package/template/claude-task-manager/lib/walle-external-actions.js +135 -16
  163. package/template/claude-task-manager/lib/walle-history-hydration.js +46 -0
  164. package/template/claude-task-manager/lib/walle-native-health.js +403 -0
  165. package/template/claude-task-manager/lib/walle-repair.js +701 -0
  166. package/template/claude-task-manager/lib/walle-session-cache.js +109 -0
  167. package/template/claude-task-manager/lib/walle-session-context.js +57 -21
  168. package/template/claude-task-manager/lib/walle-session-model-catalog.js +34 -0
  169. package/template/claude-task-manager/lib/walle-supervisor.js +539 -63
  170. package/template/claude-task-manager/lib/walle-transcript.js +52 -0
  171. package/template/claude-task-manager/lib/worktree-active-sync.js +11 -7
  172. package/template/claude-task-manager/lib/worktree-cwd.js +32 -1
  173. package/template/claude-task-manager/package.json +1 -1
  174. package/template/claude-task-manager/prompt-harvest.js +89 -66
  175. package/template/claude-task-manager/providers/claude-code.js +51 -3
  176. package/template/claude-task-manager/providers/cursor.js +140 -45
  177. package/template/claude-task-manager/public/css/reviews.css +551 -61
  178. package/template/claude-task-manager/public/css/setup.css +191 -0
  179. package/template/claude-task-manager/public/css/walle-session.css +865 -10
  180. package/template/claude-task-manager/public/css/walle.css +154 -0
  181. package/template/claude-task-manager/public/designs/ai-providers-consolidation-v2.html +830 -0
  182. package/template/claude-task-manager/public/index.html +18516 -2058
  183. package/template/claude-task-manager/public/ipad.html +363 -0
  184. package/template/claude-task-manager/public/js/document-review-links.js +301 -0
  185. package/template/claude-task-manager/public/js/image-normalize.js +69 -36
  186. package/template/claude-task-manager/public/js/message-renderer.js +1265 -77
  187. package/template/claude-task-manager/public/js/prompts.js +66 -29
  188. package/template/claude-task-manager/public/js/reviews.js +901 -133
  189. package/template/claude-task-manager/public/js/session-activity-utils.js +11 -1
  190. package/template/claude-task-manager/public/js/session-search-utils.js +94 -10
  191. package/template/claude-task-manager/public/js/session-status-precedence.js +23 -5
  192. package/template/claude-task-manager/public/js/setup.js +1273 -176
  193. package/template/claude-task-manager/public/js/stream-view.js +691 -73
  194. package/template/claude-task-manager/public/js/terminal-reconciler.js +210 -0
  195. package/template/claude-task-manager/public/js/walle-session.js +2455 -158
  196. package/template/claude-task-manager/public/js/walle.js +455 -28
  197. package/template/claude-task-manager/public/m/app.css +2909 -262
  198. package/template/claude-task-manager/public/m/app.js +6601 -398
  199. package/template/claude-task-manager/public/m/claim.html +224 -17
  200. package/template/claude-task-manager/public/m/index.html +117 -21
  201. package/template/claude-task-manager/public/m/sw.js +3 -1
  202. package/template/claude-task-manager/public/manifest.json +2 -2
  203. package/template/claude-task-manager/public/prompts.html +30 -14
  204. package/template/claude-task-manager/queue-engine.js +507 -28
  205. package/template/claude-task-manager/scripts/repair-claude-session-images.js +27 -8
  206. package/template/claude-task-manager/server.js +14341 -2197
  207. package/template/claude-task-manager/session-integrity.js +160 -18
  208. package/template/claude-task-manager/session-search-ranking.js +1 -0
  209. package/template/claude-task-manager/session-utils.js +25 -5
  210. package/template/claude-task-manager/workers/approval-blocklist.js +96 -6
  211. package/template/claude-task-manager/workers/approval-widget-validator.js +14 -8
  212. package/template/claude-task-manager/workers/conversation-import-worker.js +11 -50
  213. package/template/claude-task-manager/workers/db-owner-worker.js +386 -0
  214. package/template/claude-task-manager/workers/harvest-worker.js +9 -55
  215. package/template/claude-task-manager/workers/headless-term-worker.js +9 -530
  216. package/template/claude-task-manager/workers/read-pool-worker.js +387 -0
  217. package/template/claude-task-manager/workers/scrollback-worker.js +11 -72
  218. package/template/claude-task-manager/workers/session-host-process.js +146 -0
  219. package/template/claude-task-manager/workers/session-integrity-worker.js +10 -54
  220. package/template/claude-task-manager/workers/state-detectors/base.js +18 -1
  221. package/template/claude-task-manager/workers/state-detectors/claude-code.js +182 -9
  222. package/template/claude-task-manager/workers/state-detectors/codex.js +150 -2
  223. package/template/claude-task-manager/workers/state-detectors/cursor.js +127 -0
  224. package/template/claude-task-manager/workers/state-detectors/gemini.js +21 -0
  225. package/template/claude-task-manager/workers/state-detectors/index.js +29 -0
  226. package/template/claude-task-manager/workers/state-detectors/opencode.js +103 -0
  227. package/template/docs/design/markdown-review-pane.md +206 -0
  228. package/template/docs/designs/2026-05-17-portkey-gateway-provider-ux.md +129 -38
  229. package/template/docs/designs/2026-05-20-mobile-worktree-finish-command.md +27 -0
  230. package/template/docs/designs/2026-05-22-ai-configuration-consolidation.md +248 -0
  231. package/template/docs/designs/ai-configuration-consolidation-mock.html +812 -0
  232. package/template/docs/private-memory-and-pii-policy.md +69 -0
  233. package/template/package.json +2 -1
  234. package/template/scripts/check-private-data.js +201 -0
  235. package/template/shared/sqlite-owner-guard.js +30 -0
  236. package/template/shared/sqlite-owner-write-queue.js +225 -0
  237. package/template/shared/sqlite-storage-policy.js +111 -0
  238. package/template/shared/sqlite-write-lock.js +428 -0
  239. package/template/wall-e/agent-runners/claude-code.js +5 -0
  240. package/template/wall-e/agent.js +166 -22
  241. package/template/wall-e/api-walle.js +524 -70
  242. package/template/wall-e/auth/provider-flows.js +11 -1
  243. package/template/wall-e/bin/walle-mcp-stdio.js +341 -17
  244. package/template/wall-e/brain.js +1614 -141
  245. package/template/wall-e/chat/attachment-blocks.js +96 -0
  246. package/template/wall-e/chat/attachments.js +2 -1
  247. package/template/wall-e/chat/capability-resolver.js +7 -7
  248. package/template/wall-e/chat/context-messages.js +28 -0
  249. package/template/wall-e/chat/conversation-frame.js +630 -0
  250. package/template/wall-e/chat/provider-messages.js +125 -0
  251. package/template/wall-e/chat.js +1002 -233
  252. package/template/wall-e/coding/acceptance-contract.js +170 -0
  253. package/template/wall-e/coding/acp-adapter.js +1 -1
  254. package/template/wall-e/coding/agent-catalog.js +3 -0
  255. package/template/wall-e/coding/artifact-store.js +93 -0
  256. package/template/wall-e/coding/capability-router.js +120 -0
  257. package/template/wall-e/coding/coding-run-controller.js +423 -0
  258. package/template/wall-e/coding/compaction-service.js +157 -12
  259. package/template/wall-e/coding/frontend-verification.js +258 -0
  260. package/template/wall-e/coding/lifecycle-hooks.js +75 -0
  261. package/template/wall-e/coding/local-preview-contract.js +157 -0
  262. package/template/wall-e/coding/permission-service.js +57 -13
  263. package/template/wall-e/coding/prompt-bundle.js +19 -1
  264. package/template/wall-e/coding/prompt-section-registry.js +227 -0
  265. package/template/wall-e/coding/provider-compat.js +15 -0
  266. package/template/wall-e/coding/runtime-events.js +224 -0
  267. package/template/wall-e/coding/runtime-mode.js +3 -0
  268. package/template/wall-e/coding/side-git-snapshot.js +160 -4
  269. package/template/wall-e/coding/snapshot-service.js +143 -1
  270. package/template/wall-e/coding/stream-processor.js +388 -34
  271. package/template/wall-e/coding/task-tool.js +141 -4
  272. package/template/wall-e/coding/tool-execution-controller.js +365 -0
  273. package/template/wall-e/coding/tool-registry.js +43 -5
  274. package/template/wall-e/coding/user-hooks.js +217 -0
  275. package/template/wall-e/coding-orchestrator.js +1330 -221
  276. package/template/wall-e/coding-prompts.js +20 -4
  277. package/template/wall-e/context/context-builder.js +15 -2
  278. package/template/wall-e/decision/confidence.js +1 -1
  279. package/template/wall-e/docs/coding-acceptance-contract.md +41 -0
  280. package/template/wall-e/docs/external-action-controller.md +26 -6
  281. package/template/wall-e/docs/telemetry-lifecycle.md +8 -2
  282. package/template/wall-e/embeddings.js +591 -53
  283. package/template/wall-e/external-action-controller.js +12 -0
  284. package/template/wall-e/http/auth.js +1 -0
  285. package/template/wall-e/http/chat-api.js +46 -11
  286. package/template/wall-e/http/model-admin.js +836 -34
  287. package/template/wall-e/lib/boot-profile.js +88 -0
  288. package/template/wall-e/lib/event-loop-monitor.js +93 -0
  289. package/template/wall-e/lib/service-health.js +194 -0
  290. package/template/wall-e/llm/anthropic.js +130 -5
  291. package/template/wall-e/llm/client.js +266 -63
  292. package/template/wall-e/llm/default-fallback.js +382 -0
  293. package/template/wall-e/llm/health.js +19 -0
  294. package/template/wall-e/llm/message-guard.js +78 -0
  295. package/template/wall-e/llm/model-catalog.js +252 -1
  296. package/template/wall-e/llm/openai.js +26 -4
  297. package/template/wall-e/llm/portkey-sync.js +654 -0
  298. package/template/wall-e/llm/provider-error.js +30 -2
  299. package/template/wall-e/llm/registry.js +5 -1
  300. package/template/wall-e/llm/request-compat.js +67 -0
  301. package/template/wall-e/loops/backfill.js +79 -23
  302. package/template/wall-e/loops/brain-optimize.js +67 -0
  303. package/template/wall-e/loops/ingest.js +25 -10
  304. package/template/wall-e/loops/question-digest.js +160 -0
  305. package/template/wall-e/loops/reflect.js +6 -4
  306. package/template/wall-e/loops/think.js +39 -12
  307. package/template/wall-e/mcp-server.js +318 -36
  308. package/template/wall-e/memory/ctm-context-client.js +52 -14
  309. package/template/wall-e/memory/ctm-operational-context.js +237 -0
  310. package/template/wall-e/memory/ctm-prompt-executions-client.js +128 -0
  311. package/template/wall-e/memory/ctm-session-context.js +111 -63
  312. package/template/wall-e/prompts/coding/deepseek.txt +3 -0
  313. package/template/wall-e/prompts/coding/gemini.txt +6 -0
  314. package/template/wall-e/prompts/coding/gpt.txt +6 -0
  315. package/template/wall-e/prompts/coding/local.txt +7 -0
  316. package/template/wall-e/runtime/decision-hooks.js +115 -0
  317. package/template/wall-e/runtime/devbox-gateway.js +82 -8
  318. package/template/wall-e/runtime/prompt-manifest.js +86 -0
  319. package/template/wall-e/runtime/tool-executor.js +269 -0
  320. package/template/wall-e/runtime/tool-result-envelope.js +138 -0
  321. package/template/wall-e/runtime/transcript-projection.js +60 -0
  322. package/template/wall-e/runtime/walle-runtime.js +224 -0
  323. package/template/wall-e/scripts/db-optimize/migrate.js +162 -0
  324. package/template/wall-e/scripts/db-optimize/recall-eval.js +117 -0
  325. package/template/wall-e/server.js +15 -0
  326. package/template/wall-e/session-files.js +9 -0
  327. package/template/wall-e/skills/_bundled/google-calendar/run.js +1 -1
  328. package/template/wall-e/skills/_bundled/gws-workspace/run.js +1 -1
  329. package/template/wall-e/skills/_bundled/slack-mentions/run.js +76 -6
  330. package/template/wall-e/skills/claude-code-reader.js +7 -3
  331. package/template/wall-e/skills/script-skill-runner.js +10 -0
  332. package/template/wall-e/skills/skill-planner.js +38 -0
  333. package/template/wall-e/tools/builtin-middleware.js +19 -9
  334. package/template/wall-e/tools/local-tools.js +1428 -16
  335. package/template/wall-e/tools/permission-checker.js +73 -5
  336. package/template/wall-e/tools/question-manager.js +117 -7
  337. package/template/wall-e/training/harvester.js +12 -28
  338. package/template/wall-e/training/replay.js +25 -80
  339. package/template/website/index.html +10 -10
  340. package/template/wall-e/eval/ab-test.js +0 -203
  341. package/template/wall-e/eval/agent-runner.js +0 -772
  342. package/template/wall-e/eval/agent-scorer.js +0 -461
  343. package/template/wall-e/eval/aggregator.js +0 -414
  344. package/template/wall-e/eval/allowed-test-commands.js +0 -34
  345. package/template/wall-e/eval/benchmark-generator.js +0 -113
  346. package/template/wall-e/eval/benchmarks/chat-eval.json +0 -1662
  347. package/template/wall-e/eval/benchmarks/chat.json +0 -82
  348. package/template/wall-e/eval/benchmarks/coding-agent-real.json +0 -1
  349. package/template/wall-e/eval/benchmarks/coding-agent.json +0 -1581
  350. package/template/wall-e/eval/benchmarks/coding.json +0 -122
  351. package/template/wall-e/eval/benchmarks/memory-retrieval.json +0 -234
  352. package/template/wall-e/eval/benchmarks/reasoning.json +0 -82
  353. package/template/wall-e/eval/benchmarks/swebench-lite-30.json +0 -212
  354. package/template/wall-e/eval/benchmarks.js +0 -669
  355. package/template/wall-e/eval/cc-replay.js +0 -719
  356. package/template/wall-e/eval/chat-eval.js +0 -525
  357. package/template/wall-e/eval/check-keys.js +0 -15
  358. package/template/wall-e/eval/check-providers.js +0 -42
  359. package/template/wall-e/eval/codex-cli-baseline.js +0 -669
  360. package/template/wall-e/eval/coding-agent-real.js +0 -570
  361. package/template/wall-e/eval/context-compactor.js +0 -251
  362. package/template/wall-e/eval/debug-agent003.js +0 -68
  363. package/template/wall-e/eval/diagnostics.js +0 -216
  364. package/template/wall-e/eval/eval-orchestrator.js +0 -642
  365. package/template/wall-e/eval/evaluate.js +0 -202
  366. package/template/wall-e/eval/evaluator.js +0 -373
  367. package/template/wall-e/eval/exporter.js +0 -212
  368. package/template/wall-e/eval/fixtures/express-basic/package.json +0 -9
  369. package/template/wall-e/eval/fixtures/express-basic/server.js +0 -115
  370. package/template/wall-e/eval/fixtures/express-basic/test.js +0 -83
  371. package/template/wall-e/eval/fixtures/express-buggy/package.json +0 -9
  372. package/template/wall-e/eval/fixtures/express-buggy/server.js +0 -113
  373. package/template/wall-e/eval/fixtures/express-buggy/test.js +0 -83
  374. package/template/wall-e/eval/fixtures/express-buggy-items/package.json +0 -9
  375. package/template/wall-e/eval/fixtures/express-buggy-items/server.js +0 -112
  376. package/template/wall-e/eval/fixtures/express-buggy-items/test.js +0 -83
  377. package/template/wall-e/eval/fixtures/express-buggy-search/package.json +0 -9
  378. package/template/wall-e/eval/fixtures/express-buggy-search/server.js +0 -121
  379. package/template/wall-e/eval/fixtures/express-buggy-search/test.js +0 -83
  380. package/template/wall-e/eval/fixtures/express-rename-data/data.js +0 -34
  381. package/template/wall-e/eval/fixtures/express-rename-data/package.json +0 -9
  382. package/template/wall-e/eval/fixtures/express-rename-data/server.js +0 -97
  383. package/template/wall-e/eval/fixtures/express-rename-data/test.js +0 -88
  384. package/template/wall-e/eval/fixtures/express-xss/package.json +0 -12
  385. package/template/wall-e/eval/fixtures/express-xss/server.js +0 -90
  386. package/template/wall-e/eval/fixtures/express-xss/test.js +0 -67
  387. package/template/wall-e/eval/fixtures/express-xss/views/profile.ejs +0 -9
  388. package/template/wall-e/eval/fixtures/fullstack-app/config/default.js +0 -9
  389. package/template/wall-e/eval/fixtures/fullstack-app/config/test.js +0 -13
  390. package/template/wall-e/eval/fixtures/fullstack-app/package.json +0 -11
  391. package/template/wall-e/eval/fixtures/fullstack-app/public/css/style.css +0 -137
  392. package/template/wall-e/eval/fixtures/fullstack-app/public/index.html +0 -46
  393. package/template/wall-e/eval/fixtures/fullstack-app/public/js/app.js +0 -121
  394. package/template/wall-e/eval/fixtures/fullstack-app/public/js/auth.js +0 -71
  395. package/template/wall-e/eval/fixtures/fullstack-app/public/js/items.js +0 -80
  396. package/template/wall-e/eval/fixtures/fullstack-app/public/js/users.js +0 -46
  397. package/template/wall-e/eval/fixtures/fullstack-app/public/login.html +0 -45
  398. package/template/wall-e/eval/fixtures/fullstack-app/public/register.html +0 -38
  399. package/template/wall-e/eval/fixtures/fullstack-app/scripts/migrate.js +0 -23
  400. package/template/wall-e/eval/fixtures/fullstack-app/scripts/seed.js +0 -46
  401. package/template/wall-e/eval/fixtures/fullstack-app/server/db.js +0 -99
  402. package/template/wall-e/eval/fixtures/fullstack-app/server/index.js +0 -94
  403. package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/auth.js +0 -19
  404. package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/logger.js +0 -19
  405. package/template/wall-e/eval/fixtures/fullstack-app/server/router.js +0 -50
  406. package/template/wall-e/eval/fixtures/fullstack-app/server/routes/auth.js +0 -69
  407. package/template/wall-e/eval/fixtures/fullstack-app/server/routes/health.js +0 -23
  408. package/template/wall-e/eval/fixtures/fullstack-app/server/routes/items.js +0 -88
  409. package/template/wall-e/eval/fixtures/fullstack-app/server/routes/users.js +0 -75
  410. package/template/wall-e/eval/fixtures/fullstack-app/server/test.js +0 -198
  411. package/template/wall-e/eval/fixtures/fullstack-app/server/utils/response.js +0 -34
  412. package/template/wall-e/eval/fixtures/fullstack-app/server/utils/validate.js +0 -26
  413. package/template/wall-e/eval/fixtures/fullstack-app/server.js +0 -8
  414. package/template/wall-e/eval/fixtures/fullstack-app/test.js +0 -12
  415. package/template/wall-e/eval/fixtures/monorepo-basic/package.json +0 -8
  416. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/data.js +0 -58
  417. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/middleware.js +0 -46
  418. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/package.json +0 -8
  419. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/routes.js +0 -64
  420. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/server.js +0 -56
  421. package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/test.js +0 -116
  422. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/commands.js +0 -61
  423. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/index.js +0 -62
  424. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/output.js +0 -43
  425. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/package.json +0 -11
  426. package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/test.js +0 -44
  427. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/formatters.js +0 -43
  428. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/index.js +0 -12
  429. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/package.json +0 -5
  430. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/test.js +0 -55
  431. package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/validators.js +0 -29
  432. package/template/wall-e/eval/fixtures/monorepo-basic/test.js +0 -46
  433. package/template/wall-e/eval/fixtures/node-cli/index.js +0 -78
  434. package/template/wall-e/eval/fixtures/node-cli/package.json +0 -10
  435. package/template/wall-e/eval/fixtures/node-cli/test.js +0 -57
  436. package/template/wall-e/eval/fixtures/node-typed/package.json +0 -8
  437. package/template/wall-e/eval/fixtures/node-typed/src/handlers.js +0 -31
  438. package/template/wall-e/eval/fixtures/node-typed/src/utils.js +0 -33
  439. package/template/wall-e/eval/fixtures/node-typed/test.js +0 -36
  440. package/template/wall-e/eval/fixtures/python-flask/app.py +0 -14
  441. package/template/wall-e/eval/fixtures/python-flask/requirements.txt +0 -2
  442. package/template/wall-e/eval/fixtures/python-flask/test_app.py +0 -25
  443. package/template/wall-e/eval/fixtures/wall-e-subset/brain.js +0 -105
  444. package/template/wall-e/eval/fixtures/wall-e-subset/eval/aggregator.js +0 -101
  445. package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/chat.json +0 -20
  446. package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/coding.json +0 -32
  447. package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks.js +0 -64
  448. package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/package.json +0 -6
  449. package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/server.js +0 -31
  450. package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/test.js +0 -18
  451. package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/utils.js +0 -34
  452. package/template/wall-e/eval/fixtures/wall-e-subset/eval/runner.js +0 -104
  453. package/template/wall-e/eval/fixtures/wall-e-subset/eval/scorer.js +0 -73
  454. package/template/wall-e/eval/fixtures/wall-e-subset/eval/test.js +0 -134
  455. package/template/wall-e/eval/fixtures/wall-e-subset/llm/client.js +0 -99
  456. package/template/wall-e/eval/fixtures/wall-e-subset/llm/providers.js +0 -63
  457. package/template/wall-e/eval/fixtures/wall-e-subset/llm/test.js +0 -70
  458. package/template/wall-e/eval/fixtures/wall-e-subset/package.json +0 -10
  459. package/template/wall-e/eval/fixtures/wall-e-subset/test.js +0 -86
  460. package/template/wall-e/eval/harvester.js +0 -685
  461. package/template/wall-e/eval/head-to-head.js +0 -388
  462. package/template/wall-e/eval/humaneval-adapter.js +0 -321
  463. package/template/wall-e/eval/list-models.js +0 -31
  464. package/template/wall-e/eval/livecodebench-adapter.js +0 -291
  465. package/template/wall-e/eval/mail-integration.js +0 -443
  466. package/template/wall-e/eval/manifest.js +0 -186
  467. package/template/wall-e/eval/meta-harness/adapters/coding-agent.js +0 -57
  468. package/template/wall-e/eval/meta-harness/bootstrap-snapshot.js +0 -149
  469. package/template/wall-e/eval/meta-harness/candidate-store.js +0 -117
  470. package/template/wall-e/eval/meta-harness/cli.js +0 -86
  471. package/template/wall-e/eval/meta-harness/domain-spec.js +0 -154
  472. package/template/wall-e/eval/meta-harness/domains/coding-agent.domain.json +0 -84
  473. package/template/wall-e/eval/meta-harness/examples/env-bootstrap-candidate.js +0 -29
  474. package/template/wall-e/eval/meta-harness/experience-store.js +0 -174
  475. package/template/wall-e/eval/meta-harness/frontier.js +0 -96
  476. package/template/wall-e/eval/meta-harness/harness-interface.js +0 -90
  477. package/template/wall-e/eval/meta-harness/leakage-guard.js +0 -80
  478. package/template/wall-e/eval/meta-harness/optimizer.js +0 -207
  479. package/template/wall-e/eval/meta-harness/proposer-runner.js +0 -110
  480. package/template/wall-e/eval/meta-harness/reporting.js +0 -58
  481. package/template/wall-e/eval/meta-harness/telemetry.js +0 -27
  482. package/template/wall-e/eval/meta-harness/validation.js +0 -81
  483. package/template/wall-e/eval/promoter.js +0 -228
  484. package/template/wall-e/eval/provider-normalizer.js +0 -33
  485. package/template/wall-e/eval/replay.js +0 -395
  486. package/template/wall-e/eval/run-agent-benchmarks.js +0 -386
  487. package/template/wall-e/eval/run-codex-cli-baseline.js +0 -177
  488. package/template/wall-e/eval/run-coding-agent-real.js +0 -187
  489. package/template/wall-e/eval/run-eval.js +0 -435
  490. package/template/wall-e/eval/run-model-comparison.js +0 -142
  491. package/template/wall-e/eval/session-evaluator.js +0 -187
  492. package/template/wall-e/eval/session-miner.js +0 -207
  493. package/template/wall-e/eval/session-retrieval-benchmark.js +0 -150
  494. package/template/wall-e/eval/session-transcripts.js +0 -509
  495. package/template/wall-e/eval/shadow.js +0 -161
  496. package/template/wall-e/eval/swebench-adapter.js +0 -345
  497. package/template/wall-e/eval/swebench-docker.js +0 -192
  498. package/template/wall-e/eval/train.py +0 -320
  499. package/template/wall-e/eval/trainer.js +0 -232
  500. package/template/wall-e/eval/weekly-eval-loop.js +0 -241
@@ -1,110 +0,0 @@
1
- 'use strict';
2
-
3
- const fs = require('node:fs');
4
- const path = require('node:path');
5
-
6
- function buildProposerPrompt({ iteration, store, domainSpec, frontier = {} } = {}) {
7
- if (!store) throw new Error('store is required');
8
- if (!domainSpec) throw new Error('domainSpec is required');
9
- const allowed = (domainSpec.allowedCandidateSurfaces || []).join(', ');
10
- const heldoutPolicy = domainSpec.leakagePolicy?.proposerCanReadHeldout
11
- ? 'Held-out artifacts are visible.'
12
- : 'Do not read, infer, or hardcode held-out task IDs or held-out results.';
13
- return [
14
- `You are the Meta-Harness proposer for domain ${domainSpec.id}.`,
15
- `Iteration: ${iteration}`,
16
- '',
17
- 'Read these run-local artifacts before writing candidates:',
18
- '- run_manifest.json',
19
- '- frontier.json',
20
- '- experience.jsonl',
21
- '- reports/*.md when present',
22
- '- iterations/*/candidates/*/tasks/*/{result.json,diff.patch,git-status.txt,workspace-manifest.json}',
23
- '',
24
- `Allowed candidate surfaces: ${allowed}`,
25
- heldoutPolicy,
26
- '',
27
- 'Candidate module contract:',
28
- '- CommonJS module exporting { manifest, apply(baseHarness, context) }.',
29
- '- manifest.id must be safe and unique.',
30
- '- manifest.hypothesis must be falsifiable and trace-grounded.',
31
- '- manifest.surfaces must only use allowed candidate surfaces.',
32
- '- apply() must return a harness overlay; do not mutate production Wall-E source.',
33
- '',
34
- 'Write candidate files inside candidates/<candidate-id>/candidate.js.',
35
- 'Then write pending_eval.json at the run root with this exact shape:',
36
- '{ "candidates": [{ "id": "...", "candidatePath": "candidates/<id>/candidate.js", "hypothesis": "...", "surfaces": ["..."] }] }',
37
- '',
38
- 'Avoid parameter sweeps, task-name hardcoding, scorer changes, and no-op candidates.',
39
- `Current best: ${frontier.bestCandidate?.candidateId || 'none'} score=${frontier.bestCandidate?.aggregateScore ?? 'n/a'}`,
40
- ].join('\n');
41
- }
42
-
43
- async function runProposer({
44
- runAgentLoop,
45
- store,
46
- domainSpec,
47
- frontier = {},
48
- iteration,
49
- provider = null,
50
- model = null,
51
- timeoutMs = 600000,
52
- } = {}) {
53
- if (typeof runAgentLoop !== 'function') throw new Error('runAgentLoop is required');
54
- store.ensureRun();
55
- const prompt = buildProposerPrompt({ iteration, store, domainSpec, frontier });
56
- const result = await runAgentLoop(prompt, {
57
- cwd: store.runDir,
58
- provider,
59
- model,
60
- timeoutMs,
61
- maxTurns: 12,
62
- mode: 'build',
63
- benchmark: true,
64
- headless: true,
65
- headlessPolicy: 'allow',
66
- permissionTimeoutMs: 0,
67
- persistTranscript: true,
68
- });
69
- const proposals = readPendingEval(store);
70
- store.appendExperience({
71
- event: 'proposer_ran',
72
- iteration,
73
- success: result.success === true,
74
- proposalCount: proposals.length,
75
- sessionId: result.sessionId || null,
76
- error: result.error || result.stderr || null,
77
- });
78
- return { result, proposals, prompt };
79
- }
80
-
81
- function readPendingEval(store) {
82
- const pendingPath = path.join(store.runDir, 'pending_eval.json');
83
- if (!fs.existsSync(pendingPath)) return [];
84
- const pending = JSON.parse(fs.readFileSync(pendingPath, 'utf8'));
85
- const raw = Array.isArray(pending) ? pending : pending.candidates;
86
- if (!Array.isArray(raw)) throw new Error('pending_eval.json must contain a candidates array');
87
- return raw.map((entry) => normalizeProposal(entry, store));
88
- }
89
-
90
- function normalizeProposal(entry, store) {
91
- if (!entry || typeof entry !== 'object') throw new Error('pending candidate entry must be an object');
92
- if (!entry.candidatePath) throw new Error(`pending candidate ${entry.id || '<unknown>'} missing candidatePath`);
93
- const candidatePath = path.isAbsolute(entry.candidatePath)
94
- ? entry.candidatePath
95
- : path.resolve(store.runDir, entry.candidatePath);
96
- if (!candidatePath.startsWith(store.runDir + path.sep)) {
97
- throw new Error(`pending candidate path escapes run directory: ${entry.candidatePath}`);
98
- }
99
- return {
100
- ...entry,
101
- candidatePath,
102
- };
103
- }
104
-
105
- module.exports = {
106
- buildProposerPrompt,
107
- runProposer,
108
- readPendingEval,
109
- normalizeProposal,
110
- };
@@ -1,58 +0,0 @@
1
- 'use strict';
2
-
3
- const path = require('node:path');
4
-
5
- function writeIterationReport({ store, iteration, frontier, summaries = [] } = {}) {
6
- store.ensureRun();
7
- const lines = [
8
- `# Meta-Harness Iteration ${iteration}`,
9
- '',
10
- `Best candidate: ${frontier?.bestCandidate?.candidateId || 'none'}`,
11
- `Best score: ${frontier?.bestCandidate?.aggregateScore ?? 'n/a'}`,
12
- '',
13
- '## Candidate Summaries',
14
- '',
15
- ];
16
- for (const summary of summaries) {
17
- lines.push(`- ${summary.candidateId} [${summary.split}]: score=${formatScore(summary.aggregateScore)}, tasks=${summary.taskCount}, success=${summary.successCount}`);
18
- }
19
- if (summaries.length === 0) lines.push('- No candidates evaluated.');
20
- lines.push('', '## Next Proposer Checklist', '');
21
- lines.push('- Read this report, frontier.json, and experience.jsonl before proposing.');
22
- lines.push('- Inspect raw task artifacts for failures, especially diff.patch and result.json.');
23
- lines.push('- Propose only trace-grounded candidate mechanisms using allowed surfaces.');
24
- const rel = path.join('reports', `iteration-${String(iteration).padStart(3, '0')}.md`);
25
- store.writeText(rel, lines.join('\n') + '\n');
26
- return path.join(store.runDir, rel);
27
- }
28
-
29
- function writeRunSummary({ store, frontier, summaries = [] } = {}) {
30
- store.ensureRun();
31
- const lines = [
32
- '# Meta-Harness Run Summary',
33
- '',
34
- `Best candidate: ${frontier?.bestCandidate?.candidateId || 'none'}`,
35
- `Best score: ${frontier?.bestCandidate?.aggregateScore ?? 'n/a'}`,
36
- `Evaluated summaries: ${summaries.length}`,
37
- '',
38
- '## Frontier History',
39
- '',
40
- ];
41
- for (const item of frontier?.history || []) {
42
- lines.push(`- iter ${item.iteration} ${item.candidateId} [${item.split}]: score=${formatScore(item.aggregateScore)}`);
43
- }
44
- if (!frontier?.history?.length) lines.push('- No frontier entries.');
45
- store.writeText(path.join('reports', 'run-summary.md'), lines.join('\n') + '\n');
46
- return path.join(store.runDir, 'reports', 'run-summary.md');
47
- }
48
-
49
- function formatScore(value) {
50
- const n = Number(value);
51
- return Number.isFinite(n) ? n.toFixed(4) : 'n/a';
52
- }
53
-
54
- module.exports = {
55
- writeIterationReport,
56
- writeRunSummary,
57
- formatScore,
58
- };
@@ -1,27 +0,0 @@
1
- 'use strict';
2
-
3
- function recordMetaHarnessTelemetry(store, event = {}) {
4
- const row = {
5
- timestamp: typeof store?.now === 'function' ? store.now() : new Date().toISOString(),
6
- subsystem: 'meta-harness',
7
- ...event,
8
- };
9
- try {
10
- if (store?.ensureRun) store.ensureRun();
11
- if (store?.appendJsonl) store.appendJsonl('telemetry.jsonl', row);
12
- } catch (err) {
13
- try {
14
- console.warn(`[meta-harness telemetry] failed to write event ${event.type || event.event || 'unknown'}: ${err.message}`);
15
- } catch {}
16
- }
17
- return row;
18
- }
19
-
20
- function createTelemetryEmitter(store, defaults = {}) {
21
- return (event = {}) => recordMetaHarnessTelemetry(store, { ...defaults, ...event });
22
- }
23
-
24
- module.exports = {
25
- recordMetaHarnessTelemetry,
26
- createTelemetryEmitter,
27
- };
@@ -1,81 +0,0 @@
1
- 'use strict';
2
-
3
- const fs = require('node:fs');
4
- const path = require('node:path');
5
-
6
- const { SAFE_ID_RE } = require('./domain-spec');
7
-
8
- function loadCandidateModule(filePath) {
9
- const absolutePath = path.resolve(filePath);
10
- if (!fs.existsSync(absolutePath)) throw new Error(`candidate file not found: ${absolutePath}`);
11
- delete require.cache[require.resolve(absolutePath)];
12
- return require(absolutePath);
13
- }
14
-
15
- function validateCandidateModule(candidateModule, opts = {}) {
16
- const normalized = normalizeCandidateModule(candidateModule);
17
- if (!normalized || typeof normalized !== 'object') {
18
- throw new Error('candidate module must export an object');
19
- }
20
- validateCandidateManifest(normalized.manifest, opts);
21
- if (typeof normalized.apply !== 'function') {
22
- throw new Error(`candidate ${normalized.manifest.id} must export apply(baseHarness, context)`);
23
- }
24
- return normalized;
25
- }
26
-
27
- function validateCandidateManifest(manifest, opts = {}) {
28
- if (!manifest || typeof manifest !== 'object' || Array.isArray(manifest)) {
29
- throw new Error('candidate manifest must be an object');
30
- }
31
- requireString(manifest, 'id', 'candidate manifest id');
32
- if (!SAFE_ID_RE.test(manifest.id)) throw new Error(`candidate id is not safe: ${manifest.id}`);
33
- requireString(manifest, 'hypothesis', `candidate ${manifest.id} hypothesis`);
34
- const surfaces = requireArray(manifest, 'surfaces', `candidate ${manifest.id} surfaces`);
35
- if (surfaces.length === 0) throw new Error(`candidate ${manifest.id} surfaces must not be empty`);
36
- for (const surface of surfaces) {
37
- if (typeof surface !== 'string' || !SAFE_ID_RE.test(surface)) {
38
- throw new Error(`candidate ${manifest.id} has unsafe surface: ${surface}`);
39
- }
40
- }
41
- if (opts.domainSpec) {
42
- assertAllowedSurfaces(surfaces, opts.domainSpec.allowedCandidateSurfaces || [], manifest.id);
43
- }
44
- return true;
45
- }
46
-
47
- function assertAllowedSurfaces(surfaces, allowedSurfaces, candidateId = 'candidate') {
48
- const allowed = new Set(allowedSurfaces || []);
49
- const forbidden = surfaces.filter((surface) => !allowed.has(surface));
50
- if (forbidden.length > 0) {
51
- throw new Error(`${candidateId} uses forbidden candidate surface(s): ${forbidden.join(', ')}`);
52
- }
53
- return true;
54
- }
55
-
56
- function normalizeCandidateModule(candidateModule) {
57
- if (!candidateModule || typeof candidateModule !== 'object') return candidateModule;
58
- return candidateModule.default || candidateModule.candidate || candidateModule;
59
- }
60
-
61
- function requireString(obj, key, label = key) {
62
- const value = obj[key];
63
- if (typeof value !== 'string' || value.trim() === '') {
64
- throw new Error(`${label} must be a non-empty string`);
65
- }
66
- return value;
67
- }
68
-
69
- function requireArray(obj, key, label = key) {
70
- const value = obj[key];
71
- if (!Array.isArray(value)) throw new Error(`${label} must be an array`);
72
- return value;
73
- }
74
-
75
- module.exports = {
76
- loadCandidateModule,
77
- validateCandidateModule,
78
- validateCandidateManifest,
79
- assertAllowedSurfaces,
80
- normalizeCandidateModule,
81
- };
@@ -1,228 +0,0 @@
1
- 'use strict';
2
-
3
- /**
4
- * Promotion detector — checks if shadow models are ready for promotion.
5
- * Creates brain tasks and briefing items when models meet criteria.
6
- */
7
-
8
- const PROMOTION_CRITERIA = {
9
- minEvals: 50,
10
- minAvgScore: 0.75,
11
- minWinRate: 0.6,
12
- validTrends: ['improving', 'stable'],
13
- minDaysSinceAlert: 7,
14
- };
15
-
16
- /**
17
- * Format a promotion alert as markdown.
18
- */
19
- function formatPromotionAlert(candidate) {
20
- const winRate = candidate.total_evals > 0
21
- ? (candidate.win_count / candidate.total_evals * 100).toFixed(1)
22
- : '0.0';
23
- return [
24
- `## Model Promotion: ${candidate.model}`,
25
- '',
26
- `**Task type:** ${candidate.task_type}`,
27
- `**Evaluations:** ${candidate.total_evals}`,
28
- `**Average score:** ${(candidate.avg_score * 100).toFixed(1)}%`,
29
- `**Win rate:** ${winRate}%`,
30
- `**Strong wins:** ${candidate.strong_win_count}`,
31
- `**Avg latency:** ${Math.round(candidate.avg_latency_ms || 0)}ms`,
32
- `**Trend:** ${candidate.trend}`,
33
- `**Current rollout:** ${((candidate.rollout_pct || 0) * 100).toFixed(0)}%`,
34
- '',
35
- '### Recommendation',
36
- buildRecommendation(candidate),
37
- '',
38
- '### Actions',
39
- '- **Approve**: Set as default model for this task type, start gradual rollout',
40
- '- **Trial**: Start at 10% rollout for 7 days',
41
- '- **Reject**: Suppress alerts for 30 days',
42
- ].join('\n');
43
- }
44
-
45
- /**
46
- * Build a recommendation based on candidate stats.
47
- */
48
- function buildRecommendation(candidate) {
49
- const winRate = candidate.total_evals > 0
50
- ? candidate.win_count / candidate.total_evals
51
- : 0;
52
-
53
- if (candidate.avg_score >= 0.9 && winRate >= 0.8 && candidate.trend === 'improving') {
54
- return 'Strong candidate for full promotion. High quality and improving trend.';
55
- }
56
- if (candidate.avg_score >= 0.8 && winRate >= 0.7) {
57
- return 'Good candidate. Consider a 30% trial rollout to validate at scale.';
58
- }
59
- return 'Meets minimum criteria. Recommend starting with a 10% trial.';
60
- }
61
-
62
- /**
63
- * Check for models eligible for promotion.
64
- * @param {Object} brain - Brain module
65
- * @returns {{ candidates: Array, alertsCreated: number }}
66
- */
67
- function checkPromotions(brain) {
68
- const promotionCandidates = brain.getPromotionCandidates();
69
- let alertsCreated = 0;
70
-
71
- for (const candidate of promotionCandidates) {
72
- // Skip if already fully promoted
73
- if (candidate.rollout_pct >= 1.0) continue;
74
-
75
- // Skip if recently alerted
76
- if (candidate.last_promotion_alert) {
77
- const daysSince = (Date.now() - new Date(candidate.last_promotion_alert).getTime()) / 86400000;
78
- if (daysSince < PROMOTION_CRITERIA.minDaysSinceAlert) continue;
79
- }
80
-
81
- // Create brain task for user review
82
- try {
83
- brain.insertTask({
84
- title: `Model promotion: ${candidate.model} for ${candidate.task_type}`,
85
- description: formatPromotionAlert(candidate),
86
- priority: 'high',
87
- type: 'once',
88
- execution: 'manual',
89
- source: 'training-pipeline',
90
- });
91
- } catch (err) {
92
- console.error(`[promoter] Failed to create task for ${candidate.model}:`, err.message);
93
- continue;
94
- }
95
-
96
- // Add briefing item
97
- try {
98
- if (brain.insertBriefingItem) {
99
- brain.insertBriefingItem({
100
- title: `Shadow model ${candidate.model} ready for promotion (${candidate.task_type})`,
101
- category: 'training',
102
- urgency: 'medium',
103
- context: JSON.stringify({
104
- model: candidate.model,
105
- taskType: candidate.task_type,
106
- avgScore: candidate.avg_score,
107
- totalEvals: candidate.total_evals,
108
- trend: candidate.trend,
109
- }),
110
- });
111
- }
112
- } catch (err) {
113
- console.error(`[promoter] Failed to create briefing for ${candidate.model}:`, err.message);
114
- }
115
-
116
- // Update last_promotion_alert timestamp
117
- try {
118
- brain.upsertModelTaskScore(candidate.model, candidate.task_type, {
119
- provider: candidate.provider,
120
- totalEvals: candidate.total_evals,
121
- avgScore: candidate.avg_score,
122
- winCount: candidate.win_count,
123
- strongWinCount: candidate.strong_win_count,
124
- avgLatencyMs: candidate.avg_latency_ms,
125
- score7d: candidate.score_7d,
126
- score30d: candidate.score_30d,
127
- trend: candidate.trend,
128
- lastEvalAt: candidate.last_eval_at,
129
- });
130
- // Set last_promotion_alert directly
131
- brain.getDb().prepare('UPDATE model_task_scores SET last_promotion_alert = datetime("now") WHERE model = ? AND task_type = ?')
132
- .run(candidate.model, candidate.task_type);
133
- } catch {}
134
-
135
- alertsCreated++;
136
- }
137
-
138
- return { candidates: promotionCandidates, alertsCreated };
139
- }
140
-
141
- const DEMOTION_CRITERIA = {
142
- maxScore7d: 0.5,
143
- minEvals: 30,
144
- };
145
-
146
- /**
147
- * Check for models that should be demoted based on declining performance.
148
- * Models with 7-day rolling avg < 0.5 and 30+ evals get flagged.
149
- *
150
- * @param {Object} brain - Brain module
151
- * @returns {{ demotions: Array, alertsCreated: number }}
152
- */
153
- function checkDemotions(brain) {
154
- const db = brain.getDb();
155
- let alertsCreated = 0;
156
-
157
- // Find models with poor recent performance
158
- const demotionCandidates = db.prepare(`
159
- SELECT model, task_type, provider, total_evals, avg_score,
160
- score_7d, score_30d, trend, rollout_pct, last_eval_at
161
- FROM model_task_scores
162
- WHERE score_7d IS NOT NULL
163
- AND score_7d < ?
164
- AND total_evals >= ?
165
- `).all(DEMOTION_CRITERIA.maxScore7d, DEMOTION_CRITERIA.minEvals);
166
-
167
- for (const candidate of demotionCandidates) {
168
- const dropPct = candidate.score_30d
169
- ? Math.round((1 - candidate.score_7d / candidate.score_30d) * 100 * 10) / 10
170
- : 0;
171
-
172
- // Create regression alert with demotion context
173
- const alertId = `demotion::${candidate.model}::${candidate.task_type}::${new Date().toISOString().slice(0, 10)}`;
174
- try {
175
- db.prepare(`
176
- INSERT OR IGNORE INTO eval_regression_alerts
177
- (id, benchmark_id, model, provider, baseline_avg, current_score, drop_pct)
178
- VALUES (?, ?, ?, ?, ?, ?, ?)
179
- `).run(
180
- alertId,
181
- `demotion:${candidate.task_type}`,
182
- candidate.model,
183
- candidate.provider || 'unknown',
184
- candidate.score_30d || candidate.avg_score,
185
- candidate.score_7d,
186
- dropPct
187
- );
188
- } catch (err) {
189
- console.error(`[promoter] Failed to create demotion alert for ${candidate.model}:`, err.message);
190
- continue;
191
- }
192
-
193
- // Create briefing item
194
- try {
195
- if (brain.insertBriefingItem) {
196
- brain.insertBriefingItem({
197
- title: `Model demotion candidate: ${candidate.model} (${candidate.task_type})`,
198
- category: 'training',
199
- urgency: 'high',
200
- context: JSON.stringify({
201
- model: candidate.model,
202
- taskType: candidate.task_type,
203
- score7d: candidate.score_7d,
204
- score30d: candidate.score_30d,
205
- totalEvals: candidate.total_evals,
206
- trend: candidate.trend,
207
- dropPct,
208
- }),
209
- });
210
- }
211
- } catch (err) {
212
- console.error(`[promoter] Failed to create demotion briefing for ${candidate.model}:`, err.message);
213
- }
214
-
215
- alertsCreated++;
216
- }
217
-
218
- return { demotions: demotionCandidates, alertsCreated };
219
- }
220
-
221
- module.exports = {
222
- checkPromotions,
223
- checkDemotions,
224
- formatPromotionAlert,
225
- buildRecommendation,
226
- PROMOTION_CRITERIA,
227
- DEMOTION_CRITERIA,
228
- };
@@ -1,33 +0,0 @@
1
- 'use strict';
2
-
3
- function providerString(provider) {
4
- if (provider && typeof provider === 'object') {
5
- if (typeof provider.type === 'string') return provider.type.trim();
6
- if (typeof provider.provider === 'string') return provider.provider.trim();
7
- }
8
- if (typeof provider === 'string') return provider.trim();
9
- return '';
10
- }
11
-
12
- function inferProviderFromModel(model) {
13
- const value = typeof model === 'string' ? model.trim().toLowerCase() : '';
14
- if (!value) return null;
15
- if (value.startsWith('deepseek-') || value.startsWith('deepseek/')) return 'deepseek';
16
- if (value.startsWith('kimi-') || value.startsWith('moonshot-') || value.startsWith('moonshot/')) return 'moonshot';
17
- if (value.startsWith('claude-')) return 'anthropic';
18
- if (value.startsWith('gemini-') || value.startsWith('google/')) return 'google';
19
- if (/^(gpt-|chatgpt-|o[134](?:-|$))/.test(value)) return 'openai';
20
- return null;
21
- }
22
-
23
- function normalizeEvalProvider(provider, model, { fallback = 'default' } = {}) {
24
- const explicit = providerString(provider);
25
- if (explicit && explicit !== 'default') return explicit;
26
- return inferProviderFromModel(model) || explicit || fallback;
27
- }
28
-
29
- module.exports = {
30
- inferProviderFromModel,
31
- normalizeEvalProvider,
32
- providerString,
33
- };