discoclaw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393)
  1. package/.context/README.md +42 -0
  2. package/.context/architecture.md +58 -0
  3. package/.context/bot-setup.md +24 -0
  4. package/.context/dev.md +230 -0
  5. package/.context/discord.md +144 -0
  6. package/.context/memory.md +257 -0
  7. package/.context/ops.md +59 -0
  8. package/.context/pa-safety.md +47 -0
  9. package/.context/pa.md +118 -0
  10. package/.context/project.md +43 -0
  11. package/.context/runtime.md +253 -0
  12. package/.context/tasks.md +71 -0
  13. package/.context/tools.md +75 -0
  14. package/.env.example +88 -0
  15. package/.env.example.full +378 -0
  16. package/LICENSE +21 -0
  17. package/README.md +220 -0
  18. package/dist/beads/auto-tag.js +2 -0
  19. package/dist/beads/auto-tag.test.js +62 -0
  20. package/dist/beads/bd-cli.js +9 -0
  21. package/dist/beads/bd-cli.test.js +495 -0
  22. package/dist/beads/bead-hooks-cli.js +149 -0
  23. package/dist/beads/bead-sync-cli.js +5 -0
  24. package/dist/beads/bead-sync-cli.test.js +72 -0
  25. package/dist/beads/bead-sync-coordinator.js +4 -0
  26. package/dist/beads/bead-sync-coordinator.test.js +239 -0
  27. package/dist/beads/bead-sync-watcher.js +2 -0
  28. package/dist/beads/bead-sync-watcher.test.js +96 -0
  29. package/dist/beads/bead-sync.js +7 -0
  30. package/dist/beads/bead-sync.test.js +876 -0
  31. package/dist/beads/bead-thread-cache.js +8 -0
  32. package/dist/beads/bead-thread-cache.test.js +91 -0
  33. package/dist/beads/discord-sync.js +18 -0
  34. package/dist/beads/discord-sync.test.js +782 -0
  35. package/dist/beads/find-bead-by-thread.test.js +36 -0
  36. package/dist/beads/forum-guard.js +2 -0
  37. package/dist/beads/forum-guard.test.js +204 -0
  38. package/dist/beads/initialize.js +3 -0
  39. package/dist/beads/initialize.test.js +304 -0
  40. package/dist/beads/types.js +10 -0
  41. package/dist/cli/daemon-installer.js +225 -0
  42. package/dist/cli/daemon-installer.test.js +289 -0
  43. package/dist/cli/index.js +42 -0
  44. package/dist/cli/init-wizard.js +374 -0
  45. package/dist/cli/init-wizard.test.js +191 -0
  46. package/dist/config.js +385 -0
  47. package/dist/config.test.js +589 -0
  48. package/dist/cron/auto-tag.js +100 -0
  49. package/dist/cron/auto-tag.test.js +91 -0
  50. package/dist/cron/cadence.js +74 -0
  51. package/dist/cron/cadence.test.js +53 -0
  52. package/dist/cron/cron-sync-coordinator.js +66 -0
  53. package/dist/cron/cron-sync-coordinator.test.js +118 -0
  54. package/dist/cron/cron-sync.js +165 -0
  55. package/dist/cron/cron-sync.test.js +228 -0
  56. package/dist/cron/cron-tag-map-watcher.js +128 -0
  57. package/dist/cron/cron-tag-map-watcher.test.js +155 -0
  58. package/dist/cron/default-timezone.js +23 -0
  59. package/dist/cron/default-timezone.test.js +30 -0
  60. package/dist/cron/discord-sync.js +205 -0
  61. package/dist/cron/discord-sync.test.js +353 -0
  62. package/dist/cron/executor.js +303 -0
  63. package/dist/cron/executor.test.js +614 -0
  64. package/dist/cron/forum-sync.js +347 -0
  65. package/dist/cron/forum-sync.test.js +539 -0
  66. package/dist/cron/job-lock.js +164 -0
  67. package/dist/cron/job-lock.test.js +178 -0
  68. package/dist/cron/parser.js +68 -0
  69. package/dist/cron/parser.test.js +115 -0
  70. package/dist/cron/run-control.js +24 -0
  71. package/dist/cron/run-control.test.js +27 -0
  72. package/dist/cron/run-stats.js +265 -0
  73. package/dist/cron/run-stats.test.js +160 -0
  74. package/dist/cron/scheduler.js +97 -0
  75. package/dist/cron/scheduler.test.js +112 -0
  76. package/dist/cron/tag-map.js +47 -0
  77. package/dist/cron/tag-map.test.js +64 -0
  78. package/dist/cron/types.js +1 -0
  79. package/dist/discoclaw-plan-format.test.js +137 -0
  80. package/dist/discoclaw-recipe-format.test.js +137 -0
  81. package/dist/discord/abort-registry.js +70 -0
  82. package/dist/discord/action-categories.js +36 -0
  83. package/dist/discord/action-types.js +1 -0
  84. package/dist/discord/action-utils.js +58 -0
  85. package/dist/discord/action-utils.test.js +58 -0
  86. package/dist/discord/actions-beads.js +1 -0
  87. package/dist/discord/actions-beads.test.js +372 -0
  88. package/dist/discord/actions-bot-profile.js +107 -0
  89. package/dist/discord/actions-bot-profile.test.js +138 -0
  90. package/dist/discord/actions-channels.js +427 -0
  91. package/dist/discord/actions-channels.test.js +697 -0
  92. package/dist/discord/actions-config.js +173 -0
  93. package/dist/discord/actions-config.test.js +322 -0
  94. package/dist/discord/actions-crons.js +586 -0
  95. package/dist/discord/actions-crons.test.js +499 -0
  96. package/dist/discord/actions-defer.js +60 -0
  97. package/dist/discord/actions-defer.test.js +134 -0
  98. package/dist/discord/actions-forge.js +134 -0
  99. package/dist/discord/actions-forge.test.js +206 -0
  100. package/dist/discord/actions-guild.js +301 -0
  101. package/dist/discord/actions-guild.test.js +386 -0
  102. package/dist/discord/actions-memory.js +106 -0
  103. package/dist/discord/actions-memory.test.js +248 -0
  104. package/dist/discord/actions-messaging.js +401 -0
  105. package/dist/discord/actions-messaging.test.js +738 -0
  106. package/dist/discord/actions-moderation.js +65 -0
  107. package/dist/discord/actions-moderation.test.js +88 -0
  108. package/dist/discord/actions-plan.js +445 -0
  109. package/dist/discord/actions-plan.test.js +610 -0
  110. package/dist/discord/actions-poll.js +38 -0
  111. package/dist/discord/actions-poll.test.js +93 -0
  112. package/dist/discord/actions-tasks.js +3 -0
  113. package/dist/discord/actions-tasks.test.js +418 -0
  114. package/dist/discord/actions.js +600 -0
  115. package/dist/discord/actions.test.js +522 -0
  116. package/dist/discord/allowed-mentions.js +3 -0
  117. package/dist/discord/allowed-mentions.test.js +17 -0
  118. package/dist/discord/allowlist.js +29 -0
  119. package/dist/discord/allowlist.test.js +24 -0
  120. package/dist/discord/audit-handler.js +191 -0
  121. package/dist/discord/audit-handler.test.js +361 -0
  122. package/dist/discord/bot.js +141 -0
  123. package/dist/discord/channel-context.js +181 -0
  124. package/dist/discord/defer-scheduler.js +45 -0
  125. package/dist/discord/destructive-confirmation.js +128 -0
  126. package/dist/discord/destructive-confirmation.test.js +49 -0
  127. package/dist/discord/discord-plan-auto-implement.test.js +18 -0
  128. package/dist/discord/durable-memory.js +145 -0
  129. package/dist/discord/durable-memory.test.js +281 -0
  130. package/dist/discord/durable-write-queue.js +4 -0
  131. package/dist/discord/file-download.js +308 -0
  132. package/dist/discord/file-download.test.js +303 -0
  133. package/dist/discord/forge-audit-verdict.js +140 -0
  134. package/dist/discord/forge-auto-implement.js +80 -0
  135. package/dist/discord/forge-auto-implement.test.js +110 -0
  136. package/dist/discord/forge-commands.js +698 -0
  137. package/dist/discord/forge-commands.test.js +1606 -0
  138. package/dist/discord/forge-plan-registry.js +68 -0
  139. package/dist/discord/forge-plan-registry.test.js +127 -0
  140. package/dist/discord/forum-count-sync.js +130 -0
  141. package/dist/discord/forum-count-sync.test.js +200 -0
  142. package/dist/discord/health-command.js +98 -0
  143. package/dist/discord/health-command.test.js +195 -0
  144. package/dist/discord/help-command.js +22 -0
  145. package/dist/discord/help-command.test.js +49 -0
  146. package/dist/discord/image-download.js +201 -0
  147. package/dist/discord/image-download.test.js +499 -0
  148. package/dist/discord/inflight-replies.js +228 -0
  149. package/dist/discord/inflight-replies.test.js +295 -0
  150. package/dist/discord/json-extract.js +110 -0
  151. package/dist/discord/keyed-queue.js +22 -0
  152. package/dist/discord/memory-commands.js +85 -0
  153. package/dist/discord/memory-commands.test.js +159 -0
  154. package/dist/discord/memory-timing.integration.test.js +159 -0
  155. package/dist/discord/message-coordinator.js +2347 -0
  156. package/dist/discord/message-coordinator.onboarding.test.js +183 -0
  157. package/dist/discord/message-coordinator.plan-run.test.js +264 -0
  158. package/dist/discord/message-history.js +53 -0
  159. package/dist/discord/message-history.test.js +95 -0
  160. package/dist/discord/models-command.js +59 -0
  161. package/dist/discord/models-command.test.js +150 -0
  162. package/dist/discord/nickname.test.js +76 -0
  163. package/dist/discord/onboarding-completion.js +55 -0
  164. package/dist/discord/onboarding-completion.test.js +176 -0
  165. package/dist/discord/output-common.js +178 -0
  166. package/dist/discord/output-common.test.js +198 -0
  167. package/dist/discord/output-utils.js +156 -0
  168. package/dist/discord/parse-identity-name.test.js +129 -0
  169. package/dist/discord/plan-commands.js +612 -0
  170. package/dist/discord/plan-commands.test.js +1622 -0
  171. package/dist/discord/plan-manager.js +1491 -0
  172. package/dist/discord/plan-manager.test.js +2380 -0
  173. package/dist/discord/plan-parser.js +110 -0
  174. package/dist/discord/plan-parser.test.js +63 -0
  175. package/dist/discord/plan-run-phase-start.js +20 -0
  176. package/dist/discord/plan-run-phase-start.test.js +29 -0
  177. package/dist/discord/platform-message.js +45 -0
  178. package/dist/discord/platform-message.test.js +110 -0
  179. package/dist/discord/prompt-common.js +240 -0
  180. package/dist/discord/prompt-common.test.js +423 -0
  181. package/dist/discord/reaction-handler.js +691 -0
  182. package/dist/discord/reaction-handler.test.js +1574 -0
  183. package/dist/discord/reaction-prompts.js +118 -0
  184. package/dist/discord/reaction-prompts.test.js +253 -0
  185. package/dist/discord/reply-reference.js +66 -0
  186. package/dist/discord/reply-reference.test.js +125 -0
  187. package/dist/discord/restart-command.js +143 -0
  188. package/dist/discord/restart-command.test.js +196 -0
  189. package/dist/discord/runtime-utils.js +43 -0
  190. package/dist/discord/runtime-utils.test.js +112 -0
  191. package/dist/discord/session-key.js +7 -0
  192. package/dist/discord/session-key.test.js +13 -0
  193. package/dist/discord/shortterm-memory.js +166 -0
  194. package/dist/discord/shortterm-memory.test.js +345 -0
  195. package/dist/discord/shutdown-context.js +122 -0
  196. package/dist/discord/shutdown-context.test.js +279 -0
  197. package/dist/discord/startup-profile.test.js +214 -0
  198. package/dist/discord/status-channel.js +190 -0
  199. package/dist/discord/status-channel.test.js +282 -0
  200. package/dist/discord/status-command.js +206 -0
  201. package/dist/discord/status-command.test.js +341 -0
  202. package/dist/discord/streaming-progress.js +107 -0
  203. package/dist/discord/streaming-progress.test.js +93 -0
  204. package/dist/discord/summarizer.js +89 -0
  205. package/dist/discord/summarizer.test.js +245 -0
  206. package/dist/discord/system-bootstrap.js +396 -0
  207. package/dist/discord/system-bootstrap.test.js +724 -0
  208. package/dist/discord/thread-context.js +169 -0
  209. package/dist/discord/thread-context.test.js +386 -0
  210. package/dist/discord/tool-aware-queue.js +116 -0
  211. package/dist/discord/tool-aware-queue.test.js +180 -0
  212. package/dist/discord/update-command.js +127 -0
  213. package/dist/discord/update-command.test.js +275 -0
  214. package/dist/discord/user-errors.js +40 -0
  215. package/dist/discord/user-errors.test.js +31 -0
  216. package/dist/discord/user-turn-to-durable.js +111 -0
  217. package/dist/discord/user-turn-to-durable.test.js +273 -0
  218. package/dist/discord-followup.test.js +677 -0
  219. package/dist/discord.channel-context.test.js +95 -0
  220. package/dist/discord.fail-closed.test.js +199 -0
  221. package/dist/discord.health-command.integration.test.js +140 -0
  222. package/dist/discord.js +190 -0
  223. package/dist/discord.prompt-context.test.js +1431 -0
  224. package/dist/discord.render.test.js +621 -0
  225. package/dist/discord.status-wiring.test.js +187 -0
  226. package/dist/engine/claudeCli.js +137 -0
  227. package/dist/engine/types.js +1 -0
  228. package/dist/group-queue.js +25 -0
  229. package/dist/health/credential-check.js +175 -0
  230. package/dist/health/credential-check.test.js +401 -0
  231. package/dist/health/startup-healing.js +139 -0
  232. package/dist/health/startup-healing.test.js +298 -0
  233. package/dist/identity.js +36 -0
  234. package/dist/index.js +1378 -0
  235. package/dist/logging/logger-like.js +1 -0
  236. package/dist/observability/memory-sampler.js +51 -0
  237. package/dist/observability/memory-sampler.test.js +93 -0
  238. package/dist/observability/metrics.js +88 -0
  239. package/dist/observability/metrics.test.js +42 -0
  240. package/dist/onboarding/onboarding-flow.js +246 -0
  241. package/dist/onboarding/onboarding-flow.test.js +238 -0
  242. package/dist/onboarding/onboarding-writer.js +102 -0
  243. package/dist/onboarding/onboarding-writer.test.js +143 -0
  244. package/dist/pidlock.js +187 -0
  245. package/dist/pidlock.test.js +128 -0
  246. package/dist/pipeline/engine.js +206 -0
  247. package/dist/pipeline/engine.test.js +771 -0
  248. package/dist/root-policy.js +21 -0
  249. package/dist/root-policy.test.js +55 -0
  250. package/dist/runtime/claude-code-cli.js +35 -0
  251. package/dist/runtime/claude-code-cli.test.js +1199 -0
  252. package/dist/runtime/cli-adapter.js +584 -0
  253. package/dist/runtime/cli-output-parsers.js +108 -0
  254. package/dist/runtime/cli-shared.js +96 -0
  255. package/dist/runtime/cli-shared.test.js +104 -0
  256. package/dist/runtime/cli-strategy.js +6 -0
  257. package/dist/runtime/codex-cli.js +16 -0
  258. package/dist/runtime/codex-cli.test.js +862 -0
  259. package/dist/runtime/concurrency-limit.js +80 -0
  260. package/dist/runtime/concurrency-limit.test.js +137 -0
  261. package/dist/runtime/gemini-cli.js +16 -0
  262. package/dist/runtime/gemini-cli.test.js +413 -0
  263. package/dist/runtime/long-running-process.js +415 -0
  264. package/dist/runtime/long-running-process.test.js +318 -0
  265. package/dist/runtime/model-smoke-helpers.js +160 -0
  266. package/dist/runtime/model-smoke.test.js +194 -0
  267. package/dist/runtime/model-tiers.js +33 -0
  268. package/dist/runtime/model-tiers.test.js +65 -0
  269. package/dist/runtime/openai-auth.js +151 -0
  270. package/dist/runtime/openai-auth.test.js +361 -0
  271. package/dist/runtime/openai-compat.js +178 -0
  272. package/dist/runtime/openai-compat.test.js +449 -0
  273. package/dist/runtime/process-pool.js +93 -0
  274. package/dist/runtime/process-pool.test.js +148 -0
  275. package/dist/runtime/registry.js +15 -0
  276. package/dist/runtime/registry.test.js +47 -0
  277. package/dist/runtime/session-scanner.js +186 -0
  278. package/dist/runtime/session-scanner.test.js +257 -0
  279. package/dist/runtime/strategies/claude-strategy.js +193 -0
  280. package/dist/runtime/strategies/codex-strategy.js +161 -0
  281. package/dist/runtime/strategies/gemini-strategy.js +64 -0
  282. package/dist/runtime/strategies/template-strategy.js +85 -0
  283. package/dist/runtime/tool-capabilities.js +27 -0
  284. package/dist/runtime/tool-capabilities.test.js +24 -0
  285. package/dist/runtime/tool-labels.js +48 -0
  286. package/dist/runtime/types.js +2 -0
  287. package/dist/sessionManager.js +47 -0
  288. package/dist/sessions.js +18 -0
  289. package/dist/tasks/architecture-contract.js +33 -0
  290. package/dist/tasks/architecture-contract.test.js +90 -0
  291. package/dist/tasks/auto-tag.js +50 -0
  292. package/dist/tasks/auto-tag.test.js +64 -0
  293. package/dist/tasks/bd-cli.js +164 -0
  294. package/dist/tasks/bd-cli.test.js +359 -0
  295. package/dist/tasks/bead-sync.js +1 -0
  296. package/dist/tasks/context-summary.js +27 -0
  297. package/dist/tasks/discord-sync.js +3 -0
  298. package/dist/tasks/discord-sync.test.js +685 -0
  299. package/dist/tasks/discord-types.js +4 -0
  300. package/dist/tasks/find-task-by-thread.test.js +36 -0
  301. package/dist/tasks/forum-guard.js +81 -0
  302. package/dist/tasks/forum-guard.test.js +192 -0
  303. package/dist/tasks/initialize.js +77 -0
  304. package/dist/tasks/initialize.test.js +263 -0
  305. package/dist/tasks/logger-types.js +1 -0
  306. package/dist/tasks/metrics-types.js +3 -0
  307. package/dist/tasks/migrate.js +33 -0
  308. package/dist/tasks/migrate.test.js +156 -0
  309. package/dist/tasks/path-defaults.js +67 -0
  310. package/dist/tasks/path-defaults.test.js +73 -0
  311. package/dist/tasks/runtime-types.js +1 -0
  312. package/dist/tasks/service.js +33 -0
  313. package/dist/tasks/service.test.js +51 -0
  314. package/dist/tasks/store.js +238 -0
  315. package/dist/tasks/store.test.js +417 -0
  316. package/dist/tasks/sync-context.js +1 -0
  317. package/dist/tasks/sync-contract.js +24 -0
  318. package/dist/tasks/sync-contract.test.js +25 -0
  319. package/dist/tasks/sync-coordinator-metrics.js +41 -0
  320. package/dist/tasks/sync-coordinator-retries.js +71 -0
  321. package/dist/tasks/sync-coordinator.js +96 -0
  322. package/dist/tasks/sync-coordinator.test.js +501 -0
  323. package/dist/tasks/sync-types.js +1 -0
  324. package/dist/tasks/sync-watcher.js +27 -0
  325. package/dist/tasks/sync-watcher.test.js +92 -0
  326. package/dist/tasks/tag-map.js +36 -0
  327. package/dist/tasks/tag-map.test.js +54 -0
  328. package/dist/tasks/task-action-contract.js +16 -0
  329. package/dist/tasks/task-action-contract.test.js +16 -0
  330. package/dist/tasks/task-action-executor.js +18 -0
  331. package/dist/tasks/task-action-executor.test.js +420 -0
  332. package/dist/tasks/task-action-mutation-helpers.js +17 -0
  333. package/dist/tasks/task-action-mutations.js +151 -0
  334. package/dist/tasks/task-action-prompt.js +62 -0
  335. package/dist/tasks/task-action-read-ops.js +73 -0
  336. package/dist/tasks/task-action-runner-types.js +1 -0
  337. package/dist/tasks/task-action-thread-sync.js +82 -0
  338. package/dist/tasks/task-actions.js +3 -0
  339. package/dist/tasks/task-cli.js +227 -0
  340. package/dist/tasks/task-context.js +1 -0
  341. package/dist/tasks/task-lifecycle.js +46 -0
  342. package/dist/tasks/task-lifecycle.test.js +35 -0
  343. package/dist/tasks/task-sync-apply-plan.js +95 -0
  344. package/dist/tasks/task-sync-apply-types.js +12 -0
  345. package/dist/tasks/task-sync-apply.js +319 -0
  346. package/dist/tasks/task-sync-cli.js +89 -0
  347. package/dist/tasks/task-sync-cli.test.js +70 -0
  348. package/dist/tasks/task-sync-engine.js +88 -0
  349. package/dist/tasks/task-sync-engine.test.js +934 -0
  350. package/dist/tasks/task-sync-phase-apply.js +171 -0
  351. package/dist/tasks/task-sync-pipeline.js +2 -0
  352. package/dist/tasks/task-sync-pipeline.test.js +265 -0
  353. package/dist/tasks/task-sync-reconcile-plan.js +182 -0
  354. package/dist/tasks/task-sync-reconcile.js +144 -0
  355. package/dist/tasks/task-sync.js +56 -0
  356. package/dist/tasks/task-sync.test.js +86 -0
  357. package/dist/tasks/thread-cache.js +42 -0
  358. package/dist/tasks/thread-cache.test.js +89 -0
  359. package/dist/tasks/thread-contracts.test.js +711 -0
  360. package/dist/tasks/thread-forum-ops.js +68 -0
  361. package/dist/tasks/thread-helpers.js +86 -0
  362. package/dist/tasks/thread-helpers.test.js +33 -0
  363. package/dist/tasks/thread-lifecycle-ops.js +144 -0
  364. package/dist/tasks/thread-ops-shared.js +21 -0
  365. package/dist/tasks/thread-ops.js +2 -0
  366. package/dist/tasks/types.js +20 -0
  367. package/dist/tasks/types.test.js +60 -0
  368. package/dist/test-setup.js +11 -0
  369. package/dist/test-setup.test.js +42 -0
  370. package/dist/transport/types.js +1 -0
  371. package/dist/validate.js +41 -0
  372. package/dist/validate.test.js +94 -0
  373. package/dist/version.js +15 -0
  374. package/dist/version.test.js +31 -0
  375. package/dist/webhook/server.js +199 -0
  376. package/dist/webhook/server.test.js +460 -0
  377. package/dist/workspace-bootstrap.js +135 -0
  378. package/dist/workspace-bootstrap.test.js +514 -0
  379. package/dist/workspace-permissions.js +134 -0
  380. package/dist/workspace-permissions.test.js +181 -0
  381. package/package.json +74 -0
  382. package/scripts/cron/cron-tag-map.json +9 -0
  383. package/scripts/tasks/tag-map.json +10 -0
  384. package/systemd/discoclaw.service +19 -0
  385. package/templates/recipes/integration.discoclaw-recipe.md +171 -0
  386. package/templates/workspace/AGENTS.md +217 -0
  387. package/templates/workspace/BOOTSTRAP.md +1 -0
  388. package/templates/workspace/HEARTBEAT.md +10 -0
  389. package/templates/workspace/IDENTITY.md +16 -0
  390. package/templates/workspace/MEMORY.md +24 -0
  391. package/templates/workspace/SOUL.md +52 -0
  392. package/templates/workspace/TOOLS.md +304 -0
  393. package/templates/workspace/USER.md +37 -0
@@ -0,0 +1,318 @@
1
+ import { beforeEach, describe, expect, it, vi, afterEach } from 'vitest';
2
+ import { EventEmitter } from 'node:events';
3
+ vi.mock('execa', () => ({
4
+ execa: vi.fn(),
5
+ }));
6
+ import { execa } from 'execa';
7
+ import { LongRunningProcess } from './long-running-process.js';
8
+ function createMockSubprocess() { // Test helper: builds a fake subprocess (a Promise carrying stdout/stderr/stdin/kill/pid) plus handles to settle it.
9
+ const stdout = new EventEmitter();
10
+ const stderr = new EventEmitter();
11
+ const stdin = { write: vi.fn(), end: vi.fn() }; // Stub stdin: write/end are vi.fn() mocks so tests can inspect what was written.
12
+ let resolvePromise;
13
+ let rejectPromise;
14
+ const promise = new Promise((res, rej) => {
15
+ resolvePromise = res;
16
+ rejectPromise = rej;
17
+ });
18
+ const proc = Object.assign(promise, { // Attach the stream/kill/pid fields directly onto the promise object.
19
+ stdout,
20
+ stderr,
21
+ stdin,
22
+ kill: vi.fn(),
23
+ pid: 12345,
24
+ });
25
+ return { proc, stdout, stderr, stdin, resolve: resolvePromise, reject: rejectPromise }; // resolve/reject let a test settle the promise on demand.
26
+ }
27
+ const baseOpts = { // Shared constructor options for LongRunningProcess; individual tests spread this object and override fields.
28
+ claudeBin: 'claude',
29
+ model: 'opus',
30
+ cwd: '/tmp',
31
+ dangerouslySkipPermissions: true,
32
+ hangTimeoutMs: 5000,
33
+ idleTimeoutMs: 10000,
34
+ };
35
+ beforeEach(() => { // Every test starts on fake timers with the execa mock reset.
36
+ vi.useFakeTimers();
37
+ execa.mockReset?.(); // Optional call: only runs when execa exposes mockReset.
38
+ });
39
+ afterEach(() => { // Switch back to real timers after each test.
40
+ vi.useRealTimers();
41
+ });
42
+ describe('LongRunningProcess', () => {
43
+ it('spawns with correct args', () => { // Checks spawn() returns true, leaves the process idle and alive, and passes the expected CLI flags to execa.
44
+ const mock = createMockSubprocess();
45
+ execa.mockReturnValue(mock.proc);
46
+ const proc = new LongRunningProcess({ ...baseOpts, tools: ['Read', 'Bash'], addDirs: ['/workspace'] });
47
+ const ok = proc.spawn();
48
+ expect(ok).toBe(true);
49
+ expect(proc.state).toBe('idle');
50
+ expect(proc.isAlive).toBe(true);
51
+ const callArgs = execa.mock.calls[0]?.[1] ?? []; // Second argument of the first execa call; falls back to [] when there was no call.
52
+ expect(callArgs).toContain('--input-format');
53
+ expect(callArgs).toContain('stream-json');
54
+ expect(callArgs).toContain('--output-format');
55
+ expect(callArgs).toContain('--include-partial-messages');
56
+ expect(callArgs).toContain('--model');
57
+ expect(callArgs).toContain('opus');
58
+ expect(callArgs).toContain('--dangerously-skip-permissions');
59
+ expect(callArgs).toContain('--tools');
60
+ expect(callArgs).toContain('Read,Bash');
61
+ expect(callArgs).toContain('--add-dir');
62
+ expect(callArgs).toContain('/workspace');
63
+ // -p is required for --input-format stream-json
64
+ expect(callArgs).toContain('-p');
65
+ });
66
+ it('first turn yields text_delta and text_final from stream', async () => { // Feeds two NDJSON lines on stdout, expects text_delta/text_final/done events, then inspects the stdin write.
67
+ const mock = createMockSubprocess();
68
+ execa.mockReturnValue(mock.proc);
69
+ const proc = new LongRunningProcess(baseOpts);
70
+ proc.spawn();
71
+ const events = [];
72
+ const gen = proc.sendTurn('Hello');
73
+ // Simulate stdout data arriving
74
+ const resultLine = JSON.stringify({ type: 'message_delta', text: 'Hi there' });
75
+ const finalLine = JSON.stringify({ type: 'result', result: 'Hi there' });
76
+ // Process events in microtasks
77
+ queueMicrotask(() => { // Deferred so the data is emitted after the for-await loop below has started.
78
+ mock.stdout.emit('data', resultLine + '\n' + finalLine + '\n');
79
+ });
80
+ for await (const evt of gen) {
81
+ events.push(evt);
82
+ }
83
+ expect(events.find((e) => e.type === 'text_delta')?.text).toBe('Hi there');
84
+ expect(events.find((e) => e.type === 'text_final')?.text).toBe('Hi there');
85
+ expect(events.find((e) => e.type === 'done')).toBeTruthy();
86
+ expect(proc.state).toBe('idle');
87
+ // Verify stdin was written with correct NDJSON (API-shaped message)
88
+ const written = mock.stdin.write.mock.calls[0]?.[0]; // First argument of the first stdin.write call.
89
+ const parsed = JSON.parse(written.trim());
90
+ expect(parsed.type).toBe('user');
91
+ expect(parsed.message.role).toBe('user');
92
+ expect(parsed.message.content).toBe('Hello');
93
+ });
94
+ it('second turn reuses the same process', async () => { // Runs two turns back to back and checks execa was invoked only once.
95
+ const mock = createMockSubprocess();
96
+ execa.mockReturnValue(mock.proc);
97
+ const proc = new LongRunningProcess(baseOpts);
98
+ proc.spawn();
99
+ // First turn
100
+ queueMicrotask(() => {
101
+ mock.stdout.emit('data', JSON.stringify({ type: 'result', result: 'response1' }) + '\n');
102
+ });
103
+ const events1 = [];
104
+ for await (const evt of proc.sendTurn('turn1')) {
105
+ events1.push(evt);
106
+ }
107
+ expect(events1.find((e) => e.type === 'text_final')?.text).toBe('response1');
108
+ expect(proc.state).toBe('idle'); // The process must be back to idle before the second turn is sent.
109
+ // Second turn (should reuse, not respawn)
110
+ queueMicrotask(() => {
111
+ mock.stdout.emit('data', JSON.stringify({ type: 'result', result: 'response2' }) + '\n');
112
+ });
113
+ const events2 = [];
114
+ for await (const evt of proc.sendTurn('turn2')) {
115
+ events2.push(evt);
116
+ }
117
+ expect(events2.find((e) => e.type === 'text_final')?.text).toBe('response2');
118
+ expect(proc.state).toBe('idle');
119
+ // execa should have been called only once
120
+ expect(execa.mock.calls).toHaveLength(1);
121
+ });
122
+ it('hang timeout triggers error and kills process', async () => { // With no stdout output, passing hangTimeoutMs should yield a 'hang detected' error, a done event, and a dead process.
123
+ const mock = createMockSubprocess();
124
+ execa.mockReturnValue(mock.proc);
125
+ const proc = new LongRunningProcess({ ...baseOpts, hangTimeoutMs: 1000 });
126
+ proc.spawn();
127
+ const events = [];
128
+ const genPromise = (async () => { // Consume the turn in the background so the fake clock can be advanced meanwhile.
129
+ for await (const evt of proc.sendTurn('Hello')) {
130
+ events.push(evt);
131
+ }
132
+ })();
133
+ // No stdout data arrives — advance past hang timeout
134
+ await vi.advanceTimersByTimeAsync(1100);
135
+ await genPromise;
136
+ expect(events.find((e) => e.type === 'error')?.message).toContain('hang detected');
137
+ expect(events.find((e) => e.type === 'done')).toBeTruthy();
138
+ expect(proc.state).toBe('dead');
139
+ });
140
+ it('idle timeout kills idle process', async () => { // No turn is sent; advancing fake time past idleTimeoutMs should leave the process dead.
141
+ const mock = createMockSubprocess();
142
+ execa.mockReturnValue(mock.proc);
143
+ const proc = new LongRunningProcess({ ...baseOpts, idleTimeoutMs: 2000 });
144
+ proc.spawn();
145
+ expect(proc.state).toBe('idle');
146
+ // Advance past idle timeout
147
+ await vi.advanceTimersByTimeAsync(2100);
148
+ expect(proc.state).toBe('dead');
149
+ });
150
+ it('process crash during turn emits error + done', async () => { // Settles the subprocess promise mid-turn and expects an 'exited unexpectedly' error, a done event, and a dead process.
151
+ const mock = createMockSubprocess();
152
+ execa.mockReturnValue(mock.proc);
153
+ const proc = new LongRunningProcess(baseOpts);
154
+ proc.spawn();
155
+ const events = [];
156
+ const genPromise = (async () => {
157
+ for await (const evt of proc.sendTurn('Hello')) {
158
+ events.push(evt);
159
+ }
160
+ })();
161
+ // Simulate process exit while busy
162
+ queueMicrotask(() => {
163
+ mock.resolve({ exitCode: 1 }); // Resolving the mock's promise stands in for the child process exiting.
164
+ });
165
+ await genPromise;
166
+ expect(events.find((e) => e.type === 'error')?.message).toContain('process exited unexpectedly');
167
+ expect(events.find((e) => e.type === 'done')).toBeTruthy();
168
+ expect(proc.state).toBe('dead');
169
+ });
170
+ it('kill() while busy unblocks the consumer (emits done)', async () => { // kill() mid-turn must let the for-await loop finish with a done event and a 'multi-turn: terminated' error.
171
+ const mock = createMockSubprocess();
172
+ execa.mockReturnValue(mock.proc);
173
+ const proc = new LongRunningProcess(baseOpts);
174
+ proc.spawn();
175
+ const events = [];
176
+ const genPromise = (async () => {
177
+ for await (const evt of proc.sendTurn('Hello')) {
178
+ events.push(evt);
179
+ }
180
+ })();
181
+ queueMicrotask(() => { // Deferred so kill() runs after the turn has been started above.
182
+ proc.kill();
183
+ });
184
+ await genPromise;
185
+ expect(events.find((e) => e.type === 'done')).toBeTruthy();
186
+ expect(proc.state).toBe('dead');
187
+ expect(events.find((e) => e.type === 'error')?.message).toBe('multi-turn: terminated');
188
+ });
189
+ it('forceKill() while busy unblocks the consumer (emits done)', async () => { // Same expectations as the kill() case, but triggered through forceKill().
190
+ const mock = createMockSubprocess();
191
+ execa.mockReturnValue(mock.proc);
192
+ const proc = new LongRunningProcess(baseOpts);
193
+ proc.spawn();
194
+ const events = [];
195
+ const genPromise = (async () => {
196
+ for await (const evt of proc.sendTurn('Hello')) {
197
+ events.push(evt);
198
+ }
199
+ })();
200
+ queueMicrotask(() => { // Deferred so forceKill() runs after the turn has been started above.
201
+ proc.forceKill();
202
+ });
203
+ await genPromise;
204
+ expect(events.find((e) => e.type === 'done')).toBeTruthy();
205
+ expect(proc.state).toBe('dead');
206
+ expect(events.find((e) => e.type === 'error')?.message).toBe('multi-turn: terminated');
207
+ });
208
+ it('sendTurn on non-idle process yields error', async () => { // After kill(), sendTurn() should yield a 'cannot send turn' error and a done event.
209
+ const mock = createMockSubprocess();
210
+ execa.mockReturnValue(mock.proc);
211
+ const proc = new LongRunningProcess(baseOpts);
212
+ proc.spawn();
213
+ proc.kill(); // Put the process into a non-idle state before sending a turn.
214
+ const events = [];
215
+ for await (const evt of proc.sendTurn('Hello')) {
216
+ events.push(evt);
217
+ }
218
+ expect(events.find((e) => e.type === 'error')?.message).toContain('cannot send turn');
219
+ expect(events.find((e) => e.type === 'done')).toBeTruthy();
220
+ });
221
+ it('kill() transitions to dead', () => { // Synchronous check of state and isAlive before and after kill().
222
+ const mock = createMockSubprocess();
223
+ execa.mockReturnValue(mock.proc);
224
+ const proc = new LongRunningProcess(baseOpts);
225
+ proc.spawn();
226
+ expect(proc.isAlive).toBe(true);
227
+ proc.kill();
228
+ expect(proc.state).toBe('dead');
229
+ expect(proc.isAlive).toBe(false);
230
+ });
231
+ it('strips tool use blocks from final text', async () => { // Streams three deltas (one containing '<tool_use>') plus a result line, then asserts text_final equals the result text.
232
+ const mock = createMockSubprocess();
233
+ execa.mockReturnValue(mock.proc);
234
+ const proc = new LongRunningProcess(baseOpts);
235
+ proc.spawn();
236
+ queueMicrotask(() => {
237
+ mock.stdout.emit('data', JSON.stringify({ type: 'message_delta', text: 'thinking...' }) + '\n' +
238
+ JSON.stringify({ type: 'message_delta', text: '<tool_use>read</tool_use>' }) + '\n' +
239
+ JSON.stringify({ type: 'message_delta', text: 'The answer is 42.' }) + '\n' +
240
+ JSON.stringify({ type: 'result', result: 'The answer is 42.' }) + '\n');
241
+ });
242
+ const events = [];
243
+ for await (const evt of proc.sendTurn('test')) {
244
+ events.push(evt);
245
+ }
246
+ expect(events.find((e) => e.type === 'text_final')?.text).toBe('The answer is 42.'); // Only text_final is asserted; the individual text_delta events are not checked here.
247
+ });
248
+ it('sendTurn with images writes content-block array to stdin', async () => { // With an images argument, message.content should be an array: a text block followed by a base64 image block.
249
+ const mock = createMockSubprocess();
250
+ execa.mockReturnValue(mock.proc);
251
+ const proc = new LongRunningProcess(baseOpts);
252
+ proc.spawn();
253
+ queueMicrotask(() => {
254
+ mock.stdout.emit('data', JSON.stringify({ type: 'result', result: 'I see an image' }) + '\n');
255
+ });
256
+ const images = [{ base64: 'iVBORw0KGgo=', mediaType: 'image/png' }];
257
+ const events = [];
258
+ for await (const evt of proc.sendTurn('Describe this', images)) {
259
+ events.push(evt);
260
+ }
261
+ expect(events.find((e) => e.type === 'text_final')?.text).toBe('I see an image');
262
+ // Verify stdin was written with content-block array
263
+ const written = mock.stdin.write.mock.calls[0]?.[0]; // First argument of the first stdin.write call.
264
+ const parsed = JSON.parse(written.trim());
265
+ expect(parsed.type).toBe('user');
266
+ expect(parsed.message.role).toBe('user');
267
+ expect(Array.isArray(parsed.message.content)).toBe(true);
268
+ expect(parsed.message.content[0]).toEqual({ type: 'text', text: 'Describe this' });
269
+ expect(parsed.message.content[1].type).toBe('image');
270
+ expect(parsed.message.content[1].source.type).toBe('base64');
271
+ expect(parsed.message.content[1].source.media_type).toBe('image/png'); // The input's mediaType field is expected under the snake_case key media_type.
272
+ expect(parsed.message.content[1].source.data).toBe('iVBORw0KGgo=');
273
+ });
274
+ it('spawns with --fallback-model, --max-budget-usd, --append-system-prompt when set', () => { // Each optional setting should appear as a flag immediately followed by its value.
275
+ const mock = createMockSubprocess();
276
+ execa.mockReturnValue(mock.proc);
277
+ const proc = new LongRunningProcess({
278
+ ...baseOpts,
279
+ fallbackModel: 'sonnet',
280
+ maxBudgetUsd: 7.5,
281
+ appendSystemPrompt: 'You are Weston.',
282
+ });
283
+ proc.spawn();
284
+ const callArgs = execa.mock.calls[0]?.[1] ?? [];
285
+ expect(callArgs).toContain('--fallback-model');
286
+ expect(callArgs[callArgs.indexOf('--fallback-model') + 1]).toBe('sonnet');
287
+ expect(callArgs).toContain('--max-budget-usd');
288
+ expect(callArgs[callArgs.indexOf('--max-budget-usd') + 1]).toBe('7.5'); // The numeric budget 7.5 is expected as the string '7.5'.
289
+ expect(callArgs).toContain('--append-system-prompt');
290
+ expect(callArgs[callArgs.indexOf('--append-system-prompt') + 1]).toBe('You are Weston.');
291
+ });
292
+ it('omits new flags when not set', () => { // With only baseOpts, none of the three optional flags should appear in the execa args.
293
+ const mock = createMockSubprocess();
294
+ execa.mockReturnValue(mock.proc);
295
+ const proc = new LongRunningProcess(baseOpts);
296
+ proc.spawn();
297
+ const callArgs = execa.mock.calls[0]?.[1] ?? [];
298
+ expect(callArgs).not.toContain('--fallback-model');
299
+ expect(callArgs).not.toContain('--max-budget-usd');
300
+ expect(callArgs).not.toContain('--append-system-prompt');
301
+ });
302
+ it('sendTurn without images writes plain string content (no regression)', async () => { // Without an images argument, message.content should be the prompt string itself.
303
+ const mock = createMockSubprocess();
304
+ execa.mockReturnValue(mock.proc);
305
+ const proc = new LongRunningProcess(baseOpts);
306
+ proc.spawn();
307
+ queueMicrotask(() => {
308
+ mock.stdout.emit('data', JSON.stringify({ type: 'result', result: 'ok' }) + '\n');
309
+ });
310
+ const events = []; // Collected only to drain the generator; not asserted on in this test.
311
+ for await (const evt of proc.sendTurn('Hello')) {
312
+ events.push(evt);
313
+ }
314
+ const written = mock.stdin.write.mock.calls[0]?.[0];
315
+ const parsed = JSON.parse(written.trim());
316
+ expect(parsed.message.content).toBe('Hello');
317
+ });
318
+ });
@@ -0,0 +1,160 @@
1
+ /**
2
+ * Smoke-test helpers: prompt definitions, response validation, and env-driven
3
+ * runtime factories. Used by model-smoke.test.ts to exercise each configured
4
+ * model tier through the full RuntimeAdapter.invoke() → EngineEvent pipeline.
5
+ *
6
+ * Runtime factories:
7
+ * buildSmokeRuntime — Claude Code CLI (CLAUDE_BIN, CLAUDE_OUTPUT_FORMAT, …)
8
+ * buildGeminiSmokeRuntime — Gemini CLI (GEMINI_BIN, GEMINI_MODEL)
9
+ * buildOpenAISmokeRuntime — OpenAI API (OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MODEL)
10
+ * buildCodexSmokeRuntime — Codex CLI (CODEX_BIN, CODEX_MODEL)
11
+ */
12
+ import { createClaudeCliRuntime } from './claude-code-cli.js';
13
+ import { createGeminiCliRuntime } from './gemini-cli.js';
14
+ import { createOpenAICompatRuntime } from './openai-compat.js';
15
+ import { createCodexCliRuntime } from './codex-cli.js';
16
/**
 * Prompt categories used by the smoke tests.
 *
 * Each entry has:
 *   - `name`     — category identifier (also used in diagnostic labels)
 *   - `prompt`   — text sent to the model
 *   - `validate` — optional predicate over the response text; when absent,
 *                  no category-specific check is applied.
 */
export const PROMPT_CATEGORIES = [
    {
        name: 'greeting',
        prompt: 'Say hello in exactly one sentence. Output only that sentence.',
        // Non-empty text is sufficient for greeting.
    },
    {
        name: 'code_block',
        prompt: 'Write a JavaScript function called add that takes two numbers and returns their sum. Wrap your answer in a fenced code block.',
        validate: (text) => text.includes('```'),
    },
    {
        name: 'structured_markdown',
        prompt: 'List the three primary colors of light as a markdown bullet list. Output only the list.',
        // Require an actual bullet line (marker at line start, then whitespace,
        // then content). A bare `-` or `*` anywhere in the text — e.g.
        // "blue-ish" or "**bold**" — is not a list.
        validate: (text) => /^[ \t]*[-*+][ \t]+\S/m.test(text),
    },
    {
        name: 'cron_style',
        prompt: 'What cron expression runs a job every day at midnight? Output only the cron expression, nothing else.',
        validate: (text) => /\d/.test(text) && text.includes('*'),
    },
    {
        name: 'unicode',
        prompt: 'Echo back this emoji exactly: 🎉 Output only the emoji.',
        validate: (text) => text.includes('🎉'),
    },
    {
        name: 'empty_input',
        prompt: ' ',
        // No strict validate; model should respond without crashing.
    },
];
48
/**
 * Validate a pre-collected EngineEvent array and return a diagnostic result.
 *
 * Fails if:
 *   - an `error` event appears in the stream
 *   - the stream ends without a `done` event
 *   - the accumulated response text is empty (except for `empty_input` category)
 *
 * Text is taken from the first `text_final` event that carries a string
 * `text`; otherwise it is the concatenation of all string `text_delta`
 * payloads. A malformed event (missing or non-string `text`) therefore yields
 * a normal diagnostic instead of a TypeError.
 *
 * Tier and category labels are included in all diagnostic messages.
 *
 * @param {Array<object>} events - Collected events, each with a `type` field.
 * @param {string} tierLabel - Tier (or model) label used in diagnostics.
 * @param {string} categoryName - Prompt category name used in diagnostics.
 * @returns {{ok: boolean, text: string, errorMessage?: string, events: Array<object>}}
 */
export function validateSmokeResponse(events, tierLabel, categoryName) {
    const label = `[${tierLabel}/${categoryName}]`;
    const failure = (text, reason) => ({
        ok: false,
        text,
        errorMessage: `${label} ${reason}`,
        events,
    });
    const errorEvt = events.find((e) => e.type === 'error');
    if (errorEvt) {
        return failure('', `error event: ${errorEvt.message}`);
    }
    if (!events.some((e) => e.type === 'done')) {
        return failure('', 'stream ended without done event');
    }
    const hasStringText = (e) => typeof e.text === 'string';
    // Prefer text_final; fall back to concatenated text_delta events.
    const finalEvt = events.find((e) => e.type === 'text_final' && hasStringText(e));
    const text = finalEvt
        ? finalEvt.text
        : events
            .filter((e) => e.type === 'text_delta' && hasStringText(e))
            .map((e) => e.text)
            .join('');
    // For empty_input category, an empty response is acceptable.
    if (!text.trim() && categoryName !== 'empty_input') {
        return failure(text, 'response text is empty');
    }
    return { ok: true, text, events };
}
91
/**
 * Create the Claude CLI runtime used by the smoke tests from environment
 * variables.
 *
 * Normalization performed here:
 *   - `CLAUDE_BIN` is trimmed; defaults to `claude`
 *   - `CLAUDE_OUTPUT_FORMAT` must be `text` or `stream-json` when set;
 *     defaults to `text`
 *   - verbose (`CLAUDE_VERBOSE=1`) is only honored for non-`text` output
 *   - `RUNTIME_MAX_BUDGET_USD`, when set, must be a positive finite number
 *
 * @param {object} [env=process.env] - Environment variable map.
 * @returns {{runtime: object, claudeBin: string}} The adapter plus the
 *   resolved binary name, so callers can check the binary exists first.
 * @throws {Error} On an invalid output format or budget value.
 */
export function buildSmokeRuntime(env = process.env) {
    const claudeBin = env.CLAUDE_BIN?.trim() || 'claude';

    const requestedFormat = env.CLAUDE_OUTPUT_FORMAT?.trim();
    const isKnownFormat = requestedFormat === 'text' || requestedFormat === 'stream-json';
    if (requestedFormat && !isKnownFormat) {
        throw new Error(`CLAUDE_OUTPUT_FORMAT must be "text" or "stream-json", got "${requestedFormat}"`);
    }
    const outputFormat = requestedFormat === 'stream-json' ? 'stream-json' : 'text';

    // Verbose is suppressed whenever the output format is plain text.
    const verbose = outputFormat !== 'text' && env.CLAUDE_VERBOSE === '1';
    const dangerouslySkipPermissions = env.CLAUDE_DANGEROUSLY_SKIP_PERMISSIONS === '1';

    const budgetText = env.RUNTIME_MAX_BUDGET_USD?.trim();
    const maxBudgetUsd = budgetText ? Number(budgetText) : undefined;
    if (maxBudgetUsd !== undefined && !(Number.isFinite(maxBudgetUsd) && maxBudgetUsd > 0)) {
        throw new Error(`RUNTIME_MAX_BUDGET_USD must be a positive number, got "${budgetText}"`);
    }

    return {
        runtime: createClaudeCliRuntime({
            claudeBin,
            dangerouslySkipPermissions,
            outputFormat,
            verbose,
            maxBudgetUsd,
        }),
        claudeBin,
    };
}
129
/**
 * Create the Gemini CLI runtime used by the smoke tests.
 *
 * `GEMINI_BIN` falls back to `gemini` and `GEMINI_MODEL` to
 * `gemini-2.5-flash` when unset or blank.
 *
 * @param {object} [env=process.env] - Environment variable map.
 * @returns {{runtime: object, geminiBin: string}} Adapter and resolved binary name.
 */
export function buildGeminiSmokeRuntime(env = process.env) {
    const { GEMINI_BIN, GEMINI_MODEL } = env;
    const geminiBin = GEMINI_BIN?.trim() || 'gemini';
    return {
        runtime: createGeminiCliRuntime({
            geminiBin,
            defaultModel: GEMINI_MODEL?.trim() || 'gemini-2.5-flash',
        }),
        geminiBin,
    };
}
139
/**
 * Create the OpenAI-compatible runtime used by the smoke tests.
 *
 * Reads `OPENAI_API_KEY` (empty string when unset), `OPENAI_BASE_URL`
 * (default `https://api.openai.com/v1`) and `OPENAI_MODEL` (default `gpt-4o`).
 *
 * @param {object} [env=process.env] - Environment variable map.
 * @returns {{runtime: object, apiKey: string}} Adapter and resolved API key,
 *   so callers can detect a missing key before invoking.
 */
export function buildOpenAISmokeRuntime(env = process.env) {
    const apiKey = (env.OPENAI_API_KEY ?? '').trim();
    const settings = {
        auth: 'api_key',
        apiKey,
        baseUrl: env.OPENAI_BASE_URL?.trim() || 'https://api.openai.com/v1',
        defaultModel: env.OPENAI_MODEL?.trim() || 'gpt-4o',
    };
    return { runtime: createOpenAICompatRuntime(settings), apiKey };
}
151
/**
 * Create the Codex CLI runtime used by the smoke tests.
 *
 * `CODEX_BIN` falls back to `codex` and `CODEX_MODEL` to `gpt-5.3-codex`
 * when unset or blank.
 *
 * @param {object} [env=process.env] - Environment variable map.
 * @returns {{runtime: object, codexBin: string}} Adapter and resolved binary name.
 */
export function buildCodexSmokeRuntime(env = process.env) {
    const { CODEX_BIN, CODEX_MODEL } = env;
    const codexBin = CODEX_BIN?.trim() || 'codex';
    return {
        runtime: createCodexCliRuntime({
            codexBin,
            defaultModel: CODEX_MODEL?.trim() || 'gpt-5.3-codex',
        }),
        codexBin,
    };
}
@@ -0,0 +1,194 @@
1
+ /**
2
+ * Model smoke tests — end-to-end validation of each configured runtime.
3
+ *
4
+ * Opt-in via provider-specific env vars (all skipped by default so normal
5
+ * `pnpm test` runs are not slowed down):
6
+ *
7
+ * SMOKE_TEST_TIERS=fast,capable pnpm test
8
+ * Run Claude Code smoke tests for the fast and capable tiers.
9
+ *
10
+ * GEMINI_SMOKE_TEST_TIERS=fast pnpm test
11
+ * Run Gemini smoke tests for the fast tier.
12
+ *
13
+ * OPENAI_SMOKE_TEST_TIERS=fast pnpm test
14
+ * Run OpenAI smoke tests (requires OPENAI_API_KEY).
15
+ *
16
+ * CODEX_SMOKE_TEST_TIERS=fast pnpm test
17
+ * Run Codex smoke tests (requires codex binary on PATH).
18
+ *
19
+ * SMOKE_TEST_TIERS=fast SMOKE_TEST_TIMEOUT_MS=120000 pnpm test
20
+ * Override per-prompt timeout.
21
+ *
22
+ * Catches: bad API keys, wrong tier mappings, malformed system prompts,
23
+ * missing binaries — all surface as error events or empty text.
24
+ */
25
+ import { describe, expect, it, beforeAll } from 'vitest';
26
+ import { execFileSync } from 'node:child_process';
27
+ import { PROMPT_CATEGORIES, validateSmokeResponse, buildSmokeRuntime, buildGeminiSmokeRuntime, buildOpenAISmokeRuntime, buildCodexSmokeRuntime, } from './model-smoke-helpers.js';
28
+ import { resolveModel } from './model-tiers.js';
29
/** Directory handed to every runtime invocation as its `cwd`. */
const CWD = '/tmp';
/** Raw, trimmed SMOKE_TEST_TIMEOUT_MS value (undefined when unset). */
const rawTimeout = process.env.SMOKE_TEST_TIMEOUT_MS?.trim();
/**
 * Turn the raw timeout setting into milliseconds.
 * Unset/blank selects the 60 s default; anything else must be a positive
 * finite number, otherwise loading this module fails.
 */
function parseSmokeTimeout(raw) {
    if (!raw) {
        return 60_000;
    }
    const millis = Number(raw);
    const isUsable = Number.isFinite(millis) && millis > 0;
    if (!isUsable) {
        throw new Error(`SMOKE_TEST_TIMEOUT_MS must be a positive number, got "${raw}"`);
    }
    return millis;
}
/** Per-test timeout — configurable via SMOKE_TEST_TIMEOUT_MS (default 60 s). */
const TIMEOUT = parseSmokeTimeout(rawTimeout);
42
/**
 * Split a comma-separated env value into trimmed, non-empty entries.
 * Unset or blank input produces an empty list.
 */
function splitTierList(raw) {
    const trimmed = raw?.trim();
    if (!trimmed) {
        return [];
    }
    const entries = [];
    for (const piece of trimmed.split(',')) {
        const entry = piece.trim();
        if (entry) {
            entries.push(entry);
        }
    }
    return entries;
}
/**
 * Tier names or literal model IDs from SMOKE_TEST_TIERS (comma-separated).
 * Empty = all smoke tests skipped.
 */
const SMOKE_TIERS = splitTierList(process.env.SMOKE_TEST_TIERS);
/**
 * Tier names or literal model IDs from GEMINI_SMOKE_TEST_TIERS (comma-separated).
 * Empty = all Gemini smoke tests skipped.
 */
const GEMINI_SMOKE_TIERS = splitTierList(process.env.GEMINI_SMOKE_TEST_TIERS);
/**
 * Tier names or literal model IDs from OPENAI_SMOKE_TEST_TIERS (comma-separated).
 * Empty = all OpenAI smoke tests skipped.
 */
const OPENAI_SMOKE_TIERS = splitTierList(process.env.OPENAI_SMOKE_TEST_TIERS);
/**
 * Tier names or literal model IDs from CODEX_SMOKE_TEST_TIERS (comma-separated).
 * Empty = all Codex smoke tests skipped.
 */
const CODEX_SMOKE_TIERS = splitTierList(process.env.CODEX_SMOKE_TEST_TIERS);
82
// Runtimes are constructed only for providers whose tier list is non-empty;
// a provider that was not opted into stays `null`, so its configuration is
// never read (and cannot raise config errors) during a normal test run.
const smokeState = SMOKE_TIERS.length === 0 ? null : buildSmokeRuntime();
const geminiSmokeState = GEMINI_SMOKE_TIERS.length === 0 ? null : buildGeminiSmokeRuntime();
const openaiSmokeState = OPENAI_SMOKE_TIERS.length === 0 ? null : buildOpenAISmokeRuntime();
const codexSmokeState = CODEX_SMOKE_TIERS.length === 0 ? null : buildCodexSmokeRuntime();
87
+ // ---------------------------------------------------------------------------
88
+ // Claude Code — one describe block per requested tier
89
+ // ---------------------------------------------------------------------------
90
describe.each(SMOKE_TIERS)('claude_code / %s', (tierOrModel) => {
    // `tierOrModel` is one SMOKE_TEST_TIERS entry: a tier name or a literal model ID.
    const model = resolveModel(tierOrModel, 'claude_code');
    const { runtime, claudeBin } = smokeState;
    /** Drain an async event stream into an array. */
    const collectEvents = async (stream) => {
        const collected = [];
        for await (const event of stream) {
            collected.push(event);
        }
        return collected;
    };
    // Fail up front with an actionable message when the CLI binary is missing.
    beforeAll(() => {
        try {
            execFileSync('which', [claudeBin], { stdio: 'pipe' });
        }
        catch {
            throw new Error([
                `Smoke test opt-in (SMOKE_TEST_TIERS="${process.env.SMOKE_TEST_TIERS}") `,
                `requires binary "${claudeBin}" on PATH. `,
                `Install the Claude CLI or set CLAUDE_BIN to the correct path.`,
            ].join(''));
        }
    });
    it.each(PROMPT_CATEGORIES)('$name', async ({ prompt, validate, name }) => {
        const events = await collectEvents(runtime.invoke({ prompt, model, cwd: CWD, tools: [] }));
        const outcome = validateSmokeResponse(events, tierOrModel, name);
        expect(outcome.ok, `smoke failed: ${outcome.errorMessage}`).toBe(true);
        // Categories without a validator only need the generic checks above.
        if (!validate) {
            return;
        }
        expect(validate(outcome.text), `[${tierOrModel}/${name}] validation failed for text: ${JSON.stringify(outcome.text)}`).toBe(true);
    }, TIMEOUT);
});
115
+ // ---------------------------------------------------------------------------
116
+ // Gemini CLI — one describe block per requested tier
117
+ // ---------------------------------------------------------------------------
118
describe.each(GEMINI_SMOKE_TIERS)('gemini / %s', (tierOrModel) => {
    // `tierOrModel` is one GEMINI_SMOKE_TEST_TIERS entry: a tier name or a literal model ID.
    const model = resolveModel(tierOrModel, 'gemini');
    const { runtime, geminiBin } = geminiSmokeState;
    /** Drain an async event stream into an array. */
    const collectEvents = async (stream) => {
        const collected = [];
        for await (const event of stream) {
            collected.push(event);
        }
        return collected;
    };
    // Fail up front with an actionable message when the CLI binary is missing.
    beforeAll(() => {
        try {
            execFileSync('which', [geminiBin], { stdio: 'pipe' });
        }
        catch {
            throw new Error([
                `Smoke test opt-in (GEMINI_SMOKE_TEST_TIERS="${process.env.GEMINI_SMOKE_TEST_TIERS}") `,
                `requires binary "${geminiBin}" on PATH. `,
                `Install the Gemini CLI or set GEMINI_BIN to the correct path.`,
            ].join(''));
        }
    });
    it.each(PROMPT_CATEGORIES)('$name', async ({ prompt, validate, name }) => {
        const events = await collectEvents(runtime.invoke({ prompt, model, cwd: CWD, tools: [] }));
        const outcome = validateSmokeResponse(events, tierOrModel, name);
        expect(outcome.ok, `smoke failed: ${outcome.errorMessage}`).toBe(true);
        // Categories without a validator only need the generic checks above.
        if (!validate) {
            return;
        }
        expect(validate(outcome.text), `[${tierOrModel}/${name}] validation failed for text: ${JSON.stringify(outcome.text)}`).toBe(true);
    }, TIMEOUT);
});
143
+ // ---------------------------------------------------------------------------
144
+ // OpenAI API — one describe block per requested tier
145
+ // ---------------------------------------------------------------------------
146
describe.each(OPENAI_SMOKE_TIERS)('openai / %s', (tierOrModel) => {
    // `tierOrModel` is one OPENAI_SMOKE_TEST_TIERS entry: a tier name or a literal model ID.
    const model = resolveModel(tierOrModel, 'openai');
    const { runtime, apiKey } = openaiSmokeState;
    /** Drain an async event stream into an array. */
    const collectEvents = async (stream) => {
        const collected = [];
        for await (const event of stream) {
            collected.push(event);
        }
        return collected;
    };
    // Fail up front with an actionable message when no API key is configured.
    beforeAll(() => {
        if (apiKey) {
            return;
        }
        throw new Error([
            `Smoke test opt-in (OPENAI_SMOKE_TEST_TIERS="${process.env.OPENAI_SMOKE_TEST_TIERS}") `,
            `requires OPENAI_API_KEY to be set.`,
        ].join(''));
    });
    it.each(PROMPT_CATEGORIES)('$name', async ({ prompt, validate, name }) => {
        const events = await collectEvents(runtime.invoke({ prompt, model, cwd: CWD, tools: [] }));
        const outcome = validateSmokeResponse(events, tierOrModel, name);
        expect(outcome.ok, `smoke failed: ${outcome.errorMessage}`).toBe(true);
        // Categories without a validator only need the generic checks above.
        if (!validate) {
            return;
        }
        expect(validate(outcome.text), `[${tierOrModel}/${name}] validation failed for text: ${JSON.stringify(outcome.text)}`).toBe(true);
    }, TIMEOUT);
});
167
+ // ---------------------------------------------------------------------------
168
+ // Codex CLI — one describe block per requested tier
169
+ // ---------------------------------------------------------------------------
170
describe.each(CODEX_SMOKE_TIERS)('codex / %s', (tierOrModel) => {
    // `tierOrModel` is one CODEX_SMOKE_TEST_TIERS entry: a tier name or a literal model ID.
    const model = resolveModel(tierOrModel, 'codex');
    const { runtime, codexBin } = codexSmokeState;
    /** Drain an async event stream into an array. */
    const collectEvents = async (stream) => {
        const collected = [];
        for await (const event of stream) {
            collected.push(event);
        }
        return collected;
    };
    // Fail up front with an actionable message when the CLI binary is missing.
    beforeAll(() => {
        try {
            execFileSync('which', [codexBin], { stdio: 'pipe' });
        }
        catch {
            throw new Error([
                `Smoke test opt-in (CODEX_SMOKE_TEST_TIERS="${process.env.CODEX_SMOKE_TEST_TIERS}") `,
                `requires binary "${codexBin}" on PATH. `,
                `Install the Codex CLI or set CODEX_BIN to the correct path.`,
            ].join(''));
        }
    });
    it.each(PROMPT_CATEGORIES)('$name', async ({ prompt, validate, name }) => {
        const events = await collectEvents(runtime.invoke({ prompt, model, cwd: CWD, tools: [] }));
        const outcome = validateSmokeResponse(events, tierOrModel, name);
        expect(outcome.ok, `smoke failed: ${outcome.errorMessage}`).toBe(true);
        // Categories without a validator only need the generic checks above.
        if (!validate) {
            return;
        }
        expect(validate(outcome.text), `[${tierOrModel}/${name}] validation failed for text: ${JSON.stringify(outcome.text)}`).toBe(true);
    }, TIMEOUT);
});