gsd-pi 2.63.0-dev.026d309 → 2.63.0-dev.351157b

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312) hide show
  1. package/README.md +46 -134
  2. package/dist/cli.js +44 -6
  3. package/dist/help-text.js +4 -1
  4. package/dist/onboarding.js +15 -8
  5. package/dist/resource-loader.js +18 -3
  6. package/dist/resources/extensions/cmux/index.js +21 -12
  7. package/dist/resources/extensions/gsd/auto/finalize-timeout.js +40 -0
  8. package/dist/resources/extensions/gsd/auto/loop.js +4 -0
  9. package/dist/resources/extensions/gsd/auto/phases.js +123 -22
  10. package/dist/resources/extensions/gsd/auto/session.js +8 -0
  11. package/dist/resources/extensions/gsd/auto-dashboard.js +9 -3
  12. package/dist/resources/extensions/gsd/auto-post-unit.js +45 -10
  13. package/dist/resources/extensions/gsd/auto-prompts.js +25 -0
  14. package/dist/resources/extensions/gsd/auto-recovery.js +15 -7
  15. package/dist/resources/extensions/gsd/auto-start.js +10 -21
  16. package/dist/resources/extensions/gsd/auto-tool-tracking.js +17 -0
  17. package/dist/resources/extensions/gsd/auto-worktree.js +13 -7
  18. package/dist/resources/extensions/gsd/auto.js +19 -2
  19. package/dist/resources/extensions/gsd/bootstrap/db-tools.js +73 -60
  20. package/dist/resources/extensions/gsd/bootstrap/dynamic-tools.js +13 -0
  21. package/dist/resources/extensions/gsd/bootstrap/query-tools.js +85 -0
  22. package/dist/resources/extensions/gsd/bootstrap/register-extension.js +3 -0
  23. package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +9 -1
  24. package/dist/resources/extensions/gsd/bootstrap/sanitize-complete-milestone.js +54 -0
  25. package/dist/resources/extensions/gsd/commands-handlers.js +9 -4
  26. package/dist/resources/extensions/gsd/constants.js +42 -0
  27. package/dist/resources/extensions/gsd/db-writer.js +72 -4
  28. package/dist/resources/extensions/gsd/forensics.js +20 -4
  29. package/dist/resources/extensions/gsd/gsd-db.js +64 -17
  30. package/dist/resources/extensions/gsd/guided-flow.js +19 -0
  31. package/dist/resources/extensions/gsd/metrics.js +27 -1
  32. package/dist/resources/extensions/gsd/native-git-bridge.js +5 -3
  33. package/dist/resources/extensions/gsd/preferences-types.js +1 -0
  34. package/dist/resources/extensions/gsd/preferences.js +7 -2
  35. package/dist/resources/extensions/gsd/prompts/complete-milestone.md +2 -0
  36. package/dist/resources/extensions/gsd/prompts/complete-slice.md +2 -0
  37. package/dist/resources/extensions/gsd/prompts/doctor-heal.md +1 -0
  38. package/dist/resources/extensions/gsd/prompts/forensics.md +2 -0
  39. package/dist/resources/extensions/gsd/prompts/reassess-roadmap.md +2 -0
  40. package/dist/resources/extensions/gsd/prompts/system.md +1 -0
  41. package/dist/resources/extensions/gsd/prompts/validate-milestone.md +2 -0
  42. package/dist/resources/extensions/gsd/roadmap-mutations.js +1 -1
  43. package/dist/resources/extensions/gsd/roadmap-slices.js +9 -5
  44. package/dist/resources/extensions/gsd/slice-parallel-conflict.js +67 -0
  45. package/dist/resources/extensions/gsd/slice-parallel-eligibility.js +51 -0
  46. package/dist/resources/extensions/gsd/slice-parallel-orchestrator.js +378 -0
  47. package/dist/resources/extensions/gsd/state.js +74 -14
  48. package/dist/resources/extensions/gsd/status-guards.js +11 -0
  49. package/dist/resources/extensions/gsd/tools/complete-milestone.js +17 -12
  50. package/dist/resources/extensions/gsd/tools/complete-slice.js +40 -26
  51. package/dist/resources/extensions/gsd/tools/complete-task.js +12 -12
  52. package/dist/resources/extensions/gsd/tools/plan-milestone.js +33 -25
  53. package/dist/resources/extensions/gsd/tools/plan-slice.js +5 -8
  54. package/dist/resources/extensions/gsd/workflow-projections.js +21 -5
  55. package/dist/resources/extensions/gsd/worktree-manager.js +82 -29
  56. package/dist/resources/extensions/gsd/worktree-resolver.js +4 -3
  57. package/dist/resources/extensions/mcp-client/auth.js +101 -0
  58. package/dist/resources/extensions/mcp-client/index.js +10 -1
  59. package/dist/resources/extensions/ollama/index.js +6 -12
  60. package/dist/resources/extensions/ollama/model-capabilities.js +37 -34
  61. package/dist/resources/extensions/ollama/ndjson-stream.js +54 -0
  62. package/dist/resources/extensions/ollama/ollama-chat-provider.js +380 -0
  63. package/dist/resources/extensions/ollama/ollama-client.js +23 -32
  64. package/dist/resources/extensions/ollama/ollama-discovery.js +2 -7
  65. package/dist/resources/extensions/ollama/ollama-tool.js +62 -0
  66. package/dist/resources/extensions/ollama/thinking-parser.js +104 -0
  67. package/dist/web/standalone/.next/BUILD_ID +1 -1
  68. package/dist/web/standalone/.next/app-path-routes-manifest.json +14 -14
  69. package/dist/web/standalone/.next/build-manifest.json +2 -2
  70. package/dist/web/standalone/.next/prerender-manifest.json +3 -3
  71. package/dist/web/standalone/.next/server/app/_global-error.html +2 -2
  72. package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
  73. package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  74. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
  75. package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
  76. package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  77. package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  78. package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  79. package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
  80. package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
  81. package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  82. package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  83. package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  84. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  85. package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  86. package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  87. package/dist/web/standalone/.next/server/app/api/boot/route.js +1 -1
  88. package/dist/web/standalone/.next/server/app/api/boot/route.js.nft.json +1 -1
  89. package/dist/web/standalone/.next/server/app/api/bridge-terminal/input/route.js +1 -1
  90. package/dist/web/standalone/.next/server/app/api/bridge-terminal/input/route.js.nft.json +1 -1
  91. package/dist/web/standalone/.next/server/app/api/bridge-terminal/resize/route.js +1 -1
  92. package/dist/web/standalone/.next/server/app/api/bridge-terminal/resize/route.js.nft.json +1 -1
  93. package/dist/web/standalone/.next/server/app/api/bridge-terminal/stream/route.js +2 -2
  94. package/dist/web/standalone/.next/server/app/api/bridge-terminal/stream/route.js.nft.json +1 -1
  95. package/dist/web/standalone/.next/server/app/api/captures/route.js +1 -1
  96. package/dist/web/standalone/.next/server/app/api/captures/route.js.nft.json +1 -1
  97. package/dist/web/standalone/.next/server/app/api/cleanup/route.js +1 -1
  98. package/dist/web/standalone/.next/server/app/api/cleanup/route.js.nft.json +1 -1
  99. package/dist/web/standalone/.next/server/app/api/doctor/route.js +1 -1
  100. package/dist/web/standalone/.next/server/app/api/doctor/route.js.nft.json +1 -1
  101. package/dist/web/standalone/.next/server/app/api/export-data/route.js +1 -1
  102. package/dist/web/standalone/.next/server/app/api/export-data/route.js.nft.json +1 -1
  103. package/dist/web/standalone/.next/server/app/api/files/route.js +1 -1
  104. package/dist/web/standalone/.next/server/app/api/files/route.js.nft.json +1 -1
  105. package/dist/web/standalone/.next/server/app/api/forensics/route.js +1 -1
  106. package/dist/web/standalone/.next/server/app/api/forensics/route.js.nft.json +1 -1
  107. package/dist/web/standalone/.next/server/app/api/git/route.js +1 -1
  108. package/dist/web/standalone/.next/server/app/api/git/route.js.nft.json +1 -1
  109. package/dist/web/standalone/.next/server/app/api/history/route.js +1 -1
  110. package/dist/web/standalone/.next/server/app/api/history/route.js.nft.json +1 -1
  111. package/dist/web/standalone/.next/server/app/api/hooks/route.js +1 -1
  112. package/dist/web/standalone/.next/server/app/api/hooks/route.js.nft.json +1 -1
  113. package/dist/web/standalone/.next/server/app/api/inspect/route.js +1 -1
  114. package/dist/web/standalone/.next/server/app/api/inspect/route.js.nft.json +1 -1
  115. package/dist/web/standalone/.next/server/app/api/knowledge/route.js +1 -1
  116. package/dist/web/standalone/.next/server/app/api/knowledge/route.js.nft.json +1 -1
  117. package/dist/web/standalone/.next/server/app/api/live-state/route.js +1 -1
  118. package/dist/web/standalone/.next/server/app/api/live-state/route.js.nft.json +1 -1
  119. package/dist/web/standalone/.next/server/app/api/onboarding/route.js +1 -1
  120. package/dist/web/standalone/.next/server/app/api/onboarding/route.js.nft.json +1 -1
  121. package/dist/web/standalone/.next/server/app/api/projects/route.js +1 -1
  122. package/dist/web/standalone/.next/server/app/api/projects/route.js.nft.json +1 -1
  123. package/dist/web/standalone/.next/server/app/api/recovery/route.js +1 -1
  124. package/dist/web/standalone/.next/server/app/api/recovery/route.js.nft.json +1 -1
  125. package/dist/web/standalone/.next/server/app/api/session/browser/route.js +1 -1
  126. package/dist/web/standalone/.next/server/app/api/session/browser/route.js.nft.json +1 -1
  127. package/dist/web/standalone/.next/server/app/api/session/command/route.js +1 -1
  128. package/dist/web/standalone/.next/server/app/api/session/command/route.js.nft.json +1 -1
  129. package/dist/web/standalone/.next/server/app/api/session/events/route.js +2 -2
  130. package/dist/web/standalone/.next/server/app/api/session/events/route.js.nft.json +1 -1
  131. package/dist/web/standalone/.next/server/app/api/session/manage/route.js +1 -1
  132. package/dist/web/standalone/.next/server/app/api/session/manage/route.js.nft.json +1 -1
  133. package/dist/web/standalone/.next/server/app/api/settings-data/route.js +1 -1
  134. package/dist/web/standalone/.next/server/app/api/settings-data/route.js.nft.json +1 -1
  135. package/dist/web/standalone/.next/server/app/api/skill-health/route.js +1 -1
  136. package/dist/web/standalone/.next/server/app/api/skill-health/route.js.nft.json +1 -1
  137. package/dist/web/standalone/.next/server/app/api/steer/route.js +1 -1
  138. package/dist/web/standalone/.next/server/app/api/steer/route.js.nft.json +1 -1
  139. package/dist/web/standalone/.next/server/app/api/switch-root/route.js +1 -1
  140. package/dist/web/standalone/.next/server/app/api/switch-root/route.js.nft.json +1 -1
  141. package/dist/web/standalone/.next/server/app/api/terminal/sessions/route.js +2 -2
  142. package/dist/web/standalone/.next/server/app/api/terminal/sessions/route.js.nft.json +1 -1
  143. package/dist/web/standalone/.next/server/app/api/terminal/stream/route.js +2 -2
  144. package/dist/web/standalone/.next/server/app/api/terminal/stream/route.js.nft.json +1 -1
  145. package/dist/web/standalone/.next/server/app/api/undo/route.js +1 -1
  146. package/dist/web/standalone/.next/server/app/api/undo/route.js.nft.json +1 -1
  147. package/dist/web/standalone/.next/server/app/api/visualizer/route.js +1 -1
  148. package/dist/web/standalone/.next/server/app/api/visualizer/route.js.nft.json +1 -1
  149. package/dist/web/standalone/.next/server/app/index.html +1 -1
  150. package/dist/web/standalone/.next/server/app/index.rsc +1 -1
  151. package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  152. package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
  153. package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
  154. package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
  155. package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  156. package/dist/web/standalone/.next/server/app-paths-manifest.json +14 -14
  157. package/dist/web/standalone/.next/server/chunks/6897.js +12 -0
  158. package/dist/web/standalone/.next/server/pages/404.html +1 -1
  159. package/dist/web/standalone/.next/server/pages/500.html +2 -2
  160. package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
  161. package/package.json +1 -1
  162. package/packages/pi-agent-core/dist/agent-loop.d.ts +8 -0
  163. package/packages/pi-agent-core/dist/agent-loop.d.ts.map +1 -1
  164. package/packages/pi-agent-core/dist/agent-loop.js +50 -0
  165. package/packages/pi-agent-core/dist/agent-loop.js.map +1 -1
  166. package/packages/pi-agent-core/src/agent-loop.test.ts +221 -5
  167. package/packages/pi-agent-core/src/agent-loop.ts +53 -0
  168. package/packages/pi-ai/dist/types.d.ts +16 -1
  169. package/packages/pi-ai/dist/types.d.ts.map +1 -1
  170. package/packages/pi-ai/dist/types.js.map +1 -1
  171. package/packages/pi-ai/src/types.ts +18 -1
  172. package/packages/pi-coding-agent/dist/core/auth-storage.d.ts +9 -0
  173. package/packages/pi-coding-agent/dist/core/auth-storage.d.ts.map +1 -1
  174. package/packages/pi-coding-agent/dist/core/auth-storage.js +50 -1
  175. package/packages/pi-coding-agent/dist/core/auth-storage.js.map +1 -1
  176. package/packages/pi-coding-agent/dist/core/auth-storage.test.js +41 -0
  177. package/packages/pi-coding-agent/dist/core/auth-storage.test.js.map +1 -1
  178. package/packages/pi-coding-agent/dist/core/extensions/loader.d.ts +7 -0
  179. package/packages/pi-coding-agent/dist/core/extensions/loader.d.ts.map +1 -1
  180. package/packages/pi-coding-agent/dist/core/extensions/loader.js +31 -4
  181. package/packages/pi-coding-agent/dist/core/extensions/loader.js.map +1 -1
  182. package/packages/pi-coding-agent/dist/core/extensions/loader.test.js +28 -1
  183. package/packages/pi-coding-agent/dist/core/extensions/loader.test.js.map +1 -1
  184. package/packages/pi-coding-agent/dist/core/extensions/types.d.ts +2 -0
  185. package/packages/pi-coding-agent/dist/core/extensions/types.d.ts.map +1 -1
  186. package/packages/pi-coding-agent/dist/core/extensions/types.js.map +1 -1
  187. package/packages/pi-coding-agent/dist/core/model-registry.d.ts +1 -0
  188. package/packages/pi-coding-agent/dist/core/model-registry.d.ts.map +1 -1
  189. package/packages/pi-coding-agent/dist/core/model-registry.js +1 -0
  190. package/packages/pi-coding-agent/dist/core/model-registry.js.map +1 -1
  191. package/packages/pi-coding-agent/dist/core/model-resolver.js +3 -3
  192. package/packages/pi-coding-agent/dist/core/model-resolver.js.map +1 -1
  193. package/packages/pi-coding-agent/dist/core/resource-loader.d.ts +23 -1
  194. package/packages/pi-coding-agent/dist/core/resource-loader.d.ts.map +1 -1
  195. package/packages/pi-coding-agent/dist/core/resource-loader.js +80 -56
  196. package/packages/pi-coding-agent/dist/core/resource-loader.js.map +1 -1
  197. package/packages/pi-coding-agent/dist/core/sdk.d.ts.map +1 -1
  198. package/packages/pi-coding-agent/dist/core/sdk.js +10 -0
  199. package/packages/pi-coding-agent/dist/core/sdk.js.map +1 -1
  200. package/packages/pi-coding-agent/src/core/auth-storage.test.ts +53 -0
  201. package/packages/pi-coding-agent/src/core/auth-storage.ts +66 -1
  202. package/packages/pi-coding-agent/src/core/extensions/loader.test.ts +39 -1
  203. package/packages/pi-coding-agent/src/core/extensions/loader.ts +34 -4
  204. package/packages/pi-coding-agent/src/core/extensions/types.ts +2 -0
  205. package/packages/pi-coding-agent/src/core/model-registry.ts +2 -0
  206. package/packages/pi-coding-agent/src/core/model-resolver.ts +3 -3
  207. package/packages/pi-coding-agent/src/core/resource-loader.ts +89 -56
  208. package/packages/pi-coding-agent/src/core/sdk.ts +11 -0
  209. package/src/resources/extensions/cmux/index.ts +18 -12
  210. package/src/resources/extensions/gsd/auto/finalize-timeout.ts +46 -0
  211. package/src/resources/extensions/gsd/auto/loop.ts +5 -0
  212. package/src/resources/extensions/gsd/auto/phases.ts +156 -34
  213. package/src/resources/extensions/gsd/auto/session.ts +9 -0
  214. package/src/resources/extensions/gsd/auto-dashboard.ts +11 -3
  215. package/src/resources/extensions/gsd/auto-post-unit.ts +53 -12
  216. package/src/resources/extensions/gsd/auto-prompts.ts +21 -0
  217. package/src/resources/extensions/gsd/auto-recovery.ts +9 -8
  218. package/src/resources/extensions/gsd/auto-start.ts +11 -20
  219. package/src/resources/extensions/gsd/auto-tool-tracking.ts +19 -0
  220. package/src/resources/extensions/gsd/auto-worktree.ts +14 -6
  221. package/src/resources/extensions/gsd/auto.ts +22 -1
  222. package/src/resources/extensions/gsd/bootstrap/db-tools.ts +74 -60
  223. package/src/resources/extensions/gsd/bootstrap/dynamic-tools.ts +15 -0
  224. package/src/resources/extensions/gsd/bootstrap/query-tools.ts +98 -0
  225. package/src/resources/extensions/gsd/bootstrap/register-extension.ts +4 -0
  226. package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +9 -1
  227. package/src/resources/extensions/gsd/bootstrap/sanitize-complete-milestone.ts +57 -0
  228. package/src/resources/extensions/gsd/commands-handlers.ts +10 -4
  229. package/src/resources/extensions/gsd/constants.ts +44 -0
  230. package/src/resources/extensions/gsd/db-writer.ts +78 -4
  231. package/src/resources/extensions/gsd/forensics.ts +21 -5
  232. package/src/resources/extensions/gsd/gsd-db.ts +64 -17
  233. package/src/resources/extensions/gsd/guided-flow.ts +22 -0
  234. package/src/resources/extensions/gsd/metrics.ts +28 -1
  235. package/src/resources/extensions/gsd/native-git-bridge.ts +5 -3
  236. package/src/resources/extensions/gsd/preferences-types.ts +3 -0
  237. package/src/resources/extensions/gsd/preferences.ts +9 -2
  238. package/src/resources/extensions/gsd/prompts/complete-milestone.md +2 -0
  239. package/src/resources/extensions/gsd/prompts/complete-slice.md +2 -0
  240. package/src/resources/extensions/gsd/prompts/doctor-heal.md +1 -0
  241. package/src/resources/extensions/gsd/prompts/forensics.md +2 -0
  242. package/src/resources/extensions/gsd/prompts/reassess-roadmap.md +2 -0
  243. package/src/resources/extensions/gsd/prompts/system.md +1 -0
  244. package/src/resources/extensions/gsd/prompts/validate-milestone.md +2 -0
  245. package/src/resources/extensions/gsd/roadmap-mutations.ts +1 -1
  246. package/src/resources/extensions/gsd/roadmap-slices.ts +10 -5
  247. package/src/resources/extensions/gsd/slice-parallel-conflict.ts +86 -0
  248. package/src/resources/extensions/gsd/slice-parallel-eligibility.ts +73 -0
  249. package/src/resources/extensions/gsd/slice-parallel-orchestrator.ts +477 -0
  250. package/src/resources/extensions/gsd/state.ts +67 -12
  251. package/src/resources/extensions/gsd/status-guards.ts +13 -0
  252. package/src/resources/extensions/gsd/tests/artifact-corruption-2630.test.ts +288 -0
  253. package/src/resources/extensions/gsd/tests/auto-loop.test.ts +34 -13
  254. package/src/resources/extensions/gsd/tests/cmux.test.ts +58 -0
  255. package/src/resources/extensions/gsd/tests/cold-resume-db-reopen.test.ts +51 -0
  256. package/src/resources/extensions/gsd/tests/complete-milestone.test.ts +140 -0
  257. package/src/resources/extensions/gsd/tests/complete-task.test.ts +39 -0
  258. package/src/resources/extensions/gsd/tests/dashboard-model-label-ordering.test.ts +107 -0
  259. package/src/resources/extensions/gsd/tests/db-access-guardrails.test.ts +109 -0
  260. package/src/resources/extensions/gsd/tests/db-path-worktree-symlink.test.ts +13 -9
  261. package/src/resources/extensions/gsd/tests/db-writer.test.ts +134 -0
  262. package/src/resources/extensions/gsd/tests/deferred-slice-dispatch.test.ts +203 -0
  263. package/src/resources/extensions/gsd/tests/discuss-tool-scoping.test.ts +130 -0
  264. package/src/resources/extensions/gsd/tests/doctor-fix-flag.test.ts +92 -0
  265. package/src/resources/extensions/gsd/tests/finalize-timeout-guard.test.ts +116 -0
  266. package/src/resources/extensions/gsd/tests/forensics-stuck-loops.test.ts +103 -0
  267. package/src/resources/extensions/gsd/tests/insert-slice-no-wipe.test.ts +88 -0
  268. package/src/resources/extensions/gsd/tests/integration/git-service.test.ts +27 -7
  269. package/src/resources/extensions/gsd/tests/integration/idle-recovery.test.ts +34 -0
  270. package/src/resources/extensions/gsd/tests/metrics.test.ts +116 -1
  271. package/src/resources/extensions/gsd/tests/milestone-status-tool.test.ts +201 -0
  272. package/src/resources/extensions/gsd/tests/plan-milestone-title.test.ts +2 -1
  273. package/src/resources/extensions/gsd/tests/plan-milestone.test.ts +82 -18
  274. package/src/resources/extensions/gsd/tests/preferences.test.ts +10 -0
  275. package/src/resources/extensions/gsd/tests/prompt-contracts.test.ts +25 -0
  276. package/src/resources/extensions/gsd/tests/roadmap-slices.test.ts +69 -0
  277. package/src/resources/extensions/gsd/tests/shared-wal.test.ts +30 -0
  278. package/src/resources/extensions/gsd/tests/slice-context-injection.test.ts +50 -0
  279. package/src/resources/extensions/gsd/tests/slice-parallel-conflict.test.ts +92 -0
  280. package/src/resources/extensions/gsd/tests/slice-parallel-eligibility.test.ts +95 -0
  281. package/src/resources/extensions/gsd/tests/slice-parallel-orchestrator.test.ts +83 -0
  282. package/src/resources/extensions/gsd/tests/tool-invocation-error-loop-break.test.ts +103 -0
  283. package/src/resources/extensions/gsd/tests/tool-param-optionality.test.ts +349 -0
  284. package/src/resources/extensions/gsd/tests/worktree-health-dispatch.test.ts +35 -2
  285. package/src/resources/extensions/gsd/tests/worktree-health-monorepo.test.ts +73 -0
  286. package/src/resources/extensions/gsd/tests/worktree-resolver.test.ts +34 -0
  287. package/src/resources/extensions/gsd/tests/worktree-submodule-safety.test.ts +1 -1
  288. package/src/resources/extensions/gsd/tests/worktree-teardown-safety.test.ts +148 -0
  289. package/src/resources/extensions/gsd/tools/complete-milestone.ts +34 -20
  290. package/src/resources/extensions/gsd/tools/complete-slice.ts +41 -26
  291. package/src/resources/extensions/gsd/tools/complete-task.ts +12 -12
  292. package/src/resources/extensions/gsd/tools/plan-milestone.ts +55 -30
  293. package/src/resources/extensions/gsd/tools/plan-slice.ts +13 -8
  294. package/src/resources/extensions/gsd/types.ts +44 -22
  295. package/src/resources/extensions/gsd/workflow-projections.ts +23 -5
  296. package/src/resources/extensions/gsd/worktree-manager.ts +76 -28
  297. package/src/resources/extensions/gsd/worktree-resolver.ts +4 -3
  298. package/src/resources/extensions/mcp-client/auth.ts +149 -0
  299. package/src/resources/extensions/mcp-client/index.ts +16 -1
  300. package/src/resources/extensions/ollama/index.ts +6 -14
  301. package/src/resources/extensions/ollama/model-capabilities.ts +41 -34
  302. package/src/resources/extensions/ollama/ndjson-stream.ts +63 -0
  303. package/src/resources/extensions/ollama/ollama-chat-provider.ts +459 -0
  304. package/src/resources/extensions/ollama/ollama-client.ts +30 -30
  305. package/src/resources/extensions/ollama/ollama-discovery.ts +5 -8
  306. package/src/resources/extensions/ollama/ollama-tool.ts +69 -0
  307. package/src/resources/extensions/ollama/tests/ollama-discovery.test.ts +0 -27
  308. package/src/resources/extensions/ollama/thinking-parser.ts +116 -0
  309. package/src/resources/extensions/ollama/types.ts +23 -0
  310. package/dist/web/standalone/.next/server/chunks/2229.js +0 -12
  311. /package/dist/web/standalone/.next/static/{TTlAguZQ5vR9EOv6G8cel → QmuF-eAbuU_2MQ03t38qr}/_buildManifest.js +0 -0
  312. /package/dist/web/standalone/.next/static/{TTlAguZQ5vR9EOv6G8cel → QmuF-eAbuU_2MQ03t38qr}/_ssgManifest.js +0 -0
@@ -0,0 +1,203 @@
1
+ /**
2
+ * Regression test for #2661: Auto-mode dispatches deferred slices.
3
+ *
4
+ * When a decision defers a slice, the dispatcher must skip it and advance
5
+ * to the next eligible slice. This tests both:
6
+ * 1. deriveStateFromDb skips slices with status "deferred"
7
+ * 2. saveDecisionToDb updates the slice status when the decision is a deferral
8
+ */
9
+
10
import { describe, test } from "node:test";
import assert from "node:assert/strict";
import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
import { dirname, join } from "node:path";
import { tmpdir } from "node:os";

import { deriveStateFromDb, invalidateStateCache } from "../state.ts";
import {
  openDatabase,
  closeDatabase,
  isDbAvailable,
  insertMilestone,
  insertSlice,
  insertTask,
  insertArtifact,
  updateSliceStatus,
} from "../gsd-db.ts";
import { isDeferredStatus } from "../status-guards.ts";
28
+
29
+ // ─── Helpers ──────────────────────────────────────────────────────────────
30
+
31
/**
 * Creates a throwaway project root for a single test.
 *
 * Makes a uniquely named directory under the OS temp dir (prefix
 * `gsd-deferred-dispatch-`) and pre-creates `.gsd/milestones` inside it.
 *
 * @returns Path of the new fixture root. The caller is responsible for
 *          removing it when the test is done.
 */
function createFixtureBase(): string {
  const fixtureRoot = mkdtempSync(join(tmpdir(), "gsd-deferred-dispatch-"));
  mkdirSync(join(fixtureRoot, ".gsd", "milestones"), { recursive: true });
  return fixtureRoot;
}
36
+
37
/**
 * Writes a fixture file below the `.gsd` directory of a fixture root.
 *
 * Missing parent directories are created first, so callers can pass nested
 * paths such as `milestones/M001/M001-ROADMAP.md` directly.
 *
 * @param base          Fixture root directory.
 * @param relativePath  File path relative to `<base>/.gsd`.
 * @param content       Text written to the file (replaces existing content).
 */
function writeFile(base: string, relativePath: string, content: string): void {
  const target = join(base, ".gsd", relativePath);
  // dirname() states the intent ("parent directory") more directly than join(target, "..").
  mkdirSync(dirname(target), { recursive: true });
  writeFileSync(target, content);
}
42
+
43
/**
 * Removes a fixture root and everything beneath it.
 *
 * `force: true` makes this a no-op when the directory is already gone, so it
 * is safe to call from a `finally` block.
 *
 * @param base  Fixture root directory to delete.
 */
function cleanup(base: string): void {
  rmSync(base, { recursive: true, force: true });
}
46
+
47
+ // ─── Tests ────────────────────────────────────────────────────────────────
48
+
49
describe("deferred-slice-dispatch (#2661)", () => {
  /**
   * Runs `body` against a fresh fixture directory and an in-memory database.
   *
   * The database is closed and the fixture directory removed exactly once, in
   * `finally`, whether `body` passes or throws. (Previously each test closed
   * the database at the end of `try` and then again in `finally`.)
   */
  async function withFixtureDb(body: (base: string) => void | Promise<void>): Promise<void> {
    const base = createFixtureBase();
    try {
      openDatabase(":memory:");
      await body(base);
    } finally {
      closeDatabase();
      cleanup(base);
    }
  }

  test("isDeferredStatus returns true for 'deferred'", () => {
    assert.ok(isDeferredStatus("deferred"), "should recognize 'deferred'");
    assert.ok(!isDeferredStatus("active"), "should not match 'active'");
    assert.ok(!isDeferredStatus("complete"), "should not match 'complete'");
    assert.ok(!isDeferredStatus("pending"), "should not match 'pending'");
  });

  test("deriveStateFromDb skips deferred slice and picks next eligible", async () => {
    await withFixtureDb(async (base) => {
      assert.ok(isDbAvailable());

      // M001 with three slices: S01 complete, S02 deferred, S03 pending
      insertMilestone({ id: "M001", title: "Test Milestone", status: "active" });

      insertSlice({ id: "S01", milestoneId: "M001", title: "Done Slice", status: "complete", risk: "low", depends: [] });
      insertSlice({ id: "S02", milestoneId: "M001", title: "Deferred Slice", status: "deferred", risk: "low", depends: [] });
      insertSlice({ id: "S03", milestoneId: "M001", title: "Next Slice", status: "pending", risk: "low", depends: [] });

      writeFile(base, "milestones/M001/M001-ROADMAP.md", `# M001: Test Milestone

**Vision:** Test deferred slices.

## Slices

- [x] **S01: Done Slice** \`risk:low\` \`depends:[]\`
> Done.

- [ ] **S02: Deferred Slice** \`risk:low\` \`depends:[]\`
> Deferred.

- [ ] **S03: Next Slice** \`risk:low\` \`depends:[]\`
> Next.
`);
      // S01 needs a SUMMARY file to count as complete for milestone-level checks
      writeFile(base, "milestones/M001/slices/S01/S01-SUMMARY.md", "# S01 Summary\nDone.");

      invalidateStateCache();
      const state = await deriveStateFromDb(base);

      // The active slice must be S03, NOT S02 (which is deferred)
      assert.equal(state.activeMilestone?.id, "M001", "active milestone is M001");
      assert.equal(state.activeSlice?.id, "S03", "active slice should skip deferred S02 and land on S03");
      assert.notEqual(state.activeSlice?.id, "S02", "active slice must NOT be the deferred S02");
    });
  });

  test("deriveStateFromDb does not count deferred slices as done for progress", async () => {
    await withFixtureDb(async (base) => {
      insertMilestone({ id: "M001", title: "Test", status: "active" });
      insertSlice({ id: "S01", milestoneId: "M001", title: "Complete", status: "complete", risk: "low", depends: [] });
      insertSlice({ id: "S02", milestoneId: "M001", title: "Deferred", status: "deferred", risk: "low", depends: [] });
      insertSlice({ id: "S03", milestoneId: "M001", title: "Pending", status: "pending", risk: "low", depends: [] });

      writeFile(base, "milestones/M001/M001-ROADMAP.md", `# M001
## Slices
- [x] **S01: Complete** \`risk:low\` \`depends:[]\`
- [ ] **S02: Deferred** \`risk:low\` \`depends:[]\`
- [ ] **S03: Pending** \`risk:low\` \`depends:[]\`
`);
      writeFile(base, "milestones/M001/slices/S01/S01-SUMMARY.md", "# Done");

      invalidateStateCache();
      const state = await deriveStateFromDb(base);

      // Deferred slices should not count as "done" in progress
      // Only S01 (complete) counts as done
      assert.equal(state.progress?.slices?.done, 1, "only 1 slice (S01) should be done");
      // Total should still be 3 (deferred slices are still part of the milestone)
      assert.equal(state.progress?.slices?.total, 3, "all 3 slices counted in total");
    });
  });

  test("all slices deferred results in blocked state", async () => {
    await withFixtureDb(async (base) => {
      insertMilestone({ id: "M001", title: "Test", status: "active" });
      insertSlice({ id: "S01", milestoneId: "M001", title: "Deferred A", status: "deferred", risk: "low", depends: [] });
      insertSlice({ id: "S02", milestoneId: "M001", title: "Deferred B", status: "deferred", risk: "low", depends: [] });

      writeFile(base, "milestones/M001/M001-ROADMAP.md", `# M001
## Slices
- [ ] **S01: Deferred A** \`risk:low\` \`depends:[]\`
- [ ] **S02: Deferred B** \`risk:low\` \`depends:[]\`
`);

      invalidateStateCache();
      const state = await deriveStateFromDb(base);

      // No eligible slice — should be blocked
      assert.equal(state.activeSlice, null, "no active slice when all deferred");
      assert.equal(state.phase, "blocked", "phase should be blocked when all slices deferred");
    });
  });

  test("saveDecisionToDb marks slice as deferred when decision is a deferral", async () => {
    await withFixtureDb(async (base) => {
      insertMilestone({ id: "M001", title: "Test", status: "active" });
      insertSlice({ id: "S03", milestoneId: "M001", title: "Target Slice", status: "active", risk: "low", depends: [] });

      writeFile(base, "milestones/M001/M001-ROADMAP.md", `# M001
## Slices
- [ ] **S03: Target Slice** \`risk:low\` \`depends:[]\`
`);

      // Loaded lazily, after the database is open, exactly as the test did before.
      const { saveDecisionToDb } = await import("../db-writer.ts");
      const { getSlice } = await import("../gsd-db.ts");

      // Save a deferral decision that references M001/S03
      await saveDecisionToDb(
        {
          scope: "deferral",
          decision: "Defer S03 to focus on higher priority work",
          choice: "defer M001/S03",
          rationale: "Not ready yet",
        },
        base,
      );

      // The slice status should now be "deferred"
      const slice = getSlice("M001", "S03");
      assert.equal(slice?.status, "deferred", "slice status should be updated to 'deferred' after deferral decision");
    });
  });
});
@@ -0,0 +1,130 @@
1
+ /**
2
+ * discuss-tool-scoping.test.ts — Tests for #2949.
3
+ *
4
+ * xAI/Grok returns "Grammar is too complex" (400) when the combined tool
5
+ * schemas exceed the provider's grammar limit. The GSD discuss flow only
6
+ * needs a small subset of tools (summary_save, decision_save, etc.), but
7
+ * was sending ALL ~30+ tools to the provider.
8
+ *
9
+ * These tests verify:
10
+ * 1. DISCUSS_TOOLS_ALLOWLIST is exported and contains only the tools
11
+ * needed during discuss flows (no heavy planning/execution/completion tools).
12
+ * 2. Heavy execution tools are NOT in the allowlist.
13
+ * 3. The allowlist includes the tools actually referenced by discuss prompts.
14
+ * 4. guided-flow.ts (dispatchWorkflow) references the allowlist and setActiveTools in its source.
15
+ */
16
+
17
+ import { describe, test } from "node:test";
18
+ import assert from "node:assert/strict";
19
+ import { readFileSync } from "node:fs";
20
+ import { join, dirname } from "node:path";
21
+ import { fileURLToPath } from "node:url";
22
+
23
+ import { DISCUSS_TOOLS_ALLOWLIST } from "../constants.ts";
24
+
25
+ const __dirname = dirname(fileURLToPath(import.meta.url));
26
+ const promptsDir = join(__dirname, "..", "prompts");
27
+ const guidedFlowPath = join(__dirname, "..", "guided-flow.ts");
28
+
29
+ // ─── Heavy tools that should NOT be in discuss scope ─────────────────────────
30
+
31
+ /** Tools that are only needed during planning, execution, or completion phases */
32
+ const HEAVY_TOOLS = [
33
+ "gsd_plan_slice",
34
+ "gsd_slice_plan",
35
+ "gsd_plan_task",
36
+ "gsd_task_plan",
37
+ "gsd_task_complete",
38
+ "gsd_complete_task",
39
+ "gsd_slice_complete",
40
+ "gsd_complete_slice",
41
+ "gsd_complete_milestone",
42
+ "gsd_milestone_complete",
43
+ "gsd_validate_milestone",
44
+ "gsd_milestone_validate",
45
+ "gsd_replan_slice",
46
+ "gsd_slice_replan",
47
+ "gsd_reassess_roadmap",
48
+ "gsd_roadmap_reassess",
49
+ "gsd_save_gate_result",
50
+ ];
51
+
52
+ // ─── Tools that discuss prompts reference ────────────────────────────────────
53
+
54
+ /** Tools explicitly called by discuss prompt templates */
55
+ const DISCUSS_REQUIRED_TOOLS = [
56
+ "gsd_summary_save", // guided-discuss-slice.md, guided-discuss-milestone.md, discuss.md
57
+ "gsd_decision_save", // discuss.md output phase
58
+ "gsd_plan_milestone", // discuss.md output phase (single + multi milestone)
59
+ "gsd_milestone_generate_id", // discuss.md multi-milestone Phase 1
60
+ "gsd_requirement_update", // used during discuss for requirement updates
61
+ ];
62
+
63
+ // ─── Tests ───────────────────────────────────────────────────────────────────
64
+
65
+ describe("discuss tool scoping (#2949)", () => {
66
+ test("DISCUSS_TOOLS_ALLOWLIST is exported and non-empty", () => {
67
+ assert.ok(Array.isArray(DISCUSS_TOOLS_ALLOWLIST), "should be an array");
68
+ assert.ok(DISCUSS_TOOLS_ALLOWLIST.length > 0, "should not be empty");
69
+ });
70
+
71
+ test("DISCUSS_TOOLS_ALLOWLIST excludes heavy execution/completion tools", () => {
72
+ for (const heavy of HEAVY_TOOLS) {
73
+ assert.ok(
74
+ !DISCUSS_TOOLS_ALLOWLIST.includes(heavy),
75
+ `allowlist should NOT include heavy tool "${heavy}"`,
76
+ );
77
+ }
78
+ });
79
+
80
+ test("DISCUSS_TOOLS_ALLOWLIST includes tools referenced by discuss prompts", () => {
81
+ for (const required of DISCUSS_REQUIRED_TOOLS) {
82
+ assert.ok(
83
+ DISCUSS_TOOLS_ALLOWLIST.includes(required),
84
+ `allowlist should include "${required}" (used by discuss prompts)`,
85
+ );
86
+ }
87
+ });
88
+
89
+ test("DISCUSS_TOOLS_ALLOWLIST is significantly smaller than full tool set", () => {
90
+ // Full set is 27 DB tools + dynamic + journal = 33+
91
+ // Discuss set should be roughly 10 GSD tools (5 canonical + 5 aliases)
92
+ assert.ok(
93
+ DISCUSS_TOOLS_ALLOWLIST.length <= 12,
94
+ `allowlist should have at most 12 GSD tools, got ${DISCUSS_TOOLS_ALLOWLIST.length}`,
95
+ );
96
+ });
97
+
98
+ test("guided-discuss-slice.md references gsd_summary_save", () => {
99
+ const prompt = readFileSync(join(promptsDir, "guided-discuss-slice.md"), "utf-8");
100
+ assert.ok(
101
+ prompt.includes("gsd_summary_save"),
102
+ "guided-discuss-slice.md should reference gsd_summary_save",
103
+ );
104
+ });
105
+
106
+ test("discuss.md references gsd_plan_milestone and gsd_decision_save", () => {
107
+ const prompt = readFileSync(join(promptsDir, "discuss.md"), "utf-8");
108
+ assert.ok(
109
+ prompt.includes("gsd_plan_milestone"),
110
+ "discuss.md should reference gsd_plan_milestone",
111
+ );
112
+ assert.ok(
113
+ prompt.includes("gsd_decision_save"),
114
+ "discuss.md should reference gsd_decision_save",
115
+ );
116
+ });
117
+
118
+ test("dispatchWorkflow source code scopes tools for discuss unit types", () => {
119
+ const source = readFileSync(guidedFlowPath, "utf-8");
120
+ // Static text check: guided-flow.ts must mention both the allowlist and setActiveTools
121
+ assert.ok(
122
+ source.includes("DISCUSS_TOOLS_ALLOWLIST"),
123
+ "guided-flow.ts should reference DISCUSS_TOOLS_ALLOWLIST for tool scoping",
124
+ );
125
+ assert.ok(
126
+ source.includes("setActiveTools"),
127
+ "guided-flow.ts should call setActiveTools to scope tools during discuss",
128
+ );
129
+ });
130
+ });
@@ -0,0 +1,92 @@
1
+ /**
2
+ * Regression test for #1919: --fix flag not stripped before positional parse.
3
+ *
4
+ * parseDoctorArgs("--fix") must:
5
+ * 1. Set fixFlag = true
6
+ * 2. Not leak "--fix" into requestedScope
7
+ * 3. Keep mode as "doctor" (the flag is not a positional subcommand)
8
+ */
9
+
10
+ import { parseDoctorArgs } from "../commands-handlers.js";
11
+ import { createTestContext } from "./test-helpers.ts";
12
+
13
+ const { assertEq, assertTrue, report } = createTestContext();
14
+
15
+ async function main(): Promise<void> {
16
+ // ── 1. Bare --fix flag ──────────────────────────────────────────────────────
17
+ console.log("\n=== bare --fix flag (#1919) ===");
18
+ {
19
+ const r = parseDoctorArgs("--fix");
20
+ assertTrue(r.fixFlag, "--fix sets fixFlag to true");
21
+ assertEq(r.mode, "doctor", "--fix does not change mode from doctor");
22
+ assertEq(r.requestedScope, undefined, "--fix is stripped and does not become requestedScope");
23
+ }
24
+
25
+ // ── 2. --fix with a scope ──────────────────────────────────────────────────
26
+ console.log("\n=== --fix with scope ===");
27
+ {
28
+ const r = parseDoctorArgs("--fix M001/S01");
29
+ assertTrue(r.fixFlag, "--fix M001/S01 sets fixFlag to true");
30
+ assertEq(r.mode, "doctor", "--fix M001/S01 keeps mode as doctor");
31
+ assertEq(r.requestedScope, "M001/S01", "scope is M001/S01 after stripping --fix");
32
+ }
33
+
34
+ // ── 3. Positional fix still works ──────────────────────────────────────────
35
+ console.log("\n=== positional fix subcommand ===");
36
+ {
37
+ const r = parseDoctorArgs("fix");
38
+ assertEq(r.fixFlag, false, "positional fix does not set fixFlag");
39
+ assertEq(r.mode, "fix", "positional fix sets mode to fix");
40
+ assertEq(r.requestedScope, undefined, "no scope with bare positional fix");
41
+ }
42
+
43
+ // ── 4. Positional fix with scope ───────────────────────────────────────────
44
+ console.log("\n=== positional fix with scope ===");
45
+ {
46
+ const r = parseDoctorArgs("fix M001");
47
+ assertEq(r.mode, "fix", "fix M001 sets mode to fix");
48
+ assertEq(r.requestedScope, "M001", "fix M001 parses scope as M001");
49
+ }
50
+
51
+ // ── 5. --fix combined with other flags ─────────────────────────────────────
52
+ console.log("\n=== --fix combined with --dry-run ===");
53
+ {
54
+ const r = parseDoctorArgs("--fix --dry-run");
55
+ assertTrue(r.fixFlag, "--fix --dry-run sets fixFlag");
56
+ assertTrue(r.dryRun, "--fix --dry-run sets dryRun");
57
+ assertEq(r.requestedScope, undefined, "no scope leaked from combined flags");
58
+ }
59
+
60
+ // ── 6. --fix combined with --json ──────────────────────────────────────────
61
+ console.log("\n=== --fix with --json ===");
62
+ {
63
+ const r = parseDoctorArgs("--fix --json");
64
+ assertTrue(r.fixFlag, "--fix --json sets fixFlag");
65
+ assertTrue(r.jsonMode, "--fix --json sets jsonMode");
66
+ assertEq(r.requestedScope, undefined, "no scope leaked from --fix --json");
67
+ }
68
+
69
+ // ── 7. Empty args (baseline) ───────────────────────────────────────────────
70
+ console.log("\n=== empty args baseline ===");
71
+ {
72
+ const r = parseDoctorArgs("");
73
+ assertEq(r.fixFlag, false, "empty args: fixFlag false");
74
+ assertEq(r.mode, "doctor", "empty args: mode is doctor");
75
+ assertEq(r.requestedScope, undefined, "empty args: no scope");
76
+ }
77
+
78
+ // ── 8. heal and audit modes unaffected ─────────────────────────────────────
79
+ console.log("\n=== heal and audit modes ===");
80
+ {
81
+ const rh = parseDoctorArgs("heal M001/S01");
82
+ assertEq(rh.mode, "heal", "heal mode parsed correctly");
83
+ assertEq(rh.requestedScope, "M001/S01", "heal scope parsed correctly");
84
+
85
+ const ra = parseDoctorArgs("audit");
86
+ assertEq(ra.mode, "audit", "audit mode parsed correctly");
87
+ }
88
+
89
+ report();
90
+ }
91
+
92
+ main();
@@ -0,0 +1,116 @@
1
+ /**
2
+ * Regression test for #2344: Auto-loop hangs after plan-slice completes
3
+ * because postUnitPostVerification() never resolves.
4
+ *
5
+ * When postUnitPostVerification() hangs (e.g., due to a module import
6
+ * deadlock or SQLite transaction hang), the auto-loop blocks forever
7
+ * with no error message, no notification, and no recovery.
8
+ *
9
+ * The fix adds a timeout guard around postUnitPostVerification() in
10
+ * runFinalize(). If it doesn't resolve within the timeout, the function
11
+ * force-returns "continue" and logs an error, allowing the loop to
12
+ * proceed to the next iteration.
13
+ *
14
+ * This test verifies the timeout utility used by the fix, since the
15
+ * full runFinalize function has too many transitive dependencies for
16
+ * isolated unit testing.
17
+ */
18
+
19
+ import { createTestContext } from "./test-helpers.ts";
20
+ import {
21
+ withTimeout,
22
+ FINALIZE_POST_TIMEOUT_MS,
23
+ } from "../auto/finalize-timeout.ts";
24
+
25
+ const { assertTrue, assertEq, report } = createTestContext();
26
+
27
+ // ═══ Test: withTimeout resolves when inner promise resolves promptly ══════════
28
+
29
+ {
30
+ console.log("\n=== #2344: withTimeout passes through when promise resolves ===");
31
+
32
+ const result = await withTimeout(
33
+ Promise.resolve("ok"),
34
+ 1000,
35
+ "test-timeout",
36
+ );
37
+ assertEq(result.value, "ok", "should return inner value");
38
+ assertEq(result.timedOut, false, "should not be timed out");
39
+ }
40
+
41
+ // ═══ Test: withTimeout reports timedOut (value undefined) when inner promise hangs ═══
42
+
43
+ {
44
+ console.log("\n=== #2344: withTimeout returns fallback on hang ===");
45
+
46
+ const startTime = Date.now();
47
+ const result = await withTimeout(
48
+ new Promise<string>(() => {
49
+ // Never resolves
50
+ }),
51
+ 100, // short timeout for testing
52
+ "test-timeout",
53
+ );
54
+ const elapsed = Date.now() - startTime;
55
+
56
+ assertEq(result.timedOut, true, "should report timeout");
57
+ assertEq(result.value, undefined, "value should be undefined on timeout");
58
+ assertTrue(elapsed >= 90, `should wait at least 90ms (took ${elapsed}ms)`);
59
+ assertTrue(elapsed < 500, `should not wait too long (took ${elapsed}ms)`);
60
+ }
61
+
62
+ // ═══ Test: withTimeout propagates rejection from the inner promise ═══════════
63
+
64
+ {
65
+ console.log("\n=== #2344: withTimeout propagates rejection ===");
66
+
67
+ let caught = false;
68
+ try {
69
+ await withTimeout(
70
+ Promise.reject(new Error("boom")),
71
+ 1000,
72
+ "test-timeout",
73
+ );
74
+ } catch (err: any) {
75
+ caught = true;
76
+ assertEq(err.message, "boom", "should propagate the error");
77
+ }
78
+ assertTrue(caught, "rejection should propagate");
79
+ }
80
+
81
+ // ═══ Test: FINALIZE_POST_TIMEOUT_MS is defined and reasonable ═════════════════
82
+
83
+ {
84
+ console.log("\n=== #2344: timeout constant is defined and reasonable ===");
85
+
86
+ assertTrue(
87
+ typeof FINALIZE_POST_TIMEOUT_MS === "number",
88
+ "FINALIZE_POST_TIMEOUT_MS should be a number",
89
+ );
90
+ assertTrue(
91
+ FINALIZE_POST_TIMEOUT_MS >= 30_000,
92
+ `timeout should be >= 30s (got ${FINALIZE_POST_TIMEOUT_MS}ms)`,
93
+ );
94
+ assertTrue(
95
+ FINALIZE_POST_TIMEOUT_MS <= 120_000,
96
+ `timeout should be <= 120s (got ${FINALIZE_POST_TIMEOUT_MS}ms)`,
97
+ );
98
+ }
99
+
100
+ // ═══ Test: withTimeout cleans up timer on success ════════════════════════════
101
+
102
+ {
103
+ console.log("\n=== #2344: withTimeout cleans up timer on success ===");
104
+
105
+ // If withTimeout left its 5000ms timer pending after success, it could keep the process alive.
106
+ // Nothing below asserts that directly — a leak would presumably only show up as a slow exit.
107
+ const result = await withTimeout(
108
+ new Promise<string>((r) => setTimeout(() => r("delayed"), 50)),
109
+ 5000,
110
+ "cleanup-test",
111
+ );
112
+ assertEq(result.value, "delayed", "should resolve with delayed value");
113
+ assertEq(result.timedOut, false, "should not time out");
114
+ }
115
+
116
+ report();
@@ -0,0 +1,103 @@
1
+ /**
2
+ * Forensics detectStuckLoops tests — #1943
3
+ *
4
+ * Verifies that detectStuckLoops counts distinct dispatches (unique startedAt
5
+ * values per type/id) rather than raw entries. Counting raw entries produces false-positive
6
+ * stuck-loop anomalies when idle-watchdog duplicate metrics entries exist.
7
+ */
8
+
9
+ import test from "node:test";
10
+ import assert from "node:assert/strict";
11
+ import type { UnitMetrics } from "../metrics.js";
12
+ import { detectStuckLoops, type ForensicAnomaly } from "../forensics.js";
13
+
14
+ // ── Helpers ──────────────────────────────────────────────────────────────────
15
+
16
+ function makeUnit(overrides: Partial<UnitMetrics> = {}): UnitMetrics {
17
+ return {
18
+ type: "execute-task",
19
+ id: "M001/S01/T01",
20
+ model: "claude-sonnet-4-20250514",
21
+ startedAt: 1000,
22
+ finishedAt: 2000,
23
+ tokens: { input: 1000, output: 500, cacheRead: 200, cacheWrite: 100, total: 1800 },
24
+ cost: 0.05,
25
+ toolCalls: 3,
26
+ assistantMessages: 2,
27
+ userMessages: 1,
28
+ ...overrides,
29
+ };
30
+ }
31
+
32
+
33
+ // ── Tests ────────────────────────────────────────────────────────────────────
34
+
35
+ test("#1943 detectStuckLoops does not flag idle-watchdog duplicates as stuck loops", () => {
36
+ const anomalies: ForensicAnomaly[] = [];
37
+ const startedAt = 1774011016218;
38
+
39
+ // 20 entries with the SAME startedAt — these are idle-watchdog duplicates,
40
+ // not real re-dispatches. They should count as 1 dispatch.
41
+ const units: UnitMetrics[] = [];
42
+ for (let i = 0; i < 20; i++) {
43
+ units.push(makeUnit({
44
+ type: "research-slice",
45
+ id: "M009/S02",
46
+ startedAt,
47
+ finishedAt: startedAt + (i + 1) * 15000,
48
+ cost: 1.50 + i * 0.05,
49
+ toolCalls: 0,
50
+ }));
51
+ }
52
+
53
+ detectStuckLoops(units, anomalies);
54
+
55
+ // A single dispatch (same startedAt) should NOT trigger a stuck-loop anomaly
56
+ assert.equal(
57
+ anomalies.length, 0,
58
+ `expected 0 anomalies for 20 watchdog snapshots of the same dispatch, got ${anomalies.length}: ${anomalies.map(a => a.summary).join(", ")}`,
59
+ );
60
+ });
61
+
62
+ test("#1943 detectStuckLoops correctly flags real re-dispatches", () => {
63
+ const anomalies: ForensicAnomaly[] = [];
64
+
65
+ // 3 entries with DIFFERENT startedAt values — these are real re-dispatches
66
+ const units: UnitMetrics[] = [
67
+ makeUnit({ type: "execute-task", id: "M001/S01/T01", startedAt: 1000, finishedAt: 2000, cost: 0.05 }),
68
+ makeUnit({ type: "execute-task", id: "M001/S01/T01", startedAt: 3000, finishedAt: 4000, cost: 0.06 }),
69
+ makeUnit({ type: "execute-task", id: "M001/S01/T01", startedAt: 5000, finishedAt: 6000, cost: 0.07 }),
70
+ ];
71
+
72
+ detectStuckLoops(units, anomalies);
73
+
74
+ assert.equal(anomalies.length, 1, "3 distinct dispatches of the same unit should flag 1 anomaly");
75
+ assert.equal(anomalies[0].type, "stuck-loop");
76
+ assert.ok(anomalies[0].summary.includes("3 times"), `summary should mention 3 dispatches: ${anomalies[0].summary}`);
77
+ });
78
+
79
+ test("#1943 detectStuckLoops ignores watchdog duplicates but flags real re-dispatches in mixed data", () => {
80
+ const anomalies: ForensicAnomaly[] = [];
81
+
82
+ const units: UnitMetrics[] = [
83
+ // 5 watchdog duplicates for dispatch 1 (same startedAt = 1000)
84
+ ...Array.from({ length: 5 }, (_, i) =>
85
+ makeUnit({ type: "execute-task", id: "M001/S01/T01", startedAt: 1000, finishedAt: 1000 + (i + 1) * 15000, cost: 0.05 + i * 0.01 }),
86
+ ),
87
+ // 3 watchdog duplicates for dispatch 2 (same startedAt = 100000)
88
+ ...Array.from({ length: 3 }, (_, i) =>
89
+ makeUnit({ type: "execute-task", id: "M001/S01/T01", startedAt: 100000, finishedAt: 100000 + (i + 1) * 15000, cost: 0.08 + i * 0.01 }),
90
+ ),
91
+ // 1 entry for dispatch 3 (startedAt = 200000)
92
+ makeUnit({ type: "execute-task", id: "M001/S01/T01", startedAt: 200000, finishedAt: 260000, cost: 0.10 }),
93
+ // Different unit — only 1 dispatch, should NOT be flagged
94
+ makeUnit({ type: "plan-slice", id: "M001/S01", startedAt: 500, finishedAt: 1500, cost: 0.02 }),
95
+ ];
96
+
97
+ detectStuckLoops(units, anomalies);
98
+
99
+ // M001/S01/T01 has 3 distinct dispatches (startedAt: 1000, 100000, 200000) — should be flagged
100
+ // M001/S01 has 1 dispatch — should NOT be flagged
101
+ assert.equal(anomalies.length, 1, `expected 1 anomaly (for the 3x dispatched task), got ${anomalies.length}`);
102
+ assert.ok(anomalies[0].summary.includes("3 times"));
103
+ });