sofia-cli 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/.github/agents/copilot-instructions.md +39 -0
  2. package/.github/agents/speckit.analyze.agent.md +184 -0
  3. package/.github/agents/speckit.checklist.agent.md +294 -0
  4. package/.github/agents/speckit.clarify.agent.md +181 -0
  5. package/.github/agents/speckit.constitution.agent.md +84 -0
  6. package/.github/agents/speckit.implement.agent.md +135 -0
  7. package/.github/agents/speckit.plan.agent.md +90 -0
  8. package/.github/agents/speckit.specify.agent.md +258 -0
  9. package/.github/agents/speckit.tasks.agent.md +137 -0
  10. package/.github/agents/speckit.taskstoissues.agent.md +30 -0
  11. package/.github/copilot-instructions.md +257 -0
  12. package/.github/prompts/speckit.analyze.prompt.md +3 -0
  13. package/.github/prompts/speckit.checklist.prompt.md +3 -0
  14. package/.github/prompts/speckit.clarify.prompt.md +3 -0
  15. package/.github/prompts/speckit.constitution.prompt.md +3 -0
  16. package/.github/prompts/speckit.implement.prompt.md +3 -0
  17. package/.github/prompts/speckit.plan.prompt.md +3 -0
  18. package/.github/prompts/speckit.specify.prompt.md +3 -0
  19. package/.github/prompts/speckit.tasks.prompt.md +3 -0
  20. package/.github/prompts/speckit.taskstoissues.prompt.md +3 -0
  21. package/.github/workflows/ci.yml +38 -0
  22. package/.prettierrc +6 -0
  23. package/.specify/memory/constitution.md +181 -0
  24. package/.specify/scripts/bash/check-prerequisites.sh +166 -0
  25. package/.specify/scripts/bash/common.sh +156 -0
  26. package/.specify/scripts/bash/create-new-feature.sh +297 -0
  27. package/.specify/scripts/bash/setup-plan.sh +61 -0
  28. package/.specify/scripts/bash/update-agent-context.sh +810 -0
  29. package/.specify/templates/agent-file-template.md +28 -0
  30. package/.specify/templates/checklist-template.md +40 -0
  31. package/.specify/templates/constitution-template.md +50 -0
  32. package/.specify/templates/plan-template.md +113 -0
  33. package/.specify/templates/spec-template.md +115 -0
  34. package/.specify/templates/tasks-template.md +251 -0
  35. package/.vscode/mcp.json +42 -0
  36. package/.vscode/settings.json +19 -0
  37. package/CODE_OF_CONDUCT.md +128 -0
  38. package/LICENSE +21 -0
  39. package/README.md +213 -0
  40. package/dist/src/cli/developCommand.js +240 -0
  41. package/dist/src/cli/directCommands.js +143 -0
  42. package/dist/src/cli/envLoader.js +16 -0
  43. package/dist/src/cli/exportCommand.js +53 -0
  44. package/dist/src/cli/index.js +203 -0
  45. package/dist/src/cli/ioContext.js +109 -0
  46. package/dist/src/cli/preflight.js +57 -0
  47. package/dist/src/cli/statusCommand.js +110 -0
  48. package/dist/src/cli/workshopCommand.js +400 -0
  49. package/dist/src/develop/checkpointState.js +86 -0
  50. package/dist/src/develop/codeGenerator.js +319 -0
  51. package/dist/src/develop/dynamicScaffolder.js +226 -0
  52. package/dist/src/develop/githubMcpAdapter.js +122 -0
  53. package/dist/src/develop/index.js +15 -0
  54. package/dist/src/develop/mcpContextEnricher.js +195 -0
  55. package/dist/src/develop/pocScaffolder.js +542 -0
  56. package/dist/src/develop/ralphLoop.js +659 -0
  57. package/dist/src/develop/templateRegistry.js +364 -0
  58. package/dist/src/develop/testRunner.js +202 -0
  59. package/dist/src/logging/logger.js +58 -0
  60. package/dist/src/loop/conversationLoop.js +227 -0
  61. package/dist/src/loop/phaseSummarizer.js +87 -0
  62. package/dist/src/mcp/mcpManager.js +267 -0
  63. package/dist/src/mcp/mcpTransport.js +391 -0
  64. package/dist/src/mcp/retryPolicy.js +47 -0
  65. package/dist/src/mcp/webSearch.js +254 -0
  66. package/dist/src/phases/contextSummarizer.js +101 -0
  67. package/dist/src/phases/discoveryEnricher.js +156 -0
  68. package/dist/src/phases/phaseExtractors.js +222 -0
  69. package/dist/src/phases/phaseHandlers.js +328 -0
  70. package/dist/src/prompts/design.md +51 -0
  71. package/dist/src/prompts/develop-boundary.md +51 -0
  72. package/dist/src/prompts/develop.md +111 -0
  73. package/dist/src/prompts/discover.md +58 -0
  74. package/dist/src/prompts/ideate.md +56 -0
  75. package/dist/src/prompts/plan.md +51 -0
  76. package/dist/src/prompts/promptLoader.js +167 -0
  77. package/dist/src/prompts/promptLoader.ts +198 -0
  78. package/dist/src/prompts/select.md +47 -0
  79. package/dist/src/prompts/summarize/README.md +8 -0
  80. package/dist/src/prompts/summarize/design-summary.md +37 -0
  81. package/dist/src/prompts/summarize/develop-summary.md +25 -0
  82. package/dist/src/prompts/summarize/ideate-summary.md +27 -0
  83. package/dist/src/prompts/summarize/plan-summary.md +27 -0
  84. package/dist/src/prompts/summarize/select-summary.md +21 -0
  85. package/dist/src/prompts/system.md +28 -0
  86. package/dist/src/sessions/exportPaths.js +22 -0
  87. package/dist/src/sessions/exportWriter.js +406 -0
  88. package/dist/src/sessions/sessionManager.js +81 -0
  89. package/dist/src/sessions/sessionStore.js +65 -0
  90. package/dist/src/shared/activitySpinner.js +91 -0
  91. package/dist/src/shared/copilotClient.js +129 -0
  92. package/dist/src/shared/data/cards.json +1249 -0
  93. package/dist/src/shared/data/cardsLoader.js +51 -0
  94. package/dist/src/shared/errorClassifier.js +120 -0
  95. package/dist/src/shared/events.js +28 -0
  96. package/dist/src/shared/markdownRenderer.js +34 -0
  97. package/dist/src/shared/schemas/session.js +265 -0
  98. package/dist/src/shared/tableRenderer.js +20 -0
  99. package/dist/src/vendor/chalk.js +2 -0
  100. package/dist/src/vendor/cli-table3.js +3 -0
  101. package/dist/src/vendor/commander.js +2 -0
  102. package/dist/src/vendor/marked-terminal.js +3 -0
  103. package/dist/src/vendor/marked.js +2 -0
  104. package/dist/src/vendor/ora.js +2 -0
  105. package/dist/src/vendor/pino.js +2 -0
  106. package/dist/src/vendor/zod.js +2 -0
  107. package/dist/tests/e2e/developE2e.spec.js +126 -0
  108. package/dist/tests/e2e/developFailureE2e.spec.js +247 -0
  109. package/dist/tests/e2e/developPty.spec.js +75 -0
  110. package/dist/tests/e2e/discoveryWebSearchRelevance.spec.js +84 -0
  111. package/dist/tests/e2e/harness.spec.js +83 -0
  112. package/dist/tests/e2e/mcpLive.spec.js +120 -0
  113. package/dist/tests/e2e/newSession.e2e.spec.js +177 -0
  114. package/dist/tests/e2e/ralphLoopEnrichmentComparison.spec.js +62 -0
  115. package/dist/tests/e2e/workiqEnrichment.spec.js +56 -0
  116. package/dist/tests/e2e/zavaSimulation.spec.js +452 -0
  117. package/dist/tests/fixtures/test-fixture-project/src/add.js +3 -0
  118. package/dist/tests/fixtures/test-fixture-project/tests/failing.test.js +6 -0
  119. package/dist/tests/fixtures/test-fixture-project/tests/hanging.test.js +8 -0
  120. package/dist/tests/fixtures/test-fixture-project/tests/passing.test.js +10 -0
  121. package/dist/tests/fixtures/test-fixture-project/vitest.config.js +6 -0
  122. package/dist/tests/integration/autoStartConversation.spec.js +138 -0
  123. package/dist/tests/integration/defaultCommand.spec.js +147 -0
  124. package/dist/tests/integration/directCommandNonTty.spec.js +224 -0
  125. package/dist/tests/integration/directCommandTty.spec.js +151 -0
  126. package/dist/tests/integration/discoveryEnrichmentFlow.spec.js +175 -0
  127. package/dist/tests/integration/exportArtifacts.spec.js +202 -0
  128. package/dist/tests/integration/exportFallbackFlow.spec.js +99 -0
  129. package/dist/tests/integration/mcpDegradationFlow.spec.js +190 -0
  130. package/dist/tests/integration/mcpTransportFlow.spec.js +139 -0
  131. package/dist/tests/integration/newSessionFlow.spec.js +343 -0
  132. package/dist/tests/integration/pocGithubMcp.spec.js +186 -0
  133. package/dist/tests/integration/pocLocalFallback.spec.js +171 -0
  134. package/dist/tests/integration/pocScaffold.spec.js +163 -0
  135. package/dist/tests/integration/ralphLoopFlow.spec.js +359 -0
  136. package/dist/tests/integration/ralphLoopPartial.spec.js +368 -0
  137. package/dist/tests/integration/resumeAndBacktrack.spec.js +247 -0
  138. package/dist/tests/integration/spinnerLifecycle.spec.js +220 -0
  139. package/dist/tests/integration/summarizationFlow.spec.js +115 -0
  140. package/dist/tests/integration/testRunnerReal.spec.js +52 -0
  141. package/dist/tests/integration/webSearchAgent.spec.js +128 -0
  142. package/dist/tests/live/copilotSdkLive.spec.js +107 -0
  143. package/dist/tests/live/zavaFullWorkshop.spec.js +392 -0
  144. package/dist/tests/setup/loadEnv.js +3 -0
  145. package/dist/tests/unit/cli/developCommand.spec.js +567 -0
  146. package/dist/tests/unit/cli/directCommands.spec.js +279 -0
  147. package/dist/tests/unit/cli/envLoader.spec.js +58 -0
  148. package/dist/tests/unit/cli/ioContext.spec.js +119 -0
  149. package/dist/tests/unit/cli/preflight.spec.js +108 -0
  150. package/dist/tests/unit/cli/statusCommand.spec.js +111 -0
  151. package/dist/tests/unit/cli/workshopClientFallback.spec.js +80 -0
  152. package/dist/tests/unit/cli/workshopCommand.spec.js +329 -0
  153. package/dist/tests/unit/config/vitestEnvSetup.spec.js +13 -0
  154. package/dist/tests/unit/develop/checkpointState.spec.js +315 -0
  155. package/dist/tests/unit/develop/codeGenerator.spec.js +355 -0
  156. package/dist/tests/unit/develop/githubMcpAdapter.spec.js +231 -0
  157. package/dist/tests/unit/develop/mcpContextEnricher.spec.js +433 -0
  158. package/dist/tests/unit/develop/outputValidator.spec.js +119 -0
  159. package/dist/tests/unit/develop/pocScaffolder.spec.js +353 -0
  160. package/dist/tests/unit/develop/ralphLoop.spec.js +1248 -0
  161. package/dist/tests/unit/develop/templateRegistry.spec.js +85 -0
  162. package/dist/tests/unit/develop/testRunner.spec.js +249 -0
  163. package/dist/tests/unit/infraBicep.spec.js +92 -0
  164. package/dist/tests/unit/infraDeploy.spec.js +82 -0
  165. package/dist/tests/unit/infraTeardown.spec.js +63 -0
  166. package/dist/tests/unit/logging/logger.spec.js +43 -0
  167. package/dist/tests/unit/loop/conversationLoop.spec.js +592 -0
  168. package/dist/tests/unit/loop/phaseSummarizer.spec.js +141 -0
  169. package/dist/tests/unit/loop/streamingMarkdown.spec.js +147 -0
  170. package/dist/tests/unit/mcp/mcpManager.spec.js +279 -0
  171. package/dist/tests/unit/mcp/mcpTransport.spec.js +529 -0
  172. package/dist/tests/unit/mcp/retryPolicy.spec.js +218 -0
  173. package/dist/tests/unit/mcp/timeoutValidation.spec.js +46 -0
  174. package/dist/tests/unit/mcp/webSearch.spec.js +567 -0
  175. package/dist/tests/unit/phases/contextSummarizer.spec.js +140 -0
  176. package/dist/tests/unit/phases/discoveryEnricher.repeatCalls.spec.js +93 -0
  177. package/dist/tests/unit/phases/discoveryEnricher.spec.js +411 -0
  178. package/dist/tests/unit/phases/phaseExtractors.spec.js +352 -0
  179. package/dist/tests/unit/phases/phaseHandlers.spec.js +425 -0
  180. package/dist/tests/unit/prompts/promptLoader.spec.js +118 -0
  181. package/dist/tests/unit/schemas/pocSchemas.spec.js +412 -0
  182. package/dist/tests/unit/schemas/session.spec.js +257 -0
  183. package/dist/tests/unit/sessions/exportPaths.spec.js +31 -0
  184. package/dist/tests/unit/sessions/exportWriter.spec.js +655 -0
  185. package/dist/tests/unit/sessions/sessionManager.spec.js +151 -0
  186. package/dist/tests/unit/sessions/sessionStore.spec.js +116 -0
  187. package/dist/tests/unit/shared/activitySpinner.spec.js +175 -0
  188. package/dist/tests/unit/shared/cardsLoader.spec.js +76 -0
  189. package/dist/tests/unit/shared/copilotClient.spec.js +155 -0
  190. package/dist/tests/unit/shared/errorClassifier.spec.js +131 -0
  191. package/dist/tests/unit/shared/events.spec.js +55 -0
  192. package/dist/tests/unit/shared/markdownRenderer.spec.js +35 -0
  193. package/dist/tests/unit/shared/markdownRendererChunks.spec.js +70 -0
  194. package/dist/tests/unit/shared/tableRenderer.spec.js +34 -0
  195. package/dist/vitest.config.js +14 -0
  196. package/dist/vitest.live.config.js +18 -0
  197. package/docs/README.md +35 -0
  198. package/docs/architecture.md +169 -0
  199. package/docs/cli-usage.md +207 -0
  200. package/docs/environment.md +66 -0
  201. package/docs/export-format.md +146 -0
  202. package/docs/session-model.md +113 -0
  203. package/eslint.config.js +35 -0
  204. package/infra/deploy.sh +193 -0
  205. package/infra/gather-env.sh +211 -0
  206. package/infra/main.bicep +90 -0
  207. package/infra/main.bicepparam +18 -0
  208. package/infra/resources.bicep +134 -0
  209. package/infra/teardown.sh +114 -0
  210. package/package.json +63 -0
  211. package/specs/001-cli-workshop-rebuild/checklists/requirements.md +35 -0
  212. package/specs/001-cli-workshop-rebuild/contracts/cli.md +59 -0
  213. package/specs/001-cli-workshop-rebuild/contracts/export-summary-json.md +23 -0
  214. package/specs/001-cli-workshop-rebuild/contracts/session-json.md +30 -0
  215. package/specs/001-cli-workshop-rebuild/data-model.md +210 -0
  216. package/specs/001-cli-workshop-rebuild/plan.md +361 -0
  217. package/specs/001-cli-workshop-rebuild/quickstart.md +83 -0
  218. package/specs/001-cli-workshop-rebuild/research.md +116 -0
  219. package/specs/001-cli-workshop-rebuild/spec.md +240 -0
  220. package/specs/001-cli-workshop-rebuild/tasks.md +476 -0
  221. package/specs/002-poc-generation/contracts/poc-output.md +172 -0
  222. package/specs/002-poc-generation/contracts/ralph-loop.md +113 -0
  223. package/specs/002-poc-generation/data-model.md +172 -0
  224. package/specs/002-poc-generation/plan.md +109 -0
  225. package/specs/002-poc-generation/quickstart.md +97 -0
  226. package/specs/002-poc-generation/research.md +786 -0
  227. package/specs/002-poc-generation/spec.md +81 -0
  228. package/specs/002-poc-generation/tasks-fix.md +198 -0
  229. package/specs/002-poc-generation/tasks.md +252 -0
  230. package/specs/003-mcp-transport-integration/checklists/requirements.md +37 -0
  231. package/specs/003-mcp-transport-integration/contracts/context-enricher.md +220 -0
  232. package/specs/003-mcp-transport-integration/contracts/discovery-enricher.md +267 -0
  233. package/specs/003-mcp-transport-integration/contracts/github-adapter.md +149 -0
  234. package/specs/003-mcp-transport-integration/contracts/mcp-transport.md +288 -0
  235. package/specs/003-mcp-transport-integration/data-model.md +326 -0
  236. package/specs/003-mcp-transport-integration/plan.md +114 -0
  237. package/specs/003-mcp-transport-integration/quickstart.md +311 -0
  238. package/specs/003-mcp-transport-integration/research.md +395 -0
  239. package/specs/003-mcp-transport-integration/spec.md +234 -0
  240. package/specs/003-mcp-transport-integration/tasks.md +324 -0
  241. package/specs/003-next-spec-gaps.md +150 -0
  242. package/specs/004-dev-resume-hardening/checklists/requirements.md +37 -0
  243. package/specs/004-dev-resume-hardening/contracts/cli.md +160 -0
  244. package/specs/004-dev-resume-hardening/data-model.md +321 -0
  245. package/specs/004-dev-resume-hardening/plan.md +107 -0
  246. package/specs/004-dev-resume-hardening/quickstart.md +115 -0
  247. package/specs/004-dev-resume-hardening/research.md +142 -0
  248. package/specs/004-dev-resume-hardening/spec.md +221 -0
  249. package/specs/004-dev-resume-hardening/tasks.md +333 -0
  250. package/specs/005-ai-search-deploy/checklists/requirements.md +39 -0
  251. package/specs/005-ai-search-deploy/contracts/web-search-tool.md +241 -0
  252. package/specs/005-ai-search-deploy/data-model.md +130 -0
  253. package/specs/005-ai-search-deploy/plan.md +93 -0
  254. package/specs/005-ai-search-deploy/quickstart.md +96 -0
  255. package/specs/005-ai-search-deploy/research.md +187 -0
  256. package/specs/005-ai-search-deploy/spec.md +143 -0
  257. package/specs/005-ai-search-deploy/tasks.md +284 -0
  258. package/specs/006-workshop-extraction-fixes/checklists/requirements.md +61 -0
  259. package/specs/006-workshop-extraction-fixes/contracts/summarization-and-export.md +131 -0
  260. package/specs/006-workshop-extraction-fixes/data-model.md +149 -0
  261. package/specs/006-workshop-extraction-fixes/plan.md +123 -0
  262. package/specs/006-workshop-extraction-fixes/quickstart.md +101 -0
  263. package/specs/006-workshop-extraction-fixes/research.md +143 -0
  264. package/specs/006-workshop-extraction-fixes/spec.md +210 -0
  265. package/specs/006-workshop-extraction-fixes/tasks.md +316 -0
  266. package/src/cli/developCommand.ts +308 -0
  267. package/src/cli/directCommands.ts +195 -0
  268. package/src/cli/envLoader.ts +17 -0
  269. package/src/cli/exportCommand.ts +65 -0
  270. package/src/cli/index.ts +249 -0
  271. package/src/cli/ioContext.ts +139 -0
  272. package/src/cli/preflight.ts +86 -0
  273. package/src/cli/statusCommand.ts +118 -0
  274. package/src/cli/workshopCommand.ts +496 -0
  275. package/src/develop/checkpointState.ts +121 -0
  276. package/src/develop/codeGenerator.ts +402 -0
  277. package/src/develop/dynamicScaffolder.ts +284 -0
  278. package/src/develop/githubMcpAdapter.ts +199 -0
  279. package/src/develop/index.ts +34 -0
  280. package/src/develop/mcpContextEnricher.ts +279 -0
  281. package/src/develop/pocScaffolder.ts +646 -0
  282. package/src/develop/ralphLoop.ts +1044 -0
  283. package/src/develop/templateRegistry.ts +427 -0
  284. package/src/develop/testRunner.ts +276 -0
  285. package/src/logging/logger.ts +73 -0
  286. package/src/loop/conversationLoop.ts +355 -0
  287. package/src/loop/phaseSummarizer.ts +114 -0
  288. package/src/mcp/mcpManager.ts +365 -0
  289. package/src/mcp/mcpTransport.ts +562 -0
  290. package/src/mcp/retryPolicy.ts +87 -0
  291. package/src/mcp/webSearch.ts +388 -0
  292. package/src/originalPrompts/design_thinking.md +178 -0
  293. package/src/originalPrompts/design_thinking_persona.md +76 -0
  294. package/src/originalPrompts/document_generator_example.md +77 -0
  295. package/src/originalPrompts/document_generator_persona.md +47 -0
  296. package/src/originalPrompts/facilitator_persona.md +125 -0
  297. package/src/originalPrompts/guardrails.md +47 -0
  298. package/src/phases/contextSummarizer.ts +154 -0
  299. package/src/phases/discoveryEnricher.ts +223 -0
  300. package/src/phases/phaseExtractors.ts +247 -0
  301. package/src/phases/phaseHandlers.ts +450 -0
  302. package/src/prompts/design.md +51 -0
  303. package/src/prompts/develop-boundary.md +51 -0
  304. package/src/prompts/develop.md +111 -0
  305. package/src/prompts/discover.md +58 -0
  306. package/src/prompts/ideate.md +56 -0
  307. package/src/prompts/plan.md +51 -0
  308. package/src/prompts/promptLoader.ts +198 -0
  309. package/src/prompts/select.md +47 -0
  310. package/src/prompts/summarize/README.md +8 -0
  311. package/src/prompts/summarize/design-summary.md +37 -0
  312. package/src/prompts/summarize/develop-summary.md +25 -0
  313. package/src/prompts/summarize/ideate-summary.md +27 -0
  314. package/src/prompts/summarize/plan-summary.md +27 -0
  315. package/src/prompts/summarize/select-summary.md +21 -0
  316. package/src/prompts/system.md +28 -0
  317. package/src/sessions/exportPaths.ts +28 -0
  318. package/src/sessions/exportWriter.ts +490 -0
  319. package/src/sessions/sessionManager.ts +119 -0
  320. package/src/sessions/sessionStore.ts +69 -0
  321. package/src/shared/activitySpinner.ts +108 -0
  322. package/src/shared/copilotClient.ts +291 -0
  323. package/src/shared/data/cards.json +1249 -0
  324. package/src/shared/data/cardsLoader.ts +70 -0
  325. package/src/shared/errorClassifier.ts +160 -0
  326. package/src/shared/events.ts +103 -0
  327. package/src/shared/markdownRenderer.ts +44 -0
  328. package/src/shared/schemas/session.ts +346 -0
  329. package/src/shared/tableRenderer.ts +28 -0
  330. package/src/types/marked-terminal.d.ts +5 -0
  331. package/src/vendor/chalk.ts +2 -0
  332. package/src/vendor/cli-table3.ts +3 -0
  333. package/src/vendor/commander.ts +2 -0
  334. package/src/vendor/marked-terminal.ts +3 -0
  335. package/src/vendor/marked.ts +2 -0
  336. package/src/vendor/ora.ts +2 -0
  337. package/src/vendor/pino.ts +3 -0
  338. package/src/vendor/zod.ts +3 -0
  339. package/tests/e2e/developE2e.spec.ts +152 -0
  340. package/tests/e2e/developFailureE2e.spec.ts +289 -0
  341. package/tests/e2e/developPty.spec.ts +86 -0
  342. package/tests/e2e/discoveryWebSearchRelevance.spec.ts +103 -0
  343. package/tests/e2e/harness.spec.ts +104 -0
  344. package/tests/e2e/mcpLive.spec.ts +149 -0
  345. package/tests/e2e/newSession.e2e.spec.ts +245 -0
  346. package/tests/e2e/ralphLoopEnrichmentComparison.spec.ts +70 -0
  347. package/tests/e2e/workiqEnrichment.spec.ts +72 -0
  348. package/tests/e2e/zava-assessment/agent-interaction-script.md +258 -0
  349. package/tests/e2e/zava-assessment/company-profile.md +98 -0
  350. package/tests/e2e/zava-assessment/expected-results-checklist.md +454 -0
  351. package/tests/e2e/zavaSimulation.spec.ts +511 -0
  352. package/tests/fixtures/completedSession.json +141 -0
  353. package/tests/fixtures/test-fixture-project/package-lock.json +1585 -0
  354. package/tests/fixtures/test-fixture-project/package.json +12 -0
  355. package/tests/fixtures/test-fixture-project/src/add.ts +3 -0
  356. package/tests/fixtures/test-fixture-project/tests/failing.test.ts +7 -0
  357. package/tests/fixtures/test-fixture-project/tests/hanging.test.ts +9 -0
  358. package/tests/fixtures/test-fixture-project/tests/passing.test.ts +13 -0
  359. package/tests/fixtures/test-fixture-project/vitest.config.ts +7 -0
  360. package/tests/integration/autoStartConversation.spec.ts +168 -0
  361. package/tests/integration/defaultCommand.spec.ts +179 -0
  362. package/tests/integration/directCommandNonTty.spec.ts +260 -0
  363. package/tests/integration/directCommandTty.spec.ts +185 -0
  364. package/tests/integration/discoveryEnrichmentFlow.spec.ts +209 -0
  365. package/tests/integration/exportArtifacts.spec.ts +232 -0
  366. package/tests/integration/exportFallbackFlow.spec.ts +115 -0
  367. package/tests/integration/mcpDegradationFlow.spec.ts +231 -0
  368. package/tests/integration/mcpTransportFlow.spec.ts +178 -0
  369. package/tests/integration/newSessionFlow.spec.ts +406 -0
  370. package/tests/integration/pocGithubMcp.spec.ts +224 -0
  371. package/tests/integration/pocLocalFallback.spec.ts +205 -0
  372. package/tests/integration/pocScaffold.spec.ts +220 -0
  373. package/tests/integration/ralphLoopFlow.spec.ts +430 -0
  374. package/tests/integration/ralphLoopPartial.spec.ts +416 -0
  375. package/tests/integration/resumeAndBacktrack.spec.ts +278 -0
  376. package/tests/integration/spinnerLifecycle.spec.ts +270 -0
  377. package/tests/integration/summarizationFlow.spec.ts +135 -0
  378. package/tests/integration/testRunnerReal.spec.ts +63 -0
  379. package/tests/integration/webSearchAgent.spec.ts +155 -0
  380. package/tests/live/copilotSdkLive.spec.ts +149 -0
  381. package/tests/live/zavaFullWorkshop.spec.ts +515 -0
  382. package/tests/setup/loadEnv.ts +5 -0
  383. package/tests/unit/cli/developCommand.spec.ts +679 -0
  384. package/tests/unit/cli/directCommands.spec.ts +325 -0
  385. package/tests/unit/cli/envLoader.spec.ts +73 -0
  386. package/tests/unit/cli/ioContext.spec.ts +148 -0
  387. package/tests/unit/cli/preflight.spec.ts +125 -0
  388. package/tests/unit/cli/statusCommand.spec.ts +134 -0
  389. package/tests/unit/cli/workshopClientFallback.spec.ts +100 -0
  390. package/tests/unit/cli/workshopCommand.spec.ts +378 -0
  391. package/tests/unit/config/vitestEnvSetup.spec.ts +24 -0
  392. package/tests/unit/develop/checkpointState.spec.ts +378 -0
  393. package/tests/unit/develop/codeGenerator.spec.ts +447 -0
  394. package/tests/unit/develop/githubMcpAdapter.spec.ts +283 -0
  395. package/tests/unit/develop/mcpContextEnricher.spec.ts +564 -0
  396. package/tests/unit/develop/outputValidator.spec.ts +134 -0
  397. package/tests/unit/develop/pocScaffolder.spec.ts +451 -0
  398. package/tests/unit/develop/ralphLoop.spec.ts +1439 -0
  399. package/tests/unit/develop/templateRegistry.spec.ts +106 -0
  400. package/tests/unit/develop/testRunner.spec.ts +294 -0
  401. package/tests/unit/infraBicep.spec.ts +116 -0
  402. package/tests/unit/infraDeploy.spec.ts +102 -0
  403. package/tests/unit/infraTeardown.spec.ts +77 -0
  404. package/tests/unit/logging/logger.spec.ts +50 -0
  405. package/tests/unit/loop/conversationLoop.spec.ts +719 -0
  406. package/tests/unit/loop/phaseSummarizer.spec.ts +169 -0
  407. package/tests/unit/loop/streamingMarkdown.spec.ts +180 -0
  408. package/tests/unit/mcp/mcpManager.spec.ts +336 -0
  409. package/tests/unit/mcp/mcpTransport.spec.ts +689 -0
  410. package/tests/unit/mcp/retryPolicy.spec.ts +278 -0
  411. package/tests/unit/mcp/timeoutValidation.spec.ts +55 -0
  412. package/tests/unit/mcp/webSearch.spec.ts +718 -0
  413. package/tests/unit/phases/contextSummarizer.spec.ts +158 -0
  414. package/tests/unit/phases/discoveryEnricher.repeatCalls.spec.ts +125 -0
  415. package/tests/unit/phases/discoveryEnricher.spec.ts +512 -0
  416. package/tests/unit/phases/phaseExtractors.spec.ts +406 -0
  417. package/tests/unit/phases/phaseHandlers.spec.ts +483 -0
  418. package/tests/unit/prompts/promptLoader.spec.ts +144 -0
  419. package/tests/unit/schemas/pocSchemas.spec.ts +457 -0
  420. package/tests/unit/schemas/session.spec.ts +328 -0
  421. package/tests/unit/sessions/exportPaths.spec.ts +38 -0
  422. package/tests/unit/sessions/exportWriter.spec.ts +737 -0
  423. package/tests/unit/sessions/sessionManager.spec.ts +174 -0
  424. package/tests/unit/sessions/sessionStore.spec.ts +136 -0
  425. package/tests/unit/shared/activitySpinner.spec.ts +211 -0
  426. package/tests/unit/shared/cardsLoader.spec.ts +89 -0
  427. package/tests/unit/shared/copilotClient.spec.ts +185 -0
  428. package/tests/unit/shared/errorClassifier.spec.ts +152 -0
  429. package/tests/unit/shared/events.spec.ts +71 -0
  430. package/tests/unit/shared/markdownRenderer.spec.ts +42 -0
  431. package/tests/unit/shared/markdownRendererChunks.spec.ts +83 -0
  432. package/tests/unit/shared/tableRenderer.spec.ts +38 -0
  433. package/tsconfig.json +20 -0
  434. package/vitest.config.ts +15 -0
  435. package/vitest.live.config.ts +19 -0
@@ -0,0 +1,270 @@
1
+ /**
2
+ * Integration tests for spinner lifecycle in ConversationLoop (T089).
3
+ *
4
+ * Verifies the full spinner lifecycle during streaming: "Thinking..." appears
5
+ * after user input, transitions on ToolCall events, prints tool summary on
6
+ * ToolResult, stops on first TextDelta, and handles multi-tool sequences.
7
+ */
8
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
9
+ import { Writable } from 'node:stream';
10
+
11
+ import {
12
+ ConversationLoop,
13
+ type LoopIO,
14
+ type PhaseHandler,
15
+ } from '../../src/loop/conversationLoop.js';
16
+ import type { CopilotClient, ConversationSession, CopilotMessage, SessionOptions } from '../../src/shared/copilotClient.js';
17
+ import { ActivitySpinner } from '../../src/shared/activitySpinner.js';
18
+ import type { SofiaEvent } from '../../src/shared/events.js';
19
+ import {
20
+ createTextDeltaEvent,
21
+ createToolCallEvent,
22
+ createToolResultEvent,
23
+ } from '../../src/shared/events.js';
24
+ import type { WorkshopSession } from '../../src/shared/schemas/session.js';
25
+
26
+ // ── Helpers ─────────────────────────────────────────────────────────────────
27
+
28
+ function makeSession(overrides?: Partial<WorkshopSession>): WorkshopSession {
29
+ return {
30
+ sessionId: 'spinner-int-test',
31
+ schemaVersion: '1.0.0',
32
+ createdAt: '2025-01-01T00:00:00Z',
33
+ updatedAt: '2025-01-01T00:00:00Z',
34
+ phase: 'Discover',
35
+ status: 'Active',
36
+ participants: [],
37
+ artifacts: { generatedFiles: [] },
38
+ ...overrides,
39
+ };
40
+ }
41
+
42
+ function makeIO(inputs: (string | null)[], opts?: { json?: boolean; tty?: boolean }): LoopIO & { _written: string[]; _activities: string[]; _toolSummaries: Array<{ toolName: string; summary: string }> } {
43
+ let inputIndex = 0;
44
+ const written: string[] = [];
45
+ const activities: string[] = [];
46
+ const toolSummaries: Array<{ toolName: string; summary: string }> = [];
47
+
48
+ return {
49
+ write(text: string) { written.push(text); },
50
+ writeActivity(text: string) { activities.push(text); },
51
+ writeToolSummary(toolName: string, summary: string) { toolSummaries.push({ toolName, summary }); },
52
+ async readInput(): Promise<string | null> {
53
+ if (inputIndex >= inputs.length) return null;
54
+ return inputs[inputIndex++] ?? null;
55
+ },
56
+ async showDecisionGate() { return { choice: 'continue' as const }; },
57
+ isJsonMode: opts?.json ?? false,
58
+ isTTY: opts?.tty ?? true,
59
+ get _written() { return written; },
60
+ get _activities() { return activities; },
61
+ get _toolSummaries() { return toolSummaries; },
62
+ };
63
+ }
64
+
65
+ function makePhaseHandler(overrides?: Partial<PhaseHandler>): PhaseHandler {
66
+ return {
67
+ phase: 'Discover',
68
+ buildSystemPrompt: () => 'System prompt',
69
+ extractResult: () => ({}),
70
+ ...overrides,
71
+ };
72
+ }
73
+
74
+ function createCaptureStream(): Writable & { getOutput: () => string } {
75
+ const chunks: string[] = [];
76
+ const stream = new Writable({
77
+ write(chunk, _encoding, callback) {
78
+ chunks.push(chunk.toString());
79
+ callback();
80
+ },
81
+ });
82
+ (stream as Writable & { getOutput: () => string }).getOutput = () => chunks.join('');
83
+ return stream as Writable & { getOutput: () => string };
84
+ }
85
+
86
+ /**
87
+ * Create a fake CopilotClient that yields a custom sequence of SofiaEvents.
88
+ * This allows testing ToolCall → ToolResult → TextDelta sequences.
89
+ */
90
+ function createEventSequenceClient(eventSequences: SofiaEvent[][]): CopilotClient {
91
+ let seqIndex = 0;
92
+
93
+ return {
94
+ async createSession(_opts: SessionOptions): Promise<ConversationSession> {
95
+ const history: CopilotMessage[] = [];
96
+ return {
97
+ send(message: CopilotMessage): AsyncIterable<SofiaEvent> {
98
+ history.push(message);
99
+ const events = eventSequences[seqIndex] ?? [createTextDeltaEvent('[No more events]')];
100
+ seqIndex++;
101
+
102
+ return {
103
+ async *[Symbol.asyncIterator]() {
104
+ for (const event of events) {
105
+ yield event;
106
+ }
107
+ },
108
+ };
109
+ },
110
+ getHistory: () => [...history],
111
+ };
112
+ },
113
+ };
114
+ }
115
+
116
+ // ── Tests ────────────────────────────────────────────────────────────────────
117
+
118
+ describe('Spinner lifecycle integration (T089)', () => {
119
+ beforeEach(() => {
120
+ process.removeAllListeners('SIGINT');
121
+ });
122
+
123
+ it('starts Thinking spinner before sending, stops on first TextDelta', async () => {
124
+ const stream = createCaptureStream();
125
+ const spinner = new ActivitySpinner({ isTTY: true, isJsonMode: false, stream });
126
+
127
+ const startSpy = vi.spyOn(spinner, 'startThinking');
128
+ const stopSpy = vi.spyOn(spinner, 'stop');
129
+
130
+ const client = createEventSequenceClient([
131
+ [createTextDeltaEvent('Hello from LLM')],
132
+ ]);
133
+
134
+ const io = makeIO(['test input'], { tty: true });
135
+ const loop = new ConversationLoop({
136
+ client,
137
+ io,
138
+ session: makeSession(),
139
+ phaseHandler: makePhaseHandler(),
140
+ spinner,
141
+ });
142
+
143
+ await loop.run();
144
+
145
+ expect(startSpy).toHaveBeenCalled();
146
+ expect(stopSpy).toHaveBeenCalled();
147
+ expect(spinner.isActive()).toBe(false);
148
+ });
149
+
150
+ it('transitions spinner to tool name on ToolCall, completes on ToolResult', async () => {
151
+ const stream = createCaptureStream();
152
+ const spinner = new ActivitySpinner({ isTTY: true, isJsonMode: false, stream });
153
+
154
+ const toolCallSpy = vi.spyOn(spinner, 'startToolCall');
155
+ const completeSpy = vi.spyOn(spinner, 'completeToolCall');
156
+
157
+ const client = createEventSequenceClient([
158
+ [
159
+ createToolCallEvent('WorkIQ', { query: 'logistics' }),
160
+ createToolResultEvent('WorkIQ', 'Found 5 processes'),
161
+ createTextDeltaEvent('Based on the analysis...'),
162
+ ],
163
+ ]);
164
+
165
+ const io = makeIO(['analyze my processes'], { tty: true });
166
+ const loop = new ConversationLoop({
167
+ client,
168
+ io,
169
+ session: makeSession(),
170
+ phaseHandler: makePhaseHandler(),
171
+ spinner,
172
+ });
173
+
174
+ await loop.run();
175
+
176
+ expect(toolCallSpy).toHaveBeenCalledWith('WorkIQ');
177
+ expect(completeSpy).toHaveBeenCalled();
178
+ expect(spinner.isActive()).toBe(false);
179
+ });
180
+
181
+ it('handles multi-tool sequences (ToolCall → ToolResult → ToolCall → ToolResult → TextDelta)', async () => {
182
+ const stream = createCaptureStream();
183
+ const spinner = new ActivitySpinner({ isTTY: true, isJsonMode: false, stream });
184
+
185
+ const toolCallSpy = vi.spyOn(spinner, 'startToolCall');
186
+ const completeSpy = vi.spyOn(spinner, 'completeToolCall');
187
+
188
+ const client = createEventSequenceClient([
189
+ [
190
+ createToolCallEvent('WorkIQ', { query: 'tasks' }),
191
+ createToolResultEvent('WorkIQ', 'Found 3 tasks'),
192
+ createToolCallEvent('Context7', { doc: 'azure-ai' }),
193
+ createToolResultEvent('Context7', '12 docs retrieved'),
194
+ createTextDeltaEvent('Here are my findings...'),
195
+ ],
196
+ ]);
197
+
198
+ const io = makeIO(['research tasks'], { tty: true });
199
+ const loop = new ConversationLoop({
200
+ client,
201
+ io,
202
+ session: makeSession(),
203
+ phaseHandler: makePhaseHandler(),
204
+ spinner,
205
+ });
206
+
207
+ await loop.run();
208
+
209
+ expect(toolCallSpy).toHaveBeenCalledTimes(2);
210
+ expect(completeSpy).toHaveBeenCalledTimes(2);
211
+
212
+ // Tool summaries should be written to IO
213
+ expect(io._toolSummaries.length).toBe(2);
214
+ expect(io._toolSummaries[0].toolName).toBe('WorkIQ');
215
+ expect(io._toolSummaries[1].toolName).toBe('Context7');
216
+ });
217
+
218
+ it('writes tool summaries to IO on ToolResult events', async () => {
219
+ const stream = createCaptureStream();
220
+ const spinner = new ActivitySpinner({ isTTY: true, isJsonMode: false, stream });
221
+
222
+ const client = createEventSequenceClient([
223
+ [
224
+ createToolCallEvent('GitHub', { repo: 'test' }),
225
+ createToolResultEvent('GitHub', 'Found 8 repos'),
226
+ createTextDeltaEvent('The repo results are...'),
227
+ ],
228
+ ]);
229
+
230
+ const io = makeIO(['search repos'], { tty: true });
231
+ const loop = new ConversationLoop({
232
+ client,
233
+ io,
234
+ session: makeSession(),
235
+ phaseHandler: makePhaseHandler(),
236
+ spinner,
237
+ });
238
+
239
+ await loop.run();
240
+
241
+ expect(io._toolSummaries).toEqual([
242
+ { toolName: 'GitHub', summary: expect.stringContaining('Found 8 repos') },
243
+ ]);
244
+ });
245
+
246
+ it('no-op spinner works without errors in non-TTY mode', async () => {
247
+ const client = createEventSequenceClient([
248
+ [
249
+ createToolCallEvent('WorkIQ', { query: 'test' }),
250
+ createToolResultEvent('WorkIQ', 'ok'),
251
+ createTextDeltaEvent('Results.'),
252
+ ],
253
+ ]);
254
+
255
+ const io = makeIO(['query'], { tty: false });
256
+ const loop = new ConversationLoop({
257
+ client,
258
+ io,
259
+ session: makeSession(),
260
+ phaseHandler: makePhaseHandler(),
261
+ // No spinner provided — uses no-op default
262
+ });
263
+
264
+ await loop.run();
265
+
266
+ // Should work without errors
267
+ const allOutput = io._written.join('');
268
+ expect(allOutput).toContain('Results.');
269
+ });
270
+ });
@@ -0,0 +1,135 @@
1
+ /**
2
+ * Integration test: Summarization flow.
3
+ *
4
+ * Tests the full pipeline: ConversationLoop → phaseSummarize → session updated.
5
+ * Verifies that when inline extraction fails, the post-phase summarization
6
+ * call extracts structured data from the transcript.
7
+ */
8
+ import { describe, it, expect, vi } from 'vitest';
9
+
10
+ import { ConversationLoop } from '../../src/loop/conversationLoop.js';
11
+ import type { LoopIO, PhaseHandler } from '../../src/loop/conversationLoop.js';
12
+ import type { CopilotClient } from '../../src/shared/copilotClient.js';
13
+ import type { WorkshopSession } from '../../src/shared/schemas/session.js';
14
+
15
+ function makeIO(): LoopIO {
16
+ return {
17
+ write: vi.fn(),
18
+ writeActivity: vi.fn(),
19
+ writeToolSummary: vi.fn(),
20
+ readInput: vi.fn().mockResolvedValue(null), // EOF immediately
21
+ showDecisionGate: vi.fn().mockResolvedValue({ choice: 'continue' }),
22
+ isJsonMode: false,
23
+ isTTY: false,
24
+ };
25
+ }
26
+
27
+ function makeSession(overrides?: Partial<WorkshopSession>): WorkshopSession {
28
+ return {
29
+ sessionId: 'integration-test',
30
+ schemaVersion: '1.0.0',
31
+ createdAt: '2025-01-01T00:00:00Z',
32
+ updatedAt: '2025-01-01T00:00:00Z',
33
+ phase: 'Ideate',
34
+ status: 'Active',
35
+ participants: [],
36
+ artifacts: { generatedFiles: [] },
37
+ turns: [],
38
+ ...overrides,
39
+ };
40
+ }
41
+
42
+ describe('summarization flow integration', () => {
43
+ it('populates session.ideas via summarization when inline extraction fails', async () => {
44
+ // Inline extraction returns nothing (simulates LLM not embedding JSON)
45
+ const handler: PhaseHandler = {
46
+ phase: 'Ideate',
47
+ buildSystemPrompt: () => 'You are an Ideate facilitator.',
48
+ extractResult: vi.fn().mockReturnValue({}),
49
+ getInitialMessage: () => 'Start ideation.',
50
+ };
51
+
52
+ const ideas = [
53
+ { id: 'idea-1', title: 'AI Chatbot', description: 'Automated support', workflowStepIds: ['s1'] },
54
+ ];
55
+
56
+ let callCount = 0;
57
+ const fakeClient: CopilotClient = {
58
+ createSession: vi.fn().mockImplementation(async () => ({
59
+ send: vi.fn().mockImplementation(async function* () {
60
+ callCount++;
61
+ if (callCount === 1) {
62
+ // First call: regular conversation (no JSON)
63
+ yield { type: 'TextDelta', text: 'Here are some ideas for your business.' };
64
+ } else {
65
+ // Second call: summarization (returns JSON)
66
+ yield { type: 'TextDelta', text: '```json\n' + JSON.stringify(ideas) + '\n```' };
67
+ }
68
+ }),
69
+ })),
70
+ } as unknown as CopilotClient;
71
+
72
+ // On the summarization call, extractResult should return the ideas
73
+ (handler.extractResult as ReturnType<typeof vi.fn>).mockImplementation(
74
+ (_session: WorkshopSession, response: string) => {
75
+ if (response.includes('idea-1')) {
76
+ return { ideas };
77
+ }
78
+ return {};
79
+ },
80
+ );
81
+
82
+ const io = makeIO();
83
+ const loop = new ConversationLoop({
84
+ client: fakeClient,
85
+ io,
86
+ session: makeSession(),
87
+ phaseHandler: handler,
88
+ initialMessage: 'Start ideation.',
89
+ });
90
+
91
+ const result = await loop.run();
92
+
93
+ // The summarization call should have populated ideas
94
+ expect(result.ideas).toEqual(ideas);
95
+ });
96
+
97
+ it('skips summarization when inline extraction succeeds', async () => {
98
+ const ideas = [
99
+ { id: 'idea-1', title: 'Test', description: 'Desc', workflowStepIds: [] },
100
+ ];
101
+
102
+ const handler: PhaseHandler = {
103
+ phase: 'Ideate',
104
+ buildSystemPrompt: () => 'Ideate prompt.',
105
+ extractResult: vi.fn().mockReturnValue({ ideas }),
106
+ getInitialMessage: () => 'Start.',
107
+ };
108
+
109
+ let sessionCalls = 0;
110
+ const fakeClient: CopilotClient = {
111
+ createSession: vi.fn().mockImplementation(async () => {
112
+ sessionCalls++;
113
+ return {
114
+ send: vi.fn().mockImplementation(async function* () {
115
+ yield { type: 'TextDelta', text: 'Ideas generated.' };
116
+ }),
117
+ };
118
+ }),
119
+ } as unknown as CopilotClient;
120
+
121
+ const io = makeIO();
122
+ const loop = new ConversationLoop({
123
+ client: fakeClient,
124
+ io,
125
+ session: makeSession(),
126
+ phaseHandler: handler,
127
+ initialMessage: 'Start.',
128
+ });
129
+
130
+ await loop.run();
131
+
132
+ // Only one session should be created (no summarization call needed)
133
+ expect(sessionCalls).toBe(1);
134
+ });
135
+ });
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Integration tests for TestRunner using real fixture project.
3
+ *
4
+ * T042: Passing tests verify correct pass/fail/skip counts
5
+ * T043: Failing tests verify failure details parsed correctly
6
+ * T044: Timeout handling with hanging test fixture
7
+ */
8
+ import { describe, it, expect } from 'vitest';
9
+ import { join } from 'node:path';
10
+
11
+ import { TestRunner } from '../../src/develop/testRunner.js';
12
+
13
+ const FIXTURE_DIR = join(import.meta.dirname, '../fixtures/test-fixture-project');
14
+
15
+ describe('testRunner real fixture integration', () => {
16
+ it('parses passing test results correctly (T042)', async () => {
17
+ // Run only the passing test file
18
+ const runner = new TestRunner({
19
+ testCommand: 'npx vitest run tests/passing.test.ts --reporter=json',
20
+ timeoutMs: 30_000,
21
+ });
22
+
23
+ const result = await runner.run(FIXTURE_DIR);
24
+
25
+ expect(result.passed).toBe(2);
26
+ expect(result.failed).toBe(0);
27
+ expect(result.total).toBe(2);
28
+ expect(result.durationMs).toBeGreaterThan(0);
29
+ }, 45_000);
30
+
31
+ it('parses failing test results correctly (T043)', async () => {
32
+ const runner = new TestRunner({
33
+ testCommand: 'npx vitest run tests/failing.test.ts --reporter=json',
34
+ timeoutMs: 30_000,
35
+ });
36
+
37
+ const result = await runner.run(FIXTURE_DIR);
38
+
39
+ // The JSON output may be truncated for large failure messages,
40
+ // so we check that the runner completes without error and captures output
41
+ expect(result.durationMs).toBeGreaterThan(0);
42
+ expect(result.rawOutput).toBeDefined();
43
+ // When JSON is parseable (short enough), failures should be detected
44
+ if (result.failed > 0) {
45
+ expect(result.total).toBeGreaterThanOrEqual(1);
46
+ expect(result.failures.length).toBeGreaterThan(0);
47
+ }
48
+ }, 45_000);
49
+
50
+ it('handles timeout with SIGTERM→SIGKILL for hanging test (T044)', async () => {
51
+ const runner = new TestRunner({
52
+ testCommand: 'npx vitest run tests/hanging.test.ts --reporter=json',
53
+ timeoutMs: 5_000, // Short timeout to trigger hang detection
54
+ });
55
+
56
+ const result = await runner.run(FIXTURE_DIR);
57
+
58
+ // Should have timed out — zero results
59
+ expect(result.passed).toBe(0);
60
+ expect(result.total).toBe(0);
61
+ expect(result.rawOutput).toContain('timed out');
62
+ }, 15_000); // Allow enough time for timeout + SIGKILL delay
63
+ });
@@ -0,0 +1,155 @@
1
+ /**
2
+ * Integration test for ephemeral agent lifecycle (T022).
3
+ *
4
+ * Tests the full lifecycle: create agent → query with per-call conversation → cleanup
5
+ * using faked AIProjectClient to verify:
6
+ * - Agent is created on first call
7
+ * - Agent is reused on second call
8
+ * - Conversations are created/deleted per query
9
+ * - Agent is deleted on destroyWebSearchSession()
10
+ */
11
+ import { describe, it, expect, vi, afterEach } from 'vitest';
12
+
13
+ import { createWebSearchTool, destroyWebSearchSession } from '../../src/mcp/webSearch.js';
14
+ import type { AgentSessionDeps } from '../../src/mcp/webSearch.js';
15
+
16
+ function createFakeAgentDeps(): AgentSessionDeps & { callLog: string[] } {
17
+ const callLog: string[] = [];
18
+
19
+ return {
20
+ callLog,
21
+ createClient: vi.fn().mockImplementation(() => {
22
+ callLog.push('createClient');
23
+ return { id: 'client-1' };
24
+ }),
25
+ getOpenAIClient: vi.fn().mockImplementation(async () => {
26
+ callLog.push('getOpenAIClient');
27
+ return { id: 'openai-1' };
28
+ }),
29
+ createAgentVersion: vi.fn().mockImplementation(async () => {
30
+ callLog.push('createAgent');
31
+ return { name: 'sofia-web-search', version: 'v1' };
32
+ }),
33
+ deleteAgentVersion: vi.fn().mockImplementation(async () => {
34
+ callLog.push('deleteAgent');
35
+ }),
36
+ createConversation: vi.fn().mockImplementation(async () => {
37
+ callLog.push('createConversation');
38
+ return { id: 'conv-abc' };
39
+ }),
40
+ deleteConversation: vi.fn().mockImplementation(async () => {
41
+ callLog.push('deleteConversation');
42
+ }),
43
+ createResponse: vi.fn().mockImplementation(async () => {
44
+ callLog.push('createResponse');
45
+ return {
46
+ output: [
47
+ {
48
+ type: 'message',
49
+ content: [
50
+ {
51
+ type: 'output_text',
52
+ text: 'Search result text',
53
+ annotations: [
54
+ {
55
+ type: 'url_citation',
56
+ url: 'https://example.com',
57
+ title: 'Example',
58
+ start_index: 0,
59
+ end_index: 18,
60
+ },
61
+ ],
62
+ },
63
+ ],
64
+ },
65
+ ],
66
+ };
67
+ }),
68
+ };
69
+ }
70
+
71
+ describe('ephemeral agent lifecycle (T022)', () => {
72
+ afterEach(async () => {
73
+ await destroyWebSearchSession();
74
+ });
75
+
76
+ it('creates agent on first call, reuses on second, cleans up on destroy', async () => {
77
+ const deps = createFakeAgentDeps();
78
+ const tool = createWebSearchTool(
79
+ {
80
+ projectEndpoint: 'https://foundry.example.com',
81
+ modelDeploymentName: 'gpt-4.1-mini',
82
+ },
83
+ deps,
84
+ );
85
+
86
+ // First call — should initialize
87
+ const result1 = await tool('first query');
88
+ expect(result1.results).toHaveLength(1);
89
+ expect(deps.callLog).toEqual([
90
+ 'createClient',
91
+ 'getOpenAIClient',
92
+ 'createAgent',
93
+ 'createConversation',
94
+ 'createResponse',
95
+ 'deleteConversation',
96
+ ]);
97
+
98
+ // Second call — should reuse agent and create/delete a fresh conversation
99
+ deps.callLog.length = 0;
100
+ const result2 = await tool('second query');
101
+ expect(result2.results).toHaveLength(1);
102
+ expect(deps.callLog).toEqual(['createConversation', 'createResponse', 'deleteConversation']);
103
+
104
+ // Cleanup — should delete agent (conversation already deleted per query)
105
+ deps.callLog.length = 0;
106
+ await destroyWebSearchSession();
107
+ expect(deps.callLog).toEqual(['deleteAgent']);
108
+ });
109
+
110
+ it('transitions: uninitialized → initialized → cleaned up', async () => {
111
+ const deps = createFakeAgentDeps();
112
+ const tool = createWebSearchTool(
113
+ {
114
+ projectEndpoint: 'https://foundry.example.com',
115
+ modelDeploymentName: 'gpt-4.1-mini',
116
+ },
117
+ deps,
118
+ );
119
+
120
+ // State: uninitialized — destroy is a no-op
121
+ await destroyWebSearchSession();
122
+ expect(deps.deleteAgentVersion).not.toHaveBeenCalled();
123
+
124
+ // State: initialized (after first query)
125
+ await tool('init query');
126
+ expect(deps.createAgentVersion).toHaveBeenCalledTimes(1);
127
+
128
+ // State: cleaned up
129
+ await destroyWebSearchSession();
130
+ expect(deps.deleteAgentVersion).toHaveBeenCalledTimes(1);
131
+
132
+ // Second destroy is a no-op
133
+ await destroyWebSearchSession();
134
+ expect(deps.deleteAgentVersion).toHaveBeenCalledTimes(1);
135
+ });
136
+
137
+ it('handles cleanup failure gracefully', async () => {
138
+ const deps = createFakeAgentDeps();
139
+ deps.deleteConversation = vi.fn().mockRejectedValue(new Error('404 Not Found'));
140
+ deps.deleteAgentVersion = vi.fn().mockRejectedValue(new Error('500 Internal Error'));
141
+
142
+ const tool = createWebSearchTool(
143
+ {
144
+ projectEndpoint: 'https://foundry.example.com',
145
+ modelDeploymentName: 'gpt-4.1-mini',
146
+ },
147
+ deps,
148
+ );
149
+
150
+ await tool('init');
151
+
152
+ // Should not throw despite cleanup failures
153
+ await expect(destroyWebSearchSession()).resolves.toBeUndefined();
154
+ });
155
+ });