sofia-cli 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/.github/agents/copilot-instructions.md +39 -0
  2. package/.github/agents/speckit.analyze.agent.md +184 -0
  3. package/.github/agents/speckit.checklist.agent.md +294 -0
  4. package/.github/agents/speckit.clarify.agent.md +181 -0
  5. package/.github/agents/speckit.constitution.agent.md +84 -0
  6. package/.github/agents/speckit.implement.agent.md +135 -0
  7. package/.github/agents/speckit.plan.agent.md +90 -0
  8. package/.github/agents/speckit.specify.agent.md +258 -0
  9. package/.github/agents/speckit.tasks.agent.md +137 -0
  10. package/.github/agents/speckit.taskstoissues.agent.md +30 -0
  11. package/.github/copilot-instructions.md +257 -0
  12. package/.github/prompts/speckit.analyze.prompt.md +3 -0
  13. package/.github/prompts/speckit.checklist.prompt.md +3 -0
  14. package/.github/prompts/speckit.clarify.prompt.md +3 -0
  15. package/.github/prompts/speckit.constitution.prompt.md +3 -0
  16. package/.github/prompts/speckit.implement.prompt.md +3 -0
  17. package/.github/prompts/speckit.plan.prompt.md +3 -0
  18. package/.github/prompts/speckit.specify.prompt.md +3 -0
  19. package/.github/prompts/speckit.tasks.prompt.md +3 -0
  20. package/.github/prompts/speckit.taskstoissues.prompt.md +3 -0
  21. package/.github/workflows/ci.yml +38 -0
  22. package/.prettierrc +6 -0
  23. package/.specify/memory/constitution.md +181 -0
  24. package/.specify/scripts/bash/check-prerequisites.sh +166 -0
  25. package/.specify/scripts/bash/common.sh +156 -0
  26. package/.specify/scripts/bash/create-new-feature.sh +297 -0
  27. package/.specify/scripts/bash/setup-plan.sh +61 -0
  28. package/.specify/scripts/bash/update-agent-context.sh +810 -0
  29. package/.specify/templates/agent-file-template.md +28 -0
  30. package/.specify/templates/checklist-template.md +40 -0
  31. package/.specify/templates/constitution-template.md +50 -0
  32. package/.specify/templates/plan-template.md +113 -0
  33. package/.specify/templates/spec-template.md +115 -0
  34. package/.specify/templates/tasks-template.md +251 -0
  35. package/.vscode/mcp.json +42 -0
  36. package/.vscode/settings.json +19 -0
  37. package/CODE_OF_CONDUCT.md +128 -0
  38. package/LICENSE +21 -0
  39. package/README.md +213 -0
  40. package/dist/src/cli/developCommand.js +240 -0
  41. package/dist/src/cli/directCommands.js +143 -0
  42. package/dist/src/cli/envLoader.js +16 -0
  43. package/dist/src/cli/exportCommand.js +53 -0
  44. package/dist/src/cli/index.js +203 -0
  45. package/dist/src/cli/ioContext.js +109 -0
  46. package/dist/src/cli/preflight.js +57 -0
  47. package/dist/src/cli/statusCommand.js +110 -0
  48. package/dist/src/cli/workshopCommand.js +400 -0
  49. package/dist/src/develop/checkpointState.js +86 -0
  50. package/dist/src/develop/codeGenerator.js +319 -0
  51. package/dist/src/develop/dynamicScaffolder.js +226 -0
  52. package/dist/src/develop/githubMcpAdapter.js +122 -0
  53. package/dist/src/develop/index.js +15 -0
  54. package/dist/src/develop/mcpContextEnricher.js +195 -0
  55. package/dist/src/develop/pocScaffolder.js +542 -0
  56. package/dist/src/develop/ralphLoop.js +659 -0
  57. package/dist/src/develop/templateRegistry.js +364 -0
  58. package/dist/src/develop/testRunner.js +202 -0
  59. package/dist/src/logging/logger.js +58 -0
  60. package/dist/src/loop/conversationLoop.js +227 -0
  61. package/dist/src/loop/phaseSummarizer.js +87 -0
  62. package/dist/src/mcp/mcpManager.js +267 -0
  63. package/dist/src/mcp/mcpTransport.js +391 -0
  64. package/dist/src/mcp/retryPolicy.js +47 -0
  65. package/dist/src/mcp/webSearch.js +254 -0
  66. package/dist/src/phases/contextSummarizer.js +101 -0
  67. package/dist/src/phases/discoveryEnricher.js +156 -0
  68. package/dist/src/phases/phaseExtractors.js +222 -0
  69. package/dist/src/phases/phaseHandlers.js +328 -0
  70. package/dist/src/prompts/design.md +51 -0
  71. package/dist/src/prompts/develop-boundary.md +51 -0
  72. package/dist/src/prompts/develop.md +111 -0
  73. package/dist/src/prompts/discover.md +58 -0
  74. package/dist/src/prompts/ideate.md +56 -0
  75. package/dist/src/prompts/plan.md +51 -0
  76. package/dist/src/prompts/promptLoader.js +167 -0
  77. package/dist/src/prompts/promptLoader.ts +198 -0
  78. package/dist/src/prompts/select.md +47 -0
  79. package/dist/src/prompts/summarize/README.md +8 -0
  80. package/dist/src/prompts/summarize/design-summary.md +37 -0
  81. package/dist/src/prompts/summarize/develop-summary.md +25 -0
  82. package/dist/src/prompts/summarize/ideate-summary.md +27 -0
  83. package/dist/src/prompts/summarize/plan-summary.md +27 -0
  84. package/dist/src/prompts/summarize/select-summary.md +21 -0
  85. package/dist/src/prompts/system.md +28 -0
  86. package/dist/src/sessions/exportPaths.js +22 -0
  87. package/dist/src/sessions/exportWriter.js +406 -0
  88. package/dist/src/sessions/sessionManager.js +81 -0
  89. package/dist/src/sessions/sessionStore.js +65 -0
  90. package/dist/src/shared/activitySpinner.js +91 -0
  91. package/dist/src/shared/copilotClient.js +129 -0
  92. package/dist/src/shared/data/cards.json +1249 -0
  93. package/dist/src/shared/data/cardsLoader.js +51 -0
  94. package/dist/src/shared/errorClassifier.js +120 -0
  95. package/dist/src/shared/events.js +28 -0
  96. package/dist/src/shared/markdownRenderer.js +34 -0
  97. package/dist/src/shared/schemas/session.js +265 -0
  98. package/dist/src/shared/tableRenderer.js +20 -0
  99. package/dist/src/vendor/chalk.js +2 -0
  100. package/dist/src/vendor/cli-table3.js +3 -0
  101. package/dist/src/vendor/commander.js +2 -0
  102. package/dist/src/vendor/marked-terminal.js +3 -0
  103. package/dist/src/vendor/marked.js +2 -0
  104. package/dist/src/vendor/ora.js +2 -0
  105. package/dist/src/vendor/pino.js +2 -0
  106. package/dist/src/vendor/zod.js +2 -0
  107. package/dist/tests/e2e/developE2e.spec.js +126 -0
  108. package/dist/tests/e2e/developFailureE2e.spec.js +247 -0
  109. package/dist/tests/e2e/developPty.spec.js +75 -0
  110. package/dist/tests/e2e/discoveryWebSearchRelevance.spec.js +84 -0
  111. package/dist/tests/e2e/harness.spec.js +83 -0
  112. package/dist/tests/e2e/mcpLive.spec.js +120 -0
  113. package/dist/tests/e2e/newSession.e2e.spec.js +177 -0
  114. package/dist/tests/e2e/ralphLoopEnrichmentComparison.spec.js +62 -0
  115. package/dist/tests/e2e/workiqEnrichment.spec.js +56 -0
  116. package/dist/tests/e2e/zavaSimulation.spec.js +452 -0
  117. package/dist/tests/fixtures/test-fixture-project/src/add.js +3 -0
  118. package/dist/tests/fixtures/test-fixture-project/tests/failing.test.js +6 -0
  119. package/dist/tests/fixtures/test-fixture-project/tests/hanging.test.js +8 -0
  120. package/dist/tests/fixtures/test-fixture-project/tests/passing.test.js +10 -0
  121. package/dist/tests/fixtures/test-fixture-project/vitest.config.js +6 -0
  122. package/dist/tests/integration/autoStartConversation.spec.js +138 -0
  123. package/dist/tests/integration/defaultCommand.spec.js +147 -0
  124. package/dist/tests/integration/directCommandNonTty.spec.js +224 -0
  125. package/dist/tests/integration/directCommandTty.spec.js +151 -0
  126. package/dist/tests/integration/discoveryEnrichmentFlow.spec.js +175 -0
  127. package/dist/tests/integration/exportArtifacts.spec.js +202 -0
  128. package/dist/tests/integration/exportFallbackFlow.spec.js +99 -0
  129. package/dist/tests/integration/mcpDegradationFlow.spec.js +190 -0
  130. package/dist/tests/integration/mcpTransportFlow.spec.js +139 -0
  131. package/dist/tests/integration/newSessionFlow.spec.js +343 -0
  132. package/dist/tests/integration/pocGithubMcp.spec.js +186 -0
  133. package/dist/tests/integration/pocLocalFallback.spec.js +171 -0
  134. package/dist/tests/integration/pocScaffold.spec.js +163 -0
  135. package/dist/tests/integration/ralphLoopFlow.spec.js +359 -0
  136. package/dist/tests/integration/ralphLoopPartial.spec.js +368 -0
  137. package/dist/tests/integration/resumeAndBacktrack.spec.js +247 -0
  138. package/dist/tests/integration/spinnerLifecycle.spec.js +220 -0
  139. package/dist/tests/integration/summarizationFlow.spec.js +115 -0
  140. package/dist/tests/integration/testRunnerReal.spec.js +52 -0
  141. package/dist/tests/integration/webSearchAgent.spec.js +128 -0
  142. package/dist/tests/live/copilotSdkLive.spec.js +107 -0
  143. package/dist/tests/live/zavaFullWorkshop.spec.js +392 -0
  144. package/dist/tests/setup/loadEnv.js +3 -0
  145. package/dist/tests/unit/cli/developCommand.spec.js +567 -0
  146. package/dist/tests/unit/cli/directCommands.spec.js +279 -0
  147. package/dist/tests/unit/cli/envLoader.spec.js +58 -0
  148. package/dist/tests/unit/cli/ioContext.spec.js +119 -0
  149. package/dist/tests/unit/cli/preflight.spec.js +108 -0
  150. package/dist/tests/unit/cli/statusCommand.spec.js +111 -0
  151. package/dist/tests/unit/cli/workshopClientFallback.spec.js +80 -0
  152. package/dist/tests/unit/cli/workshopCommand.spec.js +329 -0
  153. package/dist/tests/unit/config/vitestEnvSetup.spec.js +13 -0
  154. package/dist/tests/unit/develop/checkpointState.spec.js +315 -0
  155. package/dist/tests/unit/develop/codeGenerator.spec.js +355 -0
  156. package/dist/tests/unit/develop/githubMcpAdapter.spec.js +231 -0
  157. package/dist/tests/unit/develop/mcpContextEnricher.spec.js +433 -0
  158. package/dist/tests/unit/develop/outputValidator.spec.js +119 -0
  159. package/dist/tests/unit/develop/pocScaffolder.spec.js +353 -0
  160. package/dist/tests/unit/develop/ralphLoop.spec.js +1248 -0
  161. package/dist/tests/unit/develop/templateRegistry.spec.js +85 -0
  162. package/dist/tests/unit/develop/testRunner.spec.js +249 -0
  163. package/dist/tests/unit/infraBicep.spec.js +92 -0
  164. package/dist/tests/unit/infraDeploy.spec.js +82 -0
  165. package/dist/tests/unit/infraTeardown.spec.js +63 -0
  166. package/dist/tests/unit/logging/logger.spec.js +43 -0
  167. package/dist/tests/unit/loop/conversationLoop.spec.js +592 -0
  168. package/dist/tests/unit/loop/phaseSummarizer.spec.js +141 -0
  169. package/dist/tests/unit/loop/streamingMarkdown.spec.js +147 -0
  170. package/dist/tests/unit/mcp/mcpManager.spec.js +279 -0
  171. package/dist/tests/unit/mcp/mcpTransport.spec.js +529 -0
  172. package/dist/tests/unit/mcp/retryPolicy.spec.js +218 -0
  173. package/dist/tests/unit/mcp/timeoutValidation.spec.js +46 -0
  174. package/dist/tests/unit/mcp/webSearch.spec.js +567 -0
  175. package/dist/tests/unit/phases/contextSummarizer.spec.js +140 -0
  176. package/dist/tests/unit/phases/discoveryEnricher.repeatCalls.spec.js +93 -0
  177. package/dist/tests/unit/phases/discoveryEnricher.spec.js +411 -0
  178. package/dist/tests/unit/phases/phaseExtractors.spec.js +352 -0
  179. package/dist/tests/unit/phases/phaseHandlers.spec.js +425 -0
  180. package/dist/tests/unit/prompts/promptLoader.spec.js +118 -0
  181. package/dist/tests/unit/schemas/pocSchemas.spec.js +412 -0
  182. package/dist/tests/unit/schemas/session.spec.js +257 -0
  183. package/dist/tests/unit/sessions/exportPaths.spec.js +31 -0
  184. package/dist/tests/unit/sessions/exportWriter.spec.js +655 -0
  185. package/dist/tests/unit/sessions/sessionManager.spec.js +151 -0
  186. package/dist/tests/unit/sessions/sessionStore.spec.js +116 -0
  187. package/dist/tests/unit/shared/activitySpinner.spec.js +175 -0
  188. package/dist/tests/unit/shared/cardsLoader.spec.js +76 -0
  189. package/dist/tests/unit/shared/copilotClient.spec.js +155 -0
  190. package/dist/tests/unit/shared/errorClassifier.spec.js +131 -0
  191. package/dist/tests/unit/shared/events.spec.js +55 -0
  192. package/dist/tests/unit/shared/markdownRenderer.spec.js +35 -0
  193. package/dist/tests/unit/shared/markdownRendererChunks.spec.js +70 -0
  194. package/dist/tests/unit/shared/tableRenderer.spec.js +34 -0
  195. package/dist/vitest.config.js +14 -0
  196. package/dist/vitest.live.config.js +18 -0
  197. package/docs/README.md +35 -0
  198. package/docs/architecture.md +169 -0
  199. package/docs/cli-usage.md +207 -0
  200. package/docs/environment.md +66 -0
  201. package/docs/export-format.md +146 -0
  202. package/docs/session-model.md +113 -0
  203. package/eslint.config.js +35 -0
  204. package/infra/deploy.sh +193 -0
  205. package/infra/gather-env.sh +211 -0
  206. package/infra/main.bicep +90 -0
  207. package/infra/main.bicepparam +18 -0
  208. package/infra/resources.bicep +134 -0
  209. package/infra/teardown.sh +114 -0
  210. package/package.json +63 -0
  211. package/specs/001-cli-workshop-rebuild/checklists/requirements.md +35 -0
  212. package/specs/001-cli-workshop-rebuild/contracts/cli.md +59 -0
  213. package/specs/001-cli-workshop-rebuild/contracts/export-summary-json.md +23 -0
  214. package/specs/001-cli-workshop-rebuild/contracts/session-json.md +30 -0
  215. package/specs/001-cli-workshop-rebuild/data-model.md +210 -0
  216. package/specs/001-cli-workshop-rebuild/plan.md +361 -0
  217. package/specs/001-cli-workshop-rebuild/quickstart.md +83 -0
  218. package/specs/001-cli-workshop-rebuild/research.md +116 -0
  219. package/specs/001-cli-workshop-rebuild/spec.md +240 -0
  220. package/specs/001-cli-workshop-rebuild/tasks.md +476 -0
  221. package/specs/002-poc-generation/contracts/poc-output.md +172 -0
  222. package/specs/002-poc-generation/contracts/ralph-loop.md +113 -0
  223. package/specs/002-poc-generation/data-model.md +172 -0
  224. package/specs/002-poc-generation/plan.md +109 -0
  225. package/specs/002-poc-generation/quickstart.md +97 -0
  226. package/specs/002-poc-generation/research.md +786 -0
  227. package/specs/002-poc-generation/spec.md +81 -0
  228. package/specs/002-poc-generation/tasks-fix.md +198 -0
  229. package/specs/002-poc-generation/tasks.md +252 -0
  230. package/specs/003-mcp-transport-integration/checklists/requirements.md +37 -0
  231. package/specs/003-mcp-transport-integration/contracts/context-enricher.md +220 -0
  232. package/specs/003-mcp-transport-integration/contracts/discovery-enricher.md +267 -0
  233. package/specs/003-mcp-transport-integration/contracts/github-adapter.md +149 -0
  234. package/specs/003-mcp-transport-integration/contracts/mcp-transport.md +288 -0
  235. package/specs/003-mcp-transport-integration/data-model.md +326 -0
  236. package/specs/003-mcp-transport-integration/plan.md +114 -0
  237. package/specs/003-mcp-transport-integration/quickstart.md +311 -0
  238. package/specs/003-mcp-transport-integration/research.md +395 -0
  239. package/specs/003-mcp-transport-integration/spec.md +234 -0
  240. package/specs/003-mcp-transport-integration/tasks.md +324 -0
  241. package/specs/003-next-spec-gaps.md +150 -0
  242. package/specs/004-dev-resume-hardening/checklists/requirements.md +37 -0
  243. package/specs/004-dev-resume-hardening/contracts/cli.md +160 -0
  244. package/specs/004-dev-resume-hardening/data-model.md +321 -0
  245. package/specs/004-dev-resume-hardening/plan.md +107 -0
  246. package/specs/004-dev-resume-hardening/quickstart.md +115 -0
  247. package/specs/004-dev-resume-hardening/research.md +142 -0
  248. package/specs/004-dev-resume-hardening/spec.md +221 -0
  249. package/specs/004-dev-resume-hardening/tasks.md +333 -0
  250. package/specs/005-ai-search-deploy/checklists/requirements.md +39 -0
  251. package/specs/005-ai-search-deploy/contracts/web-search-tool.md +241 -0
  252. package/specs/005-ai-search-deploy/data-model.md +130 -0
  253. package/specs/005-ai-search-deploy/plan.md +93 -0
  254. package/specs/005-ai-search-deploy/quickstart.md +96 -0
  255. package/specs/005-ai-search-deploy/research.md +187 -0
  256. package/specs/005-ai-search-deploy/spec.md +143 -0
  257. package/specs/005-ai-search-deploy/tasks.md +284 -0
  258. package/specs/006-workshop-extraction-fixes/checklists/requirements.md +61 -0
  259. package/specs/006-workshop-extraction-fixes/contracts/summarization-and-export.md +131 -0
  260. package/specs/006-workshop-extraction-fixes/data-model.md +149 -0
  261. package/specs/006-workshop-extraction-fixes/plan.md +123 -0
  262. package/specs/006-workshop-extraction-fixes/quickstart.md +101 -0
  263. package/specs/006-workshop-extraction-fixes/research.md +143 -0
  264. package/specs/006-workshop-extraction-fixes/spec.md +210 -0
  265. package/specs/006-workshop-extraction-fixes/tasks.md +316 -0
  266. package/src/cli/developCommand.ts +308 -0
  267. package/src/cli/directCommands.ts +195 -0
  268. package/src/cli/envLoader.ts +17 -0
  269. package/src/cli/exportCommand.ts +65 -0
  270. package/src/cli/index.ts +249 -0
  271. package/src/cli/ioContext.ts +139 -0
  272. package/src/cli/preflight.ts +86 -0
  273. package/src/cli/statusCommand.ts +118 -0
  274. package/src/cli/workshopCommand.ts +496 -0
  275. package/src/develop/checkpointState.ts +121 -0
  276. package/src/develop/codeGenerator.ts +402 -0
  277. package/src/develop/dynamicScaffolder.ts +284 -0
  278. package/src/develop/githubMcpAdapter.ts +199 -0
  279. package/src/develop/index.ts +34 -0
  280. package/src/develop/mcpContextEnricher.ts +279 -0
  281. package/src/develop/pocScaffolder.ts +646 -0
  282. package/src/develop/ralphLoop.ts +1044 -0
  283. package/src/develop/templateRegistry.ts +427 -0
  284. package/src/develop/testRunner.ts +276 -0
  285. package/src/logging/logger.ts +73 -0
  286. package/src/loop/conversationLoop.ts +355 -0
  287. package/src/loop/phaseSummarizer.ts +114 -0
  288. package/src/mcp/mcpManager.ts +365 -0
  289. package/src/mcp/mcpTransport.ts +562 -0
  290. package/src/mcp/retryPolicy.ts +87 -0
  291. package/src/mcp/webSearch.ts +388 -0
  292. package/src/originalPrompts/design_thinking.md +178 -0
  293. package/src/originalPrompts/design_thinking_persona.md +76 -0
  294. package/src/originalPrompts/document_generator_example.md +77 -0
  295. package/src/originalPrompts/document_generator_persona.md +47 -0
  296. package/src/originalPrompts/facilitator_persona.md +125 -0
  297. package/src/originalPrompts/guardrails.md +47 -0
  298. package/src/phases/contextSummarizer.ts +154 -0
  299. package/src/phases/discoveryEnricher.ts +223 -0
  300. package/src/phases/phaseExtractors.ts +247 -0
  301. package/src/phases/phaseHandlers.ts +450 -0
  302. package/src/prompts/design.md +51 -0
  303. package/src/prompts/develop-boundary.md +51 -0
  304. package/src/prompts/develop.md +111 -0
  305. package/src/prompts/discover.md +58 -0
  306. package/src/prompts/ideate.md +56 -0
  307. package/src/prompts/plan.md +51 -0
  308. package/src/prompts/promptLoader.ts +198 -0
  309. package/src/prompts/select.md +47 -0
  310. package/src/prompts/summarize/README.md +8 -0
  311. package/src/prompts/summarize/design-summary.md +37 -0
  312. package/src/prompts/summarize/develop-summary.md +25 -0
  313. package/src/prompts/summarize/ideate-summary.md +27 -0
  314. package/src/prompts/summarize/plan-summary.md +27 -0
  315. package/src/prompts/summarize/select-summary.md +21 -0
  316. package/src/prompts/system.md +28 -0
  317. package/src/sessions/exportPaths.ts +28 -0
  318. package/src/sessions/exportWriter.ts +490 -0
  319. package/src/sessions/sessionManager.ts +119 -0
  320. package/src/sessions/sessionStore.ts +69 -0
  321. package/src/shared/activitySpinner.ts +108 -0
  322. package/src/shared/copilotClient.ts +291 -0
  323. package/src/shared/data/cards.json +1249 -0
  324. package/src/shared/data/cardsLoader.ts +70 -0
  325. package/src/shared/errorClassifier.ts +160 -0
  326. package/src/shared/events.ts +103 -0
  327. package/src/shared/markdownRenderer.ts +44 -0
  328. package/src/shared/schemas/session.ts +346 -0
  329. package/src/shared/tableRenderer.ts +28 -0
  330. package/src/types/marked-terminal.d.ts +5 -0
  331. package/src/vendor/chalk.ts +2 -0
  332. package/src/vendor/cli-table3.ts +3 -0
  333. package/src/vendor/commander.ts +2 -0
  334. package/src/vendor/marked-terminal.ts +3 -0
  335. package/src/vendor/marked.ts +2 -0
  336. package/src/vendor/ora.ts +2 -0
  337. package/src/vendor/pino.ts +3 -0
  338. package/src/vendor/zod.ts +3 -0
  339. package/tests/e2e/developE2e.spec.ts +152 -0
  340. package/tests/e2e/developFailureE2e.spec.ts +289 -0
  341. package/tests/e2e/developPty.spec.ts +86 -0
  342. package/tests/e2e/discoveryWebSearchRelevance.spec.ts +103 -0
  343. package/tests/e2e/harness.spec.ts +104 -0
  344. package/tests/e2e/mcpLive.spec.ts +149 -0
  345. package/tests/e2e/newSession.e2e.spec.ts +245 -0
  346. package/tests/e2e/ralphLoopEnrichmentComparison.spec.ts +70 -0
  347. package/tests/e2e/workiqEnrichment.spec.ts +72 -0
  348. package/tests/e2e/zava-assessment/agent-interaction-script.md +258 -0
  349. package/tests/e2e/zava-assessment/company-profile.md +98 -0
  350. package/tests/e2e/zava-assessment/expected-results-checklist.md +454 -0
  351. package/tests/e2e/zavaSimulation.spec.ts +511 -0
  352. package/tests/fixtures/completedSession.json +141 -0
  353. package/tests/fixtures/test-fixture-project/package-lock.json +1585 -0
  354. package/tests/fixtures/test-fixture-project/package.json +12 -0
  355. package/tests/fixtures/test-fixture-project/src/add.ts +3 -0
  356. package/tests/fixtures/test-fixture-project/tests/failing.test.ts +7 -0
  357. package/tests/fixtures/test-fixture-project/tests/hanging.test.ts +9 -0
  358. package/tests/fixtures/test-fixture-project/tests/passing.test.ts +13 -0
  359. package/tests/fixtures/test-fixture-project/vitest.config.ts +7 -0
  360. package/tests/integration/autoStartConversation.spec.ts +168 -0
  361. package/tests/integration/defaultCommand.spec.ts +179 -0
  362. package/tests/integration/directCommandNonTty.spec.ts +260 -0
  363. package/tests/integration/directCommandTty.spec.ts +185 -0
  364. package/tests/integration/discoveryEnrichmentFlow.spec.ts +209 -0
  365. package/tests/integration/exportArtifacts.spec.ts +232 -0
  366. package/tests/integration/exportFallbackFlow.spec.ts +115 -0
  367. package/tests/integration/mcpDegradationFlow.spec.ts +231 -0
  368. package/tests/integration/mcpTransportFlow.spec.ts +178 -0
  369. package/tests/integration/newSessionFlow.spec.ts +406 -0
  370. package/tests/integration/pocGithubMcp.spec.ts +224 -0
  371. package/tests/integration/pocLocalFallback.spec.ts +205 -0
  372. package/tests/integration/pocScaffold.spec.ts +220 -0
  373. package/tests/integration/ralphLoopFlow.spec.ts +430 -0
  374. package/tests/integration/ralphLoopPartial.spec.ts +416 -0
  375. package/tests/integration/resumeAndBacktrack.spec.ts +278 -0
  376. package/tests/integration/spinnerLifecycle.spec.ts +270 -0
  377. package/tests/integration/summarizationFlow.spec.ts +135 -0
  378. package/tests/integration/testRunnerReal.spec.ts +63 -0
  379. package/tests/integration/webSearchAgent.spec.ts +155 -0
  380. package/tests/live/copilotSdkLive.spec.ts +149 -0
  381. package/tests/live/zavaFullWorkshop.spec.ts +515 -0
  382. package/tests/setup/loadEnv.ts +5 -0
  383. package/tests/unit/cli/developCommand.spec.ts +679 -0
  384. package/tests/unit/cli/directCommands.spec.ts +325 -0
  385. package/tests/unit/cli/envLoader.spec.ts +73 -0
  386. package/tests/unit/cli/ioContext.spec.ts +148 -0
  387. package/tests/unit/cli/preflight.spec.ts +125 -0
  388. package/tests/unit/cli/statusCommand.spec.ts +134 -0
  389. package/tests/unit/cli/workshopClientFallback.spec.ts +100 -0
  390. package/tests/unit/cli/workshopCommand.spec.ts +378 -0
  391. package/tests/unit/config/vitestEnvSetup.spec.ts +24 -0
  392. package/tests/unit/develop/checkpointState.spec.ts +378 -0
  393. package/tests/unit/develop/codeGenerator.spec.ts +447 -0
  394. package/tests/unit/develop/githubMcpAdapter.spec.ts +283 -0
  395. package/tests/unit/develop/mcpContextEnricher.spec.ts +564 -0
  396. package/tests/unit/develop/outputValidator.spec.ts +134 -0
  397. package/tests/unit/develop/pocScaffolder.spec.ts +451 -0
  398. package/tests/unit/develop/ralphLoop.spec.ts +1439 -0
  399. package/tests/unit/develop/templateRegistry.spec.ts +106 -0
  400. package/tests/unit/develop/testRunner.spec.ts +294 -0
  401. package/tests/unit/infraBicep.spec.ts +116 -0
  402. package/tests/unit/infraDeploy.spec.ts +102 -0
  403. package/tests/unit/infraTeardown.spec.ts +77 -0
  404. package/tests/unit/logging/logger.spec.ts +50 -0
  405. package/tests/unit/loop/conversationLoop.spec.ts +719 -0
  406. package/tests/unit/loop/phaseSummarizer.spec.ts +169 -0
  407. package/tests/unit/loop/streamingMarkdown.spec.ts +180 -0
  408. package/tests/unit/mcp/mcpManager.spec.ts +336 -0
  409. package/tests/unit/mcp/mcpTransport.spec.ts +689 -0
  410. package/tests/unit/mcp/retryPolicy.spec.ts +278 -0
  411. package/tests/unit/mcp/timeoutValidation.spec.ts +55 -0
  412. package/tests/unit/mcp/webSearch.spec.ts +718 -0
  413. package/tests/unit/phases/contextSummarizer.spec.ts +158 -0
  414. package/tests/unit/phases/discoveryEnricher.repeatCalls.spec.ts +125 -0
  415. package/tests/unit/phases/discoveryEnricher.spec.ts +512 -0
  416. package/tests/unit/phases/phaseExtractors.spec.ts +406 -0
  417. package/tests/unit/phases/phaseHandlers.spec.ts +483 -0
  418. package/tests/unit/prompts/promptLoader.spec.ts +144 -0
  419. package/tests/unit/schemas/pocSchemas.spec.ts +457 -0
  420. package/tests/unit/schemas/session.spec.ts +328 -0
  421. package/tests/unit/sessions/exportPaths.spec.ts +38 -0
  422. package/tests/unit/sessions/exportWriter.spec.ts +737 -0
  423. package/tests/unit/sessions/sessionManager.spec.ts +174 -0
  424. package/tests/unit/sessions/sessionStore.spec.ts +136 -0
  425. package/tests/unit/shared/activitySpinner.spec.ts +211 -0
  426. package/tests/unit/shared/cardsLoader.spec.ts +89 -0
  427. package/tests/unit/shared/copilotClient.spec.ts +185 -0
  428. package/tests/unit/shared/errorClassifier.spec.ts +152 -0
  429. package/tests/unit/shared/events.spec.ts +71 -0
  430. package/tests/unit/shared/markdownRenderer.spec.ts +42 -0
  431. package/tests/unit/shared/markdownRendererChunks.spec.ts +83 -0
  432. package/tests/unit/shared/tableRenderer.spec.ts +38 -0
  433. package/tsconfig.json +20 -0
  434. package/vitest.config.ts +15 -0
  435. package/vitest.live.config.ts +19 -0
@@ -0,0 +1,567 @@
1
+ /**
2
+ * Web search tool tests (T060, T018-T021).
3
+ *
4
+ * Tests for the web.search tool backed by Azure AI Foundry Agent Service.
5
+ *
6
+ * Covers:
7
+ * - WebSearchConfig validation (T018)
8
+ * - Legacy env var detection (T019)
9
+ * - Graceful degradation scenarios (T020)
10
+ * - Citation extraction from url_citation annotations (T021)
11
+ * - Tool definition shape for Copilot SDK registration
12
+ * - Successful search returning structured results
13
+ */
14
+ import { describe, it, expect, vi, afterEach } from 'vitest';
15
+ import { createWebSearchTool, isWebSearchConfigured, extractCitations, destroyWebSearchSession, WEB_SEARCH_TOOL_DEFINITION, } from '../../../src/mcp/webSearch.js';
16
+ // Helper: builds the bag of vi.fn() stubs that these tests pass to createWebSearchTool as its second argument (the agent session deps).
17
+ function createFakeDeps(overrides) { // overrides: optional object whose keys replace the default stubs below
18
+ return {
19
+ createClient: vi.fn().mockReturnValue({ fake: 'client' }), // the only synchronous stub; all others resolve a promise
20
+ getOpenAIClient: vi.fn().mockResolvedValue({ fake: 'openai' }),
21
+ createAgentVersion: vi.fn().mockResolvedValue({ name: 'sofia-web-search', version: 'v1' }),
22
+ deleteAgentVersion: vi.fn().mockResolvedValue(undefined),
23
+ createConversation: vi.fn().mockResolvedValue({ id: 'conv-123' }),
24
+ deleteConversation: vi.fn().mockResolvedValue(undefined),
25
+ createResponse: vi.fn().mockResolvedValue({ // default response: one message with one output_text part carrying one url_citation
26
+ output: [
27
+ {
28
+ type: 'message',
29
+ content: [
30
+ {
31
+ type: 'output_text',
32
+ text: 'Contoso is a healthcare AI company. See source.',
33
+ annotations: [
34
+ {
35
+ type: 'url_citation',
36
+ url: 'https://contoso.com/about',
37
+ title: 'Contoso Ltd - About',
38
+ start_index: 0,
39
+ end_index: 40,
40
+ },
41
+ ],
42
+ },
43
+ ],
44
+ },
45
+ ],
46
+ }),
47
+ ...overrides, // spread last so a test-supplied stub wins over the default with the same key
48
+ };
49
+ }
50
+ describe('web.search tool', () => {
51
+ const originalEnv = { ...process.env }; // shallow snapshot of the environment, taken once when the suite body runs
52
+ afterEach(async () => { // runs after every test in this suite
53
+ process.env = { ...originalEnv }; // swap in a fresh copy of the snapshot, undoing any env var a test set or deleted
54
+ await destroyWebSearchSession(); // presumably clears session state cached inside src/mcp/webSearch.js so tests do not share it - TODO confirm
55
+ });
56
+ describe('isWebSearchConfigured', () => { // each case mutates process.env directly, then calls isWebSearchConfigured() with no arguments
57
+ it('returns true when both project endpoint and model deployment name are set', () => {
58
+ process.env.FOUNDRY_PROJECT_ENDPOINT =
59
+ 'https://sofia-foundry.services.ai.azure.com/api/projects/sofia-project';
60
+ process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME = 'gpt-4.1-mini';
61
+ expect(isWebSearchConfigured()).toBe(true); // the only case in this group that expects true
62
+ });
63
+ it('returns false when project endpoint is missing', () => {
64
+ delete process.env.FOUNDRY_PROJECT_ENDPOINT; // explicit delete so the result does not depend on the host environment
65
+ process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME = 'gpt-4.1-mini';
66
+ expect(isWebSearchConfigured()).toBe(false);
67
+ });
68
+ it('returns false when model deployment name is missing', () => {
69
+ process.env.FOUNDRY_PROJECT_ENDPOINT =
70
+ 'https://sofia-foundry.services.ai.azure.com/api/projects/sofia-project';
71
+ delete process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME;
72
+ expect(isWebSearchConfigured()).toBe(false);
73
+ });
74
+ it('returns false when both are missing', () => {
75
+ delete process.env.FOUNDRY_PROJECT_ENDPOINT;
76
+ delete process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME;
77
+ expect(isWebSearchConfigured()).toBe(false);
78
+ });
79
+ it('returns false when only legacy vars are set (T019)', () => {
80
+ process.env.SOFIA_FOUNDRY_AGENT_ENDPOINT = 'https://foundry.example.com'; // legacy variable names (per the test title)
81
+ process.env.SOFIA_FOUNDRY_AGENT_KEY = 'test-key-123';
82
+ delete process.env.FOUNDRY_PROJECT_ENDPOINT; // both current variables removed so only the legacy pair is present
83
+ delete process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME;
84
+ expect(isWebSearchConfigured()).toBe(false); // the legacy pair alone must not count as configured
85
+ });
86
+ });
87
+ describe('WebSearchConfig validation (T018)', () => { // each case passes a config object plus fake deps to createWebSearchTool
88
+ it('accepts valid config with projectEndpoint and modelDeploymentName', () => {
89
+ const deps = createFakeDeps();
90
+ const tool = createWebSearchTool({
91
+ projectEndpoint: 'https://sofia-foundry.services.ai.azure.com/api/projects/sofia-project',
92
+ modelDeploymentName: 'gpt-4.1-mini',
93
+ }, deps);
94
+ expect(tool).toBeTypeOf('function'); // the factory returns a callable; it is not invoked in this case
95
+ });
96
+ it('creates client with the provided projectEndpoint', async () => {
97
+ const deps = createFakeDeps();
98
+ const tool = createWebSearchTool({
99
+ projectEndpoint: 'https://my-foundry.services.ai.azure.com/api/projects/proj',
100
+ modelDeploymentName: 'gpt-4.1-mini',
101
+ }, deps);
102
+ await tool('test'); // invoked before asserting - presumably the client is only created on first use (TODO confirm)
103
+ expect(deps.createClient).toHaveBeenCalledWith('https://my-foundry.services.ai.azure.com/api/projects/proj');
104
+ });
105
+ it('passes modelDeploymentName to agent creation', async () => {
106
+ const deps = createFakeDeps();
107
+ const tool = createWebSearchTool({
108
+ projectEndpoint: 'https://foundry.example.com',
109
+ modelDeploymentName: 'my-model',
110
+ }, deps);
111
+ await tool('test');
112
+ expect(deps.createAgentVersion).toHaveBeenCalledWith(expect.anything(), 'sofia-web-search', expect.objectContaining({ model: 'my-model' })); // first argument unconstrained; only the agent name and the model field are checked
113
+ });
114
+ });
115
+ describe('tool definition', () => { // checks the exported WEB_SEARCH_TOOL_DEFINITION constant; no tool is created here
116
+ it('has correct name and description', () => {
117
+ expect(WEB_SEARCH_TOOL_DEFINITION.name).toBe('web.search');
118
+ expect(WEB_SEARCH_TOOL_DEFINITION.description).toBeTruthy(); // any non-empty description passes; the wording is not pinned
119
+ });
120
+ it('accepts a query parameter', () => {
121
+ const params = WEB_SEARCH_TOOL_DEFINITION.parameters;
122
+ expect(params).toBeDefined();
123
+ expect(params.properties?.query).toBeDefined(); // optional chaining: a missing properties object fails the assertion instead of throwing a TypeError
124
+ });
125
+ });
126
+ describe('createWebSearchTool', () => {
127
+ it('returns structured results with citations on success', async () => { // happy path using the unmodified default stubs
128
+ const deps = createFakeDeps();
129
+ const tool = createWebSearchTool({
130
+ projectEndpoint: 'https://foundry.example.com',
131
+ modelDeploymentName: 'gpt-4.1-mini',
132
+ }, deps);
133
+ const result = await tool('Contoso healthcare');
134
+ expect(result.results).toHaveLength(1); // the default createResponse stub in createFakeDeps carries exactly one url_citation
135
+ expect(result.results[0].title).toBe('Contoso Ltd - About'); // title and url below are that annotation's values
136
+ expect(result.results[0].url).toBe('https://contoso.com/about');
137
+ expect(result.sources).toContain('https://contoso.com/about'); // the same url is also expected in the sources list
138
+ });
139
+ it('reuses agent on second call (lazy initialization)', async () => { // Two queries through one tool must share a single agent version.
140
+ const deps = createFakeDeps();
141
+ const tool = createWebSearchTool({
142
+ projectEndpoint: 'https://foundry.example.com',
143
+ modelDeploymentName: 'gpt-4.1-mini',
144
+ }, deps);
145
+ await tool('first query');
146
+ await tool('second query');
147
+ // Agent created once, response called twice
148
+ expect(deps.createAgentVersion).toHaveBeenCalledTimes(1);
149
+ expect(deps.createResponse).toHaveBeenCalledTimes(2);
150
+ });
151
+ it('degrades gracefully when credential fails (T020)', async () => { // A rejected getOpenAIClient must surface as a degraded result, not a thrown error.
152
+ const deps = createFakeDeps({
153
+ getOpenAIClient: vi
154
+ .fn()
155
+ .mockRejectedValue(new Error('Azure authentication failed — run `az login`')),
156
+ });
157
+ const tool = createWebSearchTool({
158
+ projectEndpoint: 'https://foundry.example.com',
159
+ modelDeploymentName: 'gpt-4.1-mini',
160
+ }, deps);
161
+ const result = await tool('test query'); // resolving here (instead of rejecting) is itself part of what is being tested
162
+ expect(result.results).toHaveLength(0);
163
+ expect(result.degraded).toBe(true);
164
+ expect(result.error).toContain('Azure authentication failed'); // the original error message must be carried through
165
+ });
166
+ it('degrades gracefully when agent creation fails (T020)', async () => { // A rejected createAgentVersion must surface as a degraded result.
167
+ const deps = createFakeDeps({
168
+ createAgentVersion: vi
169
+ .fn()
170
+ .mockRejectedValue(new Error('Failed to create web search agent: 403 Forbidden')),
171
+ });
172
+ const tool = createWebSearchTool({
173
+ projectEndpoint: 'https://foundry.example.com',
174
+ modelDeploymentName: 'gpt-4.1-mini',
175
+ }, deps);
176
+ const result = await tool('test query');
177
+ expect(result.results).toHaveLength(0);
178
+ expect(result.degraded).toBe(true);
179
+ expect(result.error).toContain('Failed to create web search agent'); // the original error message must be carried through
180
+ });
181
+ it('degrades gracefully on network error (T020)', async () => { // Unlike the sibling T020 cases, this fake throws synchronously rather than rejecting.
182
+ const deps = createFakeDeps({
183
+ createClient: vi.fn().mockImplementation(() => {
184
+ throw new Error('Network error: ECONNREFUSED');
185
+ }),
186
+ });
187
+ const tool = createWebSearchTool({
188
+ projectEndpoint: 'https://foundry.example.com',
189
+ modelDeploymentName: 'gpt-4.1-mini',
190
+ }, deps);
191
+ const result = await tool('test query');
192
+ expect(result.results).toHaveLength(0);
193
+ expect(result.degraded).toBe(true);
194
+ expect(result.error).toContain('Network error');
195
+ });
196
+ it('returns empty results with degraded flag when query fails', async () => { // createResponse always rejects, so the query can never succeed.
197
+ const deps = createFakeDeps({
198
+ createResponse: vi
199
+ .fn()
200
+ .mockRejectedValue(new Error('Web search query failed: 429 Rate limited')),
201
+ });
202
+ const tool = createWebSearchTool({
203
+ projectEndpoint: 'https://foundry.example.com',
204
+ modelDeploymentName: 'gpt-4.1-mini',
205
+ }, deps);
206
+ const result = await tool('test query');
207
+ expect(result.results).toHaveLength(0);
208
+ expect(result.degraded).toBe(true);
209
+ expect(result.error).toContain('429');
210
+ }, 12000); // 12 s per-test timeout; presumably leaves room for the 429 retry backoff to run with real timers - confirm
211
+ it('falls back to output text snippets when citations are missing', async () => { // A response with text but no annotations must still yield one result.
212
+ const deps = createFakeDeps({
213
+ createResponse: vi.fn().mockResolvedValue({
214
+ output: [
215
+ {
216
+ type: 'message',
217
+ content: [
218
+ {
219
+ type: 'output_text',
220
+ text: 'Microsoft expands cloud infrastructure in Europe.',
221
+ annotations: [], // deliberately empty: this is what triggers the fallback path
222
+ },
223
+ ],
224
+ },
225
+ ],
226
+ }),
227
+ });
228
+ const tool = createWebSearchTool({
229
+ projectEndpoint: 'https://foundry.example.com',
230
+ modelDeploymentName: 'gpt-4.1-mini',
231
+ }, deps);
232
+ const result = await tool('microsoft cloud europe');
233
+ expect(result.results).toHaveLength(1);
234
+ expect(result.results[0].snippet).toContain('cloud infrastructure'); // snippet is taken from the response text
235
+ expect(result.results[0].title).toBe('Foundry response'); // fixed placeholder title used for fallback results
236
+ });
237
+ it('returns citations on subsequent calls by isolating each query conversation', async () => {
238
+ let conversationCounter = 0;
239
+ const seenByConversation = new Map();
240
+ const deps = createFakeDeps({
241
+ createConversation: vi.fn().mockImplementation(async () => {
242
+ conversationCounter += 1;
243
+ return { id: `conv-${conversationCounter}` };
244
+ }),
245
+ createResponse: vi
246
+ .fn()
247
+ .mockImplementation(async (_openAIClient, conversationId) => {
248
+ const calls = (seenByConversation.get(conversationId) ?? 0) + 1;
249
+ seenByConversation.set(conversationId, calls);
250
+ // Simulate Foundry behavior where only the first turn in a conversation
251
+ // contains URL citations; follow-up turns may return plain text.
252
+ if (calls > 1) {
253
+ return {
254
+ output: [
255
+ {
256
+ type: 'message',
257
+ content: [
258
+ {
259
+ type: 'output_text',
260
+ text: 'No citations in follow-up turn.',
261
+ annotations: [],
262
+ },
263
+ ],
264
+ },
265
+ ],
266
+ };
267
+ }
268
+ return {
269
+ output: [
270
+ {
271
+ type: 'message',
272
+ content: [
273
+ {
274
+ type: 'output_text',
275
+ text: 'Result with source.',
276
+ annotations: [
277
+ {
278
+ type: 'url_citation',
279
+ url: `https://example.com/${conversationId}`,
280
+ title: `Source ${conversationId}`,
281
+ start_index: 0,
282
+ end_index: 18,
283
+ },
284
+ ],
285
+ },
286
+ ],
287
+ },
288
+ ],
289
+ };
290
+ }),
291
+ });
292
+ const tool = createWebSearchTool({
293
+ projectEndpoint: 'https://foundry.example.com',
294
+ modelDeploymentName: 'gpt-4.1-mini',
295
+ }, deps);
296
+ const first = await tool('first query');
297
+ const second = await tool('second query');
298
+ expect(first.results).toHaveLength(1);
299
+ expect(second.results).toHaveLength(1);
300
+ expect(deps.createConversation).toHaveBeenCalledTimes(2);
301
+ expect(deps.deleteConversation).toHaveBeenCalledTimes(2);
302
+ });
303
+ it('retries on 429 rate limiting with exponential backoff', async () => { // First createResponse call fails with a 429 message, the second succeeds.
304
+ let callCount = 0;
305
+ const deps = createFakeDeps({
306
+ createResponse: vi.fn().mockImplementation(async () => {
307
+ callCount += 1;
308
+ if (callCount < 2) { // only the very first call fails
309
+ const error = new Error('Web search query failed: 429 Too Many Requests');
310
+ throw error;
311
+ }
312
+ return {
313
+ output: [
314
+ {
315
+ type: 'message',
316
+ content: [
317
+ {
318
+ type: 'output_text',
319
+ text: 'Result after retry.',
320
+ annotations: [
321
+ {
322
+ type: 'url_citation',
323
+ url: 'https://example.com',
324
+ title: 'Example',
325
+ start_index: 0,
326
+ end_index: 18,
327
+ },
328
+ ],
329
+ },
330
+ ],
331
+ },
332
+ ],
333
+ };
334
+ }),
335
+ });
336
+ const tool = createWebSearchTool({
337
+ projectEndpoint: 'https://foundry.example.com',
338
+ modelDeploymentName: 'gpt-4.1-mini',
339
+ }, deps);
340
+ const result = await tool('test query');
341
+ expect(result.results).toHaveLength(1);
342
+ expect(deps.createResponse).toHaveBeenCalledTimes(2); // one failure + one successful retry; the backoff delay itself is not asserted
343
+ }, 6000); // 6 s per-test timeout; presumably covers a real (non-faked) backoff wait - confirm
344
+ it('stops retrying after MAX_RETRIES and returns degraded', async () => { // createResponse rejects on every call, so retries must eventually give up.
345
+ const deps = createFakeDeps({
346
+ createResponse: vi
347
+ .fn()
348
+ .mockRejectedValue(new Error('Web search query failed: 429 Too Many Requests')),
349
+ });
350
+ const tool = createWebSearchTool({
351
+ projectEndpoint: 'https://foundry.example.com',
352
+ modelDeploymentName: 'gpt-4.1-mini',
353
+ }, deps);
354
+ const result = await tool('test query');
355
+ expect(result.results).toHaveLength(0);
356
+ expect(result.degraded).toBe(true);
357
+ expect(deps.createResponse).toHaveBeenCalledTimes(3); // initial + 2 retries
358
+ }, 12000); // 12 s per-test timeout; presumably sized for the cumulative backoff of all retries - confirm
359
+ it('rotates the underlying agent after several queries', async () => { // Four sequential queries must trigger exactly one agent replacement.
360
+ const deps = createFakeDeps();
361
+ const tool = createWebSearchTool({
362
+ projectEndpoint: 'https://foundry.example.com',
363
+ modelDeploymentName: 'gpt-4.1-mini',
364
+ }, deps);
365
+ await tool('query one');
366
+ await tool('query two');
367
+ await tool('query three');
368
+ await tool('query four');
369
+ expect(deps.createAgentVersion).toHaveBeenCalledTimes(2); // original agent + one replacement; the rotation threshold lives in the implementation, not in this test
370
+ expect(deps.deleteAgentVersion).toHaveBeenCalledTimes(1); // the replaced agent version is deleted
371
+ });
372
+ });
373
+ describe('extractCitations (T021)', () => {
374
+ it('extracts url_citation annotations into results', () => { // Single message, single url_citation -> one result and one source.
375
+ const output = [
376
+ {
377
+ type: 'message',
378
+ content: [
379
+ {
380
+ type: 'output_text',
381
+ text: 'Contoso is a leader in healthcare AI.',
382
+ annotations: [
383
+ {
384
+ type: 'url_citation',
385
+ url: 'https://contoso.com/about',
386
+ title: 'Contoso Ltd - Healthcare AI Solutions',
387
+ start_index: 0,
388
+ end_index: 37,
389
+ },
390
+ ],
391
+ },
392
+ ],
393
+ },
394
+ ];
395
+ const { results, sources } = extractCitations(output);
396
+ expect(results).toHaveLength(1);
397
+ expect(results[0].title).toBe('Contoso Ltd - Healthcare AI Solutions'); // title and url are copied from the annotation
398
+ expect(results[0].url).toBe('https://contoso.com/about');
399
+ expect(sources).toContain('https://contoso.com/about');
400
+ });
401
+ it('deduplicates sources by URL', () => { // Two annotations share one URL but carry different titles ('A' and 'B').
402
+ const output = [
403
+ {
404
+ type: 'message',
405
+ content: [
406
+ {
407
+ type: 'output_text',
408
+ text: 'First ref. Second ref to same source.',
409
+ annotations: [
410
+ {
411
+ type: 'url_citation',
412
+ url: 'https://example.com',
413
+ title: 'A',
414
+ start_index: 0,
415
+ end_index: 10,
416
+ },
417
+ {
418
+ type: 'url_citation',
419
+ url: 'https://example.com',
420
+ title: 'B',
421
+ start_index: 11,
422
+ end_index: 37,
423
+ },
424
+ ],
425
+ },
426
+ ],
427
+ },
428
+ ];
429
+ const { results, sources } = extractCitations(output);
430
+ expect(results).toHaveLength(1); // results are deduplicated too, not only sources; which title survives is not asserted
431
+ expect(sources).toHaveLength(1);
432
+ });
433
+ it('handles multiple distinct citations', () => { // Two annotations with different URLs must both be kept.
434
+ const output = [
435
+ {
436
+ type: 'message',
437
+ content: [
438
+ {
439
+ type: 'output_text',
440
+ text: 'Result text with multiple sources.',
441
+ annotations: [
442
+ {
443
+ type: 'url_citation',
444
+ url: 'https://a.com',
445
+ title: 'Source A',
446
+ start_index: 0,
447
+ end_index: 10,
448
+ },
449
+ {
450
+ type: 'url_citation',
451
+ url: 'https://b.com',
452
+ title: 'Source B',
453
+ start_index: 11,
454
+ end_index: 33,
455
+ },
456
+ ],
457
+ },
458
+ ],
459
+ },
460
+ ];
461
+ const { results, sources } = extractCitations(output);
462
+ expect(results).toHaveLength(2);
463
+ expect(sources).toEqual(['https://a.com', 'https://b.com']); // toEqual on an array also pins the order: annotation order is preserved
464
+ });
465
+ it('returns empty results for output without citations', () => { // extractCitations itself applies no text fallback: no annotations -> nothing returned.
466
+ const output = [
467
+ {
468
+ type: 'message',
469
+ content: [
470
+ {
471
+ type: 'output_text',
472
+ text: 'No citations here.',
473
+ annotations: [],
474
+ },
475
+ ],
476
+ },
477
+ ];
478
+ const { results, sources } = extractCitations(output);
479
+ expect(results).toHaveLength(0);
480
+ expect(sources).toHaveLength(0);
481
+ });
482
+ it('ignores non-url_citation annotations', () => { // Only annotations whose type is 'url_citation' may produce results.
483
+ const output = [
484
+ {
485
+ type: 'message',
486
+ content: [
487
+ {
488
+ type: 'output_text',
489
+ text: 'Some text',
490
+ annotations: [
491
+ { type: 'file_citation', url: 'file://local', title: 'File' }, /* has url and title, so only its type can cause it to be skipped */
492
+ {
493
+ type: 'url_citation',
494
+ url: 'https://valid.com',
495
+ title: 'Valid',
496
+ start_index: 0,
497
+ end_index: 9,
498
+ },
499
+ ],
500
+ },
501
+ ],
502
+ },
503
+ ];
504
+ const { results } = extractCitations(output);
505
+ expect(results).toHaveLength(1);
506
+ expect(results[0].url).toBe('https://valid.com');
507
+ });
508
+ it('ignores non-message output items', () => { // Output items without type 'message' must be skipped without error.
509
+ const output = [
510
+ { type: 'tool_call', name: 'web_search_preview' }, // has no `content` array at all
511
+ {
512
+ type: 'message',
513
+ content: [
514
+ {
515
+ type: 'output_text',
516
+ text: 'Result text.',
517
+ annotations: [
518
+ {
519
+ type: 'url_citation',
520
+ url: 'https://found.com',
521
+ title: 'Found',
522
+ start_index: 0,
523
+ end_index: 12,
524
+ },
525
+ ],
526
+ },
527
+ ],
528
+ },
529
+ ];
530
+ const { results } = extractCitations(output);
531
+ expect(results).toHaveLength(1);
532
+ expect(results[0].url).toBe('https://found.com');
533
+ });
534
+ });
535
+ describe('destroyWebSearchSession', () => {
536
+ it('cleans up the agent on destroy (conversations are per-query)', async () => { // After one query and a destroy, both the conversation and the agent version must have been deleted.
537
+ const deps = createFakeDeps();
538
+ const tool = createWebSearchTool({
539
+ projectEndpoint: 'https://foundry.example.com',
540
+ modelDeploymentName: 'gpt-4.1-mini',
541
+ }, deps);
542
+ // Initialize the session
543
+ await tool('trigger init');
544
+ // Destroy
545
+ await destroyWebSearchSession(); // called without arguments, so it does not receive `tool` or `deps` from this test
546
+ expect(deps.deleteConversation).toHaveBeenCalledWith(expect.anything(), 'conv-123'); // 'conv-123' is presumably the id from the default fake createConversation - confirm; this passes whether the delete happened during the query or during destroy
547
+ expect(deps.deleteAgentVersion).toHaveBeenCalledWith(expect.anything(), 'sofia-web-search', 'v1'); // 'v1' is presumably the version returned by the default fake createAgentVersion - confirm
548
+ });
549
+ it('is safe to call when not initialized', async () => {
550
+ // Should not throw
551
+ await destroyWebSearchSession();
552
+ });
553
+ it('logs warning but does not throw when cleanup fails', async () => { // Both delete fakes reject; destroy must still resolve. The warning itself is not asserted here.
554
+ const deps = createFakeDeps({
555
+ deleteConversation: vi.fn().mockRejectedValue(new Error('cleanup failed')),
556
+ deleteAgentVersion: vi.fn().mockRejectedValue(new Error('cleanup failed')),
557
+ });
558
+ const tool = createWebSearchTool({
559
+ projectEndpoint: 'https://foundry.example.com',
560
+ modelDeploymentName: 'gpt-4.1-mini',
561
+ }, deps);
562
+ await tool('trigger init'); // not wrapped in expect: the query must also survive a rejecting deleteConversation if it cleans up per query
563
+ // Should not throw
564
+ await expect(destroyWebSearchSession()).resolves.toBeUndefined();
565
+ });
566
+ });
567
+ });