sofia-cli 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/.github/agents/copilot-instructions.md +39 -0
  2. package/.github/agents/speckit.analyze.agent.md +184 -0
  3. package/.github/agents/speckit.checklist.agent.md +294 -0
  4. package/.github/agents/speckit.clarify.agent.md +181 -0
  5. package/.github/agents/speckit.constitution.agent.md +84 -0
  6. package/.github/agents/speckit.implement.agent.md +135 -0
  7. package/.github/agents/speckit.plan.agent.md +90 -0
  8. package/.github/agents/speckit.specify.agent.md +258 -0
  9. package/.github/agents/speckit.tasks.agent.md +137 -0
  10. package/.github/agents/speckit.taskstoissues.agent.md +30 -0
  11. package/.github/copilot-instructions.md +257 -0
  12. package/.github/prompts/speckit.analyze.prompt.md +3 -0
  13. package/.github/prompts/speckit.checklist.prompt.md +3 -0
  14. package/.github/prompts/speckit.clarify.prompt.md +3 -0
  15. package/.github/prompts/speckit.constitution.prompt.md +3 -0
  16. package/.github/prompts/speckit.implement.prompt.md +3 -0
  17. package/.github/prompts/speckit.plan.prompt.md +3 -0
  18. package/.github/prompts/speckit.specify.prompt.md +3 -0
  19. package/.github/prompts/speckit.tasks.prompt.md +3 -0
  20. package/.github/prompts/speckit.taskstoissues.prompt.md +3 -0
  21. package/.github/workflows/ci.yml +38 -0
  22. package/.prettierrc +6 -0
  23. package/.specify/memory/constitution.md +181 -0
  24. package/.specify/scripts/bash/check-prerequisites.sh +166 -0
  25. package/.specify/scripts/bash/common.sh +156 -0
  26. package/.specify/scripts/bash/create-new-feature.sh +297 -0
  27. package/.specify/scripts/bash/setup-plan.sh +61 -0
  28. package/.specify/scripts/bash/update-agent-context.sh +810 -0
  29. package/.specify/templates/agent-file-template.md +28 -0
  30. package/.specify/templates/checklist-template.md +40 -0
  31. package/.specify/templates/constitution-template.md +50 -0
  32. package/.specify/templates/plan-template.md +113 -0
  33. package/.specify/templates/spec-template.md +115 -0
  34. package/.specify/templates/tasks-template.md +251 -0
  35. package/.vscode/mcp.json +42 -0
  36. package/.vscode/settings.json +19 -0
  37. package/CODE_OF_CONDUCT.md +128 -0
  38. package/LICENSE +21 -0
  39. package/README.md +213 -0
  40. package/dist/src/cli/developCommand.js +240 -0
  41. package/dist/src/cli/directCommands.js +143 -0
  42. package/dist/src/cli/envLoader.js +16 -0
  43. package/dist/src/cli/exportCommand.js +53 -0
  44. package/dist/src/cli/index.js +203 -0
  45. package/dist/src/cli/ioContext.js +109 -0
  46. package/dist/src/cli/preflight.js +57 -0
  47. package/dist/src/cli/statusCommand.js +110 -0
  48. package/dist/src/cli/workshopCommand.js +400 -0
  49. package/dist/src/develop/checkpointState.js +86 -0
  50. package/dist/src/develop/codeGenerator.js +319 -0
  51. package/dist/src/develop/dynamicScaffolder.js +226 -0
  52. package/dist/src/develop/githubMcpAdapter.js +122 -0
  53. package/dist/src/develop/index.js +15 -0
  54. package/dist/src/develop/mcpContextEnricher.js +195 -0
  55. package/dist/src/develop/pocScaffolder.js +542 -0
  56. package/dist/src/develop/ralphLoop.js +659 -0
  57. package/dist/src/develop/templateRegistry.js +364 -0
  58. package/dist/src/develop/testRunner.js +202 -0
  59. package/dist/src/logging/logger.js +58 -0
  60. package/dist/src/loop/conversationLoop.js +227 -0
  61. package/dist/src/loop/phaseSummarizer.js +87 -0
  62. package/dist/src/mcp/mcpManager.js +267 -0
  63. package/dist/src/mcp/mcpTransport.js +391 -0
  64. package/dist/src/mcp/retryPolicy.js +47 -0
  65. package/dist/src/mcp/webSearch.js +254 -0
  66. package/dist/src/phases/contextSummarizer.js +101 -0
  67. package/dist/src/phases/discoveryEnricher.js +156 -0
  68. package/dist/src/phases/phaseExtractors.js +222 -0
  69. package/dist/src/phases/phaseHandlers.js +328 -0
  70. package/dist/src/prompts/design.md +51 -0
  71. package/dist/src/prompts/develop-boundary.md +51 -0
  72. package/dist/src/prompts/develop.md +111 -0
  73. package/dist/src/prompts/discover.md +58 -0
  74. package/dist/src/prompts/ideate.md +56 -0
  75. package/dist/src/prompts/plan.md +51 -0
  76. package/dist/src/prompts/promptLoader.js +167 -0
  77. package/dist/src/prompts/promptLoader.ts +198 -0
  78. package/dist/src/prompts/select.md +47 -0
  79. package/dist/src/prompts/summarize/README.md +8 -0
  80. package/dist/src/prompts/summarize/design-summary.md +37 -0
  81. package/dist/src/prompts/summarize/develop-summary.md +25 -0
  82. package/dist/src/prompts/summarize/ideate-summary.md +27 -0
  83. package/dist/src/prompts/summarize/plan-summary.md +27 -0
  84. package/dist/src/prompts/summarize/select-summary.md +21 -0
  85. package/dist/src/prompts/system.md +28 -0
  86. package/dist/src/sessions/exportPaths.js +22 -0
  87. package/dist/src/sessions/exportWriter.js +406 -0
  88. package/dist/src/sessions/sessionManager.js +81 -0
  89. package/dist/src/sessions/sessionStore.js +65 -0
  90. package/dist/src/shared/activitySpinner.js +91 -0
  91. package/dist/src/shared/copilotClient.js +129 -0
  92. package/dist/src/shared/data/cards.json +1249 -0
  93. package/dist/src/shared/data/cardsLoader.js +51 -0
  94. package/dist/src/shared/errorClassifier.js +120 -0
  95. package/dist/src/shared/events.js +28 -0
  96. package/dist/src/shared/markdownRenderer.js +34 -0
  97. package/dist/src/shared/schemas/session.js +265 -0
  98. package/dist/src/shared/tableRenderer.js +20 -0
  99. package/dist/src/vendor/chalk.js +2 -0
  100. package/dist/src/vendor/cli-table3.js +3 -0
  101. package/dist/src/vendor/commander.js +2 -0
  102. package/dist/src/vendor/marked-terminal.js +3 -0
  103. package/dist/src/vendor/marked.js +2 -0
  104. package/dist/src/vendor/ora.js +2 -0
  105. package/dist/src/vendor/pino.js +2 -0
  106. package/dist/src/vendor/zod.js +2 -0
  107. package/dist/tests/e2e/developE2e.spec.js +126 -0
  108. package/dist/tests/e2e/developFailureE2e.spec.js +247 -0
  109. package/dist/tests/e2e/developPty.spec.js +75 -0
  110. package/dist/tests/e2e/discoveryWebSearchRelevance.spec.js +84 -0
  111. package/dist/tests/e2e/harness.spec.js +83 -0
  112. package/dist/tests/e2e/mcpLive.spec.js +120 -0
  113. package/dist/tests/e2e/newSession.e2e.spec.js +177 -0
  114. package/dist/tests/e2e/ralphLoopEnrichmentComparison.spec.js +62 -0
  115. package/dist/tests/e2e/workiqEnrichment.spec.js +56 -0
  116. package/dist/tests/e2e/zavaSimulation.spec.js +452 -0
  117. package/dist/tests/fixtures/test-fixture-project/src/add.js +3 -0
  118. package/dist/tests/fixtures/test-fixture-project/tests/failing.test.js +6 -0
  119. package/dist/tests/fixtures/test-fixture-project/tests/hanging.test.js +8 -0
  120. package/dist/tests/fixtures/test-fixture-project/tests/passing.test.js +10 -0
  121. package/dist/tests/fixtures/test-fixture-project/vitest.config.js +6 -0
  122. package/dist/tests/integration/autoStartConversation.spec.js +138 -0
  123. package/dist/tests/integration/defaultCommand.spec.js +147 -0
  124. package/dist/tests/integration/directCommandNonTty.spec.js +224 -0
  125. package/dist/tests/integration/directCommandTty.spec.js +151 -0
  126. package/dist/tests/integration/discoveryEnrichmentFlow.spec.js +175 -0
  127. package/dist/tests/integration/exportArtifacts.spec.js +202 -0
  128. package/dist/tests/integration/exportFallbackFlow.spec.js +99 -0
  129. package/dist/tests/integration/mcpDegradationFlow.spec.js +190 -0
  130. package/dist/tests/integration/mcpTransportFlow.spec.js +139 -0
  131. package/dist/tests/integration/newSessionFlow.spec.js +343 -0
  132. package/dist/tests/integration/pocGithubMcp.spec.js +186 -0
  133. package/dist/tests/integration/pocLocalFallback.spec.js +171 -0
  134. package/dist/tests/integration/pocScaffold.spec.js +163 -0
  135. package/dist/tests/integration/ralphLoopFlow.spec.js +359 -0
  136. package/dist/tests/integration/ralphLoopPartial.spec.js +368 -0
  137. package/dist/tests/integration/resumeAndBacktrack.spec.js +247 -0
  138. package/dist/tests/integration/spinnerLifecycle.spec.js +220 -0
  139. package/dist/tests/integration/summarizationFlow.spec.js +115 -0
  140. package/dist/tests/integration/testRunnerReal.spec.js +52 -0
  141. package/dist/tests/integration/webSearchAgent.spec.js +128 -0
  142. package/dist/tests/live/copilotSdkLive.spec.js +107 -0
  143. package/dist/tests/live/zavaFullWorkshop.spec.js +392 -0
  144. package/dist/tests/setup/loadEnv.js +3 -0
  145. package/dist/tests/unit/cli/developCommand.spec.js +567 -0
  146. package/dist/tests/unit/cli/directCommands.spec.js +279 -0
  147. package/dist/tests/unit/cli/envLoader.spec.js +58 -0
  148. package/dist/tests/unit/cli/ioContext.spec.js +119 -0
  149. package/dist/tests/unit/cli/preflight.spec.js +108 -0
  150. package/dist/tests/unit/cli/statusCommand.spec.js +111 -0
  151. package/dist/tests/unit/cli/workshopClientFallback.spec.js +80 -0
  152. package/dist/tests/unit/cli/workshopCommand.spec.js +329 -0
  153. package/dist/tests/unit/config/vitestEnvSetup.spec.js +13 -0
  154. package/dist/tests/unit/develop/checkpointState.spec.js +315 -0
  155. package/dist/tests/unit/develop/codeGenerator.spec.js +355 -0
  156. package/dist/tests/unit/develop/githubMcpAdapter.spec.js +231 -0
  157. package/dist/tests/unit/develop/mcpContextEnricher.spec.js +433 -0
  158. package/dist/tests/unit/develop/outputValidator.spec.js +119 -0
  159. package/dist/tests/unit/develop/pocScaffolder.spec.js +353 -0
  160. package/dist/tests/unit/develop/ralphLoop.spec.js +1248 -0
  161. package/dist/tests/unit/develop/templateRegistry.spec.js +85 -0
  162. package/dist/tests/unit/develop/testRunner.spec.js +249 -0
  163. package/dist/tests/unit/infraBicep.spec.js +92 -0
  164. package/dist/tests/unit/infraDeploy.spec.js +82 -0
  165. package/dist/tests/unit/infraTeardown.spec.js +63 -0
  166. package/dist/tests/unit/logging/logger.spec.js +43 -0
  167. package/dist/tests/unit/loop/conversationLoop.spec.js +592 -0
  168. package/dist/tests/unit/loop/phaseSummarizer.spec.js +141 -0
  169. package/dist/tests/unit/loop/streamingMarkdown.spec.js +147 -0
  170. package/dist/tests/unit/mcp/mcpManager.spec.js +279 -0
  171. package/dist/tests/unit/mcp/mcpTransport.spec.js +529 -0
  172. package/dist/tests/unit/mcp/retryPolicy.spec.js +218 -0
  173. package/dist/tests/unit/mcp/timeoutValidation.spec.js +46 -0
  174. package/dist/tests/unit/mcp/webSearch.spec.js +567 -0
  175. package/dist/tests/unit/phases/contextSummarizer.spec.js +140 -0
  176. package/dist/tests/unit/phases/discoveryEnricher.repeatCalls.spec.js +93 -0
  177. package/dist/tests/unit/phases/discoveryEnricher.spec.js +411 -0
  178. package/dist/tests/unit/phases/phaseExtractors.spec.js +352 -0
  179. package/dist/tests/unit/phases/phaseHandlers.spec.js +425 -0
  180. package/dist/tests/unit/prompts/promptLoader.spec.js +118 -0
  181. package/dist/tests/unit/schemas/pocSchemas.spec.js +412 -0
  182. package/dist/tests/unit/schemas/session.spec.js +257 -0
  183. package/dist/tests/unit/sessions/exportPaths.spec.js +31 -0
  184. package/dist/tests/unit/sessions/exportWriter.spec.js +655 -0
  185. package/dist/tests/unit/sessions/sessionManager.spec.js +151 -0
  186. package/dist/tests/unit/sessions/sessionStore.spec.js +116 -0
  187. package/dist/tests/unit/shared/activitySpinner.spec.js +175 -0
  188. package/dist/tests/unit/shared/cardsLoader.spec.js +76 -0
  189. package/dist/tests/unit/shared/copilotClient.spec.js +155 -0
  190. package/dist/tests/unit/shared/errorClassifier.spec.js +131 -0
  191. package/dist/tests/unit/shared/events.spec.js +55 -0
  192. package/dist/tests/unit/shared/markdownRenderer.spec.js +35 -0
  193. package/dist/tests/unit/shared/markdownRendererChunks.spec.js +70 -0
  194. package/dist/tests/unit/shared/tableRenderer.spec.js +34 -0
  195. package/dist/vitest.config.js +14 -0
  196. package/dist/vitest.live.config.js +18 -0
  197. package/docs/README.md +35 -0
  198. package/docs/architecture.md +169 -0
  199. package/docs/cli-usage.md +207 -0
  200. package/docs/environment.md +66 -0
  201. package/docs/export-format.md +146 -0
  202. package/docs/session-model.md +113 -0
  203. package/eslint.config.js +35 -0
  204. package/infra/deploy.sh +193 -0
  205. package/infra/gather-env.sh +211 -0
  206. package/infra/main.bicep +90 -0
  207. package/infra/main.bicepparam +18 -0
  208. package/infra/resources.bicep +134 -0
  209. package/infra/teardown.sh +114 -0
  210. package/package.json +63 -0
  211. package/specs/001-cli-workshop-rebuild/checklists/requirements.md +35 -0
  212. package/specs/001-cli-workshop-rebuild/contracts/cli.md +59 -0
  213. package/specs/001-cli-workshop-rebuild/contracts/export-summary-json.md +23 -0
  214. package/specs/001-cli-workshop-rebuild/contracts/session-json.md +30 -0
  215. package/specs/001-cli-workshop-rebuild/data-model.md +210 -0
  216. package/specs/001-cli-workshop-rebuild/plan.md +361 -0
  217. package/specs/001-cli-workshop-rebuild/quickstart.md +83 -0
  218. package/specs/001-cli-workshop-rebuild/research.md +116 -0
  219. package/specs/001-cli-workshop-rebuild/spec.md +240 -0
  220. package/specs/001-cli-workshop-rebuild/tasks.md +476 -0
  221. package/specs/002-poc-generation/contracts/poc-output.md +172 -0
  222. package/specs/002-poc-generation/contracts/ralph-loop.md +113 -0
  223. package/specs/002-poc-generation/data-model.md +172 -0
  224. package/specs/002-poc-generation/plan.md +109 -0
  225. package/specs/002-poc-generation/quickstart.md +97 -0
  226. package/specs/002-poc-generation/research.md +786 -0
  227. package/specs/002-poc-generation/spec.md +81 -0
  228. package/specs/002-poc-generation/tasks-fix.md +198 -0
  229. package/specs/002-poc-generation/tasks.md +252 -0
  230. package/specs/003-mcp-transport-integration/checklists/requirements.md +37 -0
  231. package/specs/003-mcp-transport-integration/contracts/context-enricher.md +220 -0
  232. package/specs/003-mcp-transport-integration/contracts/discovery-enricher.md +267 -0
  233. package/specs/003-mcp-transport-integration/contracts/github-adapter.md +149 -0
  234. package/specs/003-mcp-transport-integration/contracts/mcp-transport.md +288 -0
  235. package/specs/003-mcp-transport-integration/data-model.md +326 -0
  236. package/specs/003-mcp-transport-integration/plan.md +114 -0
  237. package/specs/003-mcp-transport-integration/quickstart.md +311 -0
  238. package/specs/003-mcp-transport-integration/research.md +395 -0
  239. package/specs/003-mcp-transport-integration/spec.md +234 -0
  240. package/specs/003-mcp-transport-integration/tasks.md +324 -0
  241. package/specs/003-next-spec-gaps.md +150 -0
  242. package/specs/004-dev-resume-hardening/checklists/requirements.md +37 -0
  243. package/specs/004-dev-resume-hardening/contracts/cli.md +160 -0
  244. package/specs/004-dev-resume-hardening/data-model.md +321 -0
  245. package/specs/004-dev-resume-hardening/plan.md +107 -0
  246. package/specs/004-dev-resume-hardening/quickstart.md +115 -0
  247. package/specs/004-dev-resume-hardening/research.md +142 -0
  248. package/specs/004-dev-resume-hardening/spec.md +221 -0
  249. package/specs/004-dev-resume-hardening/tasks.md +333 -0
  250. package/specs/005-ai-search-deploy/checklists/requirements.md +39 -0
  251. package/specs/005-ai-search-deploy/contracts/web-search-tool.md +241 -0
  252. package/specs/005-ai-search-deploy/data-model.md +130 -0
  253. package/specs/005-ai-search-deploy/plan.md +93 -0
  254. package/specs/005-ai-search-deploy/quickstart.md +96 -0
  255. package/specs/005-ai-search-deploy/research.md +187 -0
  256. package/specs/005-ai-search-deploy/spec.md +143 -0
  257. package/specs/005-ai-search-deploy/tasks.md +284 -0
  258. package/specs/006-workshop-extraction-fixes/checklists/requirements.md +61 -0
  259. package/specs/006-workshop-extraction-fixes/contracts/summarization-and-export.md +131 -0
  260. package/specs/006-workshop-extraction-fixes/data-model.md +149 -0
  261. package/specs/006-workshop-extraction-fixes/plan.md +123 -0
  262. package/specs/006-workshop-extraction-fixes/quickstart.md +101 -0
  263. package/specs/006-workshop-extraction-fixes/research.md +143 -0
  264. package/specs/006-workshop-extraction-fixes/spec.md +210 -0
  265. package/specs/006-workshop-extraction-fixes/tasks.md +316 -0
  266. package/src/cli/developCommand.ts +308 -0
  267. package/src/cli/directCommands.ts +195 -0
  268. package/src/cli/envLoader.ts +17 -0
  269. package/src/cli/exportCommand.ts +65 -0
  270. package/src/cli/index.ts +249 -0
  271. package/src/cli/ioContext.ts +139 -0
  272. package/src/cli/preflight.ts +86 -0
  273. package/src/cli/statusCommand.ts +118 -0
  274. package/src/cli/workshopCommand.ts +496 -0
  275. package/src/develop/checkpointState.ts +121 -0
  276. package/src/develop/codeGenerator.ts +402 -0
  277. package/src/develop/dynamicScaffolder.ts +284 -0
  278. package/src/develop/githubMcpAdapter.ts +199 -0
  279. package/src/develop/index.ts +34 -0
  280. package/src/develop/mcpContextEnricher.ts +279 -0
  281. package/src/develop/pocScaffolder.ts +646 -0
  282. package/src/develop/ralphLoop.ts +1044 -0
  283. package/src/develop/templateRegistry.ts +427 -0
  284. package/src/develop/testRunner.ts +276 -0
  285. package/src/logging/logger.ts +73 -0
  286. package/src/loop/conversationLoop.ts +355 -0
  287. package/src/loop/phaseSummarizer.ts +114 -0
  288. package/src/mcp/mcpManager.ts +365 -0
  289. package/src/mcp/mcpTransport.ts +562 -0
  290. package/src/mcp/retryPolicy.ts +87 -0
  291. package/src/mcp/webSearch.ts +388 -0
  292. package/src/originalPrompts/design_thinking.md +178 -0
  293. package/src/originalPrompts/design_thinking_persona.md +76 -0
  294. package/src/originalPrompts/document_generator_example.md +77 -0
  295. package/src/originalPrompts/document_generator_persona.md +47 -0
  296. package/src/originalPrompts/facilitator_persona.md +125 -0
  297. package/src/originalPrompts/guardrails.md +47 -0
  298. package/src/phases/contextSummarizer.ts +154 -0
  299. package/src/phases/discoveryEnricher.ts +223 -0
  300. package/src/phases/phaseExtractors.ts +247 -0
  301. package/src/phases/phaseHandlers.ts +450 -0
  302. package/src/prompts/design.md +51 -0
  303. package/src/prompts/develop-boundary.md +51 -0
  304. package/src/prompts/develop.md +111 -0
  305. package/src/prompts/discover.md +58 -0
  306. package/src/prompts/ideate.md +56 -0
  307. package/src/prompts/plan.md +51 -0
  308. package/src/prompts/promptLoader.ts +198 -0
  309. package/src/prompts/select.md +47 -0
  310. package/src/prompts/summarize/README.md +8 -0
  311. package/src/prompts/summarize/design-summary.md +37 -0
  312. package/src/prompts/summarize/develop-summary.md +25 -0
  313. package/src/prompts/summarize/ideate-summary.md +27 -0
  314. package/src/prompts/summarize/plan-summary.md +27 -0
  315. package/src/prompts/summarize/select-summary.md +21 -0
  316. package/src/prompts/system.md +28 -0
  317. package/src/sessions/exportPaths.ts +28 -0
  318. package/src/sessions/exportWriter.ts +490 -0
  319. package/src/sessions/sessionManager.ts +119 -0
  320. package/src/sessions/sessionStore.ts +69 -0
  321. package/src/shared/activitySpinner.ts +108 -0
  322. package/src/shared/copilotClient.ts +291 -0
  323. package/src/shared/data/cards.json +1249 -0
  324. package/src/shared/data/cardsLoader.ts +70 -0
  325. package/src/shared/errorClassifier.ts +160 -0
  326. package/src/shared/events.ts +103 -0
  327. package/src/shared/markdownRenderer.ts +44 -0
  328. package/src/shared/schemas/session.ts +346 -0
  329. package/src/shared/tableRenderer.ts +28 -0
  330. package/src/types/marked-terminal.d.ts +5 -0
  331. package/src/vendor/chalk.ts +2 -0
  332. package/src/vendor/cli-table3.ts +3 -0
  333. package/src/vendor/commander.ts +2 -0
  334. package/src/vendor/marked-terminal.ts +3 -0
  335. package/src/vendor/marked.ts +2 -0
  336. package/src/vendor/ora.ts +2 -0
  337. package/src/vendor/pino.ts +3 -0
  338. package/src/vendor/zod.ts +3 -0
  339. package/tests/e2e/developE2e.spec.ts +152 -0
  340. package/tests/e2e/developFailureE2e.spec.ts +289 -0
  341. package/tests/e2e/developPty.spec.ts +86 -0
  342. package/tests/e2e/discoveryWebSearchRelevance.spec.ts +103 -0
  343. package/tests/e2e/harness.spec.ts +104 -0
  344. package/tests/e2e/mcpLive.spec.ts +149 -0
  345. package/tests/e2e/newSession.e2e.spec.ts +245 -0
  346. package/tests/e2e/ralphLoopEnrichmentComparison.spec.ts +70 -0
  347. package/tests/e2e/workiqEnrichment.spec.ts +72 -0
  348. package/tests/e2e/zava-assessment/agent-interaction-script.md +258 -0
  349. package/tests/e2e/zava-assessment/company-profile.md +98 -0
  350. package/tests/e2e/zava-assessment/expected-results-checklist.md +454 -0
  351. package/tests/e2e/zavaSimulation.spec.ts +511 -0
  352. package/tests/fixtures/completedSession.json +141 -0
  353. package/tests/fixtures/test-fixture-project/package-lock.json +1585 -0
  354. package/tests/fixtures/test-fixture-project/package.json +12 -0
  355. package/tests/fixtures/test-fixture-project/src/add.ts +3 -0
  356. package/tests/fixtures/test-fixture-project/tests/failing.test.ts +7 -0
  357. package/tests/fixtures/test-fixture-project/tests/hanging.test.ts +9 -0
  358. package/tests/fixtures/test-fixture-project/tests/passing.test.ts +13 -0
  359. package/tests/fixtures/test-fixture-project/vitest.config.ts +7 -0
  360. package/tests/integration/autoStartConversation.spec.ts +168 -0
  361. package/tests/integration/defaultCommand.spec.ts +179 -0
  362. package/tests/integration/directCommandNonTty.spec.ts +260 -0
  363. package/tests/integration/directCommandTty.spec.ts +185 -0
  364. package/tests/integration/discoveryEnrichmentFlow.spec.ts +209 -0
  365. package/tests/integration/exportArtifacts.spec.ts +232 -0
  366. package/tests/integration/exportFallbackFlow.spec.ts +115 -0
  367. package/tests/integration/mcpDegradationFlow.spec.ts +231 -0
  368. package/tests/integration/mcpTransportFlow.spec.ts +178 -0
  369. package/tests/integration/newSessionFlow.spec.ts +406 -0
  370. package/tests/integration/pocGithubMcp.spec.ts +224 -0
  371. package/tests/integration/pocLocalFallback.spec.ts +205 -0
  372. package/tests/integration/pocScaffold.spec.ts +220 -0
  373. package/tests/integration/ralphLoopFlow.spec.ts +430 -0
  374. package/tests/integration/ralphLoopPartial.spec.ts +416 -0
  375. package/tests/integration/resumeAndBacktrack.spec.ts +278 -0
  376. package/tests/integration/spinnerLifecycle.spec.ts +270 -0
  377. package/tests/integration/summarizationFlow.spec.ts +135 -0
  378. package/tests/integration/testRunnerReal.spec.ts +63 -0
  379. package/tests/integration/webSearchAgent.spec.ts +155 -0
  380. package/tests/live/copilotSdkLive.spec.ts +149 -0
  381. package/tests/live/zavaFullWorkshop.spec.ts +515 -0
  382. package/tests/setup/loadEnv.ts +5 -0
  383. package/tests/unit/cli/developCommand.spec.ts +679 -0
  384. package/tests/unit/cli/directCommands.spec.ts +325 -0
  385. package/tests/unit/cli/envLoader.spec.ts +73 -0
  386. package/tests/unit/cli/ioContext.spec.ts +148 -0
  387. package/tests/unit/cli/preflight.spec.ts +125 -0
  388. package/tests/unit/cli/statusCommand.spec.ts +134 -0
  389. package/tests/unit/cli/workshopClientFallback.spec.ts +100 -0
  390. package/tests/unit/cli/workshopCommand.spec.ts +378 -0
  391. package/tests/unit/config/vitestEnvSetup.spec.ts +24 -0
  392. package/tests/unit/develop/checkpointState.spec.ts +378 -0
  393. package/tests/unit/develop/codeGenerator.spec.ts +447 -0
  394. package/tests/unit/develop/githubMcpAdapter.spec.ts +283 -0
  395. package/tests/unit/develop/mcpContextEnricher.spec.ts +564 -0
  396. package/tests/unit/develop/outputValidator.spec.ts +134 -0
  397. package/tests/unit/develop/pocScaffolder.spec.ts +451 -0
  398. package/tests/unit/develop/ralphLoop.spec.ts +1439 -0
  399. package/tests/unit/develop/templateRegistry.spec.ts +106 -0
  400. package/tests/unit/develop/testRunner.spec.ts +294 -0
  401. package/tests/unit/infraBicep.spec.ts +116 -0
  402. package/tests/unit/infraDeploy.spec.ts +102 -0
  403. package/tests/unit/infraTeardown.spec.ts +77 -0
  404. package/tests/unit/logging/logger.spec.ts +50 -0
  405. package/tests/unit/loop/conversationLoop.spec.ts +719 -0
  406. package/tests/unit/loop/phaseSummarizer.spec.ts +169 -0
  407. package/tests/unit/loop/streamingMarkdown.spec.ts +180 -0
  408. package/tests/unit/mcp/mcpManager.spec.ts +336 -0
  409. package/tests/unit/mcp/mcpTransport.spec.ts +689 -0
  410. package/tests/unit/mcp/retryPolicy.spec.ts +278 -0
  411. package/tests/unit/mcp/timeoutValidation.spec.ts +55 -0
  412. package/tests/unit/mcp/webSearch.spec.ts +718 -0
  413. package/tests/unit/phases/contextSummarizer.spec.ts +158 -0
  414. package/tests/unit/phases/discoveryEnricher.repeatCalls.spec.ts +125 -0
  415. package/tests/unit/phases/discoveryEnricher.spec.ts +512 -0
  416. package/tests/unit/phases/phaseExtractors.spec.ts +406 -0
  417. package/tests/unit/phases/phaseHandlers.spec.ts +483 -0
  418. package/tests/unit/prompts/promptLoader.spec.ts +144 -0
  419. package/tests/unit/schemas/pocSchemas.spec.ts +457 -0
  420. package/tests/unit/schemas/session.spec.ts +328 -0
  421. package/tests/unit/sessions/exportPaths.spec.ts +38 -0
  422. package/tests/unit/sessions/exportWriter.spec.ts +737 -0
  423. package/tests/unit/sessions/sessionManager.spec.ts +174 -0
  424. package/tests/unit/sessions/sessionStore.spec.ts +136 -0
  425. package/tests/unit/shared/activitySpinner.spec.ts +211 -0
  426. package/tests/unit/shared/cardsLoader.spec.ts +89 -0
  427. package/tests/unit/shared/copilotClient.spec.ts +185 -0
  428. package/tests/unit/shared/errorClassifier.spec.ts +152 -0
  429. package/tests/unit/shared/events.spec.ts +71 -0
  430. package/tests/unit/shared/markdownRenderer.spec.ts +42 -0
  431. package/tests/unit/shared/markdownRendererChunks.spec.ts +83 -0
  432. package/tests/unit/shared/tableRenderer.spec.ts +38 -0
  433. package/tsconfig.json +20 -0
  434. package/vitest.config.ts +15 -0
  435. package/vitest.live.config.ts +19 -0
@@ -0,0 +1,220 @@
1
+ /**
2
+ * Integration tests for spinner lifecycle in ConversationLoop (T089).
3
+ *
4
+ * Verifies the full spinner lifecycle during streaming: "Thinking..." appears
5
+ * after user input, transitions on ToolCall events, prints tool summary on
6
+ * ToolResult, stops on first TextDelta, and handles multi-tool sequences.
7
+ */
8
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
9
+ import { Writable } from 'node:stream';
10
+ import { ConversationLoop, } from '../../src/loop/conversationLoop.js';
11
+ import { ActivitySpinner } from '../../src/shared/activitySpinner.js';
12
+ import { createTextDeltaEvent, createToolCallEvent, createToolResultEvent, } from '../../src/shared/events.js';
13
+ // ── Helpers ─────────────────────────────────────────────────────────────────
14
/**
 * Build a session fixture for the spinner lifecycle tests.
 *
 * @param {object} [overrides] - Properties merged over the defaults; an
 *   override wins whenever it shares a key with a default.
 * @returns {object} A fresh session object in the 'Discover' phase.
 */
function makeSession(overrides) {
  const defaults = {
    sessionId: 'spinner-int-test',
    schemaVersion: '1.0.0',
    createdAt: '2025-01-01T00:00:00Z',
    updatedAt: '2025-01-01T00:00:00Z',
    phase: 'Discover',
    status: 'Active',
    participants: [],
    artifacts: { generatedFiles: [] },
  };
  // Spreading undefined/null is a no-op, so calling with no argument is fine.
  return { ...defaults, ...overrides };
}
27
/**
 * Build a fake IO object that replays scripted user input and records
 * everything written to it.
 *
 * @param {Array} inputs - Values returned one at a time by `readInput()`.
 * @param {{ json?: boolean, tty?: boolean }} [opts] - Mode flags; `json`
 *   falls back to false and `tty` to true when missing or nullish.
 * @returns {object} IO double exposing `_written`, `_activities` and
 *   `_toolSummaries` getters for inspecting what was recorded.
 */
function makeIO(inputs, opts) {
  const jsonMode = opts?.json ?? false;
  const ttyMode = opts?.tty ?? true;
  const writtenLog = [];
  const activityLog = [];
  const toolSummaryLog = [];
  let cursor = 0;

  return {
    write(text) {
      writtenLog.push(text);
    },
    writeActivity(text) {
      activityLog.push(text);
    },
    writeToolSummary(toolName, summary) {
      toolSummaryLog.push({ toolName, summary });
    },
    async readInput() {
      if (cursor < inputs.length) {
        const next = inputs[cursor];
        cursor += 1;
        // A nullish scripted entry is reported as null, same as exhaustion.
        return next ?? null;
      }
      // Script exhausted.
      return null;
    },
    async showDecisionGate() {
      return { choice: 'continue' };
    },
    isJsonMode: jsonMode,
    isTTY: ttyMode,
    get _written() {
      return writtenLog;
    },
    get _activities() {
      return activityLog;
    },
    get _toolSummaries() {
      return toolSummaryLog;
    },
  };
}
49
/**
 * Build a stub phase handler for the 'Discover' phase.
 *
 * @param {object} [overrides] - Properties merged over the stub defaults.
 * @returns {object} Handler with `phase`, `buildSystemPrompt` and
 *   `extractResult`, plus anything supplied in `overrides`.
 */
function makePhaseHandler(overrides) {
  const buildSystemPrompt = () => 'System prompt';
  const extractResult = () => ({});
  const stub = { phase: 'Discover', buildSystemPrompt, extractResult };
  return { ...stub, ...overrides };
}
57
/**
 * Create a Writable stream that keeps everything written to it in memory.
 *
 * @returns {Writable} Stream with an added `getOutput()` method that returns
 *   all chunks received so far, stringified and concatenated.
 */
function createCaptureStream() {
  const captured = [];
  const sink = new Writable({
    write(data, _enc, done) {
      captured.push(data.toString());
      done();
    },
  });
  return Object.assign(sink, { getOutput: () => captured.join('') });
}
68
/**
 * Create a fake CopilotClient whose sessions replay scripted event sequences.
 * This allows testing ToolCall → ToolResult → TextDelta sequences.
 *
 * Each `send()` consumes the next entry of `eventSequences`; the position is
 * shared by every session created from the same client. Once the script is
 * exhausted, a single '[No more events]' text delta is produced instead.
 *
 * @param {Array<Array<object>>} eventSequences - One array of events per expected `send()` call.
 * @returns {{ createSession: Function }} Client double.
 */
function createEventSequenceClient(eventSequences) {
  let nextSequence = 0;

  // Async generator that replays one scripted sequence in order.
  async function* replay(scripted) {
    for (const scriptedEvent of scripted) {
      yield scriptedEvent;
    }
  }

  return {
    async createSession(_opts) {
      const sentMessages = [];
      return {
        send(message) {
          sentMessages.push(message);
          // The sequence is picked when send() is called, not when iteration starts.
          const scripted =
            eventSequences[nextSequence] ?? [createTextDeltaEvent('[No more events]')];
          nextSequence += 1;
          return { [Symbol.asyncIterator]: () => replay(scripted) };
        },
        // Hand out a copy so callers cannot mutate the recorded history.
        getHistory: () => sentMessages.slice(),
      };
    },
  };
}
95
+ // ── Tests ────────────────────────────────────────────────────────────────────
96
describe('Spinner lifecycle integration (T089)', () => {
  /** Creates a TTY-mode ActivitySpinner that renders into an in-memory capture stream. */
  const makeTtySpinner = () => {
    const stream = createCaptureStream();
    return new ActivitySpinner({ isTTY: true, isJsonMode: false, stream });
  };

  /**
   * Runs a ConversationLoop over a single scripted turn and resolves with the
   * fake IO so the caller can inspect what was written.
   * The `spinner` key is only passed to the loop when a spinner is supplied.
   */
  const runScriptedTurn = async ({ events, input, tty, spinner }) => {
    const client = createEventSequenceClient([events]);
    const io = makeIO([input], { tty });
    const loop = new ConversationLoop({
      client,
      io,
      session: makeSession(),
      phaseHandler: makePhaseHandler(),
      ...(spinner === undefined ? {} : { spinner }),
    });
    await loop.run();
    return io;
  };

  beforeEach(() => {
    // NOTE(review): presumably clears SIGINT handlers left by earlier loops — confirm.
    process.removeAllListeners('SIGINT');
  });

  it('starts Thinking spinner before sending, stops on first TextDelta', async () => {
    const spinner = makeTtySpinner();
    const startSpy = vi.spyOn(spinner, 'startThinking');
    const stopSpy = vi.spyOn(spinner, 'stop');

    await runScriptedTurn({
      events: [createTextDeltaEvent('Hello from LLM')],
      input: 'test input',
      tty: true,
      spinner,
    });

    expect(startSpy).toHaveBeenCalled();
    expect(stopSpy).toHaveBeenCalled();
    expect(spinner.isActive()).toBe(false);
  });

  it('transitions spinner to tool name on ToolCall, completes on ToolResult', async () => {
    const spinner = makeTtySpinner();
    const toolCallSpy = vi.spyOn(spinner, 'startToolCall');
    const completeSpy = vi.spyOn(spinner, 'completeToolCall');

    await runScriptedTurn({
      events: [
        createToolCallEvent('WorkIQ', { query: 'logistics' }),
        createToolResultEvent('WorkIQ', 'Found 5 processes'),
        createTextDeltaEvent('Based on the analysis...'),
      ],
      input: 'analyze my processes',
      tty: true,
      spinner,
    });

    expect(toolCallSpy).toHaveBeenCalledWith('WorkIQ');
    expect(completeSpy).toHaveBeenCalled();
    expect(spinner.isActive()).toBe(false);
  });

  it('handles multi-tool sequences (ToolCall → ToolResult → ToolCall → ToolResult → TextDelta)', async () => {
    const spinner = makeTtySpinner();
    const toolCallSpy = vi.spyOn(spinner, 'startToolCall');
    const completeSpy = vi.spyOn(spinner, 'completeToolCall');

    const io = await runScriptedTurn({
      events: [
        createToolCallEvent('WorkIQ', { query: 'tasks' }),
        createToolResultEvent('WorkIQ', 'Found 3 tasks'),
        createToolCallEvent('Context7', { doc: 'azure-ai' }),
        createToolResultEvent('Context7', '12 docs retrieved'),
        createTextDeltaEvent('Here are my findings...'),
      ],
      input: 'research tasks',
      tty: true,
      spinner,
    });

    expect(toolCallSpy).toHaveBeenCalledTimes(2);
    expect(completeSpy).toHaveBeenCalledTimes(2);
    // One summary per tool, in the order the tools were called.
    const summaries = io._toolSummaries;
    expect(summaries.length).toBe(2);
    expect(summaries[0].toolName).toBe('WorkIQ');
    expect(summaries[1].toolName).toBe('Context7');
  });

  it('writes tool summaries to IO on ToolResult events', async () => {
    const spinner = makeTtySpinner();

    const io = await runScriptedTurn({
      events: [
        createToolCallEvent('GitHub', { repo: 'test' }),
        createToolResultEvent('GitHub', 'Found 8 repos'),
        createTextDeltaEvent('The repo results are...'),
      ],
      input: 'search repos',
      tty: true,
      spinner,
    });

    expect(io._toolSummaries).toEqual([
      { toolName: 'GitHub', summary: expect.stringContaining('Found 8 repos') },
    ]);
  });

  it('no-op spinner works without errors in non-TTY mode', async () => {
    // No spinner is supplied, so the loop has to cope with its own default.
    const io = await runScriptedTurn({
      events: [
        createToolCallEvent('WorkIQ', { query: 'test' }),
        createToolResultEvent('WorkIQ', 'ok'),
        createTextDeltaEvent('Results.'),
      ],
      input: 'query',
      tty: false,
    });

    // Reaching this point means run() resolved; the streamed text must be in the output.
    const allOutput = io._written.join('');
    expect(allOutput).toContain('Results.');
  });
});
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Integration test: Summarization flow.
3
+ *
4
+ * Tests the full pipeline: ConversationLoop → phaseSummarize → session updated.
5
+ * Verifies that when inline extraction fails, the post-phase summarization
6
+ * call extracts structured data from the transcript.
7
+ */
8
+ import { describe, it, expect, vi } from 'vitest';
9
+ import { ConversationLoop } from '../../src/loop/conversationLoop.js';
10
/**
 * Builds a stubbed IO object for driving a ConversationLoop in tests.
 *
 * All writer methods are bare spies. `readInput` resolves to `null` on every
 * call (treated as immediate EOF by the tests below), and the decision gate
 * always resolves to the same `{ choice: 'continue' }` object.
 *
 * @returns {object} A fresh IO stub with non-TTY, non-JSON mode flags.
 */
function makeIO() {
    const gateDecision = { choice: 'continue' };
    const readInput = vi.fn(() => Promise.resolve(null)); // EOF immediately
    const showDecisionGate = vi.fn(() => Promise.resolve(gateDecision));
    return {
        write: vi.fn(),
        writeActivity: vi.fn(),
        writeToolSummary: vi.fn(),
        readInput,
        showDecisionGate,
        isJsonMode: false,
        isTTY: false,
    };
}
21
/**
 * Creates a minimal session record in the 'Ideate' phase for integration tests.
 *
 * @param {object} [overrides] - Properties merged over the defaults; later keys win.
 * @returns {object} A new session object (fresh arrays/objects on every call).
 */
function makeSession(overrides) {
    const fixedTimestamp = '2025-01-01T00:00:00Z';
    const defaults = {
        sessionId: 'integration-test',
        schemaVersion: '1.0.0',
        createdAt: fixedTimestamp,
        updatedAt: fixedTimestamp,
        phase: 'Ideate',
        status: 'Active',
        participants: [],
        artifacts: { generatedFiles: [] },
        turns: [],
    };
    // Spreading `undefined` is a no-op, so calling without arguments yields the defaults.
    return { ...defaults, ...overrides };
}
35
describe('summarization flow integration', () => {
    it('populates session.ideas via summarization when inline extraction fails', async () => {
        const ideas = [
            { id: 'idea-1', title: 'AI Chatbot', description: 'Automated support', workflowStepIds: ['s1'] },
        ];
        // Extraction only succeeds for a response that mentions "idea-1"; the
        // plain conversational reply therefore yields an empty result
        // (simulates the LLM not embedding JSON inline).
        const extractResult = vi.fn((_session, response) => (response.includes('idea-1') ? { ideas } : {}));
        const handler = {
            phase: 'Ideate',
            buildSystemPrompt: () => 'You are an Ideate facilitator.',
            extractResult,
            getInitialMessage: () => 'Start ideation.',
        };
        // The counter is shared by every session the fake client hands out and
        // advances when a send() stream starts being consumed.
        let sendsStarted = 0;
        const send = async function* () {
            sendsStarted += 1;
            if (sendsStarted > 1) {
                // Any later call: summarization (returns JSON)
                yield { type: 'TextDelta', text: ['```json', JSON.stringify(ideas), '```'].join('\n') };
                return;
            }
            // First call: regular conversation (no JSON)
            yield { type: 'TextDelta', text: 'Here are some ideas for your business.' };
        };
        const fakeClient = {
            createSession: vi.fn(async () => ({ send: vi.fn(send) })),
        };
        const loop = new ConversationLoop({
            client: fakeClient,
            io: makeIO(),
            session: makeSession(),
            phaseHandler: handler,
            initialMessage: 'Start ideation.',
        });
        const result = await loop.run();
        // The summarization call should have populated ideas
        expect(result.ideas).toEqual(ideas);
    });
    it('skips summarization when inline extraction succeeds', async () => {
        const ideas = [
            { id: 'idea-1', title: 'Test', description: 'Desc', workflowStepIds: [] },
        ];
        // Inline extraction always succeeds here.
        const handler = {
            phase: 'Ideate',
            buildSystemPrompt: () => 'Ideate prompt.',
            extractResult: vi.fn(() => ({ ideas })),
            getInitialMessage: () => 'Start.',
        };
        let sessionCalls = 0;
        const openSession = async () => {
            sessionCalls += 1;
            const send = vi.fn(async function* () {
                yield { type: 'TextDelta', text: 'Ideas generated.' };
            });
            return { send };
        };
        const fakeClient = { createSession: vi.fn(openSession) };
        const loop = new ConversationLoop({
            client: fakeClient,
            io: makeIO(),
            session: makeSession(),
            phaseHandler: handler,
            initialMessage: 'Start.',
        });
        await loop.run();
        // Only one session should be created (no summarization call needed)
        expect(sessionCalls).toBe(1);
    });
});
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Integration tests for TestRunner using real fixture project.
3
+ *
4
+ * T042: Passing tests verify correct pass/fail/skip counts
5
+ * T043: Failing tests verify failure details parsed correctly
6
+ * T044: Timeout handling with hanging test fixture
7
+ */
8
+ import { describe, it, expect } from 'vitest';
9
+ import { join } from 'node:path';
10
+ import { TestRunner } from '../../src/develop/testRunner.js';
11
// Fixture project the TestRunner is pointed at, resolved relative to this file.
const FIXTURE_DIR = join(import.meta.dirname, '..', 'fixtures', 'test-fixture-project');
describe('testRunner real fixture integration', () => {
    /**
     * Runs `testCommand` through a new TestRunner inside the fixture project.
     *
     * @param {string} testCommand - Command handed to the runner.
     * @param {number} timeoutMs - Runner timeout in milliseconds.
     * @returns {Promise<object>} The runner's result object.
     */
    const runInFixture = (testCommand, timeoutMs) => {
        const runner = new TestRunner({ testCommand, timeoutMs });
        return runner.run(FIXTURE_DIR);
    };
    it('parses passing test results correctly (T042)', async () => {
        // Run only the passing test file
        const outcome = await runInFixture('npx vitest run tests/passing.test.ts --reporter=json', 30_000);
        expect(outcome.passed).toBe(2);
        expect(outcome.failed).toBe(0);
        expect(outcome.total).toBe(2);
        expect(outcome.durationMs).toBeGreaterThan(0);
    }, 45_000);
    it('parses failing test results correctly (T043)', async () => {
        const outcome = await runInFixture('npx vitest run tests/failing.test.ts --reporter=json', 30_000);
        // The JSON output may be truncated for large failure messages, so the
        // unconditional checks only cover completion and captured output.
        expect(outcome.durationMs).toBeGreaterThan(0);
        expect(outcome.rawOutput).toBeDefined();
        // When JSON is parseable (short enough), failures should be detected
        const failuresDetected = outcome.failed > 0;
        if (failuresDetected) {
            expect(outcome.total).toBeGreaterThanOrEqual(1);
            expect(outcome.failures.length).toBeGreaterThan(0);
        }
    }, 45_000);
    it('handles timeout with SIGTERM→SIGKILL for hanging test (T044)', async () => {
        // Short runner timeout to trigger hang detection
        const outcome = await runInFixture('npx vitest run tests/hanging.test.ts --reporter=json', 5_000);
        // Should have timed out — zero results
        expect(outcome.passed).toBe(0);
        expect(outcome.total).toBe(0);
        expect(outcome.rawOutput).toContain('timed out');
    }, 15_000); // Allow enough time for timeout + SIGKILL delay
});
@@ -0,0 +1,128 @@
1
+ /**
2
+ * Integration test for ephemeral agent lifecycle (T022).
3
+ *
4
+ * Tests the full lifecycle: create agent → query with per-call conversation → cleanup
5
+ * using faked AIProjectClient to verify:
6
+ * - Agent is created on first call
7
+ * - Agent is reused on second call
8
+ * - Conversations are created/deleted per query
9
+ * - Agent is deleted on destroyWebSearchSession()
10
+ */
11
+ import { describe, it, expect, vi, afterEach } from 'vitest';
12
+ import { createWebSearchTool, destroyWebSearchSession } from '../../src/mcp/webSearch.js';
13
/**
 * Builds a set of spy-backed fake dependencies for the web search tool.
 *
 * Each fake appends a label to the shared `callLog` array when invoked, so
 * tests can assert on the exact order of calls. `createClient` is synchronous;
 * every other fake is async.
 *
 * @returns {object} The fakes plus the `callLog` array they write to.
 */
function createFakeAgentDeps() {
    const callLog = [];
    // Async spy that records `label` and then resolves with whatever `produce` returns.
    const recorded = (label, produce = () => undefined) => vi.fn(async () => {
        callLog.push(label);
        return produce();
    });
    // Canned response: one message containing one text part with one URL citation.
    const cannedResponse = () => {
        const citation = {
            type: 'url_citation',
            url: 'https://example.com',
            title: 'Example',
            start_index: 0,
            end_index: 18,
        };
        const textPart = {
            type: 'output_text',
            text: 'Search result text',
            annotations: [citation],
        };
        return { output: [{ type: 'message', content: [textPart] }] };
    };
    return {
        callLog,
        createClient: vi.fn(() => {
            callLog.push('createClient');
            return { id: 'client-1' };
        }),
        getOpenAIClient: recorded('getOpenAIClient', () => ({ id: 'openai-1' })),
        createAgentVersion: recorded('createAgent', () => ({ name: 'sofia-web-search', version: 'v1' })),
        deleteAgentVersion: recorded('deleteAgent'),
        createConversation: recorded('createConversation', () => ({ id: 'conv-abc' })),
        deleteConversation: recorded('deleteConversation'),
        createResponse: recorded('createResponse', cannedResponse),
    };
}
66
describe('ephemeral agent lifecycle (T022)', () => {
    // Creates the tool under test with the endpoint/model settings every case shares.
    const buildTool = (deps) => createWebSearchTool({
        projectEndpoint: 'https://foundry.example.com',
        modelDeploymentName: 'gpt-4.1-mini',
    }, deps);
    // Truncates the log in place: the fakes hold a reference to this same array.
    const clearLog = (deps) => {
        deps.callLog.length = 0;
    };
    // Tear the session down after every case, including cases that already destroyed it.
    afterEach(async () => {
        await destroyWebSearchSession();
    });
    it('creates agent on first call, reuses on second, cleans up on destroy', async () => {
        const deps = createFakeAgentDeps();
        const tool = buildTool(deps);
        // First call — should initialize
        const firstResult = await tool('first query');
        expect(firstResult.results).toHaveLength(1);
        const expectedInitSequence = [
            'createClient',
            'getOpenAIClient',
            'createAgent',
            'createConversation',
            'createResponse',
            'deleteConversation',
        ];
        expect(deps.callLog).toEqual(expectedInitSequence);
        // Second call — should reuse agent and create/delete a fresh conversation
        clearLog(deps);
        const secondResult = await tool('second query');
        expect(secondResult.results).toHaveLength(1);
        expect(deps.callLog).toEqual(['createConversation', 'createResponse', 'deleteConversation']);
        // Cleanup — should delete agent (conversation already deleted per query)
        clearLog(deps);
        await destroyWebSearchSession();
        expect(deps.callLog).toEqual(['deleteAgent']);
    });
    it('transitions: uninitialized → initialized → cleaned up', async () => {
        const deps = createFakeAgentDeps();
        const tool = buildTool(deps);
        // State: uninitialized — destroy is a no-op
        await destroyWebSearchSession();
        expect(deps.deleteAgentVersion).not.toHaveBeenCalled();
        // State: initialized (after first query)
        await tool('init query');
        expect(deps.createAgentVersion).toHaveBeenCalledTimes(1);
        // State: cleaned up
        await destroyWebSearchSession();
        expect(deps.deleteAgentVersion).toHaveBeenCalledTimes(1);
        // Second destroy is a no-op
        await destroyWebSearchSession();
        expect(deps.deleteAgentVersion).toHaveBeenCalledTimes(1);
    });
    it('handles cleanup failure gracefully', async () => {
        const deps = createFakeAgentDeps();
        // Swap both delete fakes for always-rejecting ones before the tool is built.
        deps.deleteConversation = vi.fn().mockRejectedValue(new Error('404 Not Found'));
        deps.deleteAgentVersion = vi.fn().mockRejectedValue(new Error('500 Internal Error'));
        const tool = buildTool(deps);
        await tool('init');
        // Should not throw despite cleanup failures
        await expect(destroyWebSearchSession()).resolves.toBeUndefined();
    });
});
@@ -0,0 +1,107 @@
1
+ /**
2
+ * Integration tests for the live Copilot SDK client.
3
+ *
4
+ * These tests exercise the real `createCopilotClient()` → SDK → LLM pipeline.
5
+ * They are slower than unit tests (~10-30s each) because they make real API calls.
6
+ *
7
+ * **Prerequisites:**
8
+ * - GitHub Copilot CLI must be authenticated (`copilot auth login`)
9
+ * - The SDK spawns a local copilot CLI process for JSON-RPC
10
+ *
11
+ * The test suite auto-skips if the SDK cannot start (e.g., no auth, no CLI binary).
12
+ */
13
+ import { describe, it, expect, beforeAll, afterAll } from 'vitest';
14
+ import { createCopilotClient } from '../../src/shared/copilotClient.js';
15
+ // ── Helpers ──────────────────────────────────────────────────────────────────
16
/**
 * Drains an (async) iterable of events and concatenates the `text` of every
 * event whose `type` is 'TextDelta'; all other events are ignored.
 *
 * @param {AsyncIterable<object>|Iterable<object>} iter - Event stream to consume.
 * @returns {Promise<string>} The joined text, or '' when no TextDelta was seen.
 */
async function collectText(iter) {
    const pieces = [];
    for await (const evt of iter) {
        if (evt.type !== 'TextDelta') {
            continue;
        }
        pieces.push(evt.text);
    }
    return pieces.join('');
}
26
+ // ── Suite ────────────────────────────────────────────────────────────────────
27
describe('Live Copilot SDK client', () => {
    let client;
    // Set to true only when createCopilotClient() resolved in beforeAll.
    let canRun = false;
    beforeAll(async () => {
        try {
            client = await createCopilotClient();
            canRun = true;
        }
        catch (err) {
            // Client creation failure is expected in environments without auth/CLI;
            // the tests below mark themselves as skipped instead of failing.
            console.warn(`Skipping live Copilot SDK tests — client creation failed: ${err instanceof Error ? err.message : err}`);
        }
    }, 30_000);
    afterAll(async () => {
        // Intentionally empty: this suite performs no explicit client shutdown.
    });
    // ── Basic smoke test ────────────────────────────────────────────────────
    it('can create a session and get a response', async ({ skip }) => {
        // Report the test as skipped (not passed) when the client is unavailable.
        if (!canRun) {
            skip();
        }
        const session = await client.createSession({
            systemPrompt: 'You are a helpful assistant. Be very brief.',
        });
        const response = await collectText(session.send({ role: 'user', content: 'What is 2 + 2? Reply with just the number.' }));
        expect(response).toBeTruthy();
        expect(response.length).toBeGreaterThan(0);
        // The LLM should mention "4" somewhere in the response
        expect(response).toContain('4');
    }, 60_000);
    // ── Multi-turn conversation ─────────────────────────────────────────────
    it('supports multi-turn conversation', async ({ skip }) => {
        if (!canRun) {
            skip();
        }
        const session = await client.createSession({
            systemPrompt: 'You are a helpful assistant. Keep responses to one sentence. ' +
                'When asked to recall, use the conversation history.',
        });
        // Turn 1: set a fact
        const r1 = await collectText(session.send({ role: 'user', content: 'Remember this word: "tangerine".' }));
        expect(r1).toBeTruthy();
        // Turn 2: recall the fact (same session, so history carries over)
        const r2 = await collectText(session.send({ role: 'user', content: 'What word did I ask you to remember?' }));
        expect(r2.toLowerCase()).toContain('tangerine');
    }, 120_000);
    // ── System prompt respected ─────────────────────────────────────────────
    it('respects the system prompt persona', async ({ skip }) => {
        if (!canRun) {
            skip();
        }
        const session = await client.createSession({
            systemPrompt: 'You are a pirate. Always respond in pirate-speak. Keep responses under 50 words.',
        });
        const response = await collectText(session.send({ role: 'user', content: 'Hello, how are you today?' }));
        expect(response).toBeTruthy();
        // LLM playing pirate should use at least one pirate-ish word.
        // The match is deliberately loose (substring, case-insensitive).
        const piratePatterns = /ahoy|matey|arr|ye|shiver|landlubber|cap'n|seas|treasure|sail/i;
        expect(response).toMatch(piratePatterns);
    }, 60_000);
    // ── History tracking ────────────────────────────────────────────────────
    it('tracks conversation history correctly', async ({ skip }) => {
        if (!canRun) {
            skip();
        }
        const session = await client.createSession({
            systemPrompt: 'You are a helpful assistant. Be very brief.',
        });
        await collectText(session.send({ role: 'user', content: 'Say hello.' }));
        const history = session.getHistory();
        // Should have at least: user message + assistant response
        expect(history.length).toBeGreaterThanOrEqual(2);
        expect(history[0].role).toBe('user');
        expect(history[0].content).toBe('Say hello.');
        expect(history[1].role).toBe('assistant');
        expect(history[1].content.length).toBeGreaterThan(0);
    }, 60_000);
    // ── Interface shape ─────────────────────────────────────────────────────
    it('createCopilotClient returns a valid interface', async ({ skip }) => {
        if (!canRun) {
            skip();
        }
        expect(client).toBeDefined();
        expect(typeof client.createSession).toBe('function');
    });
});