sofia-cli 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/.github/agents/copilot-instructions.md +39 -0
  2. package/.github/agents/speckit.analyze.agent.md +184 -0
  3. package/.github/agents/speckit.checklist.agent.md +294 -0
  4. package/.github/agents/speckit.clarify.agent.md +181 -0
  5. package/.github/agents/speckit.constitution.agent.md +84 -0
  6. package/.github/agents/speckit.implement.agent.md +135 -0
  7. package/.github/agents/speckit.plan.agent.md +90 -0
  8. package/.github/agents/speckit.specify.agent.md +258 -0
  9. package/.github/agents/speckit.tasks.agent.md +137 -0
  10. package/.github/agents/speckit.taskstoissues.agent.md +30 -0
  11. package/.github/copilot-instructions.md +257 -0
  12. package/.github/prompts/speckit.analyze.prompt.md +3 -0
  13. package/.github/prompts/speckit.checklist.prompt.md +3 -0
  14. package/.github/prompts/speckit.clarify.prompt.md +3 -0
  15. package/.github/prompts/speckit.constitution.prompt.md +3 -0
  16. package/.github/prompts/speckit.implement.prompt.md +3 -0
  17. package/.github/prompts/speckit.plan.prompt.md +3 -0
  18. package/.github/prompts/speckit.specify.prompt.md +3 -0
  19. package/.github/prompts/speckit.tasks.prompt.md +3 -0
  20. package/.github/prompts/speckit.taskstoissues.prompt.md +3 -0
  21. package/.github/workflows/ci.yml +38 -0
  22. package/.prettierrc +6 -0
  23. package/.specify/memory/constitution.md +181 -0
  24. package/.specify/scripts/bash/check-prerequisites.sh +166 -0
  25. package/.specify/scripts/bash/common.sh +156 -0
  26. package/.specify/scripts/bash/create-new-feature.sh +297 -0
  27. package/.specify/scripts/bash/setup-plan.sh +61 -0
  28. package/.specify/scripts/bash/update-agent-context.sh +810 -0
  29. package/.specify/templates/agent-file-template.md +28 -0
  30. package/.specify/templates/checklist-template.md +40 -0
  31. package/.specify/templates/constitution-template.md +50 -0
  32. package/.specify/templates/plan-template.md +113 -0
  33. package/.specify/templates/spec-template.md +115 -0
  34. package/.specify/templates/tasks-template.md +251 -0
  35. package/.vscode/mcp.json +42 -0
  36. package/.vscode/settings.json +19 -0
  37. package/CODE_OF_CONDUCT.md +128 -0
  38. package/LICENSE +21 -0
  39. package/README.md +213 -0
  40. package/dist/src/cli/developCommand.js +240 -0
  41. package/dist/src/cli/directCommands.js +143 -0
  42. package/dist/src/cli/envLoader.js +16 -0
  43. package/dist/src/cli/exportCommand.js +53 -0
  44. package/dist/src/cli/index.js +203 -0
  45. package/dist/src/cli/ioContext.js +109 -0
  46. package/dist/src/cli/preflight.js +57 -0
  47. package/dist/src/cli/statusCommand.js +110 -0
  48. package/dist/src/cli/workshopCommand.js +400 -0
  49. package/dist/src/develop/checkpointState.js +86 -0
  50. package/dist/src/develop/codeGenerator.js +319 -0
  51. package/dist/src/develop/dynamicScaffolder.js +226 -0
  52. package/dist/src/develop/githubMcpAdapter.js +122 -0
  53. package/dist/src/develop/index.js +15 -0
  54. package/dist/src/develop/mcpContextEnricher.js +195 -0
  55. package/dist/src/develop/pocScaffolder.js +542 -0
  56. package/dist/src/develop/ralphLoop.js +659 -0
  57. package/dist/src/develop/templateRegistry.js +364 -0
  58. package/dist/src/develop/testRunner.js +202 -0
  59. package/dist/src/logging/logger.js +58 -0
  60. package/dist/src/loop/conversationLoop.js +227 -0
  61. package/dist/src/loop/phaseSummarizer.js +87 -0
  62. package/dist/src/mcp/mcpManager.js +267 -0
  63. package/dist/src/mcp/mcpTransport.js +391 -0
  64. package/dist/src/mcp/retryPolicy.js +47 -0
  65. package/dist/src/mcp/webSearch.js +254 -0
  66. package/dist/src/phases/contextSummarizer.js +101 -0
  67. package/dist/src/phases/discoveryEnricher.js +156 -0
  68. package/dist/src/phases/phaseExtractors.js +222 -0
  69. package/dist/src/phases/phaseHandlers.js +328 -0
  70. package/dist/src/prompts/design.md +51 -0
  71. package/dist/src/prompts/develop-boundary.md +51 -0
  72. package/dist/src/prompts/develop.md +111 -0
  73. package/dist/src/prompts/discover.md +58 -0
  74. package/dist/src/prompts/ideate.md +56 -0
  75. package/dist/src/prompts/plan.md +51 -0
  76. package/dist/src/prompts/promptLoader.js +167 -0
  77. package/dist/src/prompts/promptLoader.ts +198 -0
  78. package/dist/src/prompts/select.md +47 -0
  79. package/dist/src/prompts/summarize/README.md +8 -0
  80. package/dist/src/prompts/summarize/design-summary.md +37 -0
  81. package/dist/src/prompts/summarize/develop-summary.md +25 -0
  82. package/dist/src/prompts/summarize/ideate-summary.md +27 -0
  83. package/dist/src/prompts/summarize/plan-summary.md +27 -0
  84. package/dist/src/prompts/summarize/select-summary.md +21 -0
  85. package/dist/src/prompts/system.md +28 -0
  86. package/dist/src/sessions/exportPaths.js +22 -0
  87. package/dist/src/sessions/exportWriter.js +406 -0
  88. package/dist/src/sessions/sessionManager.js +81 -0
  89. package/dist/src/sessions/sessionStore.js +65 -0
  90. package/dist/src/shared/activitySpinner.js +91 -0
  91. package/dist/src/shared/copilotClient.js +129 -0
  92. package/dist/src/shared/data/cards.json +1249 -0
  93. package/dist/src/shared/data/cardsLoader.js +51 -0
  94. package/dist/src/shared/errorClassifier.js +120 -0
  95. package/dist/src/shared/events.js +28 -0
  96. package/dist/src/shared/markdownRenderer.js +34 -0
  97. package/dist/src/shared/schemas/session.js +265 -0
  98. package/dist/src/shared/tableRenderer.js +20 -0
  99. package/dist/src/vendor/chalk.js +2 -0
  100. package/dist/src/vendor/cli-table3.js +3 -0
  101. package/dist/src/vendor/commander.js +2 -0
  102. package/dist/src/vendor/marked-terminal.js +3 -0
  103. package/dist/src/vendor/marked.js +2 -0
  104. package/dist/src/vendor/ora.js +2 -0
  105. package/dist/src/vendor/pino.js +2 -0
  106. package/dist/src/vendor/zod.js +2 -0
  107. package/dist/tests/e2e/developE2e.spec.js +126 -0
  108. package/dist/tests/e2e/developFailureE2e.spec.js +247 -0
  109. package/dist/tests/e2e/developPty.spec.js +75 -0
  110. package/dist/tests/e2e/discoveryWebSearchRelevance.spec.js +84 -0
  111. package/dist/tests/e2e/harness.spec.js +83 -0
  112. package/dist/tests/e2e/mcpLive.spec.js +120 -0
  113. package/dist/tests/e2e/newSession.e2e.spec.js +177 -0
  114. package/dist/tests/e2e/ralphLoopEnrichmentComparison.spec.js +62 -0
  115. package/dist/tests/e2e/workiqEnrichment.spec.js +56 -0
  116. package/dist/tests/e2e/zavaSimulation.spec.js +452 -0
  117. package/dist/tests/fixtures/test-fixture-project/src/add.js +3 -0
  118. package/dist/tests/fixtures/test-fixture-project/tests/failing.test.js +6 -0
  119. package/dist/tests/fixtures/test-fixture-project/tests/hanging.test.js +8 -0
  120. package/dist/tests/fixtures/test-fixture-project/tests/passing.test.js +10 -0
  121. package/dist/tests/fixtures/test-fixture-project/vitest.config.js +6 -0
  122. package/dist/tests/integration/autoStartConversation.spec.js +138 -0
  123. package/dist/tests/integration/defaultCommand.spec.js +147 -0
  124. package/dist/tests/integration/directCommandNonTty.spec.js +224 -0
  125. package/dist/tests/integration/directCommandTty.spec.js +151 -0
  126. package/dist/tests/integration/discoveryEnrichmentFlow.spec.js +175 -0
  127. package/dist/tests/integration/exportArtifacts.spec.js +202 -0
  128. package/dist/tests/integration/exportFallbackFlow.spec.js +99 -0
  129. package/dist/tests/integration/mcpDegradationFlow.spec.js +190 -0
  130. package/dist/tests/integration/mcpTransportFlow.spec.js +139 -0
  131. package/dist/tests/integration/newSessionFlow.spec.js +343 -0
  132. package/dist/tests/integration/pocGithubMcp.spec.js +186 -0
  133. package/dist/tests/integration/pocLocalFallback.spec.js +171 -0
  134. package/dist/tests/integration/pocScaffold.spec.js +163 -0
  135. package/dist/tests/integration/ralphLoopFlow.spec.js +359 -0
  136. package/dist/tests/integration/ralphLoopPartial.spec.js +368 -0
  137. package/dist/tests/integration/resumeAndBacktrack.spec.js +247 -0
  138. package/dist/tests/integration/spinnerLifecycle.spec.js +220 -0
  139. package/dist/tests/integration/summarizationFlow.spec.js +115 -0
  140. package/dist/tests/integration/testRunnerReal.spec.js +52 -0
  141. package/dist/tests/integration/webSearchAgent.spec.js +128 -0
  142. package/dist/tests/live/copilotSdkLive.spec.js +107 -0
  143. package/dist/tests/live/zavaFullWorkshop.spec.js +392 -0
  144. package/dist/tests/setup/loadEnv.js +3 -0
  145. package/dist/tests/unit/cli/developCommand.spec.js +567 -0
  146. package/dist/tests/unit/cli/directCommands.spec.js +279 -0
  147. package/dist/tests/unit/cli/envLoader.spec.js +58 -0
  148. package/dist/tests/unit/cli/ioContext.spec.js +119 -0
  149. package/dist/tests/unit/cli/preflight.spec.js +108 -0
  150. package/dist/tests/unit/cli/statusCommand.spec.js +111 -0
  151. package/dist/tests/unit/cli/workshopClientFallback.spec.js +80 -0
  152. package/dist/tests/unit/cli/workshopCommand.spec.js +329 -0
  153. package/dist/tests/unit/config/vitestEnvSetup.spec.js +13 -0
  154. package/dist/tests/unit/develop/checkpointState.spec.js +315 -0
  155. package/dist/tests/unit/develop/codeGenerator.spec.js +355 -0
  156. package/dist/tests/unit/develop/githubMcpAdapter.spec.js +231 -0
  157. package/dist/tests/unit/develop/mcpContextEnricher.spec.js +433 -0
  158. package/dist/tests/unit/develop/outputValidator.spec.js +119 -0
  159. package/dist/tests/unit/develop/pocScaffolder.spec.js +353 -0
  160. package/dist/tests/unit/develop/ralphLoop.spec.js +1248 -0
  161. package/dist/tests/unit/develop/templateRegistry.spec.js +85 -0
  162. package/dist/tests/unit/develop/testRunner.spec.js +249 -0
  163. package/dist/tests/unit/infraBicep.spec.js +92 -0
  164. package/dist/tests/unit/infraDeploy.spec.js +82 -0
  165. package/dist/tests/unit/infraTeardown.spec.js +63 -0
  166. package/dist/tests/unit/logging/logger.spec.js +43 -0
  167. package/dist/tests/unit/loop/conversationLoop.spec.js +592 -0
  168. package/dist/tests/unit/loop/phaseSummarizer.spec.js +141 -0
  169. package/dist/tests/unit/loop/streamingMarkdown.spec.js +147 -0
  170. package/dist/tests/unit/mcp/mcpManager.spec.js +279 -0
  171. package/dist/tests/unit/mcp/mcpTransport.spec.js +529 -0
  172. package/dist/tests/unit/mcp/retryPolicy.spec.js +218 -0
  173. package/dist/tests/unit/mcp/timeoutValidation.spec.js +46 -0
  174. package/dist/tests/unit/mcp/webSearch.spec.js +567 -0
  175. package/dist/tests/unit/phases/contextSummarizer.spec.js +140 -0
  176. package/dist/tests/unit/phases/discoveryEnricher.repeatCalls.spec.js +93 -0
  177. package/dist/tests/unit/phases/discoveryEnricher.spec.js +411 -0
  178. package/dist/tests/unit/phases/phaseExtractors.spec.js +352 -0
  179. package/dist/tests/unit/phases/phaseHandlers.spec.js +425 -0
  180. package/dist/tests/unit/prompts/promptLoader.spec.js +118 -0
  181. package/dist/tests/unit/schemas/pocSchemas.spec.js +412 -0
  182. package/dist/tests/unit/schemas/session.spec.js +257 -0
  183. package/dist/tests/unit/sessions/exportPaths.spec.js +31 -0
  184. package/dist/tests/unit/sessions/exportWriter.spec.js +655 -0
  185. package/dist/tests/unit/sessions/sessionManager.spec.js +151 -0
  186. package/dist/tests/unit/sessions/sessionStore.spec.js +116 -0
  187. package/dist/tests/unit/shared/activitySpinner.spec.js +175 -0
  188. package/dist/tests/unit/shared/cardsLoader.spec.js +76 -0
  189. package/dist/tests/unit/shared/copilotClient.spec.js +155 -0
  190. package/dist/tests/unit/shared/errorClassifier.spec.js +131 -0
  191. package/dist/tests/unit/shared/events.spec.js +55 -0
  192. package/dist/tests/unit/shared/markdownRenderer.spec.js +35 -0
  193. package/dist/tests/unit/shared/markdownRendererChunks.spec.js +70 -0
  194. package/dist/tests/unit/shared/tableRenderer.spec.js +34 -0
  195. package/dist/vitest.config.js +14 -0
  196. package/dist/vitest.live.config.js +18 -0
  197. package/docs/README.md +35 -0
  198. package/docs/architecture.md +169 -0
  199. package/docs/cli-usage.md +207 -0
  200. package/docs/environment.md +66 -0
  201. package/docs/export-format.md +146 -0
  202. package/docs/session-model.md +113 -0
  203. package/eslint.config.js +35 -0
  204. package/infra/deploy.sh +193 -0
  205. package/infra/gather-env.sh +211 -0
  206. package/infra/main.bicep +90 -0
  207. package/infra/main.bicepparam +18 -0
  208. package/infra/resources.bicep +134 -0
  209. package/infra/teardown.sh +114 -0
  210. package/package.json +63 -0
  211. package/specs/001-cli-workshop-rebuild/checklists/requirements.md +35 -0
  212. package/specs/001-cli-workshop-rebuild/contracts/cli.md +59 -0
  213. package/specs/001-cli-workshop-rebuild/contracts/export-summary-json.md +23 -0
  214. package/specs/001-cli-workshop-rebuild/contracts/session-json.md +30 -0
  215. package/specs/001-cli-workshop-rebuild/data-model.md +210 -0
  216. package/specs/001-cli-workshop-rebuild/plan.md +361 -0
  217. package/specs/001-cli-workshop-rebuild/quickstart.md +83 -0
  218. package/specs/001-cli-workshop-rebuild/research.md +116 -0
  219. package/specs/001-cli-workshop-rebuild/spec.md +240 -0
  220. package/specs/001-cli-workshop-rebuild/tasks.md +476 -0
  221. package/specs/002-poc-generation/contracts/poc-output.md +172 -0
  222. package/specs/002-poc-generation/contracts/ralph-loop.md +113 -0
  223. package/specs/002-poc-generation/data-model.md +172 -0
  224. package/specs/002-poc-generation/plan.md +109 -0
  225. package/specs/002-poc-generation/quickstart.md +97 -0
  226. package/specs/002-poc-generation/research.md +786 -0
  227. package/specs/002-poc-generation/spec.md +81 -0
  228. package/specs/002-poc-generation/tasks-fix.md +198 -0
  229. package/specs/002-poc-generation/tasks.md +252 -0
  230. package/specs/003-mcp-transport-integration/checklists/requirements.md +37 -0
  231. package/specs/003-mcp-transport-integration/contracts/context-enricher.md +220 -0
  232. package/specs/003-mcp-transport-integration/contracts/discovery-enricher.md +267 -0
  233. package/specs/003-mcp-transport-integration/contracts/github-adapter.md +149 -0
  234. package/specs/003-mcp-transport-integration/contracts/mcp-transport.md +288 -0
  235. package/specs/003-mcp-transport-integration/data-model.md +326 -0
  236. package/specs/003-mcp-transport-integration/plan.md +114 -0
  237. package/specs/003-mcp-transport-integration/quickstart.md +311 -0
  238. package/specs/003-mcp-transport-integration/research.md +395 -0
  239. package/specs/003-mcp-transport-integration/spec.md +234 -0
  240. package/specs/003-mcp-transport-integration/tasks.md +324 -0
  241. package/specs/003-next-spec-gaps.md +150 -0
  242. package/specs/004-dev-resume-hardening/checklists/requirements.md +37 -0
  243. package/specs/004-dev-resume-hardening/contracts/cli.md +160 -0
  244. package/specs/004-dev-resume-hardening/data-model.md +321 -0
  245. package/specs/004-dev-resume-hardening/plan.md +107 -0
  246. package/specs/004-dev-resume-hardening/quickstart.md +115 -0
  247. package/specs/004-dev-resume-hardening/research.md +142 -0
  248. package/specs/004-dev-resume-hardening/spec.md +221 -0
  249. package/specs/004-dev-resume-hardening/tasks.md +333 -0
  250. package/specs/005-ai-search-deploy/checklists/requirements.md +39 -0
  251. package/specs/005-ai-search-deploy/contracts/web-search-tool.md +241 -0
  252. package/specs/005-ai-search-deploy/data-model.md +130 -0
  253. package/specs/005-ai-search-deploy/plan.md +93 -0
  254. package/specs/005-ai-search-deploy/quickstart.md +96 -0
  255. package/specs/005-ai-search-deploy/research.md +187 -0
  256. package/specs/005-ai-search-deploy/spec.md +143 -0
  257. package/specs/005-ai-search-deploy/tasks.md +284 -0
  258. package/specs/006-workshop-extraction-fixes/checklists/requirements.md +61 -0
  259. package/specs/006-workshop-extraction-fixes/contracts/summarization-and-export.md +131 -0
  260. package/specs/006-workshop-extraction-fixes/data-model.md +149 -0
  261. package/specs/006-workshop-extraction-fixes/plan.md +123 -0
  262. package/specs/006-workshop-extraction-fixes/quickstart.md +101 -0
  263. package/specs/006-workshop-extraction-fixes/research.md +143 -0
  264. package/specs/006-workshop-extraction-fixes/spec.md +210 -0
  265. package/specs/006-workshop-extraction-fixes/tasks.md +316 -0
  266. package/src/cli/developCommand.ts +308 -0
  267. package/src/cli/directCommands.ts +195 -0
  268. package/src/cli/envLoader.ts +17 -0
  269. package/src/cli/exportCommand.ts +65 -0
  270. package/src/cli/index.ts +249 -0
  271. package/src/cli/ioContext.ts +139 -0
  272. package/src/cli/preflight.ts +86 -0
  273. package/src/cli/statusCommand.ts +118 -0
  274. package/src/cli/workshopCommand.ts +496 -0
  275. package/src/develop/checkpointState.ts +121 -0
  276. package/src/develop/codeGenerator.ts +402 -0
  277. package/src/develop/dynamicScaffolder.ts +284 -0
  278. package/src/develop/githubMcpAdapter.ts +199 -0
  279. package/src/develop/index.ts +34 -0
  280. package/src/develop/mcpContextEnricher.ts +279 -0
  281. package/src/develop/pocScaffolder.ts +646 -0
  282. package/src/develop/ralphLoop.ts +1044 -0
  283. package/src/develop/templateRegistry.ts +427 -0
  284. package/src/develop/testRunner.ts +276 -0
  285. package/src/logging/logger.ts +73 -0
  286. package/src/loop/conversationLoop.ts +355 -0
  287. package/src/loop/phaseSummarizer.ts +114 -0
  288. package/src/mcp/mcpManager.ts +365 -0
  289. package/src/mcp/mcpTransport.ts +562 -0
  290. package/src/mcp/retryPolicy.ts +87 -0
  291. package/src/mcp/webSearch.ts +388 -0
  292. package/src/originalPrompts/design_thinking.md +178 -0
  293. package/src/originalPrompts/design_thinking_persona.md +76 -0
  294. package/src/originalPrompts/document_generator_example.md +77 -0
  295. package/src/originalPrompts/document_generator_persona.md +47 -0
  296. package/src/originalPrompts/facilitator_persona.md +125 -0
  297. package/src/originalPrompts/guardrails.md +47 -0
  298. package/src/phases/contextSummarizer.ts +154 -0
  299. package/src/phases/discoveryEnricher.ts +223 -0
  300. package/src/phases/phaseExtractors.ts +247 -0
  301. package/src/phases/phaseHandlers.ts +450 -0
  302. package/src/prompts/design.md +51 -0
  303. package/src/prompts/develop-boundary.md +51 -0
  304. package/src/prompts/develop.md +111 -0
  305. package/src/prompts/discover.md +58 -0
  306. package/src/prompts/ideate.md +56 -0
  307. package/src/prompts/plan.md +51 -0
  308. package/src/prompts/promptLoader.ts +198 -0
  309. package/src/prompts/select.md +47 -0
  310. package/src/prompts/summarize/README.md +8 -0
  311. package/src/prompts/summarize/design-summary.md +37 -0
  312. package/src/prompts/summarize/develop-summary.md +25 -0
  313. package/src/prompts/summarize/ideate-summary.md +27 -0
  314. package/src/prompts/summarize/plan-summary.md +27 -0
  315. package/src/prompts/summarize/select-summary.md +21 -0
  316. package/src/prompts/system.md +28 -0
  317. package/src/sessions/exportPaths.ts +28 -0
  318. package/src/sessions/exportWriter.ts +490 -0
  319. package/src/sessions/sessionManager.ts +119 -0
  320. package/src/sessions/sessionStore.ts +69 -0
  321. package/src/shared/activitySpinner.ts +108 -0
  322. package/src/shared/copilotClient.ts +291 -0
  323. package/src/shared/data/cards.json +1249 -0
  324. package/src/shared/data/cardsLoader.ts +70 -0
  325. package/src/shared/errorClassifier.ts +160 -0
  326. package/src/shared/events.ts +103 -0
  327. package/src/shared/markdownRenderer.ts +44 -0
  328. package/src/shared/schemas/session.ts +346 -0
  329. package/src/shared/tableRenderer.ts +28 -0
  330. package/src/types/marked-terminal.d.ts +5 -0
  331. package/src/vendor/chalk.ts +2 -0
  332. package/src/vendor/cli-table3.ts +3 -0
  333. package/src/vendor/commander.ts +2 -0
  334. package/src/vendor/marked-terminal.ts +3 -0
  335. package/src/vendor/marked.ts +2 -0
  336. package/src/vendor/ora.ts +2 -0
  337. package/src/vendor/pino.ts +3 -0
  338. package/src/vendor/zod.ts +3 -0
  339. package/tests/e2e/developE2e.spec.ts +152 -0
  340. package/tests/e2e/developFailureE2e.spec.ts +289 -0
  341. package/tests/e2e/developPty.spec.ts +86 -0
  342. package/tests/e2e/discoveryWebSearchRelevance.spec.ts +103 -0
  343. package/tests/e2e/harness.spec.ts +104 -0
  344. package/tests/e2e/mcpLive.spec.ts +149 -0
  345. package/tests/e2e/newSession.e2e.spec.ts +245 -0
  346. package/tests/e2e/ralphLoopEnrichmentComparison.spec.ts +70 -0
  347. package/tests/e2e/workiqEnrichment.spec.ts +72 -0
  348. package/tests/e2e/zava-assessment/agent-interaction-script.md +258 -0
  349. package/tests/e2e/zava-assessment/company-profile.md +98 -0
  350. package/tests/e2e/zava-assessment/expected-results-checklist.md +454 -0
  351. package/tests/e2e/zavaSimulation.spec.ts +511 -0
  352. package/tests/fixtures/completedSession.json +141 -0
  353. package/tests/fixtures/test-fixture-project/package-lock.json +1585 -0
  354. package/tests/fixtures/test-fixture-project/package.json +12 -0
  355. package/tests/fixtures/test-fixture-project/src/add.ts +3 -0
  356. package/tests/fixtures/test-fixture-project/tests/failing.test.ts +7 -0
  357. package/tests/fixtures/test-fixture-project/tests/hanging.test.ts +9 -0
  358. package/tests/fixtures/test-fixture-project/tests/passing.test.ts +13 -0
  359. package/tests/fixtures/test-fixture-project/vitest.config.ts +7 -0
  360. package/tests/integration/autoStartConversation.spec.ts +168 -0
  361. package/tests/integration/defaultCommand.spec.ts +179 -0
  362. package/tests/integration/directCommandNonTty.spec.ts +260 -0
  363. package/tests/integration/directCommandTty.spec.ts +185 -0
  364. package/tests/integration/discoveryEnrichmentFlow.spec.ts +209 -0
  365. package/tests/integration/exportArtifacts.spec.ts +232 -0
  366. package/tests/integration/exportFallbackFlow.spec.ts +115 -0
  367. package/tests/integration/mcpDegradationFlow.spec.ts +231 -0
  368. package/tests/integration/mcpTransportFlow.spec.ts +178 -0
  369. package/tests/integration/newSessionFlow.spec.ts +406 -0
  370. package/tests/integration/pocGithubMcp.spec.ts +224 -0
  371. package/tests/integration/pocLocalFallback.spec.ts +205 -0
  372. package/tests/integration/pocScaffold.spec.ts +220 -0
  373. package/tests/integration/ralphLoopFlow.spec.ts +430 -0
  374. package/tests/integration/ralphLoopPartial.spec.ts +416 -0
  375. package/tests/integration/resumeAndBacktrack.spec.ts +278 -0
  376. package/tests/integration/spinnerLifecycle.spec.ts +270 -0
  377. package/tests/integration/summarizationFlow.spec.ts +135 -0
  378. package/tests/integration/testRunnerReal.spec.ts +63 -0
  379. package/tests/integration/webSearchAgent.spec.ts +155 -0
  380. package/tests/live/copilotSdkLive.spec.ts +149 -0
  381. package/tests/live/zavaFullWorkshop.spec.ts +515 -0
  382. package/tests/setup/loadEnv.ts +5 -0
  383. package/tests/unit/cli/developCommand.spec.ts +679 -0
  384. package/tests/unit/cli/directCommands.spec.ts +325 -0
  385. package/tests/unit/cli/envLoader.spec.ts +73 -0
  386. package/tests/unit/cli/ioContext.spec.ts +148 -0
  387. package/tests/unit/cli/preflight.spec.ts +125 -0
  388. package/tests/unit/cli/statusCommand.spec.ts +134 -0
  389. package/tests/unit/cli/workshopClientFallback.spec.ts +100 -0
  390. package/tests/unit/cli/workshopCommand.spec.ts +378 -0
  391. package/tests/unit/config/vitestEnvSetup.spec.ts +24 -0
  392. package/tests/unit/develop/checkpointState.spec.ts +378 -0
  393. package/tests/unit/develop/codeGenerator.spec.ts +447 -0
  394. package/tests/unit/develop/githubMcpAdapter.spec.ts +283 -0
  395. package/tests/unit/develop/mcpContextEnricher.spec.ts +564 -0
  396. package/tests/unit/develop/outputValidator.spec.ts +134 -0
  397. package/tests/unit/develop/pocScaffolder.spec.ts +451 -0
  398. package/tests/unit/develop/ralphLoop.spec.ts +1439 -0
  399. package/tests/unit/develop/templateRegistry.spec.ts +106 -0
  400. package/tests/unit/develop/testRunner.spec.ts +294 -0
  401. package/tests/unit/infraBicep.spec.ts +116 -0
  402. package/tests/unit/infraDeploy.spec.ts +102 -0
  403. package/tests/unit/infraTeardown.spec.ts +77 -0
  404. package/tests/unit/logging/logger.spec.ts +50 -0
  405. package/tests/unit/loop/conversationLoop.spec.ts +719 -0
  406. package/tests/unit/loop/phaseSummarizer.spec.ts +169 -0
  407. package/tests/unit/loop/streamingMarkdown.spec.ts +180 -0
  408. package/tests/unit/mcp/mcpManager.spec.ts +336 -0
  409. package/tests/unit/mcp/mcpTransport.spec.ts +689 -0
  410. package/tests/unit/mcp/retryPolicy.spec.ts +278 -0
  411. package/tests/unit/mcp/timeoutValidation.spec.ts +55 -0
  412. package/tests/unit/mcp/webSearch.spec.ts +718 -0
  413. package/tests/unit/phases/contextSummarizer.spec.ts +158 -0
  414. package/tests/unit/phases/discoveryEnricher.repeatCalls.spec.ts +125 -0
  415. package/tests/unit/phases/discoveryEnricher.spec.ts +512 -0
  416. package/tests/unit/phases/phaseExtractors.spec.ts +406 -0
  417. package/tests/unit/phases/phaseHandlers.spec.ts +483 -0
  418. package/tests/unit/prompts/promptLoader.spec.ts +144 -0
  419. package/tests/unit/schemas/pocSchemas.spec.ts +457 -0
  420. package/tests/unit/schemas/session.spec.ts +328 -0
  421. package/tests/unit/sessions/exportPaths.spec.ts +38 -0
  422. package/tests/unit/sessions/exportWriter.spec.ts +737 -0
  423. package/tests/unit/sessions/sessionManager.spec.ts +174 -0
  424. package/tests/unit/sessions/sessionStore.spec.ts +136 -0
  425. package/tests/unit/shared/activitySpinner.spec.ts +211 -0
  426. package/tests/unit/shared/cardsLoader.spec.ts +89 -0
  427. package/tests/unit/shared/copilotClient.spec.ts +185 -0
  428. package/tests/unit/shared/errorClassifier.spec.ts +152 -0
  429. package/tests/unit/shared/events.spec.ts +71 -0
  430. package/tests/unit/shared/markdownRenderer.spec.ts +42 -0
  431. package/tests/unit/shared/markdownRendererChunks.spec.ts +83 -0
  432. package/tests/unit/shared/tableRenderer.spec.ts +38 -0
  433. package/tsconfig.json +20 -0
  434. package/vitest.config.ts +15 -0
  435. package/vitest.live.config.ts +19 -0
@@ -0,0 +1,149 @@
1
+ /**
2
+ * Integration tests for the live Copilot SDK client.
3
+ *
4
+ * These tests exercise the real `createCopilotClient()` → SDK → LLM pipeline.
5
+ * They are slower than unit tests (~10-30s each) because they make real API calls.
6
+ *
7
+ * **Prerequisites:**
8
+ * - GitHub Copilot CLI must be authenticated (`copilot auth login`)
9
+ * - The SDK spawns a local copilot CLI process for JSON-RPC
10
+ *
11
+ * The test suite auto-skips if the SDK cannot start (e.g., no auth, no CLI binary).
12
+ */
13
+ import { describe, it, expect, beforeAll, afterAll } from 'vitest';
14
+
15
+ import { createCopilotClient } from '../../src/shared/copilotClient.js';
16
+ import type { CopilotClient, ConversationSession, CopilotMessage } from '../../src/shared/copilotClient.js';
17
+
18
+ // ── Helpers ──────────────────────────────────────────────────────────────────
19
+
20
+ /** Drain an event stream and return the concatenated text of its TextDelta events, in arrival order. */
21
+ async function collectText(iter: AsyncIterable<import('../../src/shared/events.js').SofiaEvent>): Promise<string> {
22
+ const parts: string[] = [];
23
+ for await (const evt of iter) {
24
+ // Only TextDelta events contribute text; every other event type is ignored.
25
+ if (evt.type !== 'TextDelta') continue;
26
+ parts.push(evt.text);
27
+ }
28
+ return parts.join('');
29
+ }
30
+
31
+ // ── Suite ────────────────────────────────────────────────────────────────────
32
+
33
+ describe('Live Copilot SDK client', () => {
34
+ let client: CopilotClient;
35
+ let canRun = false; // flipped to true by beforeAll once the real client has been created
36
+ // Create the client once for the whole suite; on failure every test marks itself as skipped.
37
+ beforeAll(async () => {
38
+ try {
39
+ client = await createCopilotClient();
40
+ canRun = true;
41
+ } catch (err) {
42
+ console.warn(
43
+ `Skipping live Copilot SDK tests — client creation failed: ${
44
+ err instanceof Error ? err.message : err
45
+ }`,
46
+ );
47
+ }
48
+ }, 30_000);
49
+
50
+ afterAll(async () => {
51
+ // The SDK client manages its own lifecycle; no explicit stop is needed
52
+ // from our wrapper, so this hook is intentionally a no-op.
53
+ });
54
+
55
+ // ── Basic smoke test ────────────────────────────────────────────────────
56
+
57
+ it('can create a session and get a response', async ({ skip }) => {
58
+ if (!canRun) skip(); // report as skipped rather than silently passing when the SDK is unavailable
59
+
60
+ const session: ConversationSession = await client.createSession({
61
+ systemPrompt: 'You are a helpful assistant. Be very brief.',
62
+ });
63
+
64
+ const response = await collectText(
65
+ session.send({ role: 'user', content: 'What is 2 + 2? Reply with just the number.' }),
66
+ );
67
+
68
+ expect(response).toBeTruthy();
69
+ expect(response.length).toBeGreaterThan(0);
70
+ // The LLM should mention "4" somewhere in the response
71
+ expect(response).toContain('4');
72
+ }, 60_000);
73
+
74
+ // ── Multi-turn conversation ─────────────────────────────────────────────
75
+
76
+ it('supports multi-turn conversation', async ({ skip }) => {
77
+ if (!canRun) skip();
78
+
79
+ const session = await client.createSession({
80
+ systemPrompt:
81
+ 'You are a helpful assistant. Keep responses to one sentence. ' +
82
+ 'When asked to recall, use the conversation history.',
83
+ });
84
+
85
+ // Turn 1: set a fact
86
+ const r1 = await collectText(
87
+ session.send({ role: 'user', content: 'Remember this word: "tangerine".' }),
88
+ );
89
+ expect(r1).toBeTruthy();
90
+
91
+ // Turn 2: recall the fact
92
+ const r2 = await collectText(
93
+ session.send({ role: 'user', content: 'What word did I ask you to remember?' }),
94
+ );
95
+ expect(r2.toLowerCase()).toContain('tangerine');
96
+ }, 120_000);
97
+
98
+ // ── System prompt respected ─────────────────────────────────────────────
99
+
100
+ it('respects the system prompt persona', async ({ skip }) => {
101
+ if (!canRun) skip();
102
+
103
+ const session = await client.createSession({
104
+ systemPrompt:
105
+ 'You are a pirate. Always respond in pirate-speak. Keep responses under 50 words.',
106
+ });
107
+
108
+ const response = await collectText(
109
+ session.send({ role: 'user', content: 'Hello, how are you today?' }),
110
+ );
111
+
112
+ expect(response).toBeTruthy();
113
+ // LLM playing pirate should use at least one pirate-ish word
114
+ const piratePatterns = /ahoy|matey|arr|ye|shiver|landlubber|cap'n|seas|treasure|sail/i;
115
+ expect(response).toMatch(piratePatterns);
116
+ }, 60_000);
117
+
118
+ // ── History tracking ────────────────────────────────────────────────────
119
+
120
+ it('tracks conversation history correctly', async ({ skip }) => {
121
+ if (!canRun) skip();
122
+
123
+ const session = await client.createSession({
124
+ systemPrompt: 'You are a helpful assistant. Be very brief.',
125
+ });
126
+
127
+ await collectText(
128
+ session.send({ role: 'user', content: 'Say hello.' }),
129
+ );
130
+
131
+ const history: CopilotMessage[] = session.getHistory();
132
+
133
+ // Should have at least: user message + assistant response
134
+ expect(history.length).toBeGreaterThanOrEqual(2);
135
+ expect(history[0].role).toBe('user');
136
+ expect(history[0].content).toBe('Say hello.');
137
+ expect(history[1].role).toBe('assistant');
138
+ expect(history[1].content.length).toBeGreaterThan(0);
139
+ }, 60_000);
140
+
141
+ // ── Client interface ────────────────────────────────────────────────────
142
+
143
+ it('createCopilotClient returns a valid interface', async ({ skip }) => {
144
+ if (!canRun) skip();
145
+
146
+ expect(client).toBeDefined();
147
+ expect(typeof client.createSession).toBe('function');
148
+ });
149
+ });
@@ -0,0 +1,515 @@
1
+ /**
2
+ * Zava Industries — Full Workshop Live Test
3
+ *
4
+ * Runs a complete sofIA AI Discovery Workshop session with real LLM calls
5
+ * using the Copilot SDK. Feeds canned inputs from the Zava Industries
6
+ * company profile and evaluates outputs against the expected results checklist.
7
+ *
8
+ * Prerequisites:
9
+ * - GitHub Copilot CLI authenticated (`copilot auth login`)
10
+ * - .env with FOUNDRY_PROJECT_ENDPOINT + FOUNDRY_MODEL_DEPLOYMENT_NAME
11
+ *
12
+ * Run with: npm run test:live -- tests/live/zavaFullWorkshop.spec.ts
13
+ */
14
+ import { describe, it, expect, beforeAll, afterAll } from 'vitest';
15
+ import { readFile, writeFile, mkdir } from 'node:fs/promises';
16
+ import { join, dirname } from 'node:path';
17
+ import { fileURLToPath } from 'node:url';
18
+
19
+ import { createCopilotClient } from '../../src/shared/copilotClient.js';
20
+ import type { CopilotClient } from '../../src/shared/copilotClient.js';
21
+ import { ConversationLoop } from '../../src/loop/conversationLoop.js';
22
+ import type { LoopIO, DecisionGateResult } from '../../src/loop/conversationLoop.js';
23
+ import { createPhaseHandler, getPhaseOrder } from '../../src/phases/phaseHandlers.js';
24
+ import type { PhaseHandlerConfig } from '../../src/phases/phaseHandlers.js';
25
+ import type { WorkshopSession, PhaseValue } from '../../src/shared/schemas/session.js';
26
+ import { createDefaultStore } from '../../src/sessions/sessionStore.js';
27
+ import { isWebSearchConfigured, createWebSearchTool } from '../../src/mcp/webSearch.js';
28
+ import type { WebSearchConfig } from '../../src/mcp/webSearch.js';
29
+ import type { WebSearchClient } from '../../src/phases/discoveryEnricher.js';
30
+ import type { SofiaEvent } from '../../src/shared/events.js';
31
+
32
+ // ── Config ───────────────────────────────────────────────────────────────────
33
+
34
// ESM has no built-in __dirname, so derive it from this module's URL
const __dirname = dirname(fileURLToPath(import.meta.url));
// Two levels above this file's directory
const PROJECT_ROOT = join(__dirname, '..', '..');
// Where the results JSON, final session dump, and export output are written
const RESULTS_DIR = join(PROJECT_ROOT, 'tests', 'e2e', 'zava-assessment', 'results');

// Per-phase budget for LLM calls: 3 minutes, in milliseconds
const _PHASE_TIMEOUT = 3 * 60 * 1_000;
// Per-turn budget: 2 minutes, in milliseconds
const _TURN_TIMEOUT = 2 * 60 * 1_000;
42
+
43
+ // ── Zava Industries Canned Inputs ──────────────────────────────────────────
44
+
45
+ const DISCOVER_INPUTS = [
46
+ // Input 1: initial business description
47
+ `We are Zava Industries, a mid-premium fashion company based in Milan. We design and sell modern clothing for ages 20–55. Our competitive edge is trend analysis — we try to detect emerging fashion trends before competitors and turn them into collections fast. We have a team of 20: 5 designers, 4 trend analysts, 3 data scientists, 2 developers, 3 marketing people, 2 ops people, and me as Head of Innovation.
48
+
49
+ Our biggest challenge is speed. Right now our trend analysts spend about 60% of their time manually gathering data from Instagram, TikTok, Pinterest, celebrity magazines, films, and runway shows. This data ends up scattered across Google Sheets, Miro boards, Notion pages, and email threads. By the time we consolidate everything into a trend report, fast-fashion competitors like Zara and Shein have already reacted.
50
+
51
+ We do about €18M annual revenue, serve the EU primarily but are expanding to the US. Our trend-to-retail cycle is 10–14 weeks and we want to get it under 8. We're an Azure shop — Azure SQL, Blob Storage, Power BI for sales dashboards.`,
52
+
53
+ // Input 2: team/process details
54
+ `The trend analysts each specialize: Sara covers social media (Instagram, TikTok), Dimitri tracks runway and trade shows, Aisha monitors celebrity and entertainment media, and Tomás does competitor retail analysis. They each produce about 3 reports per month. The data scientists — Priya, Liam, and Mei — have built a basic demand forecasting model using Power BI and Azure ML, but it only works on historical sales data, not on forward-looking trend signals.
55
+
56
+ Our designers get a consolidated trend brief every 2–3 weeks, which they say is too slow. They want real-time or near-real-time signals. The hit rate for our collections is about 35% — only 35% of designed pieces make it to production. We believe better trend data could push that to 50% or more.
57
+
58
+ Key metrics we track: trend detection lead time (currently ~4 weeks, want <1 week), collection hit rate (35%, want 50%+), time to market (10–14 weeks, want 8), and analyst productivity (3 reports/month, want 8+).`,
59
+
60
+ // Input 3: topic selection
61
+ `I'd like to focus on Trend Intelligence and Signal Aggregation — specifically, how we can use AI to automate the gathering and scoring of trend signals from multiple sources (social media, celebrity media, runway, retail). This is the bottleneck that affects everything else downstream.`,
62
+
63
+ // Input 4: activities
64
+ `Here are the key activities in our trend analysis workflow:
65
+ 1. Social Media Scanning — Analysts browse Instagram, TikTok, Pinterest for emerging styles, colors, silhouettes.
66
+ 2. Celebrity & Entertainment Monitoring — Weekly scan of Vogue, Elle, People, Hola! plus streaming shows.
67
+ 3. Runway & Trade Show Tracking — Attend or watch livestreams of 4–6 shows/year.
68
+ 4. Competitor Retail Analysis — Manual store visits and online browsing of Zara, H&M, COS, etc.
69
+ 5. Signal Consolidation — Merge all data into a trend report. Takes 1–2 weeks.
70
+ 6. Trend Scoring & Prioritization — Team meeting to rank trends. Very subjective.
71
+ 7. Design Brief Creation — Create briefs for designers based on top trends.
72
+ 8. Designer Feedback Loop — Designers review briefs, ask questions, iterate.
73
+
74
+ What we'd do if it weren't so hard: real-time multi-source signal aggregation, automated trend scoring with a confidence index, instant visual mood board generation.`,
75
+
76
+ // Input 5: critical steps voting
77
+ `The most critical steps are:
78
+ - Social Media Scanning — Business value: 5, Human value: 4. Key metric: ~25hrs/week across the team.
79
+ - Signal Consolidation — Business value: 5, Human value: 3. Key metric: 1–2 weeks to produce report.
80
+ - Trend Scoring & Prioritization — Business value: 5, Human value: 4. Key metric: scoring consistency.
81
+ - Design Brief Creation — Business value: 4, Human value: 3. Key metric: designer satisfaction ~3.2/5.`,
82
+
83
+ // Input 6: confirm summary
84
+ `Yes, that workflow summary looks accurate. Let's proceed to ideation.`,
85
+ ];
86
+
87
+ const IDEATE_INPUTS = [
88
+ // Cards reaction
89
+ `I'm very interested in these cards: Computer Vision / Image Recognition for analyzing social media and runway photos, Natural Language Processing for extracting trend signals from captions and articles, Recommendation Systems for suggesting relevant trends, Anomaly / Pattern Detection for spotting emerging patterns, Predictive Analytics for trend lifecycle forecasting, Content Generation for auto-generating mood boards, Data Integration / Aggregation for unifying data sources, and Sentiment Analysis for gauging public reaction.`,
90
+
91
+ // Score cards
92
+ `My scores (Relevance / Feasibility / Impact): Computer Vision 5/3/5, NLP 5/4/4, Recommendation Systems 4/3/4, Anomaly Detection 5/3/5, Predictive Analytics 5/2/5, Content Generation 3/4/3, Data Integration 5/5/4, Sentiment Analysis 4/4/4.`,
93
+
94
+ // Confirm top cards
95
+ `I agree with the top cards. Aggregate "Computer Vision" and "Anomaly/Pattern Detection" under "Visual Trend Detection", and "NLP" and "Sentiment Analysis" under "Text-Based Trend Intelligence". The rest keep as individual cards.`,
96
+
97
+ // Map cards to workflow
98
+ `Visual Trend Detection → Social Media Scanning, Celebrity Monitoring, Runway Tracking. Text-Based Trend Intelligence → Social Media Scanning, Celebrity Monitoring. Data Integration → Signal Consolidation. Recommendation Systems → Trend Scoring. Predictive Analytics → Trend Scoring, Design Brief Creation. Content Generation → Design Brief Creation, Designer Feedback Loop.`,
99
+
100
+ // Idea generation
101
+ `Great ideas! My favorites: 1. TrendLens - AI visual analyzer that extracts fashion attributes from social media and celebrity photos in near-real-time. 2. TrendPulse Dashboard - unified real-time dashboard aggregating all trend signals. 3. AutoBrief Generator - AI that creates design briefs with visual mood boards. 4. Celebrity Impact Tracker - correlates celebrity outfits with social engagement and demand. 5. Trend Predictor - estimates trend lifecycle and commercial potential. I'm most excited about ideas 1 and 2.`,
102
+
103
+ // Confirm ideas
104
+ `These idea cards look great. Let's move to the Design phase.`,
105
+ ];
106
+
107
+ const DESIGN_INPUTS = [
108
+ // Refine idea cards
109
+ `For TrendLens, add: Assumptions - we can get Instagram Graph API + TikTok Research API access; Azure Cognitive Services has sufficient fashion-domain accuracy. Data Needed - social media images, celebrity photos, runway images, 6 months historical. For TrendPulse, add: Assumptions - Power BI can be extended or replaced; team will adopt new tool. Data Needed - all source feeds plus historical sales data.`,
110
+
111
+ // Feasibility/Value scores
112
+ `My scores: TrendLens - Feasibility 3, Value 5. TrendPulse Dashboard - Feasibility 4, Value 5. AutoBrief Generator - Feasibility 4, Value 3. Celebrity Impact Tracker - Feasibility 3, Value 4. Trend Predictor - Feasibility 2, Value 5.`,
113
+
114
+ // Impact assessment
115
+ `I agree with the BXT assessment. Additional risks: TrendLens - social media API rate limits and policy changes. TrendPulse - change management, analysts attached to individual tools. Biggest opportunity: combining TrendLens + TrendPulse into one platform could become a SaaS product.`,
116
+
117
+ // Confirm design output
118
+ `The architecture sketch and impact assessment look solid. Let's proceed to Selection.`,
119
+ ];
120
+
121
+ const SELECT_INPUTS = [
122
+ `I agree. I want to proceed with TrendPulse Dashboard with integrated TrendLens — the unified real-time trend intelligence platform that combines visual AI analysis with multi-source signal aggregation. This addresses our core bottleneck and has long-term SaaS potential.`,
123
+ ];
124
+
125
+ const PLAN_INPUTS = [
126
+ `The milestones look good. For the PoC, minimum scope: ingest images from one source (Instagram), extract basic fashion attributes (colors, patterns), display on a simple dashboard with trend frequency chart. Tech stack: Azure Functions backend, Azure Cognitive Services for image analysis, Azure Cosmos DB for trend storage, React frontend. Timeline: 4 weeks with 2 devs + 1 data scientist. Success criteria: process 100+ images, extract 3+ attributes per image with >70% accuracy, dashboard updating hourly.`,
127
+
128
+ `The plan and PoC definition look great. I'm ready to proceed to the Develop phase.`,
129
+ ];
130
+
131
+ const DEVELOP_INPUTS = [
132
+ `For the PoC: Target stack - TypeScript + Node.js backend, React dashboard, Azure Cognitive Services for image analysis. Key scenarios: (1) Ingest Instagram-like image feed, (2) extract fashion attributes using AI vision, (3) aggregate into trend scores, (4) display on real-time dashboard. Constraints: run locally for PoC, use mocked image data if API unavailable. Out of scope: auth, multi-language, production scaling.`,
133
+ ];
134
+
135
+ // All inputs indexed by phase
136
+ const PHASE_INPUTS: Record<string, string[]> = {
137
+ Discover: DISCOVER_INPUTS,
138
+ Ideate: IDEATE_INPUTS,
139
+ Design: DESIGN_INPUTS,
140
+ Select: SELECT_INPUTS,
141
+ Plan: PLAN_INPUTS,
142
+ Develop: DEVELOP_INPUTS,
143
+ };
144
+
145
+ // ── Test Results Collector ────────────────────────────────────────────────────
146
+
147
/** Record of what happened while one workshop phase was run. */
interface PhaseResult {
  /** Name of the phase this record describes. */
  phase: string;
  /** Role/content pairs of the session turns tagged with this phase. */
  turns: { role: string; content: string }[];
  /** Events received through the conversation loop's `onEvent` callback. */
  events: SofiaEvent[];
  /** Selected session fields as they stood once the phase finished. */
  session: Partial<WorkshopSession>;
  /** Wall-clock time spent on the phase, in milliseconds. */
  durationMs: number;
  /** Messages of any errors caught while running the phase. */
  errors: string[];
}
155
+
156
/** Aggregate report serialized to `test-results.json`. */
interface TestResults {
  /** ISO timestamp taken when the results object is created. */
  startedAt: string;
  /** ISO timestamp set once the suite has finished. */
  completedAt?: string;
  /** Value reported by `isWebSearchConfigured()`. */
  webSearchConfigured: boolean;
  /** One entry per phase, in the order the phases were run. */
  phases: PhaseResult[];
  /** ID of the session created for the run. */
  sessionId?: string;
  /** Session object as it stood after the last phase. */
  finalSession?: WorkshopSession;
  /** NOTE(review): not assigned anywhere in this file — confirm it is still needed. */
  overallError?: string;
}
165
+
166
+ // ── Helpers ──────────────────────────────────────────────────────────────────
167
+
168
/**
 * Build a fresh, empty workshop session positioned at the Discover phase.
 *
 * The session ID has the form `zava-test-YYYY-MM-DD_HHMMSS` (local time).
 * `createdAt` and `updatedAt` are the ISO-8601 form of the same instant.
 *
 * @returns A new session with no participants, turns, or generated files.
 */
function createNewSession(): WorkshopSession {
  // Capture a single instant: deriving the ID and the timestamps from separate
  // `new Date()` calls lets them disagree when the clock ticks in between.
  const created = new Date();
  const now = created.toISOString();
  const pad = (n: number) => String(n).padStart(2, '0');

  const datePart = `${created.getFullYear()}-${pad(created.getMonth() + 1)}-${pad(created.getDate())}`;
  const timePart = `${pad(created.getHours())}${pad(created.getMinutes())}${pad(created.getSeconds())}`;
  const sessionId = `zava-test-${datePart}_${timePart}`;

  return {
    sessionId,
    name: 'Zava Industries Assessment',
    schemaVersion: '1.0.0',
    createdAt: now,
    updatedAt: now,
    phase: 'Discover',
    status: 'Active',
    participants: [],
    artifacts: { generatedFiles: [] },
    turns: [],
  };
}
186
+
187
/**
 * Create a LoopIO that feeds canned inputs and captures all output.
 *
 * @param inputs - Replies handed out one per `readInput` call, in order.
 * @returns The IO object plus the arrays it appends to: `output` for `write`,
 *   `activityLog` for `writeActivity`, and `toolSummaries` for `writeToolSummary`.
 */
function createTestIO(inputs: string[]): {
  io: LoopIO;
  output: string[];
  activityLog: string[];
  toolSummaries: Array<{ tool: string; summary: string }>;
} {
  // Longest input prefix echoed to stderr
  const PREVIEW_LENGTH = 80;

  let inputIdx = 0;
  const output: string[] = [];
  const activityLog: string[] = [];
  const toolSummaries: Array<{ tool: string; summary: string }> = [];

  const io: LoopIO = {
    write(text: string) {
      output.push(text);
      // Also print to stdout for live observation
      process.stdout.write(text);
    },
    writeActivity(text: string) {
      activityLog.push(text);
      process.stderr.write(` [activity] ${text}\n`);
    },
    writeToolSummary(toolName: string, summary: string) {
      toolSummaries.push({ tool: toolName, summary });
      process.stderr.write(` ✓ ${toolName}: ${summary}\n`);
    },
    async readInput(_prompt?: string): Promise<string | null> {
      if (inputIdx >= inputs.length) {
        // No more inputs — signal "done"
        process.stderr.write(` [io] No more inputs, returning null (done)\n`);
        return null;
      }
      const input = inputs[inputIdx++];
      // Add the ellipsis only when text was actually cut off, so a short input
      // is not logged as if it had been truncated
      const preview =
        input.length > PREVIEW_LENGTH ? `${input.slice(0, PREVIEW_LENGTH)}...` : input;
      process.stderr.write(` [io] Input ${inputIdx}/${inputs.length}: ${preview}\n`);
      return input;
    },
    async showDecisionGate(_phase: PhaseValue): Promise<DecisionGateResult> {
      // Always continue to next phase
      process.stderr.write(` [gate] Phase complete → continuing\n`);
      return { choice: 'continue' };
    },
    isJsonMode: false,
    isTTY: false,
  };

  return { io, output, activityLog, toolSummaries };
}
236
+
237
+ // ── Test Suite ────────────────────────────────────────────────────────────────
238
+
239
+ describe('Zava Industries — Full Workshop Session', () => {
240
// Report accumulated across the suite and serialized to disk
const results: TestResults = {
  startedAt: new Date().toISOString(),
  webSearchConfigured: isWebSearchConfigured(),
  phases: [],
};
// Stays undefined unless web search is configured
let webSearchClient: WebSearchClient | undefined;
// Tests return early while this is false
let canRun = false;
let client: CopilotClient;
248
+
249
// Suite setup: load .env, create the Copilot client, optionally wire up web
// search, and make sure the results directory exists. Any failure is logged
// and leaves the suite in its "skip" state instead of failing the hook.
beforeAll(async () => {
  try {
    // Load .env manually (the CLI does this, but tests may not)
    const { config } = await import('dotenv');
    config({ path: join(PROJECT_ROOT, '.env') });

    // `results.webSearchConfigured` was first computed when the suite was
    // defined, i.e. before .env was loaded here. Refresh it so the saved report
    // matches the banner below and the client actually used.
    const webSearchReady = isWebSearchConfigured();
    results.webSearchConfigured = webSearchReady;

    client = await createCopilotClient();
    canRun = true;

    // FR-012: Create WebSearchClient when configured (after dotenv loads)
    if (webSearchReady) {
      const wsConfig: WebSearchConfig = {
        projectEndpoint: process.env.FOUNDRY_PROJECT_ENDPOINT!,
        modelDeploymentName: process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME!,
      };
      const searchFn = createWebSearchTool(wsConfig);
      webSearchClient = { search: searchFn };
    }

    // Ensure results directory
    await mkdir(RESULTS_DIR, { recursive: true });

    console.log('\n╔══════════════════════════════════════════════════════╗');
    console.log('║ Zava Industries — Full Workshop Live Assessment ║');
    console.log(
      '║ Web Search: ' +
        (webSearchReady ? 'CONFIGURED ✓' : 'NOT CONFIGURED ✗').padEnd(39) +
        '║',
    );
    console.log('╚══════════════════════════════════════════════════════╝\n');
  } catch (err) {
    console.warn(
      `Skipping live workshop test — Copilot client not available: ${
        err instanceof Error ? err.message : err
      }`,
    );
  }
}, 60_000);
287
+
288
// Suite teardown: stamp the completion time and persist the collected results.
// A failure to write is reported on stderr rather than thrown.
afterAll(async () => {
  results.completedAt = new Date().toISOString();

  // Write results JSON
  try {
    const resultsPath = join(RESULTS_DIR, 'test-results.json');
    const serialized = JSON.stringify(results, null, 2);
    await writeFile(resultsPath, serialized, 'utf-8');
    console.log(`\nResults saved to tests/e2e/zava-assessment/results/test-results.json`);
  } catch (err) {
    console.error('Failed to save results:', err);
  }
});
303
+
304
+ // ── Phase-by-phase tests ──────────────────────────────────────────────
305
+
306
// Drives every workshop phase in order with the canned inputs, records a
// PhaseResult per phase, writes the results and final session to disk, and
// then asserts on the outcome. A phase that throws does not stop the run: the
// remaining phases still execute and the failure is asserted at the end.
it('runs the full workshop: Discover → Ideate → Design → Select → Plan → Develop', async () => {
  if (!canRun) {
    console.warn('SKIPPED: Copilot SDK not available');
    return;
  }

  const store = createDefaultStore();
  let session = createNewSession();
  await store.save(session);
  results.sessionId = session.sessionId;

  console.log(`Session ID: ${session.sessionId}\n`);

  const phases = getPhaseOrder(); // ['Discover','Ideate','Design','Select','Plan','Develop']
  // Names of phases whose run threw; asserted once all artifacts are on disk
  const failedPhases: string[] = [];

  for (const phase of phases) {
    console.log(`\n${'═'.repeat(60)}`);
    console.log(` PHASE: ${phase}`);
    console.log(`${'═'.repeat(60)}\n`);

    const phaseStart = Date.now();
    const events: SofiaEvent[] = [];
    const errors: string[] = [];

    // Get canned inputs for this phase
    const inputs = PHASE_INPUTS[phase] ?? [];
    const { io } = createTestIO(inputs);

    try {
      // Create and preload the handler (pass webSearchClient for Discover enrichment)
      const handlerConfig: PhaseHandlerConfig = {
        discover: {
          io,
          webSearchClient,
        },
        webSearchClient,
      };
      const handler = createPhaseHandler(phase as PhaseValue, handlerConfig);
      await handler._preload();

      // Update session phase
      session.phase = phase as PhaseValue;
      session.updatedAt = new Date().toISOString();
      await store.save(session);

      // Generate initial message
      const initialMessage = handler.getInitialMessage?.(session);

      // Create and run the conversation loop
      const loop = new ConversationLoop({
        client,
        io,
        session,
        phaseHandler: handler,
        initialMessage,
        onEvent: (e) => events.push(e),
        onSessionUpdate: async (updatedSession) => {
          session = updatedSession;
          await store.save(session);
        },
      });

      session = await loop.run();
      session.updatedAt = new Date().toISOString();
      await store.save(session);
    } catch (err) {
      // Record the failure and keep going so later phases still produce output
      const msg = err instanceof Error ? err.message : String(err);
      errors.push(msg);
      failedPhases.push(phase);
      console.error(`\n ✗ Phase ${phase} error: ${msg}\n`);
    }

    const durationMs = Date.now() - phaseStart;

    // Collect phase result
    const phaseTurns = (session.turns ?? [])
      .filter((t) => t.phase === phase)
      .map((t) => ({ role: t.role, content: t.content }));

    const phaseResult: PhaseResult = {
      phase,
      turns: phaseTurns,
      events,
      session: {
        businessContext: session.businessContext,
        workflow: session.workflow,
        ideas: session.ideas,
        evaluation: session.evaluation,
        selection: session.selection,
        plan: session.plan,
        poc: session.poc,
        name: session.name,
        discovery: (session as Record<string, unknown>).discovery as WorkshopSession['discovery'],
      } as Partial<WorkshopSession>,
      durationMs,
      errors,
    };

    results.phases.push(phaseResult);

    console.log(`\n Phase ${phase} completed in ${(durationMs / 1000).toFixed(1)}s`);
    console.log(` Turns: ${phaseTurns.length}`);
    console.log(` Events: ${events.length}`);
    console.log(` Errors: ${errors.length}`);

    // Phase-specific progress report (informational only)
    if (phase === 'Discover') {
      console.log(` businessContext: ${session.businessContext ? '✓' : '✗'}`);
      console.log(` name: ${session.name ?? '(not set)'}`);
      console.log(
        ` discovery.enrichment: ${(session as Record<string, unknown>).discovery ? '✓' : '✗'}`,
      );
    } else if (phase === 'Ideate') {
      console.log(` ideas: ${session.ideas?.length ?? 0} ideas captured`);
    } else if (phase === 'Design') {
      console.log(` evaluation: ${session.evaluation ? '✓' : '✗'}`);
    } else if (phase === 'Select') {
      console.log(` selection: ${session.selection ? '✓' : '✗'}`);
      console.log(` confirmedByUser: ${session.selection?.confirmedByUser ?? false}`);
    } else if (phase === 'Plan') {
      console.log(` plan: ${session.plan ? '✓' : '✗'}`);
      console.log(` milestones: ${session.plan?.milestones?.length ?? 0}`);
    } else if (phase === 'Develop') {
      console.log(` poc: ${session.poc ? '✓' : '✗'}`);
    }

    // Save intermediate results after each phase
    await writeFile(
      join(RESULTS_DIR, 'test-results.json'),
      JSON.stringify(results, null, 2),
      'utf-8',
    );
  }

  // Store final session
  results.finalSession = session;

  // Write the full session JSON for inspection
  await writeFile(
    join(RESULTS_DIR, 'final-session.json'),
    JSON.stringify(session, null, 2),
    'utf-8',
  );

  console.log(`\n${'═'.repeat(60)}`);
  console.log(' WORKSHOP COMPLETE');
  console.log(`${'═'.repeat(60)}`);
  console.log(` Session: ${session.sessionId}`);
  console.log(` Status: ${session.status}`);
  console.log(` Phase: ${session.phase}`);
  console.log(` Total turns: ${session.turns?.length ?? 0}`);

  // ── Assertions ──────────────────────────────────────────────────────

  // Phase errors were only recorded inside the loop. Without this check the
  // test could report success even though a phase threw.
  expect(failedPhases).toEqual([]);

  // Basic phase progression
  expect(session.businessContext).toBeTruthy();
  expect(session.name).toBeTruthy();

  // Ideas generated
  expect(session.ideas).toBeTruthy();
  expect(session.ideas!.length).toBeGreaterThan(0);

  // Selection made
  // (May or may not have been extracted — depends on LLM output format)
  if (session.selection) {
    console.log(` Selected idea: ${session.selection.ideaId}`);
  }

  // Plan generated
  if (session.plan) {
    console.log(` Plan milestones: ${session.plan.milestones.length}`);
  }

  console.log('\n All phases completed successfully ✓\n');
}, 3600_000); // 1-hour overall timeout
480
+
481
+ // ── Export Test ────────────────────────────────────────────────────────
482
+
483
// Exports the session saved by the workshop test and checks the produced
// files: a summary.json naming the session, plus at least one Markdown file.
it('exports artifacts after workshop completion', async () => {
  if (!canRun || !results.sessionId) {
    console.warn('SKIPPED: No session to export');
    return;
  }

  const { exportSession } = await import('../../src/sessions/exportWriter.js');
  const fs = await import('node:fs/promises');
  const store = createDefaultStore();
  const session = await store.load(results.sessionId);
  const exportDir = join(RESULTS_DIR, 'export');

  // Start from an empty directory: files left behind by an earlier run would
  // otherwise satisfy the existence checks below even if this export wrote nothing.
  await fs.rm(exportDir, { recursive: true, force: true });
  await mkdir(exportDir, { recursive: true });
  await exportSession(session, exportDir);

  // Verify export files exist
  const files = await fs.readdir(exportDir);

  console.log(`\n Export files: ${files.join(', ')}`);

  expect(files).toContain('summary.json');
  expect(files.some((f) => f.endsWith('.md'))).toBe(true);

  // Read and validate summary.json
  const summaryRaw = await readFile(join(exportDir, 'summary.json'), 'utf-8');
  const summary = JSON.parse(summaryRaw);
  expect(summary.sessionId).toBe(results.sessionId);
  expect(summary.files).toBeTruthy();
  expect(summary.files.length).toBeGreaterThan(0);

  console.log(` Export highlights: ${summary.highlights?.join('; ') ?? 'none'}`);
}, 30_000);
515
+ });
@@ -0,0 +1,5 @@
1
+ import { join } from 'node:path';
2
+
3
+ import { loadEnvFile } from '../../src/cli/envLoader.js';
4
+
5
// Load the .env file located in the current working directory
const envFilePath = join(process.cwd(), '.env');
loadEnvFile(envFilePath);