sofia-cli 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/agents/copilot-instructions.md +39 -0
- package/.github/agents/speckit.analyze.agent.md +184 -0
- package/.github/agents/speckit.checklist.agent.md +294 -0
- package/.github/agents/speckit.clarify.agent.md +181 -0
- package/.github/agents/speckit.constitution.agent.md +84 -0
- package/.github/agents/speckit.implement.agent.md +135 -0
- package/.github/agents/speckit.plan.agent.md +90 -0
- package/.github/agents/speckit.specify.agent.md +258 -0
- package/.github/agents/speckit.tasks.agent.md +137 -0
- package/.github/agents/speckit.taskstoissues.agent.md +30 -0
- package/.github/copilot-instructions.md +257 -0
- package/.github/prompts/speckit.analyze.prompt.md +3 -0
- package/.github/prompts/speckit.checklist.prompt.md +3 -0
- package/.github/prompts/speckit.clarify.prompt.md +3 -0
- package/.github/prompts/speckit.constitution.prompt.md +3 -0
- package/.github/prompts/speckit.implement.prompt.md +3 -0
- package/.github/prompts/speckit.plan.prompt.md +3 -0
- package/.github/prompts/speckit.specify.prompt.md +3 -0
- package/.github/prompts/speckit.tasks.prompt.md +3 -0
- package/.github/prompts/speckit.taskstoissues.prompt.md +3 -0
- package/.github/workflows/ci.yml +38 -0
- package/.prettierrc +6 -0
- package/.specify/memory/constitution.md +181 -0
- package/.specify/scripts/bash/check-prerequisites.sh +166 -0
- package/.specify/scripts/bash/common.sh +156 -0
- package/.specify/scripts/bash/create-new-feature.sh +297 -0
- package/.specify/scripts/bash/setup-plan.sh +61 -0
- package/.specify/scripts/bash/update-agent-context.sh +810 -0
- package/.specify/templates/agent-file-template.md +28 -0
- package/.specify/templates/checklist-template.md +40 -0
- package/.specify/templates/constitution-template.md +50 -0
- package/.specify/templates/plan-template.md +113 -0
- package/.specify/templates/spec-template.md +115 -0
- package/.specify/templates/tasks-template.md +251 -0
- package/.vscode/mcp.json +42 -0
- package/.vscode/settings.json +19 -0
- package/CODE_OF_CONDUCT.md +128 -0
- package/LICENSE +21 -0
- package/README.md +213 -0
- package/dist/src/cli/developCommand.js +240 -0
- package/dist/src/cli/directCommands.js +143 -0
- package/dist/src/cli/envLoader.js +16 -0
- package/dist/src/cli/exportCommand.js +53 -0
- package/dist/src/cli/index.js +203 -0
- package/dist/src/cli/ioContext.js +109 -0
- package/dist/src/cli/preflight.js +57 -0
- package/dist/src/cli/statusCommand.js +110 -0
- package/dist/src/cli/workshopCommand.js +400 -0
- package/dist/src/develop/checkpointState.js +86 -0
- package/dist/src/develop/codeGenerator.js +319 -0
- package/dist/src/develop/dynamicScaffolder.js +226 -0
- package/dist/src/develop/githubMcpAdapter.js +122 -0
- package/dist/src/develop/index.js +15 -0
- package/dist/src/develop/mcpContextEnricher.js +195 -0
- package/dist/src/develop/pocScaffolder.js +542 -0
- package/dist/src/develop/ralphLoop.js +659 -0
- package/dist/src/develop/templateRegistry.js +364 -0
- package/dist/src/develop/testRunner.js +202 -0
- package/dist/src/logging/logger.js +58 -0
- package/dist/src/loop/conversationLoop.js +227 -0
- package/dist/src/loop/phaseSummarizer.js +87 -0
- package/dist/src/mcp/mcpManager.js +267 -0
- package/dist/src/mcp/mcpTransport.js +391 -0
- package/dist/src/mcp/retryPolicy.js +47 -0
- package/dist/src/mcp/webSearch.js +254 -0
- package/dist/src/phases/contextSummarizer.js +101 -0
- package/dist/src/phases/discoveryEnricher.js +156 -0
- package/dist/src/phases/phaseExtractors.js +222 -0
- package/dist/src/phases/phaseHandlers.js +328 -0
- package/dist/src/prompts/design.md +51 -0
- package/dist/src/prompts/develop-boundary.md +51 -0
- package/dist/src/prompts/develop.md +111 -0
- package/dist/src/prompts/discover.md +58 -0
- package/dist/src/prompts/ideate.md +56 -0
- package/dist/src/prompts/plan.md +51 -0
- package/dist/src/prompts/promptLoader.js +167 -0
- package/dist/src/prompts/promptLoader.ts +198 -0
- package/dist/src/prompts/select.md +47 -0
- package/dist/src/prompts/summarize/README.md +8 -0
- package/dist/src/prompts/summarize/design-summary.md +37 -0
- package/dist/src/prompts/summarize/develop-summary.md +25 -0
- package/dist/src/prompts/summarize/ideate-summary.md +27 -0
- package/dist/src/prompts/summarize/plan-summary.md +27 -0
- package/dist/src/prompts/summarize/select-summary.md +21 -0
- package/dist/src/prompts/system.md +28 -0
- package/dist/src/sessions/exportPaths.js +22 -0
- package/dist/src/sessions/exportWriter.js +406 -0
- package/dist/src/sessions/sessionManager.js +81 -0
- package/dist/src/sessions/sessionStore.js +65 -0
- package/dist/src/shared/activitySpinner.js +91 -0
- package/dist/src/shared/copilotClient.js +129 -0
- package/dist/src/shared/data/cards.json +1249 -0
- package/dist/src/shared/data/cardsLoader.js +51 -0
- package/dist/src/shared/errorClassifier.js +120 -0
- package/dist/src/shared/events.js +28 -0
- package/dist/src/shared/markdownRenderer.js +34 -0
- package/dist/src/shared/schemas/session.js +265 -0
- package/dist/src/shared/tableRenderer.js +20 -0
- package/dist/src/vendor/chalk.js +2 -0
- package/dist/src/vendor/cli-table3.js +3 -0
- package/dist/src/vendor/commander.js +2 -0
- package/dist/src/vendor/marked-terminal.js +3 -0
- package/dist/src/vendor/marked.js +2 -0
- package/dist/src/vendor/ora.js +2 -0
- package/dist/src/vendor/pino.js +2 -0
- package/dist/src/vendor/zod.js +2 -0
- package/dist/tests/e2e/developE2e.spec.js +126 -0
- package/dist/tests/e2e/developFailureE2e.spec.js +247 -0
- package/dist/tests/e2e/developPty.spec.js +75 -0
- package/dist/tests/e2e/discoveryWebSearchRelevance.spec.js +84 -0
- package/dist/tests/e2e/harness.spec.js +83 -0
- package/dist/tests/e2e/mcpLive.spec.js +120 -0
- package/dist/tests/e2e/newSession.e2e.spec.js +177 -0
- package/dist/tests/e2e/ralphLoopEnrichmentComparison.spec.js +62 -0
- package/dist/tests/e2e/workiqEnrichment.spec.js +56 -0
- package/dist/tests/e2e/zavaSimulation.spec.js +452 -0
- package/dist/tests/fixtures/test-fixture-project/src/add.js +3 -0
- package/dist/tests/fixtures/test-fixture-project/tests/failing.test.js +6 -0
- package/dist/tests/fixtures/test-fixture-project/tests/hanging.test.js +8 -0
- package/dist/tests/fixtures/test-fixture-project/tests/passing.test.js +10 -0
- package/dist/tests/fixtures/test-fixture-project/vitest.config.js +6 -0
- package/dist/tests/integration/autoStartConversation.spec.js +138 -0
- package/dist/tests/integration/defaultCommand.spec.js +147 -0
- package/dist/tests/integration/directCommandNonTty.spec.js +224 -0
- package/dist/tests/integration/directCommandTty.spec.js +151 -0
- package/dist/tests/integration/discoveryEnrichmentFlow.spec.js +175 -0
- package/dist/tests/integration/exportArtifacts.spec.js +202 -0
- package/dist/tests/integration/exportFallbackFlow.spec.js +99 -0
- package/dist/tests/integration/mcpDegradationFlow.spec.js +190 -0
- package/dist/tests/integration/mcpTransportFlow.spec.js +139 -0
- package/dist/tests/integration/newSessionFlow.spec.js +343 -0
- package/dist/tests/integration/pocGithubMcp.spec.js +186 -0
- package/dist/tests/integration/pocLocalFallback.spec.js +171 -0
- package/dist/tests/integration/pocScaffold.spec.js +163 -0
- package/dist/tests/integration/ralphLoopFlow.spec.js +359 -0
- package/dist/tests/integration/ralphLoopPartial.spec.js +368 -0
- package/dist/tests/integration/resumeAndBacktrack.spec.js +247 -0
- package/dist/tests/integration/spinnerLifecycle.spec.js +220 -0
- package/dist/tests/integration/summarizationFlow.spec.js +115 -0
- package/dist/tests/integration/testRunnerReal.spec.js +52 -0
- package/dist/tests/integration/webSearchAgent.spec.js +128 -0
- package/dist/tests/live/copilotSdkLive.spec.js +107 -0
- package/dist/tests/live/zavaFullWorkshop.spec.js +392 -0
- package/dist/tests/setup/loadEnv.js +3 -0
- package/dist/tests/unit/cli/developCommand.spec.js +567 -0
- package/dist/tests/unit/cli/directCommands.spec.js +279 -0
- package/dist/tests/unit/cli/envLoader.spec.js +58 -0
- package/dist/tests/unit/cli/ioContext.spec.js +119 -0
- package/dist/tests/unit/cli/preflight.spec.js +108 -0
- package/dist/tests/unit/cli/statusCommand.spec.js +111 -0
- package/dist/tests/unit/cli/workshopClientFallback.spec.js +80 -0
- package/dist/tests/unit/cli/workshopCommand.spec.js +329 -0
- package/dist/tests/unit/config/vitestEnvSetup.spec.js +13 -0
- package/dist/tests/unit/develop/checkpointState.spec.js +315 -0
- package/dist/tests/unit/develop/codeGenerator.spec.js +355 -0
- package/dist/tests/unit/develop/githubMcpAdapter.spec.js +231 -0
- package/dist/tests/unit/develop/mcpContextEnricher.spec.js +433 -0
- package/dist/tests/unit/develop/outputValidator.spec.js +119 -0
- package/dist/tests/unit/develop/pocScaffolder.spec.js +353 -0
- package/dist/tests/unit/develop/ralphLoop.spec.js +1248 -0
- package/dist/tests/unit/develop/templateRegistry.spec.js +85 -0
- package/dist/tests/unit/develop/testRunner.spec.js +249 -0
- package/dist/tests/unit/infraBicep.spec.js +92 -0
- package/dist/tests/unit/infraDeploy.spec.js +82 -0
- package/dist/tests/unit/infraTeardown.spec.js +63 -0
- package/dist/tests/unit/logging/logger.spec.js +43 -0
- package/dist/tests/unit/loop/conversationLoop.spec.js +592 -0
- package/dist/tests/unit/loop/phaseSummarizer.spec.js +141 -0
- package/dist/tests/unit/loop/streamingMarkdown.spec.js +147 -0
- package/dist/tests/unit/mcp/mcpManager.spec.js +279 -0
- package/dist/tests/unit/mcp/mcpTransport.spec.js +529 -0
- package/dist/tests/unit/mcp/retryPolicy.spec.js +218 -0
- package/dist/tests/unit/mcp/timeoutValidation.spec.js +46 -0
- package/dist/tests/unit/mcp/webSearch.spec.js +567 -0
- package/dist/tests/unit/phases/contextSummarizer.spec.js +140 -0
- package/dist/tests/unit/phases/discoveryEnricher.repeatCalls.spec.js +93 -0
- package/dist/tests/unit/phases/discoveryEnricher.spec.js +411 -0
- package/dist/tests/unit/phases/phaseExtractors.spec.js +352 -0
- package/dist/tests/unit/phases/phaseHandlers.spec.js +425 -0
- package/dist/tests/unit/prompts/promptLoader.spec.js +118 -0
- package/dist/tests/unit/schemas/pocSchemas.spec.js +412 -0
- package/dist/tests/unit/schemas/session.spec.js +257 -0
- package/dist/tests/unit/sessions/exportPaths.spec.js +31 -0
- package/dist/tests/unit/sessions/exportWriter.spec.js +655 -0
- package/dist/tests/unit/sessions/sessionManager.spec.js +151 -0
- package/dist/tests/unit/sessions/sessionStore.spec.js +116 -0
- package/dist/tests/unit/shared/activitySpinner.spec.js +175 -0
- package/dist/tests/unit/shared/cardsLoader.spec.js +76 -0
- package/dist/tests/unit/shared/copilotClient.spec.js +155 -0
- package/dist/tests/unit/shared/errorClassifier.spec.js +131 -0
- package/dist/tests/unit/shared/events.spec.js +55 -0
- package/dist/tests/unit/shared/markdownRenderer.spec.js +35 -0
- package/dist/tests/unit/shared/markdownRendererChunks.spec.js +70 -0
- package/dist/tests/unit/shared/tableRenderer.spec.js +34 -0
- package/dist/vitest.config.js +14 -0
- package/dist/vitest.live.config.js +18 -0
- package/docs/README.md +35 -0
- package/docs/architecture.md +169 -0
- package/docs/cli-usage.md +207 -0
- package/docs/environment.md +66 -0
- package/docs/export-format.md +146 -0
- package/docs/session-model.md +113 -0
- package/eslint.config.js +35 -0
- package/infra/deploy.sh +193 -0
- package/infra/gather-env.sh +211 -0
- package/infra/main.bicep +90 -0
- package/infra/main.bicepparam +18 -0
- package/infra/resources.bicep +134 -0
- package/infra/teardown.sh +114 -0
- package/package.json +63 -0
- package/specs/001-cli-workshop-rebuild/checklists/requirements.md +35 -0
- package/specs/001-cli-workshop-rebuild/contracts/cli.md +59 -0
- package/specs/001-cli-workshop-rebuild/contracts/export-summary-json.md +23 -0
- package/specs/001-cli-workshop-rebuild/contracts/session-json.md +30 -0
- package/specs/001-cli-workshop-rebuild/data-model.md +210 -0
- package/specs/001-cli-workshop-rebuild/plan.md +361 -0
- package/specs/001-cli-workshop-rebuild/quickstart.md +83 -0
- package/specs/001-cli-workshop-rebuild/research.md +116 -0
- package/specs/001-cli-workshop-rebuild/spec.md +240 -0
- package/specs/001-cli-workshop-rebuild/tasks.md +476 -0
- package/specs/002-poc-generation/contracts/poc-output.md +172 -0
- package/specs/002-poc-generation/contracts/ralph-loop.md +113 -0
- package/specs/002-poc-generation/data-model.md +172 -0
- package/specs/002-poc-generation/plan.md +109 -0
- package/specs/002-poc-generation/quickstart.md +97 -0
- package/specs/002-poc-generation/research.md +786 -0
- package/specs/002-poc-generation/spec.md +81 -0
- package/specs/002-poc-generation/tasks-fix.md +198 -0
- package/specs/002-poc-generation/tasks.md +252 -0
- package/specs/003-mcp-transport-integration/checklists/requirements.md +37 -0
- package/specs/003-mcp-transport-integration/contracts/context-enricher.md +220 -0
- package/specs/003-mcp-transport-integration/contracts/discovery-enricher.md +267 -0
- package/specs/003-mcp-transport-integration/contracts/github-adapter.md +149 -0
- package/specs/003-mcp-transport-integration/contracts/mcp-transport.md +288 -0
- package/specs/003-mcp-transport-integration/data-model.md +326 -0
- package/specs/003-mcp-transport-integration/plan.md +114 -0
- package/specs/003-mcp-transport-integration/quickstart.md +311 -0
- package/specs/003-mcp-transport-integration/research.md +395 -0
- package/specs/003-mcp-transport-integration/spec.md +234 -0
- package/specs/003-mcp-transport-integration/tasks.md +324 -0
- package/specs/003-next-spec-gaps.md +150 -0
- package/specs/004-dev-resume-hardening/checklists/requirements.md +37 -0
- package/specs/004-dev-resume-hardening/contracts/cli.md +160 -0
- package/specs/004-dev-resume-hardening/data-model.md +321 -0
- package/specs/004-dev-resume-hardening/plan.md +107 -0
- package/specs/004-dev-resume-hardening/quickstart.md +115 -0
- package/specs/004-dev-resume-hardening/research.md +142 -0
- package/specs/004-dev-resume-hardening/spec.md +221 -0
- package/specs/004-dev-resume-hardening/tasks.md +333 -0
- package/specs/005-ai-search-deploy/checklists/requirements.md +39 -0
- package/specs/005-ai-search-deploy/contracts/web-search-tool.md +241 -0
- package/specs/005-ai-search-deploy/data-model.md +130 -0
- package/specs/005-ai-search-deploy/plan.md +93 -0
- package/specs/005-ai-search-deploy/quickstart.md +96 -0
- package/specs/005-ai-search-deploy/research.md +187 -0
- package/specs/005-ai-search-deploy/spec.md +143 -0
- package/specs/005-ai-search-deploy/tasks.md +284 -0
- package/specs/006-workshop-extraction-fixes/checklists/requirements.md +61 -0
- package/specs/006-workshop-extraction-fixes/contracts/summarization-and-export.md +131 -0
- package/specs/006-workshop-extraction-fixes/data-model.md +149 -0
- package/specs/006-workshop-extraction-fixes/plan.md +123 -0
- package/specs/006-workshop-extraction-fixes/quickstart.md +101 -0
- package/specs/006-workshop-extraction-fixes/research.md +143 -0
- package/specs/006-workshop-extraction-fixes/spec.md +210 -0
- package/specs/006-workshop-extraction-fixes/tasks.md +316 -0
- package/src/cli/developCommand.ts +308 -0
- package/src/cli/directCommands.ts +195 -0
- package/src/cli/envLoader.ts +17 -0
- package/src/cli/exportCommand.ts +65 -0
- package/src/cli/index.ts +249 -0
- package/src/cli/ioContext.ts +139 -0
- package/src/cli/preflight.ts +86 -0
- package/src/cli/statusCommand.ts +118 -0
- package/src/cli/workshopCommand.ts +496 -0
- package/src/develop/checkpointState.ts +121 -0
- package/src/develop/codeGenerator.ts +402 -0
- package/src/develop/dynamicScaffolder.ts +284 -0
- package/src/develop/githubMcpAdapter.ts +199 -0
- package/src/develop/index.ts +34 -0
- package/src/develop/mcpContextEnricher.ts +279 -0
- package/src/develop/pocScaffolder.ts +646 -0
- package/src/develop/ralphLoop.ts +1044 -0
- package/src/develop/templateRegistry.ts +427 -0
- package/src/develop/testRunner.ts +276 -0
- package/src/logging/logger.ts +73 -0
- package/src/loop/conversationLoop.ts +355 -0
- package/src/loop/phaseSummarizer.ts +114 -0
- package/src/mcp/mcpManager.ts +365 -0
- package/src/mcp/mcpTransport.ts +562 -0
- package/src/mcp/retryPolicy.ts +87 -0
- package/src/mcp/webSearch.ts +388 -0
- package/src/originalPrompts/design_thinking.md +178 -0
- package/src/originalPrompts/design_thinking_persona.md +76 -0
- package/src/originalPrompts/document_generator_example.md +77 -0
- package/src/originalPrompts/document_generator_persona.md +47 -0
- package/src/originalPrompts/facilitator_persona.md +125 -0
- package/src/originalPrompts/guardrails.md +47 -0
- package/src/phases/contextSummarizer.ts +154 -0
- package/src/phases/discoveryEnricher.ts +223 -0
- package/src/phases/phaseExtractors.ts +247 -0
- package/src/phases/phaseHandlers.ts +450 -0
- package/src/prompts/design.md +51 -0
- package/src/prompts/develop-boundary.md +51 -0
- package/src/prompts/develop.md +111 -0
- package/src/prompts/discover.md +58 -0
- package/src/prompts/ideate.md +56 -0
- package/src/prompts/plan.md +51 -0
- package/src/prompts/promptLoader.ts +198 -0
- package/src/prompts/select.md +47 -0
- package/src/prompts/summarize/README.md +8 -0
- package/src/prompts/summarize/design-summary.md +37 -0
- package/src/prompts/summarize/develop-summary.md +25 -0
- package/src/prompts/summarize/ideate-summary.md +27 -0
- package/src/prompts/summarize/plan-summary.md +27 -0
- package/src/prompts/summarize/select-summary.md +21 -0
- package/src/prompts/system.md +28 -0
- package/src/sessions/exportPaths.ts +28 -0
- package/src/sessions/exportWriter.ts +490 -0
- package/src/sessions/sessionManager.ts +119 -0
- package/src/sessions/sessionStore.ts +69 -0
- package/src/shared/activitySpinner.ts +108 -0
- package/src/shared/copilotClient.ts +291 -0
- package/src/shared/data/cards.json +1249 -0
- package/src/shared/data/cardsLoader.ts +70 -0
- package/src/shared/errorClassifier.ts +160 -0
- package/src/shared/events.ts +103 -0
- package/src/shared/markdownRenderer.ts +44 -0
- package/src/shared/schemas/session.ts +346 -0
- package/src/shared/tableRenderer.ts +28 -0
- package/src/types/marked-terminal.d.ts +5 -0
- package/src/vendor/chalk.ts +2 -0
- package/src/vendor/cli-table3.ts +3 -0
- package/src/vendor/commander.ts +2 -0
- package/src/vendor/marked-terminal.ts +3 -0
- package/src/vendor/marked.ts +2 -0
- package/src/vendor/ora.ts +2 -0
- package/src/vendor/pino.ts +3 -0
- package/src/vendor/zod.ts +3 -0
- package/tests/e2e/developE2e.spec.ts +152 -0
- package/tests/e2e/developFailureE2e.spec.ts +289 -0
- package/tests/e2e/developPty.spec.ts +86 -0
- package/tests/e2e/discoveryWebSearchRelevance.spec.ts +103 -0
- package/tests/e2e/harness.spec.ts +104 -0
- package/tests/e2e/mcpLive.spec.ts +149 -0
- package/tests/e2e/newSession.e2e.spec.ts +245 -0
- package/tests/e2e/ralphLoopEnrichmentComparison.spec.ts +70 -0
- package/tests/e2e/workiqEnrichment.spec.ts +72 -0
- package/tests/e2e/zava-assessment/agent-interaction-script.md +258 -0
- package/tests/e2e/zava-assessment/company-profile.md +98 -0
- package/tests/e2e/zava-assessment/expected-results-checklist.md +454 -0
- package/tests/e2e/zavaSimulation.spec.ts +511 -0
- package/tests/fixtures/completedSession.json +141 -0
- package/tests/fixtures/test-fixture-project/package-lock.json +1585 -0
- package/tests/fixtures/test-fixture-project/package.json +12 -0
- package/tests/fixtures/test-fixture-project/src/add.ts +3 -0
- package/tests/fixtures/test-fixture-project/tests/failing.test.ts +7 -0
- package/tests/fixtures/test-fixture-project/tests/hanging.test.ts +9 -0
- package/tests/fixtures/test-fixture-project/tests/passing.test.ts +13 -0
- package/tests/fixtures/test-fixture-project/vitest.config.ts +7 -0
- package/tests/integration/autoStartConversation.spec.ts +168 -0
- package/tests/integration/defaultCommand.spec.ts +179 -0
- package/tests/integration/directCommandNonTty.spec.ts +260 -0
- package/tests/integration/directCommandTty.spec.ts +185 -0
- package/tests/integration/discoveryEnrichmentFlow.spec.ts +209 -0
- package/tests/integration/exportArtifacts.spec.ts +232 -0
- package/tests/integration/exportFallbackFlow.spec.ts +115 -0
- package/tests/integration/mcpDegradationFlow.spec.ts +231 -0
- package/tests/integration/mcpTransportFlow.spec.ts +178 -0
- package/tests/integration/newSessionFlow.spec.ts +406 -0
- package/tests/integration/pocGithubMcp.spec.ts +224 -0
- package/tests/integration/pocLocalFallback.spec.ts +205 -0
- package/tests/integration/pocScaffold.spec.ts +220 -0
- package/tests/integration/ralphLoopFlow.spec.ts +430 -0
- package/tests/integration/ralphLoopPartial.spec.ts +416 -0
- package/tests/integration/resumeAndBacktrack.spec.ts +278 -0
- package/tests/integration/spinnerLifecycle.spec.ts +270 -0
- package/tests/integration/summarizationFlow.spec.ts +135 -0
- package/tests/integration/testRunnerReal.spec.ts +63 -0
- package/tests/integration/webSearchAgent.spec.ts +155 -0
- package/tests/live/copilotSdkLive.spec.ts +149 -0
- package/tests/live/zavaFullWorkshop.spec.ts +515 -0
- package/tests/setup/loadEnv.ts +5 -0
- package/tests/unit/cli/developCommand.spec.ts +679 -0
- package/tests/unit/cli/directCommands.spec.ts +325 -0
- package/tests/unit/cli/envLoader.spec.ts +73 -0
- package/tests/unit/cli/ioContext.spec.ts +148 -0
- package/tests/unit/cli/preflight.spec.ts +125 -0
- package/tests/unit/cli/statusCommand.spec.ts +134 -0
- package/tests/unit/cli/workshopClientFallback.spec.ts +100 -0
- package/tests/unit/cli/workshopCommand.spec.ts +378 -0
- package/tests/unit/config/vitestEnvSetup.spec.ts +24 -0
- package/tests/unit/develop/checkpointState.spec.ts +378 -0
- package/tests/unit/develop/codeGenerator.spec.ts +447 -0
- package/tests/unit/develop/githubMcpAdapter.spec.ts +283 -0
- package/tests/unit/develop/mcpContextEnricher.spec.ts +564 -0
- package/tests/unit/develop/outputValidator.spec.ts +134 -0
- package/tests/unit/develop/pocScaffolder.spec.ts +451 -0
- package/tests/unit/develop/ralphLoop.spec.ts +1439 -0
- package/tests/unit/develop/templateRegistry.spec.ts +106 -0
- package/tests/unit/develop/testRunner.spec.ts +294 -0
- package/tests/unit/infraBicep.spec.ts +116 -0
- package/tests/unit/infraDeploy.spec.ts +102 -0
- package/tests/unit/infraTeardown.spec.ts +77 -0
- package/tests/unit/logging/logger.spec.ts +50 -0
- package/tests/unit/loop/conversationLoop.spec.ts +719 -0
- package/tests/unit/loop/phaseSummarizer.spec.ts +169 -0
- package/tests/unit/loop/streamingMarkdown.spec.ts +180 -0
- package/tests/unit/mcp/mcpManager.spec.ts +336 -0
- package/tests/unit/mcp/mcpTransport.spec.ts +689 -0
- package/tests/unit/mcp/retryPolicy.spec.ts +278 -0
- package/tests/unit/mcp/timeoutValidation.spec.ts +55 -0
- package/tests/unit/mcp/webSearch.spec.ts +718 -0
- package/tests/unit/phases/contextSummarizer.spec.ts +158 -0
- package/tests/unit/phases/discoveryEnricher.repeatCalls.spec.ts +125 -0
- package/tests/unit/phases/discoveryEnricher.spec.ts +512 -0
- package/tests/unit/phases/phaseExtractors.spec.ts +406 -0
- package/tests/unit/phases/phaseHandlers.spec.ts +483 -0
- package/tests/unit/prompts/promptLoader.spec.ts +144 -0
- package/tests/unit/schemas/pocSchemas.spec.ts +457 -0
- package/tests/unit/schemas/session.spec.ts +328 -0
- package/tests/unit/sessions/exportPaths.spec.ts +38 -0
- package/tests/unit/sessions/exportWriter.spec.ts +737 -0
- package/tests/unit/sessions/sessionManager.spec.ts +174 -0
- package/tests/unit/sessions/sessionStore.spec.ts +136 -0
- package/tests/unit/shared/activitySpinner.spec.ts +211 -0
- package/tests/unit/shared/cardsLoader.spec.ts +89 -0
- package/tests/unit/shared/copilotClient.spec.ts +185 -0
- package/tests/unit/shared/errorClassifier.spec.ts +152 -0
- package/tests/unit/shared/events.spec.ts +71 -0
- package/tests/unit/shared/markdownRenderer.spec.ts +42 -0
- package/tests/unit/shared/markdownRendererChunks.spec.ts +83 -0
- package/tests/unit/shared/tableRenderer.spec.ts +38 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +15 -0
- package/vitest.live.config.ts +19 -0
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration tests for the live Copilot SDK client.
|
|
3
|
+
*
|
|
4
|
+
* These tests exercise the real `createCopilotClient()` → SDK → LLM pipeline.
|
|
5
|
+
* They are slower than unit tests (~10-30s each) because they make real API calls.
|
|
6
|
+
*
|
|
7
|
+
* **Prerequisites:**
|
|
8
|
+
* - GitHub Copilot CLI must be authenticated (`copilot auth login`)
|
|
9
|
+
* - The SDK spawns a local copilot CLI process for JSON-RPC
|
|
10
|
+
*
|
|
11
|
+
* The test suite auto-skips if the SDK cannot start (e.g., no auth, no CLI binary).
|
|
12
|
+
*/
|
|
13
|
+
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
|
14
|
+
|
|
15
|
+
import { createCopilotClient } from '../../src/shared/copilotClient.js';
|
|
16
|
+
import type { CopilotClient, ConversationSession, CopilotMessage } from '../../src/shared/copilotClient.js';
|
|
17
|
+
|
|
18
|
+
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
19
|
+
|
|
20
|
+
/**
 * Drain an async stream of SofiaEvents and return the concatenated text.
 *
 * Only events whose `type` is `'TextDelta'` contribute; every other event is
 * consumed and ignored. Fragments are joined in arrival order with no
 * separator.
 *
 * @param iter - Async iterable of events to consume until it completes.
 * @returns The joined `text` of all TextDelta events (`''` when there are none).
 */
async function collectText(iter: AsyncIterable<import('../../src/shared/events.js').SofiaEvent>): Promise<string> {
  const pieces: string[] = [];
  for await (const evt of iter) {
    // Skip anything that is not a text fragment.
    if (evt.type !== 'TextDelta') continue;
    pieces.push(evt.text);
  }
  return pieces.join('');
}
|
|
30
|
+
|
|
31
|
+
// ── Suite ────────────────────────────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
/**
 * Live tests for the Copilot SDK client wrapper.
 *
 * `beforeAll` tries to create a real client. If creation throws, the reason is
 * logged with `console.warn`, `canRun` stays `false`, and every test marks
 * itself as skipped through the vitest test context. Previously the tests
 * returned early and were reported as passed, which hid the fact that nothing
 * had actually been exercised.
 */
describe('Live Copilot SDK client', () => {
  let client: CopilotClient;
  let canRun = false;

  beforeAll(async () => {
    try {
      client = await createCopilotClient();
      canRun = true;
    } catch (err) {
      // Do not fail the suite: record why the live tests will be skipped.
      console.warn(
        `Skipping live Copilot SDK tests — client creation failed: ${
          err instanceof Error ? err.message : err
        }`,
      );
    }
  }, 30_000);

  afterAll(async () => {
    // No teardown is performed here.
    // NOTE(review): the original comment stated that the SDK client manages
    // its own lifecycle and needs no explicit stop — confirm against the SDK.
  });

  // ── Basic smoke test ────────────────────────────────────────────────────

  it('can create a session and get a response', async (ctx) => {
    // Report as skipped (not passed) when the SDK could not be started.
    if (!canRun) ctx.skip();

    const session: ConversationSession = await client.createSession({
      systemPrompt: 'You are a helpful assistant. Be very brief.',
    });

    const response = await collectText(
      session.send({ role: 'user', content: 'What is 2 + 2? Reply with just the number.' }),
    );

    expect(response).toBeTruthy();
    expect(response.length).toBeGreaterThan(0);
    // The LLM should mention "4" somewhere in the response
    expect(response).toContain('4');
  }, 60_000);

  // ── Multi-turn conversation ─────────────────────────────────────────────

  it('supports multi-turn conversation', async (ctx) => {
    if (!canRun) ctx.skip();

    const session = await client.createSession({
      systemPrompt:
        'You are a helpful assistant. Keep responses to one sentence. ' +
        'When asked to recall, use the conversation history.',
    });

    // Turn 1: set a fact
    const r1 = await collectText(
      session.send({ role: 'user', content: 'Remember this word: "tangerine".' }),
    );
    expect(r1).toBeTruthy();

    // Turn 2: recall the fact (only succeeds if the session keeps history)
    const r2 = await collectText(
      session.send({ role: 'user', content: 'What word did I ask you to remember?' }),
    );
    expect(r2.toLowerCase()).toContain('tangerine');
  }, 120_000);

  // ── System prompt respected ─────────────────────────────────────────────

  it('respects the system prompt persona', async (ctx) => {
    if (!canRun) ctx.skip();

    const session = await client.createSession({
      systemPrompt:
        'You are a pirate. Always respond in pirate-speak. Keep responses under 50 words.',
    });

    const response = await collectText(
      session.send({ role: 'user', content: 'Hello, how are you today?' }),
    );

    expect(response).toBeTruthy();
    // LLM playing pirate should use at least one pirate-ish word.
    // NOTE(review): this is an unanchored substring match, so short
    // alternatives such as "ye" or "arr" also match inside ordinary words
    // ("yes", "carry"); the assertion is therefore lenient.
    const piratePatterns = /ahoy|matey|arr|ye|shiver|landlubber|cap'n|seas|treasure|sail/i;
    expect(response).toMatch(piratePatterns);
  }, 60_000);

  // ── History tracking ────────────────────────────────────────────────────

  it('tracks conversation history correctly', async (ctx) => {
    if (!canRun) ctx.skip();

    const session = await client.createSession({
      systemPrompt: 'You are a helpful assistant. Be very brief.',
    });

    // Drain the response so the turn is complete before reading history.
    await collectText(
      session.send({ role: 'user', content: 'Say hello.' }),
    );

    const history: CopilotMessage[] = session.getHistory();

    // Should have at least: user message + assistant response
    expect(history.length).toBeGreaterThanOrEqual(2);
    expect(history[0].role).toBe('user');
    expect(history[0].content).toBe('Say hello.');
    expect(history[1].role).toBe('assistant');
    expect(history[1].content.length).toBeGreaterThan(0);
  }, 60_000);

  // ── Error handling ──────────────────────────────────────────────────────

  it('createCopilotClient returns a valid interface', async (ctx) => {
    if (!canRun) ctx.skip();

    expect(client).toBeDefined();
    expect(typeof client.createSession).toBe('function');
  });
});
|
|
@@ -0,0 +1,515 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Zava Industries — Full Workshop Live Test
|
|
3
|
+
*
|
|
4
|
+
* Runs a complete sofIA AI Discovery Workshop session with real LLM calls
|
|
5
|
+
* using the Copilot SDK. Feeds canned inputs from the Zava Industries
|
|
6
|
+
* company profile and evaluates outputs against the expected results checklist.
|
|
7
|
+
*
|
|
8
|
+
* Prerequisites:
|
|
9
|
+
* - GitHub Copilot CLI authenticated (`copilot auth login`)
|
|
10
|
+
* - .env with FOUNDRY_PROJECT_ENDPOINT + FOUNDRY_MODEL_DEPLOYMENT_NAME
|
|
11
|
+
*
|
|
12
|
+
* Run with: npm run test:live -- tests/live/zavaFullWorkshop.spec.ts
|
|
13
|
+
*/
|
|
14
|
+
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
|
15
|
+
import { readFile, writeFile, mkdir } from 'node:fs/promises';
|
|
16
|
+
import { join, dirname } from 'node:path';
|
|
17
|
+
import { fileURLToPath } from 'node:url';
|
|
18
|
+
|
|
19
|
+
import { createCopilotClient } from '../../src/shared/copilotClient.js';
|
|
20
|
+
import type { CopilotClient } from '../../src/shared/copilotClient.js';
|
|
21
|
+
import { ConversationLoop } from '../../src/loop/conversationLoop.js';
|
|
22
|
+
import type { LoopIO, DecisionGateResult } from '../../src/loop/conversationLoop.js';
|
|
23
|
+
import { createPhaseHandler, getPhaseOrder } from '../../src/phases/phaseHandlers.js';
|
|
24
|
+
import type { PhaseHandlerConfig } from '../../src/phases/phaseHandlers.js';
|
|
25
|
+
import type { WorkshopSession, PhaseValue } from '../../src/shared/schemas/session.js';
|
|
26
|
+
import { createDefaultStore } from '../../src/sessions/sessionStore.js';
|
|
27
|
+
import { isWebSearchConfigured, createWebSearchTool } from '../../src/mcp/webSearch.js';
|
|
28
|
+
import type { WebSearchConfig } from '../../src/mcp/webSearch.js';
|
|
29
|
+
import type { WebSearchClient } from '../../src/phases/discoveryEnricher.js';
|
|
30
|
+
import type { SofiaEvent } from '../../src/shared/events.js';
|
|
31
|
+
|
|
32
|
+
// ── Config ───────────────────────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
// Directory holding this spec file (ES modules do not provide __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url));
// Project root, resolved as two directories above this spec's directory.
const PROJECT_ROOT = join(__dirname, '../..');
// Destination for the run's JSON results and the exported artifacts.
const RESULTS_DIR = join(PROJECT_ROOT, 'tests/e2e/zava-assessment/results');

// Per-phase timeout: 3 minutes each for LLM calls (not referenced elsewhere in this file)
const _PHASE_TIMEOUT = 3 * 60 * 1000;
// Per-turn timeout: 2 minutes (not referenced elsewhere in this file)
const _TURN_TIMEOUT = 2 * 60 * 1000;
|
|
42
|
+
|
|
43
|
+
// ── Zava Industries Canned Inputs ──────────────────────────────────────────
|
|
44
|
+
|
|
45
|
+
// Scripted user replies for the Discover phase, listed in the order they are consumed.
// createTestIO() hands out one entry per readInput() call and returns null once the list is exhausted.
const DISCOVER_INPUTS = [
  // Input 1: initial business description
  `We are Zava Industries, a mid-premium fashion company based in Milan. We design and sell modern clothing for ages 20–55. Our competitive edge is trend analysis — we try to detect emerging fashion trends before competitors and turn them into collections fast. We have a team of 20: 5 designers, 4 trend analysts, 3 data scientists, 2 developers, 3 marketing people, 2 ops people, and me as Head of Innovation.

Our biggest challenge is speed. Right now our trend analysts spend about 60% of their time manually gathering data from Instagram, TikTok, Pinterest, celebrity magazines, films, and runway shows. This data ends up scattered across Google Sheets, Miro boards, Notion pages, and email threads. By the time we consolidate everything into a trend report, fast-fashion competitors like Zara and Shein have already reacted.

We do about €18M annual revenue, serve the EU primarily but are expanding to the US. Our trend-to-retail cycle is 10–14 weeks and we want to get it under 8. We're an Azure shop — Azure SQL, Blob Storage, Power BI for sales dashboards.`,

  // Input 2: team/process details
  `The trend analysts each specialize: Sara covers social media (Instagram, TikTok), Dimitri tracks runway and trade shows, Aisha monitors celebrity and entertainment media, and Tomás does competitor retail analysis. They each produce about 3 reports per month. The data scientists — Priya, Liam, and Mei — have built a basic demand forecasting model using Power BI and Azure ML, but it only works on historical sales data, not on forward-looking trend signals.

Our designers get a consolidated trend brief every 2–3 weeks, which they say is too slow. They want real-time or near-real-time signals. The hit rate for our collections is about 35% — only 35% of designed pieces make it to production. We believe better trend data could push that to 50% or more.

Key metrics we track: trend detection lead time (currently ~4 weeks, want <1 week), collection hit rate (35%, want 50%+), time to market (10–14 weeks, want 8), and analyst productivity (3 reports/month, want 8+).`,

  // Input 3: topic selection
  `I'd like to focus on Trend Intelligence and Signal Aggregation — specifically, how we can use AI to automate the gathering and scoring of trend signals from multiple sources (social media, celebrity media, runway, retail). This is the bottleneck that affects everything else downstream.`,

  // Input 4: activities
  `Here are the key activities in our trend analysis workflow:
1. Social Media Scanning — Analysts browse Instagram, TikTok, Pinterest for emerging styles, colors, silhouettes.
2. Celebrity & Entertainment Monitoring — Weekly scan of Vogue, Elle, People, Hola! plus streaming shows.
3. Runway & Trade Show Tracking — Attend or watch livestreams of 4–6 shows/year.
4. Competitor Retail Analysis — Manual store visits and online browsing of Zara, H&M, COS, etc.
5. Signal Consolidation — Merge all data into a trend report. Takes 1–2 weeks.
6. Trend Scoring & Prioritization — Team meeting to rank trends. Very subjective.
7. Design Brief Creation — Create briefs for designers based on top trends.
8. Designer Feedback Loop — Designers review briefs, ask questions, iterate.

What we'd do if it weren't so hard: real-time multi-source signal aggregation, automated trend scoring with a confidence index, instant visual mood board generation.`,

  // Input 5: critical steps voting
  `The most critical steps are:
- Social Media Scanning — Business value: 5, Human value: 4. Key metric: ~25hrs/week across the team.
- Signal Consolidation — Business value: 5, Human value: 3. Key metric: 1–2 weeks to produce report.
- Trend Scoring & Prioritization — Business value: 5, Human value: 4. Key metric: scoring consistency.
- Design Brief Creation — Business value: 4, Human value: 3. Key metric: designer satisfaction ~3.2/5.`,

  // Input 6: confirm summary
  `Yes, that workflow summary looks accurate. Let's proceed to ideation.`,
];
|
|
86
|
+
|
|
87
|
+
// Scripted user replies for the Ideate phase, listed in the order they are consumed.
// The wording assumes the assistant has presented AI capability cards and idea proposals beforehand.
const IDEATE_INPUTS = [
  // Cards reaction
  `I'm very interested in these cards: Computer Vision / Image Recognition for analyzing social media and runway photos, Natural Language Processing for extracting trend signals from captions and articles, Recommendation Systems for suggesting relevant trends, Anomaly / Pattern Detection for spotting emerging patterns, Predictive Analytics for trend lifecycle forecasting, Content Generation for auto-generating mood boards, Data Integration / Aggregation for unifying data sources, and Sentiment Analysis for gauging public reaction.`,

  // Score cards
  `My scores (Relevance / Feasibility / Impact): Computer Vision 5/3/5, NLP 5/4/4, Recommendation Systems 4/3/4, Anomaly Detection 5/3/5, Predictive Analytics 5/2/5, Content Generation 3/4/3, Data Integration 5/5/4, Sentiment Analysis 4/4/4.`,

  // Confirm top cards
  `I agree with the top cards. Aggregate "Computer Vision" and "Anomaly/Pattern Detection" under "Visual Trend Detection", and "NLP" and "Sentiment Analysis" under "Text-Based Trend Intelligence". The rest keep as individual cards.`,

  // Map cards to workflow
  `Visual Trend Detection → Social Media Scanning, Celebrity Monitoring, Runway Tracking. Text-Based Trend Intelligence → Social Media Scanning, Celebrity Monitoring. Data Integration → Signal Consolidation. Recommendation Systems → Trend Scoring. Predictive Analytics → Trend Scoring, Design Brief Creation. Content Generation → Design Brief Creation, Designer Feedback Loop.`,

  // Idea generation
  `Great ideas! My favorites: 1. TrendLens - AI visual analyzer that extracts fashion attributes from social media and celebrity photos in near-real-time. 2. TrendPulse Dashboard - unified real-time dashboard aggregating all trend signals. 3. AutoBrief Generator - AI that creates design briefs with visual mood boards. 4. Celebrity Impact Tracker - correlates celebrity outfits with social engagement and demand. 5. Trend Predictor - estimates trend lifecycle and commercial potential. I'm most excited about ideas 1 and 2.`,

  // Confirm ideas
  `These idea cards look great. Let's move to the Design phase.`,
];
|
|
106
|
+
|
|
107
|
+
// Scripted user replies for the Design phase, listed in the order they are consumed.
// They refer to the idea names introduced by the Ideate inputs (TrendLens, TrendPulse, ...).
const DESIGN_INPUTS = [
  // Refine idea cards
  `For TrendLens, add: Assumptions - we can get Instagram Graph API + TikTok Research API access; Azure Cognitive Services has sufficient fashion-domain accuracy. Data Needed - social media images, celebrity photos, runway images, 6 months historical. For TrendPulse, add: Assumptions - Power BI can be extended or replaced; team will adopt new tool. Data Needed - all source feeds plus historical sales data.`,

  // Feasibility/Value scores
  `My scores: TrendLens - Feasibility 3, Value 5. TrendPulse Dashboard - Feasibility 4, Value 5. AutoBrief Generator - Feasibility 4, Value 3. Celebrity Impact Tracker - Feasibility 3, Value 4. Trend Predictor - Feasibility 2, Value 5.`,

  // Impact assessment
  `I agree with the BXT assessment. Additional risks: TrendLens - social media API rate limits and policy changes. TrendPulse - change management, analysts attached to individual tools. Biggest opportunity: combining TrendLens + TrendPulse into one platform could become a SaaS product.`,

  // Confirm design output
  `The architecture sketch and impact assessment look solid. Let's proceed to Selection.`,
];
|
|
120
|
+
|
|
121
|
+
// Scripted user reply for the Select phase: a single message that picks the idea to carry forward.
const SELECT_INPUTS = [
  `I agree. I want to proceed with TrendPulse Dashboard with integrated TrendLens — the unified real-time trend intelligence platform that combines visual AI analysis with multi-source signal aggregation. This addresses our core bottleneck and has long-term SaaS potential.`,
];
|
|
124
|
+
|
|
125
|
+
// Scripted user replies for the Plan phase, in consumption order:
// first the PoC scope/stack/timeline/success criteria, then a confirmation to move on.
const PLAN_INPUTS = [
  `The milestones look good. For the PoC, minimum scope: ingest images from one source (Instagram), extract basic fashion attributes (colors, patterns), display on a simple dashboard with trend frequency chart. Tech stack: Azure Functions backend, Azure Cognitive Services for image analysis, Azure Cosmos DB for trend storage, React frontend. Timeline: 4 weeks with 2 devs + 1 data scientist. Success criteria: process 100+ images, extract 3+ attributes per image with >70% accuracy, dashboard updating hourly.`,

  `The plan and PoC definition look great. I'm ready to proceed to the Develop phase.`,
];
|
|
130
|
+
|
|
131
|
+
// Scripted user reply for the Develop phase: target stack, key scenarios, constraints and out-of-scope items.
const DEVELOP_INPUTS = [
  `For the PoC: Target stack - TypeScript + Node.js backend, React dashboard, Azure Cognitive Services for image analysis. Key scenarios: (1) Ingest Instagram-like image feed, (2) extract fashion attributes using AI vision, (3) aggregate into trend scores, (4) display on real-time dashboard. Constraints: run locally for PoC, use mocked image data if API unavailable. Out of scope: auth, multi-language, production scaling.`,
];
|
|
134
|
+
|
|
135
|
+
// All inputs indexed by phase
|
|
136
|
+
// All inputs indexed by phase
// Keys are the phase names the workshop test iterates over; a phase with no entry
// falls back to an empty input list at the lookup site (`PHASE_INPUTS[phase] ?? []`).
const PHASE_INPUTS: Record<string, string[]> = {
  Discover: DISCOVER_INPUTS,
  Ideate: IDEATE_INPUTS,
  Design: DESIGN_INPUTS,
  Select: SELECT_INPUTS,
  Plan: PLAN_INPUTS,
  Develop: DEVELOP_INPUTS,
};
|
|
144
|
+
|
|
145
|
+
// ── Test Results Collector ────────────────────────────────────────────────────
|
|
146
|
+
|
|
147
|
+
/** Outcome of running one workshop phase, as recorded in test-results.json. */
interface PhaseResult {
  /** Name of the phase that was run. */
  phase: string;
  /** Session turns belonging to this phase, reduced to role and content. */
  turns: { role: string; content: string }[];
  /** Events delivered to the loop's onEvent callback during this phase. */
  events: SofiaEvent[];
  /** Selected session fields captured after the phase finished. */
  session: Partial<WorkshopSession>;
  /** Elapsed wall-clock time for the phase, in milliseconds. */
  durationMs: number;
  /** Messages of errors caught while running the phase (empty when none). */
  errors: string[];
}
|
|
155
|
+
|
|
156
|
+
/** Aggregate record of a full workshop run, serialized to test-results.json. */
interface TestResults {
  /** ISO timestamp taken when the results object is created. */
  startedAt: string;
  /** Whether web search was reported as configured for this run. */
  webSearchConfigured: boolean;
  /** One entry per phase, appended in execution order. */
  phases: PhaseResult[];
  /** ISO timestamp set in afterAll. */
  completedAt?: string;
  /** ID of the session created by the workshop test. */
  sessionId?: string;
  /** Session state after the last phase. */
  finalSession?: WorkshopSession;
  /** Reserved for a run-level error message; not assigned anywhere in this file. */
  overallError?: string;
}
|
|
165
|
+
|
|
166
|
+
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
167
|
+
|
|
168
|
+
/**
 * Build a fresh, empty workshop session for the Zava Industries run.
 *
 * The session ID has the form `zava-test-YYYY-MM-DD_HHMMSS` using the local
 * clock, while `createdAt`/`updatedAt` hold the same instant as an ISO-8601
 * (UTC) string. Both are derived from a single `Date` so the ID and the
 * timestamps always describe the same moment.
 *
 * @returns A session at the `Discover` phase with status `Active` and no
 *   participants, turns, or generated files.
 */
function createNewSession(): WorkshopSession {
  // One clock read only: two separate `new Date()` calls could straddle a tick.
  const created = new Date();
  const createdIso = created.toISOString();
  const pad = (n: number): string => String(n).padStart(2, '0');

  const datePart = `${created.getFullYear()}-${pad(created.getMonth() + 1)}-${pad(created.getDate())}`;
  const timePart = `${pad(created.getHours())}${pad(created.getMinutes())}${pad(created.getSeconds())}`;

  return {
    sessionId: `zava-test-${datePart}_${timePart}`,
    name: 'Zava Industries Assessment',
    schemaVersion: '1.0.0',
    createdAt: createdIso,
    updatedAt: createdIso,
    phase: 'Discover',
    status: 'Active',
    participants: [],
    artifacts: { generatedFiles: [] },
    turns: [],
  };
}
|
|
186
|
+
|
|
187
|
+
/**
|
|
188
|
+
* Create a LoopIO that feeds canned inputs and captures all output.
|
|
189
|
+
*/
|
|
190
|
+
/**
 * Build a scripted LoopIO for driving a conversation without a human.
 *
 * Every write is recorded in the returned arrays and mirrored to the console
 * (assistant text to stdout, activity/tool/io/gate traces to stderr).
 * `readInput` serves `inputs` in order and resolves to `null` once they are
 * used up; `showDecisionGate` always answers "continue".
 *
 * @param inputs Canned user replies, consumed front to back (not mutated).
 * @returns The IO object plus the live capture arrays it appends to.
 */
function createTestIO(inputs: string[]): {
  io: LoopIO;
  output: string[];
  activityLog: string[];
  toolSummaries: Array<{ tool: string; summary: string }>;
} {
  const output: string[] = [];
  const activityLog: string[] = [];
  const toolSummaries: Array<{ tool: string; summary: string }> = [];

  // How many canned inputs have been handed out so far.
  let consumed = 0;

  const trace = (line: string): void => {
    process.stderr.write(line);
  };

  const io: LoopIO = {
    write(text: string) {
      output.push(text);
      // Mirror assistant output to stdout so a live run can be watched.
      process.stdout.write(text);
    },

    writeActivity(text: string) {
      activityLog.push(text);
      trace(` [activity] ${text}\n`);
    },

    writeToolSummary(toolName: string, summary: string) {
      toolSummaries.push({ tool: toolName, summary });
      trace(` ✓ ${toolName}: ${summary}\n`);
    },

    async readInput(_prompt?: string): Promise<string | null> {
      if (consumed < inputs.length) {
        const next = inputs[consumed];
        consumed += 1;
        // The counter is logged 1-based; only the first 80 characters are echoed.
        trace(` [io] Input ${consumed}/${inputs.length}: ${next.slice(0, 80)}...\n`);
        return next;
      }

      // Script exhausted: null tells the caller there is nothing more to read.
      trace(` [io] No more inputs, returning null (done)\n`);
      return null;
    },

    async showDecisionGate(_phase: PhaseValue): Promise<DecisionGateResult> {
      // The scripted run never stops at a gate.
      trace(` [gate] Phase complete → continuing\n`);
      return { choice: 'continue' };
    },

    isJsonMode: false,
    isTTY: false,
  };

  return { io, output, activityLog, toolSummaries };
}
|
|
236
|
+
|
|
237
|
+
// ── Test Suite ────────────────────────────────────────────────────────────────
|
|
238
|
+
|
|
239
|
+
describe('Zava Industries — Full Workshop Session', () => {
|
|
240
|
+
// Copilot client shared by the tests; assigned in beforeAll.
let client: CopilotClient;
// Flipped to true once createCopilotClient() has succeeded; tests return early while it is false.
let canRun = false;
// Assigned in beforeAll only when isWebSearchConfigured() reports true; otherwise stays undefined.
let webSearchClient: WebSearchClient | undefined;
// Run-wide record that the tests append to and that is written to test-results.json.
const results: TestResults = {
  startedAt: new Date().toISOString(),
  // NOTE(review): this initial value is computed while the describe body runs,
  // i.e. before beforeAll has loaded .env — settings that live only in .env are not visible yet.
  webSearchConfigured: isWebSearchConfigured(),
  phases: [],
};
|
|
248
|
+
|
|
249
|
+
// Suite setup: load .env, create the Copilot client, optionally wire up web
// search, and make sure the results directory exists. Any failure is logged
// and leaves the tests to skip themselves via `canRun`.
beforeAll(async () => {
  try {
    // Load .env manually (the CLI does this, but tests may not)
    const { config } = await import('dotenv');
    config({ path: join(PROJECT_ROOT, '.env') });

    // The value stored in `results` was computed before .env was loaded, so
    // refresh it now; otherwise the saved report can claim web search was off
    // while the run actually used it.
    const webSearchReady = isWebSearchConfigured();
    results.webSearchConfigured = webSearchReady;

    client = await createCopilotClient();
    canRun = true;

    // FR-012: Create WebSearchClient when configured (after dotenv loads)
    if (webSearchReady) {
      const wsConfig: WebSearchConfig = {
        projectEndpoint: process.env.FOUNDRY_PROJECT_ENDPOINT!,
        modelDeploymentName: process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME!,
      };
      const searchFn = createWebSearchTool(wsConfig);
      webSearchClient = { search: searchFn };
    }

    // Ensure results directory
    await mkdir(RESULTS_DIR, { recursive: true });

    const webSearchLabel = webSearchReady ? 'CONFIGURED ✓' : 'NOT CONFIGURED ✗';
    console.log('\n╔══════════════════════════════════════════════════════╗');
    console.log('║ Zava Industries — Full Workshop Live Assessment ║');
    console.log(`║ Web Search: ${webSearchLabel.padEnd(39)}║`);
    console.log('╚══════════════════════════════════════════════════════╝\n');
  } catch (err) {
    console.warn(
      `Skipping live workshop test — Copilot client not available: ${
        err instanceof Error ? err.message : err
      }`,
    );
  }
}, 60_000);
|
|
287
|
+
|
|
288
|
+
// Suite teardown: stamp the completion time and persist the collected results.
// A failed write is reported on the console but does not fail the suite.
afterAll(async () => {
  results.completedAt = new Date().toISOString();

  const resultsFile = join(RESULTS_DIR, 'test-results.json');

  // Write results JSON
  try {
    const payload = JSON.stringify(results, null, 2);
    await writeFile(resultsFile, payload, 'utf-8');
    console.log(`\nResults saved to tests/e2e/zava-assessment/results/test-results.json`);
  } catch (err) {
    console.error('Failed to save results:', err);
  }
});
|
|
303
|
+
|
|
304
|
+
// ── Phase-by-phase tests ──────────────────────────────────────────────
|
|
305
|
+
|
|
306
|
+
// Drives every workshop phase in order with canned inputs against the real
// Copilot client. A phase that throws is recorded and the run moves on to the
// next phase, so a complete report is produced; the assertions at the end
// check the session data that later phases depend on.
it('runs the full workshop: Discover → Ideate → Design → Select → Plan → Develop', async () => {
  if (!canRun) {
    console.warn('SKIPPED: Copilot SDK not available');
    return;
  }

  const store = createDefaultStore();
  let session = createNewSession();
  await store.save(session);
  results.sessionId = session.sessionId;

  console.log(`Session ID: ${session.sessionId}\n`);

  const phases = getPhaseOrder(); // ['Discover','Ideate','Design','Select','Plan','Develop']
  // Names of phases whose run threw, so the closing message can be truthful.
  const failedPhases: string[] = [];

  for (const phase of phases) {
    console.log(`\n${'═'.repeat(60)}`);
    console.log(` PHASE: ${phase}`);
    console.log(`${'═'.repeat(60)}\n`);

    const phaseStart = Date.now();
    const events: SofiaEvent[] = [];
    const errors: string[] = [];

    // Get canned inputs for this phase
    const inputs = PHASE_INPUTS[phase] ?? [];
    const { io } = createTestIO(inputs);

    try {
      // Create and preload the handler (pass webSearchClient for Discover enrichment)
      const handlerConfig: PhaseHandlerConfig = {
        discover: {
          io,
          webSearchClient,
        },
        webSearchClient,
      };
      const handler = createPhaseHandler(phase as PhaseValue, handlerConfig);
      await handler._preload();

      // Update session phase
      session.phase = phase as PhaseValue;
      session.updatedAt = new Date().toISOString();
      await store.save(session);

      // Generate initial message
      const initialMessage = handler.getInitialMessage?.(session);

      // Create and run the conversation loop
      const loop = new ConversationLoop({
        client,
        io,
        session,
        phaseHandler: handler,
        initialMessage,
        onEvent: (e) => events.push(e),
        onSessionUpdate: async (updatedSession) => {
          // Keep the outer reference current so a mid-phase failure still reports the latest state.
          session = updatedSession;
          await store.save(session);
        },
      });

      session = await loop.run();
      session.updatedAt = new Date().toISOString();
      await store.save(session);
    } catch (err) {
      // Best effort: record the failure and continue with the next phase.
      const msg = err instanceof Error ? err.message : String(err);
      errors.push(msg);
      failedPhases.push(phase);
      console.error(`\n ✗ Phase ${phase} error: ${msg}\n`);
    }

    const durationMs = Date.now() - phaseStart;

    // Collect phase result
    const phaseTurns = (session.turns ?? [])
      .filter((t) => t.phase === phase)
      .map((t) => ({ role: t.role, content: t.content }));

    const phaseResult: PhaseResult = {
      phase,
      turns: phaseTurns,
      events,
      session: {
        businessContext: session.businessContext,
        workflow: session.workflow,
        ideas: session.ideas,
        evaluation: session.evaluation,
        selection: session.selection,
        plan: session.plan,
        poc: session.poc,
        name: session.name,
        discovery: (session as Record<string, unknown>).discovery as WorkshopSession['discovery'],
      } as Partial<WorkshopSession>,
      durationMs,
      errors,
    };

    results.phases.push(phaseResult);

    console.log(`\n Phase ${phase} completed in ${(durationMs / 1000).toFixed(1)}s`);
    console.log(` Turns: ${phaseTurns.length}`);
    console.log(` Events: ${events.length}`);
    console.log(` Errors: ${errors.length}`);

    // Phase-specific progress output (informational only, no assertions here)
    if (phase === 'Discover') {
      console.log(` businessContext: ${session.businessContext ? '✓' : '✗'}`);
      console.log(` name: ${session.name ?? '(not set)'}`);
      console.log(
        ` discovery.enrichment: ${(session as Record<string, unknown>).discovery ? '✓' : '✗'}`,
      );
    } else if (phase === 'Ideate') {
      console.log(` ideas: ${session.ideas?.length ?? 0} ideas captured`);
    } else if (phase === 'Design') {
      console.log(` evaluation: ${session.evaluation ? '✓' : '✗'}`);
    } else if (phase === 'Select') {
      console.log(` selection: ${session.selection ? '✓' : '✗'}`);
      console.log(` confirmedByUser: ${session.selection?.confirmedByUser ?? false}`);
    } else if (phase === 'Plan') {
      console.log(` plan: ${session.plan ? '✓' : '✗'}`);
      console.log(` milestones: ${session.plan?.milestones?.length ?? 0}`);
    } else if (phase === 'Develop') {
      console.log(` poc: ${session.poc ? '✓' : '✗'}`);
    }

    // Save intermediate results after each phase
    await writeFile(
      join(RESULTS_DIR, 'test-results.json'),
      JSON.stringify(results, null, 2),
      'utf-8',
    );
  }

  // Store final session
  results.finalSession = session;

  // Write the full session JSON for inspection
  await writeFile(
    join(RESULTS_DIR, 'final-session.json'),
    JSON.stringify(session, null, 2),
    'utf-8',
  );

  console.log(`\n${'═'.repeat(60)}`);
  console.log(' WORKSHOP COMPLETE');
  console.log(`${'═'.repeat(60)}`);
  console.log(` Session: ${session.sessionId}`);
  console.log(` Status: ${session.status}`);
  console.log(` Phase: ${session.phase}`);
  console.log(` Total turns: ${session.turns?.length ?? 0}`);

  // ── Assertions ──────────────────────────────────────────────────────

  // Basic phase progression
  expect(session.businessContext).toBeTruthy();
  expect(session.name).toBeTruthy();

  // Ideas generated
  expect(session.ideas).toBeTruthy();
  expect(session.ideas!.length).toBeGreaterThan(0);

  // Selection made
  // (May or may not have been extracted — depends on LLM output format)
  if (session.selection) {
    console.log(` Selected idea: ${session.selection.ideaId}`);
  }

  // Plan generated
  if (session.plan) {
    // Guarded like the per-phase output above: a plan may come back without milestones.
    console.log(` Plan milestones: ${session.plan.milestones?.length ?? 0}`);
  }

  if (failedPhases.length === 0) {
    console.log('\n All phases completed successfully ✓\n');
  } else {
    // Do not claim success when one or more phases recorded an error.
    console.warn(`\n Workshop finished with errors in: ${failedPhases.join(', ')}\n`);
  }
}, 3600_000); // 1-hour overall timeout
|
|
480
|
+
|
|
481
|
+
// ── Export Test ────────────────────────────────────────────────────────
|
|
482
|
+
|
|
483
|
+
// Exports the session saved by the workshop test and checks the export output:
// a summary.json naming the session and listing files, plus at least one Markdown file.
it('exports artifacts after workshop completion', async () => {
  const sessionId = results.sessionId;
  if (!canRun || !sessionId) {
    console.warn('SKIPPED: No session to export');
    return;
  }

  const { exportSession } = await import('../../src/sessions/exportWriter.js');
  const savedSession = await createDefaultStore().load(sessionId);

  const exportDir = join(RESULTS_DIR, 'export');
  await mkdir(exportDir, { recursive: true });
  await exportSession(savedSession, exportDir);

  // Verify export files exist
  const { readdir } = await import('node:fs/promises');
  const exported = await readdir(exportDir);

  console.log(`\n Export files: ${exported.join(', ')}`);

  expect(exported).toContain('summary.json');
  const hasMarkdown = exported.some((fileName) => fileName.endsWith('.md'));
  expect(hasMarkdown).toBe(true);

  // Read and validate summary.json
  const summaryPath = join(exportDir, 'summary.json');
  const summary = JSON.parse(await readFile(summaryPath, 'utf-8'));
  expect(summary.sessionId).toBe(sessionId);
  expect(summary.files).toBeTruthy();
  expect(summary.files.length).toBeGreaterThan(0);

  console.log(` Export highlights: ${summary.highlights?.join('; ') ?? 'none'}`);
}, 30_000);
|
|
515
|
+
});
|