sofia-cli 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/agents/copilot-instructions.md +39 -0
- package/.github/agents/speckit.analyze.agent.md +184 -0
- package/.github/agents/speckit.checklist.agent.md +294 -0
- package/.github/agents/speckit.clarify.agent.md +181 -0
- package/.github/agents/speckit.constitution.agent.md +84 -0
- package/.github/agents/speckit.implement.agent.md +135 -0
- package/.github/agents/speckit.plan.agent.md +90 -0
- package/.github/agents/speckit.specify.agent.md +258 -0
- package/.github/agents/speckit.tasks.agent.md +137 -0
- package/.github/agents/speckit.taskstoissues.agent.md +30 -0
- package/.github/copilot-instructions.md +257 -0
- package/.github/prompts/speckit.analyze.prompt.md +3 -0
- package/.github/prompts/speckit.checklist.prompt.md +3 -0
- package/.github/prompts/speckit.clarify.prompt.md +3 -0
- package/.github/prompts/speckit.constitution.prompt.md +3 -0
- package/.github/prompts/speckit.implement.prompt.md +3 -0
- package/.github/prompts/speckit.plan.prompt.md +3 -0
- package/.github/prompts/speckit.specify.prompt.md +3 -0
- package/.github/prompts/speckit.tasks.prompt.md +3 -0
- package/.github/prompts/speckit.taskstoissues.prompt.md +3 -0
- package/.github/workflows/ci.yml +38 -0
- package/.prettierrc +6 -0
- package/.specify/memory/constitution.md +181 -0
- package/.specify/scripts/bash/check-prerequisites.sh +166 -0
- package/.specify/scripts/bash/common.sh +156 -0
- package/.specify/scripts/bash/create-new-feature.sh +297 -0
- package/.specify/scripts/bash/setup-plan.sh +61 -0
- package/.specify/scripts/bash/update-agent-context.sh +810 -0
- package/.specify/templates/agent-file-template.md +28 -0
- package/.specify/templates/checklist-template.md +40 -0
- package/.specify/templates/constitution-template.md +50 -0
- package/.specify/templates/plan-template.md +113 -0
- package/.specify/templates/spec-template.md +115 -0
- package/.specify/templates/tasks-template.md +251 -0
- package/.vscode/mcp.json +42 -0
- package/.vscode/settings.json +19 -0
- package/CODE_OF_CONDUCT.md +128 -0
- package/LICENSE +21 -0
- package/README.md +213 -0
- package/dist/src/cli/developCommand.js +240 -0
- package/dist/src/cli/directCommands.js +143 -0
- package/dist/src/cli/envLoader.js +16 -0
- package/dist/src/cli/exportCommand.js +53 -0
- package/dist/src/cli/index.js +203 -0
- package/dist/src/cli/ioContext.js +109 -0
- package/dist/src/cli/preflight.js +57 -0
- package/dist/src/cli/statusCommand.js +110 -0
- package/dist/src/cli/workshopCommand.js +400 -0
- package/dist/src/develop/checkpointState.js +86 -0
- package/dist/src/develop/codeGenerator.js +319 -0
- package/dist/src/develop/dynamicScaffolder.js +226 -0
- package/dist/src/develop/githubMcpAdapter.js +122 -0
- package/dist/src/develop/index.js +15 -0
- package/dist/src/develop/mcpContextEnricher.js +195 -0
- package/dist/src/develop/pocScaffolder.js +542 -0
- package/dist/src/develop/ralphLoop.js +659 -0
- package/dist/src/develop/templateRegistry.js +364 -0
- package/dist/src/develop/testRunner.js +202 -0
- package/dist/src/logging/logger.js +58 -0
- package/dist/src/loop/conversationLoop.js +227 -0
- package/dist/src/loop/phaseSummarizer.js +87 -0
- package/dist/src/mcp/mcpManager.js +267 -0
- package/dist/src/mcp/mcpTransport.js +391 -0
- package/dist/src/mcp/retryPolicy.js +47 -0
- package/dist/src/mcp/webSearch.js +254 -0
- package/dist/src/phases/contextSummarizer.js +101 -0
- package/dist/src/phases/discoveryEnricher.js +156 -0
- package/dist/src/phases/phaseExtractors.js +222 -0
- package/dist/src/phases/phaseHandlers.js +328 -0
- package/dist/src/prompts/design.md +51 -0
- package/dist/src/prompts/develop-boundary.md +51 -0
- package/dist/src/prompts/develop.md +111 -0
- package/dist/src/prompts/discover.md +58 -0
- package/dist/src/prompts/ideate.md +56 -0
- package/dist/src/prompts/plan.md +51 -0
- package/dist/src/prompts/promptLoader.js +167 -0
- package/dist/src/prompts/promptLoader.ts +198 -0
- package/dist/src/prompts/select.md +47 -0
- package/dist/src/prompts/summarize/README.md +8 -0
- package/dist/src/prompts/summarize/design-summary.md +37 -0
- package/dist/src/prompts/summarize/develop-summary.md +25 -0
- package/dist/src/prompts/summarize/ideate-summary.md +27 -0
- package/dist/src/prompts/summarize/plan-summary.md +27 -0
- package/dist/src/prompts/summarize/select-summary.md +21 -0
- package/dist/src/prompts/system.md +28 -0
- package/dist/src/sessions/exportPaths.js +22 -0
- package/dist/src/sessions/exportWriter.js +406 -0
- package/dist/src/sessions/sessionManager.js +81 -0
- package/dist/src/sessions/sessionStore.js +65 -0
- package/dist/src/shared/activitySpinner.js +91 -0
- package/dist/src/shared/copilotClient.js +129 -0
- package/dist/src/shared/data/cards.json +1249 -0
- package/dist/src/shared/data/cardsLoader.js +51 -0
- package/dist/src/shared/errorClassifier.js +120 -0
- package/dist/src/shared/events.js +28 -0
- package/dist/src/shared/markdownRenderer.js +34 -0
- package/dist/src/shared/schemas/session.js +265 -0
- package/dist/src/shared/tableRenderer.js +20 -0
- package/dist/src/vendor/chalk.js +2 -0
- package/dist/src/vendor/cli-table3.js +3 -0
- package/dist/src/vendor/commander.js +2 -0
- package/dist/src/vendor/marked-terminal.js +3 -0
- package/dist/src/vendor/marked.js +2 -0
- package/dist/src/vendor/ora.js +2 -0
- package/dist/src/vendor/pino.js +2 -0
- package/dist/src/vendor/zod.js +2 -0
- package/dist/tests/e2e/developE2e.spec.js +126 -0
- package/dist/tests/e2e/developFailureE2e.spec.js +247 -0
- package/dist/tests/e2e/developPty.spec.js +75 -0
- package/dist/tests/e2e/discoveryWebSearchRelevance.spec.js +84 -0
- package/dist/tests/e2e/harness.spec.js +83 -0
- package/dist/tests/e2e/mcpLive.spec.js +120 -0
- package/dist/tests/e2e/newSession.e2e.spec.js +177 -0
- package/dist/tests/e2e/ralphLoopEnrichmentComparison.spec.js +62 -0
- package/dist/tests/e2e/workiqEnrichment.spec.js +56 -0
- package/dist/tests/e2e/zavaSimulation.spec.js +452 -0
- package/dist/tests/fixtures/test-fixture-project/src/add.js +3 -0
- package/dist/tests/fixtures/test-fixture-project/tests/failing.test.js +6 -0
- package/dist/tests/fixtures/test-fixture-project/tests/hanging.test.js +8 -0
- package/dist/tests/fixtures/test-fixture-project/tests/passing.test.js +10 -0
- package/dist/tests/fixtures/test-fixture-project/vitest.config.js +6 -0
- package/dist/tests/integration/autoStartConversation.spec.js +138 -0
- package/dist/tests/integration/defaultCommand.spec.js +147 -0
- package/dist/tests/integration/directCommandNonTty.spec.js +224 -0
- package/dist/tests/integration/directCommandTty.spec.js +151 -0
- package/dist/tests/integration/discoveryEnrichmentFlow.spec.js +175 -0
- package/dist/tests/integration/exportArtifacts.spec.js +202 -0
- package/dist/tests/integration/exportFallbackFlow.spec.js +99 -0
- package/dist/tests/integration/mcpDegradationFlow.spec.js +190 -0
- package/dist/tests/integration/mcpTransportFlow.spec.js +139 -0
- package/dist/tests/integration/newSessionFlow.spec.js +343 -0
- package/dist/tests/integration/pocGithubMcp.spec.js +186 -0
- package/dist/tests/integration/pocLocalFallback.spec.js +171 -0
- package/dist/tests/integration/pocScaffold.spec.js +163 -0
- package/dist/tests/integration/ralphLoopFlow.spec.js +359 -0
- package/dist/tests/integration/ralphLoopPartial.spec.js +368 -0
- package/dist/tests/integration/resumeAndBacktrack.spec.js +247 -0
- package/dist/tests/integration/spinnerLifecycle.spec.js +220 -0
- package/dist/tests/integration/summarizationFlow.spec.js +115 -0
- package/dist/tests/integration/testRunnerReal.spec.js +52 -0
- package/dist/tests/integration/webSearchAgent.spec.js +128 -0
- package/dist/tests/live/copilotSdkLive.spec.js +107 -0
- package/dist/tests/live/zavaFullWorkshop.spec.js +392 -0
- package/dist/tests/setup/loadEnv.js +3 -0
- package/dist/tests/unit/cli/developCommand.spec.js +567 -0
- package/dist/tests/unit/cli/directCommands.spec.js +279 -0
- package/dist/tests/unit/cli/envLoader.spec.js +58 -0
- package/dist/tests/unit/cli/ioContext.spec.js +119 -0
- package/dist/tests/unit/cli/preflight.spec.js +108 -0
- package/dist/tests/unit/cli/statusCommand.spec.js +111 -0
- package/dist/tests/unit/cli/workshopClientFallback.spec.js +80 -0
- package/dist/tests/unit/cli/workshopCommand.spec.js +329 -0
- package/dist/tests/unit/config/vitestEnvSetup.spec.js +13 -0
- package/dist/tests/unit/develop/checkpointState.spec.js +315 -0
- package/dist/tests/unit/develop/codeGenerator.spec.js +355 -0
- package/dist/tests/unit/develop/githubMcpAdapter.spec.js +231 -0
- package/dist/tests/unit/develop/mcpContextEnricher.spec.js +433 -0
- package/dist/tests/unit/develop/outputValidator.spec.js +119 -0
- package/dist/tests/unit/develop/pocScaffolder.spec.js +353 -0
- package/dist/tests/unit/develop/ralphLoop.spec.js +1248 -0
- package/dist/tests/unit/develop/templateRegistry.spec.js +85 -0
- package/dist/tests/unit/develop/testRunner.spec.js +249 -0
- package/dist/tests/unit/infraBicep.spec.js +92 -0
- package/dist/tests/unit/infraDeploy.spec.js +82 -0
- package/dist/tests/unit/infraTeardown.spec.js +63 -0
- package/dist/tests/unit/logging/logger.spec.js +43 -0
- package/dist/tests/unit/loop/conversationLoop.spec.js +592 -0
- package/dist/tests/unit/loop/phaseSummarizer.spec.js +141 -0
- package/dist/tests/unit/loop/streamingMarkdown.spec.js +147 -0
- package/dist/tests/unit/mcp/mcpManager.spec.js +279 -0
- package/dist/tests/unit/mcp/mcpTransport.spec.js +529 -0
- package/dist/tests/unit/mcp/retryPolicy.spec.js +218 -0
- package/dist/tests/unit/mcp/timeoutValidation.spec.js +46 -0
- package/dist/tests/unit/mcp/webSearch.spec.js +567 -0
- package/dist/tests/unit/phases/contextSummarizer.spec.js +140 -0
- package/dist/tests/unit/phases/discoveryEnricher.repeatCalls.spec.js +93 -0
- package/dist/tests/unit/phases/discoveryEnricher.spec.js +411 -0
- package/dist/tests/unit/phases/phaseExtractors.spec.js +352 -0
- package/dist/tests/unit/phases/phaseHandlers.spec.js +425 -0
- package/dist/tests/unit/prompts/promptLoader.spec.js +118 -0
- package/dist/tests/unit/schemas/pocSchemas.spec.js +412 -0
- package/dist/tests/unit/schemas/session.spec.js +257 -0
- package/dist/tests/unit/sessions/exportPaths.spec.js +31 -0
- package/dist/tests/unit/sessions/exportWriter.spec.js +655 -0
- package/dist/tests/unit/sessions/sessionManager.spec.js +151 -0
- package/dist/tests/unit/sessions/sessionStore.spec.js +116 -0
- package/dist/tests/unit/shared/activitySpinner.spec.js +175 -0
- package/dist/tests/unit/shared/cardsLoader.spec.js +76 -0
- package/dist/tests/unit/shared/copilotClient.spec.js +155 -0
- package/dist/tests/unit/shared/errorClassifier.spec.js +131 -0
- package/dist/tests/unit/shared/events.spec.js +55 -0
- package/dist/tests/unit/shared/markdownRenderer.spec.js +35 -0
- package/dist/tests/unit/shared/markdownRendererChunks.spec.js +70 -0
- package/dist/tests/unit/shared/tableRenderer.spec.js +34 -0
- package/dist/vitest.config.js +14 -0
- package/dist/vitest.live.config.js +18 -0
- package/docs/README.md +35 -0
- package/docs/architecture.md +169 -0
- package/docs/cli-usage.md +207 -0
- package/docs/environment.md +66 -0
- package/docs/export-format.md +146 -0
- package/docs/session-model.md +113 -0
- package/eslint.config.js +35 -0
- package/infra/deploy.sh +193 -0
- package/infra/gather-env.sh +211 -0
- package/infra/main.bicep +90 -0
- package/infra/main.bicepparam +18 -0
- package/infra/resources.bicep +134 -0
- package/infra/teardown.sh +114 -0
- package/package.json +63 -0
- package/specs/001-cli-workshop-rebuild/checklists/requirements.md +35 -0
- package/specs/001-cli-workshop-rebuild/contracts/cli.md +59 -0
- package/specs/001-cli-workshop-rebuild/contracts/export-summary-json.md +23 -0
- package/specs/001-cli-workshop-rebuild/contracts/session-json.md +30 -0
- package/specs/001-cli-workshop-rebuild/data-model.md +210 -0
- package/specs/001-cli-workshop-rebuild/plan.md +361 -0
- package/specs/001-cli-workshop-rebuild/quickstart.md +83 -0
- package/specs/001-cli-workshop-rebuild/research.md +116 -0
- package/specs/001-cli-workshop-rebuild/spec.md +240 -0
- package/specs/001-cli-workshop-rebuild/tasks.md +476 -0
- package/specs/002-poc-generation/contracts/poc-output.md +172 -0
- package/specs/002-poc-generation/contracts/ralph-loop.md +113 -0
- package/specs/002-poc-generation/data-model.md +172 -0
- package/specs/002-poc-generation/plan.md +109 -0
- package/specs/002-poc-generation/quickstart.md +97 -0
- package/specs/002-poc-generation/research.md +786 -0
- package/specs/002-poc-generation/spec.md +81 -0
- package/specs/002-poc-generation/tasks-fix.md +198 -0
- package/specs/002-poc-generation/tasks.md +252 -0
- package/specs/003-mcp-transport-integration/checklists/requirements.md +37 -0
- package/specs/003-mcp-transport-integration/contracts/context-enricher.md +220 -0
- package/specs/003-mcp-transport-integration/contracts/discovery-enricher.md +267 -0
- package/specs/003-mcp-transport-integration/contracts/github-adapter.md +149 -0
- package/specs/003-mcp-transport-integration/contracts/mcp-transport.md +288 -0
- package/specs/003-mcp-transport-integration/data-model.md +326 -0
- package/specs/003-mcp-transport-integration/plan.md +114 -0
- package/specs/003-mcp-transport-integration/quickstart.md +311 -0
- package/specs/003-mcp-transport-integration/research.md +395 -0
- package/specs/003-mcp-transport-integration/spec.md +234 -0
- package/specs/003-mcp-transport-integration/tasks.md +324 -0
- package/specs/003-next-spec-gaps.md +150 -0
- package/specs/004-dev-resume-hardening/checklists/requirements.md +37 -0
- package/specs/004-dev-resume-hardening/contracts/cli.md +160 -0
- package/specs/004-dev-resume-hardening/data-model.md +321 -0
- package/specs/004-dev-resume-hardening/plan.md +107 -0
- package/specs/004-dev-resume-hardening/quickstart.md +115 -0
- package/specs/004-dev-resume-hardening/research.md +142 -0
- package/specs/004-dev-resume-hardening/spec.md +221 -0
- package/specs/004-dev-resume-hardening/tasks.md +333 -0
- package/specs/005-ai-search-deploy/checklists/requirements.md +39 -0
- package/specs/005-ai-search-deploy/contracts/web-search-tool.md +241 -0
- package/specs/005-ai-search-deploy/data-model.md +130 -0
- package/specs/005-ai-search-deploy/plan.md +93 -0
- package/specs/005-ai-search-deploy/quickstart.md +96 -0
- package/specs/005-ai-search-deploy/research.md +187 -0
- package/specs/005-ai-search-deploy/spec.md +143 -0
- package/specs/005-ai-search-deploy/tasks.md +284 -0
- package/specs/006-workshop-extraction-fixes/checklists/requirements.md +61 -0
- package/specs/006-workshop-extraction-fixes/contracts/summarization-and-export.md +131 -0
- package/specs/006-workshop-extraction-fixes/data-model.md +149 -0
- package/specs/006-workshop-extraction-fixes/plan.md +123 -0
- package/specs/006-workshop-extraction-fixes/quickstart.md +101 -0
- package/specs/006-workshop-extraction-fixes/research.md +143 -0
- package/specs/006-workshop-extraction-fixes/spec.md +210 -0
- package/specs/006-workshop-extraction-fixes/tasks.md +316 -0
- package/src/cli/developCommand.ts +308 -0
- package/src/cli/directCommands.ts +195 -0
- package/src/cli/envLoader.ts +17 -0
- package/src/cli/exportCommand.ts +65 -0
- package/src/cli/index.ts +249 -0
- package/src/cli/ioContext.ts +139 -0
- package/src/cli/preflight.ts +86 -0
- package/src/cli/statusCommand.ts +118 -0
- package/src/cli/workshopCommand.ts +496 -0
- package/src/develop/checkpointState.ts +121 -0
- package/src/develop/codeGenerator.ts +402 -0
- package/src/develop/dynamicScaffolder.ts +284 -0
- package/src/develop/githubMcpAdapter.ts +199 -0
- package/src/develop/index.ts +34 -0
- package/src/develop/mcpContextEnricher.ts +279 -0
- package/src/develop/pocScaffolder.ts +646 -0
- package/src/develop/ralphLoop.ts +1044 -0
- package/src/develop/templateRegistry.ts +427 -0
- package/src/develop/testRunner.ts +276 -0
- package/src/logging/logger.ts +73 -0
- package/src/loop/conversationLoop.ts +355 -0
- package/src/loop/phaseSummarizer.ts +114 -0
- package/src/mcp/mcpManager.ts +365 -0
- package/src/mcp/mcpTransport.ts +562 -0
- package/src/mcp/retryPolicy.ts +87 -0
- package/src/mcp/webSearch.ts +388 -0
- package/src/originalPrompts/design_thinking.md +178 -0
- package/src/originalPrompts/design_thinking_persona.md +76 -0
- package/src/originalPrompts/document_generator_example.md +77 -0
- package/src/originalPrompts/document_generator_persona.md +47 -0
- package/src/originalPrompts/facilitator_persona.md +125 -0
- package/src/originalPrompts/guardrails.md +47 -0
- package/src/phases/contextSummarizer.ts +154 -0
- package/src/phases/discoveryEnricher.ts +223 -0
- package/src/phases/phaseExtractors.ts +247 -0
- package/src/phases/phaseHandlers.ts +450 -0
- package/src/prompts/design.md +51 -0
- package/src/prompts/develop-boundary.md +51 -0
- package/src/prompts/develop.md +111 -0
- package/src/prompts/discover.md +58 -0
- package/src/prompts/ideate.md +56 -0
- package/src/prompts/plan.md +51 -0
- package/src/prompts/promptLoader.ts +198 -0
- package/src/prompts/select.md +47 -0
- package/src/prompts/summarize/README.md +8 -0
- package/src/prompts/summarize/design-summary.md +37 -0
- package/src/prompts/summarize/develop-summary.md +25 -0
- package/src/prompts/summarize/ideate-summary.md +27 -0
- package/src/prompts/summarize/plan-summary.md +27 -0
- package/src/prompts/summarize/select-summary.md +21 -0
- package/src/prompts/system.md +28 -0
- package/src/sessions/exportPaths.ts +28 -0
- package/src/sessions/exportWriter.ts +490 -0
- package/src/sessions/sessionManager.ts +119 -0
- package/src/sessions/sessionStore.ts +69 -0
- package/src/shared/activitySpinner.ts +108 -0
- package/src/shared/copilotClient.ts +291 -0
- package/src/shared/data/cards.json +1249 -0
- package/src/shared/data/cardsLoader.ts +70 -0
- package/src/shared/errorClassifier.ts +160 -0
- package/src/shared/events.ts +103 -0
- package/src/shared/markdownRenderer.ts +44 -0
- package/src/shared/schemas/session.ts +346 -0
- package/src/shared/tableRenderer.ts +28 -0
- package/src/types/marked-terminal.d.ts +5 -0
- package/src/vendor/chalk.ts +2 -0
- package/src/vendor/cli-table3.ts +3 -0
- package/src/vendor/commander.ts +2 -0
- package/src/vendor/marked-terminal.ts +3 -0
- package/src/vendor/marked.ts +2 -0
- package/src/vendor/ora.ts +2 -0
- package/src/vendor/pino.ts +3 -0
- package/src/vendor/zod.ts +3 -0
- package/tests/e2e/developE2e.spec.ts +152 -0
- package/tests/e2e/developFailureE2e.spec.ts +289 -0
- package/tests/e2e/developPty.spec.ts +86 -0
- package/tests/e2e/discoveryWebSearchRelevance.spec.ts +103 -0
- package/tests/e2e/harness.spec.ts +104 -0
- package/tests/e2e/mcpLive.spec.ts +149 -0
- package/tests/e2e/newSession.e2e.spec.ts +245 -0
- package/tests/e2e/ralphLoopEnrichmentComparison.spec.ts +70 -0
- package/tests/e2e/workiqEnrichment.spec.ts +72 -0
- package/tests/e2e/zava-assessment/agent-interaction-script.md +258 -0
- package/tests/e2e/zava-assessment/company-profile.md +98 -0
- package/tests/e2e/zava-assessment/expected-results-checklist.md +454 -0
- package/tests/e2e/zavaSimulation.spec.ts +511 -0
- package/tests/fixtures/completedSession.json +141 -0
- package/tests/fixtures/test-fixture-project/package-lock.json +1585 -0
- package/tests/fixtures/test-fixture-project/package.json +12 -0
- package/tests/fixtures/test-fixture-project/src/add.ts +3 -0
- package/tests/fixtures/test-fixture-project/tests/failing.test.ts +7 -0
- package/tests/fixtures/test-fixture-project/tests/hanging.test.ts +9 -0
- package/tests/fixtures/test-fixture-project/tests/passing.test.ts +13 -0
- package/tests/fixtures/test-fixture-project/vitest.config.ts +7 -0
- package/tests/integration/autoStartConversation.spec.ts +168 -0
- package/tests/integration/defaultCommand.spec.ts +179 -0
- package/tests/integration/directCommandNonTty.spec.ts +260 -0
- package/tests/integration/directCommandTty.spec.ts +185 -0
- package/tests/integration/discoveryEnrichmentFlow.spec.ts +209 -0
- package/tests/integration/exportArtifacts.spec.ts +232 -0
- package/tests/integration/exportFallbackFlow.spec.ts +115 -0
- package/tests/integration/mcpDegradationFlow.spec.ts +231 -0
- package/tests/integration/mcpTransportFlow.spec.ts +178 -0
- package/tests/integration/newSessionFlow.spec.ts +406 -0
- package/tests/integration/pocGithubMcp.spec.ts +224 -0
- package/tests/integration/pocLocalFallback.spec.ts +205 -0
- package/tests/integration/pocScaffold.spec.ts +220 -0
- package/tests/integration/ralphLoopFlow.spec.ts +430 -0
- package/tests/integration/ralphLoopPartial.spec.ts +416 -0
- package/tests/integration/resumeAndBacktrack.spec.ts +278 -0
- package/tests/integration/spinnerLifecycle.spec.ts +270 -0
- package/tests/integration/summarizationFlow.spec.ts +135 -0
- package/tests/integration/testRunnerReal.spec.ts +63 -0
- package/tests/integration/webSearchAgent.spec.ts +155 -0
- package/tests/live/copilotSdkLive.spec.ts +149 -0
- package/tests/live/zavaFullWorkshop.spec.ts +515 -0
- package/tests/setup/loadEnv.ts +5 -0
- package/tests/unit/cli/developCommand.spec.ts +679 -0
- package/tests/unit/cli/directCommands.spec.ts +325 -0
- package/tests/unit/cli/envLoader.spec.ts +73 -0
- package/tests/unit/cli/ioContext.spec.ts +148 -0
- package/tests/unit/cli/preflight.spec.ts +125 -0
- package/tests/unit/cli/statusCommand.spec.ts +134 -0
- package/tests/unit/cli/workshopClientFallback.spec.ts +100 -0
- package/tests/unit/cli/workshopCommand.spec.ts +378 -0
- package/tests/unit/config/vitestEnvSetup.spec.ts +24 -0
- package/tests/unit/develop/checkpointState.spec.ts +378 -0
- package/tests/unit/develop/codeGenerator.spec.ts +447 -0
- package/tests/unit/develop/githubMcpAdapter.spec.ts +283 -0
- package/tests/unit/develop/mcpContextEnricher.spec.ts +564 -0
- package/tests/unit/develop/outputValidator.spec.ts +134 -0
- package/tests/unit/develop/pocScaffolder.spec.ts +451 -0
- package/tests/unit/develop/ralphLoop.spec.ts +1439 -0
- package/tests/unit/develop/templateRegistry.spec.ts +106 -0
- package/tests/unit/develop/testRunner.spec.ts +294 -0
- package/tests/unit/infraBicep.spec.ts +116 -0
- package/tests/unit/infraDeploy.spec.ts +102 -0
- package/tests/unit/infraTeardown.spec.ts +77 -0
- package/tests/unit/logging/logger.spec.ts +50 -0
- package/tests/unit/loop/conversationLoop.spec.ts +719 -0
- package/tests/unit/loop/phaseSummarizer.spec.ts +169 -0
- package/tests/unit/loop/streamingMarkdown.spec.ts +180 -0
- package/tests/unit/mcp/mcpManager.spec.ts +336 -0
- package/tests/unit/mcp/mcpTransport.spec.ts +689 -0
- package/tests/unit/mcp/retryPolicy.spec.ts +278 -0
- package/tests/unit/mcp/timeoutValidation.spec.ts +55 -0
- package/tests/unit/mcp/webSearch.spec.ts +718 -0
- package/tests/unit/phases/contextSummarizer.spec.ts +158 -0
- package/tests/unit/phases/discoveryEnricher.repeatCalls.spec.ts +125 -0
- package/tests/unit/phases/discoveryEnricher.spec.ts +512 -0
- package/tests/unit/phases/phaseExtractors.spec.ts +406 -0
- package/tests/unit/phases/phaseHandlers.spec.ts +483 -0
- package/tests/unit/prompts/promptLoader.spec.ts +144 -0
- package/tests/unit/schemas/pocSchemas.spec.ts +457 -0
- package/tests/unit/schemas/session.spec.ts +328 -0
- package/tests/unit/sessions/exportPaths.spec.ts +38 -0
- package/tests/unit/sessions/exportWriter.spec.ts +737 -0
- package/tests/unit/sessions/sessionManager.spec.ts +174 -0
- package/tests/unit/sessions/sessionStore.spec.ts +136 -0
- package/tests/unit/shared/activitySpinner.spec.ts +211 -0
- package/tests/unit/shared/cardsLoader.spec.ts +89 -0
- package/tests/unit/shared/copilotClient.spec.ts +185 -0
- package/tests/unit/shared/errorClassifier.spec.ts +152 -0
- package/tests/unit/shared/events.spec.ts +71 -0
- package/tests/unit/shared/markdownRenderer.spec.ts +42 -0
- package/tests/unit/shared/markdownRendererChunks.spec.ts +83 -0
- package/tests/unit/shared/tableRenderer.spec.ts +38 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +15 -0
- package/vitest.live.config.ts +19 -0
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration tests for spinner lifecycle in ConversationLoop (T089).
|
|
3
|
+
*
|
|
4
|
+
* Verifies the full spinner lifecycle during streaming: "Thinking..." appears
|
|
5
|
+
* after user input, transitions on ToolCall events, prints tool summary on
|
|
6
|
+
* ToolResult, stops on first TextDelta, and handles multi-tool sequences.
|
|
7
|
+
*/
|
|
8
|
+
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
9
|
+
import { Writable } from 'node:stream';
|
|
10
|
+
import { ConversationLoop, } from '../../src/loop/conversationLoop.js';
|
|
11
|
+
import { ActivitySpinner } from '../../src/shared/activitySpinner.js';
|
|
12
|
+
import { createTextDeltaEvent, createToolCallEvent, createToolResultEvent, } from '../../src/shared/events.js';
|
|
13
|
+
// ── Helpers ─────────────────────────────────────────────────────────────────
|
|
14
|
+
/**
 * Build a minimal session fixture for the spinner integration tests.
 *
 * @param {object} [overrides] - Properties that replace or extend the defaults.
 * @returns {object} A fresh session object with `overrides` applied on top of
 *   the default fields.
 */
function makeSession(overrides) {
  const defaults = {
    sessionId: 'spinner-int-test',
    schemaVersion: '1.0.0',
    createdAt: '2025-01-01T00:00:00Z',
    updatedAt: '2025-01-01T00:00:00Z',
    phase: 'Discover',
    status: 'Active',
    participants: [],
    artifacts: { generatedFiles: [] },
  };
  // Later spread wins, so any key present in `overrides` replaces the default.
  return { ...defaults, ...overrides };
}
|
|
27
|
+
/**
 * Create an in-memory IO double that replays scripted user inputs and records
 * everything the code under test writes.
 *
 * @param {Array<string|null|undefined>} inputs - Values handed out, in order,
 *   by successive `readInput()` calls; `null` is returned once they run out.
 * @param {{ json?: boolean, tty?: boolean }} [opts] - Mode flags; `json`
 *   defaults to `false` and `tty` defaults to `true`.
 * @returns {object} IO object exposing the write/read methods plus the
 *   `_written`, `_activities` and `_toolSummaries` inspection getters.
 */
function makeIO(inputs, opts) {
  const written = [];
  const activities = [];
  const toolSummaries = [];
  let cursor = 0;

  // Hands out the next scripted input; missing or exhausted entries become null.
  const takeNextInput = () => {
    if (cursor < inputs.length) {
      const value = inputs[cursor];
      cursor += 1;
      return value ?? null;
    }
    return null;
  };

  return {
    write(text) {
      written.push(text);
    },
    writeActivity(text) {
      activities.push(text);
    },
    writeToolSummary(toolName, summary) {
      toolSummaries.push({ toolName, summary });
    },
    async readInput() {
      return takeNextInput();
    },
    async showDecisionGate() {
      return { choice: 'continue' };
    },
    isJsonMode: opts?.json ?? false,
    isTTY: opts?.tty ?? true,
    get _written() {
      return written;
    },
    get _activities() {
      return activities;
    },
    get _toolSummaries() {
      return toolSummaries;
    },
  };
}
|
|
49
|
+
/**
 * Build a stub phase handler for the 'Discover' phase.
 *
 * @param {object} [overrides] - Properties that replace or extend the stub's
 *   defaults.
 * @returns {object} Handler with `phase`, `buildSystemPrompt` and
 *   `extractResult`, with `overrides` applied on top.
 */
function makePhaseHandler(overrides) {
  const buildSystemPrompt = () => 'System prompt';
  const extractResult = () => ({});
  return {
    phase: 'Discover',
    buildSystemPrompt,
    extractResult,
    ...overrides,
  };
}
|
|
57
|
+
/**
 * Create an in-memory Writable that captures everything written to it.
 *
 * Raw bytes are retained and decoded once in `getOutput()`, so a multi-byte
 * UTF-8 character whose bytes arrive in separate writes is reassembled instead
 * of being decoded into replacement characters chunk by chunk.
 *
 * @returns {Writable & { getOutput: () => string }} The stream, extended with
 *   a `getOutput()` method returning all captured output as one string.
 */
function createCaptureStream() {
  const chunks = [];
  const stream = new Writable({
    write(chunk, encoding, callback) {
      // Copy the bytes so later mutation of a caller-owned buffer cannot alter
      // what was captured at write time.
      chunks.push(
        typeof chunk === 'string' ? Buffer.from(chunk, encoding) : Buffer.from(chunk),
      );
      callback();
    },
  });
  stream.getOutput = () => Buffer.concat(chunks).toString();
  return stream;
}
|
|
68
|
+
/**
 * Build a stand-in CopilotClient whose sessions replay scripted event batches,
 * which lets tests drive ToolCall → ToolResult → TextDelta sequences.
 *
 * Every `send()` consumes the next entry of `eventSequences`; the position is
 * shared by all sessions created from the same client. When the script has no
 * entry left, a single '[No more events]' TextDelta is replayed instead.
 *
 * @param {Array<Array<object>>} eventSequences - One array of events per
 *   expected `send()` call.
 * @returns {{ createSession: Function }} Fake client whose sessions expose
 *   `send(message)` (async-iterable of events) and `getHistory()`.
 */
function createEventSequenceClient(eventSequences) {
  let nextSequence = 0;

  // Wraps a batch in an object that can be iterated with `for await`.
  const toAsyncIterable = (batch) => ({
    async *[Symbol.asyncIterator]() {
      for (let i = 0; i < batch.length; i += 1) {
        yield batch[i];
      }
    },
  });

  return {
    async createSession(_opts) {
      const sentMessages = [];
      return {
        send(message) {
          sentMessages.push(message);
          const batch = eventSequences[nextSequence] ?? [
            createTextDeltaEvent('[No more events]'),
          ];
          nextSequence += 1;
          return toAsyncIterable(batch);
        },
        // Hand out a copy so callers cannot mutate the recorded history.
        getHistory: () => sentMessages.slice(),
      };
    },
  };
}
|
|
95
|
+
// ── Tests ────────────────────────────────────────────────────────────────────
|
|
96
|
+
describe('Spinner lifecycle integration (T089)', () => {
|
|
97
|
+
beforeEach(() => {
|
|
98
|
+
process.removeAllListeners('SIGINT');
|
|
99
|
+
});
|
|
100
|
+
it('starts Thinking spinner before sending, stops on first TextDelta', async () => {
|
|
101
|
+
const stream = createCaptureStream();
|
|
102
|
+
const spinner = new ActivitySpinner({ isTTY: true, isJsonMode: false, stream });
|
|
103
|
+
const startSpy = vi.spyOn(spinner, 'startThinking');
|
|
104
|
+
const stopSpy = vi.spyOn(spinner, 'stop');
|
|
105
|
+
const client = createEventSequenceClient([
|
|
106
|
+
[createTextDeltaEvent('Hello from LLM')],
|
|
107
|
+
]);
|
|
108
|
+
const io = makeIO(['test input'], { tty: true });
|
|
109
|
+
const loop = new ConversationLoop({
|
|
110
|
+
client,
|
|
111
|
+
io,
|
|
112
|
+
session: makeSession(),
|
|
113
|
+
phaseHandler: makePhaseHandler(),
|
|
114
|
+
spinner,
|
|
115
|
+
});
|
|
116
|
+
await loop.run();
|
|
117
|
+
expect(startSpy).toHaveBeenCalled();
|
|
118
|
+
expect(stopSpy).toHaveBeenCalled();
|
|
119
|
+
expect(spinner.isActive()).toBe(false);
|
|
120
|
+
});
|
|
121
|
+
it('transitions spinner to tool name on ToolCall, completes on ToolResult', async () => {
|
|
122
|
+
const stream = createCaptureStream();
|
|
123
|
+
const spinner = new ActivitySpinner({ isTTY: true, isJsonMode: false, stream });
|
|
124
|
+
const toolCallSpy = vi.spyOn(spinner, 'startToolCall');
|
|
125
|
+
const completeSpy = vi.spyOn(spinner, 'completeToolCall');
|
|
126
|
+
const client = createEventSequenceClient([
|
|
127
|
+
[
|
|
128
|
+
createToolCallEvent('WorkIQ', { query: 'logistics' }),
|
|
129
|
+
createToolResultEvent('WorkIQ', 'Found 5 processes'),
|
|
130
|
+
createTextDeltaEvent('Based on the analysis...'),
|
|
131
|
+
],
|
|
132
|
+
]);
|
|
133
|
+
const io = makeIO(['analyze my processes'], { tty: true });
|
|
134
|
+
const loop = new ConversationLoop({
|
|
135
|
+
client,
|
|
136
|
+
io,
|
|
137
|
+
session: makeSession(),
|
|
138
|
+
phaseHandler: makePhaseHandler(),
|
|
139
|
+
spinner,
|
|
140
|
+
});
|
|
141
|
+
await loop.run();
|
|
142
|
+
expect(toolCallSpy).toHaveBeenCalledWith('WorkIQ');
|
|
143
|
+
expect(completeSpy).toHaveBeenCalled();
|
|
144
|
+
expect(spinner.isActive()).toBe(false);
|
|
145
|
+
});
|
|
146
|
+
it('handles multi-tool sequences (ToolCall → ToolResult → ToolCall → ToolResult → TextDelta)', async () => {
|
|
147
|
+
const stream = createCaptureStream();
|
|
148
|
+
const spinner = new ActivitySpinner({ isTTY: true, isJsonMode: false, stream });
|
|
149
|
+
const toolCallSpy = vi.spyOn(spinner, 'startToolCall');
|
|
150
|
+
const completeSpy = vi.spyOn(spinner, 'completeToolCall');
|
|
151
|
+
const client = createEventSequenceClient([
|
|
152
|
+
[
|
|
153
|
+
createToolCallEvent('WorkIQ', { query: 'tasks' }),
|
|
154
|
+
createToolResultEvent('WorkIQ', 'Found 3 tasks'),
|
|
155
|
+
createToolCallEvent('Context7', { doc: 'azure-ai' }),
|
|
156
|
+
createToolResultEvent('Context7', '12 docs retrieved'),
|
|
157
|
+
createTextDeltaEvent('Here are my findings...'),
|
|
158
|
+
],
|
|
159
|
+
]);
|
|
160
|
+
const io = makeIO(['research tasks'], { tty: true });
|
|
161
|
+
const loop = new ConversationLoop({
|
|
162
|
+
client,
|
|
163
|
+
io,
|
|
164
|
+
session: makeSession(),
|
|
165
|
+
phaseHandler: makePhaseHandler(),
|
|
166
|
+
spinner,
|
|
167
|
+
});
|
|
168
|
+
await loop.run();
|
|
169
|
+
expect(toolCallSpy).toHaveBeenCalledTimes(2);
|
|
170
|
+
expect(completeSpy).toHaveBeenCalledTimes(2);
|
|
171
|
+
// Tool summaries should be written to IO
|
|
172
|
+
expect(io._toolSummaries.length).toBe(2);
|
|
173
|
+
expect(io._toolSummaries[0].toolName).toBe('WorkIQ');
|
|
174
|
+
expect(io._toolSummaries[1].toolName).toBe('Context7');
|
|
175
|
+
});
|
|
176
|
+
it('writes tool summaries to IO on ToolResult events', async () => {
  const stream = createCaptureStream();
  const spinner = new ActivitySpinner({ isTTY: true, isJsonMode: false, stream });

  // Single scripted turn: one tool round-trip, then the assistant text.
  const scriptedTurn = [
    createToolCallEvent('GitHub', { repo: 'test' }),
    createToolResultEvent('GitHub', 'Found 8 repos'),
    createTextDeltaEvent('The repo results are...'),
  ];
  const client = createEventSequenceClient([scriptedTurn]);
  const io = makeIO(['search repos'], { tty: true });

  const session = makeSession();
  const phaseHandler = makePhaseHandler();
  await new ConversationLoop({ client, io, session, phaseHandler, spinner }).run();

  // Exactly one summary is recorded, and it carries the tool's result text.
  const expectedSummary = {
    toolName: 'GitHub',
    summary: expect.stringContaining('Found 8 repos'),
  };
  expect(io._toolSummaries).toEqual([expectedSummary]);
});
|
|
199
|
+
it('no-op spinner works without errors in non-TTY mode', async () => {
  // Same tool round-trip as the TTY tests, but run against non-TTY IO.
  const scriptedTurn = [
    createToolCallEvent('WorkIQ', { query: 'test' }),
    createToolResultEvent('WorkIQ', 'ok'),
    createTextDeltaEvent('Results.'),
  ];
  const client = createEventSequenceClient([scriptedTurn]);
  const io = makeIO(['query'], { tty: false });

  // The `spinner` option is deliberately omitted so the loop uses its no-op default.
  const session = makeSession();
  const phaseHandler = makePhaseHandler();
  const loop = new ConversationLoop({ client, io, session, phaseHandler });
  await loop.run();

  // Should work without errors: the streamed text still reaches the output.
  expect(io._written.join('')).toContain('Results.');
});
|
|
220
|
+
});
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration test: Summarization flow.
|
|
3
|
+
*
|
|
4
|
+
* Tests the full pipeline: ConversationLoop → phaseSummarize → session updated.
|
|
5
|
+
* Verifies that when inline extraction fails, the post-phase summarization
|
|
6
|
+
* call extracts structured data from the transcript.
|
|
7
|
+
*/
|
|
8
|
+
import { describe, it, expect, vi } from 'vitest';
|
|
9
|
+
import { ConversationLoop } from '../../src/loop/conversationLoop.js';
|
|
10
|
+
/**
 * Builds a mocked IO object for the conversation loop.
 *
 * Every output method is a bare `vi.fn()`. `readInput` resolves to `null`
 * (EOF immediately) and `showDecisionGate` resolves to `{ choice: 'continue' }`.
 * The object reports non-JSON, non-TTY mode.
 *
 * @returns {object} A fresh mock IO; no state is shared between calls.
 */
function makeIO() {
  const readInput = vi.fn().mockResolvedValue(null); // EOF immediately
  const showDecisionGate = vi.fn().mockResolvedValue({ choice: 'continue' });
  const io = {
    write: vi.fn(),
    writeActivity: vi.fn(),
    writeToolSummary: vi.fn(),
    readInput,
    showDecisionGate,
    isJsonMode: false,
    isTTY: false,
  };
  return io;
}
|
|
21
|
+
/**
 * Builds a minimal session record in the 'Ideate' phase for these tests.
 *
 * @param {object} [overrides] Properties that replace or extend the defaults;
 *   keys given here win over the default of the same name.
 * @returns {object} A new session object. The default arrays and the
 *   `artifacts` object are created fresh on every call.
 */
function makeSession(overrides) {
  const fixedTimestamp = '2025-01-01T00:00:00Z';
  const defaults = {
    sessionId: 'integration-test',
    schemaVersion: '1.0.0',
    createdAt: fixedTimestamp,
    updatedAt: fixedTimestamp,
    phase: 'Ideate',
    status: 'Active',
    participants: [],
    artifacts: { generatedFiles: [] },
    turns: [],
  };
  // Spread last so caller-supplied values take precedence.
  return { ...defaults, ...overrides };
}
|
|
35
|
+
describe('summarization flow integration', () => {
  it('populates session.ideas via summarization when inline extraction fails', async () => {
    const ideas = [
      { id: 'idea-1', title: 'AI Chatbot', description: 'Automated support', workflowStepIds: ['s1'] },
    ];

    // Extraction yields nothing for the plain conversational reply (simulates
    // the LLM not embedding JSON) and yields the ideas only once the response
    // text contains the idea id, i.e. on the summarization reply.
    const extractResult = vi.fn((_session, response) => (response.includes('idea-1') ? { ideas } : {}));
    const handler = {
      phase: 'Ideate',
      buildSystemPrompt: () => 'You are an Ideate facilitator.',
      extractResult,
      getInitialMessage: () => 'Start ideation.',
    };

    // The counter is shared by every session the fake client hands out, so the
    // first `send` overall is the conversation and any later one is the summary.
    let sendCount = 0;
    async function* scriptedSend() {
      sendCount += 1;
      const isConversationTurn = sendCount === 1;
      const text = isConversationTurn
        ? 'Here are some ideas for your business.'
        : '```json\n' + JSON.stringify(ideas) + '\n```';
      yield { type: 'TextDelta', text };
    }
    const fakeClient = {
      createSession: vi.fn(async () => ({ send: vi.fn(scriptedSend) })),
    };

    const loop = new ConversationLoop({
      client: fakeClient,
      io: makeIO(),
      session: makeSession(),
      phaseHandler: handler,
      initialMessage: 'Start ideation.',
    });
    const result = await loop.run();

    // The summarization call should have populated ideas
    expect(result.ideas).toEqual(ideas);
  });

  it('skips summarization when inline extraction succeeds', async () => {
    const ideas = [
      { id: 'idea-1', title: 'Test', description: 'Desc', workflowStepIds: [] },
    ];
    const handler = {
      phase: 'Ideate',
      buildSystemPrompt: () => 'Ideate prompt.',
      extractResult: vi.fn(() => ({ ideas })),
      getInitialMessage: () => 'Start.',
    };

    // The mock itself records how many sessions the loop opens.
    const createSession = vi.fn(async () => ({
      send: vi.fn(async function* () {
        yield { type: 'TextDelta', text: 'Ideas generated.' };
      }),
    }));

    const loop = new ConversationLoop({
      client: { createSession },
      io: makeIO(),
      session: makeSession(),
      phaseHandler: handler,
      initialMessage: 'Start.',
    });
    await loop.run();

    // Only one session should be created (no summarization call needed)
    expect(createSession).toHaveBeenCalledTimes(1);
  });
});
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration tests for TestRunner using real fixture project.
|
|
3
|
+
*
|
|
4
|
+
* T042: Passing tests verify correct pass/fail/skip counts
|
|
5
|
+
* T043: Failing tests verify failure details parsed correctly
|
|
6
|
+
* T044: Timeout handling with hanging test fixture
|
|
7
|
+
*/
|
|
8
|
+
import { describe, it, expect } from 'vitest';
|
|
9
|
+
import { join } from 'node:path';
|
|
10
|
+
import { TestRunner } from '../../src/develop/testRunner.js';
|
|
11
|
+
// Fixture project that the spawned test commands run inside.
const FIXTURE_DIR = join(import.meta.dirname, '../fixtures/test-fixture-project');

describe('testRunner real fixture integration', () => {
  /**
   * Runs `testCommand` through a fresh TestRunner inside the fixture project.
   *
   * @param {string} testCommand Shell command handed to the runner.
   * @param {number} timeoutMs Runner timeout in milliseconds.
   * @returns {Promise<object>} The runner's result object.
   */
  const runInFixture = (testCommand, timeoutMs) => {
    const runner = new TestRunner({ testCommand, timeoutMs });
    return runner.run(FIXTURE_DIR);
  };

  it('parses passing test results correctly (T042)', async () => {
    // Run only the passing test file
    const result = await runInFixture('npx vitest run tests/passing.test.ts --reporter=json', 30_000);

    expect(result.passed).toBe(2);
    expect(result.failed).toBe(0);
    expect(result.total).toBe(2);
    expect(result.durationMs).toBeGreaterThan(0);
  }, 45_000);

  it('parses failing test results correctly (T043)', async () => {
    const result = await runInFixture('npx vitest run tests/failing.test.ts --reporter=json', 30_000);

    // The JSON output may be truncated for large failure messages,
    // so we check that the runner completes without error and captures output
    expect(result.durationMs).toBeGreaterThan(0);
    expect(result.rawOutput).toBeDefined();

    // When JSON is parseable (short enough), failures should be detected.
    // NOTE(review): these checks are skipped entirely when `failed` is 0, so
    // this test can pass without any failure having been parsed.
    const sawFailures = result.failed > 0;
    if (sawFailures) {
      expect(result.total).toBeGreaterThanOrEqual(1);
      expect(result.failures.length).toBeGreaterThan(0);
    }
  }, 45_000);

  it('handles timeout with SIGTERM→SIGKILL for hanging test (T044)', async () => {
    // Short timeout to trigger hang detection
    const result = await runInFixture('npx vitest run tests/hanging.test.ts --reporter=json', 5_000);

    // Should have timed out — zero results
    expect(result.passed).toBe(0);
    expect(result.total).toBe(0);
    expect(result.rawOutput).toContain('timed out');
  }, 15_000); // Allow enough time for timeout + SIGKILL delay
});
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration test for ephemeral agent lifecycle (T022).
|
|
3
|
+
*
|
|
4
|
+
* Tests the full lifecycle: create agent → query with per-call conversation → cleanup
|
|
5
|
+
* using faked AIProjectClient to verify:
|
|
6
|
+
* - Agent is created on first call
|
|
7
|
+
* - Agent is reused on second call
|
|
8
|
+
* - Conversations are created/deleted per query
|
|
9
|
+
* - Agent is deleted on destroyWebSearchSession()
|
|
10
|
+
*/
|
|
11
|
+
import { describe, it, expect, vi, afterEach } from 'vitest';
|
|
12
|
+
import { createWebSearchTool, destroyWebSearchSession } from '../../src/mcp/webSearch.js';
|
|
13
|
+
/**
 * Builds fake dependencies for `createWebSearchTool`, plus a shared `callLog`.
 *
 * Each fake appends a fixed label to `callLog` when invoked and then returns a
 * canned value. `createClient` is synchronous; every other fake is async.
 *
 * @returns {object} `{ callLog, createClient, getOpenAIClient,
 *   createAgentVersion, deleteAgentVersion, createConversation,
 *   deleteConversation, createResponse }`.
 */
function createFakeAgentDeps() {
  const callLog = [];

  // Async fake: log `label`, then resolve with a freshly produced value.
  const loggedAsync = (label, produce = () => undefined) =>
    vi.fn().mockImplementation(async () => {
      callLog.push(label);
      return produce();
    });

  // Canned response: one message with one text part and one URL citation.
  const buildSearchResponse = () => {
    const citation = {
      type: 'url_citation',
      url: 'https://example.com',
      title: 'Example',
      start_index: 0,
      end_index: 18,
    };
    const textPart = {
      type: 'output_text',
      text: 'Search result text',
      annotations: [citation],
    };
    return { output: [{ type: 'message', content: [textPart] }] };
  };

  return {
    callLog,
    createClient: vi.fn().mockImplementation(() => {
      callLog.push('createClient');
      return { id: 'client-1' };
    }),
    getOpenAIClient: loggedAsync('getOpenAIClient', () => ({ id: 'openai-1' })),
    createAgentVersion: loggedAsync('createAgent', () => ({ name: 'sofia-web-search', version: 'v1' })),
    deleteAgentVersion: loggedAsync('deleteAgent'),
    createConversation: loggedAsync('createConversation', () => ({ id: 'conv-abc' })),
    deleteConversation: loggedAsync('deleteConversation'),
    createResponse: loggedAsync('createResponse', buildSearchResponse),
  };
}
|
|
66
|
+
describe('ephemeral agent lifecycle (T022)', () => {
  // Builds the tool under test with a fresh config object on every call.
  const buildTool = (deps) =>
    createWebSearchTool(
      {
        projectEndpoint: 'https://foundry.example.com',
        modelDeploymentName: 'gpt-4.1-mini',
      },
      deps,
    );

  // Destroy the web-search session after every test so the next one starts clean.
  afterEach(async () => {
    await destroyWebSearchSession();
  });

  it('creates agent on first call, reuses on second, cleans up on destroy', async () => {
    const deps = createFakeAgentDeps();
    const tool = buildTool(deps);
    const perQueryCalls = ['createConversation', 'createResponse', 'deleteConversation'];

    // First call — should initialize
    const result1 = await tool('first query');
    expect(result1.results).toHaveLength(1);
    expect(deps.callLog).toEqual(['createClient', 'getOpenAIClient', 'createAgent', ...perQueryCalls]);

    // Second call — should reuse agent and create/delete a fresh conversation
    deps.callLog.length = 0;
    const result2 = await tool('second query');
    expect(result2.results).toHaveLength(1);
    expect(deps.callLog).toEqual(perQueryCalls);

    // Cleanup — should delete agent (conversation already deleted per query)
    deps.callLog.length = 0;
    await destroyWebSearchSession();
    expect(deps.callLog).toEqual(['deleteAgent']);
  });

  it('transitions: uninitialized → initialized → cleaned up', async () => {
    const deps = createFakeAgentDeps();
    const tool = buildTool(deps);
    const { createAgentVersion, deleteAgentVersion } = deps;

    // State: uninitialized — destroy is a no-op
    await destroyWebSearchSession();
    expect(deleteAgentVersion).not.toHaveBeenCalled();

    // State: initialized (after first query)
    await tool('init query');
    expect(createAgentVersion).toHaveBeenCalledTimes(1);

    // State: cleaned up
    await destroyWebSearchSession();
    expect(deleteAgentVersion).toHaveBeenCalledTimes(1);

    // Second destroy is a no-op
    await destroyWebSearchSession();
    expect(deleteAgentVersion).toHaveBeenCalledTimes(1);
  });

  it('handles cleanup failure gracefully', async () => {
    // Both delete fakes are replaced with ones that reject.
    const deps = createFakeAgentDeps();
    Object.assign(deps, {
      deleteConversation: vi.fn().mockRejectedValue(new Error('404 Not Found')),
      deleteAgentVersion: vi.fn().mockRejectedValue(new Error('500 Internal Error')),
    });
    const tool = buildTool(deps);
    await tool('init');

    // Should not throw despite cleanup failures
    await expect(destroyWebSearchSession()).resolves.toBeUndefined();
  });
});
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration tests for the live Copilot SDK client.
|
|
3
|
+
*
|
|
4
|
+
* These tests exercise the real `createCopilotClient()` → SDK → LLM pipeline.
|
|
5
|
+
* They are slower than unit tests (~10-30s each) because they make real API calls.
|
|
6
|
+
*
|
|
7
|
+
* **Prerequisites:**
|
|
8
|
+
* - GitHub Copilot CLI must be authenticated (`copilot auth login`)
|
|
9
|
+
* - The SDK spawns a local copilot CLI process for JSON-RPC
|
|
10
|
+
*
|
|
11
|
+
* The test suite auto-skips if the SDK cannot start (e.g., no auth, no CLI binary).
|
|
12
|
+
*/
|
|
13
|
+
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
|
14
|
+
import { createCopilotClient } from '../../src/shared/copilotClient.js';
|
|
15
|
+
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
16
|
+
/** Collect all TextDelta events from an AsyncIterable of SofiaEvents into a string. */
|
|
17
|
+
/**
 * Drains an async iterable of events and concatenates the `text` of every
 * event whose `type` is 'TextDelta'; all other events are ignored.
 *
 * @param {AsyncIterable<object>} iter Event stream to consume.
 * @returns {Promise<string>} The joined text, or '' when nothing matched.
 */
async function collectText(iter) {
  const pieces = [];
  for await (const event of iter) {
    if (event.type !== 'TextDelta') {
      continue;
    }
    pieces.push(event.text);
  }
  return pieces.join('');
}
|
|
26
|
+
// ── Suite ────────────────────────────────────────────────────────────────────
|
|
27
|
+
describe('Live Copilot SDK client', () => {
  let client;
  let canRun = false;

  /**
   * Marks the current test as skipped when the live client could not be
   * created. A bare early `return` would report the test as passed, which
   * hides the fact that nothing was exercised.
   *
   * @param {object} ctx Vitest test context passed to the test function.
   */
  const skipUnlessLive = (ctx) => {
    if (!canRun) {
      ctx.skip();
    }
  };

  beforeAll(async () => {
    // Client creation is the availability probe: on failure, warn and leave
    // `canRun` false so every test in this suite is skipped.
    try {
      client = await createCopilotClient();
      canRun = true;
    } catch (err) {
      console.warn(`Skipping live Copilot SDK tests — client creation failed: ${err instanceof Error ? err.message : err}`);
    }
  }, 30_000);

  afterAll(async () => {
    // Intentionally empty: no explicit stop is issued from our wrapper.
  });

  // ── Basic smoke test ────────────────────────────────────────────────────
  it('can create a session and get a response', async (ctx) => {
    skipUnlessLive(ctx);
    const session = await client.createSession({
      systemPrompt: 'You are a helpful assistant. Be very brief.',
    });
    const response = await collectText(session.send({ role: 'user', content: 'What is 2 + 2? Reply with just the number.' }));
    expect(response).toBeTruthy();
    expect(response.length).toBeGreaterThan(0);
    // The LLM should mention "4" somewhere in the response
    expect(response).toContain('4');
  }, 60_000);

  // ── Multi-turn conversation ─────────────────────────────────────────────
  it('supports multi-turn conversation', async (ctx) => {
    skipUnlessLive(ctx);
    const session = await client.createSession({
      systemPrompt: 'You are a helpful assistant. Keep responses to one sentence. ' +
        'When asked to recall, use the conversation history.',
    });
    // Turn 1: set a fact
    const r1 = await collectText(session.send({ role: 'user', content: 'Remember this word: "tangerine".' }));
    expect(r1).toBeTruthy();
    // Turn 2: recall the fact
    const r2 = await collectText(session.send({ role: 'user', content: 'What word did I ask you to remember?' }));
    expect(r2.toLowerCase()).toContain('tangerine');
  }, 120_000);

  // ── System prompt respected ─────────────────────────────────────────────
  it('respects the system prompt persona', async (ctx) => {
    skipUnlessLive(ctx);
    const session = await client.createSession({
      systemPrompt: 'You are a pirate. Always respond in pirate-speak. Keep responses under 50 words.',
    });
    const response = await collectText(session.send({ role: 'user', content: 'Hello, how are you today?' }));
    expect(response).toBeTruthy();
    // LLM playing pirate should use at least one pirate-ish word.
    // NOTE(review): these are substring matches ('ye' also matches "yes"), so
    // this is a loose check.
    const piratePatterns = /ahoy|matey|arr|ye|shiver|landlubber|cap'n|seas|treasure|sail/i;
    expect(response).toMatch(piratePatterns);
  }, 60_000);

  // ── History tracking ────────────────────────────────────────────────────
  it('tracks conversation history correctly', async (ctx) => {
    skipUnlessLive(ctx);
    const session = await client.createSession({
      systemPrompt: 'You are a helpful assistant. Be very brief.',
    });
    await collectText(session.send({ role: 'user', content: 'Say hello.' }));
    const history = session.getHistory();
    // Should have at least: user message + assistant response
    expect(history.length).toBeGreaterThanOrEqual(2);
    expect(history[0].role).toBe('user');
    expect(history[0].content).toBe('Say hello.');
    expect(history[1].role).toBe('assistant');
    expect(history[1].content.length).toBeGreaterThan(0);
  }, 60_000);

  // ── Client interface shape ──────────────────────────────────────────────
  it('createCopilotClient returns a valid interface', async (ctx) => {
    skipUnlessLive(ctx);
    expect(client).toBeDefined();
    expect(typeof client.createSession).toBe('function');
  });
});
|