npm - sofia-cli - Versions diffs - 0.1.2 → 0.1.4 - Mend

sofia-cli 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (136)

package/README.md +42 -20
package/dist/infra/deploy.sh +193 -0
package/dist/infra/gather-env.sh +211 -0
package/dist/infra/infra/deploy.sh +193 -0
package/dist/infra/infra/gather-env.sh +211 -0
package/dist/infra/infra/main.bicep +90 -0
package/dist/infra/infra/main.bicepparam +18 -0
package/dist/infra/infra/resources.bicep +134 -0
package/dist/infra/infra/teardown.sh +114 -0
package/dist/infra/main.bicep +90 -0
package/dist/infra/main.bicepparam +18 -0
package/dist/infra/resources.bicep +134 -0
package/dist/infra/teardown.sh +114 -0
package/dist/src/cli/developCommand.js +0 -2
package/dist/src/cli/index.js +8 -1
package/dist/src/cli/workshopCommand.js +1 -1
package/dist/src/develop/index.js +1 -1
package/dist/src/develop/pocUtils.js +228 -0
package/dist/src/develop/ralphLoop.js +8 -27
package/dist/src/shared/data/cards.json +655 -670
package/docs/architecture.md +2 -1
package/package.json +5 -3
package/src/cli/developCommand.ts +1 -3
package/src/cli/index.ts +11 -1
package/src/cli/workshopCommand.ts +21 -17
package/src/develop/dynamicScaffolder.ts +36 -30
package/src/develop/index.ts +13 -2
package/src/develop/pocUtils.ts +296 -0
package/src/develop/ralphLoop.ts +8 -28
package/src/develop/templateRegistry.ts +19 -18
package/src/shared/data/cards.json +655 -670
package/tests/e2e/developE2e.spec.ts +3 -61
package/tests/e2e/developFailureE2e.spec.ts +34 -38
package/tests/integration/pocGithubMcp.spec.ts +29 -39
package/tests/integration/pocLocalFallback.spec.ts +29 -39
package/tests/integration/ralphLoopFlow.spec.ts +46 -66
package/tests/integration/ralphLoopPartial.spec.ts +30 -37
package/tests/unit/develop/githubMcpAdapter.spec.ts +0 -134
package/tests/unit/develop/outputValidator.spec.ts +45 -21
package/tests/unit/develop/ralphLoop.spec.ts +58 -94
package/tsconfig.json +2 -1
package/vitest.workspace.ts +5 -0
package/dist/src/develop/pocScaffolder.js +0 -542
package/dist/tests/e2e/developE2e.spec.js +0 -126
package/dist/tests/e2e/developFailureE2e.spec.js +0 -247
package/dist/tests/e2e/developPty.spec.js +0 -75
package/dist/tests/e2e/discoveryWebSearchRelevance.spec.js +0 -84
package/dist/tests/e2e/harness.spec.js +0 -83
package/dist/tests/e2e/mcpLive.spec.js +0 -120
package/dist/tests/e2e/newSession.e2e.spec.js +0 -177
package/dist/tests/e2e/ralphLoopEnrichmentComparison.spec.js +0 -62
package/dist/tests/e2e/workiqEnrichment.spec.js +0 -56
package/dist/tests/e2e/zavaSimulation.spec.js +0 -452
package/dist/tests/fixtures/test-fixture-project/src/add.js +0 -3
package/dist/tests/fixtures/test-fixture-project/tests/failing.test.js +0 -6
package/dist/tests/fixtures/test-fixture-project/tests/hanging.test.js +0 -8
package/dist/tests/fixtures/test-fixture-project/tests/passing.test.js +0 -10
package/dist/tests/fixtures/test-fixture-project/vitest.config.js +0 -6
package/dist/tests/integration/autoStartConversation.spec.js +0 -138
package/dist/tests/integration/defaultCommand.spec.js +0 -147
package/dist/tests/integration/directCommandNonTty.spec.js +0 -224
package/dist/tests/integration/directCommandTty.spec.js +0 -151
package/dist/tests/integration/discoveryEnrichmentFlow.spec.js +0 -175
package/dist/tests/integration/exportArtifacts.spec.js +0 -202
package/dist/tests/integration/exportFallbackFlow.spec.js +0 -99
package/dist/tests/integration/mcpDegradationFlow.spec.js +0 -190
package/dist/tests/integration/mcpTransportFlow.spec.js +0 -139
package/dist/tests/integration/newSessionFlow.spec.js +0 -343
package/dist/tests/integration/pocGithubMcp.spec.js +0 -186
package/dist/tests/integration/pocLocalFallback.spec.js +0 -171
package/dist/tests/integration/pocScaffold.spec.js +0 -163
package/dist/tests/integration/ralphLoopFlow.spec.js +0 -359
package/dist/tests/integration/ralphLoopPartial.spec.js +0 -368
package/dist/tests/integration/resumeAndBacktrack.spec.js +0 -247
package/dist/tests/integration/spinnerLifecycle.spec.js +0 -220
package/dist/tests/integration/summarizationFlow.spec.js +0 -115
package/dist/tests/integration/testRunnerReal.spec.js +0 -52
package/dist/tests/integration/webSearchAgent.spec.js +0 -128
package/dist/tests/live/copilotSdkLive.spec.js +0 -107
package/dist/tests/live/zavaFullWorkshop.spec.js +0 -392
package/dist/tests/setup/loadEnv.js +0 -3
package/dist/tests/unit/cli/developCommand.spec.js +0 -567
package/dist/tests/unit/cli/directCommands.spec.js +0 -279
package/dist/tests/unit/cli/envLoader.spec.js +0 -58
package/dist/tests/unit/cli/ioContext.spec.js +0 -119
package/dist/tests/unit/cli/preflight.spec.js +0 -108
package/dist/tests/unit/cli/statusCommand.spec.js +0 -111
package/dist/tests/unit/cli/workshopClientFallback.spec.js +0 -80
package/dist/tests/unit/cli/workshopCommand.spec.js +0 -328
package/dist/tests/unit/config/vitestEnvSetup.spec.js +0 -13
package/dist/tests/unit/develop/checkpointState.spec.js +0 -315
package/dist/tests/unit/develop/codeGenerator.spec.js +0 -355
package/dist/tests/unit/develop/githubMcpAdapter.spec.js +0 -231
package/dist/tests/unit/develop/mcpContextEnricher.spec.js +0 -433
package/dist/tests/unit/develop/outputValidator.spec.js +0 -119
package/dist/tests/unit/develop/pocScaffolder.spec.js +0 -353
package/dist/tests/unit/develop/ralphLoop.spec.js +0 -1248
package/dist/tests/unit/develop/templateRegistry.spec.js +0 -85
package/dist/tests/unit/develop/testRunner.spec.js +0 -249
package/dist/tests/unit/infraBicep.spec.js +0 -92
package/dist/tests/unit/infraDeploy.spec.js +0 -82
package/dist/tests/unit/infraTeardown.spec.js +0 -63
package/dist/tests/unit/logging/logger.spec.js +0 -43
package/dist/tests/unit/loop/conversationLoop.spec.js +0 -592
package/dist/tests/unit/loop/phaseSummarizer.spec.js +0 -141
package/dist/tests/unit/loop/streamingMarkdown.spec.js +0 -147
package/dist/tests/unit/mcp/mcpManager.spec.js +0 -279
package/dist/tests/unit/mcp/mcpTransport.spec.js +0 -529
package/dist/tests/unit/mcp/retryPolicy.spec.js +0 -218
package/dist/tests/unit/mcp/timeoutValidation.spec.js +0 -46
package/dist/tests/unit/mcp/webSearch.spec.js +0 -567
package/dist/tests/unit/phases/contextSummarizer.spec.js +0 -140
package/dist/tests/unit/phases/discoveryEnricher.repeatCalls.spec.js +0 -93
package/dist/tests/unit/phases/discoveryEnricher.spec.js +0 -411
package/dist/tests/unit/phases/phaseExtractors.spec.js +0 -352
package/dist/tests/unit/phases/phaseHandlers.spec.js +0 -425
package/dist/tests/unit/prompts/promptLoader.spec.js +0 -118
package/dist/tests/unit/schemas/pocSchemas.spec.js +0 -412
package/dist/tests/unit/schemas/session.spec.js +0 -257
package/dist/tests/unit/sessions/exportPaths.spec.js +0 -31
package/dist/tests/unit/sessions/exportWriter.spec.js +0 -655
package/dist/tests/unit/sessions/sessionManager.spec.js +0 -151
package/dist/tests/unit/sessions/sessionStore.spec.js +0 -116
package/dist/tests/unit/shared/activitySpinner.spec.js +0 -175
package/dist/tests/unit/shared/cardsLoader.spec.js +0 -76
package/dist/tests/unit/shared/copilotClient.spec.js +0 -155
package/dist/tests/unit/shared/errorClassifier.spec.js +0 -131
package/dist/tests/unit/shared/events.spec.js +0 -55
package/dist/tests/unit/shared/markdownRenderer.spec.js +0 -35
package/dist/tests/unit/shared/markdownRendererChunks.spec.js +0 -70
package/dist/tests/unit/shared/tableRenderer.spec.js +0 -34
package/dist/vitest.config.js +0 -14
package/dist/vitest.live.config.js +0 -18
package/src/develop/pocScaffolder.ts +0 -646
package/tests/integration/pocScaffold.spec.ts +0 -220
package/tests/unit/develop/pocScaffolder.spec.ts +0 -451

package/dist/tests/unit/mcp/webSearch.spec.js (deleted)

@@ -1,567 +0,0 @@
-/**
- * Web search tool tests (T060, T018-T021).
- *
- * Tests for the web.search tool backed by Azure AI Foundry Agent Service.
- *
- * Covers:
- * - WebSearchConfig validation (T018)
- * - Legacy env var detection (T019)
- * - Graceful degradation scenarios (T020)
- * - Citation extraction from url_citation annotations (T021)
- * - Tool definition shape for Copilot SDK registration
- * - Successful search returning structured results
- */
-import { describe, it, expect, vi, afterEach } from 'vitest';
-import { createWebSearchTool, isWebSearchConfigured, extractCitations, destroyWebSearchSession, WEB_SEARCH_TOOL_DEFINITION, } from '../../../src/mcp/webSearch.js';
-// ── Helper: Create fake agent session deps ──────────────────────────────────
-function createFakeDeps(overrides) {
-    return {
-        createClient: vi.fn().mockReturnValue({ fake: 'client' }),
-        getOpenAIClient: vi.fn().mockResolvedValue({ fake: 'openai' }),
-        createAgentVersion: vi.fn().mockResolvedValue({ name: 'sofia-web-search', version: 'v1' }),
-        deleteAgentVersion: vi.fn().mockResolvedValue(undefined),
-        createConversation: vi.fn().mockResolvedValue({ id: 'conv-123' }),
-        deleteConversation: vi.fn().mockResolvedValue(undefined),
-        createResponse: vi.fn().mockResolvedValue({
-            output: [
-                {
-                    type: 'message',
-                    content: [
-                        {
-                            type: 'output_text',
-                            text: 'Contoso is a healthcare AI company. See source.',
-                            annotations: [
-                                {
-                                    type: 'url_citation',
-                                    url: 'https://contoso.com/about',
-                                    title: 'Contoso Ltd - About',
-                                    start_index: 0,
-                                    end_index: 40,
-                                },
-                            ],
-                        },
-                    ],
-                },
-            ],
-        }),
-        ...overrides,
-    };
-}
-describe('web.search tool', () => {
-    const originalEnv = { ...process.env };
-    afterEach(async () => {
-        process.env = { ...originalEnv };
-        await destroyWebSearchSession();
-    });
-    describe('isWebSearchConfigured', () => {
-        it('returns true when both project endpoint and model deployment name are set', () => {
-            process.env.FOUNDRY_PROJECT_ENDPOINT =
-                'https://sofia-foundry.services.ai.azure.com/api/projects/sofia-project';
-            process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME = 'gpt-4.1-mini';
-            expect(isWebSearchConfigured()).toBe(true);
-        });
-        it('returns false when project endpoint is missing', () => {
-            delete process.env.FOUNDRY_PROJECT_ENDPOINT;
-            process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME = 'gpt-4.1-mini';
-            expect(isWebSearchConfigured()).toBe(false);
-        });
-        it('returns false when model deployment name is missing', () => {
-            process.env.FOUNDRY_PROJECT_ENDPOINT =
-                'https://sofia-foundry.services.ai.azure.com/api/projects/sofia-project';
-            delete process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME;
-            expect(isWebSearchConfigured()).toBe(false);
-        });
-        it('returns false when both are missing', () => {
-            delete process.env.FOUNDRY_PROJECT_ENDPOINT;
-            delete process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME;
-            expect(isWebSearchConfigured()).toBe(false);
-        });
-        it('returns false when only legacy vars are set (T019)', () => {
-            process.env.SOFIA_FOUNDRY_AGENT_ENDPOINT = 'https://foundry.example.com';
-            process.env.SOFIA_FOUNDRY_AGENT_KEY = 'test-key-123';
-            delete process.env.FOUNDRY_PROJECT_ENDPOINT;
-            delete process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME;
-            expect(isWebSearchConfigured()).toBe(false);
-        });
-    });
-    describe('WebSearchConfig validation (T018)', () => {
-        it('accepts valid config with projectEndpoint and modelDeploymentName', () => {
-            const deps = createFakeDeps();
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://sofia-foundry.services.ai.azure.com/api/projects/sofia-project',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            expect(tool).toBeTypeOf('function');
-        });
-        it('creates client with the provided projectEndpoint', async () => {
-            const deps = createFakeDeps();
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://my-foundry.services.ai.azure.com/api/projects/proj',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            await tool('test');
-            expect(deps.createClient).toHaveBeenCalledWith('https://my-foundry.services.ai.azure.com/api/projects/proj');
-        });
-        it('passes modelDeploymentName to agent creation', async () => {
-            const deps = createFakeDeps();
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'my-model',
-            }, deps);
-            await tool('test');
-            expect(deps.createAgentVersion).toHaveBeenCalledWith(expect.anything(), 'sofia-web-search', expect.objectContaining({ model: 'my-model' }));
-        });
-    });
-    describe('tool definition', () => {
-        it('has correct name and description', () => {
-            expect(WEB_SEARCH_TOOL_DEFINITION.name).toBe('web.search');
-            expect(WEB_SEARCH_TOOL_DEFINITION.description).toBeTruthy();
-        });
-        it('accepts a query parameter', () => {
-            const params = WEB_SEARCH_TOOL_DEFINITION.parameters;
-            expect(params).toBeDefined();
-            expect(params.properties?.query).toBeDefined();
-        });
-    });
-    describe('createWebSearchTool', () => {
-        it('returns structured results with citations on success', async () => {
-            const deps = createFakeDeps();
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            const result = await tool('Contoso healthcare');
-            expect(result.results).toHaveLength(1);
-            expect(result.results[0].title).toBe('Contoso Ltd - About');
-            expect(result.results[0].url).toBe('https://contoso.com/about');
-            expect(result.sources).toContain('https://contoso.com/about');
-        });
-        it('reuses agent on second call (lazy initialization)', async () => {
-            const deps = createFakeDeps();
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            await tool('first query');
-            await tool('second query');
-            // Agent created once, response called twice
-            expect(deps.createAgentVersion).toHaveBeenCalledTimes(1);
-            expect(deps.createResponse).toHaveBeenCalledTimes(2);
-        });
-        it('degrades gracefully when credential fails (T020)', async () => {
-            const deps = createFakeDeps({
-                getOpenAIClient: vi
-                    .fn()
-                    .mockRejectedValue(new Error('Azure authentication failed — run `az login`')),
-            });
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            const result = await tool('test query');
-            expect(result.results).toHaveLength(0);
-            expect(result.degraded).toBe(true);
-            expect(result.error).toContain('Azure authentication failed');
-        });
-        it('degrades gracefully when agent creation fails (T020)', async () => {
-            const deps = createFakeDeps({
-                createAgentVersion: vi
-                    .fn()
-                    .mockRejectedValue(new Error('Failed to create web search agent: 403 Forbidden')),
-            });
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            const result = await tool('test query');
-            expect(result.results).toHaveLength(0);
-            expect(result.degraded).toBe(true);
-            expect(result.error).toContain('Failed to create web search agent');
-        });
-        it('degrades gracefully on network error (T020)', async () => {
-            const deps = createFakeDeps({
-                createClient: vi.fn().mockImplementation(() => {
-                    throw new Error('Network error: ECONNREFUSED');
-                }),
-            });
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            const result = await tool('test query');
-            expect(result.results).toHaveLength(0);
-            expect(result.degraded).toBe(true);
-            expect(result.error).toContain('Network error');
-        });
-        it('returns empty results with degraded flag when query fails', async () => {
-            const deps = createFakeDeps({
-                createResponse: vi
-                    .fn()
-                    .mockRejectedValue(new Error('Web search query failed: 429 Rate limited')),
-            });
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            const result = await tool('test query');
-            expect(result.results).toHaveLength(0);
-            expect(result.degraded).toBe(true);
-            expect(result.error).toContain('429');
-        }, 12000);
-        it('falls back to output text snippets when citations are missing', async () => {
-            const deps = createFakeDeps({
-                createResponse: vi.fn().mockResolvedValue({
-                    output: [
-                        {
-                            type: 'message',
-                            content: [
-                                {
-                                    type: 'output_text',
-                                    text: 'Microsoft expands cloud infrastructure in Europe.',
-                                    annotations: [],
-                                },
-                            ],
-                        },
-                    ],
-                }),
-            });
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            const result = await tool('microsoft cloud europe');
-            expect(result.results).toHaveLength(1);
-            expect(result.results[0].snippet).toContain('cloud infrastructure');
-            expect(result.results[0].title).toBe('Foundry response');
-        });
-        it('returns citations on subsequent calls by isolating each query conversation', async () => {
-            let conversationCounter = 0;
-            const seenByConversation = new Map();
-            const deps = createFakeDeps({
-                createConversation: vi.fn().mockImplementation(async () => {
-                    conversationCounter += 1;
-                    return { id: `conv-${conversationCounter}` };
-                }),
-                createResponse: vi
-                    .fn()
-                    .mockImplementation(async (_openAIClient, conversationId) => {
-                    const calls = (seenByConversation.get(conversationId) ?? 0) + 1;
-                    seenByConversation.set(conversationId, calls);
-                    // Simulate Foundry behavior where only the first turn in a conversation
-                    // contains URL citations; follow-up turns may return plain text.
-                    if (calls > 1) {
-                        return {
-                            output: [
-                                {
-                                    type: 'message',
-                                    content: [
-                                        {
-                                            type: 'output_text',
-                                            text: 'No citations in follow-up turn.',
-                                            annotations: [],
-                                        },
-                                    ],
-                                },
-                            ],
-                        };
-                    }
-                    return {
-                        output: [
-                            {
-                                type: 'message',
-                                content: [
-                                    {
-                                        type: 'output_text',
-                                        text: 'Result with source.',
-                                        annotations: [
-                                            {
-                                                type: 'url_citation',
-                                                url: `https://example.com/${conversationId}`,
-                                                title: `Source ${conversationId}`,
-                                                start_index: 0,
-                                                end_index: 18,
-                                            },
-                                        ],
-                                    },
-                                ],
-                            },
-                        ],
-                    };
-                }),
-            });
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            const first = await tool('first query');
-            const second = await tool('second query');
-            expect(first.results).toHaveLength(1);
-            expect(second.results).toHaveLength(1);
-            expect(deps.createConversation).toHaveBeenCalledTimes(2);
-            expect(deps.deleteConversation).toHaveBeenCalledTimes(2);
-        });
-        it('retries on 429 rate limiting with exponential backoff', async () => {
-            let callCount = 0;
-            const deps = createFakeDeps({
-                createResponse: vi.fn().mockImplementation(async () => {
-                    callCount += 1;
-                    if (callCount < 2) {
-                        const error = new Error('Web search query failed: 429 Too Many Requests');
-                        throw error;
-                    }
-                    return {
-                        output: [
-                            {
-                                type: 'message',
-                                content: [
-                                    {
-                                        type: 'output_text',
-                                        text: 'Result after retry.',
-                                        annotations: [
-                                            {
-                                                type: 'url_citation',
-                                                url: 'https://example.com',
-                                                title: 'Example',
-                                                start_index: 0,
-                                                end_index: 18,
-                                            },
-                                        ],
-                                    },
-                                ],
-                            },
-                        ],
-                    };
-                }),
-            });
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            const result = await tool('test query');
-            expect(result.results).toHaveLength(1);
-            expect(deps.createResponse).toHaveBeenCalledTimes(2);
-        }, 6000);
-        it('stops retrying after MAX_RETRIES and returns degraded', async () => {
-            const deps = createFakeDeps({
-                createResponse: vi
-                    .fn()
-                    .mockRejectedValue(new Error('Web search query failed: 429 Too Many Requests')),
-            });
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            const result = await tool('test query');
-            expect(result.results).toHaveLength(0);
-            expect(result.degraded).toBe(true);
-            expect(deps.createResponse).toHaveBeenCalledTimes(3); // initial + 2 retries
-        }, 12000);
-        it('rotates the underlying agent after several queries', async () => {
-            const deps = createFakeDeps();
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            await tool('query one');
-            await tool('query two');
-            await tool('query three');
-            await tool('query four');
-            expect(deps.createAgentVersion).toHaveBeenCalledTimes(2);
-            expect(deps.deleteAgentVersion).toHaveBeenCalledTimes(1);
-        });
-    });
-    describe('extractCitations (T021)', () => {
-        it('extracts url_citation annotations into results', () => {
-            const output = [
-                {
-                    type: 'message',
-                    content: [
-                        {
-                            type: 'output_text',
-                            text: 'Contoso is a leader in healthcare AI.',
-                            annotations: [
-                                {
-                                    type: 'url_citation',
-                                    url: 'https://contoso.com/about',
-                                    title: 'Contoso Ltd - Healthcare AI Solutions',
-                                    start_index: 0,
-                                    end_index: 37,
-                                },
-                            ],
-                        },
-                    ],
-                },
-            ];
-            const { results, sources } = extractCitations(output);
-            expect(results).toHaveLength(1);
-            expect(results[0].title).toBe('Contoso Ltd - Healthcare AI Solutions');
-            expect(results[0].url).toBe('https://contoso.com/about');
-            expect(sources).toContain('https://contoso.com/about');
-        });
-        it('deduplicates sources by URL', () => {
-            const output = [
-                {
-                    type: 'message',
-                    content: [
-                        {
-                            type: 'output_text',
-                            text: 'First ref. Second ref to same source.',
-                            annotations: [
-                                {
-                                    type: 'url_citation',
-                                    url: 'https://example.com',
-                                    title: 'A',
-                                    start_index: 0,
-                                    end_index: 10,
-                                },
-                                {
-                                    type: 'url_citation',
-                                    url: 'https://example.com',
-                                    title: 'B',
-                                    start_index: 11,
-                                    end_index: 37,
-                                },
-                            ],
-                        },
-                    ],
-                },
-            ];
-            const { results, sources } = extractCitations(output);
-            expect(results).toHaveLength(1);
-            expect(sources).toHaveLength(1);
-        });
-        it('handles multiple distinct citations', () => {
-            const output = [
-                {
-                    type: 'message',
-                    content: [
-                        {
-                            type: 'output_text',
-                            text: 'Result text with multiple sources.',
-                            annotations: [
-                                {
-                                    type: 'url_citation',
-                                    url: 'https://a.com',
-                                    title: 'Source A',
-                                    start_index: 0,
-                                    end_index: 10,
-                                },
-                                {
-                                    type: 'url_citation',
-                                    url: 'https://b.com',
-                                    title: 'Source B',
-                                    start_index: 11,
-                                    end_index: 33,
-                                },
-                            ],
-                        },
-                    ],
-                },
-            ];
-            const { results, sources } = extractCitations(output);
-            expect(results).toHaveLength(2);
-            expect(sources).toEqual(['https://a.com', 'https://b.com']);
-        });
-        it('returns empty results for output without citations', () => {
-            const output = [
-                {
-                    type: 'message',
-                    content: [
-                        {
-                            type: 'output_text',
-                            text: 'No citations here.',
-                            annotations: [],
-                        },
-                    ],
-                },
-            ];
-            const { results, sources } = extractCitations(output);
-            expect(results).toHaveLength(0);
-            expect(sources).toHaveLength(0);
-        });
-        it('ignores non-url_citation annotations', () => {
-            const output = [
-                {
-                    type: 'message',
-                    content: [
-                        {
-                            type: 'output_text',
-                            text: 'Some text',
-                            annotations: [
-                                { type: 'file_citation', url: 'file://local', title: 'File' },
-                                {
-                                    type: 'url_citation',
-                                    url: 'https://valid.com',
-                                    title: 'Valid',
-                                    start_index: 0,
-                                    end_index: 9,
-                                },
-                            ],
-                        },
-                    ],
-                },
-            ];
-            const { results } = extractCitations(output);
-            expect(results).toHaveLength(1);
-            expect(results[0].url).toBe('https://valid.com');
-        });
-        it('ignores non-message output items', () => {
-            const output = [
-                { type: 'tool_call', name: 'web_search_preview' },
-                {
-                    type: 'message',
-                    content: [
-                        {
-                            type: 'output_text',
-                            text: 'Result text.',
-                            annotations: [
-                                {
-                                    type: 'url_citation',
-                                    url: 'https://found.com',
-                                    title: 'Found',
-                                    start_index: 0,
-                                    end_index: 12,
-                                },
-                            ],
-                        },
-                    ],
-                },
-            ];
-            const { results } = extractCitations(output);
-            expect(results).toHaveLength(1);
-            expect(results[0].url).toBe('https://found.com');
-        });
-    });
-    describe('destroyWebSearchSession', () => {
-        it('cleans up the agent on destroy (conversations are per-query)', async () => {
-            const deps = createFakeDeps();
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            // Initialize the session
-            await tool('trigger init');
-            // Destroy
-            await destroyWebSearchSession();
-            expect(deps.deleteConversation).toHaveBeenCalledWith(expect.anything(), 'conv-123');
-            expect(deps.deleteAgentVersion).toHaveBeenCalledWith(expect.anything(), 'sofia-web-search', 'v1');
-        });
-        it('is safe to call when not initialized', async () => {
-            // Should not throw
-            await destroyWebSearchSession();
-        });
-        it('logs warning but does not throw when cleanup fails', async () => {
-            const deps = createFakeDeps({
-                deleteConversation: vi.fn().mockRejectedValue(new Error('cleanup failed')),
-                deleteAgentVersion: vi.fn().mockRejectedValue(new Error('cleanup failed')),
-            });
-            const tool = createWebSearchTool({
-                projectEndpoint: 'https://foundry.example.com',
-                modelDeploymentName: 'gpt-4.1-mini',
-            }, deps);
-            await tool('trigger init');
-            // Should not throw
-            await expect(destroyWebSearchSession()).resolves.toBeUndefined();
-        });
-    });
-});