sofia-cli 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/README.md +42 -20
  2. package/dist/infra/deploy.sh +193 -0
  3. package/dist/infra/gather-env.sh +211 -0
  4. package/dist/infra/infra/deploy.sh +193 -0
  5. package/dist/infra/infra/gather-env.sh +211 -0
  6. package/dist/infra/infra/main.bicep +90 -0
  7. package/dist/infra/infra/main.bicepparam +18 -0
  8. package/dist/infra/infra/resources.bicep +134 -0
  9. package/dist/infra/infra/teardown.sh +114 -0
  10. package/dist/infra/main.bicep +90 -0
  11. package/dist/infra/main.bicepparam +18 -0
  12. package/dist/infra/resources.bicep +134 -0
  13. package/dist/infra/teardown.sh +114 -0
  14. package/dist/src/cli/developCommand.js +0 -2
  15. package/dist/src/cli/index.js +8 -1
  16. package/dist/src/cli/workshopCommand.js +1 -1
  17. package/dist/src/develop/index.js +1 -1
  18. package/dist/src/develop/pocUtils.js +228 -0
  19. package/dist/src/develop/ralphLoop.js +8 -27
  20. package/dist/src/shared/data/cards.json +655 -670
  21. package/docs/architecture.md +2 -1
  22. package/package.json +5 -3
  23. package/src/cli/developCommand.ts +1 -3
  24. package/src/cli/index.ts +11 -1
  25. package/src/cli/workshopCommand.ts +21 -17
  26. package/src/develop/dynamicScaffolder.ts +36 -30
  27. package/src/develop/index.ts +13 -2
  28. package/src/develop/pocUtils.ts +296 -0
  29. package/src/develop/ralphLoop.ts +8 -28
  30. package/src/develop/templateRegistry.ts +19 -18
  31. package/src/shared/data/cards.json +655 -670
  32. package/tests/e2e/developE2e.spec.ts +3 -61
  33. package/tests/e2e/developFailureE2e.spec.ts +34 -38
  34. package/tests/integration/pocGithubMcp.spec.ts +29 -39
  35. package/tests/integration/pocLocalFallback.spec.ts +29 -39
  36. package/tests/integration/ralphLoopFlow.spec.ts +46 -66
  37. package/tests/integration/ralphLoopPartial.spec.ts +30 -37
  38. package/tests/unit/develop/githubMcpAdapter.spec.ts +0 -134
  39. package/tests/unit/develop/outputValidator.spec.ts +45 -21
  40. package/tests/unit/develop/ralphLoop.spec.ts +58 -94
  41. package/tsconfig.json +2 -1
  42. package/vitest.workspace.ts +5 -0
  43. package/dist/src/develop/pocScaffolder.js +0 -542
  44. package/dist/tests/e2e/developE2e.spec.js +0 -126
  45. package/dist/tests/e2e/developFailureE2e.spec.js +0 -247
  46. package/dist/tests/e2e/developPty.spec.js +0 -75
  47. package/dist/tests/e2e/discoveryWebSearchRelevance.spec.js +0 -84
  48. package/dist/tests/e2e/harness.spec.js +0 -83
  49. package/dist/tests/e2e/mcpLive.spec.js +0 -120
  50. package/dist/tests/e2e/newSession.e2e.spec.js +0 -177
  51. package/dist/tests/e2e/ralphLoopEnrichmentComparison.spec.js +0 -62
  52. package/dist/tests/e2e/workiqEnrichment.spec.js +0 -56
  53. package/dist/tests/e2e/zavaSimulation.spec.js +0 -452
  54. package/dist/tests/fixtures/test-fixture-project/src/add.js +0 -3
  55. package/dist/tests/fixtures/test-fixture-project/tests/failing.test.js +0 -6
  56. package/dist/tests/fixtures/test-fixture-project/tests/hanging.test.js +0 -8
  57. package/dist/tests/fixtures/test-fixture-project/tests/passing.test.js +0 -10
  58. package/dist/tests/fixtures/test-fixture-project/vitest.config.js +0 -6
  59. package/dist/tests/integration/autoStartConversation.spec.js +0 -138
  60. package/dist/tests/integration/defaultCommand.spec.js +0 -147
  61. package/dist/tests/integration/directCommandNonTty.spec.js +0 -224
  62. package/dist/tests/integration/directCommandTty.spec.js +0 -151
  63. package/dist/tests/integration/discoveryEnrichmentFlow.spec.js +0 -175
  64. package/dist/tests/integration/exportArtifacts.spec.js +0 -202
  65. package/dist/tests/integration/exportFallbackFlow.spec.js +0 -99
  66. package/dist/tests/integration/mcpDegradationFlow.spec.js +0 -190
  67. package/dist/tests/integration/mcpTransportFlow.spec.js +0 -139
  68. package/dist/tests/integration/newSessionFlow.spec.js +0 -343
  69. package/dist/tests/integration/pocGithubMcp.spec.js +0 -186
  70. package/dist/tests/integration/pocLocalFallback.spec.js +0 -171
  71. package/dist/tests/integration/pocScaffold.spec.js +0 -163
  72. package/dist/tests/integration/ralphLoopFlow.spec.js +0 -359
  73. package/dist/tests/integration/ralphLoopPartial.spec.js +0 -368
  74. package/dist/tests/integration/resumeAndBacktrack.spec.js +0 -247
  75. package/dist/tests/integration/spinnerLifecycle.spec.js +0 -220
  76. package/dist/tests/integration/summarizationFlow.spec.js +0 -115
  77. package/dist/tests/integration/testRunnerReal.spec.js +0 -52
  78. package/dist/tests/integration/webSearchAgent.spec.js +0 -128
  79. package/dist/tests/live/copilotSdkLive.spec.js +0 -107
  80. package/dist/tests/live/zavaFullWorkshop.spec.js +0 -392
  81. package/dist/tests/setup/loadEnv.js +0 -3
  82. package/dist/tests/unit/cli/developCommand.spec.js +0 -567
  83. package/dist/tests/unit/cli/directCommands.spec.js +0 -279
  84. package/dist/tests/unit/cli/envLoader.spec.js +0 -58
  85. package/dist/tests/unit/cli/ioContext.spec.js +0 -119
  86. package/dist/tests/unit/cli/preflight.spec.js +0 -108
  87. package/dist/tests/unit/cli/statusCommand.spec.js +0 -111
  88. package/dist/tests/unit/cli/workshopClientFallback.spec.js +0 -80
  89. package/dist/tests/unit/cli/workshopCommand.spec.js +0 -328
  90. package/dist/tests/unit/config/vitestEnvSetup.spec.js +0 -13
  91. package/dist/tests/unit/develop/checkpointState.spec.js +0 -315
  92. package/dist/tests/unit/develop/codeGenerator.spec.js +0 -355
  93. package/dist/tests/unit/develop/githubMcpAdapter.spec.js +0 -231
  94. package/dist/tests/unit/develop/mcpContextEnricher.spec.js +0 -433
  95. package/dist/tests/unit/develop/outputValidator.spec.js +0 -119
  96. package/dist/tests/unit/develop/pocScaffolder.spec.js +0 -353
  97. package/dist/tests/unit/develop/ralphLoop.spec.js +0 -1248
  98. package/dist/tests/unit/develop/templateRegistry.spec.js +0 -85
  99. package/dist/tests/unit/develop/testRunner.spec.js +0 -249
  100. package/dist/tests/unit/infraBicep.spec.js +0 -92
  101. package/dist/tests/unit/infraDeploy.spec.js +0 -82
  102. package/dist/tests/unit/infraTeardown.spec.js +0 -63
  103. package/dist/tests/unit/logging/logger.spec.js +0 -43
  104. package/dist/tests/unit/loop/conversationLoop.spec.js +0 -592
  105. package/dist/tests/unit/loop/phaseSummarizer.spec.js +0 -141
  106. package/dist/tests/unit/loop/streamingMarkdown.spec.js +0 -147
  107. package/dist/tests/unit/mcp/mcpManager.spec.js +0 -279
  108. package/dist/tests/unit/mcp/mcpTransport.spec.js +0 -529
  109. package/dist/tests/unit/mcp/retryPolicy.spec.js +0 -218
  110. package/dist/tests/unit/mcp/timeoutValidation.spec.js +0 -46
  111. package/dist/tests/unit/mcp/webSearch.spec.js +0 -567
  112. package/dist/tests/unit/phases/contextSummarizer.spec.js +0 -140
  113. package/dist/tests/unit/phases/discoveryEnricher.repeatCalls.spec.js +0 -93
  114. package/dist/tests/unit/phases/discoveryEnricher.spec.js +0 -411
  115. package/dist/tests/unit/phases/phaseExtractors.spec.js +0 -352
  116. package/dist/tests/unit/phases/phaseHandlers.spec.js +0 -425
  117. package/dist/tests/unit/prompts/promptLoader.spec.js +0 -118
  118. package/dist/tests/unit/schemas/pocSchemas.spec.js +0 -412
  119. package/dist/tests/unit/schemas/session.spec.js +0 -257
  120. package/dist/tests/unit/sessions/exportPaths.spec.js +0 -31
  121. package/dist/tests/unit/sessions/exportWriter.spec.js +0 -655
  122. package/dist/tests/unit/sessions/sessionManager.spec.js +0 -151
  123. package/dist/tests/unit/sessions/sessionStore.spec.js +0 -116
  124. package/dist/tests/unit/shared/activitySpinner.spec.js +0 -175
  125. package/dist/tests/unit/shared/cardsLoader.spec.js +0 -76
  126. package/dist/tests/unit/shared/copilotClient.spec.js +0 -155
  127. package/dist/tests/unit/shared/errorClassifier.spec.js +0 -131
  128. package/dist/tests/unit/shared/events.spec.js +0 -55
  129. package/dist/tests/unit/shared/markdownRenderer.spec.js +0 -35
  130. package/dist/tests/unit/shared/markdownRendererChunks.spec.js +0 -70
  131. package/dist/tests/unit/shared/tableRenderer.spec.js +0 -34
  132. package/dist/vitest.config.js +0 -14
  133. package/dist/vitest.live.config.js +0 -18
  134. package/src/develop/pocScaffolder.ts +0 -646
  135. package/tests/integration/pocScaffold.spec.ts +0 -220
  136. package/tests/unit/develop/pocScaffolder.spec.ts +0 -451
@@ -1,567 +0,0 @@
1
- /**
2
- * Web search tool tests (T060, T018-T021).
3
- *
4
- * Tests for the web.search tool backed by Azure AI Foundry Agent Service.
5
- *
6
- * Covers:
7
- * - WebSearchConfig validation (T018)
8
- * - Legacy env var detection (T019)
9
- * - Graceful degradation scenarios (T020)
10
- * - Citation extraction from url_citation annotations (T021)
11
- * - Tool definition shape for Copilot SDK registration
12
- * - Successful search returning structured results
13
- */
14
- import { describe, it, expect, vi, afterEach } from 'vitest';
15
- import { createWebSearchTool, isWebSearchConfigured, extractCitations, destroyWebSearchSession, WEB_SEARCH_TOOL_DEFINITION, } from '../../../src/mcp/webSearch.js';
16
- // ── Helper: Create fake agent session deps ──────────────────────────────────
17
- function createFakeDeps(overrides) {
18
- return {
19
- createClient: vi.fn().mockReturnValue({ fake: 'client' }),
20
- getOpenAIClient: vi.fn().mockResolvedValue({ fake: 'openai' }),
21
- createAgentVersion: vi.fn().mockResolvedValue({ name: 'sofia-web-search', version: 'v1' }),
22
- deleteAgentVersion: vi.fn().mockResolvedValue(undefined),
23
- createConversation: vi.fn().mockResolvedValue({ id: 'conv-123' }),
24
- deleteConversation: vi.fn().mockResolvedValue(undefined),
25
- createResponse: vi.fn().mockResolvedValue({
26
- output: [
27
- {
28
- type: 'message',
29
- content: [
30
- {
31
- type: 'output_text',
32
- text: 'Contoso is a healthcare AI company. See source.',
33
- annotations: [
34
- {
35
- type: 'url_citation',
36
- url: 'https://contoso.com/about',
37
- title: 'Contoso Ltd - About',
38
- start_index: 0,
39
- end_index: 40,
40
- },
41
- ],
42
- },
43
- ],
44
- },
45
- ],
46
- }),
47
- ...overrides,
48
- };
49
- }
50
- describe('web.search tool', () => {
51
- const originalEnv = { ...process.env };
52
- afterEach(async () => {
53
- process.env = { ...originalEnv };
54
- await destroyWebSearchSession();
55
- });
56
- describe('isWebSearchConfigured', () => {
57
- it('returns true when both project endpoint and model deployment name are set', () => {
58
- process.env.FOUNDRY_PROJECT_ENDPOINT =
59
- 'https://sofia-foundry.services.ai.azure.com/api/projects/sofia-project';
60
- process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME = 'gpt-4.1-mini';
61
- expect(isWebSearchConfigured()).toBe(true);
62
- });
63
- it('returns false when project endpoint is missing', () => {
64
- delete process.env.FOUNDRY_PROJECT_ENDPOINT;
65
- process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME = 'gpt-4.1-mini';
66
- expect(isWebSearchConfigured()).toBe(false);
67
- });
68
- it('returns false when model deployment name is missing', () => {
69
- process.env.FOUNDRY_PROJECT_ENDPOINT =
70
- 'https://sofia-foundry.services.ai.azure.com/api/projects/sofia-project';
71
- delete process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME;
72
- expect(isWebSearchConfigured()).toBe(false);
73
- });
74
- it('returns false when both are missing', () => {
75
- delete process.env.FOUNDRY_PROJECT_ENDPOINT;
76
- delete process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME;
77
- expect(isWebSearchConfigured()).toBe(false);
78
- });
79
- it('returns false when only legacy vars are set (T019)', () => {
80
- process.env.SOFIA_FOUNDRY_AGENT_ENDPOINT = 'https://foundry.example.com';
81
- process.env.SOFIA_FOUNDRY_AGENT_KEY = 'test-key-123';
82
- delete process.env.FOUNDRY_PROJECT_ENDPOINT;
83
- delete process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME;
84
- expect(isWebSearchConfigured()).toBe(false);
85
- });
86
- });
87
- describe('WebSearchConfig validation (T018)', () => {
88
- it('accepts valid config with projectEndpoint and modelDeploymentName', () => {
89
- const deps = createFakeDeps();
90
- const tool = createWebSearchTool({
91
- projectEndpoint: 'https://sofia-foundry.services.ai.azure.com/api/projects/sofia-project',
92
- modelDeploymentName: 'gpt-4.1-mini',
93
- }, deps);
94
- expect(tool).toBeTypeOf('function');
95
- });
96
- it('creates client with the provided projectEndpoint', async () => {
97
- const deps = createFakeDeps();
98
- const tool = createWebSearchTool({
99
- projectEndpoint: 'https://my-foundry.services.ai.azure.com/api/projects/proj',
100
- modelDeploymentName: 'gpt-4.1-mini',
101
- }, deps);
102
- await tool('test');
103
- expect(deps.createClient).toHaveBeenCalledWith('https://my-foundry.services.ai.azure.com/api/projects/proj');
104
- });
105
- it('passes modelDeploymentName to agent creation', async () => {
106
- const deps = createFakeDeps();
107
- const tool = createWebSearchTool({
108
- projectEndpoint: 'https://foundry.example.com',
109
- modelDeploymentName: 'my-model',
110
- }, deps);
111
- await tool('test');
112
- expect(deps.createAgentVersion).toHaveBeenCalledWith(expect.anything(), 'sofia-web-search', expect.objectContaining({ model: 'my-model' }));
113
- });
114
- });
115
- describe('tool definition', () => {
116
- it('has correct name and description', () => {
117
- expect(WEB_SEARCH_TOOL_DEFINITION.name).toBe('web.search');
118
- expect(WEB_SEARCH_TOOL_DEFINITION.description).toBeTruthy();
119
- });
120
- it('accepts a query parameter', () => {
121
- const params = WEB_SEARCH_TOOL_DEFINITION.parameters;
122
- expect(params).toBeDefined();
123
- expect(params.properties?.query).toBeDefined();
124
- });
125
- });
126
- describe('createWebSearchTool', () => {
127
- it('returns structured results with citations on success', async () => {
128
- const deps = createFakeDeps();
129
- const tool = createWebSearchTool({
130
- projectEndpoint: 'https://foundry.example.com',
131
- modelDeploymentName: 'gpt-4.1-mini',
132
- }, deps);
133
- const result = await tool('Contoso healthcare');
134
- expect(result.results).toHaveLength(1);
135
- expect(result.results[0].title).toBe('Contoso Ltd - About');
136
- expect(result.results[0].url).toBe('https://contoso.com/about');
137
- expect(result.sources).toContain('https://contoso.com/about');
138
- });
139
- it('reuses agent on second call (lazy initialization)', async () => {
140
- const deps = createFakeDeps();
141
- const tool = createWebSearchTool({
142
- projectEndpoint: 'https://foundry.example.com',
143
- modelDeploymentName: 'gpt-4.1-mini',
144
- }, deps);
145
- await tool('first query');
146
- await tool('second query');
147
- // Agent created once, response called twice
148
- expect(deps.createAgentVersion).toHaveBeenCalledTimes(1);
149
- expect(deps.createResponse).toHaveBeenCalledTimes(2);
150
- });
151
- it('degrades gracefully when credential fails (T020)', async () => {
152
- const deps = createFakeDeps({
153
- getOpenAIClient: vi
154
- .fn()
155
- .mockRejectedValue(new Error('Azure authentication failed — run `az login`')),
156
- });
157
- const tool = createWebSearchTool({
158
- projectEndpoint: 'https://foundry.example.com',
159
- modelDeploymentName: 'gpt-4.1-mini',
160
- }, deps);
161
- const result = await tool('test query');
162
- expect(result.results).toHaveLength(0);
163
- expect(result.degraded).toBe(true);
164
- expect(result.error).toContain('Azure authentication failed');
165
- });
166
- it('degrades gracefully when agent creation fails (T020)', async () => {
167
- const deps = createFakeDeps({
168
- createAgentVersion: vi
169
- .fn()
170
- .mockRejectedValue(new Error('Failed to create web search agent: 403 Forbidden')),
171
- });
172
- const tool = createWebSearchTool({
173
- projectEndpoint: 'https://foundry.example.com',
174
- modelDeploymentName: 'gpt-4.1-mini',
175
- }, deps);
176
- const result = await tool('test query');
177
- expect(result.results).toHaveLength(0);
178
- expect(result.degraded).toBe(true);
179
- expect(result.error).toContain('Failed to create web search agent');
180
- });
181
- it('degrades gracefully on network error (T020)', async () => {
182
- const deps = createFakeDeps({
183
- createClient: vi.fn().mockImplementation(() => {
184
- throw new Error('Network error: ECONNREFUSED');
185
- }),
186
- });
187
- const tool = createWebSearchTool({
188
- projectEndpoint: 'https://foundry.example.com',
189
- modelDeploymentName: 'gpt-4.1-mini',
190
- }, deps);
191
- const result = await tool('test query');
192
- expect(result.results).toHaveLength(0);
193
- expect(result.degraded).toBe(true);
194
- expect(result.error).toContain('Network error');
195
- });
196
- it('returns empty results with degraded flag when query fails', async () => {
197
- const deps = createFakeDeps({
198
- createResponse: vi
199
- .fn()
200
- .mockRejectedValue(new Error('Web search query failed: 429 Rate limited')),
201
- });
202
- const tool = createWebSearchTool({
203
- projectEndpoint: 'https://foundry.example.com',
204
- modelDeploymentName: 'gpt-4.1-mini',
205
- }, deps);
206
- const result = await tool('test query');
207
- expect(result.results).toHaveLength(0);
208
- expect(result.degraded).toBe(true);
209
- expect(result.error).toContain('429');
210
- }, 12000);
211
- it('falls back to output text snippets when citations are missing', async () => {
212
- const deps = createFakeDeps({
213
- createResponse: vi.fn().mockResolvedValue({
214
- output: [
215
- {
216
- type: 'message',
217
- content: [
218
- {
219
- type: 'output_text',
220
- text: 'Microsoft expands cloud infrastructure in Europe.',
221
- annotations: [],
222
- },
223
- ],
224
- },
225
- ],
226
- }),
227
- });
228
- const tool = createWebSearchTool({
229
- projectEndpoint: 'https://foundry.example.com',
230
- modelDeploymentName: 'gpt-4.1-mini',
231
- }, deps);
232
- const result = await tool('microsoft cloud europe');
233
- expect(result.results).toHaveLength(1);
234
- expect(result.results[0].snippet).toContain('cloud infrastructure');
235
- expect(result.results[0].title).toBe('Foundry response');
236
- });
237
- it('returns citations on subsequent calls by isolating each query conversation', async () => {
238
- let conversationCounter = 0;
239
- const seenByConversation = new Map();
240
- const deps = createFakeDeps({
241
- createConversation: vi.fn().mockImplementation(async () => {
242
- conversationCounter += 1;
243
- return { id: `conv-${conversationCounter}` };
244
- }),
245
- createResponse: vi
246
- .fn()
247
- .mockImplementation(async (_openAIClient, conversationId) => {
248
- const calls = (seenByConversation.get(conversationId) ?? 0) + 1;
249
- seenByConversation.set(conversationId, calls);
250
- // Simulate Foundry behavior where only the first turn in a conversation
251
- // contains URL citations; follow-up turns may return plain text.
252
- if (calls > 1) {
253
- return {
254
- output: [
255
- {
256
- type: 'message',
257
- content: [
258
- {
259
- type: 'output_text',
260
- text: 'No citations in follow-up turn.',
261
- annotations: [],
262
- },
263
- ],
264
- },
265
- ],
266
- };
267
- }
268
- return {
269
- output: [
270
- {
271
- type: 'message',
272
- content: [
273
- {
274
- type: 'output_text',
275
- text: 'Result with source.',
276
- annotations: [
277
- {
278
- type: 'url_citation',
279
- url: `https://example.com/${conversationId}`,
280
- title: `Source ${conversationId}`,
281
- start_index: 0,
282
- end_index: 18,
283
- },
284
- ],
285
- },
286
- ],
287
- },
288
- ],
289
- };
290
- }),
291
- });
292
- const tool = createWebSearchTool({
293
- projectEndpoint: 'https://foundry.example.com',
294
- modelDeploymentName: 'gpt-4.1-mini',
295
- }, deps);
296
- const first = await tool('first query');
297
- const second = await tool('second query');
298
- expect(first.results).toHaveLength(1);
299
- expect(second.results).toHaveLength(1);
300
- expect(deps.createConversation).toHaveBeenCalledTimes(2);
301
- expect(deps.deleteConversation).toHaveBeenCalledTimes(2);
302
- });
303
- it('retries on 429 rate limiting with exponential backoff', async () => {
304
- let callCount = 0;
305
- const deps = createFakeDeps({
306
- createResponse: vi.fn().mockImplementation(async () => {
307
- callCount += 1;
308
- if (callCount < 2) {
309
- const error = new Error('Web search query failed: 429 Too Many Requests');
310
- throw error;
311
- }
312
- return {
313
- output: [
314
- {
315
- type: 'message',
316
- content: [
317
- {
318
- type: 'output_text',
319
- text: 'Result after retry.',
320
- annotations: [
321
- {
322
- type: 'url_citation',
323
- url: 'https://example.com',
324
- title: 'Example',
325
- start_index: 0,
326
- end_index: 18,
327
- },
328
- ],
329
- },
330
- ],
331
- },
332
- ],
333
- };
334
- }),
335
- });
336
- const tool = createWebSearchTool({
337
- projectEndpoint: 'https://foundry.example.com',
338
- modelDeploymentName: 'gpt-4.1-mini',
339
- }, deps);
340
- const result = await tool('test query');
341
- expect(result.results).toHaveLength(1);
342
- expect(deps.createResponse).toHaveBeenCalledTimes(2);
343
- }, 6000);
344
- it('stops retrying after MAX_RETRIES and returns degraded', async () => {
345
- const deps = createFakeDeps({
346
- createResponse: vi
347
- .fn()
348
- .mockRejectedValue(new Error('Web search query failed: 429 Too Many Requests')),
349
- });
350
- const tool = createWebSearchTool({
351
- projectEndpoint: 'https://foundry.example.com',
352
- modelDeploymentName: 'gpt-4.1-mini',
353
- }, deps);
354
- const result = await tool('test query');
355
- expect(result.results).toHaveLength(0);
356
- expect(result.degraded).toBe(true);
357
- expect(deps.createResponse).toHaveBeenCalledTimes(3); // initial + 2 retries
358
- }, 12000);
359
- it('rotates the underlying agent after several queries', async () => {
360
- const deps = createFakeDeps();
361
- const tool = createWebSearchTool({
362
- projectEndpoint: 'https://foundry.example.com',
363
- modelDeploymentName: 'gpt-4.1-mini',
364
- }, deps);
365
- await tool('query one');
366
- await tool('query two');
367
- await tool('query three');
368
- await tool('query four');
369
- expect(deps.createAgentVersion).toHaveBeenCalledTimes(2);
370
- expect(deps.deleteAgentVersion).toHaveBeenCalledTimes(1);
371
- });
372
- });
373
- describe('extractCitations (T021)', () => {
374
- it('extracts url_citation annotations into results', () => {
375
- const output = [
376
- {
377
- type: 'message',
378
- content: [
379
- {
380
- type: 'output_text',
381
- text: 'Contoso is a leader in healthcare AI.',
382
- annotations: [
383
- {
384
- type: 'url_citation',
385
- url: 'https://contoso.com/about',
386
- title: 'Contoso Ltd - Healthcare AI Solutions',
387
- start_index: 0,
388
- end_index: 37,
389
- },
390
- ],
391
- },
392
- ],
393
- },
394
- ];
395
- const { results, sources } = extractCitations(output);
396
- expect(results).toHaveLength(1);
397
- expect(results[0].title).toBe('Contoso Ltd - Healthcare AI Solutions');
398
- expect(results[0].url).toBe('https://contoso.com/about');
399
- expect(sources).toContain('https://contoso.com/about');
400
- });
401
- it('deduplicates sources by URL', () => {
402
- const output = [
403
- {
404
- type: 'message',
405
- content: [
406
- {
407
- type: 'output_text',
408
- text: 'First ref. Second ref to same source.',
409
- annotations: [
410
- {
411
- type: 'url_citation',
412
- url: 'https://example.com',
413
- title: 'A',
414
- start_index: 0,
415
- end_index: 10,
416
- },
417
- {
418
- type: 'url_citation',
419
- url: 'https://example.com',
420
- title: 'B',
421
- start_index: 11,
422
- end_index: 37,
423
- },
424
- ],
425
- },
426
- ],
427
- },
428
- ];
429
- const { results, sources } = extractCitations(output);
430
- expect(results).toHaveLength(1);
431
- expect(sources).toHaveLength(1);
432
- });
433
- it('handles multiple distinct citations', () => {
434
- const output = [
435
- {
436
- type: 'message',
437
- content: [
438
- {
439
- type: 'output_text',
440
- text: 'Result text with multiple sources.',
441
- annotations: [
442
- {
443
- type: 'url_citation',
444
- url: 'https://a.com',
445
- title: 'Source A',
446
- start_index: 0,
447
- end_index: 10,
448
- },
449
- {
450
- type: 'url_citation',
451
- url: 'https://b.com',
452
- title: 'Source B',
453
- start_index: 11,
454
- end_index: 33,
455
- },
456
- ],
457
- },
458
- ],
459
- },
460
- ];
461
- const { results, sources } = extractCitations(output);
462
- expect(results).toHaveLength(2);
463
- expect(sources).toEqual(['https://a.com', 'https://b.com']);
464
- });
465
- it('returns empty results for output without citations', () => {
466
- const output = [
467
- {
468
- type: 'message',
469
- content: [
470
- {
471
- type: 'output_text',
472
- text: 'No citations here.',
473
- annotations: [],
474
- },
475
- ],
476
- },
477
- ];
478
- const { results, sources } = extractCitations(output);
479
- expect(results).toHaveLength(0);
480
- expect(sources).toHaveLength(0);
481
- });
482
- it('ignores non-url_citation annotations', () => {
483
- const output = [
484
- {
485
- type: 'message',
486
- content: [
487
- {
488
- type: 'output_text',
489
- text: 'Some text',
490
- annotations: [
491
- { type: 'file_citation', url: 'file://local', title: 'File' },
492
- {
493
- type: 'url_citation',
494
- url: 'https://valid.com',
495
- title: 'Valid',
496
- start_index: 0,
497
- end_index: 9,
498
- },
499
- ],
500
- },
501
- ],
502
- },
503
- ];
504
- const { results } = extractCitations(output);
505
- expect(results).toHaveLength(1);
506
- expect(results[0].url).toBe('https://valid.com');
507
- });
508
- it('ignores non-message output items', () => {
509
- const output = [
510
- { type: 'tool_call', name: 'web_search_preview' },
511
- {
512
- type: 'message',
513
- content: [
514
- {
515
- type: 'output_text',
516
- text: 'Result text.',
517
- annotations: [
518
- {
519
- type: 'url_citation',
520
- url: 'https://found.com',
521
- title: 'Found',
522
- start_index: 0,
523
- end_index: 12,
524
- },
525
- ],
526
- },
527
- ],
528
- },
529
- ];
530
- const { results } = extractCitations(output);
531
- expect(results).toHaveLength(1);
532
- expect(results[0].url).toBe('https://found.com');
533
- });
534
- });
535
- describe('destroyWebSearchSession', () => {
536
- it('cleans up the agent on destroy (conversations are per-query)', async () => {
537
- const deps = createFakeDeps();
538
- const tool = createWebSearchTool({
539
- projectEndpoint: 'https://foundry.example.com',
540
- modelDeploymentName: 'gpt-4.1-mini',
541
- }, deps);
542
- // Initialize the session
543
- await tool('trigger init');
544
- // Destroy
545
- await destroyWebSearchSession();
546
- expect(deps.deleteConversation).toHaveBeenCalledWith(expect.anything(), 'conv-123');
547
- expect(deps.deleteAgentVersion).toHaveBeenCalledWith(expect.anything(), 'sofia-web-search', 'v1');
548
- });
549
- it('is safe to call when not initialized', async () => {
550
- // Should not throw
551
- await destroyWebSearchSession();
552
- });
553
- it('logs warning but does not throw when cleanup fails', async () => {
554
- const deps = createFakeDeps({
555
- deleteConversation: vi.fn().mockRejectedValue(new Error('cleanup failed')),
556
- deleteAgentVersion: vi.fn().mockRejectedValue(new Error('cleanup failed')),
557
- });
558
- const tool = createWebSearchTool({
559
- projectEndpoint: 'https://foundry.example.com',
560
- modelDeploymentName: 'gpt-4.1-mini',
561
- }, deps);
562
- await tool('trigger init');
563
- // Should not throw
564
- await expect(destroyWebSearchSession()).resolves.toBeUndefined();
565
- });
566
- });
567
- });