sofia-cli 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/README.md +42 -20
  2. package/dist/infra/deploy.sh +193 -0
  3. package/dist/infra/gather-env.sh +211 -0
  4. package/dist/infra/infra/deploy.sh +193 -0
  5. package/dist/infra/infra/gather-env.sh +211 -0
  6. package/dist/infra/infra/main.bicep +90 -0
  7. package/dist/infra/infra/main.bicepparam +18 -0
  8. package/dist/infra/infra/resources.bicep +134 -0
  9. package/dist/infra/infra/teardown.sh +114 -0
  10. package/dist/infra/main.bicep +90 -0
  11. package/dist/infra/main.bicepparam +18 -0
  12. package/dist/infra/resources.bicep +134 -0
  13. package/dist/infra/teardown.sh +114 -0
  14. package/dist/src/cli/developCommand.js +0 -2
  15. package/dist/src/cli/index.js +8 -1
  16. package/dist/src/cli/workshopCommand.js +1 -1
  17. package/dist/src/develop/index.js +1 -1
  18. package/dist/src/develop/pocUtils.js +228 -0
  19. package/dist/src/develop/ralphLoop.js +8 -27
  20. package/dist/src/shared/data/cards.json +655 -670
  21. package/docs/architecture.md +2 -1
  22. package/package.json +5 -3
  23. package/src/cli/developCommand.ts +1 -3
  24. package/src/cli/index.ts +11 -1
  25. package/src/cli/workshopCommand.ts +21 -17
  26. package/src/develop/dynamicScaffolder.ts +36 -30
  27. package/src/develop/index.ts +13 -2
  28. package/src/develop/pocUtils.ts +296 -0
  29. package/src/develop/ralphLoop.ts +8 -28
  30. package/src/develop/templateRegistry.ts +19 -18
  31. package/src/shared/data/cards.json +655 -670
  32. package/tests/e2e/developE2e.spec.ts +3 -61
  33. package/tests/e2e/developFailureE2e.spec.ts +34 -38
  34. package/tests/integration/pocGithubMcp.spec.ts +29 -39
  35. package/tests/integration/pocLocalFallback.spec.ts +29 -39
  36. package/tests/integration/ralphLoopFlow.spec.ts +46 -66
  37. package/tests/integration/ralphLoopPartial.spec.ts +30 -37
  38. package/tests/unit/develop/githubMcpAdapter.spec.ts +0 -134
  39. package/tests/unit/develop/outputValidator.spec.ts +45 -21
  40. package/tests/unit/develop/ralphLoop.spec.ts +58 -94
  41. package/tsconfig.json +2 -1
  42. package/vitest.workspace.ts +5 -0
  43. package/dist/src/develop/pocScaffolder.js +0 -542
  44. package/dist/tests/e2e/developE2e.spec.js +0 -126
  45. package/dist/tests/e2e/developFailureE2e.spec.js +0 -247
  46. package/dist/tests/e2e/developPty.spec.js +0 -75
  47. package/dist/tests/e2e/discoveryWebSearchRelevance.spec.js +0 -84
  48. package/dist/tests/e2e/harness.spec.js +0 -83
  49. package/dist/tests/e2e/mcpLive.spec.js +0 -120
  50. package/dist/tests/e2e/newSession.e2e.spec.js +0 -177
  51. package/dist/tests/e2e/ralphLoopEnrichmentComparison.spec.js +0 -62
  52. package/dist/tests/e2e/workiqEnrichment.spec.js +0 -56
  53. package/dist/tests/e2e/zavaSimulation.spec.js +0 -452
  54. package/dist/tests/fixtures/test-fixture-project/src/add.js +0 -3
  55. package/dist/tests/fixtures/test-fixture-project/tests/failing.test.js +0 -6
  56. package/dist/tests/fixtures/test-fixture-project/tests/hanging.test.js +0 -8
  57. package/dist/tests/fixtures/test-fixture-project/tests/passing.test.js +0 -10
  58. package/dist/tests/fixtures/test-fixture-project/vitest.config.js +0 -6
  59. package/dist/tests/integration/autoStartConversation.spec.js +0 -138
  60. package/dist/tests/integration/defaultCommand.spec.js +0 -147
  61. package/dist/tests/integration/directCommandNonTty.spec.js +0 -224
  62. package/dist/tests/integration/directCommandTty.spec.js +0 -151
  63. package/dist/tests/integration/discoveryEnrichmentFlow.spec.js +0 -175
  64. package/dist/tests/integration/exportArtifacts.spec.js +0 -202
  65. package/dist/tests/integration/exportFallbackFlow.spec.js +0 -99
  66. package/dist/tests/integration/mcpDegradationFlow.spec.js +0 -190
  67. package/dist/tests/integration/mcpTransportFlow.spec.js +0 -139
  68. package/dist/tests/integration/newSessionFlow.spec.js +0 -343
  69. package/dist/tests/integration/pocGithubMcp.spec.js +0 -186
  70. package/dist/tests/integration/pocLocalFallback.spec.js +0 -171
  71. package/dist/tests/integration/pocScaffold.spec.js +0 -163
  72. package/dist/tests/integration/ralphLoopFlow.spec.js +0 -359
  73. package/dist/tests/integration/ralphLoopPartial.spec.js +0 -368
  74. package/dist/tests/integration/resumeAndBacktrack.spec.js +0 -247
  75. package/dist/tests/integration/spinnerLifecycle.spec.js +0 -220
  76. package/dist/tests/integration/summarizationFlow.spec.js +0 -115
  77. package/dist/tests/integration/testRunnerReal.spec.js +0 -52
  78. package/dist/tests/integration/webSearchAgent.spec.js +0 -128
  79. package/dist/tests/live/copilotSdkLive.spec.js +0 -107
  80. package/dist/tests/live/zavaFullWorkshop.spec.js +0 -392
  81. package/dist/tests/setup/loadEnv.js +0 -3
  82. package/dist/tests/unit/cli/developCommand.spec.js +0 -567
  83. package/dist/tests/unit/cli/directCommands.spec.js +0 -279
  84. package/dist/tests/unit/cli/envLoader.spec.js +0 -58
  85. package/dist/tests/unit/cli/ioContext.spec.js +0 -119
  86. package/dist/tests/unit/cli/preflight.spec.js +0 -108
  87. package/dist/tests/unit/cli/statusCommand.spec.js +0 -111
  88. package/dist/tests/unit/cli/workshopClientFallback.spec.js +0 -80
  89. package/dist/tests/unit/cli/workshopCommand.spec.js +0 -328
  90. package/dist/tests/unit/config/vitestEnvSetup.spec.js +0 -13
  91. package/dist/tests/unit/develop/checkpointState.spec.js +0 -315
  92. package/dist/tests/unit/develop/codeGenerator.spec.js +0 -355
  93. package/dist/tests/unit/develop/githubMcpAdapter.spec.js +0 -231
  94. package/dist/tests/unit/develop/mcpContextEnricher.spec.js +0 -433
  95. package/dist/tests/unit/develop/outputValidator.spec.js +0 -119
  96. package/dist/tests/unit/develop/pocScaffolder.spec.js +0 -353
  97. package/dist/tests/unit/develop/ralphLoop.spec.js +0 -1248
  98. package/dist/tests/unit/develop/templateRegistry.spec.js +0 -85
  99. package/dist/tests/unit/develop/testRunner.spec.js +0 -249
  100. package/dist/tests/unit/infraBicep.spec.js +0 -92
  101. package/dist/tests/unit/infraDeploy.spec.js +0 -82
  102. package/dist/tests/unit/infraTeardown.spec.js +0 -63
  103. package/dist/tests/unit/logging/logger.spec.js +0 -43
  104. package/dist/tests/unit/loop/conversationLoop.spec.js +0 -592
  105. package/dist/tests/unit/loop/phaseSummarizer.spec.js +0 -141
  106. package/dist/tests/unit/loop/streamingMarkdown.spec.js +0 -147
  107. package/dist/tests/unit/mcp/mcpManager.spec.js +0 -279
  108. package/dist/tests/unit/mcp/mcpTransport.spec.js +0 -529
  109. package/dist/tests/unit/mcp/retryPolicy.spec.js +0 -218
  110. package/dist/tests/unit/mcp/timeoutValidation.spec.js +0 -46
  111. package/dist/tests/unit/mcp/webSearch.spec.js +0 -567
  112. package/dist/tests/unit/phases/contextSummarizer.spec.js +0 -140
  113. package/dist/tests/unit/phases/discoveryEnricher.repeatCalls.spec.js +0 -93
  114. package/dist/tests/unit/phases/discoveryEnricher.spec.js +0 -411
  115. package/dist/tests/unit/phases/phaseExtractors.spec.js +0 -352
  116. package/dist/tests/unit/phases/phaseHandlers.spec.js +0 -425
  117. package/dist/tests/unit/prompts/promptLoader.spec.js +0 -118
  118. package/dist/tests/unit/schemas/pocSchemas.spec.js +0 -412
  119. package/dist/tests/unit/schemas/session.spec.js +0 -257
  120. package/dist/tests/unit/sessions/exportPaths.spec.js +0 -31
  121. package/dist/tests/unit/sessions/exportWriter.spec.js +0 -655
  122. package/dist/tests/unit/sessions/sessionManager.spec.js +0 -151
  123. package/dist/tests/unit/sessions/sessionStore.spec.js +0 -116
  124. package/dist/tests/unit/shared/activitySpinner.spec.js +0 -175
  125. package/dist/tests/unit/shared/cardsLoader.spec.js +0 -76
  126. package/dist/tests/unit/shared/copilotClient.spec.js +0 -155
  127. package/dist/tests/unit/shared/errorClassifier.spec.js +0 -131
  128. package/dist/tests/unit/shared/events.spec.js +0 -55
  129. package/dist/tests/unit/shared/markdownRenderer.spec.js +0 -35
  130. package/dist/tests/unit/shared/markdownRendererChunks.spec.js +0 -70
  131. package/dist/tests/unit/shared/tableRenderer.spec.js +0 -34
  132. package/dist/vitest.config.js +0 -14
  133. package/dist/vitest.live.config.js +0 -18
  134. package/src/develop/pocScaffolder.ts +0 -646
  135. package/tests/integration/pocScaffold.spec.ts +0 -220
  136. package/tests/unit/develop/pocScaffolder.spec.ts +0 -451
@@ -1,247 +0,0 @@
1
- /**
2
- * T050: E2E failure/recovery test.
3
- *
4
- * Verifies graceful termination; verifies `finalStatus` is "failed" or "partial"
5
- * in session state; verifies `terminationReason: "max-iterations"`;
6
- * verifies user-facing output includes recovery guidance (Constitution VI compliance).
7
- */
8
- import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
9
- import { mkdtemp, rm } from 'node:fs/promises';
10
- import { join } from 'node:path';
11
- import { tmpdir } from 'node:os';
12
- import { createRequire } from 'node:module';
13
- import { RalphLoop } from '../../src/develop/ralphLoop.js';
14
- vi.mock('node:child_process', async (importOriginal) => {
15
- const actual = await importOriginal();
16
- return {
17
- ...actual,
18
- spawn: vi.fn((cmd, args) => {
19
- if (cmd === 'npm' && args.includes('install')) {
20
- return {
21
- stdout: { on: vi.fn() },
22
- stderr: { on: vi.fn() },
23
- on: vi.fn((event, cb) => {
24
- if (event === 'close')
25
- cb(0);
26
- }),
27
- kill: vi.fn(),
28
- killed: false,
29
- };
30
- }
31
- return actual.spawn(cmd, args);
32
- }),
33
- };
34
- });
35
- const require = createRequire(import.meta.url);
36
- const fixtureSession = require('../fixtures/completedSession.json');
37
- describe('E2E: failure/recovery (T050)', () => {
38
- let tmpDir;
39
- let originalExitCode;
40
- beforeEach(async () => {
41
- tmpDir = await mkdtemp(join(tmpdir(), 'sofia-e2e-failure-'));
42
- originalExitCode = process.exitCode;
43
- process.exitCode = undefined;
44
- });
45
- afterEach(async () => {
46
- await rm(tmpDir, { recursive: true, force: true });
47
- process.exitCode = originalExitCode;
48
- vi.restoreAllMocks();
49
- });
50
- function makeIo() {
51
- const writtenLines = [];
52
- const activityLines = [];
53
- return {
54
- writtenLines,
55
- activityLines,
56
- write: vi.fn((text) => { writtenLines.push(text); }),
57
- writeActivity: vi.fn((text) => { activityLines.push(text); }),
58
- writeToolSummary: vi.fn(),
59
- readInput: vi.fn().mockResolvedValue(null),
60
- showDecisionGate: vi.fn(),
61
- isJsonMode: false,
62
- isTTY: false,
63
- };
64
- }
65
- function makeFakeScaffolder(outputDir) {
66
- return {
67
- scaffold: vi.fn().mockImplementation(async () => {
68
- const { writeFile, mkdir } = await import('node:fs/promises');
69
- await mkdir(join(outputDir, 'src'), { recursive: true });
70
- await writeFile(join(outputDir, 'package.json'), JSON.stringify({
71
- name: 'test-poc',
72
- scripts: { test: 'vitest run' },
73
- dependencies: {},
74
- devDependencies: {},
75
- }), 'utf-8');
76
- await writeFile(join(outputDir, 'src', 'index.ts'), 'export function main() {}', 'utf-8');
77
- return {
78
- createdFiles: ['package.json', 'src/index.ts'],
79
- skippedFiles: [],
80
- context: {
81
- projectName: 'test-poc',
82
- ideaTitle: 'Test',
83
- ideaDescription: 'Test',
84
- techStack: { language: 'TypeScript', runtime: 'Node.js 20', testRunner: 'npm test' },
85
- planSummary: 'Test',
86
- sessionId: fixtureSession.sessionId,
87
- outputDir,
88
- },
89
- };
90
- }),
91
- getTemplateFiles: () => [],
92
- };
93
- }
94
- function makeAlwaysFailingClient() {
95
- return {
96
- createSession: vi.fn().mockResolvedValue({
97
- send: vi.fn().mockReturnValue({
98
- async *[Symbol.asyncIterator]() {
99
- yield { type: 'TextDelta', text: '', timestamp: '' };
100
- },
101
- }),
102
- getHistory: () => [],
103
- }),
104
- };
105
- }
106
- function makeAlwaysFailingTestRunner() {
107
- return {
108
- run: vi.fn().mockResolvedValue({
109
- passed: 0,
110
- failed: 1,
111
- skipped: 0,
112
- total: 1,
113
- durationMs: 400,
114
- failures: [{ testName: 'test', message: 'always fails' }],
115
- rawOutput: '',
116
- }),
117
- };
118
- }
119
- it('terminates with max-iterations when all tests keep failing', async () => {
120
- const io = makeIo();
121
- const scaffolder = makeFakeScaffolder(tmpDir);
122
- const client = makeAlwaysFailingClient();
123
- const testRunner = makeAlwaysFailingTestRunner();
124
- const ralph = new RalphLoop({
125
- client,
126
- io,
127
- session: fixtureSession,
128
- outputDir: tmpDir,
129
- maxIterations: 2,
130
- testRunner,
131
- scaffolder,
132
- });
133
- const result = await ralph.run();
134
- expect(result.terminationReason).toBe('max-iterations');
135
- expect(['failed', 'partial']).toContain(result.finalStatus);
136
- });
137
- it('verifies terminationReason=max-iterations in session state', async () => {
138
- const io = makeIo();
139
- const scaffolder = makeFakeScaffolder(tmpDir);
140
- const client = makeAlwaysFailingClient();
141
- const testRunner = makeAlwaysFailingTestRunner();
142
- const ralph = new RalphLoop({
143
- client,
144
- io,
145
- session: fixtureSession,
146
- outputDir: tmpDir,
147
- maxIterations: 2,
148
- testRunner,
149
- scaffolder,
150
- });
151
- const result = await ralph.run();
152
- expect(result.session.poc?.terminationReason).toBe('max-iterations');
153
- expect(result.session.poc?.finalStatus).toBeDefined();
154
- });
155
- it('session has iteration history after failed loop', async () => {
156
- const io = makeIo();
157
- const scaffolder = makeFakeScaffolder(tmpDir);
158
- const client = makeAlwaysFailingClient();
159
- const testRunner = makeAlwaysFailingTestRunner();
160
- const ralph = new RalphLoop({
161
- client,
162
- io,
163
- session: fixtureSession,
164
- outputDir: tmpDir,
165
- maxIterations: 2,
166
- testRunner,
167
- scaffolder,
168
- });
169
- const result = await ralph.run();
170
- // Should have at least scaffold iteration
171
- expect(result.session.poc?.iterations.length).toBeGreaterThan(0);
172
- expect(result.session.poc?.iterations[0].outcome).toBe('scaffold');
173
- });
174
- it('shows recovery guidance in non-JSON output for failed status (Constitution VI)', async () => {
175
- const { developCommand } = await import('../../src/cli/developCommand.js');
176
- const devIo = makeIo();
177
- const client = makeAlwaysFailingClient();
178
- const store = {
179
- load: vi.fn().mockResolvedValue(fixtureSession),
180
- save: vi.fn().mockResolvedValue(undefined),
181
- list: vi.fn().mockResolvedValue([fixtureSession.sessionId]),
182
- };
183
- // Mock RalphLoop.prototype.run to return failed immediately
184
- const originalRun = RalphLoop.prototype.run;
185
- const sessionWithFailedPoc = {
186
- ...fixtureSession,
187
- poc: {
188
- repoSource: 'local',
189
- repoPath: tmpDir,
190
- iterations: [],
191
- finalStatus: 'failed',
192
- terminationReason: 'max-iterations',
193
- },
194
- };
195
- RalphLoop.prototype.run = vi.fn().mockResolvedValue({
196
- session: sessionWithFailedPoc,
197
- finalStatus: 'failed',
198
- terminationReason: 'max-iterations',
199
- iterationsCompleted: 2,
200
- outputDir: tmpDir,
201
- });
202
- try {
203
- await developCommand({ session: fixtureSession.sessionId, maxIterations: 1, output: tmpDir }, { store, io: devIo, client });
204
- }
205
- finally {
206
- RalphLoop.prototype.run = originalRun;
207
- }
208
- const allOutput = devIo.writtenLines.join('\n');
209
- // developCommand should show recovery guidance for non-success status
210
- expect(allOutput).toMatch(/resume|retry|force|more.*iter/i);
211
- });
212
- it('sets process.exitCode=1 when loop terminates with failed status', async () => {
213
- const { developCommand } = await import('../../src/cli/developCommand.js');
214
- const devIo = makeIo();
215
- const client = makeAlwaysFailingClient();
216
- const store = {
217
- load: vi.fn().mockResolvedValue(fixtureSession),
218
- save: vi.fn().mockResolvedValue(undefined),
219
- list: vi.fn().mockResolvedValue([fixtureSession.sessionId]),
220
- };
221
- const sessionWithFailedPoc = {
222
- ...fixtureSession,
223
- poc: {
224
- repoSource: 'local',
225
- repoPath: tmpDir,
226
- iterations: [],
227
- finalStatus: 'failed',
228
- terminationReason: 'max-iterations',
229
- },
230
- };
231
- const originalRun = RalphLoop.prototype.run;
232
- RalphLoop.prototype.run = vi.fn().mockResolvedValue({
233
- session: sessionWithFailedPoc,
234
- finalStatus: 'failed',
235
- terminationReason: 'max-iterations',
236
- iterationsCompleted: 2,
237
- outputDir: tmpDir,
238
- });
239
- try {
240
- await developCommand({ session: fixtureSession.sessionId }, { store, io: devIo, client });
241
- }
242
- finally {
243
- RalphLoop.prototype.run = originalRun;
244
- }
245
- expect(process.exitCode).toBe(1);
246
- });
247
- });
@@ -1,75 +0,0 @@
1
- /**
2
- * T049-T051: PTY-based interactive E2E tests for `sofia dev`.
3
- *
4
- * Validates Ctrl+C handling, progress output, and clean exit behavior.
5
- * Gracefully skips if node-pty allocation fails (e.g., CI without TTY).
6
- */
7
- import { describe, it, expect } from 'vitest';
8
- // ── PTY availability guard (T051) ────────────────────────────────────────────
9
- let pty;
10
- let ptyAvailable = false;
11
- try {
12
- pty = await import('node-pty');
13
- // Attempt a minimal allocation to verify PTY works
14
- const testProc = pty.spawn('echo', ['test'], { cols: 80, rows: 24 });
15
- testProc.kill();
16
- ptyAvailable = true;
17
- }
18
- catch {
19
- ptyAvailable = false;
20
- }
21
- describe('PTY-based E2E: sofia dev', () => {
22
- // T051: Skip gracefully if node-pty allocation fails
23
- const itPty = ptyAvailable ? it : it.skip;
24
- itPty('help output appears in PTY buffer (T050)', async () => {
25
- if (!pty)
26
- return;
27
- const proc = pty.spawn('npx', ['tsx', 'src/cli/index.ts', 'dev', '--help'], {
28
- cols: 120,
29
- rows: 40,
30
- cwd: process.cwd(),
31
- env: { ...process.env },
32
- });
33
- let output = '';
34
- proc.onData((data) => {
35
- output += data;
36
- });
37
- const exitCode = await new Promise((resolve) => {
38
- proc.onExit(({ exitCode: code }) => {
39
- resolve(code);
40
- });
41
- setTimeout(() => {
42
- proc.kill();
43
- resolve(-1);
44
- }, 15_000);
45
- });
46
- // --help should produce usage output containing 'dev'
47
- expect(output).toContain('dev');
48
- expect(exitCode).toBe(0);
49
- }, 20_000);
50
- itPty('Ctrl+C sends signal to running process (T049)', async () => {
51
- if (!pty)
52
- return;
53
- // Use a simple process that sleeps, then send Ctrl+C
54
- const proc = pty.spawn('sleep', ['30'], {
55
- cols: 80,
56
- rows: 24,
57
- cwd: process.cwd(),
58
- env: { ...process.env },
59
- });
60
- // Wait briefly then send Ctrl+C
61
- await new Promise((resolve) => setTimeout(resolve, 500));
62
- proc.write('\x03'); // Ctrl+C
63
- const exitCode = await new Promise((resolve) => {
64
- proc.onExit(({ exitCode: code }) => {
65
- resolve(code);
66
- });
67
- setTimeout(() => {
68
- proc.kill();
69
- resolve(-999);
70
- }, 5_000);
71
- });
72
- // Process should have been interrupted (not timed out)
73
- expect(exitCode).not.toBe(-999);
74
- }, 10_000);
75
- });
@@ -1,84 +0,0 @@
1
- /**
2
- * T041: Discovery web search enrichment relevance validation (SC-003-005).
3
- *
4
- * Validates that discovery web search enrichment retrieves keyword-relevant
5
- * context for at least 3 out of 5 test company descriptions.
6
- * Gated behind SOFIA_LIVE_MCP_TESTS=true because it requires real web search.
7
- *
8
- * Acceptance criteria:
9
- * - Run enrichFromWebSearch() for 5 different company descriptions
10
- * - At least 3/5 must return results with keyword-relevant content
11
- * - "Keyword-relevant" = at least one result snippet contains a word from the company/industry
12
- */
13
- import { describe, it, expect } from 'vitest';
14
- const LIVE = process.env.SOFIA_LIVE_MCP_TESTS === 'true';
15
- const TEST_COMPANIES = [
16
- {
17
- summary: '"Nestlé" is a global food and beverage company headquartered in Switzerland.',
18
- keywords: ['food', 'beverage', 'switzerland', 'global', 'company'],
19
- },
20
- {
21
- summary: '"Zara" is a global retail company headquartered in Spain.',
22
- keywords: ['retail', 'fashion', 'spain', 'global', 'company'],
23
- },
24
- {
25
- summary: '"Microsoft Corporation" is a global technology company headquartered in Redmond, Washington.',
26
- keywords: ['technology', 'software', 'hardware', 'cloud', 'global'],
27
- },
28
- {
29
- summary: '"Maersk" is a global shipping and logistics company headquartered in Copenhagen, Denmark.',
30
- keywords: ['shipping', 'logistics', 'denmark', 'global', 'company'],
31
- },
32
- {
33
- summary: '"Hasbro" is a global toy and entertainment company headquartered in Pawtucket, Rhode Island.',
34
- keywords: ['toy', 'entertainment', 'rhode island', 'global', 'company'],
35
- },
36
- ];
37
- describe.skipIf(!LIVE)('Discovery web search relevance validation (T041 / SC-003-005)', () => {
38
- it('at least 3/5 company descriptions return keyword-relevant results', async () => {
39
- const { DiscoveryEnricher } = await import('../../src/phases/discoveryEnricher.js');
40
- const { createWebSearchTool } = await import('../../src/mcp/webSearch.js');
41
- const webSearchFn = createWebSearchTool({
42
- projectEndpoint: process.env.FOUNDRY_PROJECT_ENDPOINT,
43
- modelDeploymentName: process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME,
44
- });
45
- const webSearchClient = {
46
- search: async (query) => webSearchFn(query),
47
- };
48
- const enricher = new DiscoveryEnricher();
49
- let relevantCount = 0;
50
- const results = [];
51
- for (const company of TEST_COMPANIES) {
52
- const enrichment = await enricher.enrichFromWebSearch(company.summary, webSearchClient);
53
- // Collect all result strings from enrichment
54
- const allText = [
55
- ...(enrichment.companyNews ?? []),
56
- ...(enrichment.competitorInfo ?? []),
57
- ...(enrichment.industryTrends ?? []),
58
- ]
59
- .join(' ')
60
- .toLowerCase();
61
- const snippetCount = (enrichment.companyNews?.length ?? 0) +
62
- (enrichment.competitorInfo?.length ?? 0) +
63
- (enrichment.industryTrends?.length ?? 0);
64
- // Check if any keyword appears in the results
65
- const hasRelevantKeyword = company.keywords.some((kw) => allText.includes(kw.toLowerCase()));
66
- if (hasRelevantKeyword && snippetCount > 0) {
67
- relevantCount++;
68
- }
69
- results.push({
70
- company: company.summary.split('"')[1] || company.summary.slice(0, 30),
71
- relevant: hasRelevantKeyword && snippetCount > 0,
72
- snippetCount,
73
- });
74
- }
75
- // Log outcomes for manual review
76
- console.log('=== T041 Web Search Relevance Validation ===');
77
- for (const r of results) {
78
- console.log(` ${r.relevant ? '✓' : '✗'} ${r.company}: ${r.snippetCount} snippets`);
79
- }
80
- console.log(`Result: ${relevantCount}/5 companies have relevant results`);
81
- // Acceptance: at least 3 out of 5
82
- expect(relevantCount).toBeGreaterThanOrEqual(3);
83
- }, 120_000); // 2 minute timeout for multiple web searches
84
- });
@@ -1,83 +0,0 @@
1
- /**
2
- * E2E test harness skeleton.
3
- *
4
- * Uses node-pty to drive the sofIA CLI interactively, simulating
5
- * user input and verifying streaming output. This is a skeleton —
6
- * actual E2E test scenarios will be added in US1 (T021).
7
- *
8
- * Requirements:
9
- * - node-pty must be installed (`npm install node-pty`)
10
- * - Tests run under the `test:e2e` npm script
11
- */
12
- import { describe, it, expect } from 'vitest';
13
- import { spawn } from 'node:child_process';
14
- import { join, dirname } from 'node:path';
15
- import { fileURLToPath } from 'node:url';
16
- const __dirname = dirname(fileURLToPath(import.meta.url));
17
- const PROJECT_ROOT = join(__dirname, '..', '..');
18
- const CLI_ENTRY = join(PROJECT_ROOT, 'src', 'cli', 'index.ts');
19
- /**
20
- * Run the sofIA CLI with given arguments and return output.
21
- * Uses tsx to run TypeScript directly.
22
- */
23
- function runCli(args, timeoutMs = 10000) {
24
- return new Promise((resolve, reject) => {
25
- const child = spawn('npx', ['tsx', CLI_ENTRY, ...args], {
26
- cwd: PROJECT_ROOT,
27
- env: { ...process.env, NODE_ENV: 'test' },
28
- stdio: ['pipe', 'pipe', 'pipe'],
29
- });
30
- const stdout = [];
31
- const stderr = [];
32
- child.stdout.on('data', (chunk) => stdout.push(chunk));
33
- child.stderr.on('data', (chunk) => stderr.push(chunk));
34
- const timer = setTimeout(() => {
35
- child.kill('SIGTERM');
36
- reject(new Error(`CLI timed out after ${timeoutMs}ms`));
37
- }, timeoutMs);
38
- child.on('close', (code) => {
39
- clearTimeout(timer);
40
- resolve({
41
- stdout: Buffer.concat(stdout).toString('utf-8'),
42
- stderr: Buffer.concat(stderr).toString('utf-8'),
43
- exitCode: code,
44
- });
45
- });
46
- child.on('error', (err) => {
47
- clearTimeout(timer);
48
- reject(err);
49
- });
50
- });
51
- }
52
- // ── Tests ────────────────────────────────────────────────────────────────────
53
- describe('E2E Harness', () => {
54
- it('displays help when invoked with --help', async () => {
55
- const result = await runCli(['--help']);
56
- expect(result.exitCode).toBe(0);
57
- expect(result.stdout).toContain('sofIA');
58
- expect(result.stdout).toContain('workshop');
59
- expect(result.stdout).toContain('status');
60
- expect(result.stdout).toContain('export');
61
- }, 15_000);
62
- it('displays version when invoked with --version', async () => {
63
- const result = await runCli(['--version']);
64
- expect(result.exitCode).toBe(0);
65
- expect(result.stdout.trim()).toMatch(/^\d+\.\d+\.\d+$/);
66
- }, 15_000);
67
- it('shows workshop help', async () => {
68
- const result = await runCli(['workshop', '--help']);
69
- expect(result.exitCode).toBe(0);
70
- expect(result.stdout).toContain('workshop');
71
- }, 15_000);
72
- it('lists sessions or reports none when status invoked without session', async () => {
73
- const result = await runCli(['status', '--json']);
74
- // Either lists sessions or reports no sessions found — both valid
75
- const parsed = JSON.parse(result.stdout);
76
- expect(parsed).toBeDefined();
77
- expect('sessions' in parsed || 'error' in parsed).toBe(true);
78
- }, 15_000);
79
- it('returns error for export without session', async () => {
80
- const result = await runCli(['export', '--json']);
81
- expect(result.stdout).toContain('No session specified');
82
- }, 15_000);
83
- });
@@ -1,120 +0,0 @@
1
- /**
2
- * T039: Live MCP smoke tests.
3
- *
4
- * Gated behind SOFIA_LIVE_MCP_TESTS=true environment variable.
5
- * These tests exercise real MCP server integrations:
6
- * - GitHub MCP: create/delete a test repository (infrastructure validation only)
7
- * - Context7: resolve a library ID
8
- * - Azure MCP: return documentation for a simple query
9
- * - Web search: return results for a test query
10
- *
11
- * NOTE: GitHub MCP test validates the infrastructure works, but sofIA does NOT
12
- * automatically create GitHub repos during PoC generation. PoCs are created locally
13
- * with git init, and users manually push when ready (safer approach).
14
- *
15
- * Requires:
16
- * - GitHub MCP: GITHUB_TOKEN env var OR `gh auth login` (GitHub CLI)
17
- * - MCP servers accessible
18
- */
19
- import { describe, it, expect } from 'vitest';
20
- import { execSync } from 'node:child_process';
21
- const LIVE = process.env.SOFIA_LIVE_MCP_TESTS === 'true';
22
- /**
23
- * Check if GitHub authentication is available (env var or GitHub CLI).
24
- */
25
- function hasGitHubAuth() {
26
- if (process.env.GITHUB_TOKEN)
27
- return true;
28
- try {
29
- const token = execSync('gh auth token', {
30
- encoding: 'utf8',
31
- stdio: ['pipe', 'pipe', 'ignore'],
32
- timeout: 2000,
33
- }).trim();
34
- return !!token;
35
- }
36
- catch {
37
- return false;
38
- }
39
- }
40
- describe.skipIf(!LIVE)('Live MCP Smoke Tests (T039)', () => {
41
- it.skipIf(!hasGitHubAuth())('GitHub MCP: creates and deletes a test repository', { timeout: 35_000 }, async () => {
42
- // This test requires GITHUB_TOKEN env var OR `gh auth login` (GitHub CLI)
43
- const { McpManager, loadMcpConfig } = await import('../../src/mcp/mcpManager.js');
44
- const config = await loadMcpConfig('.vscode/mcp.json');
45
- const manager = new McpManager(config);
46
- manager.markConnected('github');
47
- const repoName = `sofia-mcp-test-${Date.now()}`;
48
- try {
49
- const createResult = await manager.callTool('github', 'create_repository', {
50
- name: repoName,
51
- description: 'Automated MCP integration test — safe to delete',
52
- private: true,
53
- }, { timeoutMs: 30_000 });
54
- expect(createResult).toBeDefined();
55
- expect(typeof createResult).toBe('object');
56
- // Verify the repository was created - McpManager already parses the content
57
- expect(createResult).toHaveProperty('url');
58
- expect(createResult.url).toContain(repoName);
59
- // Best-effort cleanup: delete the test repo using GitHub CLI
60
- // Note: This requires delete_repo scope; if it fails, the repo will need manual cleanup
61
- try {
62
- const username = execSync('gh api user --jq .login', { encoding: 'utf8' }).trim();
63
- execSync(`gh repo delete ${username}/${repoName} --yes`, {
64
- encoding: 'utf8',
65
- stdio: ['pipe', 'pipe', 'pipe'], // capture all output
66
- });
67
- }
68
- catch (_cleanupError) {
69
- // Cleanup failure is not a test failure - just log it
70
- console.warn(`⚠️ Could not auto-delete test repo ${repoName}. Please delete manually or grant delete_repo scope.`);
71
- console.warn(` Command: gh repo delete <username>/${repoName} --yes`);
72
- }
73
- }
74
- finally {
75
- await manager.disconnectAll();
76
- }
77
- });
78
- it('Context7: resolves "express" library ID', async () => {
79
- const { McpManager, loadMcpConfig } = await import('../../src/mcp/mcpManager.js');
80
- const config = await loadMcpConfig('.vscode/mcp.json');
81
- const manager = new McpManager(config);
82
- manager.markConnected('context7');
83
- try {
84
- const result = await manager.callTool('context7', 'resolve-library-id', {
85
- query: 'resolve express library id',
86
- libraryName: 'express',
87
- }, { timeoutMs: 30_000 });
88
- expect(result).toBeDefined();
89
- const rawText = typeof result.text === 'string' ? result.text : JSON.stringify(result);
90
- const content = rawText.toLowerCase();
91
- // Response should contain meaningful resolve-library-id content
92
- const expectedKeywords = ['express', 'context7-compatible library id', 'code snippets'];
93
- const matchedKeywords = expectedKeywords.filter((keyword) => content.includes(keyword));
94
- expect(matchedKeywords.length).toBeGreaterThanOrEqual(2);
95
- // Ensure at least one high-confidence Express library ID appears
96
- expect(content).toMatch(/\/expressjs\/express|\/websites\/expressjs_en/);
97
- }
98
- finally {
99
- await manager.disconnectAll();
100
- }
101
- });
102
- it('Web search: returns results for a test query', async () => {
103
- const { createWebSearchTool, isWebSearchConfigured } = await import('../../src/mcp/webSearch.js');
104
- // Skip if web search is not configured
105
- if (!isWebSearchConfigured()) {
106
- console.log('Web search not configured, skipping test');
107
- return;
108
- }
109
- const webSearch = createWebSearchTool({
110
- projectEndpoint: process.env.FOUNDRY_PROJECT_ENDPOINT,
111
- modelDeploymentName: process.env.FOUNDRY_MODEL_DEPLOYMENT_NAME,
112
- });
113
- const result = await webSearch('TypeScript Node.js framework 2025');
114
- expect(result.degraded).toBeOneOf([false, undefined]);
115
- expect(result).toBeDefined();
116
- expect(result.results).toBeDefined();
117
- expect(Array.isArray(result.results)).toBe(true);
118
- expect(result.results.length).toBeGreaterThan(0);
119
- }, 30_000); // 30 second timeout for web search
120
- });