sofia-cli 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/README.md +42 -20
  2. package/dist/infra/deploy.sh +193 -0
  3. package/dist/infra/gather-env.sh +211 -0
  4. package/dist/infra/infra/deploy.sh +193 -0
  5. package/dist/infra/infra/gather-env.sh +211 -0
  6. package/dist/infra/infra/main.bicep +90 -0
  7. package/dist/infra/infra/main.bicepparam +18 -0
  8. package/dist/infra/infra/resources.bicep +134 -0
  9. package/dist/infra/infra/teardown.sh +114 -0
  10. package/dist/infra/main.bicep +90 -0
  11. package/dist/infra/main.bicepparam +18 -0
  12. package/dist/infra/resources.bicep +134 -0
  13. package/dist/infra/teardown.sh +114 -0
  14. package/dist/src/cli/developCommand.js +0 -2
  15. package/dist/src/cli/index.js +8 -1
  16. package/dist/src/cli/workshopCommand.js +1 -1
  17. package/dist/src/develop/index.js +1 -1
  18. package/dist/src/develop/pocUtils.js +228 -0
  19. package/dist/src/develop/ralphLoop.js +8 -27
  20. package/dist/src/shared/data/cards.json +655 -670
  21. package/docs/architecture.md +2 -1
  22. package/package.json +5 -3
  23. package/src/cli/developCommand.ts +1 -3
  24. package/src/cli/index.ts +11 -1
  25. package/src/cli/workshopCommand.ts +21 -17
  26. package/src/develop/dynamicScaffolder.ts +36 -30
  27. package/src/develop/index.ts +13 -2
  28. package/src/develop/pocUtils.ts +296 -0
  29. package/src/develop/ralphLoop.ts +8 -28
  30. package/src/develop/templateRegistry.ts +19 -18
  31. package/src/shared/data/cards.json +655 -670
  32. package/tests/e2e/developE2e.spec.ts +3 -61
  33. package/tests/e2e/developFailureE2e.spec.ts +34 -38
  34. package/tests/integration/pocGithubMcp.spec.ts +29 -39
  35. package/tests/integration/pocLocalFallback.spec.ts +29 -39
  36. package/tests/integration/ralphLoopFlow.spec.ts +46 -66
  37. package/tests/integration/ralphLoopPartial.spec.ts +30 -37
  38. package/tests/unit/develop/githubMcpAdapter.spec.ts +0 -134
  39. package/tests/unit/develop/outputValidator.spec.ts +45 -21
  40. package/tests/unit/develop/ralphLoop.spec.ts +58 -94
  41. package/tsconfig.json +2 -1
  42. package/vitest.workspace.ts +5 -0
  43. package/dist/src/develop/pocScaffolder.js +0 -542
  44. package/dist/tests/e2e/developE2e.spec.js +0 -126
  45. package/dist/tests/e2e/developFailureE2e.spec.js +0 -247
  46. package/dist/tests/e2e/developPty.spec.js +0 -75
  47. package/dist/tests/e2e/discoveryWebSearchRelevance.spec.js +0 -84
  48. package/dist/tests/e2e/harness.spec.js +0 -83
  49. package/dist/tests/e2e/mcpLive.spec.js +0 -120
  50. package/dist/tests/e2e/newSession.e2e.spec.js +0 -177
  51. package/dist/tests/e2e/ralphLoopEnrichmentComparison.spec.js +0 -62
  52. package/dist/tests/e2e/workiqEnrichment.spec.js +0 -56
  53. package/dist/tests/e2e/zavaSimulation.spec.js +0 -452
  54. package/dist/tests/fixtures/test-fixture-project/src/add.js +0 -3
  55. package/dist/tests/fixtures/test-fixture-project/tests/failing.test.js +0 -6
  56. package/dist/tests/fixtures/test-fixture-project/tests/hanging.test.js +0 -8
  57. package/dist/tests/fixtures/test-fixture-project/tests/passing.test.js +0 -10
  58. package/dist/tests/fixtures/test-fixture-project/vitest.config.js +0 -6
  59. package/dist/tests/integration/autoStartConversation.spec.js +0 -138
  60. package/dist/tests/integration/defaultCommand.spec.js +0 -147
  61. package/dist/tests/integration/directCommandNonTty.spec.js +0 -224
  62. package/dist/tests/integration/directCommandTty.spec.js +0 -151
  63. package/dist/tests/integration/discoveryEnrichmentFlow.spec.js +0 -175
  64. package/dist/tests/integration/exportArtifacts.spec.js +0 -202
  65. package/dist/tests/integration/exportFallbackFlow.spec.js +0 -99
  66. package/dist/tests/integration/mcpDegradationFlow.spec.js +0 -190
  67. package/dist/tests/integration/mcpTransportFlow.spec.js +0 -139
  68. package/dist/tests/integration/newSessionFlow.spec.js +0 -343
  69. package/dist/tests/integration/pocGithubMcp.spec.js +0 -186
  70. package/dist/tests/integration/pocLocalFallback.spec.js +0 -171
  71. package/dist/tests/integration/pocScaffold.spec.js +0 -163
  72. package/dist/tests/integration/ralphLoopFlow.spec.js +0 -359
  73. package/dist/tests/integration/ralphLoopPartial.spec.js +0 -368
  74. package/dist/tests/integration/resumeAndBacktrack.spec.js +0 -247
  75. package/dist/tests/integration/spinnerLifecycle.spec.js +0 -220
  76. package/dist/tests/integration/summarizationFlow.spec.js +0 -115
  77. package/dist/tests/integration/testRunnerReal.spec.js +0 -52
  78. package/dist/tests/integration/webSearchAgent.spec.js +0 -128
  79. package/dist/tests/live/copilotSdkLive.spec.js +0 -107
  80. package/dist/tests/live/zavaFullWorkshop.spec.js +0 -392
  81. package/dist/tests/setup/loadEnv.js +0 -3
  82. package/dist/tests/unit/cli/developCommand.spec.js +0 -567
  83. package/dist/tests/unit/cli/directCommands.spec.js +0 -279
  84. package/dist/tests/unit/cli/envLoader.spec.js +0 -58
  85. package/dist/tests/unit/cli/ioContext.spec.js +0 -119
  86. package/dist/tests/unit/cli/preflight.spec.js +0 -108
  87. package/dist/tests/unit/cli/statusCommand.spec.js +0 -111
  88. package/dist/tests/unit/cli/workshopClientFallback.spec.js +0 -80
  89. package/dist/tests/unit/cli/workshopCommand.spec.js +0 -328
  90. package/dist/tests/unit/config/vitestEnvSetup.spec.js +0 -13
  91. package/dist/tests/unit/develop/checkpointState.spec.js +0 -315
  92. package/dist/tests/unit/develop/codeGenerator.spec.js +0 -355
  93. package/dist/tests/unit/develop/githubMcpAdapter.spec.js +0 -231
  94. package/dist/tests/unit/develop/mcpContextEnricher.spec.js +0 -433
  95. package/dist/tests/unit/develop/outputValidator.spec.js +0 -119
  96. package/dist/tests/unit/develop/pocScaffolder.spec.js +0 -353
  97. package/dist/tests/unit/develop/ralphLoop.spec.js +0 -1248
  98. package/dist/tests/unit/develop/templateRegistry.spec.js +0 -85
  99. package/dist/tests/unit/develop/testRunner.spec.js +0 -249
  100. package/dist/tests/unit/infraBicep.spec.js +0 -92
  101. package/dist/tests/unit/infraDeploy.spec.js +0 -82
  102. package/dist/tests/unit/infraTeardown.spec.js +0 -63
  103. package/dist/tests/unit/logging/logger.spec.js +0 -43
  104. package/dist/tests/unit/loop/conversationLoop.spec.js +0 -592
  105. package/dist/tests/unit/loop/phaseSummarizer.spec.js +0 -141
  106. package/dist/tests/unit/loop/streamingMarkdown.spec.js +0 -147
  107. package/dist/tests/unit/mcp/mcpManager.spec.js +0 -279
  108. package/dist/tests/unit/mcp/mcpTransport.spec.js +0 -529
  109. package/dist/tests/unit/mcp/retryPolicy.spec.js +0 -218
  110. package/dist/tests/unit/mcp/timeoutValidation.spec.js +0 -46
  111. package/dist/tests/unit/mcp/webSearch.spec.js +0 -567
  112. package/dist/tests/unit/phases/contextSummarizer.spec.js +0 -140
  113. package/dist/tests/unit/phases/discoveryEnricher.repeatCalls.spec.js +0 -93
  114. package/dist/tests/unit/phases/discoveryEnricher.spec.js +0 -411
  115. package/dist/tests/unit/phases/phaseExtractors.spec.js +0 -352
  116. package/dist/tests/unit/phases/phaseHandlers.spec.js +0 -425
  117. package/dist/tests/unit/prompts/promptLoader.spec.js +0 -118
  118. package/dist/tests/unit/schemas/pocSchemas.spec.js +0 -412
  119. package/dist/tests/unit/schemas/session.spec.js +0 -257
  120. package/dist/tests/unit/sessions/exportPaths.spec.js +0 -31
  121. package/dist/tests/unit/sessions/exportWriter.spec.js +0 -655
  122. package/dist/tests/unit/sessions/sessionManager.spec.js +0 -151
  123. package/dist/tests/unit/sessions/sessionStore.spec.js +0 -116
  124. package/dist/tests/unit/shared/activitySpinner.spec.js +0 -175
  125. package/dist/tests/unit/shared/cardsLoader.spec.js +0 -76
  126. package/dist/tests/unit/shared/copilotClient.spec.js +0 -155
  127. package/dist/tests/unit/shared/errorClassifier.spec.js +0 -131
  128. package/dist/tests/unit/shared/events.spec.js +0 -55
  129. package/dist/tests/unit/shared/markdownRenderer.spec.js +0 -35
  130. package/dist/tests/unit/shared/markdownRendererChunks.spec.js +0 -70
  131. package/dist/tests/unit/shared/tableRenderer.spec.js +0 -34
  132. package/dist/vitest.config.js +0 -14
  133. package/dist/vitest.live.config.js +0 -18
  134. package/src/develop/pocScaffolder.ts +0 -646
  135. package/tests/integration/pocScaffold.spec.ts +0 -220
  136. package/tests/unit/develop/pocScaffolder.spec.ts +0 -451
@@ -1,359 +0,0 @@
1
- /**
2
- * T023: Integration test for Ralph loop with fakes.
3
- *
4
- * Uses a fake CopilotClient and fake test runner:
5
- * scaffold → fail tests → LLM generates fix → tests pass → loop terminates with success.
6
- * Verifies at least one iteration where failing test guides a fix (SC-002-003).
7
- */
8
- import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
9
- import { mkdtemp, rm, readFile } from 'node:fs/promises';
10
- import { join } from 'node:path';
11
- import { tmpdir } from 'node:os';
12
- import { createRequire } from 'node:module';
13
- import { RalphLoop } from '../../src/develop/ralphLoop.js';
14
- // Mock npm install
15
- vi.mock('node:child_process', async (importOriginal) => {
16
- const actual = await importOriginal();
17
- return {
18
- ...actual,
19
- spawn: vi.fn((cmd, args) => {
20
- if (cmd === 'npm' && args.includes('install')) {
21
- return {
22
- stdout: { on: vi.fn() },
23
- stderr: { on: vi.fn() },
24
- on: vi.fn((event, cb) => {
25
- if (event === 'close')
26
- cb(0);
27
- }),
28
- kill: vi.fn(),
29
- killed: false,
30
- };
31
- }
32
- return actual.spawn(cmd, args);
33
- }),
34
- };
35
- });
36
- // Mock validatePocOutput to always pass in integration tests
37
- vi.mock('../../src/develop/pocScaffolder.js', async (importOriginal) => {
38
- const actual = await importOriginal();
39
- return {
40
- ...actual,
41
- validatePocOutput: vi.fn().mockResolvedValue({ valid: true, missingFiles: [], errors: [] }),
42
- };
43
- });
44
- const require = createRequire(import.meta.url);
45
- const fixtureSession = require('../fixtures/completedSession.json');
46
- // ── Helpers ───────────────────────────────────────────────────────────────────
47
- function makeIo() {
48
- return {
49
- write: vi.fn(),
50
- writeActivity: vi.fn(),
51
- writeToolSummary: vi.fn(),
52
- readInput: vi.fn().mockResolvedValue(null),
53
- showDecisionGate: vi.fn(),
54
- isJsonMode: false,
55
- isTTY: false,
56
- };
57
- }
58
- function makeFakeScaffolder(outputDir) {
59
- return {
60
- scaffold: vi.fn().mockImplementation(async () => {
61
- const { writeFile, mkdir } = await import('node:fs/promises');
62
- await mkdir(join(outputDir, 'src'), { recursive: true });
63
- await mkdir(join(outputDir, 'tests'), { recursive: true });
64
- await writeFile(join(outputDir, 'package.json'), JSON.stringify({
65
- name: 'route-optimizer-poc',
66
- scripts: { test: 'vitest run' },
67
- dependencies: {},
68
- devDependencies: { vitest: '^3.0.0' },
69
- }), 'utf-8');
70
- await writeFile(join(outputDir, 'src', 'index.ts'), '// TODO: implement\nexport function optimize() { return []; }', 'utf-8');
71
- await writeFile(join(outputDir, 'tests', 'index.test.ts'), 'import { describe, it, expect } from "vitest";\nimport { optimize } from "../src/index.js";\ndescribe("optimizer", () => { it("should return stops", () => { expect(optimize().length).toBeGreaterThan(0); }); });', 'utf-8');
72
- return {
73
- createdFiles: ['package.json', 'src/index.ts', 'tests/index.test.ts'],
74
- skippedFiles: [],
75
- context: {
76
- projectName: 'route-optimizer-poc',
77
- ideaTitle: 'AI-Powered Route Optimizer',
78
- ideaDescription: 'Optimize routes',
79
- techStack: { language: 'TypeScript', runtime: 'Node.js 20', testRunner: 'npm test' },
80
- planSummary: 'Route optimization',
81
- sessionId: fixtureSession.sessionId,
82
- outputDir,
83
- },
84
- };
85
- }),
86
- getTemplateFiles: () => ['package.json', 'src/index.ts', 'tests/index.test.ts'],
87
- };
88
- }
89
- // ── SC-002-003: Iterative refinement test ────────────────────────────────────
90
- describe('RalphLoop integration — iterative refinement (SC-002-003)', () => {
91
- let tmpDir;
92
- beforeEach(async () => {
93
- tmpDir = await mkdtemp(join(tmpdir(), 'sofia-ralph-flow-'));
94
- });
95
- afterEach(async () => {
96
- await rm(tmpDir, { recursive: true, force: true });
97
- vi.clearAllMocks();
98
- });
99
- it('scaffold → fail tests → LLM fix → tests pass → success', async () => {
100
- const io = makeIo();
101
- const scaffolder = makeFakeScaffolder(tmpDir);
102
- // Test runner: fails first, passes second
103
- let testCallCount = 0;
104
- const testRunner = {
105
- run: vi.fn().mockImplementation(async () => {
106
- testCallCount++;
107
- if (testCallCount === 1) {
108
- // First run: fails
109
- return {
110
- passed: 0,
111
- failed: 1,
112
- skipped: 0,
113
- total: 1,
114
- durationMs: 400,
115
- failures: [
116
- {
117
- testName: 'optimizer > should return stops',
118
- message: 'Expected length to be greater than 0',
119
- file: 'tests/index.test.ts',
120
- line: 3,
121
- },
122
- ],
123
- rawOutput: 'FAIL tests/index.test.ts',
124
- };
125
- }
126
- // Subsequent runs: pass
127
- return {
128
- passed: 1,
129
- failed: 0,
130
- skipped: 0,
131
- total: 1,
132
- durationMs: 300,
133
- failures: [],
134
- rawOutput: 'PASS tests/index.test.ts',
135
- };
136
- }),
137
- };
138
- // LLM response: generates a fix for the failing test
139
- const client = {
140
- createSession: vi.fn().mockResolvedValue({
141
- send: vi.fn().mockReturnValue({
142
- async *[Symbol.asyncIterator]() {
143
- yield {
144
- type: 'TextDelta',
145
- text: [
146
- '```typescript file=src/index.ts',
147
- '// Fixed implementation',
148
- 'export function optimize(): string[] {',
149
- ' return ["stop-1", "stop-2", "stop-3"];',
150
- '}',
151
- '```',
152
- ].join('\n') + '\n',
153
- timestamp: '',
154
- };
155
- },
156
- }),
157
- getHistory: () => [],
158
- }),
159
- };
160
- const sessionUpdates = [];
161
- const ralph = new RalphLoop({
162
- client,
163
- io,
164
- session: fixtureSession,
165
- outputDir: tmpDir,
166
- maxIterations: 5,
167
- testRunner,
168
- scaffolder,
169
- onSessionUpdate: async (session) => {
170
- sessionUpdates.push({ ...session });
171
- },
172
- });
173
- const result = await ralph.run();
174
- // Loop should succeed
175
- expect(result.finalStatus).toBe('success');
176
- expect(result.terminationReason).toBe('tests-passing');
177
- // Verify at least 2 iterations happened (scaffold + test + fix + pass)
178
- expect(result.iterationsCompleted).toBeGreaterThanOrEqual(2);
179
- // Verify session was persisted
180
- expect(sessionUpdates.length).toBeGreaterThan(0);
181
- // Verify the fix was applied
182
- const fixedContent = await readFile(join(tmpDir, 'src', 'index.ts'), 'utf-8');
183
- expect(fixedContent).toContain('stop-1'); // LLM fix was applied
184
- // Verify iteration history
185
- const poc = result.session.poc;
186
- expect(poc.iterations[0].outcome).toBe('scaffold');
187
- const lastIter = poc.iterations[poc.iterations.length - 1];
188
- expect(lastIter.outcome).toBe('tests-passing');
189
- });
190
- it('verifies failing tests are passed to LLM in iteration prompt (SC-002-003)', async () => {
191
- const io = makeIo();
192
- const scaffolder = makeFakeScaffolder(tmpDir);
193
- let testCallCount = 0;
194
- const testRunner = {
195
- run: vi.fn().mockImplementation(async () => {
196
- testCallCount++;
197
- if (testCallCount === 1) {
198
- return {
199
- passed: 0,
200
- failed: 1,
201
- skipped: 0,
202
- total: 1,
203
- durationMs: 400,
204
- failures: [{ testName: 'unique-failure-name', message: 'specific-error-message' }],
205
- rawOutput: '',
206
- };
207
- }
208
- return {
209
- passed: 1,
210
- failed: 0,
211
- skipped: 0,
212
- total: 1,
213
- durationMs: 300,
214
- failures: [],
215
- rawOutput: '',
216
- };
217
- }),
218
- };
219
- // Capture the prompt sent to LLM
220
- let capturedPrompt = '';
221
- const client = {
222
- createSession: vi.fn().mockResolvedValue({
223
- send: vi.fn().mockImplementation((msg) => {
224
- capturedPrompt = msg.content;
225
- return {
226
- async *[Symbol.asyncIterator]() {
227
- yield {
228
- type: 'TextDelta',
229
- text: '```typescript file=src/index.ts\nexport function optimize() { return [1]; }\n```\n',
230
- timestamp: '',
231
- };
232
- },
233
- };
234
- }),
235
- getHistory: () => [],
236
- }),
237
- };
238
- const ralph = new RalphLoop({
239
- client,
240
- io,
241
- session: fixtureSession,
242
- outputDir: tmpDir,
243
- maxIterations: 3,
244
- testRunner,
245
- scaffolder,
246
- });
247
- await ralph.run();
248
- // Verify the LLM received the failure context
249
- expect(capturedPrompt).toContain('unique-failure-name');
250
- expect(capturedPrompt).toContain('specific-error-message');
251
- });
252
- });
253
- // ── T074: TODO tracking writes and updates .sofia-metadata.json ────────────
254
- describe('TODO tracking integration (T074)', () => {
255
- let tmpDir;
256
- beforeEach(async () => {
257
- tmpDir = await mkdtemp(join(tmpdir(), 'sofia-todo-'));
258
- });
259
- afterEach(async () => {
260
- await rm(tmpDir, { recursive: true, force: true });
261
- });
262
- it('writes TODO counts to .sofia-metadata.json during scaffold and updates after iteration', async () => {
263
- const { writeFile, mkdir } = await import('node:fs/promises');
264
- // Create a scaffolder that writes files with TODO markers
265
- const todoScaffolder = {
266
- scaffold: vi.fn().mockImplementation(async () => {
267
- await mkdir(join(tmpDir, 'src'), { recursive: true });
268
- await mkdir(join(tmpDir, 'tests'), { recursive: true });
269
- await writeFile(join(tmpDir, 'package.json'), JSON.stringify({
270
- name: 'todo-test-poc',
271
- scripts: { test: 'vitest run' },
272
- dependencies: {},
273
- devDependencies: {},
274
- }), 'utf-8');
275
- await writeFile(join(tmpDir, 'src', 'index.ts'), '// TODO: Implement the main logic\nexport function main() { return []; }\n// TODO: Add validation\n', 'utf-8');
276
- await writeFile(join(tmpDir, '.sofia-metadata.json'), JSON.stringify({
277
- sessionId: fixtureSession.sessionId,
278
- scaffoldedAt: new Date().toISOString(),
279
- }), 'utf-8');
280
- return {
281
- createdFiles: ['package.json', 'src/index.ts', '.sofia-metadata.json'],
282
- skippedFiles: [],
283
- context: {
284
- projectName: 'todo-test-poc',
285
- ideaTitle: 'Test',
286
- ideaDescription: 'Test',
287
- techStack: { language: 'TypeScript', runtime: 'Node.js 20', testRunner: 'npm test' },
288
- planSummary: 'Test',
289
- sessionId: fixtureSession.sessionId,
290
- outputDir: tmpDir,
291
- },
292
- };
293
- }),
294
- getTemplateFiles: () => ['package.json', 'src/index.ts'],
295
- };
296
- // Test runner that fails on first call (triggering TODO rescan), then passes
297
- let runCount = 0;
298
- const failThenPassRunner = {
299
- run: vi.fn().mockImplementation(async () => {
300
- runCount++;
301
- if (runCount <= 1) {
302
- return {
303
- passed: 0,
304
- failed: 1,
305
- skipped: 0,
306
- total: 1,
307
- durationMs: 100,
308
- failures: [{ testName: 'test', message: 'fail' }],
309
- rawOutput: 'FAIL',
310
- };
311
- }
312
- return {
313
- passed: 1,
314
- failed: 0,
315
- skipped: 0,
316
- total: 1,
317
- durationMs: 100,
318
- failures: [],
319
- rawOutput: 'pass',
320
- };
321
- }),
322
- };
323
- const io = makeIo();
324
- const session = { ...fixtureSession };
325
- const client = {
326
- createSession: vi.fn().mockResolvedValue({
327
- send: vi.fn().mockReturnValue({
328
- async *[Symbol.asyncIterator]() {
329
- yield {
330
- type: 'TextDelta',
331
- text: '```typescript file=src/index.ts\nexport function main() { return [1, 2]; }\n```',
332
- timestamp: '',
333
- };
334
- },
335
- }),
336
- getHistory: () => [],
337
- }),
338
- };
339
- const ralph = new RalphLoop({
340
- client,
341
- io,
342
- session,
343
- outputDir: tmpDir,
344
- maxIterations: 3,
345
- testRunner: failThenPassRunner,
346
- scaffolder: todoScaffolder,
347
- });
348
- await ralph.run();
349
- // Verify .sofia-metadata.json has todos section (written by rescan after failing iteration)
350
- const metaRaw = await readFile(join(tmpDir, '.sofia-metadata.json'), 'utf-8');
351
- const metadata = JSON.parse(metaRaw);
352
- expect(metadata.todos).toBeDefined();
353
- expect(typeof metadata.todos.totalInitial).toBe('number');
354
- expect(typeof metadata.todos.remaining).toBe('number');
355
- expect(Array.isArray(metadata.todos.markers)).toBe(true);
356
- // After LLM fix removes TODOs from src/index.ts, remaining count should be ≤ initial scaffold count (2)
357
- expect(metadata.todos.remaining).toBeLessThanOrEqual(2);
358
- });
359
- });