@specmarket/cli 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,404 @@
1
+ import { describe, it, expect, beforeEach, afterEach } from 'vitest';
2
+ import { mkdir, writeFile, readFile, rm } from 'fs/promises';
3
+ import { join } from 'path';
4
+ import { tmpdir } from 'os';
5
+ import { randomUUID } from 'crypto';
6
+ import {
7
+ parseTokensFromOutput,
8
+ ensureMetaInstructions,
9
+ extractTestFailures,
10
+ writeTestFixTasks,
11
+ injectSteeringMessages,
12
+ } from './ralph-loop.js';
13
+ import type { SteeringEntry } from './ralph-loop.js';
14
+ import { MODEL_COST_PER_TOKEN } from '@specmarket/shared';
15
+ import { META_INSTRUCTION_FILENAME } from './meta-instructions.js';
16
+
17
/**
 * Suite for `ensureMetaInstructions(specDir, runDir, formatOverride?)`.
 *
 * Each case runs against a fresh temp tree (`<tmp>/ralph-loop-<uuid>/{spec,run}`)
 * and inspects the META_INSTRUCTION_FILENAME file found in the run directory
 * after the call.
 */
describe('ensureMetaInstructions', () => {
  let baseDir: string;
  let specDir: string;
  let runDir: string;

  /** Reads the meta-instructions file back from the run directory. */
  const readGenerated = (): Promise<string> =>
    readFile(join(runDir, META_INSTRUCTION_FILENAME), 'utf-8');

  /** Writes each `[fileName, body]` pair into the spec directory, in order. */
  const seedSpecDir = async (
    files: ReadonlyArray<readonly [string, string]>,
  ): Promise<void> => {
    for (const [fileName, body] of files) {
      await writeFile(join(specDir, fileName), body);
    }
  };

  /** The pair of spec files shared by the first two cases. */
  const promptAndCriteria = [
    ['PROMPT.md', '# Prompt'],
    ['SUCCESS_CRITERIA.md', '# Criteria'],
  ] as const;

  beforeEach(async () => {
    baseDir = join(tmpdir(), `ralph-loop-${randomUUID()}`);
    specDir = join(baseDir, 'spec');
    runDir = join(baseDir, 'run');
    for (const directory of [specDir, runDir]) {
      await mkdir(directory, { recursive: true });
    }
  });

  afterEach(async () => {
    // `force` keeps cleanup from throwing if the tree is already gone.
    await rm(baseDir, { recursive: true, force: true });
  });

  it('writes META_INSTRUCTION_FILENAME to runDir', async () => {
    await seedSpecDir(promptAndCriteria);
    await ensureMetaInstructions(specDir, runDir);
    expect(await readGenerated()).toContain('# SpecMarket Runner Instructions');
  });

  it('detects specmarket format from spec files', async () => {
    await seedSpecDir(promptAndCriteria);
    await ensureMetaInstructions(specDir, runDir);
    expect(await readGenerated()).toContain('SpecMarket (Native)');
  });

  it('detects speckit format from spec.md', async () => {
    await seedSpecDir([
      ['spec.md', '# Spec'],
      ['tasks.md', '- [ ] Task 1'],
    ]);
    await ensureMetaInstructions(specDir, runDir);
    expect(await readGenerated()).toContain('Spec Kit');
  });

  it('respects formatOverride and skips detection', async () => {
    // The spec directory is left empty on purpose: only the explicit override
    // can account for the BMAD text asserted below.
    await ensureMetaInstructions(specDir, runDir, 'bmad');
    expect(await readGenerated()).toContain('BMAD Method');
  });

  it('overwrites existing meta-instructions file', async () => {
    const stale = 'old content';
    await writeFile(join(runDir, META_INSTRUCTION_FILENAME), stale);
    await ensureMetaInstructions(specDir, runDir, 'ralph');
    const regenerated = await readGenerated();
    expect(regenerated).not.toBe('old content');
    expect(regenerated).toContain('Ralph');
  });
});
73
+
74
/**
 * Suite for `parseTokensFromOutput(output, model?)`.
 *
 * Cases are grouped by the strategy they exercise: structured JSON fields
 * (including conversion of `cost_usd` into tokens through a per-model rate),
 * regex matches over free text, and a length-based fallback.
 */
describe('parseTokensFromOutput', () => {
  // ---- Strategy 1: JSON parsing ----

  it('returns 0 for empty output', () => {
    expect(parseTokensFromOutput('')).toBe(0);
    expect(parseTokensFromOutput(' ')).toBe(0);
  });

  it('extracts total_tokens from top-level JSON field', () => {
    const json = JSON.stringify({ total_tokens: 12345 });
    expect(parseTokensFromOutput(json)).toBe(12345);
  });

  it('extracts totalTokens (camelCase) from top-level JSON field', () => {
    const json = JSON.stringify({ totalTokens: 9999 });
    expect(parseTokensFromOutput(json)).toBe(9999);
  });

  it('extracts usage.total_tokens from nested JSON', () => {
    const json = JSON.stringify({ usage: { total_tokens: 5000 } });
    expect(parseTokensFromOutput(json)).toBe(5000);
  });

  it('sums usage.input_tokens + usage.output_tokens', () => {
    const json = JSON.stringify({ usage: { input_tokens: 3000, output_tokens: 1000 } });
    expect(parseTokensFromOutput(json)).toBe(4000);
  });

  it('sums usage.prompt_tokens + usage.completion_tokens', () => {
    const json = JSON.stringify({ usage: { prompt_tokens: 2500, completion_tokens: 500 } });
    expect(parseTokensFromOutput(json)).toBe(3000);
  });

  it('extracts result.usage.total_tokens from nested result object', () => {
    const json = JSON.stringify({ result: { usage: { total_tokens: 7777 } } });
    expect(parseTokensFromOutput(json)).toBe(7777);
  });

  it('uses cost_usd with default (Sonnet) pricing when model is omitted', () => {
    const costUsd = 0.006; // 0.006 / 0.000006 = 1000 tokens
    const json = JSON.stringify({ cost_usd: costUsd });
    expect(parseTokensFromOutput(json)).toBe(1000);
  });

  it('uses cost_usd with default (Sonnet) pricing for unrecognised model', () => {
    const costUsd = 0.006;
    const json = JSON.stringify({ cost_usd: costUsd });
    expect(parseTokensFromOutput(json, 'claude-unknown-model')).toBe(1000);
  });

  it('uses Haiku pricing when model contains "haiku" (case-insensitive)', () => {
    // costPerToken for haiku = 0.0000005; 0.0005 / 0.0000005 = 1000 tokens
    const costUsd = 0.0000005 * 1000;
    const json = JSON.stringify({ cost_usd: costUsd });
    expect(parseTokensFromOutput(json, 'claude-haiku-4-5-20251001')).toBe(1000);
  });

  it('uses Opus pricing when model contains "opus" (case-insensitive)', () => {
    // costPerToken for opus = 0.00003; 0.03 / 0.00003 = 1000 tokens
    const costUsd = 0.00003 * 1000;
    const json = JSON.stringify({ cost_usd: costUsd });
    expect(parseTokensFromOutput(json, 'claude-opus-4-6')).toBe(1000);
  });

  it('uses Sonnet pricing for model containing "sonnet"', () => {
    // costPerToken for sonnet/default = 0.000006; 0.006 / 0.000006 = 1000 tokens
    const costUsd = MODEL_COST_PER_TOKEN.default * 1000;
    const json = JSON.stringify({ cost_usd: costUsd });
    expect(parseTokensFromOutput(json, 'claude-sonnet-4-6')).toBe(1000);
  });

  it('skips cost_usd when cost is 0', () => {
    // cost_usd=0 means no billable work — fall through to heuristic
    const json = JSON.stringify({ cost_usd: 0 });
    // The payload is `{"cost_usd":0}` (14 chars), so the character heuristic
    // yields ceil(14 / 4) = 4. Deriving the expectation from `json.length`
    // keeps the comment and the assertion from drifting apart, and pins the
    // heuristic path rather than accepting any positive number.
    expect(parseTokensFromOutput(json)).toBe(Math.ceil(json.length / 4));
  });

  it('handles newline-delimited JSON and picks first valid structure', () => {
    const lines = [
      'not json',
      JSON.stringify({ total_tokens: 42 }),
      JSON.stringify({ total_tokens: 99 }),
    ].join('\n');
    expect(parseTokensFromOutput(lines)).toBe(42);
  });

  // ---- Strategy 2: Regex text patterns ----

  it('extracts from "total tokens: N" text pattern', () => {
    // The thousands separator in "3,500" must be ignored when parsing.
    const output = 'Run complete. Total tokens: 3,500 used.';
    expect(parseTokensFromOutput(output)).toBe(3500);
  });

  it('extracts from "tokens used: N" text pattern', () => {
    const output = 'Tokens used: 1200';
    expect(parseTokensFromOutput(output)).toBe(1200);
  });

  it('extracts from quoted "total_tokens": N pattern', () => {
    // Not valid JSON as a whole, so this can only be matched textually.
    const output = 'Stats: "total_tokens": 8000 and other info';
    expect(parseTokensFromOutput(output)).toBe(8000);
  });

  it('extracts from input_tokens + output_tokens text pattern', () => {
    const output = 'input_tokens: 4000, output_tokens: 1000';
    expect(parseTokensFromOutput(output)).toBe(5000);
  });

  // ---- Strategy 3: Character heuristic fallback ----

  it('falls back to character heuristic for unrecognised output', () => {
    const output = 'x'.repeat(400); // 400 chars → ceil(400/4) = 100 tokens
    expect(parseTokensFromOutput(output)).toBe(100);
  });
});
192
+
193
/**
 * Suite for `extractTestFailures(output)`, which turns raw test-runner output
 * into a list of failure descriptions. Covers Vitest/Jest and pytest line
 * shapes, the generic "N failed" fallback, de-duplication, and the cap on the
 * number of returned entries.
 */
describe('extractTestFailures', () => {
  /** Joins the given rows into one newline-separated runner transcript. */
  const transcript = (...rows: string[]): string => rows.join('\n');

  it('returns empty array for empty output', () => {
    const found = extractTestFailures('');
    expect(found).toEqual([]);
  });

  it('parses Vitest/Jest file-level FAIL lines', () => {
    const found = extractTestFailures(
      transcript(
        'FAIL src/commands/login.test.ts',
        'FAIL src/lib/auth.test.ts',
        ' ✓ passing test',
      ),
    );
    expect(found).toContain('src/commands/login.test.ts');
    expect(found).toContain('src/lib/auth.test.ts');
    expect(found).not.toContain('passing test');
  });

  it('parses Vitest/Jest individual test × failures', () => {
    const found = extractTestFailures(
      transcript(
        ' × login should reject bad token',
        ' × auth should expire session',
        ' ✓ passing test',
      ),
    );
    expect(found).toContain('login should reject bad token');
    expect(found).toContain('auth should expire session');
    expect(found).not.toContain('passing test');
  });

  it('parses pytest FAILED lines', () => {
    const found = extractTestFailures(
      transcript(
        'FAILED tests/test_auth.py::test_login_bad_token',
        'FAILED tests/test_api.py::test_rate_limit',
        'passed 5 tests',
      ),
    );
    expect(found).toContain('tests/test_auth.py::test_login_bad_token');
    expect(found).toContain('tests/test_api.py::test_rate_limit');
  });

  it('falls back to generic summary when specific names cannot be parsed', () => {
    // Only an aggregate count is present, so a single summary entry is expected.
    const found = extractTestFailures('Test run complete: 3 failed, 10 passed');
    expect(found).toHaveLength(1);
    expect(found[0]).toContain('3 test(s) failed');
    expect(found[0]).toContain('TEST_FAILURES.md');
  });

  it('deduplicates repeated failure names', () => {
    const repeated = 'FAIL src/foo.test.ts';
    const found = extractTestFailures(transcript(repeated, repeated));
    const matches = found.filter((entry) => entry === 'src/foo.test.ts');
    expect(matches).toHaveLength(1);
  });

  it('caps results at 10 entries', () => {
    // Fifteen distinct failing files, i.e. more than the cap under test.
    const rows: string[] = [];
    for (let n = 0; n < 15; n += 1) {
      rows.push(`FAIL src/test${n}.test.ts`);
    }
    const found = extractTestFailures(transcript(...rows));
    expect(found.length).toBeLessThanOrEqual(10);
  });
});
256
+
257
/**
 * Suite for `writeTestFixTasks(dir, testOutput)`.
 *
 * Each case works in its own temp directory and checks the two files the
 * function is expected to maintain there: `TEST_FAILURES.md` (the captured
 * runner output) and `TASKS.md` (with an auto-generated "fix" section).
 */
describe('writeTestFixTasks', () => {
  let dir: string;

  /** Reads a file from the per-test working directory as UTF-8 text. */
  const readFromDir = (fileName: string): Promise<string> =>
    readFile(join(dir, fileName), 'utf-8');

  beforeEach(async () => {
    dir = join(tmpdir(), `write-test-fix-${randomUUID()}`);
    await mkdir(dir, { recursive: true });
  });

  afterEach(async () => {
    await rm(dir, { recursive: true, force: true });
  });

  it('writes TEST_FAILURES.md with the test output', async () => {
    const output = 'FAIL src/foo.test.ts\n3 failed, 2 passed';
    await writeTestFixTasks(dir, output);
    const content = await readFromDir('TEST_FAILURES.md');
    expect(content).toContain('# Test Failures');
    expect(content).toContain('FAIL src/foo.test.ts');
  });

  it('appends fix tasks to an existing TASKS.md', async () => {
    await writeFile(join(dir, 'TASKS.md'), '# Tasks\n\n- [x] Completed task\n', 'utf-8');
    const output = 'FAIL src/bar.test.ts\n1 failed';
    await writeTestFixTasks(dir, output);
    const content = await readFromDir('TASKS.md');
    // Pre-existing content must survive alongside the new section.
    expect(content).toContain('- [x] Completed task');
    expect(content).toContain('- [ ] Fix: src/bar.test.ts');
    expect(content).toContain('## Test Failures (Auto-Generated)');
  });

  it('creates TASKS.md if it does not exist', async () => {
    const output = 'FAIL src/new.test.ts\n1 failed';
    await writeTestFixTasks(dir, output);
    const content = await readFromDir('TASKS.md');
    expect(content).toContain('- [ ] Fix: src/new.test.ts');
  });

  it('replaces previous auto-generated section on second call', async () => {
    const first = 'FAIL src/first.test.ts\n1 failed';
    const second = 'FAIL src/second.test.ts\n1 failed';
    await writeTestFixTasks(dir, first);
    await writeTestFixTasks(dir, second);
    const content = await readFromDir('TASKS.md');
    // Old section replaced — only second failure should appear
    expect(content).not.toContain('src/first.test.ts');
    expect(content).toContain('src/second.test.ts');
    // Section header appears exactly once
    const sectionCount = (content.match(/## Test Failures \(Auto-Generated\)/g) ?? []).length;
    expect(sectionCount).toBe(1);
  });

  it('does not write fix tasks when no failures can be extracted', async () => {
    await writeFile(join(dir, 'TASKS.md'), '# Tasks\n', 'utf-8');
    // Output has no recognisable failure patterns
    await writeTestFixTasks(dir, 'All tests passed. 5 passed.');
    const content = await readFromDir('TASKS.md');
    expect(content).not.toContain('## Test Failures (Auto-Generated)');
    // TEST_FAILURES.md is still written (for agent reference)
    const failuresFile = await readFromDir('TEST_FAILURES.md');
    expect(failuresFile).toContain('# Test Failures');
  });

  it('truncates very long test output in TEST_FAILURES.md', async () => {
    const longOutput = 'x'.repeat(10000);
    await writeTestFixTasks(dir, longOutput);
    const content = await readFromDir('TEST_FAILURES.md');
    // The output portion is expected to be cut to 8KB, so header + truncated
    // output must be shorter than the raw input. The previous bound
    // (input length + 200) also held when nothing was truncated, so it could
    // not detect a regression.
    expect(content.length).toBeLessThan(longOutput.length);
  });
});
327
+
328
/**
 * Suite for `injectSteeringMessages(dir, messages, log)`.
 *
 * Each case uses a private temp directory and checks two observable effects:
 * the text appended to the META_INSTRUCTION_FILENAME file in that directory,
 * and the `SteeringEntry` records pushed onto the caller-supplied log array.
 */
describe('injectSteeringMessages', () => {
  let dir: string;

  /** Absolute path of the meta-instructions file for the current test. */
  const metaPath = (): string => join(dir, META_INSTRUCTION_FILENAME);

  /** Reads the current meta-instructions file contents. */
  const readMeta = (): Promise<string> => readFile(metaPath(), 'utf-8');

  beforeEach(async () => {
    dir = join(tmpdir(), `inject-steering-${randomUUID()}`);
    await mkdir(dir, { recursive: true });
  });

  afterEach(async () => {
    await rm(dir, { recursive: true, force: true });
  });

  it('appends a steering section to an existing meta-instructions file', async () => {
    await writeFile(metaPath(), '# SpecMarket Runner Instructions\n\nOriginal content.');
    const entries: SteeringEntry[] = [];
    await injectSteeringMessages(dir, ['Focus on the auth module'], entries);

    const text = await readMeta();
    // Original text is retained and the new section follows the quoted-message format.
    expect(text).toContain('# SpecMarket Runner Instructions');
    expect(text).toContain('Original content.');
    expect(text).toContain('## Steering Input');
    expect(text).toContain('> Focus on the auth module');
  });

  it('creates the meta-instructions file when it does not exist', async () => {
    const entries: SteeringEntry[] = [];
    await injectSteeringMessages(dir, ['Fix the failing tests'], entries);

    const text = await readMeta();
    expect(text).toContain('## Steering Input');
    expect(text).toContain('> Fix the failing tests');
  });

  it('pushes SteeringEntry objects into the log', async () => {
    const entries: SteeringEntry[] = [];
    const batch = ['First message', 'Second message'];
    await injectSteeringMessages(dir, batch, entries);

    expect(entries).toHaveLength(2);
    expect(entries[0].content).toBe('First message');
    expect(entries[1].content).toBe('Second message');
    expect(entries[0].timestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/); // ISO-8601 prefix
    expect(entries[0].timestamp).toBe(entries[1].timestamp); // Same batch → same timestamp
  });

  it('accumulates entries from multiple calls', async () => {
    const entries: SteeringEntry[] = [];
    for (const message of ['First steering', 'Second steering']) {
      await injectSteeringMessages(dir, [message], entries);
    }

    expect(entries).toHaveLength(2);
    const text = await readMeta();
    // One section per call, so the heading must occur twice.
    const headings = text.match(/## Steering Input/g) ?? [];
    expect(headings.length).toBe(2);
    expect(text).toContain('> First steering');
    expect(text).toContain('> Second steering');
  });

  it('does nothing when messages array is empty', async () => {
    await writeFile(metaPath(), '# Original');
    const entries: SteeringEntry[] = [];
    await injectSteeringMessages(dir, [], entries);

    expect(entries).toHaveLength(0);
    expect(await readMeta()).toBe('# Original'); // File unchanged
  });

  it('includes a timestamp header in the injected section', async () => {
    const entries: SteeringEntry[] = [];
    await injectSteeringMessages(dir, ['Check the database layer'], entries);

    // Section header includes the ISO timestamp
    expect(await readMeta()).toMatch(/## Steering Input \(injected at \d{4}-\d{2}-\d{2}T/);
  });
});