@specmarket/cli 0.0.4 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/{chunk-MS2DYACY.js → chunk-OTXWWFAO.js} +42 -3
- package/dist/chunk-OTXWWFAO.js.map +1 -0
- package/dist/{config-R5KWZSJP.js → config-5JMI3YAR.js} +2 -2
- package/dist/index.js +1945 -252
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/commands/comment.test.ts +211 -0
- package/src/commands/comment.ts +176 -0
- package/src/commands/fork.test.ts +163 -0
- package/src/commands/info.test.ts +192 -0
- package/src/commands/info.ts +66 -2
- package/src/commands/init.test.ts +245 -0
- package/src/commands/init.ts +359 -25
- package/src/commands/issues.test.ts +382 -0
- package/src/commands/issues.ts +436 -0
- package/src/commands/login.test.ts +99 -0
- package/src/commands/login.ts +2 -6
- package/src/commands/logout.test.ts +54 -0
- package/src/commands/publish.test.ts +159 -0
- package/src/commands/publish.ts +1 -0
- package/src/commands/report.test.ts +181 -0
- package/src/commands/run.test.ts +419 -0
- package/src/commands/run.ts +71 -3
- package/src/commands/search.test.ts +147 -0
- package/src/commands/validate.test.ts +206 -2
- package/src/commands/validate.ts +315 -192
- package/src/commands/whoami.test.ts +106 -0
- package/src/index.ts +6 -0
- package/src/lib/convex-client.ts +6 -2
- package/src/lib/format-detection.test.ts +223 -0
- package/src/lib/format-detection.ts +172 -0
- package/src/lib/meta-instructions.test.ts +340 -0
- package/src/lib/meta-instructions.ts +562 -0
- package/src/lib/ralph-loop.test.ts +404 -0
- package/src/lib/ralph-loop.ts +501 -95
- package/src/lib/telemetry.ts +7 -1
- package/dist/chunk-MS2DYACY.js.map +0 -1
- package/dist/{config-R5KWZSJP.js.map → config-5JMI3YAR.js.map} +0 -0
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
2
|
+
import { mkdir, writeFile, rm } from 'fs/promises';
|
|
3
|
+
import { join } from 'path';
|
|
4
|
+
import { tmpdir } from 'os';
|
|
5
|
+
import { randomUUID } from 'crypto';
|
|
6
|
+
|
|
7
|
+
// --- Hoisted mocks ---
|
|
8
|
+
|
|
9
|
+
const { mockSpinner, mockRunSpec, mockSubmitTelemetry, mockPromptTelemetry, mockCheckClaude } =
|
|
10
|
+
vi.hoisted(() => {
|
|
11
|
+
const mockSpinner = {
|
|
12
|
+
start: vi.fn().mockReturnThis(),
|
|
13
|
+
stop: vi.fn().mockReturnThis(),
|
|
14
|
+
succeed: vi.fn().mockReturnThis(),
|
|
15
|
+
fail: vi.fn().mockReturnThis(),
|
|
16
|
+
text: '',
|
|
17
|
+
};
|
|
18
|
+
const mockRunSpec = vi.fn();
|
|
19
|
+
const mockSubmitTelemetry = vi.fn();
|
|
20
|
+
const mockPromptTelemetry = vi.fn();
|
|
21
|
+
const mockCheckClaude = vi.fn().mockResolvedValue(undefined);
|
|
22
|
+
return { mockSpinner, mockRunSpec, mockSubmitTelemetry, mockPromptTelemetry, mockCheckClaude };
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
vi.mock('ora', () => ({
|
|
26
|
+
default: vi.fn().mockReturnValue(mockSpinner),
|
|
27
|
+
}));
|
|
28
|
+
|
|
29
|
+
vi.mock('../lib/ralph-loop.js', () => ({
|
|
30
|
+
runSpec: mockRunSpec,
|
|
31
|
+
checkClaudeCliInstalled: mockCheckClaude,
|
|
32
|
+
}));
|
|
33
|
+
|
|
34
|
+
vi.mock('../lib/telemetry.js', () => ({
|
|
35
|
+
submitTelemetry: mockSubmitTelemetry,
|
|
36
|
+
promptTelemetryOptIn: mockPromptTelemetry,
|
|
37
|
+
}));
|
|
38
|
+
|
|
39
|
+
vi.mock('../lib/auth.js', () => ({
|
|
40
|
+
loadCredentials: vi.fn().mockResolvedValue(null),
|
|
41
|
+
isAuthenticated: vi.fn().mockResolvedValue(false),
|
|
42
|
+
}));
|
|
43
|
+
|
|
44
|
+
vi.mock('../lib/convex-client.js', () => ({
|
|
45
|
+
getConvexClient: vi.fn().mockResolvedValue({ query: vi.fn(), action: vi.fn() }),
|
|
46
|
+
}));
|
|
47
|
+
|
|
48
|
+
// Mock the module builtin so createRequire can find package.json during tests
|
|
49
|
+
vi.mock('module', () => ({
|
|
50
|
+
createRequire: vi.fn().mockReturnValue(
|
|
51
|
+
vi.fn().mockReturnValue({ version: '0.0.4' })
|
|
52
|
+
),
|
|
53
|
+
}));
|
|
54
|
+
|
|
55
|
+
const mockExit = vi.spyOn(process, 'exit').mockImplementation((() => {
|
|
56
|
+
throw new Error('process.exit called');
|
|
57
|
+
}) as any);
|
|
58
|
+
|
|
59
|
+
const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
|
|
60
|
+
vi.spyOn(console, 'error').mockImplementation(() => {});
|
|
61
|
+
|
|
62
|
+
import { handleRun } from './run.js';
|
|
63
|
+
import { SIDECAR_FILENAME } from '@specmarket/shared';
|
|
64
|
+
|
|
65
|
+
// --- Helpers ---
|
|
66
|
+
|
|
67
|
+
const VALID_SPECMARKET_YAML = `spec_format: specmarket
|
|
68
|
+
display_name: "Test Spec"
|
|
69
|
+
description: "A valid test spec with enough description length to pass."
|
|
70
|
+
output_type: web-app
|
|
71
|
+
primary_stack: nextjs-typescript
|
|
72
|
+
tags: []
|
|
73
|
+
estimated_tokens: 50000
|
|
74
|
+
estimated_cost_usd: 2.50
|
|
75
|
+
estimated_time_minutes: 30
|
|
76
|
+
`;
|
|
77
|
+
|
|
78
|
+
const VALID_SPEC_YAML = `name: test-spec
|
|
79
|
+
display_name: "Test Spec"
|
|
80
|
+
description: "A valid test spec with enough description length to pass."
|
|
81
|
+
output_type: web-app
|
|
82
|
+
primary_stack: nextjs-typescript
|
|
83
|
+
version: "1.0.0"
|
|
84
|
+
runner: claude
|
|
85
|
+
min_model: "claude-opus-4-5"
|
|
86
|
+
estimated_tokens: 50000
|
|
87
|
+
estimated_cost_usd: 2.50
|
|
88
|
+
estimated_time_minutes: 30
|
|
89
|
+
tags: []
|
|
90
|
+
`;
|
|
91
|
+
|
|
92
|
+
const VALID_SUCCESS_CRITERIA = `# Success Criteria
|
|
93
|
+
- [ ] Application builds
|
|
94
|
+
- [ ] Tests pass
|
|
95
|
+
`;
|
|
96
|
+
|
|
97
|
+
describe('handleRun', () => {
|
|
98
|
+
let specDir: string;
|
|
99
|
+
|
|
100
|
+
beforeEach(async () => {
|
|
101
|
+
vi.clearAllMocks();
|
|
102
|
+
mockExit.mockImplementation((() => {
|
|
103
|
+
throw new Error('process.exit called');
|
|
104
|
+
}) as any);
|
|
105
|
+
specDir = join(tmpdir(), `run-test-${randomUUID()}`);
|
|
106
|
+
await mkdir(specDir, { recursive: true });
|
|
107
|
+
await mkdir(join(specDir, 'stdlib'), { recursive: true });
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
afterEach(async () => {
|
|
111
|
+
await rm(specDir, { recursive: true, force: true }).catch(() => {});
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
it('exits with validation error when spec is invalid', async () => {
|
|
115
|
+
// Create a spec directory missing required files
|
|
116
|
+
await writeFile(join(specDir, 'spec.yaml'), 'invalid yaml content');
|
|
117
|
+
|
|
118
|
+
await expect(handleRun(specDir, {})).rejects.toThrow(
|
|
119
|
+
'process.exit called'
|
|
120
|
+
);
|
|
121
|
+
|
|
122
|
+
expect(consoleSpy).toHaveBeenCalledWith(
|
|
123
|
+
expect.stringContaining('validation failed')
|
|
124
|
+
);
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
it('runs a valid spec and prints summary', async () => {
|
|
128
|
+
await Promise.all([
|
|
129
|
+
writeFile(join(specDir, SIDECAR_FILENAME), VALID_SPECMARKET_YAML),
|
|
130
|
+
writeFile(join(specDir, 'spec.yaml'), VALID_SPEC_YAML),
|
|
131
|
+
writeFile(join(specDir, 'PROMPT.md'), '# Prompt\nBuild it.'),
|
|
132
|
+
writeFile(join(specDir, 'SPEC.md'), '# Spec\nDetails.'),
|
|
133
|
+
writeFile(join(specDir, 'SUCCESS_CRITERIA.md'), VALID_SUCCESS_CRITERIA),
|
|
134
|
+
writeFile(join(specDir, 'stdlib', 'STACK.md'), '# Stack\nNext.js'),
|
|
135
|
+
]);
|
|
136
|
+
|
|
137
|
+
mockRunSpec.mockResolvedValue({
|
|
138
|
+
report: {
|
|
139
|
+
runId: 'run-123',
|
|
140
|
+
status: 'success',
|
|
141
|
+
loopCount: 3,
|
|
142
|
+
totalTokens: 15000,
|
|
143
|
+
totalCostUsd: 1.5,
|
|
144
|
+
totalTimeMinutes: 5.2,
|
|
145
|
+
successCriteriaResults: [
|
|
146
|
+
{ criterion: 'Application builds', passed: true },
|
|
147
|
+
],
|
|
148
|
+
},
|
|
149
|
+
outputDir: '/tmp/output',
|
|
150
|
+
});
|
|
151
|
+
mockSubmitTelemetry.mockResolvedValue(false);
|
|
152
|
+
|
|
153
|
+
await handleRun(specDir, {});
|
|
154
|
+
|
|
155
|
+
expect(mockRunSpec).toHaveBeenCalledWith(
|
|
156
|
+
specDir,
|
|
157
|
+
expect.objectContaining({ name: 'test-spec', version: '1.0.0' }),
|
|
158
|
+
expect.any(Object),
|
|
159
|
+
expect.any(Function)
|
|
160
|
+
);
|
|
161
|
+
expect(consoleSpy).toHaveBeenCalledWith(
|
|
162
|
+
expect.stringContaining('Run Complete')
|
|
163
|
+
);
|
|
164
|
+
expect(consoleSpy).toHaveBeenCalledWith(
|
|
165
|
+
expect.stringContaining('SUCCESS')
|
|
166
|
+
);
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
it('prints security warning before running', async () => {
|
|
170
|
+
await Promise.all([
|
|
171
|
+
writeFile(join(specDir, SIDECAR_FILENAME), VALID_SPECMARKET_YAML),
|
|
172
|
+
writeFile(join(specDir, 'spec.yaml'), VALID_SPEC_YAML),
|
|
173
|
+
writeFile(join(specDir, 'PROMPT.md'), '# Prompt\nBuild it.'),
|
|
174
|
+
writeFile(join(specDir, 'SPEC.md'), '# Spec\nDetails.'),
|
|
175
|
+
writeFile(join(specDir, 'SUCCESS_CRITERIA.md'), VALID_SUCCESS_CRITERIA),
|
|
176
|
+
writeFile(join(specDir, 'stdlib', 'STACK.md'), '# Stack\nNext.js'),
|
|
177
|
+
]);
|
|
178
|
+
|
|
179
|
+
mockRunSpec.mockResolvedValue({
|
|
180
|
+
report: {
|
|
181
|
+
runId: 'run-123',
|
|
182
|
+
status: 'success',
|
|
183
|
+
loopCount: 1,
|
|
184
|
+
totalTokens: 1000,
|
|
185
|
+
totalCostUsd: 0.1,
|
|
186
|
+
totalTimeMinutes: 1,
|
|
187
|
+
successCriteriaResults: [],
|
|
188
|
+
},
|
|
189
|
+
outputDir: '/tmp/output',
|
|
190
|
+
});
|
|
191
|
+
mockSubmitTelemetry.mockResolvedValue(false);
|
|
192
|
+
|
|
193
|
+
await handleRun(specDir, {});
|
|
194
|
+
|
|
195
|
+
expect(consoleSpy).toHaveBeenCalledWith(
|
|
196
|
+
expect.stringContaining('SECURITY WARNING')
|
|
197
|
+
);
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
it('prints harness in run summary', async () => {
|
|
201
|
+
await Promise.all([
|
|
202
|
+
writeFile(join(specDir, SIDECAR_FILENAME), VALID_SPECMARKET_YAML),
|
|
203
|
+
writeFile(join(specDir, 'spec.yaml'), VALID_SPEC_YAML),
|
|
204
|
+
writeFile(join(specDir, 'PROMPT.md'), '# Prompt\nBuild it.'),
|
|
205
|
+
writeFile(join(specDir, 'SPEC.md'), '# Spec\nDetails.'),
|
|
206
|
+
writeFile(join(specDir, 'SUCCESS_CRITERIA.md'), VALID_SUCCESS_CRITERIA),
|
|
207
|
+
writeFile(join(specDir, 'stdlib', 'STACK.md'), '# Stack\nNext.js'),
|
|
208
|
+
]);
|
|
209
|
+
|
|
210
|
+
mockRunSpec.mockResolvedValue({
|
|
211
|
+
report: {
|
|
212
|
+
runId: 'run-123',
|
|
213
|
+
status: 'success',
|
|
214
|
+
loopCount: 1,
|
|
215
|
+
totalTokens: 1000,
|
|
216
|
+
totalCostUsd: 0.1,
|
|
217
|
+
totalTimeMinutes: 1,
|
|
218
|
+
successCriteriaResults: [],
|
|
219
|
+
},
|
|
220
|
+
outputDir: '/tmp/output',
|
|
221
|
+
});
|
|
222
|
+
mockSubmitTelemetry.mockResolvedValue(false);
|
|
223
|
+
|
|
224
|
+
await handleRun(specDir, { harness: 'codex' });
|
|
225
|
+
|
|
226
|
+
expect(consoleSpy).toHaveBeenCalledWith(
|
|
227
|
+
expect.stringContaining('codex')
|
|
228
|
+
);
|
|
229
|
+
expect(mockRunSpec).toHaveBeenCalledWith(
|
|
230
|
+
specDir,
|
|
231
|
+
expect.any(Object),
|
|
232
|
+
expect.objectContaining({ harness: 'codex' }),
|
|
233
|
+
expect.any(Function)
|
|
234
|
+
);
|
|
235
|
+
});
|
|
236
|
+
|
|
237
|
+
it('exits with validation error for unknown harness', async () => {
|
|
238
|
+
await Promise.all([
|
|
239
|
+
writeFile(join(specDir, SIDECAR_FILENAME), VALID_SPECMARKET_YAML),
|
|
240
|
+
writeFile(join(specDir, 'spec.yaml'), VALID_SPEC_YAML),
|
|
241
|
+
writeFile(join(specDir, 'PROMPT.md'), '# Prompt\nBuild it.'),
|
|
242
|
+
writeFile(join(specDir, 'SPEC.md'), '# Spec\nDetails.'),
|
|
243
|
+
writeFile(join(specDir, 'SUCCESS_CRITERIA.md'), VALID_SUCCESS_CRITERIA),
|
|
244
|
+
writeFile(join(specDir, 'stdlib', 'STACK.md'), '# Stack\nNext.js'),
|
|
245
|
+
]);
|
|
246
|
+
|
|
247
|
+
await expect(handleRun(specDir, { harness: 'unknown-harness' })).rejects.toThrow(
|
|
248
|
+
'process.exit called'
|
|
249
|
+
);
|
|
250
|
+
expect(consoleSpy).toHaveBeenCalledWith(
|
|
251
|
+
expect.stringContaining('Unknown harness')
|
|
252
|
+
);
|
|
253
|
+
});
|
|
254
|
+
|
|
255
|
+
it('passes --workdir to runSpec as workdir option', async () => {
|
|
256
|
+
await Promise.all([
|
|
257
|
+
writeFile(join(specDir, SIDECAR_FILENAME), VALID_SPECMARKET_YAML),
|
|
258
|
+
writeFile(join(specDir, 'spec.yaml'), VALID_SPEC_YAML),
|
|
259
|
+
writeFile(join(specDir, 'PROMPT.md'), '# Prompt\nBuild it.'),
|
|
260
|
+
writeFile(join(specDir, 'SPEC.md'), '# Spec\nDetails.'),
|
|
261
|
+
writeFile(join(specDir, 'SUCCESS_CRITERIA.md'), VALID_SUCCESS_CRITERIA),
|
|
262
|
+
writeFile(join(specDir, 'stdlib', 'STACK.md'), '# Stack\nNext.js'),
|
|
263
|
+
]);
|
|
264
|
+
|
|
265
|
+
mockRunSpec.mockResolvedValue({
|
|
266
|
+
report: {
|
|
267
|
+
runId: 'run-123',
|
|
268
|
+
status: 'success',
|
|
269
|
+
loopCount: 1,
|
|
270
|
+
totalTokens: 1000,
|
|
271
|
+
totalCostUsd: 0.1,
|
|
272
|
+
totalTimeMinutes: 1,
|
|
273
|
+
successCriteriaResults: [],
|
|
274
|
+
},
|
|
275
|
+
outputDir: '/tmp/myworkdir',
|
|
276
|
+
});
|
|
277
|
+
mockSubmitTelemetry.mockResolvedValue(false);
|
|
278
|
+
|
|
279
|
+
await handleRun(specDir, { workdir: '/tmp/myworkdir' });
|
|
280
|
+
|
|
281
|
+
expect(mockRunSpec).toHaveBeenCalledWith(
|
|
282
|
+
specDir,
|
|
283
|
+
expect.any(Object),
|
|
284
|
+
expect.objectContaining({ workdir: '/tmp/myworkdir' }),
|
|
285
|
+
expect.any(Function)
|
|
286
|
+
);
|
|
287
|
+
expect(consoleSpy).toHaveBeenCalledWith(
|
|
288
|
+
expect.stringContaining('/tmp/myworkdir')
|
|
289
|
+
);
|
|
290
|
+
});
|
|
291
|
+
|
|
292
|
+
it('passes steeringQueue to runSpec', async () => {
|
|
293
|
+
await Promise.all([
|
|
294
|
+
writeFile(join(specDir, SIDECAR_FILENAME), VALID_SPECMARKET_YAML),
|
|
295
|
+
writeFile(join(specDir, 'spec.yaml'), VALID_SPEC_YAML),
|
|
296
|
+
writeFile(join(specDir, 'PROMPT.md'), '# Prompt\nBuild it.'),
|
|
297
|
+
writeFile(join(specDir, 'SPEC.md'), '# Spec\nDetails.'),
|
|
298
|
+
writeFile(join(specDir, 'SUCCESS_CRITERIA.md'), VALID_SUCCESS_CRITERIA),
|
|
299
|
+
writeFile(join(specDir, 'stdlib', 'STACK.md'), '# Stack\nNext.js'),
|
|
300
|
+
]);
|
|
301
|
+
|
|
302
|
+
mockRunSpec.mockResolvedValue({
|
|
303
|
+
report: {
|
|
304
|
+
runId: 'run-123',
|
|
305
|
+
status: 'success',
|
|
306
|
+
loopCount: 2,
|
|
307
|
+
totalTokens: 5000,
|
|
308
|
+
totalCostUsd: 0.5,
|
|
309
|
+
totalTimeMinutes: 3,
|
|
310
|
+
steeringActionCount: 0,
|
|
311
|
+
successCriteriaResults: [],
|
|
312
|
+
},
|
|
313
|
+
outputDir: '/tmp/output',
|
|
314
|
+
});
|
|
315
|
+
mockSubmitTelemetry.mockResolvedValue(false);
|
|
316
|
+
|
|
317
|
+
await handleRun(specDir, {});
|
|
318
|
+
|
|
319
|
+
// runSpec must receive a steeringQueue array in opts
|
|
320
|
+
expect(mockRunSpec).toHaveBeenCalledWith(
|
|
321
|
+
specDir,
|
|
322
|
+
expect.any(Object),
|
|
323
|
+
expect.objectContaining({ steeringQueue: expect.any(Array) }),
|
|
324
|
+
expect.any(Function)
|
|
325
|
+
);
|
|
326
|
+
});
|
|
327
|
+
|
|
328
|
+
it('shows steering action count in summary when > 0', async () => {
|
|
329
|
+
await Promise.all([
|
|
330
|
+
writeFile(join(specDir, SIDECAR_FILENAME), VALID_SPECMARKET_YAML),
|
|
331
|
+
writeFile(join(specDir, 'spec.yaml'), VALID_SPEC_YAML),
|
|
332
|
+
writeFile(join(specDir, 'PROMPT.md'), '# Prompt\nBuild it.'),
|
|
333
|
+
writeFile(join(specDir, 'SPEC.md'), '# Spec\nDetails.'),
|
|
334
|
+
writeFile(join(specDir, 'SUCCESS_CRITERIA.md'), VALID_SUCCESS_CRITERIA),
|
|
335
|
+
writeFile(join(specDir, 'stdlib', 'STACK.md'), '# Stack\nNext.js'),
|
|
336
|
+
]);
|
|
337
|
+
|
|
338
|
+
mockRunSpec.mockResolvedValue({
|
|
339
|
+
report: {
|
|
340
|
+
runId: 'run-123',
|
|
341
|
+
status: 'success',
|
|
342
|
+
loopCount: 5,
|
|
343
|
+
totalTokens: 20000,
|
|
344
|
+
totalCostUsd: 2.0,
|
|
345
|
+
totalTimeMinutes: 10,
|
|
346
|
+
steeringActionCount: 3,
|
|
347
|
+
successCriteriaResults: [],
|
|
348
|
+
},
|
|
349
|
+
outputDir: '/tmp/output',
|
|
350
|
+
});
|
|
351
|
+
mockSubmitTelemetry.mockResolvedValue(false);
|
|
352
|
+
|
|
353
|
+
await handleRun(specDir, {});
|
|
354
|
+
|
|
355
|
+
expect(consoleSpy).toHaveBeenCalledWith(
|
|
356
|
+
expect.stringContaining('Steering Actions: 3')
|
|
357
|
+
);
|
|
358
|
+
});
|
|
359
|
+
|
|
360
|
+
it('omits steering count from summary when 0', async () => {
|
|
361
|
+
await Promise.all([
|
|
362
|
+
writeFile(join(specDir, SIDECAR_FILENAME), VALID_SPECMARKET_YAML),
|
|
363
|
+
writeFile(join(specDir, 'spec.yaml'), VALID_SPEC_YAML),
|
|
364
|
+
writeFile(join(specDir, 'PROMPT.md'), '# Prompt\nBuild it.'),
|
|
365
|
+
writeFile(join(specDir, 'SPEC.md'), '# Spec\nDetails.'),
|
|
366
|
+
writeFile(join(specDir, 'SUCCESS_CRITERIA.md'), VALID_SUCCESS_CRITERIA),
|
|
367
|
+
writeFile(join(specDir, 'stdlib', 'STACK.md'), '# Stack\nNext.js'),
|
|
368
|
+
]);
|
|
369
|
+
|
|
370
|
+
mockRunSpec.mockResolvedValue({
|
|
371
|
+
report: {
|
|
372
|
+
runId: 'run-123',
|
|
373
|
+
status: 'success',
|
|
374
|
+
loopCount: 2,
|
|
375
|
+
totalTokens: 5000,
|
|
376
|
+
totalCostUsd: 0.5,
|
|
377
|
+
totalTimeMinutes: 3,
|
|
378
|
+
steeringActionCount: 0,
|
|
379
|
+
successCriteriaResults: [],
|
|
380
|
+
},
|
|
381
|
+
outputDir: '/tmp/output',
|
|
382
|
+
});
|
|
383
|
+
mockSubmitTelemetry.mockResolvedValue(false);
|
|
384
|
+
|
|
385
|
+
await handleRun(specDir, {});
|
|
386
|
+
|
|
387
|
+
const calls = consoleSpy.mock.calls.map((c) => String(c[0]));
|
|
388
|
+
expect(calls.some((c) => c.includes('Steering Actions'))).toBe(false);
|
|
389
|
+
});
|
|
390
|
+
|
|
391
|
+
it('exits with budget_exceeded code on budget runs', async () => {
|
|
392
|
+
await Promise.all([
|
|
393
|
+
writeFile(join(specDir, SIDECAR_FILENAME), VALID_SPECMARKET_YAML),
|
|
394
|
+
writeFile(join(specDir, 'spec.yaml'), VALID_SPEC_YAML),
|
|
395
|
+
writeFile(join(specDir, 'PROMPT.md'), '# Prompt\nBuild it.'),
|
|
396
|
+
writeFile(join(specDir, 'SPEC.md'), '# Spec\nDetails.'),
|
|
397
|
+
writeFile(join(specDir, 'SUCCESS_CRITERIA.md'), VALID_SUCCESS_CRITERIA),
|
|
398
|
+
writeFile(join(specDir, 'stdlib', 'STACK.md'), '# Stack\nNext.js'),
|
|
399
|
+
]);
|
|
400
|
+
|
|
401
|
+
mockRunSpec.mockResolvedValue({
|
|
402
|
+
report: {
|
|
403
|
+
runId: 'run-123',
|
|
404
|
+
status: 'budget_exceeded',
|
|
405
|
+
loopCount: 50,
|
|
406
|
+
totalTokens: 500000,
|
|
407
|
+
totalCostUsd: 10.0,
|
|
408
|
+
totalTimeMinutes: 60,
|
|
409
|
+
successCriteriaResults: [],
|
|
410
|
+
},
|
|
411
|
+
outputDir: '/tmp/output',
|
|
412
|
+
});
|
|
413
|
+
mockSubmitTelemetry.mockResolvedValue(false);
|
|
414
|
+
|
|
415
|
+
await expect(handleRun(specDir, {})).rejects.toThrow(
|
|
416
|
+
'process.exit called'
|
|
417
|
+
);
|
|
418
|
+
});
|
|
419
|
+
});
|
package/src/commands/run.ts
CHANGED
|
@@ -4,12 +4,12 @@ import ora from 'ora';
|
|
|
4
4
|
import { readFile, mkdir, writeFile as writeFileFn } from 'fs/promises';
|
|
5
5
|
import { join, resolve, isAbsolute } from 'path';
|
|
6
6
|
import { parse as parseYaml } from 'yaml';
|
|
7
|
-
import { specYamlSchema, EXIT_CODES } from '@specmarket/shared';
|
|
7
|
+
import { specYamlSchema, EXIT_CODES, KNOWN_HARNESSES } from '@specmarket/shared';
|
|
8
8
|
import { validateSpec } from './validate.js';
|
|
9
9
|
import { loadCredentials, isAuthenticated } from '../lib/auth.js';
|
|
10
10
|
import { getConvexClient } from '../lib/convex-client.js';
|
|
11
11
|
import { submitTelemetry, promptTelemetryOptIn } from '../lib/telemetry.js';
|
|
12
|
-
import { runSpec } from '../lib/ralph-loop.js';
|
|
12
|
+
import { runSpec, checkClaudeCliInstalled } from '../lib/ralph-loop.js';
|
|
13
13
|
import type { LoopIteration } from '@specmarket/shared';
|
|
14
14
|
import createDebug from 'debug';
|
|
15
15
|
import { createRequire } from 'module';
|
|
@@ -46,6 +46,8 @@ export async function handleRun(
|
|
|
46
46
|
dryRun?: boolean;
|
|
47
47
|
resume?: string;
|
|
48
48
|
output?: string;
|
|
49
|
+
harness?: string;
|
|
50
|
+
workdir?: string;
|
|
49
51
|
}
|
|
50
52
|
): Promise<void> {
|
|
51
53
|
// Resolve spec directory (and registry spec ID if downloaded from registry)
|
|
@@ -94,15 +96,63 @@ export async function handleRun(
|
|
|
94
96
|
await promptTelemetryOptIn();
|
|
95
97
|
}
|
|
96
98
|
|
|
99
|
+
// Validate --harness value
|
|
100
|
+
if (opts.harness && !(KNOWN_HARNESSES as readonly string[]).includes(opts.harness)) {
|
|
101
|
+
console.log(chalk.red(`\n✗ Unknown harness "${opts.harness}". Supported: ${KNOWN_HARNESSES.join(', ')}`));
|
|
102
|
+
process.exit(EXIT_CODES.VALIDATION_ERROR);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// Pre-flight check: Ensure the selected harness CLI is installed
|
|
106
|
+
try {
|
|
107
|
+
await checkClaudeCliInstalled(opts.harness);
|
|
108
|
+
} catch (err) {
|
|
109
|
+
console.log(chalk.red(`\n✗ ${(err as Error).message}`));
|
|
110
|
+
process.exit(EXIT_CODES.VALIDATION_ERROR);
|
|
111
|
+
}
|
|
112
|
+
|
|
97
113
|
const maxLoops = opts.maxLoops ? parseInt(opts.maxLoops, 10) : undefined;
|
|
98
114
|
const maxBudget = opts.maxBudget ? parseFloat(opts.maxBudget) : undefined;
|
|
99
115
|
|
|
116
|
+
const harness = opts.harness ?? 'claude-code';
|
|
100
117
|
console.log(chalk.cyan(`\nRunning spec: ${chalk.bold(specYaml.display_name)}`));
|
|
101
118
|
console.log(chalk.gray(` Version: ${specYaml.version}`));
|
|
102
119
|
console.log(chalk.gray(` Model: ${opts.model ?? specYaml.min_model}`));
|
|
120
|
+
console.log(chalk.gray(` Harness: ${harness}`));
|
|
121
|
+
if (opts.workdir) {
|
|
122
|
+
console.log(chalk.gray(` Working dir: ${opts.workdir}`));
|
|
123
|
+
}
|
|
103
124
|
console.log(chalk.gray(` Max loops: ${maxLoops ?? 50}`));
|
|
104
125
|
console.log(chalk.gray(` Estimated tokens: ${specYaml.estimated_tokens.toLocaleString()}`));
|
|
105
126
|
console.log(chalk.gray(` Estimated cost: $${specYaml.estimated_cost_usd.toFixed(2)}`));
|
|
127
|
+
|
|
128
|
+
// Set up steering input: collect lines from stdin and queue them for injection
|
|
129
|
+
// at the next iteration boundary. Works when stdin is a TTY (interactive) or
|
|
130
|
+
// a pipe (scripted input). Non-blocking — the run continues regardless.
|
|
131
|
+
const steeringQueue: string[] = [];
|
|
132
|
+
let steeringInputBuffer = '';
|
|
133
|
+
const steeringDataHandler = (chunk: Buffer | string): void => {
|
|
134
|
+
const data = typeof chunk === 'string' ? chunk : chunk.toString('utf-8');
|
|
135
|
+
steeringInputBuffer += data;
|
|
136
|
+
const lines = steeringInputBuffer.split('\n');
|
|
137
|
+
steeringInputBuffer = lines.pop() ?? '';
|
|
138
|
+
for (const line of lines) {
|
|
139
|
+
const trimmed = line.trim();
|
|
140
|
+
if (trimmed) {
|
|
141
|
+
steeringQueue.push(trimmed);
|
|
142
|
+
// Write to stderr so it doesn't overwrite the spinner on stdout
|
|
143
|
+
process.stderr.write(
|
|
144
|
+
`\n${chalk.cyan('[steering]')} Queued: "${trimmed.length > 60 ? trimmed.slice(0, 60) + '…' : trimmed}"\n`
|
|
145
|
+
);
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
};
|
|
149
|
+
|
|
150
|
+
if (!opts.dryRun) {
|
|
151
|
+
process.stdin.setEncoding('utf-8');
|
|
152
|
+
process.stdin.resume();
|
|
153
|
+
process.stdin.on('data', steeringDataHandler);
|
|
154
|
+
console.log(chalk.gray(' Tip: Type a message + Enter to steer the agent mid-run.'));
|
|
155
|
+
}
|
|
106
156
|
console.log('');
|
|
107
157
|
|
|
108
158
|
const spinner = ora({ text: 'Starting loop iteration 1...', spinner: 'dots' }).start();
|
|
@@ -126,13 +176,20 @@ export async function handleRun(
|
|
|
126
176
|
dryRun: opts.dryRun,
|
|
127
177
|
resumeRunId: opts.resume,
|
|
128
178
|
outputDir: opts.output,
|
|
179
|
+
harness: opts.harness,
|
|
180
|
+
workdir: opts.workdir,
|
|
129
181
|
cliVersion: CLI_VERSION,
|
|
182
|
+
steeringQueue,
|
|
130
183
|
},
|
|
131
184
|
(iteration: LoopIteration) => {
|
|
132
185
|
spinner.text = `Loop ${iteration.iteration}: ${iteration.tokens.toLocaleString()} tokens, ${(iteration.durationMs / 1000).toFixed(1)}s`;
|
|
133
186
|
}
|
|
134
187
|
);
|
|
135
188
|
|
|
189
|
+
// Stop collecting steering input now that the run is complete
|
|
190
|
+
process.stdin.removeListener('data', steeringDataHandler);
|
|
191
|
+
process.stdin.pause();
|
|
192
|
+
|
|
136
193
|
const { report } = result;
|
|
137
194
|
const statusColor =
|
|
138
195
|
report.status === 'success'
|
|
@@ -150,6 +207,9 @@ export async function handleRun(
|
|
|
150
207
|
console.log(` Tokens: ${report.totalTokens.toLocaleString()}`);
|
|
151
208
|
console.log(` Cost: $${report.totalCostUsd.toFixed(4)}`);
|
|
152
209
|
console.log(` Time: ${report.totalTimeMinutes.toFixed(1)} minutes`);
|
|
210
|
+
if (report.steeringActionCount && report.steeringActionCount > 0) {
|
|
211
|
+
console.log(` Steering Actions: ${report.steeringActionCount}`);
|
|
212
|
+
}
|
|
153
213
|
console.log(` Run ID: ${chalk.gray(report.runId)}`);
|
|
154
214
|
console.log(` Output: ${chalk.gray(result.outputDir)}`);
|
|
155
215
|
|
|
@@ -362,7 +422,7 @@ async function resolveSpecPath(pathOrId: string): Promise<ResolvedSpec> {
|
|
|
362
422
|
export function createRunCommand(): Command {
|
|
363
423
|
return new Command('run')
|
|
364
424
|
.description('Execute a spec locally using the Ralph Loop')
|
|
365
|
-
.argument('
|
|
425
|
+
.argument('[path-or-id]', 'Local path to spec directory or registry ID (@user/name[@version])', '.')
|
|
366
426
|
.option('--max-loops <n>', 'Maximum loop iterations (default: 50)')
|
|
367
427
|
.option('--max-budget <usd>', 'Maximum budget in USD (default: 2x estimated)')
|
|
368
428
|
.option('--no-telemetry', 'Disable telemetry submission for this run')
|
|
@@ -370,6 +430,14 @@ export function createRunCommand(): Command {
|
|
|
370
430
|
.option('--dry-run', 'Validate and show config without executing')
|
|
371
431
|
.option('--resume <run-id>', 'Resume a previous run from where it left off')
|
|
372
432
|
.option('--output <dir>', 'Custom output directory for run artifacts')
|
|
433
|
+
.option(
|
|
434
|
+
'--harness <harness>',
|
|
435
|
+
`Agentic harness to use (default: claude-code). One of: ${KNOWN_HARNESSES.join(', ')}`
|
|
436
|
+
)
|
|
437
|
+
.option(
|
|
438
|
+
'--workdir <dir>',
|
|
439
|
+
'Run in an existing directory instead of a fresh sandbox (spec files not copied)'
|
|
440
|
+
)
|
|
373
441
|
.action(async (pathOrId: string, opts) => {
|
|
374
442
|
try {
|
|
375
443
|
await handleRun(pathOrId, opts);
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach } from 'vitest';

// --- Hoisted mocks ---
// vi.mock factories are hoisted above imports, so the values they close over
// must be created via vi.hoisted().

const { mockQuery, mockClient, mockSpinner } = vi.hoisted(() => {
  const mockQuery = vi.fn();
  const mockClient = { query: mockQuery };
  const mockSpinner = {
    start: vi.fn().mockReturnThis(),
    stop: vi.fn().mockReturnThis(),
    succeed: vi.fn().mockReturnThis(),
    fail: vi.fn().mockReturnThis(),
  };
  return { mockQuery, mockClient, mockSpinner };
});

vi.mock('../lib/convex-client.js', () => ({
  getConvexClient: vi.fn().mockResolvedValue(mockClient),
}));

vi.mock('ora', () => ({
  default: vi.fn().mockReturnValue(mockSpinner),
}));

// Stub the Convex API surface so the query name resolves to a plain string
// the tests can assert against.
vi.mock('@specmarket/convex/api', () => ({
  api: {
    specs: { search: 'specs.search' },
  },
}));

// process.exit would kill the runner; throw so tests can use .rejects.toThrow.
const mockExit = vi.spyOn(process, 'exit').mockImplementation((() => {
  throw new Error('process.exit called');
}) as any);

const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
vi.spyOn(console, 'error').mockImplementation(() => {});

// Imported after the spies are installed so module-load side effects are captured.
import { handleSearch } from './search.js';

// --- Test data ---

const MOCK_RESULTS = [
  {
    scopedName: '@alice/todo-app',
    description: 'A simple todo application with authentication and dark mode',
    replacesSaas: 'Todoist',
    successRate: 0.85,
    avgCostUsd: 1.5,
    communityRating: 4.2,
  },
  {
    // Second result exercises the "no stats yet" rendering path (zeros / null).
    scopedName: '@bob/crm-tool',
    description: 'Customer relationship manager with pipeline tracking',
    replacesSaas: null,
    successRate: 0,
    avgCostUsd: 0,
    communityRating: 0,
  },
];

describe('handleSearch', () => {
  beforeEach(() => {
    vi.clearAllMocks();
    // clearAllMocks wipes the exit implementation; restore the throwing stub.
    mockExit.mockImplementation((() => {
      throw new Error('process.exit called');
    }) as any);
  });

  it('displays results in table format', async () => {
    mockQuery.mockResolvedValue(MOCK_RESULTS);

    await handleSearch('todo', {});

    // Default limit is 20 when no --limit option is given.
    expect(mockQuery).toHaveBeenCalledWith('specs.search', {
      query: 'todo',
      limit: 20,
    });
    expect(consoleSpy).toHaveBeenCalledWith(
      expect.stringContaining('Found 2 spec(s)')
    );
  });

  it('shows no-results message when no specs match', async () => {
    mockQuery.mockResolvedValue([]);

    await handleSearch('nonexistent', {});

    expect(consoleSpy).toHaveBeenCalledWith(
      expect.stringContaining('No specs found')
    );
  });

  it('passes filter options to backend query', async () => {
    mockQuery.mockResolvedValue([]);

    await handleSearch('test', {
      outputType: 'web-app',
      primaryStack: 'nextjs-typescript',
      replacesSaas: 'Notion',
      limit: '10',
      tag: ['productivity'],
      freeTierOnly: true,
    });

    // CLI option names map onto query args; --limit is parsed to a number and
    // repeated --tag values are forwarded as the `tags` array.
    expect(mockQuery).toHaveBeenCalledWith('specs.search', {
      query: 'test',
      limit: 10,
      outputType: 'web-app',
      primaryStack: 'nextjs-typescript',
      replacesSaas: 'Notion',
      tags: ['productivity'],
      freeTierOnly: true,
    });
  });

  it('converts min-success-rate from percentage to decimal', async () => {
    mockQuery.mockResolvedValue([]);

    await handleSearch('test', { minSuccessRate: '80' });

    expect(mockQuery).toHaveBeenCalledWith('specs.search', {
      query: 'test',
      limit: 20,
      minSuccessRate: 0.8,
    });
  });

  it('exits with error for invalid min-success-rate', async () => {
    await expect(
      handleSearch('test', { minSuccessRate: '150' })
    ).rejects.toThrow('process.exit called');

    expect(mockSpinner.fail).toHaveBeenCalledWith(
      expect.stringContaining('--min-success-rate must be between 0 and 100')
    );
  });

  it('exits with error for invalid max-cost', async () => {
    await expect(
      handleSearch('test', { maxCost: '-5' })
    ).rejects.toThrow('process.exit called');

    expect(mockSpinner.fail).toHaveBeenCalledWith(
      expect.stringContaining('--max-cost must be a non-negative number')
    );
  });
});
|