@hegemonart/get-design-done 1.20.0 → 1.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +9 -12
- package/.claude-plugin/plugin.json +8 -31
- package/CHANGELOG.md +78 -0
- package/README.md +48 -7
- package/bin/gdd-sdk +55 -0
- package/package.json +15 -47
- package/reference/codex-tools.md +53 -0
- package/reference/gemini-tools.md +53 -0
- package/reference/registry.json +14 -0
- package/scripts/e2e/run-headless.ts +514 -0
- package/scripts/lib/cli/commands/audit.ts +382 -0
- package/scripts/lib/cli/commands/init.ts +217 -0
- package/scripts/lib/cli/commands/query.ts +329 -0
- package/scripts/lib/cli/commands/run.ts +656 -0
- package/scripts/lib/cli/commands/stage.ts +468 -0
- package/scripts/lib/cli/index.ts +167 -0
- package/scripts/lib/cli/parse-args.ts +336 -0
- package/scripts/lib/context-engine/index.ts +116 -0
- package/scripts/lib/context-engine/manifest.ts +69 -0
- package/scripts/lib/context-engine/truncate.ts +282 -0
- package/scripts/lib/context-engine/types.ts +59 -0
- package/scripts/lib/discuss-parallel-runner/aggregator.ts +448 -0
- package/scripts/lib/discuss-parallel-runner/discussants.ts +430 -0
- package/scripts/lib/discuss-parallel-runner/index.ts +223 -0
- package/scripts/lib/discuss-parallel-runner/types.ts +184 -0
- package/scripts/lib/event-stream/index.ts +11 -1
- package/scripts/lib/explore-parallel-runner/index.ts +294 -0
- package/scripts/lib/explore-parallel-runner/mappers.ts +290 -0
- package/scripts/lib/explore-parallel-runner/synthesizer.ts +295 -0
- package/scripts/lib/explore-parallel-runner/types.ts +139 -0
- package/scripts/lib/harness/detect.ts +90 -0
- package/scripts/lib/harness/index.ts +64 -0
- package/scripts/lib/harness/tool-map.ts +142 -0
- package/scripts/lib/init-runner/index.ts +396 -0
- package/scripts/lib/init-runner/researchers.ts +245 -0
- package/scripts/lib/init-runner/scaffold.ts +224 -0
- package/scripts/lib/init-runner/synthesizer.ts +224 -0
- package/scripts/lib/init-runner/types.ts +143 -0
- package/scripts/lib/logger/index.ts +251 -0
- package/scripts/lib/logger/sinks.ts +269 -0
- package/scripts/lib/logger/types.ts +110 -0
- package/scripts/lib/pipeline-runner/human-gate.ts +134 -0
- package/scripts/lib/pipeline-runner/index.ts +527 -0
- package/scripts/lib/pipeline-runner/stage-handlers.ts +339 -0
- package/scripts/lib/pipeline-runner/state-machine.ts +144 -0
- package/scripts/lib/pipeline-runner/types.ts +183 -0
- package/scripts/lib/session-runner/errors.ts +406 -0
- package/scripts/lib/session-runner/index.ts +715 -0
- package/scripts/lib/session-runner/transcript.ts +189 -0
- package/scripts/lib/session-runner/types.ts +144 -0
- package/scripts/lib/tool-scoping/index.ts +219 -0
- package/scripts/lib/tool-scoping/parse-agent-tools.ts +207 -0
- package/scripts/lib/tool-scoping/stage-scopes.ts +139 -0
- package/scripts/lib/tool-scoping/types.ts +77 -0
|
@@ -0,0 +1,656 @@
|
|
|
1
|
+
// scripts/lib/cli/commands/run.ts — Plan 21-09 Task 2 (SDK-21),
|
|
2
|
+
// extended by Plan 21-11 Task 3 (dry-run).
|
|
3
|
+
//
|
|
4
|
+
// `gdd-sdk run` — drives the full design pipeline via
|
|
5
|
+
// `pipeline-runner.run()`. Builds a PipelineConfig from CLI flags,
|
|
6
|
+
// loads per-stage prompts (from --prompt-file mapping, from
|
|
7
|
+
// `.design/prompts/<stage>.md`, or embedded defaults), wires the
|
|
8
|
+
// human-gate callback, prints the outcome as JSON or human text, and
|
|
9
|
+
// maps pipeline status to an exit code.
|
|
10
|
+
//
|
|
11
|
+
// Exit codes:
|
|
12
|
+
// * 0 — PipelineStatus === 'completed' or 'stopped-after'.
|
|
13
|
+
// * 1 — PipelineStatus === 'halted'.
|
|
14
|
+
// * 2 — PipelineStatus === 'awaiting-gate'.
|
|
15
|
+
// * 3 — argument / config error (missing prompts, malformed flags).
|
|
16
|
+
//
|
|
17
|
+
// --dry-run (Plan 21-11):
|
|
18
|
+
// Installs a mocked session-runner that reads canned SessionResult
|
|
19
|
+
// objects from `<fixture>/expected-outputs/canned-<stage>.json` (the fixture root defaults to `<cwd>`; override with --fixture) plus a
|
|
20
|
+
// permissive transition-stage override. Each mock "session" also
|
|
21
|
+
// writes the stage-appropriate artifact (DESIGN-PATTERNS.md,
|
|
22
|
+
// DESIGN-PLAN.md, DESIGN.md, SUMMARY.md) under `<cwd>/.design/`
|
|
23
|
+
// so callers can assert artifact shape without a real API call.
|
|
24
|
+
// Zero API cost. Intended for CI; the fixture at
|
|
25
|
+
// `test-fixture/headless-e2e/` is the canonical consumer.
|
|
26
|
+
|
|
27
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
|
|
28
|
+
import { resolve as resolvePath, join as joinPath, dirname as dirnamePath } from 'node:path';
|
|
29
|
+
|
|
30
|
+
import {
|
|
31
|
+
run as defaultPipelineRun,
|
|
32
|
+
type HumanGateDecision,
|
|
33
|
+
type HumanGateInfo,
|
|
34
|
+
type PipelineConfig,
|
|
35
|
+
type PipelineResult,
|
|
36
|
+
type RunOverrides,
|
|
37
|
+
type Stage,
|
|
38
|
+
type StageOutcome,
|
|
39
|
+
} from '../../pipeline-runner/index.ts';
|
|
40
|
+
import { getLogger } from '../../logger/index.ts';
|
|
41
|
+
import { ValidationError } from '../../gdd-errors/index.ts';
|
|
42
|
+
import type { SessionResult, SessionRunnerOptions } from '../../session-runner/types.ts';
|
|
43
|
+
|
|
44
|
+
import {
|
|
45
|
+
coerceFlags,
|
|
46
|
+
COMMON_FLAGS,
|
|
47
|
+
type FlagSpec,
|
|
48
|
+
type ParsedArgs,
|
|
49
|
+
} from '../parse-args.ts';
|
|
50
|
+
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
// Flag spec + help text.
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
|
|
55
|
+
// Flags accepted by `gdd-sdk run`: the shared COMMON_FLAGS first, then the
// run-specific string options, the boolean --dry-run switch (off by default)
// and the --fixture string option.
const RUN_FLAGS: readonly FlagSpec[] = [
  ...COMMON_FLAGS,
  ...['stages', 'skip', 'resume-from', 'stop-after', 'prompt-file', 'gate-reply'].map(
    (name): FlagSpec => ({ name, type: 'string' }),
  ),
  { name: 'dry-run', type: 'boolean', default: false },
  { name: 'fixture', type: 'string' },
];
|
|
66
|
+
|
|
67
|
+
// Help text printed to stdout for `gdd-sdk run --help` / `-h`.
// Every numeric flag carries an `<n>` placeholder, and the help flag itself
// is listed because runCommand handles it before any other parsing.
const USAGE = `gdd-sdk run [flags]

Drive the full design pipeline headlessly.

Flags:
  --stages <list>             Comma-separated subset: brief,explore,plan,design,verify
  --skip <list>               Comma-separated stages to skip
  --resume-from <stage>       Start from this stage (inclusive)
  --stop-after <stage>        Stop after this stage (inclusive)
  --prompt-file <spec>        stage=path pairs; e.g., --prompt-file brief=./prompts/brief.md
  --gate-reply <mode>         Canned reply when a human-gate pauses:
                                stop            — halt with awaiting-gate (default)
                                resume[:payload]— resume with optional payload
  --budget-usd <n>            Total USD cap (default 10.0)
  --budget-input-tokens <n>   Input-token cap (default 200000)
  --budget-output-tokens <n>  Output-token cap (default 50000)
  --max-turns <n>             Per-stage turn cap (default 40)
  --cwd <dir>                 Working directory (default: current)
  --log-level <lvl>           debug|info|warn|error (default info)
  --json                      Emit machine-parseable JSON to stdout
  --text                      Force human-readable output (default)
  --headless / --interactive  Override logger auto-mode
  --dry-run                   Mock mode: read canned SessionResults from
                              <fixture>/expected-outputs/canned-<stage>.json
                              and write stub artifacts under <cwd>/.design/.
                              Zero API cost; used by the E2E fixture test
                              harness (test-fixture/headless-e2e/).
  --fixture <dir>             Override the fixture root whose
                              expected-outputs/ directory supplies canned
                              SessionResults. Defaults to --cwd.
  -h, --help                  Print this help text and exit

Exit codes:
  0  completed / stopped-after
  1  halted
  2  awaiting-gate
  3  arg/config error
`;
|
|
104
|
+
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
// Public types.
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
/**
 * Signature of the pipeline entry point, taken from the imported
 * `pipeline-runner` `run` function. Used as the type of the optional
 * `pipelineRun` test-injection hook so unit tests can substitute a fake
 * and stay independent of the real Agent SDK.
 */
export type PipelineRunFn = typeof defaultPipelineRun;
|
|
114
|
+
|
|
115
|
+
/**
 * Optional dependencies for `runCommand`, mainly for deterministic tests.
 * Any field left undefined falls back to the real implementation.
 */
export interface RunCommandDeps {
  /** Replacement for the pipeline runner; defaults to `pipeline-runner.run`. */
  readonly pipelineRun?: PipelineRunFn;
  /** Where result output is written; defaults to `process.stdout`. */
  readonly stdout?: NodeJS.WritableStream;
  /** Where diagnostics are written; defaults to `process.stderr`. */
  readonly stderr?: NodeJS.WritableStream;
}
|
|
121
|
+
|
|
122
|
+
// ---------------------------------------------------------------------------
|
|
123
|
+
// runCommand — entry point.
|
|
124
|
+
// ---------------------------------------------------------------------------
|
|
125
|
+
|
|
126
|
+
// The five stage names, listed in the same order as the --stages help text.
// Used as the prompt-loading set when --stages is not supplied.
const ALL_STAGES: readonly Stage[] = ['brief', 'explore', 'plan', 'design', 'verify'];
|
|
127
|
+
|
|
128
|
+
/**
 * Entry point for `gdd-sdk run`. Returns the process exit code (never
 * throws). All diagnostic output goes to stderr; result output to stdout.
 *
 * @param args Parsed command-line arguments.
 * @param deps Optional test-injection hooks (pipeline runner, output streams).
 * @returns 0 for `completed` / `stopped-after`, 2 for `awaiting-gate`,
 *   1 for any other pipeline status, and 3 for argument/config errors or an
 *   unexpected exception from the pipeline runner.
 */
export async function runCommand(
  args: ParsedArgs,
  deps: RunCommandDeps = {},
): Promise<number> {
  const stdout = deps.stdout ?? process.stdout;
  const stderr = deps.stderr ?? process.stderr;

  // Help is handled before flag coercion so it works even with bad flags.
  if (args.flags['help'] === true || args.flags['h'] === true) {
    stdout.write(USAGE);
    return 0;
  }

  let flags: Record<string, unknown>;
  try {
    flags = coerceFlags(args, RUN_FLAGS);
  } catch (err) {
    stderr.write(`gdd-sdk run: ${errMessage(err)}\n`);
    return 3;
  }

  const cwd: string = typeof flags['cwd'] === 'string' ? (flags['cwd'] as string) : process.cwd();

  // Resolve stages / skip / resumeFrom / stopAfter. Every parser here can
  // throw on an unknown stage name, so all four run inside one guard: a bad
  // --resume-from / --stop-after must yield exit code 3, not a rejected
  // promise.
  let stages: readonly Stage[] | undefined;
  let skipStages: readonly Stage[] | undefined;
  let resumeFrom: Stage | undefined;
  let stopAfter: Stage | undefined;
  try {
    stages = parseStageList(flags['stages']);
    skipStages = parseStageList(flags['skip']);
    resumeFrom = parseSingleStage(flags['resume-from']);
    stopAfter = parseSingleStage(flags['stop-after']);
  } catch (err) {
    stderr.write(`gdd-sdk run: ${errMessage(err)}\n`);
    return 3;
  }

  // Resolve the stage subset used for prompt loading. Defaults to all 5.
  const effectiveStages: readonly Stage[] = stages ?? ALL_STAGES;

  // Load prompts.
  let prompts: Record<Stage, string>;
  try {
    prompts = loadPrompts(effectiveStages, flags, cwd);
  } catch (err) {
    stderr.write(`gdd-sdk run: ${errMessage(err)}\n`);
    return 3;
  }

  // Build budget.
  const budget = {
    usdLimit: typeof flags['budget-usd'] === 'number' ? (flags['budget-usd'] as number) : 10.0,
    inputTokensLimit:
      typeof flags['budget-input-tokens'] === 'number'
        ? (flags['budget-input-tokens'] as number)
        : 200_000,
    outputTokensLimit:
      typeof flags['budget-output-tokens'] === 'number'
        ? (flags['budget-output-tokens'] as number)
        : 50_000,
    perStage: true as const,
  };

  const maxTurnsPerStage: number =
    typeof flags['max-turns'] === 'number' ? (flags['max-turns'] as number) : 40;

  // Human-gate callback — default STOP (exit code 2); optional canned
  // `--gate-reply resume[:payload]` lets tests / operators pre-seed a
  // decision without an interactive prompt.
  const gateReply: string | undefined =
    typeof flags['gate-reply'] === 'string' ? (flags['gate-reply'] as string) : undefined;
  const onHumanGate: (info: HumanGateInfo) => Promise<HumanGateDecision> = async (info) => {
    // Always surface gate info to stderr so operators see it even in
    // --json mode (where stdout carries the result JSON).
    stderr.write(
      `gdd-sdk run: human gate "${info.gateName}" at stage "${info.stage}"\n`,
    );
    if (gateReply === undefined) return { decision: 'stop' };
    if (gateReply === 'stop') return { decision: 'stop' };
    if (gateReply === 'resume') return { decision: 'resume' };
    if (gateReply.startsWith('resume:')) {
      return { decision: 'resume', payload: gateReply.slice('resume:'.length) };
    }
    stderr.write(
      `gdd-sdk run: unrecognized --gate-reply "${gateReply}"; defaulting to stop\n`,
    );
    return { decision: 'stop' };
  };

  // Optional keys are only included when set, so the config never carries
  // explicit `undefined` values.
  const config: PipelineConfig = {
    prompts,
    budget,
    maxTurnsPerStage,
    stageRetries: 1,
    ...(stages !== undefined ? { stages } : {}),
    ...(skipStages !== undefined ? { skipStages } : {}),
    ...(resumeFrom !== undefined ? { resumeFrom } : {}),
    ...(stopAfter !== undefined ? { stopAfter } : {}),
    cwd,
    onHumanGate,
  };

  const pipelineRun: PipelineRunFn = deps.pipelineRun ?? defaultPipelineRun;

  // Plan 21-11: --dry-run installs canned session overrides + a
  // permissive transition shim. Artifacts are written to disk by the
  // override so assertions can still check artifact shape.
  let overrides: RunOverrides = {};
  if (flags['dry-run'] === true) {
    // A relative --fixture is resolved against the process working
    // directory; without --fixture the fixture root is the --cwd value.
    const fixtureDir: string =
      typeof flags['fixture'] === 'string' && (flags['fixture'] as string).length > 0
        ? resolvePath(process.cwd(), flags['fixture'] as string)
        : cwd;
    try {
      overrides = buildDryRunOverrides(cwd, fixtureDir);
    } catch (err) {
      stderr.write(`gdd-sdk run: ${errMessage(err)}\n`);
      return 3;
    }
    try {
      getLogger().info('cli.run.dry_run_enabled', {
        fixture: fixtureDir,
        cwd,
      });
    } catch {
      // Swallow logger failures.
    }
  }

  let result: PipelineResult;
  try {
    result = await pipelineRun(config, overrides);
  } catch (err) {
    // pipeline-runner is contracted never to throw, but belt-and-braces:
    // surface the error as exit 3 rather than crashing.
    try {
      getLogger().error('cli.run.unexpected_error', {
        error: err instanceof Error ? err.message : String(err),
      });
    } catch {
      // Swallow logger failures.
    }
    stderr.write(`gdd-sdk run: unexpected error: ${errMessage(err)}\n`);
    return 3;
  }

  // Output.
  if (flags['json'] === true) {
    stdout.write(JSON.stringify(result, null, 2) + '\n');
  } else {
    stdout.write(renderHumanSummary(result));
  }

  // Exit code mapping.
  if (result.status === 'completed' || result.status === 'stopped-after') return 0;
  if (result.status === 'awaiting-gate') return 2;
  return 1;
}
|
|
293
|
+
|
|
294
|
+
// ---------------------------------------------------------------------------
|
|
295
|
+
// Helpers.
|
|
296
|
+
// ---------------------------------------------------------------------------
|
|
297
|
+
|
|
298
|
+
/**
 * Default embedded prompt body for each stage, used when no prompt file is
 * supplied. Frozen so the shared defaults cannot be mutated at runtime.
 */
const DEFAULT_PROMPTS: Readonly<Record<Stage, string>> = Object.freeze({
  brief: 'Draft the design brief. Follow SKILL.md for the stage.',
  explore: 'Run the explore-stage mappers and synthesize DESIGN-PATTERNS.md.',
  plan: 'Plan the design changes. Produce locked decisions + must-haves.',
  design: 'Implement design-stage deliverables per plan.',
  verify: 'Verify design deliverables; close must-haves; probe regressions.',
});
|
|
306
|
+
|
|
307
|
+
/**
 * Resolve the prompt text for every stage.
 *
 * Sources are layered, later ones winning:
 *   1. the embedded DEFAULT_PROMPTS;
 *   2. `<cwd>/.design/prompts/<stage>.md` for each requested stage, when the
 *      file can be read (read failures are ignored);
 *   3. explicit `--prompt-file stage=path` pairs, separated by `,` or `;`.
 *
 * @param stages Stages whose conventional prompt files are probed.
 * @param flags Coerced CLI flags; only `prompt-file` is consulted.
 * @param cwd Base directory for the conventional and explicit paths.
 * @returns A prompt for each of the five stages.
 * @throws ValidationError when a `--prompt-file` pair is malformed, names an
 *   unknown stage, or points at a file that cannot be read.
 */
function loadPrompts(
  stages: readonly Stage[],
  flags: Record<string, unknown>,
  cwd: string,
): Record<Stage, string> {
  // Mutable copy of the defaults; the layers below overwrite entries.
  const prompts: Record<Stage, string> = { ...DEFAULT_PROMPTS };

  // Layer 2 — conventional per-stage files, best effort.
  for (const stage of stages) {
    const conventionalPath = resolvePath(cwd, '.design/prompts', `${stage}.md`);
    try {
      prompts[stage] = readFileSync(conventionalPath, 'utf8');
    } catch {
      // Missing or unreadable file: keep whatever is already in place.
    }
  }

  // Layer 3 — explicit mapping. coerceFlags collapses repeated flags
  // (last write wins), so several pairs travel in one value separated by
  // `,` or `;`.
  const spec = flags['prompt-file'];
  if (typeof spec !== 'string' || spec.length === 0) {
    return prompts;
  }

  const entries = spec
    .split(/[,;]/)
    .map((chunk) => chunk.trim())
    .filter((chunk) => chunk.length > 0);

  for (const entry of entries) {
    const separator = entry.indexOf('=');
    if (separator < 0) {
      throw new ValidationError(
        `--prompt-file expected stage=path, got "${entry}"`,
        'BAD_PROMPT_FILE_SPEC',
        { spec: entry },
      );
    }

    const stageName = entry.slice(0, separator).trim();
    const filePath = entry.slice(separator + 1).trim();
    if (!isStage(stageName)) {
      throw new ValidationError(
        `--prompt-file stage "${stageName}" is not one of brief|explore|plan|design|verify`,
        'BAD_PROMPT_FILE_STAGE',
        { stageName },
      );
    }

    const absPath = resolvePath(cwd, filePath);
    try {
      prompts[stageName] = readFileSync(absPath, 'utf8');
    } catch (err) {
      throw new ValidationError(
        `--prompt-file ${stageName}: cannot read "${filePath}": ${errMessage(err)}`,
        'PROMPT_FILE_READ_ERROR',
        { stage: stageName, path: absPath },
      );
    }
  }

  return prompts;
}
|
|
374
|
+
|
|
375
|
+
/**
 * Parse a comma-separated stage list such as `brief,plan`.
 *
 * @param value Raw flag value; anything that is not a non-empty string is
 *   treated as "not supplied".
 * @returns The stages in input order, or `undefined` when nothing usable was
 *   supplied (including a value made only of commas and whitespace).
 * @throws ValidationError on the first entry that is not a known stage.
 */
function parseStageList(value: unknown): readonly Stage[] | undefined {
  if (typeof value !== 'string' || value === '') return undefined;

  const collected: Stage[] = [];
  for (const piece of value.split(',')) {
    const name = piece.trim();
    if (name === '') continue; // tolerate stray commas / whitespace
    if (!isStage(name)) {
      throw new ValidationError(
        `stage "${name}" is not one of brief|explore|plan|design|verify`,
        'INVALID_STAGE_NAME',
        { stage: name },
      );
    }
    collected.push(name);
  }
  return collected.length > 0 ? collected : undefined;
}
|
|
392
|
+
|
|
393
|
+
/**
 * Parse a single stage name (used for `--resume-from` / `--stop-after`).
 *
 * @param value Raw flag value; non-strings and blank strings count as absent.
 * @returns The trimmed stage name, or `undefined` when absent.
 * @throws ValidationError when the trimmed value is not a known stage.
 */
function parseSingleStage(value: unknown): Stage | undefined {
  const candidate = typeof value === 'string' ? value.trim() : '';
  if (candidate === '') return undefined;
  if (isStage(candidate)) return candidate;
  throw new ValidationError(
    `stage "${candidate}" is not one of brief|explore|plan|design|verify`,
    'INVALID_STAGE_NAME',
    { stage: candidate },
  );
}
|
|
407
|
+
|
|
408
|
+
/** Type guard: true only for the five exact (case-sensitive) stage names. */
function isStage(s: string): s is Stage {
  switch (s) {
    case 'brief':
    case 'explore':
    case 'plan':
    case 'design':
    case 'verify':
      return true;
    default:
      return false;
  }
}
|
|
413
|
+
|
|
414
|
+
/** Human-readable text for a caught value: `.message` for Errors, `String(x)` otherwise. */
function errMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}
|
|
418
|
+
|
|
419
|
+
/**
 * Render a PipelineResult as newline-terminated text for non-JSON output:
 * the status line, optional halted-at and gate lines, a usage line, then
 * one line per stage outcome.
 */
function renderHumanSummary(result: PipelineResult): string {
  const usage = result.total_usage;

  const header: string[] = [`pipeline status: ${result.status}`];
  if (result.halted_at !== undefined) {
    header.push(`halted at stage: ${result.halted_at}`);
  }
  if (result.gate !== undefined) {
    header.push(`awaiting gate: ${result.gate.gateName} (stage=${result.gate.stage})`);
  }

  const usageLine =
    `total usage: input=${usage.input_tokens} ` +
    `output=${usage.output_tokens} cost=$${usage.usd_cost.toFixed(4)}`;

  const outcomeLines = result.outcomes.map((outcome) => ` ${formatOutcome(outcome)}`);

  return [...header, usageLine, 'stage outcomes:', ...outcomeLines].join('\n') + '\n';
}
|
|
438
|
+
|
|
439
|
+
/**
 * One-line summary of a stage outcome: `<stage>: <status>`, followed by
 * `retries=N` when N > 0 and `blockers=[a; b]` when any blockers exist.
 */
function formatOutcome(outcome: StageOutcome): string {
  const segments: string[] = [`${outcome.stage}: ${outcome.status}`];
  if (outcome.retries > 0) {
    segments.push(`retries=${outcome.retries}`);
  }
  const blockers = outcome.blockers;
  if (blockers !== undefined && blockers.length > 0) {
    segments.push(`blockers=[${blockers.join('; ')}]`);
  }
  return segments.join(' ');
}
|
|
447
|
+
|
|
448
|
+
// ---------------------------------------------------------------------------
|
|
449
|
+
// Dry-run support — Plan 21-11 Task 3.
|
|
450
|
+
//
|
|
451
|
+
// Reads canned SessionResult objects from
|
|
452
|
+
// `<fixtureDir>/expected-outputs/canned-<stage>.json` and writes a
|
|
453
|
+
// stub artifact per stage under `<cwd>/.design/` so downstream
|
|
454
|
+
// assertions can still grep for artifact shape. Transition-stage gate
|
|
455
|
+
// is bypassed with an always-OK override (the dry-run is about shape
|
|
456
|
+
// assertions, not full state-machine gating — that is exercised by
|
|
457
|
+
// the pipeline-runner unit test suite).
|
|
458
|
+
// ---------------------------------------------------------------------------
|
|
459
|
+
|
|
460
|
+
/** Build one dry-run artifact entry; `lines` are joined with `\n`. */
function dryRunArtifact(
  path: string,
  lines: readonly string[],
): { readonly path: string; readonly body: string } {
  return { path, body: lines.join('\n') };
}

/**
 * Per-stage "pretend LLM output" that the dry-run override writes to
 * `.design/`. Each payload embeds the structural tokens that the Plan
 * 21-11 harness asserts on (`## Tokens`, `## Components`, `Wave`, etc.).
 * Every body ends with a trailing newline (the final empty line entry).
 */
const DRY_RUN_ARTIFACTS: Readonly<Record<Stage, readonly { readonly path: string; readonly body: string }[]>> =
  Object.freeze({
    brief: [
      dryRunArtifact('.design/BRIEF.md', [
        '# Design Brief — dry-run',
        '',
        '**Goal:** Audit design-system consistency; extract tokens; normalize spacing.',
        '',
        '## BRIEF COMPLETE',
        '',
      ]),
    ],
    explore: [
      dryRunArtifact('.design/DESIGN-PATTERNS.md', [
        '# Design Patterns — dry-run',
        '',
        '## Tokens',
        '- #0066ff (button primary)',
        '- #111 (heading)',
        '- #f5f5f5 (page bg)',
        '- #ffffff (card bg)',
        '',
        '## Components',
        '- Button (2 variants: Start, Continue)',
        '- Card (title + children slot)',
        '',
        '## Accessibility',
        '- Button has no focus-visible ring; recommend outline or ring token.',
        '',
        '## Visual Hierarchy',
        '- h1 28px / h2 inherits; recommend locking to scale token.',
        '',
        '## EXPLORE COMPLETE',
        '',
      ]),
    ],
    plan: [
      dryRunArtifact('.design/DESIGN-PLAN.md', [
        '# Design Plan — dry-run',
        '',
        '## Wave 1 — Token extraction',
        'Type: refactor',
        'Touches: src/components/Button.tsx, src/components/Card.tsx',
        'Parallel: yes',
        'Acceptance: every hex literal replaced with a CSS custom property.',
        '',
        '## Wave 2 — Spacing normalization',
        'Type: refactor',
        'Touches: src/components/Button.tsx, src/components/Card.tsx, src/App.tsx',
        'Parallel: no',
        'Acceptance: all padding values are multiples of 4px.',
        '',
        '## PLAN COMPLETE',
        '',
      ]),
    ],
    design: [
      dryRunArtifact('.design/DESIGN.md', [
        '# Design — dry-run',
        '',
        '## Tokens',
        '- --color-primary: #0066ff',
        '- --color-text: #111',
        '- --space-2: 8px',
        '- --space-3: 12px',
        '- --space-4: 16px',
        '',
        '## DESIGN COMPLETE',
        '',
      ]),
    ],
    verify: [
      dryRunArtifact('.design/SUMMARY.md', [
        '# Summary — dry-run',
        '',
        '- M-01: pass (every hex literal extracted into a token)',
        '- M-02: pass (all padding values are multiples of 4px)',
        '',
        '## VERIFY COMPLETE',
        '',
      ]),
    ],
  });
|
|
563
|
+
|
|
564
|
+
/**
 * Build the RunOverrides bundle that drives --dry-run.
 *
 * Reads and validates each canned-<stage>.json up-front so missing or
 * malformed files fail fast (exit 3) before the pipeline enters stage
 * dispatch.
 *
 * @param cwd Directory under which stub artifacts are written.
 * @param fixtureDir Root whose `expected-outputs/` holds the canned files.
 * @returns Overrides containing the mocked session runner and an always-OK
 *   stage-transition shim.
 * @throws ValidationError when any canned file is missing or malformed.
 */
function buildDryRunOverrides(cwd: string, fixtureDir: string): RunOverrides {
  // Pre-load every canned SessionResult so missing/malformed files
  // surface as a single validation error, not partway through a run.
  const canned: Record<Stage, SessionResult> = {
    brief: loadCannedSession(fixtureDir, 'brief'),
    explore: loadCannedSession(fixtureDir, 'explore'),
    plan: loadCannedSession(fixtureDir, 'plan'),
    design: loadCannedSession(fixtureDir, 'design'),
    verify: loadCannedSession(fixtureDir, 'verify'),
  };

  const runOverride = async (opts: SessionRunnerOptions): Promise<SessionResult> => {
    // `opts.stage` is wider than the pipeline Stage union (it also admits
    // `init` + `custom`). Only pipeline stages have canned data and stub
    // artifacts, so anything else is rejected with a clear error instead
    // of failing later with an opaque TypeError.
    const requested: unknown = opts.stage;
    if (typeof requested !== 'string' || !isStage(requested)) {
      throw new ValidationError(
        `--dry-run: no canned session for non-pipeline stage "${String(requested)}"`,
        'DRY_RUN_UNSUPPORTED_STAGE',
        { stage: requested },
      );
    }
    writeDryRunArtifacts(cwd, requested);
    // Force zero usage regardless of what the canned JSON says — the
    // contract is "dry-run costs nothing".
    return {
      ...canned[requested],
      usage: { input_tokens: 0, output_tokens: 0, usd_cost: 0 },
    };
  };

  // Permissive transition shim: always OK, no real STATE.md mutation.
  // The dry-run's purpose is to exercise the run() dispatch + artifact
  // shape, not to re-test gate logic (that's covered by
  // tests/pipeline-runner.test.ts and tests/mcp-gdd-state.test.ts).
  const transitionStageOverride = async () => ({ ok: true as const });

  return {
    runOverride,
    transitionStageOverride,
  };
}
|
|
608
|
+
|
|
609
|
+
/**
 * Load and validate one canned-<stage>.json file from
 * `<fixtureDir>/expected-outputs/`.
 *
 * Validation is structural only: the file must be readable, parse as JSON,
 * and hold a plain JSON object (not `null`, a primitive, or an array). The
 * object's fields are not checked against SessionResult.
 *
 * @param fixtureDir Fixture root containing `expected-outputs/`.
 * @param stage Stage whose canned session is loaded.
 * @returns The parsed object, typed as SessionResult.
 * @throws ValidationError `DRY_RUN_CANNED_MISSING` when the file cannot be
 *   read; `DRY_RUN_CANNED_INVALID` when it is not a JSON object.
 */
function loadCannedSession(fixtureDir: string, stage: Stage): SessionResult {
  const cannedPath = resolvePath(fixtureDir, 'expected-outputs', `canned-${stage}.json`);
  let raw: string;
  try {
    raw = readFileSync(cannedPath, 'utf8');
  } catch (err) {
    throw new ValidationError(
      `--dry-run: cannot read canned session for stage "${stage}" at "${cannedPath}": ${errMessage(err)}`,
      'DRY_RUN_CANNED_MISSING',
      { stage, path: cannedPath },
    );
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    throw new ValidationError(
      `--dry-run: canned session for stage "${stage}" is not valid JSON: ${errMessage(err)}`,
      'DRY_RUN_CANNED_INVALID',
      { stage, path: cannedPath },
    );
  }
  // `typeof [] === 'object'`, so arrays need an explicit rejection.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new ValidationError(
      `--dry-run: canned session for stage "${stage}" must be a JSON object`,
      'DRY_RUN_CANNED_INVALID',
      { stage, path: cannedPath },
    );
  }
  return parsed as SessionResult;
}
|
|
641
|
+
|
|
642
|
+
/**
 * Write the stub artifacts registered for `stage` in DRY_RUN_ARTIFACTS,
 * each at its relative path joined onto `cwd`. Missing parent directories
 * are created first; an existing file at the target is overwritten, so
 * repeated runs leave the same content on disk.
 */
function writeDryRunArtifacts(cwd: string, stage: Stage): void {
  for (const artifact of DRY_RUN_ARTIFACTS[stage]) {
    const target = joinPath(cwd, artifact.path);
    const parent = dirnamePath(target);
    if (!existsSync(parent)) {
      mkdirSync(parent, { recursive: true });
    }
    writeFileSync(target, artifact.body, 'utf8');
  }
}
|