@hegemonart/get-design-done 1.20.0 → 1.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/.claude-plugin/marketplace.json +9 -12
  2. package/.claude-plugin/plugin.json +8 -31
  3. package/CHANGELOG.md +78 -0
  4. package/README.md +48 -7
  5. package/bin/gdd-sdk +55 -0
  6. package/package.json +15 -47
  7. package/reference/codex-tools.md +53 -0
  8. package/reference/gemini-tools.md +53 -0
  9. package/reference/registry.json +14 -0
  10. package/scripts/e2e/run-headless.ts +514 -0
  11. package/scripts/lib/cli/commands/audit.ts +382 -0
  12. package/scripts/lib/cli/commands/init.ts +217 -0
  13. package/scripts/lib/cli/commands/query.ts +329 -0
  14. package/scripts/lib/cli/commands/run.ts +656 -0
  15. package/scripts/lib/cli/commands/stage.ts +468 -0
  16. package/scripts/lib/cli/index.ts +167 -0
  17. package/scripts/lib/cli/parse-args.ts +336 -0
  18. package/scripts/lib/context-engine/index.ts +116 -0
  19. package/scripts/lib/context-engine/manifest.ts +69 -0
  20. package/scripts/lib/context-engine/truncate.ts +282 -0
  21. package/scripts/lib/context-engine/types.ts +59 -0
  22. package/scripts/lib/discuss-parallel-runner/aggregator.ts +448 -0
  23. package/scripts/lib/discuss-parallel-runner/discussants.ts +430 -0
  24. package/scripts/lib/discuss-parallel-runner/index.ts +223 -0
  25. package/scripts/lib/discuss-parallel-runner/types.ts +184 -0
  26. package/scripts/lib/event-stream/index.ts +11 -1
  27. package/scripts/lib/explore-parallel-runner/index.ts +294 -0
  28. package/scripts/lib/explore-parallel-runner/mappers.ts +290 -0
  29. package/scripts/lib/explore-parallel-runner/synthesizer.ts +295 -0
  30. package/scripts/lib/explore-parallel-runner/types.ts +139 -0
  31. package/scripts/lib/harness/detect.ts +90 -0
  32. package/scripts/lib/harness/index.ts +64 -0
  33. package/scripts/lib/harness/tool-map.ts +142 -0
  34. package/scripts/lib/init-runner/index.ts +396 -0
  35. package/scripts/lib/init-runner/researchers.ts +245 -0
  36. package/scripts/lib/init-runner/scaffold.ts +224 -0
  37. package/scripts/lib/init-runner/synthesizer.ts +224 -0
  38. package/scripts/lib/init-runner/types.ts +143 -0
  39. package/scripts/lib/logger/index.ts +251 -0
  40. package/scripts/lib/logger/sinks.ts +269 -0
  41. package/scripts/lib/logger/types.ts +110 -0
  42. package/scripts/lib/pipeline-runner/human-gate.ts +134 -0
  43. package/scripts/lib/pipeline-runner/index.ts +527 -0
  44. package/scripts/lib/pipeline-runner/stage-handlers.ts +339 -0
  45. package/scripts/lib/pipeline-runner/state-machine.ts +144 -0
  46. package/scripts/lib/pipeline-runner/types.ts +183 -0
  47. package/scripts/lib/session-runner/errors.ts +406 -0
  48. package/scripts/lib/session-runner/index.ts +715 -0
  49. package/scripts/lib/session-runner/transcript.ts +189 -0
  50. package/scripts/lib/session-runner/types.ts +144 -0
  51. package/scripts/lib/tool-scoping/index.ts +219 -0
  52. package/scripts/lib/tool-scoping/parse-agent-tools.ts +207 -0
  53. package/scripts/lib/tool-scoping/stage-scopes.ts +139 -0
  54. package/scripts/lib/tool-scoping/types.ts +77 -0
@@ -0,0 +1,656 @@
1
+ // scripts/lib/cli/commands/run.ts — Plan 21-09 Task 2 (SDK-21),
2
+ // extended by Plan 21-11 Task 3 (dry-run).
3
+ //
4
+ // `gdd-sdk run` — drives the full design pipeline via
5
+ // `pipeline-runner.run()`. Builds a PipelineConfig from CLI flags,
6
+ // loads per-stage prompts (from --prompt-file mapping, from
7
+ // `.design/prompts/<stage>.md`, or embedded defaults), wires the
8
+ // human-gate callback, prints the outcome as JSON or human text, and
9
+ // maps pipeline status to an exit code.
10
+ //
11
+ // Exit codes:
12
+ // * 0 — PipelineStatus === 'completed' or 'stopped-after'.
13
+ // * 1 — PipelineStatus === 'halted'.
14
+ // * 2 — PipelineStatus === 'awaiting-gate'.
15
+ // * 3 — argument / config error (missing prompts, malformed flags).
16
+ //
17
+ // --dry-run (Plan 21-11):
18
+ // Installs a mocked session-runner that reads canned SessionResult
19
+ // objects from `<fixture>/expected-outputs/canned-<stage>.json` (`--fixture`, defaulting to `--cwd`) plus a
20
+ // permissive transition-stage override. Each mock "session" also
21
+ // writes the stage-appropriate artifact (DESIGN-PATTERNS.md,
22
+ // DESIGN-PLAN.md, DESIGN.md, SUMMARY.md) under `<cwd>/.design/`
23
+ // so callers can assert artifact shape without a real API call.
24
+ // Zero API cost. Intended for CI; the fixture at
25
+ // `test-fixture/headless-e2e/` is the canonical consumer.
26
+
27
+ import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
28
+ import { resolve as resolvePath, join as joinPath, dirname as dirnamePath } from 'node:path';
29
+
30
+ import {
31
+ run as defaultPipelineRun,
32
+ type HumanGateDecision,
33
+ type HumanGateInfo,
34
+ type PipelineConfig,
35
+ type PipelineResult,
36
+ type RunOverrides,
37
+ type Stage,
38
+ type StageOutcome,
39
+ } from '../../pipeline-runner/index.ts';
40
+ import { getLogger } from '../../logger/index.ts';
41
+ import { ValidationError } from '../../gdd-errors/index.ts';
42
+ import type { SessionResult, SessionRunnerOptions } from '../../session-runner/types.ts';
43
+
44
+ import {
45
+ coerceFlags,
46
+ COMMON_FLAGS,
47
+ type FlagSpec,
48
+ type ParsedArgs,
49
+ } from '../parse-args.ts';
50
+
51
+ // ---------------------------------------------------------------------------
52
+ // Flag spec + help text.
53
+ // ---------------------------------------------------------------------------
54
+
55
// Flags accepted by `gdd-sdk run`: the shared COMMON_FLAGS followed by the
// run-specific ones. Every run-specific flag is a string except `dry-run`,
// which is a boolean defaulting to false.
const RUN_FLAGS: readonly FlagSpec[] = [
  ...COMMON_FLAGS,
  ...(
    ['stages', 'skip', 'resume-from', 'stop-after', 'prompt-file', 'gate-reply'] as const
  ).map((name) => ({ name, type: 'string' as const })),
  { name: 'dry-run', type: 'boolean', default: false },
  { name: 'fixture', type: 'string' },
];
66
+
67
// Help text printed by `gdd-sdk run --help`. Every value-taking flag shows a
// placeholder (`<n>`, `<dir>`, ...) so the text matches how the flags are
// parsed; descriptions are aligned in a single column.
const USAGE = `gdd-sdk run [flags]

Drive the full design pipeline headlessly.

Flags:
  --stages <list>             Comma-separated subset: brief,explore,plan,design,verify
  --skip <list>               Comma-separated stages to skip
  --resume-from <stage>       Start from this stage (inclusive)
  --stop-after <stage>        Stop after this stage (inclusive)
  --prompt-file <spec>        stage=path pairs; e.g., --prompt-file brief=./prompts/brief.md
  --gate-reply <mode>         Canned reply when a human-gate pauses:
                                stop             — halt with awaiting-gate (default)
                                resume[:payload] — resume with optional payload
  --budget-usd <n>            Total USD cap (default 10.0)
  --budget-input-tokens <n>   Input-token cap (default 200000)
  --budget-output-tokens <n>  Output-token cap (default 50000)
  --max-turns <n>             Per-stage turn cap (default 40)
  --cwd <dir>                 Working directory (default: current)
  --log-level <lvl>           debug|info|warn|error (default info)
  --json                      Emit machine-parseable JSON to stdout
  --text                      Force human-readable output (default)
  --headless / --interactive  Override logger auto-mode
  --dry-run                   Mock mode: read canned SessionResults from
                              <fixture>/expected-outputs/canned-<stage>.json
                              and write stub artifacts under <cwd>/.design/.
                              Zero API cost; used by the E2E fixture test
                              harness (test-fixture/headless-e2e/).
  --fixture <dir>             Override the fixture root whose
                              expected-outputs/ directory supplies canned
                              SessionResults. Defaults to --cwd.

Exit codes:
  0  completed / stopped-after
  1  halted
  2  awaiting-gate
  3  arg/config error
`;
104
+
105
+ // ---------------------------------------------------------------------------
106
+ // Public types.
107
+ // ---------------------------------------------------------------------------
108
+
109
/**
 * Test-injection point. Default resolves to `pipeline-runner.run`. Keeps
 * unit tests independent of the real Agent SDK.
 *
 * Declared as `typeof defaultPipelineRun` so an injected replacement must
 * match the real runner's signature exactly.
 */
export type PipelineRunFn = typeof defaultPipelineRun;

/**
 * Optional collaborators for `runCommand`. Every field may be omitted.
 * Writable streams allow deterministic test capture.
 */
export interface RunCommandDeps {
  /** Replacement pipeline driver. */
  readonly pipelineRun?: PipelineRunFn;
  /** Destination for result output and `--help` text. */
  readonly stdout?: NodeJS.WritableStream;
  /** Destination for diagnostics and human-gate notices. */
  readonly stderr?: NodeJS.WritableStream;
}
121
+
122
+ // ---------------------------------------------------------------------------
123
+ // runCommand — entry point.
124
+ // ---------------------------------------------------------------------------
125
+
126
// The five pipeline stages. `runCommand` uses this list as the prompt-loading
// subset when `--stages` is not supplied.
const ALL_STAGES: readonly Stage[] = ['brief', 'explore', 'plan', 'design', 'verify'];
127
+
128
/**
 * Entry point for `gdd-sdk run`. Returns the process exit code (never
 * throws). All diagnostic output goes to stderr; result output to stdout.
 *
 * Exit codes:
 *   0 — pipeline status `completed` or `stopped-after` (also `--help`).
 *   1 — any other pipeline status (e.g. `halted`).
 *   2 — pipeline status `awaiting-gate`.
 *   3 — argument / config error, or an unexpected throw from the pipeline.
 *
 * @param args Parsed CLI arguments; `args.flags` is coerced against RUN_FLAGS.
 * @param deps Optional injection points (pipeline driver, output streams).
 * @returns The exit code the caller should terminate the process with.
 */
export async function runCommand(
  args: ParsedArgs,
  deps: RunCommandDeps = {},
): Promise<number> {
  const stdout = deps.stdout ?? process.stdout;
  const stderr = deps.stderr ?? process.stderr;

  if (args.flags['help'] === true || args.flags['h'] === true) {
    stdout.write(USAGE);
    return 0;
  }

  let flags: Record<string, unknown>;
  try {
    flags = coerceFlags(args, RUN_FLAGS);
  } catch (err) {
    stderr.write(`gdd-sdk run: ${errMessage(err)}\n`);
    return 3;
  }

  const cwd: string = typeof flags['cwd'] === 'string' ? (flags['cwd'] as string) : process.cwd();

  // Resolve stages / skip / resumeFrom / stopAfter. All four parsers throw
  // ValidationError on an unknown stage name, so they share one guard: a bad
  // --resume-from / --stop-after must yield exit 3 just like a bad --stages,
  // not a rejected promise.
  let stages: readonly Stage[] | undefined;
  let skipStages: readonly Stage[] | undefined;
  let resumeFrom: Stage | undefined;
  let stopAfter: Stage | undefined;
  try {
    stages = parseStageList(flags['stages']);
    skipStages = parseStageList(flags['skip']);
    resumeFrom = parseSingleStage(flags['resume-from']);
    stopAfter = parseSingleStage(flags['stop-after']);
  } catch (err) {
    stderr.write(`gdd-sdk run: ${errMessage(err)}\n`);
    return 3;
  }

  // Resolve the stage subset used for prompt loading. Defaults to all 5.
  const effectiveStages: readonly Stage[] = stages ?? ALL_STAGES;

  // Load prompts.
  let prompts: Record<Stage, string>;
  try {
    prompts = loadPrompts(effectiveStages, flags, cwd);
  } catch (err) {
    stderr.write(`gdd-sdk run: ${errMessage(err)}\n`);
    return 3;
  }

  // Build budget. Non-numeric / absent flags fall back to the documented defaults.
  const budget = {
    usdLimit: typeof flags['budget-usd'] === 'number' ? (flags['budget-usd'] as number) : 10.0,
    inputTokensLimit:
      typeof flags['budget-input-tokens'] === 'number'
        ? (flags['budget-input-tokens'] as number)
        : 200_000,
    outputTokensLimit:
      typeof flags['budget-output-tokens'] === 'number'
        ? (flags['budget-output-tokens'] as number)
        : 50_000,
    perStage: true as const,
  };

  const maxTurnsPerStage: number =
    typeof flags['max-turns'] === 'number' ? (flags['max-turns'] as number) : 40;

  // Human-gate callback — default STOP (exit code 2); optional canned
  // `--gate-reply resume[:payload]` lets tests / operators pre-seed a
  // decision without an interactive prompt.
  const gateReply: string | undefined =
    typeof flags['gate-reply'] === 'string' ? (flags['gate-reply'] as string) : undefined;
  const onHumanGate: (info: HumanGateInfo) => Promise<HumanGateDecision> = async (info) => {
    // Always surface gate info to stderr so operators see it even in
    // --json mode (where stdout carries the result JSON).
    stderr.write(
      `gdd-sdk run: human gate "${info.gateName}" at stage "${info.stage}"\n`,
    );
    if (gateReply === undefined) return { decision: 'stop' };
    if (gateReply === 'stop') return { decision: 'stop' };
    if (gateReply === 'resume') return { decision: 'resume' };
    if (gateReply.startsWith('resume:')) {
      return { decision: 'resume', payload: gateReply.slice('resume:'.length) };
    }
    // Unknown reply: warn and take the safe default rather than failing the run.
    stderr.write(
      `gdd-sdk run: unrecognized --gate-reply "${gateReply}"; defaulting to stop\n`,
    );
    return { decision: 'stop' };
  };

  // Optional keys are only added when set, so the config never carries
  // explicit `undefined` values.
  const config: PipelineConfig = {
    prompts,
    budget,
    maxTurnsPerStage,
    stageRetries: 1,
    ...(stages !== undefined ? { stages } : {}),
    ...(skipStages !== undefined ? { skipStages } : {}),
    ...(resumeFrom !== undefined ? { resumeFrom } : {}),
    ...(stopAfter !== undefined ? { stopAfter } : {}),
    cwd,
    onHumanGate,
  };

  const pipelineRun: PipelineRunFn = deps.pipelineRun ?? defaultPipelineRun;

  // Plan 21-11: --dry-run installs canned session overrides + a
  // permissive transition shim. Artifacts are written to disk by the
  // override so assertions can still check artifact shape.
  let overrides: RunOverrides = {};
  if (flags['dry-run'] === true) {
    // A relative --fixture is resolved against the process cwd, not --cwd.
    const fixtureDir: string =
      typeof flags['fixture'] === 'string' && (flags['fixture'] as string).length > 0
        ? resolvePath(process.cwd(), flags['fixture'] as string)
        : cwd;
    try {
      overrides = buildDryRunOverrides(cwd, fixtureDir);
    } catch (err) {
      stderr.write(`gdd-sdk run: ${errMessage(err)}\n`);
      return 3;
    }
    try {
      getLogger().info('cli.run.dry_run_enabled', {
        fixture: fixtureDir,
        cwd,
      });
    } catch {
      // Swallow logger failures.
    }
  }

  let result: PipelineResult;
  try {
    result = await pipelineRun(config, overrides);
  } catch (err) {
    // pipeline-runner is contracted never to throw, but belt-and-braces:
    // surface the error as exit 3 rather than crashing.
    try {
      getLogger().error('cli.run.unexpected_error', {
        error: err instanceof Error ? err.message : String(err),
      });
    } catch {
      // Swallow logger failures.
    }
    stderr.write(`gdd-sdk run: unexpected error: ${errMessage(err)}\n`);
    return 3;
  }

  // Output.
  if (flags['json'] === true) {
    stdout.write(JSON.stringify(result, null, 2) + '\n');
  } else {
    stdout.write(renderHumanSummary(result));
  }

  // Exit code mapping.
  if (result.status === 'completed' || result.status === 'stopped-after') return 0;
  if (result.status === 'awaiting-gate') return 2;
  return 1;
}
293
+
294
+ // ---------------------------------------------------------------------------
295
+ // Helpers.
296
+ // ---------------------------------------------------------------------------
297
+
298
/**
 * Default embedded prompt body when no file is supplied. One entry per
 * pipeline stage; `loadPrompts` starts from these and lets prompt files
 * override them.
 */
const DEFAULT_PROMPTS: Readonly<Record<Stage, string>> = Object.freeze({
  brief: 'Draft the design brief. Follow SKILL.md for the stage.',
  explore: 'Run the explore-stage mappers and synthesize DESIGN-PATTERNS.md.',
  plan: 'Plan the design changes. Produce locked decisions + must-haves.',
  design: 'Implement design-stage deliverables per plan.',
  verify: 'Verify design deliverables; close must-haves; probe regressions.',
});
306
+
307
/**
 * Resolve the prompt text for every pipeline stage.
 *
 * Three layers are applied, later ones winning:
 *   1. the embedded DEFAULT_PROMPTS;
 *   2. `<cwd>/.design/prompts/<stage>.md` for each stage in `stages`
 *      (silently skipped when the file cannot be read);
 *   3. explicit `--prompt-file stage=path` pairs, separated by `,` or `;`,
 *      with paths resolved against `cwd`.
 *
 * @param stages Stages whose convention file should be probed.
 * @param flags  Coerced CLI flags; only `prompt-file` is consulted.
 * @param cwd    Base directory for convention files and relative paths.
 * @returns A prompt for all five stages.
 * @throws ValidationError when a `--prompt-file` pair is malformed, names an
 *         unknown stage, or points at an unreadable file.
 */
function loadPrompts(
  stages: readonly Stage[],
  flags: Record<string, unknown>,
  cwd: string,
): Record<Stage, string> {
  // Layer 1: embedded defaults.
  const resolved: Record<Stage, string> = { ...DEFAULT_PROMPTS };

  // Layer 2: convention files. A missing or unreadable file keeps the default.
  for (const stage of stages) {
    const conventionPath = resolvePath(cwd, '.design/prompts', `${stage}.md`);
    try {
      resolved[stage] = readFileSync(conventionPath, 'utf8');
    } catch {
      // Best-effort: falls back to default.
    }
  }

  // Layer 3: explicit mapping. Nothing to do unless a non-empty string was given.
  const spec = flags['prompt-file'];
  if (typeof spec !== 'string' || spec.length === 0) {
    return resolved;
  }

  // Repeated --prompt-file flags collapse to the last one in coerceFlags, so
  // several pairs may be packed into one value with `,` or `;` separators.
  const entries = spec
    .split(/[,;]/)
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);

  for (const entry of entries) {
    const sep = entry.indexOf('=');
    if (sep < 0) {
      throw new ValidationError(
        `--prompt-file expected stage=path, got "${entry}"`,
        'BAD_PROMPT_FILE_SPEC',
        { spec: entry },
      );
    }

    const stageName = entry.slice(0, sep).trim();
    const filePath = entry.slice(sep + 1).trim();
    if (!isStage(stageName)) {
      throw new ValidationError(
        `--prompt-file stage "${stageName}" is not one of brief|explore|plan|design|verify`,
        'BAD_PROMPT_FILE_STAGE',
        { stageName },
      );
    }

    const absPath = resolvePath(cwd, filePath);
    try {
      resolved[stageName] = readFileSync(absPath, 'utf8');
    } catch (err) {
      // Unlike convention files, an explicitly requested file must exist.
      throw new ValidationError(
        `--prompt-file ${stageName}: cannot read "${filePath}": ${errMessage(err)}`,
        'PROMPT_FILE_READ_ERROR',
        { stage: stageName, path: absPath },
      );
    }
  }

  return resolved;
}
374
+
375
/**
 * Parse a comma-separated stage list such as `"brief, plan"`.
 *
 * @param value Raw flag value; anything that is not a non-empty string is
 *              treated as "flag not supplied".
 * @returns The stages in the order given, or `undefined` when the flag is
 *          absent or contains no names.
 * @throws ValidationError (`INVALID_STAGE_NAME`) on the first unknown name.
 */
function parseStageList(value: unknown): readonly Stage[] | undefined {
  if (typeof value !== 'string' || value === '') return undefined;

  const collected: Stage[] = [];
  for (const token of value.split(',')) {
    const name = token.trim();
    if (name === '') continue; // tolerate stray commas / padding
    if (!isStage(name)) {
      throw new ValidationError(
        `stage "${name}" is not one of brief|explore|plan|design|verify`,
        'INVALID_STAGE_NAME',
        { stage: name },
      );
    }
    collected.push(name);
  }
  return collected.length > 0 ? collected : undefined;
}
392
+
393
/**
 * Parse a flag that names exactly one stage (`--resume-from`, `--stop-after`).
 *
 * @param value Raw flag value; non-strings and blank strings mean "not supplied".
 * @returns The stage, or `undefined` when the flag is absent or blank.
 * @throws ValidationError (`INVALID_STAGE_NAME`) when the name is unknown.
 */
function parseSingleStage(value: unknown): Stage | undefined {
  if (typeof value !== 'string') return undefined;

  const name = value.trim();
  if (name.length === 0) return undefined;
  if (isStage(name)) return name;

  throw new ValidationError(
    `stage "${name}" is not one of brief|explore|plan|design|verify`,
    'INVALID_STAGE_NAME',
    { stage: name },
  );
}
407
+
408
/** Type guard: true when `s` is exactly one of the five pipeline stage names. */
function isStage(s: string): s is Stage {
  switch (s) {
    case 'brief':
    case 'explore':
    case 'plan':
    case 'design':
    case 'verify':
      return true;
    default:
      return false;
  }
}
413
+
414
/** Extract a printable message: `Error#message` for errors, `String(err)` otherwise. */
function errMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}
418
+
419
/**
 * Render a pipeline result as newline-terminated human-readable text:
 * status, optional halt / gate lines, total usage, then one line per stage
 * outcome.
 */
function renderHumanSummary(result: PipelineResult): string {
  const usage = result.total_usage;

  const header: string[] = [
    `pipeline status: ${result.status}`,
    // Halt and gate lines appear only when the result carries them.
    ...(result.halted_at !== undefined ? [`halted at stage: ${result.halted_at}`] : []),
    ...(result.gate !== undefined
      ? [`awaiting gate: ${result.gate.gateName} (stage=${result.gate.stage})`]
      : []),
    `total usage: input=${usage.input_tokens} ` +
      `output=${usage.output_tokens} cost=$${usage.usd_cost.toFixed(4)}`,
    'stage outcomes:',
  ];
  const outcomeLines = result.outcomes.map((outcome) => ` ${formatOutcome(outcome)}`);

  return [...header, ...outcomeLines].join('\n') + '\n';
}
438
+
439
/**
 * Format one stage outcome as `<stage>: <status>`, appending ` retries=N`
 * when retries occurred and ` blockers=[a; b]` when blockers are present.
 */
function formatOutcome(outcome: StageOutcome): string {
  let line = `${outcome.stage}: ${outcome.status}`;
  if (outcome.retries > 0) {
    line += ` retries=${outcome.retries}`;
  }
  if (outcome.blockers !== undefined && outcome.blockers.length > 0) {
    line += ` blockers=[${outcome.blockers.join('; ')}]`;
  }
  return line;
}
447
+
448
+ // ---------------------------------------------------------------------------
449
+ // Dry-run support — Plan 21-11 Task 3.
450
+ //
451
+ // Reads canned SessionResult objects from
452
+ // `<fixtureDir>/expected-outputs/canned-<stage>.json` and writes a
453
+ // stub artifact per stage under `<cwd>/.design/` so downstream
454
+ // assertions can still grep for artifact shape. Transition-stage gate
455
+ // is bypassed with an always-OK override (the dry-run is about shape
456
+ // assertions, not full state-machine gating — that is exercised by
457
+ // the pipeline-runner unit test suite).
458
+ // ---------------------------------------------------------------------------
459
+
460
/**
 * Stub documents the dry-run override writes in place of real LLM output,
 * keyed by stage. Each body carries the structural markers the Plan 21-11
 * harness asserts on (`## Tokens`, `## Components`, `Wave`, etc.) and ends
 * with a trailing newline.
 */
const DRY_RUN_ARTIFACTS: Readonly<Record<Stage, readonly { readonly path: string; readonly body: string }[]>> =
  (() => {
    // One artifact = a path relative to cwd plus its lines joined by '\n'.
    const artifact = (path: string, lines: readonly string[]) => ({
      path,
      body: lines.join('\n'),
    });

    return Object.freeze({
      brief: [
        artifact('.design/BRIEF.md', [
          '# Design Brief — dry-run',
          '',
          '**Goal:** Audit design-system consistency; extract tokens; normalize spacing.',
          '',
          '## BRIEF COMPLETE',
          '',
        ]),
      ],
      explore: [
        artifact('.design/DESIGN-PATTERNS.md', [
          '# Design Patterns — dry-run',
          '',
          '## Tokens',
          '- #0066ff (button primary)',
          '- #111 (heading)',
          '- #f5f5f5 (page bg)',
          '- #ffffff (card bg)',
          '',
          '## Components',
          '- Button (2 variants: Start, Continue)',
          '- Card (title + children slot)',
          '',
          '## Accessibility',
          '- Button has no focus-visible ring; recommend outline or ring token.',
          '',
          '## Visual Hierarchy',
          '- h1 28px / h2 inherits; recommend locking to scale token.',
          '',
          '## EXPLORE COMPLETE',
          '',
        ]),
      ],
      plan: [
        artifact('.design/DESIGN-PLAN.md', [
          '# Design Plan — dry-run',
          '',
          '## Wave 1 — Token extraction',
          'Type: refactor',
          'Touches: src/components/Button.tsx, src/components/Card.tsx',
          'Parallel: yes',
          'Acceptance: every hex literal replaced with a CSS custom property.',
          '',
          '## Wave 2 — Spacing normalization',
          'Type: refactor',
          'Touches: src/components/Button.tsx, src/components/Card.tsx, src/App.tsx',
          'Parallel: no',
          'Acceptance: all padding values are multiples of 4px.',
          '',
          '## PLAN COMPLETE',
          '',
        ]),
      ],
      design: [
        artifact('.design/DESIGN.md', [
          '# Design — dry-run',
          '',
          '## Tokens',
          '- --color-primary: #0066ff',
          '- --color-text: #111',
          '- --space-2: 8px',
          '- --space-3: 12px',
          '- --space-4: 16px',
          '',
          '## DESIGN COMPLETE',
          '',
        ]),
      ],
      verify: [
        artifact('.design/SUMMARY.md', [
          '# Summary — dry-run',
          '',
          '- M-01: pass (every hex literal extracted into a token)',
          '- M-02: pass (all padding values are multiples of 4px)',
          '',
          '## VERIFY COMPLETE',
          '',
        ]),
      ],
    });
  })();
563
+
564
/**
 * Build the RunOverrides bundle that drives --dry-run.
 *
 * Reads and validates each canned-<stage>.json up-front so missing or
 * malformed files fail fast (exit 3) before the pipeline enters stage
 * dispatch.
 *
 * @param cwd        Directory under which stub artifacts are written.
 * @param fixtureDir Directory whose `expected-outputs/` holds the canned files.
 * @returns A session override that writes the stage's stub artifacts and
 *          returns the canned result with usage zeroed, plus a transition
 *          override that always reports success.
 * @throws ValidationError when any of the five canned files is missing or
 *         invalid (propagated from `loadCannedSession`).
 */
function buildDryRunOverrides(cwd: string, fixtureDir: string): RunOverrides {
  // Pre-load every canned SessionResult so missing/malformed files
  // surface as a single validation error, not partway through a run.
  const canned: Record<Stage, SessionResult> = {
    brief: loadCannedSession(fixtureDir, 'brief'),
    explore: loadCannedSession(fixtureDir, 'explore'),
    plan: loadCannedSession(fixtureDir, 'plan'),
    design: loadCannedSession(fixtureDir, 'design'),
    verify: loadCannedSession(fixtureDir, 'verify'),
  };

  const runOverride = async (opts: SessionRunnerOptions): Promise<SessionResult> => {
    // `opts.stage` is wider than the pipeline Stage union (it also admits
    // `init` + `custom`). Only pipeline stages have canned data and stub
    // artifacts, so check at runtime instead of casting: an unexpected stage
    // fails with a descriptive error rather than an opaque TypeError.
    const requested = String(opts.stage);
    if (!isStage(requested)) {
      throw new ValidationError(
        `--dry-run: no canned session for non-pipeline stage "${requested}"`,
        'DRY_RUN_UNSUPPORTED_STAGE',
        { stage: requested },
      );
    }
    writeDryRunArtifacts(cwd, requested);
    // Force zero usage regardless of what the canned JSON says — the
    // contract is "dry-run costs nothing".
    return {
      ...canned[requested],
      usage: { input_tokens: 0, output_tokens: 0, usd_cost: 0 },
    };
  };

  // Permissive transition shim: always OK, no real STATE.md mutation.
  // The dry-run's purpose is to exercise the run() dispatch + artifact
  // shape, not to re-test gate logic (that's covered by
  // tests/pipeline-runner.test.ts and tests/mcp-gdd-state.test.ts).
  const transitionStageOverride = async () => ({ ok: true as const });

  return {
    runOverride,
    transitionStageOverride,
  };
}
608
+
609
/**
 * Load and validate one canned-<stage>.json file.
 *
 * Validation is shallow: the file must exist, parse as JSON, and hold a
 * plain JSON object at the top level (not `null`, a primitive, or an
 * array). Individual SessionResult fields are not checked.
 *
 * @param fixtureDir Directory containing `expected-outputs/`.
 * @param stage      Stage whose canned result is wanted.
 * @returns The parsed object, typed as SessionResult.
 * @throws ValidationError `DRY_RUN_CANNED_MISSING` when the file cannot be
 *         read; `DRY_RUN_CANNED_INVALID` when it is not a JSON object.
 */
function loadCannedSession(fixtureDir: string, stage: Stage): SessionResult {
  const cannedPath = resolvePath(fixtureDir, 'expected-outputs', `canned-${stage}.json`);
  let raw: string;
  try {
    raw = readFileSync(cannedPath, 'utf8');
  } catch (err) {
    throw new ValidationError(
      `--dry-run: cannot read canned session for stage "${stage}" at "${cannedPath}": ${errMessage(err)}`,
      'DRY_RUN_CANNED_MISSING',
      { stage, path: cannedPath },
    );
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    throw new ValidationError(
      `--dry-run: canned session for stage "${stage}" is not valid JSON: ${errMessage(err)}`,
      'DRY_RUN_CANNED_INVALID',
      { stage, path: cannedPath },
    );
  }
  // `typeof [] === 'object'`, so arrays need an explicit rejection to honour
  // the "must be a JSON object" contract.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new ValidationError(
      `--dry-run: canned session for stage "${stage}" must be a JSON object`,
      'DRY_RUN_CANNED_INVALID',
      { stage, path: cannedPath },
    );
  }
  return parsed as SessionResult;
}
641
+
642
/**
 * Materialise the stub artifacts for `stage` beneath `cwd`. Missing parent
 * directories are created first; existing files are overwritten, so
 * repeated calls leave the same content on disk.
 */
function writeDryRunArtifacts(cwd: string, stage: Stage): void {
  DRY_RUN_ARTIFACTS[stage].forEach((artifact) => {
    const target = joinPath(cwd, artifact.path);
    const parent = dirnamePath(target);
    if (!existsSync(parent)) {
      mkdirSync(parent, { recursive: true });
    }
    writeFileSync(target, artifact.body, 'utf8');
  });
}