@hegemonart/get-design-done 1.20.0 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/.claude-plugin/marketplace.json +9 -12
  2. package/.claude-plugin/plugin.json +8 -31
  3. package/CHANGELOG.md +200 -0
  4. package/README.md +48 -7
  5. package/bin/gdd-sdk +55 -0
  6. package/hooks/_hook-emit.js +81 -0
  7. package/hooks/gdd-bash-guard.js +8 -0
  8. package/hooks/gdd-decision-injector.js +2 -0
  9. package/hooks/gdd-protected-paths.js +8 -0
  10. package/hooks/gdd-trajectory-capture.js +64 -0
  11. package/hooks/hooks.json +9 -0
  12. package/package.json +19 -47
  13. package/reference/codex-tools.md +53 -0
  14. package/reference/gemini-tools.md +53 -0
  15. package/reference/registry.json +14 -0
  16. package/scripts/cli/gdd-events.mjs +283 -0
  17. package/scripts/e2e/run-headless.ts +514 -0
  18. package/scripts/lib/cli/commands/audit.ts +382 -0
  19. package/scripts/lib/cli/commands/init.ts +217 -0
  20. package/scripts/lib/cli/commands/query.ts +329 -0
  21. package/scripts/lib/cli/commands/run.ts +656 -0
  22. package/scripts/lib/cli/commands/stage.ts +468 -0
  23. package/scripts/lib/cli/index.ts +167 -0
  24. package/scripts/lib/cli/parse-args.ts +336 -0
  25. package/scripts/lib/connection-probe/index.cjs +263 -0
  26. package/scripts/lib/context-engine/index.ts +116 -0
  27. package/scripts/lib/context-engine/manifest.ts +69 -0
  28. package/scripts/lib/context-engine/truncate.ts +282 -0
  29. package/scripts/lib/context-engine/types.ts +59 -0
  30. package/scripts/lib/discuss-parallel-runner/aggregator.ts +448 -0
  31. package/scripts/lib/discuss-parallel-runner/discussants.ts +430 -0
  32. package/scripts/lib/discuss-parallel-runner/index.ts +223 -0
  33. package/scripts/lib/discuss-parallel-runner/types.ts +184 -0
  34. package/scripts/lib/event-chain.cjs +177 -0
  35. package/scripts/lib/event-stream/index.ts +31 -1
  36. package/scripts/lib/event-stream/reader.ts +139 -0
  37. package/scripts/lib/event-stream/types.ts +155 -1
  38. package/scripts/lib/event-stream/writer.ts +65 -8
  39. package/scripts/lib/explore-parallel-runner/index.ts +294 -0
  40. package/scripts/lib/explore-parallel-runner/mappers.ts +290 -0
  41. package/scripts/lib/explore-parallel-runner/synthesizer.ts +295 -0
  42. package/scripts/lib/explore-parallel-runner/types.ts +139 -0
  43. package/scripts/lib/harness/detect.ts +90 -0
  44. package/scripts/lib/harness/index.ts +64 -0
  45. package/scripts/lib/harness/tool-map.ts +142 -0
  46. package/scripts/lib/init-runner/index.ts +396 -0
  47. package/scripts/lib/init-runner/researchers.ts +245 -0
  48. package/scripts/lib/init-runner/scaffold.ts +224 -0
  49. package/scripts/lib/init-runner/synthesizer.ts +224 -0
  50. package/scripts/lib/init-runner/types.ts +143 -0
  51. package/scripts/lib/logger/index.ts +251 -0
  52. package/scripts/lib/logger/sinks.ts +269 -0
  53. package/scripts/lib/logger/types.ts +110 -0
  54. package/scripts/lib/pipeline-runner/human-gate.ts +134 -0
  55. package/scripts/lib/pipeline-runner/index.ts +527 -0
  56. package/scripts/lib/pipeline-runner/stage-handlers.ts +339 -0
  57. package/scripts/lib/pipeline-runner/state-machine.ts +144 -0
  58. package/scripts/lib/pipeline-runner/types.ts +183 -0
  59. package/scripts/lib/redact.cjs +122 -0
  60. package/scripts/lib/session-runner/errors.ts +406 -0
  61. package/scripts/lib/session-runner/index.ts +715 -0
  62. package/scripts/lib/session-runner/transcript.ts +189 -0
  63. package/scripts/lib/session-runner/types.ts +144 -0
  64. package/scripts/lib/tool-scoping/index.ts +219 -0
  65. package/scripts/lib/tool-scoping/parse-agent-tools.ts +207 -0
  66. package/scripts/lib/tool-scoping/stage-scopes.ts +139 -0
  67. package/scripts/lib/tool-scoping/types.ts +77 -0
  68. package/scripts/lib/trajectory/index.cjs +126 -0
  69. package/scripts/lib/transports/ws.cjs +179 -0
@@ -0,0 +1,339 @@
1
+ // scripts/lib/pipeline-runner/stage-handlers.ts — Plan 21-05 Task 3.
2
+ //
3
+ // Invokes a single pipeline stage. Wires together:
4
+ // * context-engine → builds the per-stage file bundle + renders it
5
+ // * tool-scoping → enforces the allowed-tools set
6
+ // * session-runner → runs the headless Agent SDK session
7
+ //
8
+ // Stage-level retry-once is implemented here via recursion on a
9
+ // `retries` budget. Test harnesses inject mocks via the override args.
10
+ //
11
+ // Mapping from `SessionResult.status` → `StageOutcome.status`:
12
+ // completed → completed (OR halted-human-gate if AWAIT_USER_GATE)
13
+ // budget_exceeded → halted-budget
14
+ // turn_cap_exceeded → halted-turn-cap
15
+ // aborted → halted-error (external cancel)
16
+ // error + retryable → recurse with retries - 1
17
+ // error otherwise → halted-error
18
+
19
+ import type { Stage, PipelineConfig, StageOutcome, HumanGateInfo } from './types.ts';
20
+ import type {
21
+ SessionResult,
22
+ SessionRunnerOptions,
23
+ } from '../session-runner/types.ts';
24
+ import type { ContextBundle, Stage as ContextStage } from '../context-engine/types.ts';
25
+ import {
26
+ buildContextBundle as defaultBuildBundle,
27
+ renderBundle,
28
+ } from '../context-engine/index.ts';
29
+ import {
30
+ enforceScope,
31
+ parseAgentToolsByName,
32
+ type Stage as ScopeStage,
33
+ } from '../tool-scoping/index.ts';
34
+ import { run as defaultRun } from '../session-runner/index.ts';
35
+ import { extractGateMarker } from './human-gate.ts';
36
+
37
+ /**
38
+ * Test-injection overrides for `invokeStage`. Every override is
39
+ * optional; omitted overrides fall through to the real module.
40
+ */
41
+ export interface InvokeStageOverrides {
42
+ /** Override session-runner.run — defaults to the real `run`. If the override throws, `invokeStage` catches it and reports `halted-error`. */
43
+ readonly runOverride?: (opts: SessionRunnerOptions) => Promise<SessionResult>;
44
+ /** Override context-engine.buildContextBundle — defaults to the real builder. Called with the stage and `config.cwd`. */
45
+ readonly bundleOverride?: (stage: Stage, cwd?: string) => ContextBundle;
46
+ /**
47
+ * Override tool-scoping.enforceScope — defaults to real enforcement.
48
+ * Returns the final `allowedTools` list. `agentPath` is supplied only when `config.agentsByStage` has an entry for the stage.
49
+ */
50
+ readonly scopeOverride?: (stage: Stage, agentPath?: string) => readonly string[];
51
+ }
52
+
53
+ /** Arguments for a single `invokeStage` call: the stage to run, the pipeline config, the retry budget, and the optional test overrides. */
+ export interface InvokeStageArgs extends InvokeStageOverrides {
54
+ readonly stage: Stage;
55
+ readonly config: PipelineConfig;
56
+ /** Remaining retry budget (0 or 1). */
57
+ readonly retries: 0 | 1;
58
+ /** Retries already consumed (default 0). Set by the internal retry recursion and echoed back as `outcome.retries`; also usable as a test hook. */
59
+ readonly _retriesConsumed?: number;
60
+ /**
61
+ * Optional prompt suffix to append (used by the driver to inject
62
+ * human-gate resume payloads into a retry). An empty string is treated as absent.
63
+ */
64
+ readonly _promptSuffix?: string;
65
+ }
66
+
67
/**
 * Compose the prompt text for one stage.
 *
 * The rendered context bundle comes first, followed by the stage's
 * configured prompt and — when a non-empty `promptSuffix` is supplied —
 * the suffix. Sections are separated by a horizontal-rule divider.
 *
 * @param args.stage        Stage whose prompt is looked up in `config.prompts`.
 * @param args.config       Pipeline config carrying the per-stage prompts.
 * @param args.bundle       Context bundle to render ahead of the prompt.
 * @param args.promptSuffix Optional trailing section; ignored when empty.
 * @returns The concatenated prompt string.
 */
function buildStagePrompt(args: {
  stage: Stage;
  config: PipelineConfig;
  bundle: ContextBundle;
  promptSuffix?: string;
}): string {
  const divider = '\n\n---\n\n';
  const { bundle, config, stage, promptSuffix } = args;

  const segments: string[] = [renderBundle(bundle), divider, config.prompts[stage]];

  const hasSuffix = promptSuffix !== undefined && promptSuffix !== '';
  // Array#join is kept (rather than `+`) so a missing segment renders as
  // an empty string instead of the literal text "undefined".
  return (hasSuffix ? [...segments, divider, promptSuffix] : segments).join('');
}
85
+
86
/**
 * Translate a session-runner result into the stage-level status.
 *
 * A cleanly completed session is additionally scanned for a human-gate
 * marker in its `final_text`; when one is found the stage is reported
 * as `halted-human-gate` together with the gate name and the final
 * text. Sessions that did not complete are never scanned for a gate.
 *
 * @param session Result returned by the session runner.
 * @returns The mapped status, plus `gate` details for a human-gate halt.
 */
function mapSessionStatus(
  session: SessionResult,
): { status: StageOutcome['status']; gate?: { name: string; stdoutTail: string } } {
  if (session.status === 'completed') {
    const finalText = session.final_text ?? '';
    const marker = extractGateMarker(finalText);
    if (marker === null) {
      return { status: 'completed' };
    }
    return {
      status: 'halted-human-gate',
      gate: { name: marker.name, stdoutTail: finalText },
    };
  }

  if (session.status === 'budget_exceeded') {
    return { status: 'halted-budget' };
  }
  if (session.status === 'turn_cap_exceeded') {
    return { status: 'halted-turn-cap' };
  }

  // 'aborted', 'error', and any unrecognised status all halt with an error.
  return { status: 'halted-error' };
}
119
+
120
/**
 * Decide whether a failed session qualifies for the stage-level retry.
 *
 * Only sessions with `status: 'error'` and an attached `error` object
 * are considered. The decision is made on the error's `kind`:
 *   - `state_conflict`   → always retryable
 *   - `operation_failed` → retryable only for the codes
 *                          `NETWORK_TRANSIENT` and `API_ERROR`
 *   - anything else      → not retryable
 *
 * @param session Result returned by the session runner.
 * @returns `true` when a second stage attempt is warranted.
 */
function isRetryableStageError(session: SessionResult): boolean {
  const failure = session.status === 'error' ? session.error : undefined;
  if (failure === undefined) {
    return false;
  }

  switch (failure.kind) {
    case 'state_conflict':
      return true;
    case 'operation_failed':
      // operation_failed is a broad bucket; only transient codes retry.
      return failure.code === 'NETWORK_TRANSIENT' || failure.code === 'API_ERROR';
    default:
      return false;
  }
}
143
+
144
/**
 * View a pipeline `Stage` as the context-engine's `Stage` type.
 *
 * This is a compile-time cast only; the value is returned unchanged.
 * The cast relies on the pipeline stages being a subset of the
 * context-engine's stage union (which, per the original note, also
 * carries `init`).
 */
function toContextStage(stage: Stage): ContextStage {
  const asContextStage = stage as ContextStage;
  return asContextStage;
}
152
+
153
/**
 * View a pipeline `Stage` as the tool-scoping `Stage` type.
 *
 * This is a compile-time cast only; the value is returned unchanged.
 * Per the original note, the tool-scoping union additionally carries
 * `init` and `custom`.
 */
function toScopeStage(stage: Stage): ScopeStage {
  const asScopeStage = stage as ScopeStage;
  return asScopeStage;
}
160
+
161
/**
 * Invoke one pipeline stage and describe what happened.
 *
 * Steps, in order:
 *   1. build the context bundle (or call `bundleOverride`);
 *   2. resolve the allowed-tools list (or call `scopeOverride`);
 *   3. compose the prompt and the session-runner options;
 *   4. run the session (or call `runOverride`);
 *   5. map the session status onto a stage status;
 *   6. on a retryable error with retry budget left, re-invoke once.
 *
 * Never throws — every failure surfaces as `status: 'halted-error'`
 * with a synthetic `outcome.session.error` whose `code` names the step
 * that failed: `bundle_build_failed`, `scope_resolution_failed`,
 * `session_options_failed`, or `session_run_threw`.
 *
 * @param args Stage, config, retry budget, and optional test overrides.
 * @returns The stage outcome, including timestamps and the number of
 *          retries consumed. On a retry, the returned outcome (and its
 *          timestamps) is that of the second attempt.
 */
export async function invokeStage(args: InvokeStageArgs): Promise<StageOutcome> {
  const started_at: string = new Date().toISOString();
  const retriesConsumed: number = args._retriesConsumed ?? 0;

  // All failures outside the session share one outcome shape; only the
  // thrown value and the step-identifying code differ.
  const haltWithError = (err: unknown, code: string): StageOutcome => ({
    stage: args.stage,
    status: 'halted-error',
    started_at,
    ended_at: new Date().toISOString(),
    retries: retriesConsumed,
    session: makeErrorSession(err, code),
  });

  // -- 1. Build the context bundle for this stage. ------------------------
  let bundle: ContextBundle;
  try {
    if (args.bundleOverride !== undefined) {
      bundle = args.bundleOverride(args.stage, args.config.cwd);
    } else {
      bundle = defaultBuildBundle(toContextStage(args.stage), {
        ...(args.config.cwd !== undefined ? { cwd: args.config.cwd } : {}),
      });
    }
  } catch (err) {
    return haltWithError(err, 'bundle_build_failed');
  }

  // -- 2. Resolve the agent frontmatter override + tool scope. ------------
  let allowedTools: readonly string[];
  try {
    const agentPath = args.config.agentsByStage?.[args.stage];
    if (args.scopeOverride !== undefined) {
      // Keep the call arity distinct so a mock can tell "no agent
      // configured" apart from "agent configured".
      allowedTools =
        agentPath !== undefined
          ? args.scopeOverride(args.stage, agentPath)
          : args.scopeOverride(args.stage);
    } else {
      const agentTools: readonly string[] | null =
        agentPath !== undefined ? parseAgentToolsByName(agentPath) : null;
      allowedTools = enforceScope({
        stage: toScopeStage(args.stage),
        ...(agentTools !== null ? { agentTools } : {}),
      });
    }
  } catch (err) {
    return haltWithError(err, 'scope_resolution_failed');
  }

  // -- 3. Compose the session-runner options. -----------------------------
  // Guarded like every other step: rendering the bundle or reading a
  // malformed config must not escape as a rejected promise.
  let runOpts: SessionRunnerOptions;
  try {
    const prompt: string = buildStagePrompt({
      stage: args.stage,
      config: args.config,
      bundle,
      ...(args._promptSuffix !== undefined ? { promptSuffix: args._promptSuffix } : {}),
    });

    const systemPrompt: string | undefined = args.config.systemPrompts?.[args.stage];

    runOpts = {
      prompt,
      ...(systemPrompt !== undefined ? { systemPrompt } : {}),
      allowedTools: [...allowedTools],
      budget: {
        usdLimit: args.config.budget.usdLimit,
        inputTokensLimit: args.config.budget.inputTokensLimit,
        outputTokensLimit: args.config.budget.outputTokensLimit,
      },
      turnCap: { maxTurns: args.config.maxTurnsPerStage },
      stage: toScopeStage(args.stage),
    };
  } catch (err) {
    return haltWithError(err, 'session_options_failed');
  }

  // -- 4. Invoke session-runner (or the override). ------------------------
  const runImpl = args.runOverride ?? defaultRun;
  let session: SessionResult;
  try {
    session = await runImpl(runOpts);
  } catch (err) {
    // session-runner contracts never to throw. If the override throws,
    // we still surface a halted-error outcome with a synthetic session.
    return haltWithError(err, 'session_run_threw');
  }

  // -- 5. Map session status → stage status. ------------------------------
  const mapped = mapSessionStatus(session);

  // -- 6. Stage-level retry-once on retryable error. ----------------------
  if (
    mapped.status === 'halted-error' &&
    args.retries > 0 &&
    isRetryableStageError(session)
  ) {
    // Recurse with the retry budget exhausted. Spreading `args` carries
    // the same config, prompt suffix, and overrides into the retry so
    // the test harness can observe the second attempt.
    const nextArgs: InvokeStageArgs = {
      ...args,
      retries: 0,
      _retriesConsumed: retriesConsumed + 1,
    };
    return invokeStage(nextArgs);
  }

  const ended_at: string = new Date().toISOString();

  const gateInfo: HumanGateInfo | undefined =
    mapped.status === 'halted-human-gate' && mapped.gate !== undefined
      ? {
          stage: args.stage,
          gateName: mapped.gate.name,
          stdoutTail: mapped.gate.stdoutTail,
        }
      : undefined;

  return {
    stage: args.stage,
    status: mapped.status,
    session,
    started_at,
    ended_at,
    retries: retriesConsumed,
    ...(gateInfo !== undefined ? { gate: gateInfo } : {}),
  };
}
309
+
310
/**
 * Build a synthetic `SessionResult` for a failure that happened outside
 * the session itself (bundle build, scope resolution, option
 * composition, or a thrown run override), so downstream code can treat
 * it like any other failed session.
 *
 * The result always has `status: 'error'`, zeroed usage and turn
 * counters, an empty transcript path, and an `error` of kind
 * `operation_failed` carrying the supplied `code`.
 *
 * @param err  The thrown value; may be anything, not just an `Error`.
 * @param code Machine-readable code identifying the failing step.
 * @returns The synthetic session result.
 */
function makeErrorSession(err: unknown, code: string): SessionResult {
  return {
    status: 'error',
    transcript_path: '',
    turns: 0,
    usage: { input_tokens: 0, output_tokens: 0, usd_cost: 0 },
    tool_calls: [],
    sanitizer: { applied: [], removedSections: [] },
    error: {
      code,
      message: describeThrownValue(err),
      kind: 'operation_failed',
      context: {},
    },
  };
}

/**
 * Extract a human-readable message from an arbitrary thrown value.
 *
 * `Error` instances and strings are used as-is. Error-like objects with
 * a non-empty string `message`, and thrown numbers/booleans/bigints,
 * keep their text instead of collapsing to a generic message. Anything
 * else (including `null`/`undefined`) yields `'unknown error'`.
 */
function describeThrownValue(err: unknown): string {
  if (err instanceof Error) return err.message;
  if (typeof err === 'string') return err;
  if (typeof err === 'number' || typeof err === 'boolean' || typeof err === 'bigint') {
    return String(err);
  }
  if (typeof err === 'object' && err !== null) {
    const candidate: unknown = (err as { message?: unknown }).message;
    if (typeof candidate === 'string' && candidate !== '') return candidate;
  }
  return 'unknown error';
}
@@ -0,0 +1,144 @@
1
+ // scripts/lib/pipeline-runner/state-machine.ts — Plan 21-05 Task 2.
2
+ //
3
+ // Stage-ordering primitives. Pure — no I/O, no logging, no side effects.
4
+ // Consumed by `index.ts` (to compute the run order) and `stage-handlers.ts`
5
+ // indirectly via `nextStage()` for future lookahead hooks.
6
+ //
7
+ // Rules locked by Plan 21-05:
8
+ // * STAGE_ORDER is frozen — mutation attempts throw in strict mode.
9
+ // * `resolveStageOrder` must preserve STAGE_ORDER's relative ordering.
10
+ // Out-of-order user input (e.g., `stages: ['verify', 'brief']`) throws
11
+ // a `ValidationError` with code `INVALID_STAGE_ORDER`.
12
+ // * `resumeFrom` drops stages strictly before it (keeps self + after).
13
+ // * `stopAfter` drops stages strictly after it (keeps self + before).
14
+ // * `skipStages` is applied last, filtering any remaining stage whose
15
+ // name is in the set. Unknown stage names in `skipStages` are
16
+ // tolerated (no-op) — the filter is a membership check.
17
+
18
+ import { ValidationError } from '../gdd-errors/index.ts';
19
+ import type { Stage } from './types.ts';
20
+
21
/**
 * The canonical stage sequence: brief → explore → plan → design → verify.
 *
 * The array is frozen so consumers cannot reorder or extend it by
 * accident; the other ordering helpers in this module are all defined
 * in terms of it.
 */
export const STAGE_ORDER: readonly Stage[] = Object.freeze<readonly Stage[]>([
  'brief',
  'explore',
  'plan',
  'design',
  'verify',
]);
33
+
34
/**
 * Look up the zero-based position of `stage` within `STAGE_ORDER`.
 *
 * The type system should already restrict `stage` to a known name; the
 * runtime check guards against values smuggled in through `as` casts.
 *
 * @param stage Stage name to locate.
 * @returns The stage's index in canonical order.
 * @throws ValidationError (code `INVALID_STAGE`) when `stage` is not
 *         part of `STAGE_ORDER`.
 */
export function stageIndex(stage: Stage): number {
  const position = STAGE_ORDER.indexOf(stage);
  if (position >= 0) {
    return position;
  }
  throw new ValidationError(
    `unknown stage: ${String(stage)}`,
    'INVALID_STAGE',
    { stage, knownStages: [...STAGE_ORDER] },
  );
}
51
+
52
/**
 * Find the stage that comes directly after `current` in canonical order.
 *
 * @param current Stage to look past.
 * @returns The following stage, or `null` when `current` is the last
 *          entry of `STAGE_ORDER`.
 * @throws ValidationError (via `stageIndex`) when `current` is unknown.
 */
export function nextStage(current: Stage): Stage | null {
  // Indexing one past the final stage yields `undefined`, which the
  // nullish fallback turns into the documented `null`.
  return STAGE_ORDER[stageIndex(current) + 1] ?? null;
}
65
+
66
+ /**
67
+ * Configuration subset relevant to stage-order resolution. Every field is optional; `resolveStageOrder` falls back to the full `STAGE_ORDER` when `stages` is omitted.
68
+ */
69
+ export interface ResolveStageOrderInput {
70
+ readonly stages?: readonly Stage[];
71
+ readonly skipStages?: readonly Stage[];
72
+ readonly resumeFrom?: Stage;
73
+ readonly stopAfter?: Stage;
74
+ }
75
+
76
/**
 * Compute the effective run order for one pipeline invocation.
 *
 * Starting from `input.stages` (or the full `STAGE_ORDER`), the result
 * keeps only stages that are at or after `resumeFrom`, at or before
 * `stopAfter`, and not listed in `skipStages`.
 *
 * Validation, in this order:
 *   - `stages` must be strictly ascending in canonical order
 *     (`INVALID_STAGE_ORDER`); unknown names fail with `INVALID_STAGE`;
 *   - `resumeFrom` and `stopAfter` must be known stages;
 *   - when both are given, `resumeFrom` must not come after `stopAfter`
 *     (`INVALID_STAGE_WINDOW`).
 *
 * @param input Stage-selection options; defaults to `{}`.
 * @returns A frozen array of the stages to run, in canonical order.
 * @throws ValidationError for any of the violations listed above.
 */
export function resolveStageOrder(input: ResolveStageOrderInput = {}): readonly Stage[] {
  const { stages, skipStages, resumeFrom, stopAfter } = input;
  const requested: readonly Stage[] = stages ?? STAGE_ORDER;

  // Each requested stage must sit strictly later than the one before it.
  let previous = -1;
  for (const stage of requested) {
    const position = stageIndex(stage);
    if (position > previous) {
      previous = position;
      continue;
    }
    throw new ValidationError(
      `stages array out of canonical order near "${stage}"; expected ascending ${STAGE_ORDER.join(' → ')}`,
      'INVALID_STAGE_ORDER',
      { stages: [...requested], canonical: [...STAGE_ORDER] },
    );
  }

  // Resolve the window bounds (resumeFrom first, then stopAfter) so an
  // unknown name is reported before the window comparison.
  const lowerBound = resumeFrom === undefined ? undefined : stageIndex(resumeFrom);
  const upperBound = stopAfter === undefined ? undefined : stageIndex(stopAfter);
  if (lowerBound !== undefined && upperBound !== undefined && lowerBound > upperBound) {
    throw new ValidationError(
      `resumeFrom="${resumeFrom}" is later than stopAfter="${stopAfter}"`,
      'INVALID_STAGE_WINDOW',
      { resumeFrom: resumeFrom, stopAfter: stopAfter },
    );
  }

  // Skip list is a plain membership check; names that are not in the
  // run order simply never match.
  const skipped: Set<string> =
    skipStages !== undefined && skipStages.length > 0
      ? new Set<string>(skipStages)
      : new Set<string>();

  const selected: Stage[] = requested.filter((stage) => {
    const position = stageIndex(stage);
    const afterResume = lowerBound === undefined || position >= lowerBound;
    const beforeStop = upperBound === undefined || position <= upperBound;
    return afterResume && beforeStop && !skipped.has(stage);
  });

  return Object.freeze(selected);
}
@@ -0,0 +1,183 @@
1
+ // scripts/lib/pipeline-runner/types.ts — Plan 21-05 (SDK-17).
2
+ //
3
+ // Typed surface for the Brief → Verify state machine that drives the
4
+ // full headless Phase-21 pipeline. Consumed by `state-machine.ts`,
5
+ // `stage-handlers.ts`, `human-gate.ts`, and `index.ts` (the `run()`
6
+ // driver).
7
+ //
8
+ // Design notes:
9
+ // * `Stage` is the 5-stage design pipeline (brief → explore → plan →
10
+ // design → verify). It is intentionally NARROWER than the session-
11
+ // runner's Stage union (which also carries `init` + `custom`) —
12
+ // the pipeline runner orchestrates only the design stages. `init`
13
+ // is owned by Plan 21-08; `custom` is a one-off escape valve.
14
+ // * `StageStatus` encodes terminal outcomes at the stage level. Any
15
+ // status beginning with `halted-*` aborts the pipeline (except
16
+ // `halted-human-gate`, which the driver disambiguates via the
17
+ // `onHumanGate` callback).
18
+ // * `PipelineStatus` is the pipeline-level terminal state. The driver
19
+ // NEVER throws — all failure modes land here.
20
+ // * Budget + turn caps apply per-stage. `BudgetCap.perStage` is
21
+ // advisory for future aggregate-mode support (not used in this
22
+ // plan — Plan 21-11's real-SDK E2E may revisit).
23
+
24
+ /**
25
+ * The 5 stages of the design pipeline, listed in run order (brief → explore → plan → design → verify). Mirrors `.design/STATE.md`'s
26
+ * stage field (Plan 20-01's gdd-state contract).
27
+ */
28
+ export type Stage = 'brief' | 'explore' | 'plan' | 'design' | 'verify';
29
+
30
+ /**
31
+ * Terminal outcome for a single stage. `completed` and `skipped` are
32
+ * non-halting; every `halted-*` status aborts the pipeline — except
33
+ * `halted-human-gate`, which the driver may resolve by invoking the
34
+ * caller's `onHumanGate` callback. `halted-gate-veto` is the status for which `StageOutcome.blockers` is populated.
35
+ */
36
+ export type StageStatus =
37
+ | 'completed'
38
+ | 'skipped'
39
+ | 'halted-gate-veto'
40
+ | 'halted-budget'
41
+ | 'halted-turn-cap'
42
+ | 'halted-error'
43
+ | 'halted-human-gate';
44
+
45
+ /**
46
+ * Terminal state for the whole pipeline. `awaiting-gate` means a
47
+ * human-gate paused execution; the caller may resume via a new `run()`
48
+ * invocation with `resumeFrom` set to the paused stage. NOTE(review): `stopped-after` presumably corresponds to the `stopAfter` option — confirm against the driver.
49
+ */
50
+ export type PipelineStatus =
51
+ | 'completed'
52
+ | 'halted'
53
+ | 'stopped-after'
54
+ | 'awaiting-gate';
55
+
56
+ /**
57
+ * Hard caps on cost that apply to every stage's session. See
58
+ * `session-runner/types.ts` for per-attempt semantics; `perStage=true`
59
+ * means these caps fire independently per stage, not aggregated across
60
+ * the pipeline. The stage handler forwards the three limits unchanged into each session's `budget` option.
61
+ */
62
+ export interface BudgetCap {
63
+ readonly usdLimit: number;
64
+ readonly inputTokensLimit: number;
65
+ readonly outputTokensLimit: number;
66
+ /**
67
+ * When `true`, the budget applies individually to each stage (default).
68
+ * When `false`, the aggregate pipeline budget is split evenly across
69
+ * the targeted stages — advisory; implementation still treats each
70
+ * session's cap as a full `usdLimit` because session-runner owns the
71
+ * per-session envelope. The stage handler does not read this flag.
72
+ */
73
+ readonly perStage: boolean;
74
+ }
75
+
76
+ /**
77
+ * Information surfaced to the caller when a stage pauses at a
78
+ * recognized `AWAIT_USER_GATE` marker. The stage handler sets `stdoutTail` to the session's `final_text` (empty string when absent).
79
+ * NOTE(review): any size bound on it would come from `session-runner`'s capture — confirm; the stage handler itself does not truncate.
80
+ */
81
+ export interface HumanGateInfo {
82
+ readonly stage: Stage;
83
+ readonly gateName: string;
84
+ readonly stdoutTail: string;
85
+ }
86
+
87
+ /**
88
+ * Caller's decision after inspecting a `HumanGateInfo`. `resume`
89
+ * re-invokes the same stage with the optional `payload` appended to
90
+ * the prompt (so the caller can inject a directive like "approve and
91
+ * proceed"). `stop` halts the pipeline with `status: awaiting-gate`.
92
+ */
93
+ export interface HumanGateDecision {
94
+ readonly decision: 'resume' | 'stop';
95
+ /** Text appended to the stage prompt on `resume`; presumably ignored for `stop` — confirm against the driver. */
+ readonly payload?: string;
96
+ }
97
+
98
+ /**
99
+ * Per-stage agent-frontmatter override. Maps a stage to an
100
+ * `agents/<name>.md` path whose YAML `tools:` field overrides the
101
+ * stage's default tool scope. See tool-scoping (Plan 21-03). NOTE(review): the stage handler passes this value to `parseAgentToolsByName` — confirm whether a path or a bare agent name is expected.
102
+ */
103
+ export type AgentsByStage = Readonly<Partial<Record<Stage, string>>>;
104
+
105
+ /**
106
+ * Configuration for one pipeline run. `prompts` is required for every
107
+ * stage in the run order; missing keys throw a `ValidationError` at
108
+ * driver entry. `systemPrompts` are optional.
109
+ */
110
+ export interface PipelineConfig {
111
+ /** Stages to run, defaulting to the full 5. Must be listed in canonical order; out-of-order input is rejected by `resolveStageOrder`. */
112
+ readonly stages?: readonly Stage[];
113
+ /** Stages to skip. Names that are not in the run order are ignored. */
114
+ readonly skipStages?: readonly Stage[];
115
+ /** Resume from this stage (stages before it are dropped from the run order). */
116
+ readonly resumeFrom?: Stage;
117
+ /** Stop after completing this stage (stages after it are dropped from the run order). */
118
+ readonly stopAfter?: Stage;
119
+ /** Per-stage prompt templates. Keys: stage name. Value: prompt body. */
120
+ readonly prompts: Readonly<Record<Stage, string>>;
121
+ /** Per-stage system prompts (optional). */
122
+ readonly systemPrompts?: Readonly<Partial<Record<Stage, string>>>;
123
+ /** Budget applied to every stage's session. */
124
+ readonly budget: BudgetCap;
125
+ /** Turn cap applied to every stage's session; forwarded as `turnCap.maxTurns`. */
126
+ readonly maxTurnsPerStage: number;
127
+ /** Max stage-level retry attempts. Must be 0 or 1; default 1. */
128
+ readonly stageRetries?: 0 | 1;
129
+ /** Callback invoked when a stage hits a human-verify gate. */
130
+ readonly onHumanGate?: (info: HumanGateInfo) => Promise<HumanGateDecision>;
131
+ /** Per-stage agent-frontmatter override map. */
132
+ readonly agentsByStage?: AgentsByStage;
133
+ /** Working directory (repo root); defaults to process.cwd(). */
134
+ readonly cwd?: string;
135
+ }
136
+
137
+ /**
138
+ * Per-stage outcome inside a `PipelineResult`. The `session` field is
139
+ * absent when the stage was skipped (never entered session-runner).
140
+ *
141
+ * `retries` is the number of stage-level re-invocations actually
142
+ * performed. `0` means the first attempt completed (or failed
143
+ * non-retryably); `1` means the first attempt failed with a retryable
144
+ * error and the second attempt terminated the stage.
145
+ */
146
+ export interface StageOutcome {
147
+ readonly stage: Stage;
148
+ readonly status: StageStatus;
149
+ /** SessionResult from the stage's run (absent if skipped). For failures outside the session this is a synthetic result with `status: 'error'`. */
150
+ readonly session?: import('../session-runner/types.ts').SessionResult;
151
+ /** Blockers if `status === 'halted-gate-veto'`. */
152
+ readonly blockers?: readonly string[];
153
+ /** ISO timestamp when the stage started; absent if skipped. After a retry this is the start of the final attempt. */
154
+ readonly started_at?: string;
155
+ /** ISO timestamp when the stage ended; absent if skipped. */
156
+ readonly ended_at?: string;
157
+ /** Number of stage-level retry attempts actually performed. */
158
+ readonly retries: number;
159
+ /** Human-gate info when `status === 'halted-human-gate'`. */
160
+ readonly gate?: HumanGateInfo;
161
+ }
162
+
163
+ /**
164
+ * Final, terminal shape returned by `run()`. Includes per-stage
165
+ * outcomes, aggregate usage, and the stage where execution halted
166
+ * (if any).
167
+ */
168
+ export interface PipelineResult {
169
+ readonly status: PipelineStatus;
170
+ /** Start of the pipeline run — presumably an ISO timestamp like the per-stage ones; confirm against the driver. */
+ readonly cycle_start: string;
171
+ /** End of the pipeline run — presumably an ISO timestamp; confirm against the driver. */
+ readonly cycle_end: string;
172
+ /** One entry per stage outcome recorded by the driver. */
+ readonly outcomes: readonly StageOutcome[];
173
+ /** Aggregate usage across all attempted stages. */
174
+ readonly total_usage: {
175
+ readonly input_tokens: number;
176
+ readonly output_tokens: number;
177
+ readonly usd_cost: number;
178
+ };
179
+ /** Stage at which the pipeline halted (if any). */
180
+ readonly halted_at?: Stage;
181
+ /** Human-gate pause info when `status === 'awaiting-gate'`. */
182
+ readonly gate?: HumanGateInfo;
183
+ }