@hegemonart/get-design-done 1.20.0 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/.claude-plugin/marketplace.json +9 -12
  2. package/.claude-plugin/plugin.json +8 -31
  3. package/CHANGELOG.md +200 -0
  4. package/README.md +48 -7
  5. package/bin/gdd-sdk +55 -0
  6. package/hooks/_hook-emit.js +81 -0
  7. package/hooks/gdd-bash-guard.js +8 -0
  8. package/hooks/gdd-decision-injector.js +2 -0
  9. package/hooks/gdd-protected-paths.js +8 -0
  10. package/hooks/gdd-trajectory-capture.js +64 -0
  11. package/hooks/hooks.json +9 -0
  12. package/package.json +19 -47
  13. package/reference/codex-tools.md +53 -0
  14. package/reference/gemini-tools.md +53 -0
  15. package/reference/registry.json +14 -0
  16. package/scripts/cli/gdd-events.mjs +283 -0
  17. package/scripts/e2e/run-headless.ts +514 -0
  18. package/scripts/lib/cli/commands/audit.ts +382 -0
  19. package/scripts/lib/cli/commands/init.ts +217 -0
  20. package/scripts/lib/cli/commands/query.ts +329 -0
  21. package/scripts/lib/cli/commands/run.ts +656 -0
  22. package/scripts/lib/cli/commands/stage.ts +468 -0
  23. package/scripts/lib/cli/index.ts +167 -0
  24. package/scripts/lib/cli/parse-args.ts +336 -0
  25. package/scripts/lib/connection-probe/index.cjs +263 -0
  26. package/scripts/lib/context-engine/index.ts +116 -0
  27. package/scripts/lib/context-engine/manifest.ts +69 -0
  28. package/scripts/lib/context-engine/truncate.ts +282 -0
  29. package/scripts/lib/context-engine/types.ts +59 -0
  30. package/scripts/lib/discuss-parallel-runner/aggregator.ts +448 -0
  31. package/scripts/lib/discuss-parallel-runner/discussants.ts +430 -0
  32. package/scripts/lib/discuss-parallel-runner/index.ts +223 -0
  33. package/scripts/lib/discuss-parallel-runner/types.ts +184 -0
  34. package/scripts/lib/event-chain.cjs +177 -0
  35. package/scripts/lib/event-stream/index.ts +31 -1
  36. package/scripts/lib/event-stream/reader.ts +139 -0
  37. package/scripts/lib/event-stream/types.ts +155 -1
  38. package/scripts/lib/event-stream/writer.ts +65 -8
  39. package/scripts/lib/explore-parallel-runner/index.ts +294 -0
  40. package/scripts/lib/explore-parallel-runner/mappers.ts +290 -0
  41. package/scripts/lib/explore-parallel-runner/synthesizer.ts +295 -0
  42. package/scripts/lib/explore-parallel-runner/types.ts +139 -0
  43. package/scripts/lib/harness/detect.ts +90 -0
  44. package/scripts/lib/harness/index.ts +64 -0
  45. package/scripts/lib/harness/tool-map.ts +142 -0
  46. package/scripts/lib/init-runner/index.ts +396 -0
  47. package/scripts/lib/init-runner/researchers.ts +245 -0
  48. package/scripts/lib/init-runner/scaffold.ts +224 -0
  49. package/scripts/lib/init-runner/synthesizer.ts +224 -0
  50. package/scripts/lib/init-runner/types.ts +143 -0
  51. package/scripts/lib/logger/index.ts +251 -0
  52. package/scripts/lib/logger/sinks.ts +269 -0
  53. package/scripts/lib/logger/types.ts +110 -0
  54. package/scripts/lib/pipeline-runner/human-gate.ts +134 -0
  55. package/scripts/lib/pipeline-runner/index.ts +527 -0
  56. package/scripts/lib/pipeline-runner/stage-handlers.ts +339 -0
  57. package/scripts/lib/pipeline-runner/state-machine.ts +144 -0
  58. package/scripts/lib/pipeline-runner/types.ts +183 -0
  59. package/scripts/lib/redact.cjs +122 -0
  60. package/scripts/lib/session-runner/errors.ts +406 -0
  61. package/scripts/lib/session-runner/index.ts +715 -0
  62. package/scripts/lib/session-runner/transcript.ts +189 -0
  63. package/scripts/lib/session-runner/types.ts +144 -0
  64. package/scripts/lib/tool-scoping/index.ts +219 -0
  65. package/scripts/lib/tool-scoping/parse-agent-tools.ts +207 -0
  66. package/scripts/lib/tool-scoping/stage-scopes.ts +139 -0
  67. package/scripts/lib/tool-scoping/types.ts +77 -0
  68. package/scripts/lib/trajectory/index.cjs +126 -0
  69. package/scripts/lib/transports/ws.cjs +179 -0
@@ -0,0 +1,189 @@
1
+ // scripts/lib/session-runner/transcript.ts — append-only JSONL
2
+ // transcript writer for Phase 21 headless Agent SDK sessions
3
+ // (Plan 21-01 Task 4).
4
+ //
5
+ // Design mirrors scripts/lib/event-stream/writer.ts but is scoped to one
6
+ // session per file rather than the global telemetry stream. Each session
7
+ // owns a dedicated `.design/sessions/<ISO>-<stage>.jsonl` file; the
8
+ // filename is stable for the full run and survives retries (retries
9
+ // continue to append to the same transcript).
10
+ //
11
+ // Key guarantees:
12
+ // * Append via `fs.appendFileSync(..., { flag: 'a' })`. On POSIX,
13
+ // O_APPEND moves the offset to end-of-file as part of each write, so
14
+ // writers never overwrite each other; on Windows FILE_APPEND_DATA is
15
+ // the analogue. Lines are capped at MAX_LINE_BYTES (64 KiB), which is
16
+ // above PIPE_BUF (4 KiB), so no PIPE_BUF-style guarantee is implied.
17
+ // * Oversized payloads (> MAX_LINE_BYTES = 64 KiB) are REPLACED with
18
+ // `{ truncated: true, preview: "<first 1024 chars>" }` rather than
19
+ // dropped. The transcript always has a line per emitted chunk.
20
+ // * `close()` is a no-op today — we don't hold a file handle between
21
+ // appends (each `appendFileSync` opens/closes). The method exists
22
+ // so future buffering doesn't break callers.
23
+ // * `pathFor(stage, baseDir?)` produces Windows-safe filenames by
24
+ // replacing `:` in the ISO timestamp with `-`.
25
+ //
26
+ // Cross-reference: the run-loop in ./index.ts calls `append()` once per
27
+ // SDK message chunk. Test fixtures (tests/fixtures/session-runner/) use
28
+ // this module directly to assert JSONL line integrity.
29
+
30
+ import { appendFileSync, mkdirSync } from 'node:fs';
31
+ import { dirname, isAbsolute, join, resolve } from 'node:path';
32
+
33
/** Base directory for session transcripts when no override is supplied. */
export const DEFAULT_SESSION_DIR = '.design/sessions';

/**
 * Upper bound, in bytes, on one serialized transcript line (64 KiB).
 * A line that would exceed it is written as a truncation marker instead.
 */
export const MAX_LINE_BYTES = 1024 * 64;

/**
 * Size of the preview kept inside a truncation marker. 1 KiB of the
 * stringified payload is enough to spot-check what was emitted while
 * keeping the marker line small.
 */
export const TRUNCATION_PREVIEW_BYTES = 1024;
44
+
45
/**
 * Shape of a single line in the `.jsonl` transcript. `turn` is a
 * 0-indexed, monotonic counter that the run-loop advances each time
 * `stop_reason` fires.
 */
export interface TranscriptChunk {
  /** Emission time of the chunk as an ISO 8601 string. */
  ts: string;
  /**
   * Chunk kind. The union is closed today; extend it when the SDK adds
   * new message kinds.
   */
  type:
    | 'user'
    | 'assistant'
    | 'tool_use'
    | 'tool_result'
    | 'system'
    | 'usage'
    | 'error';
  /** 0-indexed monotonic turn counter. */
  turn: number;
  /** Raw SDK message. The writer JSON-serializes it and truncates on overflow. */
  payload: unknown;
}
59
+
60
/**
 * Append-only writer for a single session's `.jsonl` transcript. The
 * run-loop constructs one instance per session and calls `append()` for
 * every chunk it observes; each call appends exactly one line.
 */
export class TranscriptWriter {
  /** Absolute transcript path (relative constructor input is resolved against `process.cwd()`). */
  readonly path: string;

  /** `true` once `ensureDirectory()` has created or confirmed the parent directory. */
  private directoryEnsured = false;

  /** Running count of lines successfully appended (marker lines included). */
  chunksWritten = 0;

  /** Running count of chunks whose payload `serialize()` replaced with a marker. */
  chunksTruncated = 0;

  /** Most recent write error; `null` until the first failure (not cleared by later successes). */
  lastError: Error | null = null;

  /**
   * @param rawPath transcript file path; kept as-is when absolute,
   *   otherwise resolved against the current working directory.
   */
  constructor(rawPath: string) {
    this.path = isAbsolute(rawPath) ? rawPath : resolve(process.cwd(), rawPath);
  }

  /**
   * Append one chunk as a single JSONL line. Never throws: any failure is
   * stored on `lastError` and a short diagnostic is written to stderr.
   *
   * Payloads that are too large or cannot be JSON-serialized are written
   * as a marker object (see {@link serialize}).
   */
  append(chunk: TranscriptChunk): void {
    try {
      const line = this.serialize(chunk);
      this.ensureDirectory();
      appendFileSync(this.path, line, { flag: 'a' });
      this.chunksWritten += 1;
    } catch (err) {
      this.lastError = err instanceof Error ? err : new Error(String(err));
      try {
        process.stderr.write(
          `[session-runner:transcript] write failed: ${this.lastError.message}\n`,
        );
      } catch {
        // stderr itself is unusable; nothing further can be reported.
      }
    }
  }

  /**
   * No-op today (no file handle is held between appends). Kept so callers
   * that wrap the writer in try/finally need no change if buffering is added.
   */
  close(): void {
    // Intentional no-op.
  }

  /**
   * Serialize a chunk to its on-disk line (JSON followed by `\n`).
   * Exposed for tests; callers should use {@link append}.
   *
   * Two situations replace the payload with a marker and bump
   * `chunksTruncated`:
   *  - the serialized line exceeds `MAX_LINE_BYTES`: payload becomes
   *    `{ truncated: true, preview }`, where `preview` is the longest
   *    prefix of the stringified payload that fits in
   *    `TRUNCATION_PREVIEW_BYTES` UTF-8 bytes without splitting a
   *    code point;
   *  - `JSON.stringify` throws (circular reference, BigInt, ...): payload
   *    becomes `{ truncated: true, preview: '', error }`, so the
   *    transcript still gets a line for the chunk.
   *
   * @returns the line to append, including the trailing newline
   */
  serialize(chunk: TranscriptChunk): string {
    let raw: string;
    try {
      raw = JSON.stringify(chunk) + '\n';
    } catch (err) {
      // Unserializable payload: keep the line, record why the payload is missing.
      this.chunksTruncated += 1;
      const reason = err instanceof Error ? err.message : String(err);
      return TranscriptWriter.markerLine(chunk, {
        truncated: true,
        preview: '',
        error: `unserializable payload: ${reason}`,
      });
    }

    if (Buffer.byteLength(raw, 'utf8') <= MAX_LINE_BYTES) {
      return raw;
    }

    this.chunksTruncated += 1;

    let stringified: string | undefined;
    try {
      stringified = JSON.stringify(chunk.payload);
    } catch {
      // Defensive: a stateful toJSON could fail on the second pass.
      stringified = undefined;
    }
    const preview =
      typeof stringified === 'string'
        ? TranscriptWriter.utf8Prefix(stringified, TRUNCATION_PREVIEW_BYTES)
        : '';

    return TranscriptWriter.markerLine(chunk, { truncated: true, preview });
  }

  /**
   * Build the conventional transcript path `<dir>/<ISO>-<stage>.jsonl`.
   *
   * `:` characters in the ISO timestamp are replaced with `-` so Windows
   * filesystems accept the filename. A `stage` that does not match
   * `/^[a-z0-9][a-z0-9._-]*$/i` is replaced by `custom`.
   *
   * @param stage   per-stage identifier (explore, plan, ...)
   * @param baseDir optional override; defaults to
   *                `process.env.GDD_SESSION_DIR ?? '.design/sessions'`
   * @returns absolute path string
   */
  static pathFor(stage: string, baseDir?: string): string {
    const iso = new Date().toISOString().replace(/[:]/g, '-');
    const safeStage = /^[a-z0-9][a-z0-9._-]*$/i.test(stage) ? stage : 'custom';
    const dir = baseDir ?? process.env['GDD_SESSION_DIR'] ?? DEFAULT_SESSION_DIR;
    const filename = `${iso}-${safeStage}.jsonl`;
    const full = join(dir, filename);
    return isAbsolute(full) ? full : resolve(process.cwd(), full);
  }

  /** Create the parent directory on first use; memoized per writer after success. */
  private ensureDirectory(): void {
    if (this.directoryEnsured) return;
    mkdirSync(dirname(this.path), { recursive: true });
    this.directoryEnsured = true;
  }

  /** Serialize `chunk` with its payload swapped for `payload`, newline-terminated. */
  private static markerLine(chunk: TranscriptChunk, payload: unknown): string {
    const replacement: TranscriptChunk = {
      ts: chunk.ts,
      type: chunk.type,
      turn: chunk.turn,
      payload,
    };
    return JSON.stringify(replacement) + '\n';
  }

  /**
   * Longest prefix of `text` whose UTF-8 encoding fits in `maxBytes`.
   * Walks by code point, so a surrogate pair is never split; stops at the
   * first code point that would overflow, so work is bounded by `maxBytes`.
   */
  private static utf8Prefix(text: string, maxBytes: number): string {
    let usedBytes = 0;
    let endIndex = 0;
    for (const codePoint of text) {
      const size = Buffer.byteLength(codePoint, 'utf8');
      if (usedBytes + size > maxBytes) break;
      usedBytes += size;
      endIndex += codePoint.length;
    }
    return text.slice(0, endIndex);
  }
}
@@ -0,0 +1,144 @@
1
+ // scripts/lib/session-runner/types.ts — public type surface for the
2
+ // Phase 21 headless Agent SDK wrapper (Plan 21-01, SDK-13).
3
+ //
4
+ // These types are consumed by every other Phase-21 runner (pipeline,
5
+ // explore, discuss, init). No other file in the repo should import
6
+ // `@anthropic-ai/claude-agent-sdk` directly — all session creation
7
+ // flows through `run(opts)` in `./index.ts`.
8
+ //
9
+ // Design notes:
10
+ // * `BudgetCap` is a hard cap across the ENTIRE session, including
11
+ // retries. Plan spec (Task 5): "budget.usdLimit caps TOTAL session
12
+ // cost across retries, NOT per-attempt."
13
+ // * `TurnCap.maxTurns` counts assistant turns (response cycles). A
14
+ // tool_use + tool_result pair is part of the SAME turn.
15
+ // * `stage` drives event payloads + transcript filenames. The union
16
+ // mirrors `Stage` from `gdd-state/types.ts` plus `init` + `custom`.
17
+ // * `queryOverride` / `sanitizeOverride` exist solely for tests; the
18
+ // default behavior imports the real SDK + the real sanitizer.
19
+
20
+ /**
21
+ * Hard caps on session cost. Any dimension exceeded aborts the session
22
+ * mid-stream and surfaces status `budget_exceeded`. All three caps are
23
+ * session-total, not per-attempt — retry usage accumulates against the
24
+ * same envelope.
25
+ */
26
+ export interface BudgetCap {
27
+ /** Hard USD limit across the session (inbound + outbound). Aborts when exceeded. */
28
+ usdLimit: number;
29
+ /** Input-token hard cap. Aborts when exceeded. */
30
+ inputTokensLimit: number;
31
+ /** Output-token hard cap. Aborts when exceeded. */
32
+ outputTokensLimit: number;
33
+ }
34
+
35
+ /**
36
+ * Hard cap on the number of assistant response turns. A `tool_use` +
37
+ * `tool_result` round-trip stays inside the same turn; the counter
38
+ * increments only when `message.stop_reason` is observed on a message.
39
+ */
40
+ export interface TurnCap {
41
+ /** Maximum assistant turns (response cycles). Counts tool_use + tool_result as part of the same turn. */
42
+ maxTurns: number;
43
+ }
44
+
45
+ /**
46
+ * Subset of the SDK `query({options})` call that the runner actually
47
+ * passes. Tests that stub `query()` can declare their parameter type as
48
+ * any superset of this shape; the runner only reads `abortSignal`,
49
+ * `allowedTools`, and `systemPrompt` from the forwarded object.
50
+ */
51
+ export interface QueryOptionsForwarded {
52
+ abortSignal?: AbortSignal;
53
+ allowedTools?: string[];
54
+ systemPrompt?: string;
55
+ [extra: string]: unknown;
56
+ }
57
+
58
/** Argument object handed to `queryOverride` and to the real SDK `query()`. */
export interface QueryInvocation {
  /** Prompt value passed through to the SDK; left untyped here. */
  prompt: unknown;
  /** Forwarded options, when any are supplied. */
  options?: QueryOptionsForwarded;
}
63
+
64
+ /**
65
+ * Test-injectable stand-in for the SDK's `query()`. Any function whose
66
+ * parameter accepts `{ prompt, options? }` and returns an async iterable
67
+ * over unknown chunks is compatible.
68
+ */
69
+ export type QueryOverride = (args: QueryInvocation) => AsyncIterable<unknown>;
70
+
71
+ /**
72
+ * One shot at the Agent SDK. Callers that need retries or backoff should
73
+ * rely on the built-in retry-once mechanism rather than wrapping this.
74
+ *
75
+ * Field-by-field:
76
+ * * `prompt` — raw skill body or operator message. MUST go through the
77
+ * prompt sanitizer before the SDK sees it; the wrapper does that
78
+ * automatically.
79
+ * * `systemPrompt` — forwarded verbatim to `query({options.systemPrompt})`.
80
+ * * `allowedTools` — forwarded verbatim; enforcement lives in Plan 21-03.
81
+ * * `budget` / `turnCap` — see types above.
82
+ * * `stage` — drives event payloads + transcript filename.
83
+ * * `transcriptDir` — override default `.design/sessions/`.
84
+ * * `signal` — external abort hook (user Ctrl+C, parent pipeline kill).
85
+ * * `maxRetries` — total attempts, not extra attempts. Default 2
86
+ * (first try + retry-once).
87
+ * * `queryOverride` / `sanitizeOverride` — test injection points.
88
+ */
89
+ export interface SessionRunnerOptions {
90
+ prompt: string;
91
+ systemPrompt?: string;
92
+ /** Allowed tool names (e.g., ["Read","Grep","Glob","Bash"]). Enforced by Plan 21-03. */
93
+ allowedTools?: string[];
94
+ budget: BudgetCap;
95
+ turnCap: TurnCap;
96
+ /** Per-stage identifier for event emission + transcript path. */
97
+ stage: 'brief' | 'explore' | 'plan' | 'design' | 'verify' | 'init' | 'custom';
98
+ /** Optional transcript directory; defaults to `.design/sessions/<ISO>-<stage>.jsonl`. */
99
+ transcriptDir?: string;
100
+ /** AbortController for external cancellation. */
101
+ signal?: AbortSignal;
102
+ /** Max retry attempts on retryable errors (default: 2, first try + retry-once). */
103
+ maxRetries?: number;
104
+ /**
105
+ * Override the SDK `query()` import (for tests). Default imports real SDK.
106
+ *
107
+ * The parameter is a single `args` object matching the SDK's call shape
108
+ * `{ prompt, options }` where `options` carries at minimum `abortSignal`
109
+ * plus the SDK's own extras. Tests can narrow `options` in their
110
+ * declaration and still satisfy the type because the runner only ever
111
+ * passes `abortSignal`, `systemPrompt`, and `allowedTools` — none of
112
+ * which widen the test's declared shape.
113
+ */
114
+ queryOverride?: QueryOverride;
115
+ /** Override the prompt sanitizer (for tests). Default calls prompt-sanitizer.sanitize(). */
116
+ sanitizeOverride?: (raw: string) => {
117
+ sanitized: string;
118
+ applied: readonly string[];
119
+ removedSections: readonly string[];
120
+ };
121
+ }
122
+
123
+ /**
124
+ * Terminal shape returned by `run()`. Union discriminant is `status`.
125
+ *
126
+ * * `completed` — session ended naturally (final `stop_reason`).
127
+ * * `budget_exceeded` — any of usdLimit / inputTokensLimit / outputTokensLimit tripped.
128
+ * * `turn_cap_exceeded` — maxTurns tripped.
129
+ * * `aborted` — external `opts.signal` fired.
130
+ * * `error` — unhandled / non-retryable / retries-exhausted.
131
+ *
132
+ * `run()` NEVER throws. Inspect `error` when `status !== 'completed'`.
133
+ */
134
+ export interface SessionResult {
135
+ status: 'completed' | 'budget_exceeded' | 'turn_cap_exceeded' | 'aborted' | 'error';
136
+ transcript_path: string;
137
+ turns: number;
138
+ usage: { input_tokens: number; output_tokens: number; usd_cost: number };
139
+ final_text?: string;
140
+ tool_calls: Array<{ name: string; input: unknown; output?: unknown; error?: string }>;
141
+ error?: { code: string; message: string; kind: string; context?: unknown };
142
+ /** Prompt-sanitizer diagnostics (pattern names that fired; removed section headings). */
143
+ sanitizer: { applied: readonly string[]; removedSections: readonly string[] };
144
+ }
@@ -0,0 +1,219 @@
1
+ // scripts/lib/tool-scoping/index.ts — public surface for the per-stage
2
+ // allowed-tools enforcement module.
3
+ //
4
+ // Exported API:
5
+ // * Types — Stage, Scope, ScopeInput, ScopeViolation
6
+ // * Registry — STAGE_SCOPES, NATIVE_TOOLS, isMcpTool, isNativeTool
7
+ // * Frontmatter — parseAgentTools, parseAgentToolsByName
8
+ // * Computation — computeScope (pure, no I/O)
9
+ // * Checking — checkTool (pure predicate, returns violation|null)
10
+ // * Enforcement — enforceScope (throws ValidationError on denial)
11
+ //
12
+ // Precedence contract (agentTools vs stage default):
13
+ // undefined/null → stage default applies
14
+ // [] → scope narrows to MCP-only (no native)
15
+ // string[] → replaces stage default entirely (override wins)
16
+ //
17
+ // MCP tools (`mcp__*`) are always allowed — they're appended to the
18
+ // scope without checking against the stage filter.
19
+ //
20
+ // Consumed by:
21
+ // * Plan 21-01 `session-runner` — computes `allowedTools` for each session.
22
+ // * Plan 21-05 `pipeline-runner` — picks the correct scope per stage.
23
+
24
+ import { ValidationError } from '../gdd-errors/index.ts';
25
+ import type { Scope, ScopeInput, ScopeViolation, Stage } from './types.ts';
26
+ import {
27
+ NATIVE_TOOLS,
28
+ STAGE_SCOPES,
29
+ isMcpTool,
30
+ isNativeTool,
31
+ } from './stage-scopes.ts';
32
+ import {
33
+ parseAgentTools,
34
+ parseAgentToolsByName,
35
+ } from './parse-agent-tools.ts';
36
+
37
+ // ---------------------------------------------------------------------------
38
+ // Re-exports — keep the module's public surface on the index file.
39
+ // ---------------------------------------------------------------------------
40
+
41
+ export type { Scope, ScopeInput, ScopeViolation, Stage } from './types.ts';
42
+ export {
43
+ NATIVE_TOOLS,
44
+ STAGE_SCOPES,
45
+ isMcpTool,
46
+ isNativeTool,
47
+ } from './stage-scopes.ts';
48
+ export {
49
+ parseAgentTools,
50
+ parseAgentToolsByName,
51
+ } from './parse-agent-tools.ts';
52
+
53
+ // ---------------------------------------------------------------------------
54
+ // Internals
55
+ // ---------------------------------------------------------------------------
56
+
57
/** Every recognized `Stage` value; must be kept in sync with the `Stage` union type. */
const KNOWN_STAGES: readonly Stage[] = Object.freeze([
  'brief', 'explore', 'plan', 'design', 'verify', 'init', 'custom',
]);

/** Type guard: `true` when `s` is one of the entries in `KNOWN_STAGES`. */
function isKnownStage(s: string): s is Stage {
  return KNOWN_STAGES.some((known) => known === s);
}
71
+
72
/**
 * Return a frozen copy of `list` with duplicates removed and entries in
 * default sort order, so that `computeScope` output is deterministic
 * across runs. The input is not mutated.
 */
function normalize(list: readonly string[]): readonly string[] {
  const unique = [...new Set(list)];
  unique.sort();
  return Object.freeze(unique);
}
79
+
80
+ // ---------------------------------------------------------------------------
81
+ // computeScope — pure, no I/O
82
+ // ---------------------------------------------------------------------------
83
+
84
/**
 * Build the final, frozen `Scope` for a session.
 *
 * Precedence: `agentTools` (frontmatter) wins over the `STAGE_SCOPES`
 * default whenever it is neither `undefined` nor `null`; an empty array
 * therefore counts as an override. Everything in `additional` is unioned
 * in without filtering; whether a native tool in `additional` is
 * acceptable is decided by `enforceScope`, not here.
 *
 * `.denied` is NATIVE_TOOLS \ (.allowed ∩ NATIVE_TOOLS): the native
 * tools not permitted on this session. `.bashMutation` always comes from
 * the stage default (authoritative this phase; agent overrides do not
 * propagate here, Phase 22 revisits).
 *
 * The function does no filesystem access; parsing agent frontmatter
 * (`parseAgentTools`) is left to the caller.
 *
 * @throws ValidationError with code `INVALID_STAGE` when `input.stage`
 *   is not a known stage.
 */
export function computeScope(input: ScopeInput): Scope {
  if (!isKnownStage(input.stage)) {
    const context = { stage: input.stage, knownStages: [...KNOWN_STAGES] };
    throw new ValidationError(
      `unknown stage: ${String(input.stage)}`,
      'INVALID_STAGE',
      context,
    );
  }

  const stage: Stage = input.stage;
  const defaults = STAGE_SCOPES[stage];

  // A present agentTools list (even an empty one) replaces the stage default.
  const base: readonly string[] = input.agentTools ?? defaults.allowed;

  // Fold in caller-supplied extras (typically MCP tools), then dedupe + sort.
  const allowed: readonly string[] = normalize([
    ...base,
    ...(input.additional ?? []),
  ]);

  // Collect the native tools that made it into the allowed list...
  const permittedNatives = new Set<string>();
  for (const tool of allowed) {
    if (isNativeTool(tool)) permittedNatives.add(tool);
  }
  // ...and deny every other native tool.
  const denied: readonly string[] = Object.freeze(
    NATIVE_TOOLS.filter((tool) => !permittedNatives.has(tool)).sort(),
  );

  return Object.freeze({
    stage,
    allowed,
    denied,
    bashMutation: defaults.bashMutation,
  });
}
144
+
145
+ // ---------------------------------------------------------------------------
146
+ // checkTool — pure predicate
147
+ // ---------------------------------------------------------------------------
148
+
149
/**
 * Decide whether `requestedTool` may be used under `scope`.
 *
 * MCP tools (`mcp__*`) always pass, because MCP servers declare their
 * own security perimeter; only native harness tools are gated by
 * `scope.allowed`.
 *
 * @returns `null` when the tool is permitted, otherwise a frozen
 *   `ScopeViolation` with code `TOOL_NOT_ALLOWED`.
 */
export function checkTool(
  scope: Scope,
  requestedTool: string,
): ScopeViolation | null {
  const permitted =
    isMcpTool(requestedTool) || scope.allowed.includes(requestedTool);
  if (permitted) {
    return null;
  }

  const allowedSummary =
    scope.allowed.length === 0
      ? '(empty — MCP only)'
      : scope.allowed.join(', ');

  return Object.freeze({
    code: 'TOOL_NOT_ALLOWED' as const,
    tool: requestedTool,
    stage: scope.stage,
    message: `tool "${requestedTool}" is not permitted by the "${scope.stage}" scope (allowed: ${allowedSummary})`,
  });
}
172
+
173
+ // ---------------------------------------------------------------------------
174
+ // enforceScope — throws on violation
175
+ // ---------------------------------------------------------------------------
176
+
177
/**
 * Enforce the scope at session creation. Every tool in
 * `input.additional` is checked and the first violation is thrown as a
 * `ValidationError`. On success the merged allowed list is returned,
 * ready to be passed as `session-runner`'s `allowedTools`.
 *
 * Throws:
 *  * `ValidationError('INVALID_STAGE', ...)` — unknown stage name
 *    (raised by `computeScope`).
 *  * `ValidationError('TOOL_NOT_ALLOWED', ...)` — an additional tool is
 *    outside the effective base scope (context = {stage, tool, allowed}).
 *
 * An empty allowed list is NOT an error: MCP-only agents are a
 * supported configuration.
 */
export function enforceScope(input: ScopeInput): readonly string[] {
  // Merged scope (base + additional): this is what gets returned.
  const merged: Scope = computeScope(input);

  // Baseline scope WITHOUT `additional` (agent override or stage default).
  // Checking against the merged scope would be vacuous, since
  // computeScope has already folded every additional tool into it.
  const baselineInput: ScopeInput =
    input.agentTools === undefined
      ? { stage: input.stage }
      : { stage: input.stage, agentTools: input.agentTools };
  const baseline: Scope = computeScope(baselineInput);

  for (const candidate of input.additional ?? []) {
    const denial: ScopeViolation | null = checkTool(baseline, candidate);
    if (denial === null) continue;
    throw new ValidationError(denial.message, denial.code, {
      stage: merged.stage,
      tool: candidate,
      allowed: [...baseline.allowed],
    });
  }

  return merged.allowed;
}