npm - @hegemonart/get-design-done - Versions diffs - 1.20.0 → 1.22.0 - Mend

@hegemonart/get-design-done 1.20.0 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

package/.claude-plugin/marketplace.json +9 -12
package/.claude-plugin/plugin.json +8 -31
package/CHANGELOG.md +200 -0
package/README.md +48 -7
package/bin/gdd-sdk +55 -0
package/hooks/_hook-emit.js +81 -0
package/hooks/gdd-bash-guard.js +8 -0
package/hooks/gdd-decision-injector.js +2 -0
package/hooks/gdd-protected-paths.js +8 -0
package/hooks/gdd-trajectory-capture.js +64 -0
package/hooks/hooks.json +9 -0
package/package.json +19 -47
package/reference/codex-tools.md +53 -0
package/reference/gemini-tools.md +53 -0
package/reference/registry.json +14 -0
package/scripts/cli/gdd-events.mjs +283 -0
package/scripts/e2e/run-headless.ts +514 -0
package/scripts/lib/cli/commands/audit.ts +382 -0
package/scripts/lib/cli/commands/init.ts +217 -0
package/scripts/lib/cli/commands/query.ts +329 -0
package/scripts/lib/cli/commands/run.ts +656 -0
package/scripts/lib/cli/commands/stage.ts +468 -0
package/scripts/lib/cli/index.ts +167 -0
package/scripts/lib/cli/parse-args.ts +336 -0
package/scripts/lib/connection-probe/index.cjs +263 -0
package/scripts/lib/context-engine/index.ts +116 -0
package/scripts/lib/context-engine/manifest.ts +69 -0
package/scripts/lib/context-engine/truncate.ts +282 -0
package/scripts/lib/context-engine/types.ts +59 -0
package/scripts/lib/discuss-parallel-runner/aggregator.ts +448 -0
package/scripts/lib/discuss-parallel-runner/discussants.ts +430 -0
package/scripts/lib/discuss-parallel-runner/index.ts +223 -0
package/scripts/lib/discuss-parallel-runner/types.ts +184 -0
package/scripts/lib/event-chain.cjs +177 -0
package/scripts/lib/event-stream/index.ts +31 -1
package/scripts/lib/event-stream/reader.ts +139 -0
package/scripts/lib/event-stream/types.ts +155 -1
package/scripts/lib/event-stream/writer.ts +65 -8
package/scripts/lib/explore-parallel-runner/index.ts +294 -0
package/scripts/lib/explore-parallel-runner/mappers.ts +290 -0
package/scripts/lib/explore-parallel-runner/synthesizer.ts +295 -0
package/scripts/lib/explore-parallel-runner/types.ts +139 -0
package/scripts/lib/harness/detect.ts +90 -0
package/scripts/lib/harness/index.ts +64 -0
package/scripts/lib/harness/tool-map.ts +142 -0
package/scripts/lib/init-runner/index.ts +396 -0
package/scripts/lib/init-runner/researchers.ts +245 -0
package/scripts/lib/init-runner/scaffold.ts +224 -0
package/scripts/lib/init-runner/synthesizer.ts +224 -0
package/scripts/lib/init-runner/types.ts +143 -0
package/scripts/lib/logger/index.ts +251 -0
package/scripts/lib/logger/sinks.ts +269 -0
package/scripts/lib/logger/types.ts +110 -0
package/scripts/lib/pipeline-runner/human-gate.ts +134 -0
package/scripts/lib/pipeline-runner/index.ts +527 -0
package/scripts/lib/pipeline-runner/stage-handlers.ts +339 -0
package/scripts/lib/pipeline-runner/state-machine.ts +144 -0
package/scripts/lib/pipeline-runner/types.ts +183 -0
package/scripts/lib/redact.cjs +122 -0
package/scripts/lib/session-runner/errors.ts +406 -0
package/scripts/lib/session-runner/index.ts +715 -0
package/scripts/lib/session-runner/transcript.ts +189 -0
package/scripts/lib/session-runner/types.ts +144 -0
package/scripts/lib/tool-scoping/index.ts +219 -0
package/scripts/lib/tool-scoping/parse-agent-tools.ts +207 -0
package/scripts/lib/tool-scoping/stage-scopes.ts +139 -0
package/scripts/lib/tool-scoping/types.ts +77 -0
package/scripts/lib/trajectory/index.cjs +126 -0
package/scripts/lib/transports/ws.cjs +179 -0

package/scripts/lib/session-runner/transcript.ts ADDED Viewed

@@ -0,0 +1,189 @@
+// scripts/lib/session-runner/transcript.ts — append-only JSONL
+// transcript writer for Phase 21 headless Agent SDK sessions
+// (Plan 21-01 Task 4).
+//
+// Design mirrors scripts/lib/event-stream/writer.ts but is scoped to one
+// session per file rather than the global telemetry stream. Each session
+// owns a dedicated `.design/sessions/<ISO>-<stage>.jsonl` file; the
+// filename is stable for the full run and survives retries (retries
+// continue to append to the same transcript).
+//
+// Key guarantees:
+//   * Single-call append via `fs.appendFileSync(..., { flag: 'a' })`. On
+//     POSIX O_APPEND makes single-call writes under PIPE_BUF (4 KiB)
+//     non-interleaved; on Windows FILE_APPEND_DATA provides the same.
+//     Note that the line cap below (MAX_LINE_BYTES = 64 KiB) is larger
+//     than that 4 KiB figure, so the PIPE_BUF bound does not cover every
+//     line; each session owns its own file, which limits concurrent writers.
+//   * Oversized payloads (> MAX_LINE_BYTES = 64 KiB) are REPLACED with
+//     `{ truncated: true, preview: "<first 1024 chars>" }` rather than
+//     dropped. The transcript always has a line per emitted chunk.
+//   * `close()` is a no-op today — we don't hold a file handle between
+//     appends (each `appendFileSync` opens/closes). The method exists
+//     so future buffering doesn't break callers.
+//   * `pathFor(stage, baseDir?)` produces Windows-safe filenames by
+//     replacing `:` in the ISO timestamp with `-`.
+//
+// Cross-reference: the run-loop in ./index.ts calls `append()` once per
+// SDK message chunk. Test fixtures (tests/fixtures/session-runner/) use
+// this module directly to assert JSONL line integrity.
+import { appendFileSync, mkdirSync } from 'node:fs';
+import { dirname, isAbsolute, join, resolve } from 'node:path';
/** Default transcript base directory (overridable via env or constructor). */
export const DEFAULT_SESSION_DIR = '.design/sessions';

/** Hard cap on serialized line size. Oversized → truncated with preview. */
export const MAX_LINE_BYTES = 64 * 1024;

/**
 * Preview budget when truncating, in UTF-8 bytes. 1 KiB of the
 * stringified payload is enough to spot-check what was emitted without
 * blowing up line size.
 */
export const TRUNCATION_PREVIEW_BYTES = 1024;

/**
 * One line in the `.jsonl` transcript. `turn` is a monotonic 0-indexed
 * counter incremented by the run-loop whenever `stop_reason` fires.
 */
export interface TranscriptChunk {
  /** ISO 8601 timestamp of chunk emission. */
  ts: string;
  /**
   * Kind of chunk. The union is closed: a new kind has to be added here
   * before it can be written.
   */
  type: 'user' | 'assistant' | 'tool_use' | 'tool_result' | 'system' | 'usage' | 'error';
  /** Monotonic turn counter (0-indexed). */
  turn: number;
  /** Raw SDK message; JSON-serialized by the writer (replaced by a marker on overflow). */
  payload: unknown;
}

/**
 * Return the longest prefix of `text` whose UTF-8 encoding fits in
 * `maxBytes`. Iterates by code point, so a surrogate pair is either kept
 * whole or dropped whole — the result never ends on a dangling high
 * surrogate created by the cut.
 */
function clipToUtf8Bytes(text: string, maxBytes: number): string {
  let keptUnits = 0;
  let keptBytes = 0;
  for (const codePoint of text) {
    const size = Buffer.byteLength(codePoint, 'utf8');
    if (keptBytes + size > maxBytes) break;
    keptBytes += size;
    keptUnits += codePoint.length;
  }
  return text.slice(0, keptUnits);
}

/**
 * Append-only writer for a single session's `.jsonl` file. One instance
 * per session — the run-loop constructs it once and calls `append()`
 * for every chunk it observes.
 */
export class TranscriptWriter {
  /** Resolved absolute path. */
  readonly path: string;
  /** `true` once we've ensured the target directory exists. */
  private directoryEnsured: boolean = false;
  /** Running count of chunks appended (including replaced ones). */
  chunksWritten: number = 0;
  /**
   * Running count of chunks whose payload was replaced by a marker,
   * either because the line was oversized or because the payload could
   * not be serialized. Incremented by `serialize()`.
   */
  chunksTruncated: number = 0;
  /**
   * Most recent write error; `null` until the first failure. It is not
   * cleared by a later successful append.
   */
  lastError: Error | null = null;

  /**
   * @param rawPath  transcript file path; relative paths are resolved
   *                 against `process.cwd()` at construction time.
   */
  constructor(rawPath: string) {
    this.path = isAbsolute(rawPath) ? rawPath : resolve(process.cwd(), rawPath);
  }

  /**
   * Append one chunk. Never throws; I/O failures are recorded on
   * `lastError` and a short diagnostic is written to stderr.
   *
   * Oversized or unserializable payloads are replaced by a marker (see
   * {@link serialize}) so the chunk still produces a transcript line.
   */
  append(chunk: TranscriptChunk): void {
    try {
      const line = this.serialize(chunk);
      this.ensureDirectory();
      appendFileSync(this.path, line, { flag: 'a' });
      this.chunksWritten += 1;
    } catch (err) {
      this.lastError = err instanceof Error ? err : new Error(String(err));
      // Drop the memoized mkdir result: if the directory disappeared the
      // next append must re-create it instead of failing forever.
      this.directoryEnsured = false;
      try {
        process.stderr.write(
          `[session-runner:transcript] write failed: ${this.lastError.message}\n`,
        );
      } catch {
        // No recourse — give up quietly.
      }
    }
  }

  /**
   * No-op today; kept so callers that wrap the writer in a try/finally
   * don't need to change when we add buffering.
   */
  close(): void {
    // Intentional no-op.
  }

  /**
   * Serialize a chunk to its on-disk form (one JSON object plus `\n`).
   * Exposed for tests; callers should use {@link append}.
   *
   *   * Line fits in `MAX_LINE_BYTES` → returned unchanged.
   *   * Line is oversized → payload becomes
   *     `{ truncated: true, preview }`, where `preview` is the stringified
   *     payload clipped to `TRUNCATION_PREVIEW_BYTES` UTF-8 bytes.
   *   * `JSON.stringify` throws (circular reference, BigInt, ...) →
   *     payload becomes `{ truncated: true, preview: '', error }` with the
   *     serializer's message, instead of the chunk being lost.
   *
   * Both replacement cases increment `chunksTruncated`.
   */
  serialize(chunk: TranscriptChunk): string {
    let failure: string | null = null;
    try {
      const raw = JSON.stringify(chunk) + '\n';
      if (Buffer.byteLength(raw, 'utf8') <= MAX_LINE_BYTES) {
        return raw;
      }
    } catch (err) {
      failure = err instanceof Error ? err.message : String(err);
    }

    this.chunksTruncated += 1;

    let preview = '';
    if (failure === null) {
      try {
        const stringified = JSON.stringify(chunk.payload);
        // JSON.stringify yields `undefined` for payloads such as functions.
        if (typeof stringified === 'string') {
          preview = clipToUtf8Bytes(stringified, TRUNCATION_PREVIEW_BYTES);
        }
      } catch {
        preview = '';
      }
    }

    const payload =
      failure === null
        ? { truncated: true, preview }
        : {
            truncated: true,
            preview,
            error: clipToUtf8Bytes(failure, TRUNCATION_PREVIEW_BYTES),
          };
    const replacement: TranscriptChunk = {
      ts: chunk.ts,
      type: chunk.type,
      turn: chunk.turn,
      payload,
    };
    return JSON.stringify(replacement) + '\n';
  }

  /**
   * Build the conventional path for a session's transcript. Callers
   * typically don't use this directly — they pass a pre-resolved path
   * to the constructor — but the run-loop uses it to default the
   * transcript location.
   *
   * Windows-safe: `:` characters from the ISO timestamp are replaced
   * with `-` so Windows filesystems accept the filename. A `stage` that
   * does not match `[a-z0-9][a-z0-9._-]*` (case-insensitive) is written
   * as `custom`, which keeps path separators out of the filename.
   *
   * @param stage     per-stage identifier (explore, plan, ...)
   * @param baseDir   optional override; defaults to
   *                  `process.env.GDD_SESSION_DIR ?? '.design/sessions'`
   * @returns         absolute path string
   */
  static pathFor(stage: string, baseDir?: string): string {
    const iso = new Date().toISOString().replace(/[:]/g, '-');
    const safeStage = /^[a-z0-9][a-z0-9._-]*$/i.test(stage) ? stage : 'custom';
    const dir = baseDir ?? process.env['GDD_SESSION_DIR'] ?? DEFAULT_SESSION_DIR;
    const filename = `${iso}-${safeStage}.jsonl`;
    const full = join(dir, filename);
    return isAbsolute(full) ? full : resolve(process.cwd(), full);
  }

  /** Ensure the target directory exists. Memoized until a write fails. */
  private ensureDirectory(): void {
    if (this.directoryEnsured) return;
    mkdirSync(dirname(this.path), { recursive: true });
    this.directoryEnsured = true;
  }
}

package/scripts/lib/session-runner/types.ts ADDED Viewed

@@ -0,0 +1,144 @@
+// scripts/lib/session-runner/types.ts — public type surface for the
+// Phase 21 headless Agent SDK wrapper (Plan 21-01, SDK-13).
+//
+// These types are consumed by every other Phase-21 runner (pipeline,
+// explore, discuss, init). No other file in the repo should import
+// `@anthropic-ai/claude-agent-sdk` directly — all session creation
+// flows through `run(opts)` in `./index.ts`.
+//
+// Design notes:
+//   * `BudgetCap` is a hard cap across the ENTIRE session, including
+//     retries. Plan spec (Task 5): "budget.usdLimit caps TOTAL session
+//     cost across retries, NOT per-attempt."
+//   * `TurnCap.maxTurns` counts assistant turns (response cycles). A
+//     tool_use + tool_result pair is part of the SAME turn.
+//   * `stage` drives event payloads + transcript filenames. The union
+//     mirrors `Stage` from `gdd-state/types.ts` plus `init` + `custom`.
+//   * `queryOverride` / `sanitizeOverride` exist solely for tests; the
+//     default behavior imports the real SDK + the real sanitizer.
/**
 * Spending envelope for one session. Crossing any single limit aborts
 * the session mid-stream and surfaces status `budget_exceeded`. The
 * limits apply to the session as a whole: usage from retries counts
 * against the same envelope rather than starting over per attempt.
 */
export interface BudgetCap {
  /** USD ceiling for the whole session (inbound + outbound); exceeding it aborts. */
  usdLimit: number;

  /** Ceiling on input tokens; exceeding it aborts. */
  inputTokensLimit: number;

  /** Ceiling on output tokens; exceeding it aborts. */
  outputTokensLimit: number;
}
/**
 * Limit on assistant response turns. The counter moves only when a
 * message carrying `message.stop_reason` is observed, so a `tool_use` +
 * `tool_result` round-trip is counted inside the turn that issued it.
 */
export interface TurnCap {
  /** Highest number of assistant turns (response cycles) allowed; tool round-trips do not add turns. */
  maxTurns: number;
}
/**
 * The part of the SDK's `query({ options })` argument that the runner
 * forwards. The runner itself only reads `abortSignal`, `allowedTools`
 * and `systemPrompt`; the index signature lets a test stub declare any
 * superset of this shape.
 */
export interface QueryOptionsForwarded {
  abortSignal?: AbortSignal;
  allowedTools?: Array<string>;
  systemPrompt?: string;
  [extra: string]: unknown;
}

/** Argument object handed to `queryOverride` and to the real SDK `query()`. */
export interface QueryInvocation {
  prompt: unknown;
  options?: QueryOptionsForwarded;
}

/**
 * Signature of a test double for the SDK's `query()`: it takes
 * `{ prompt, options? }` and returns an async iterable of chunks whose
 * type is left as `unknown`.
 */
export type QueryOverride = (
  args: QueryInvocation,
) => AsyncIterable<unknown>;
/**
 * Input for a single run against the Agent SDK. Retrying is built in
 * (retry-once by default); callers should rely on that instead of
 * wrapping the call in their own retry/backoff loop.
 *
 * Summary of the fields:
 *   * `prompt` — raw skill body or operator message. It MUST pass through
 *     the prompt sanitizer before reaching the SDK; the wrapper does that
 *     automatically.
 *   * `systemPrompt` — handed unchanged to `query({options.systemPrompt})`.
 *   * `allowedTools` — handed unchanged; enforcement lives in Plan 21-03.
 *   * `budget` / `turnCap` — see `BudgetCap` and `TurnCap`.
 *   * `stage` — used for event payloads and the transcript filename.
 *   * `transcriptDir` — replaces the default `.design/sessions/` directory.
 *   * `signal` — external abort hook (user Ctrl+C, parent pipeline kill).
 *   * `maxRetries` — TOTAL attempts, not additional ones. Default 2
 *     (first try + retry-once).
 *   * `queryOverride` / `sanitizeOverride` — injection points for tests.
 */
export interface SessionRunnerOptions {
  prompt: string;
  systemPrompt?: string;
  /** Names of permitted tools (e.g., ["Read","Grep","Glob","Bash"]). Enforced by Plan 21-03. */
  allowedTools?: Array<string>;
  budget: BudgetCap;
  turnCap: TurnCap;
  /** Stage identifier used for event emission and the transcript path. */
  stage:
    | 'brief'
    | 'explore'
    | 'plan'
    | 'design'
    | 'verify'
    | 'init'
    | 'custom';
  /**
   * Optional directory for the transcript file. When omitted the file
   * lands at `.design/sessions/<ISO>-<stage>.jsonl`.
   */
  transcriptDir?: string;
  /** Abort signal for external cancellation. */
  signal?: AbortSignal;
  /** Total attempts allowed on retryable errors (default: 2, first try + retry-once). */
  maxRetries?: number;
  /**
   * Replacement for the SDK `query()` import, intended for tests; the
   * real SDK is used when omitted.
   *
   * It receives one `args` object in the SDK's call shape
   * `{ prompt, options }`, where `options` carries at minimum
   * `abortSignal` plus the SDK's own extras. A test may declare a
   * narrower `options` type and still satisfy this signature, because
   * the runner only ever passes `abortSignal`, `systemPrompt` and
   * `allowedTools`.
   */
  queryOverride?: QueryOverride;
  /** Replacement for the prompt sanitizer, intended for tests; defaults to prompt-sanitizer.sanitize(). */
  sanitizeOverride?: (raw: string) => {
    sanitized: string;
    applied: ReadonlyArray<string>;
    removedSections: ReadonlyArray<string>;
  };
}
/**
 * What `run()` resolves to once a session is over. `status` tells the
 * outcomes apart:
 *
 *   * `completed`          — the session ended naturally (final `stop_reason`).
 *   * `budget_exceeded`    — usdLimit, inputTokensLimit or outputTokensLimit tripped.
 *   * `turn_cap_exceeded`  — maxTurns tripped.
 *   * `aborted`            — the external `opts.signal` fired.
 *   * `error`              — unhandled / non-retryable / retries-exhausted.
 *
 * `run()` NEVER throws. Inspect `error` when `status !== 'completed'`.
 */
export interface SessionResult {
  status:
    | 'completed'
    | 'budget_exceeded'
    | 'turn_cap_exceeded'
    | 'aborted'
    | 'error';
  transcript_path: string;
  turns: number;
  usage: {
    input_tokens: number;
    output_tokens: number;
    usd_cost: number;
  };
  final_text?: string;
  tool_calls: {
    name: string;
    input: unknown;
    output?: unknown;
    error?: string;
  }[];
  error?: {
    code: string;
    message: string;
    kind: string;
    context?: unknown;
  };
  /** Prompt-sanitizer diagnostics: names of patterns that fired and headings of removed sections. */
  sanitizer: {
    applied: ReadonlyArray<string>;
    removedSections: ReadonlyArray<string>;
  };
}

package/scripts/lib/tool-scoping/index.ts ADDED Viewed

@@ -0,0 +1,219 @@
+// scripts/lib/tool-scoping/index.ts — public surface for the per-stage
+// allowed-tools enforcement module.
+//
+// Exported API:
+//   * Types          — Stage, Scope, ScopeInput, ScopeViolation
+//   * Registry       — STAGE_SCOPES, NATIVE_TOOLS, isMcpTool, isNativeTool
+//   * Frontmatter    — parseAgentTools, parseAgentToolsByName
+//   * Computation    — computeScope       (pure, no I/O)
+//   * Checking       — checkTool          (pure predicate, returns violation|null)
+//   * Enforcement    — enforceScope       (throws ValidationError on denial)
+//
+// Precedence contract (agentTools vs stage default):
+//   undefined/null   → stage default applies
+//   []               → scope narrows to MCP-only (no native)
+//   string[]         → replaces stage default entirely (override wins)
+//
+// MCP tools (`mcp__*`) are always allowed — they're appended to the
+// scope without checking against the stage filter.
+//
+// Consumed by:
+//   * Plan 21-01 `session-runner`  — computes `allowedTools` for each session.
+//   * Plan 21-05 `pipeline-runner` — picks the correct scope per stage.
+import { ValidationError } from '../gdd-errors/index.ts';
+import type { Scope, ScopeInput, ScopeViolation, Stage } from './types.ts';
+import {
+  NATIVE_TOOLS,
+  STAGE_SCOPES,
+  isMcpTool,
+  isNativeTool,
+} from './stage-scopes.ts';
+import {
+  parseAgentTools,
+  parseAgentToolsByName,
+} from './parse-agent-tools.ts';
+// ---------------------------------------------------------------------------
+// Re-exports — keep the module's public surface on the index file.
+// ---------------------------------------------------------------------------
+export type { Scope, ScopeInput, ScopeViolation, Stage } from './types.ts';
+export {
+  NATIVE_TOOLS,
+  STAGE_SCOPES,
+  isMcpTool,
+  isNativeTool,
+} from './stage-scopes.ts';
+export {
+  parseAgentTools,
+  parseAgentToolsByName,
+} from './parse-agent-tools.ts';
+// ---------------------------------------------------------------------------
+// Internals
+// ---------------------------------------------------------------------------
/**
 * Frozen list of every recognized stage name. It has to be updated by
 * hand whenever the `Stage` union type changes.
 */
const KNOWN_STAGES: readonly Stage[] = Object.freeze([
  'brief', 'explore', 'plan', 'design', 'verify', 'init', 'custom',
]);

/** Type guard: `true` exactly when `s` is one of the entries in `KNOWN_STAGES`. */
function isKnownStage(s: string): s is Stage {
  return KNOWN_STAGES.some((known) => known === s);
}
/**
 * Return a frozen copy of `list` with duplicates removed and the entries
 * in default `Array.prototype.sort` order, so that `computeScope` output
 * does not depend on input order. The input array is not modified.
 */
function normalize(list: readonly string[]): readonly string[] {
  const unique = [...new Set(list)];
  unique.sort();
  return Object.freeze(unique);
}
+// ---------------------------------------------------------------------------
+// computeScope — pure, no I/O
+// ---------------------------------------------------------------------------
/**
 * Resolve the effective `Scope` for a session. Precedence:
 *   agentTools (frontmatter) > STAGE_SCOPES default
 *
 * Every entry of `additional` — MCP or native — is merged into the
 * allowed list without filtering; rejecting native additions that fall
 * outside the stage filter is `enforceScope`'s job, not this function's.
 *
 * The returned `Scope` is frozen. Its `.denied` list is NATIVE_TOOLS \
 * (.allowed ∩ NATIVE_TOOLS): the native tools not permitted on this
 * session.
 *
 * No I/O happens here; parsing agent frontmatter (`parseAgentTools`) is
 * left to the caller.
 *
 * @throws ValidationError with code `INVALID_STAGE` when `input.stage`
 *         is not one of `KNOWN_STAGES`.
 */
export function computeScope(input: ScopeInput): Scope {
  if (!isKnownStage(input.stage)) {
    throw new ValidationError(
      `unknown stage: ${String(input.stage)}`,
      'INVALID_STAGE',
      { stage: input.stage, knownStages: [...KNOWN_STAGES] },
    );
  }
  const stage: Stage = input.stage;
  const defaults = STAGE_SCOPES[stage];

  // An agent-supplied list (even an empty one) replaces the stage
  // default; only null/undefined falls back to it.
  const base: readonly string[] = input.agentTools ?? defaults.allowed;

  // Merge in caller-supplied additional tools (typically MCP).
  const allowed: readonly string[] = normalize([
    ...base,
    ...(input.additional ?? []),
  ]);

  // denied = NATIVE_TOOLS \ (allowed ∩ NATIVE_TOOLS).
  const permittedNative = new Set<string>();
  for (const tool of allowed) {
    if (isNativeTool(tool)) permittedNative.add(tool);
  }
  const denied: string[] = [];
  for (const tool of NATIVE_TOOLS) {
    if (!permittedNative.has(tool)) denied.push(tool);
  }
  denied.sort();

  // bashMutation follows the stage-level flag (authoritative this phase;
  // agent overrides do not propagate here — Phase 22 revisits).
  return Object.freeze({
    stage,
    allowed,
    denied: Object.freeze(denied),
    bashMutation: defaults.bashMutation,
  });
}
+// ---------------------------------------------------------------------------
+// checkTool — pure predicate
+// ---------------------------------------------------------------------------
/**
 * Decide whether `requestedTool` may be used under `scope`.
 *
 * MCP tools (`mcp__*`) are always accepted — MCP servers declare their
 * own security perimeter, so the stage filter only gates native harness
 * tools. Any other tool must appear in `scope.allowed`.
 *
 * @returns `null` when the tool is allowed, otherwise a frozen
 *          `ScopeViolation` with code `TOOL_NOT_ALLOWED`.
 */
export function checkTool(
  scope: Scope,
  requestedTool: string,
): ScopeViolation | null {
  const permitted =
    isMcpTool(requestedTool) || scope.allowed.includes(requestedTool);
  if (permitted) {
    return null;
  }

  const allowedSummary =
    scope.allowed.length === 0
      ? '(empty — MCP only)'
      : scope.allowed.join(', ');
  return Object.freeze({
    code: 'TOOL_NOT_ALLOWED' as const,
    tool: requestedTool,
    stage: scope.stage,
    message: `tool "${requestedTool}" is not permitted by the "${scope.stage}" scope (allowed: ${allowedSummary})`,
  });
}
+// ---------------------------------------------------------------------------
+// enforceScope — throws on violation
+// ---------------------------------------------------------------------------
/**
 * Scope gate applied at session creation. Each entry of
 * `input.additional` is checked against the scope, and the first one
 * that is not permitted raises a `ValidationError`. When all pass, the
 * merged allowed list is returned, ready for `session-runner`'s
 * `allowedTools` parameter.
 *
 * Throws:
 *   * `ValidationError('INVALID_STAGE', ...)` — unknown stage name.
 *   * `ValidationError('TOOL_NOT_ALLOWED', ...)` — an additional tool
 *     violates the effective scope (context = {stage, tool, allowed}).
 *
 * An empty allowed list is NOT treated as an error — MCP-only agents
 * are a supported configuration.
 */
export function enforceScope(input: ScopeInput): readonly string[] {
  // Post-merge scope: this is what gets returned.
  const merged: Scope = computeScope(input);

  // The additional tools are judged against the scope WITHOUT them
  // (agent override or stage default). Judging against `merged` would
  // be meaningless, since computeScope has already folded every
  // additional tool into it.
  const baseInput: ScopeInput =
    input.agentTools === undefined
      ? { stage: input.stage }
      : { stage: input.stage, agentTools: input.agentTools };
  const baseline: Scope = computeScope(baseInput);

  for (const tool of input.additional ?? []) {
    const violation: ScopeViolation | null = checkTool(baseline, tool);
    if (violation === null) continue;
    throw new ValidationError(violation.message, violation.code, {
      stage: merged.stage,
      tool,
      allowed: [...baseline.allowed],
    });
  }
  return merged.allowed;
}