npm - @hegemonart/get-design-done - Versions diffs - 1.20.0 → 1.21.0 - Mend

@hegemonart/get-design-done 1.20.0 → 1.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

package/.claude-plugin/marketplace.json +9 -12
package/.claude-plugin/plugin.json +8 -31
package/CHANGELOG.md +78 -0
package/README.md +48 -7
package/bin/gdd-sdk +55 -0
package/package.json +15 -47
package/reference/codex-tools.md +53 -0
package/reference/gemini-tools.md +53 -0
package/reference/registry.json +14 -0
package/scripts/e2e/run-headless.ts +514 -0
package/scripts/lib/cli/commands/audit.ts +382 -0
package/scripts/lib/cli/commands/init.ts +217 -0
package/scripts/lib/cli/commands/query.ts +329 -0
package/scripts/lib/cli/commands/run.ts +656 -0
package/scripts/lib/cli/commands/stage.ts +468 -0
package/scripts/lib/cli/index.ts +167 -0
package/scripts/lib/cli/parse-args.ts +336 -0
package/scripts/lib/context-engine/index.ts +116 -0
package/scripts/lib/context-engine/manifest.ts +69 -0
package/scripts/lib/context-engine/truncate.ts +282 -0
package/scripts/lib/context-engine/types.ts +59 -0
package/scripts/lib/discuss-parallel-runner/aggregator.ts +448 -0
package/scripts/lib/discuss-parallel-runner/discussants.ts +430 -0
package/scripts/lib/discuss-parallel-runner/index.ts +223 -0
package/scripts/lib/discuss-parallel-runner/types.ts +184 -0
package/scripts/lib/event-stream/index.ts +11 -1
package/scripts/lib/explore-parallel-runner/index.ts +294 -0
package/scripts/lib/explore-parallel-runner/mappers.ts +290 -0
package/scripts/lib/explore-parallel-runner/synthesizer.ts +295 -0
package/scripts/lib/explore-parallel-runner/types.ts +139 -0
package/scripts/lib/harness/detect.ts +90 -0
package/scripts/lib/harness/index.ts +64 -0
package/scripts/lib/harness/tool-map.ts +142 -0
package/scripts/lib/init-runner/index.ts +396 -0
package/scripts/lib/init-runner/researchers.ts +245 -0
package/scripts/lib/init-runner/scaffold.ts +224 -0
package/scripts/lib/init-runner/synthesizer.ts +224 -0
package/scripts/lib/init-runner/types.ts +143 -0
package/scripts/lib/logger/index.ts +251 -0
package/scripts/lib/logger/sinks.ts +269 -0
package/scripts/lib/logger/types.ts +110 -0
package/scripts/lib/pipeline-runner/human-gate.ts +134 -0
package/scripts/lib/pipeline-runner/index.ts +527 -0
package/scripts/lib/pipeline-runner/stage-handlers.ts +339 -0
package/scripts/lib/pipeline-runner/state-machine.ts +144 -0
package/scripts/lib/pipeline-runner/types.ts +183 -0
package/scripts/lib/session-runner/errors.ts +406 -0
package/scripts/lib/session-runner/index.ts +715 -0
package/scripts/lib/session-runner/transcript.ts +189 -0
package/scripts/lib/session-runner/types.ts +144 -0
package/scripts/lib/tool-scoping/index.ts +219 -0
package/scripts/lib/tool-scoping/parse-agent-tools.ts +207 -0
package/scripts/lib/tool-scoping/stage-scopes.ts +139 -0
package/scripts/lib/tool-scoping/types.ts +77 -0

package/scripts/lib/explore-parallel-runner/types.ts ADDED Viewed

@@ -0,0 +1,139 @@
+// scripts/lib/explore-parallel-runner/types.ts — Plan 21-06 (SDK-18).
+//
+// Public type surface for the 4-mapper parallel explore runner. Consumers
+// import from ./index.ts (the barrel); this file carries the type graph
+// so mappers.ts / synthesizer.ts / index.ts can share a single source of
+// truth without cyclic imports.
+//
+// Design notes:
+//   * `MapperName` is a closed union. Extending it requires touching the
+//     DEFAULT_MAPPERS table in index.ts, so a type-level addition forces
+//     a compile error on any registry-consuming site.
+//   * `ExploreRunnerOptions.runOverride` mirrors `SessionRunnerOptions.queryOverride`
+//     at a higher altitude — it replaces the whole `session-runner.run()`
+//     call per-mapper/per-synthesizer. Tests build a deterministic
+//     `runOverride` that returns canned `SessionResult`s without touching
+//     the Agent SDK.
+//   * All numeric fields in usage/outcome default to 0 on unpopulated
+//     sessions; we never surface `undefined` in `total_usage` math.
+//   * `parallel_count` + `serial_count` together === `mappers.length`
+//     (after parallelism_safe partitioning). The synthesizer is
+//     reported separately under `.synthesizer`.
+import type {
+  BudgetCap,
+  SessionResult,
+  SessionRunnerOptions,
+} from '../session-runner/types.ts';
+/** Closed union naming every mapper in the roster (four members). The
+ *  same identifier is carried by `MapperSpec.name` and
+ *  `MapperOutcome.name`. Changing this forces a compile-time touch of
+ *  `DEFAULT_MAPPERS` in index.ts — a deliberate choke point so the
+ *  roster stays the Phase-21 Locked 4. */
+export type MapperName =
+  | 'token'
+  | 'component-taxonomy'
+  | 'a11y'
+  | 'visual-hierarchy';
+/**
+ * A single mapper invocation specification; every field is read-only.
+ *
+ * The runner does NOT own prompt construction — callers assemble the
+ * prompt body from their `DESIGN-CONTEXT.md` + roster scaffolding and
+ * pass it through here via `prompt`.
+ */
+export interface MapperSpec {
+  /** Mapper identifier — also used in event payloads + log scopes. */
+  readonly name: MapperName;
+  /** Path to `agents/<name>.md`. A missing file is tolerated (stage default). */
+  readonly agentPath: string;
+  /** Expected mapper output file, e.g. `.design/map/token.md`. */
+  readonly outputPath: string;
+  /** Per-mapper prompt body passed to session-runner. */
+  readonly prompt: string;
+}
+/**
+ * Terminal record for a single mapper invocation.
+ *
+ * `output_exists` / `output_bytes` are captured AFTER the session
+ * terminates — if the mapper wrote its file but the session errored in
+ * cleanup, the file's presence is still surfaced.
+ *
+ * `usage` carries the input/output token counts and USD cost recorded
+ * for this mapper. NOTE(review): `duration_ms` is presumably the
+ * invocation's duration in milliseconds — confirm which span the runner
+ * actually measures.
+ *
+ * `error` is populated iff `status === 'error'`.
+ */
+export interface MapperOutcome {
+  readonly name: MapperName;
+  readonly status: 'completed' | 'error' | 'skipped';
+  readonly output_exists: boolean;
+  readonly output_bytes: number;
+  readonly usage: {
+    readonly input_tokens: number;
+    readonly output_tokens: number;
+    readonly usd_cost: number;
+  };
+  readonly duration_ms: number;
+  readonly error?: { readonly code: string; readonly message: string };
+}
+/**
+ * Caller-facing run options for the explore runner. Most fields mirror
+ * session-runner's BudgetCap / turn caps but applied per-mapper; the
+ * synthesizer session gets its own prompt, budget, and turn cap.
+ *
+ * Required: `budget`, `maxTurnsPerMapper`, `synthesizerPrompt`,
+ * `synthesizerBudget`, `synthesizerMaxTurns`. Every other field is
+ * optional.
+ *
+ * `runOverride`: when supplied, the runner invokes this instead of the
+ * real `session-runner.run()`. Tests build overrides that return canned
+ * `SessionResult`s without touching the Agent SDK.
+ */
+export interface ExploreRunnerOptions {
+  /** Override the mapper roster. Defaults to DEFAULT_MAPPERS (the locked 4). */
+  readonly mappers?: readonly MapperSpec[];
+  /** Per-mapper hard budget cap. Every mapper gets this same envelope — it does NOT pool across mappers. */
+  readonly budget: BudgetCap;
+  /** Per-mapper turn cap. */
+  readonly maxTurnsPerMapper: number;
+  /** Max concurrent mappers. Defaults to 4 (the full locked roster). */
+  readonly concurrency?: number;
+  /** Prompt body for the synthesizer session. */
+  readonly synthesizerPrompt: string;
+  /** Budget cap for the synthesizer session. */
+  readonly synthesizerBudget: BudgetCap;
+  /** Turn cap for the synthesizer session. */
+  readonly synthesizerMaxTurns: number;
+  /**
+   * Replace the session-runner.run() call entirely (test injection). Every
+   * mapper, and the synthesizer, consumes one invocation of this override.
+   */
+  readonly runOverride?: (
+    opts: SessionRunnerOptions,
+  ) => Promise<SessionResult>;
+  /** Current working directory used for path resolution (fixtures / agent files). */
+  readonly cwd?: string;
+  /** Override the file-stability polling interval (ms). Default 200. */
+  readonly pollIntervalMs?: number;
+  /** Override the file-watch timeout (ms). Default 600_000 (10 min). */
+  readonly timeoutMs?: number;
+}
+/**
+ * Terminal record for the whole runner invocation. `parallel_count` +
+ * `serial_count` === mappers.length (after parallelism_safe partitioning).
+ * `total_usage` aggregates mappers + synthesizer.
+ *
+ * The synthesizer is reported separately under `synthesizer`; its
+ * `status` admits `'timeout'` in addition to the three statuses a mapper
+ * can report, and its `error` field is optional.
+ * NOTE(review): `files_fed` presumably lists the mapper output files
+ * handed to the synthesizer — confirm against synthesizer.ts.
+ */
+export interface ExploreRunnerResult {
+  readonly mappers: readonly MapperOutcome[];
+  readonly synthesizer: {
+    readonly status: 'completed' | 'error' | 'skipped' | 'timeout';
+    readonly output_path: string;
+    readonly usage: {
+      readonly input_tokens: number;
+      readonly output_tokens: number;
+      readonly usd_cost: number;
+    };
+    readonly files_fed: readonly string[];
+    readonly error?: { readonly code: string; readonly message: string };
+  };
+  readonly parallel_count: number;
+  readonly serial_count: number;
+  readonly total_usage: {
+    readonly input_tokens: number;
+    readonly output_tokens: number;
+    readonly usd_cost: number;
+  };
+}

package/scripts/lib/harness/detect.ts ADDED Viewed

@@ -0,0 +1,90 @@
+// scripts/lib/harness/detect.ts — Plan 21-10 (SDK-22 / SDK-23).
+//
+// Harness detection runtime. Inspects the process env (or an injected
+// env map for tests) and reports which agent harness the current process
+// is running inside: Claude Code, OpenAI Codex CLI, Google Gemini CLI,
+// or `unknown` when no harness can be identified.
+//
+// Precedence (highest wins):
+//   1. `GDD_HARNESS` — explicit override. Accepts 'claude-code' | 'codex'
+//      | 'gemini' | 'unknown' verbatim. Any other non-empty value → 'unknown'; an empty string is treated as unset.
+//   2. `CLAUDECODE=1` OR `CLAUDE_CODE=1` → 'claude-code'.
+//   3. `CODEX_CLI_VERSION` set (any non-empty value) → 'codex'.
+//   4. `GEMINI_CLI_VERSION` set (any non-empty value) → 'gemini'.
+//   5. Fallback → 'unknown'.
+//
+// This module is pure — no side effects, no caching. Callers that want
+// process-wide caching go through `scripts/lib/harness/index.ts`, which
+// layers a `currentHarness()` helper on top.
+export type Harness = 'claude-code' | 'codex' | 'gemini' | 'unknown'; // 'unknown' = no harness could be identified
+/**
+ * Frozen list of the four canonical harness identifiers this plugin
+ * recognizes — the three real harnesses plus the `'unknown'` fallback.
+ */
+export const KNOWN_HARNESSES: readonly Harness[] = Object.freeze([
+  'claude-code',
+  'codex',
+  'gemini',
+  'unknown',
+]);
+/**
+ * Detect which agent harness the current process is running inside.
+ *
+ * Reads the supplied env map (defaults to `process.env`). Precedence is
+ * documented at the top of this file — the explicit `GDD_HARNESS` override
+ * wins over implicit env-var detection to make tests and simulated fixtures
+ * deterministic. An empty-string `GDD_HARNESS` is treated as unset, so
+ * detection falls through to the implicit env vars.
+ *
+ * @param env - Environment map to inspect; `process.env` when omitted.
+ * @returns The detected harness, or `'unknown'` when no rule matches or
+ *   the override holds an unrecognized non-empty value.
+ */
+export function detectHarness(env?: NodeJS.ProcessEnv): Harness {
+  const e: NodeJS.ProcessEnv = env ?? process.env;
+  // 1. Explicit override wins (only when set to a non-empty string).
+  const override: string | undefined = e.GDD_HARNESS;
+  if (override !== undefined && override !== '') {
+    if (isHarness(override)) return override;
+    // Any other non-empty string → 'unknown' (override is present but invalid).
+    return 'unknown';
+  }
+  // 2. Claude Code: either CLAUDECODE or CLAUDE_CODE set to exactly "1".
+  if (e.CLAUDECODE === '1' || e.CLAUDE_CODE === '1') {
+    return 'claude-code';
+  }
+  // 3. Codex: CODEX_CLI_VERSION present (any non-empty value).
+  if (e.CODEX_CLI_VERSION !== undefined && e.CODEX_CLI_VERSION !== '') {
+    return 'codex';
+  }
+  // 4. Gemini: GEMINI_CLI_VERSION present (any non-empty value).
+  if (e.GEMINI_CLI_VERSION !== undefined && e.GEMINI_CLI_VERSION !== '') {
+    return 'gemini';
+  }
+  return 'unknown';
+}
+/**
+ * True for harnesses that this plugin fully supports (Claude Code, Codex,
+ * Gemini). False for `'unknown'`.
+ *
+ * Callers use this as a gate before invoking harness-specific code paths;
+ * an unknown harness falls back to CC-native tool names (see
+ * `tool-map.ts TOOL_MAPS.unknown`).
+ *
+ * @param h - Harness identifier to test.
+ * @returns `true` for `'claude-code'`, `'codex'`, or `'gemini'`;
+ *   `false` otherwise.
+ */
+export function isSupportedHarness(h: Harness): boolean {
+  return h === 'claude-code' || h === 'codex' || h === 'gemini';
+}
+/**
+ * Narrow an arbitrary string to the `Harness` union. Not exported — used
+ * only by `detectHarness` to validate the `GDD_HARNESS` override.
+ *
+ * The comparison is exact (case-sensitive, no trimming) and lists the
+ * same four literals as the `Harness` type; keep the two in sync.
+ *
+ * @param s - Candidate string.
+ * @returns `true` when `s` is one of the four `Harness` literals.
+ */
+function isHarness(s: string): s is Harness {
+  return (
+    s === 'claude-code' ||
+    s === 'codex' ||
+    s === 'gemini' ||
+    s === 'unknown'
+  );
+}

package/scripts/lib/harness/index.ts ADDED Viewed

@@ -0,0 +1,64 @@
+// scripts/lib/harness/index.ts — Plan 21-10 (SDK-22 / SDK-23).
+//
+// Public API for the harness module. Re-exports the detect + tool-map
+// surfaces and adds a `currentHarness()` helper that caches the
+// first-call result for the life of the process.
+//
+// Why cache? Harness identity is a process-scoped invariant. The env
+// vars that drive detection (CLAUDECODE, CODEX_CLI_VERSION,
+// GEMINI_CLI_VERSION, GDD_HARNESS) are set by the harness when it
+// spawns us — they do not change mid-process. Repeated env reads are
+// cheap, but the cache avoids any chance of divergent reads if a
+// downstream caller mutates process.env (tests sometimes do this, and
+// we want `currentHarness()` to stay stable within a test unless
+// `resetHarnessCache()` is called explicitly).
+//
+// Use `resetHarnessCache()` in test `beforeEach` to re-read env after
+// mutating it.
+import { detectHarness, isSupportedHarness, type Harness } from './detect.ts';
+export { detectHarness, isSupportedHarness, type Harness } from './detect.ts';
+export {
+  TOOL_MAPS,
+  mapTool,
+  reverseMapTool,
+  CC_TOOLS,
+  type CCTool,
+} from './tool-map.ts';
+let cached: Harness | undefined = undefined; // undefined = not detected yet, or cleared by resetHarnessCache()
+/**
+ * Cached harness lookup. On first call, reads `process.env` via
+ * `detectHarness()` and stores the result in the module-level `cached`
+ * variable. Every subsequent call returns the cached value, regardless
+ * of later env mutations.
+ *
+ * Call `resetHarnessCache()` to force a re-read.
+ *
+ * @returns The harness detected on the first call since module load or
+ *   since the last `resetHarnessCache()`.
+ */
+export function currentHarness(): Harness {
+  if (cached === undefined) {
+    cached = detectHarness(process.env);
+  }
+  return cached;
+}
+/**
+ * Clear the `currentHarness()` cache by resetting the module-level
+ * `cached` variable to `undefined`. Tests that mutate `process.env`
+ * between cases should call this in `beforeEach` (or equivalently)
+ * so each case sees a fresh detection on the next `currentHarness()`.
+ */
+export function resetHarnessCache(): void {
+  cached = undefined;
+}
+/**
+ * True when the currently detected harness exposes MCP protocol support.
+ * Used by gdd-sdk audit to decide whether to spawn the gdd-state MCP
+ * server or import handlers directly.
+ *
+ * Claude Code, Codex, and Gemini all speak MCP; only `'unknown'` does not.
+ *
+ * The harness is read through `currentHarness()`, so the answer follows
+ * the cached detection until `resetHarnessCache()` is called.
+ *
+ * @returns `isSupportedHarness(currentHarness())`.
+ */
+export function harnessSupportsMCP(): boolean {
+  return isSupportedHarness(currentHarness());
+}

package/scripts/lib/harness/tool-map.ts ADDED Viewed

@@ -0,0 +1,142 @@
+// scripts/lib/harness/tool-map.ts — Plan 21-10 (SDK-22 / SDK-23).
+//
+// Cross-harness tool-name lookup table + helpers. Given a Claude Code
+// tool name (`Read`, `Write`, `Edit`, etc.) and a harness identifier,
+// return the native tool name on that harness. Also supports inverse
+// mapping (harness-native name → CC name).
+//
+// The maps are frozen (Object.freeze at two levels) — consumers cannot
+// accidentally mutate the tables. Any mutation attempt throws in strict
+// mode, silently no-ops otherwise; tests assert frozenness explicitly
+// to lock the invariant.
+//
+// Task spawning — the CC `Task` tool has no direct native equivalent on
+// Codex or Gemini (both require spawning a nested CLI instance as a
+// shell subprocess rather than a tool call). The map returns `null`
+// for those slots; callers check for null and fall back to a
+// `shell("npx gdd-sdk …")` invocation. See AGENTS.md / GEMINI.md.
+import type { Harness } from './detect.ts';
+/**
+ * Canonical Claude Code tool names the plugin references in skill prose
+ * (nine members). This is the shape against which per-harness maps are
+ * typed, so that adding a new CC tool to the canonical set fails TSC on
+ * every harness map that forgets to include it.
+ */
+export type CCTool =
+  | 'Read'
+  | 'Write'
+  | 'Edit'
+  | 'Bash'
+  | 'Grep'
+  | 'Glob'
+  | 'Task'
+  | 'WebSearch'
+  | 'WebFetch';
+/**
+ * All nine CC tool names as a frozen array — useful for iteration in
+ * tests. The order matters: `reverseMapTool` walks this array and
+ * returns the first match, so it decides which CC tool wins when
+ * several share one native name.
+ */
+export const CC_TOOLS: readonly CCTool[] = Object.freeze([
+  'Read',
+  'Write',
+  'Edit',
+  'Bash',
+  'Grep',
+  'Glob',
+  'Task',
+  'WebSearch',
+  'WebFetch',
+]);
+/**
+ * Per-harness lookup. Each entry maps every CC tool to its native name
+ * on that harness, or `null` when the harness has no direct equivalent
+ * (currently only `Task` on Codex + Gemini).
+ *
+ * The mapping is not one-to-one on Codex: `Write` and `Edit` both map to
+ * `apply_patch`, and `Bash`, `Grep`, `Glob`, and `WebFetch` all map to
+ * `shell`.
+ *
+ * The `unknown` row falls back to CC names — callers that cannot identify
+ * the harness get a reasonable default that works on Claude Code and
+ * fails loudly on any other harness (the harness will refuse an
+ * unrecognized tool call).
+ *
+ * Both the outer table and each row are wrapped in `Object.freeze`.
+ */
+export const TOOL_MAPS: Readonly<Record<Harness, Readonly<Record<CCTool, string | null>>>> = Object.freeze({
+  'claude-code': Object.freeze({
+    Read: 'Read',
+    Write: 'Write',
+    Edit: 'Edit',
+    Bash: 'Bash',
+    Grep: 'Grep',
+    Glob: 'Glob',
+    Task: 'Task',
+    WebSearch: 'WebSearch',
+    WebFetch: 'WebFetch',
+  }),
+  codex: Object.freeze({
+    Read: 'read_file',
+    Write: 'apply_patch',
+    Edit: 'apply_patch',
+    Bash: 'shell',
+    Grep: 'shell',
+    Glob: 'shell',
+    Task: null, // no native Task; use CLI subprocess
+    WebSearch: 'web_search',
+    WebFetch: 'shell',
+  }),
+  gemini: Object.freeze({
+    Read: 'read_file',
+    Write: 'write_file',
+    Edit: 'replace',
+    Bash: 'run_shell_command',
+    Grep: 'search_file_content',
+    Glob: 'glob',
+    Task: null, // no native Task; use CLI subprocess
+    WebSearch: 'google_web_search',
+    WebFetch: 'web_fetch',
+  }),
+  unknown: Object.freeze({
+    Read: 'Read',
+    Write: 'Write',
+    Edit: 'Edit',
+    Bash: 'Bash',
+    Grep: 'Grep',
+    Glob: 'Glob',
+    Task: 'Task',
+    WebSearch: 'WebSearch',
+    WebFetch: 'WebFetch',
+  }),
+});
+/**
+ * Return the harness-specific tool name for a CC tool. Returns `null`
+ * when the harness has no native equivalent — currently only `Task` on
+ * Codex + Gemini. Callers that receive `null` should fall back to a
+ * `shell`/`run_shell_command` invocation of `npx gdd-sdk …`.
+ *
+ * @param harness - Harness whose row of `TOOL_MAPS` is consulted.
+ * @param ccTool - Canonical Claude Code tool name to translate.
+ * @returns The native tool name, or `null` when there is none.
+ */
+export function mapTool(harness: Harness, ccTool: CCTool): string | null {
+  const row = TOOL_MAPS[harness];
+  // Every Harness key is present in TOOL_MAPS by construction (the type
+  // forces it), and each row is typed to hold every CCTool key. The
+  // `| undefined` annotation plus `??` below is purely defensive: a
+  // missing entry collapses to `null` instead of leaking `undefined`.
+  const native: string | null | undefined = row[ccTool];
+  return native ?? null;
+}
+/**
+ * Inverse of `mapTool` — given a harness-native tool name (e.g.
+ * `'read_file'`), return the CC tool it came from (`'Read'`). Returns
+ * `null` when the native name is not in the harness's map at all.
+ *
+ * Note: on Codex several CC tools share a native name — `Write` and
+ * `Edit` both map to `apply_patch`, while `Bash`, `Grep`, `Glob`, and
+ * `WebFetch` all map to `shell`. The reverse mapper returns the FIRST
+ * CC match walking `CC_TOOLS` in declaration order
+ * (`Read` → `Write` → `Edit` → …), so on Codex `'apply_patch'` yields
+ * `'Write'` and `'shell'` yields `'Bash'`. Callers that need
+ * disambiguation between, e.g., create vs. update must consult the full
+ * forward map or inspect tool-call arguments.
+ *
+ * A `null` slot (no native equivalent) never matches, because
+ * `nativeName` is always a string.
+ *
+ * @param harness - Harness whose row of `TOOL_MAPS` is searched.
+ * @param nativeName - Harness-native tool name; compared with `===`.
+ * @returns The first matching CC tool, or `null` when none matches.
+ */
+export function reverseMapTool(harness: Harness, nativeName: string): CCTool | null {
+  const row = TOOL_MAPS[harness];
+  for (const cc of CC_TOOLS) {
+    if (row[cc] === nativeName) return cc;
+  }
+  return null;
+}