@hegemonart/get-design-done 1.20.0 → 1.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/.claude-plugin/marketplace.json +9 -12
  2. package/.claude-plugin/plugin.json +8 -31
  3. package/CHANGELOG.md +78 -0
  4. package/README.md +48 -7
  5. package/bin/gdd-sdk +55 -0
  6. package/package.json +15 -47
  7. package/reference/codex-tools.md +53 -0
  8. package/reference/gemini-tools.md +53 -0
  9. package/reference/registry.json +14 -0
  10. package/scripts/e2e/run-headless.ts +514 -0
  11. package/scripts/lib/cli/commands/audit.ts +382 -0
  12. package/scripts/lib/cli/commands/init.ts +217 -0
  13. package/scripts/lib/cli/commands/query.ts +329 -0
  14. package/scripts/lib/cli/commands/run.ts +656 -0
  15. package/scripts/lib/cli/commands/stage.ts +468 -0
  16. package/scripts/lib/cli/index.ts +167 -0
  17. package/scripts/lib/cli/parse-args.ts +336 -0
  18. package/scripts/lib/context-engine/index.ts +116 -0
  19. package/scripts/lib/context-engine/manifest.ts +69 -0
  20. package/scripts/lib/context-engine/truncate.ts +282 -0
  21. package/scripts/lib/context-engine/types.ts +59 -0
  22. package/scripts/lib/discuss-parallel-runner/aggregator.ts +448 -0
  23. package/scripts/lib/discuss-parallel-runner/discussants.ts +430 -0
  24. package/scripts/lib/discuss-parallel-runner/index.ts +223 -0
  25. package/scripts/lib/discuss-parallel-runner/types.ts +184 -0
  26. package/scripts/lib/event-stream/index.ts +11 -1
  27. package/scripts/lib/explore-parallel-runner/index.ts +294 -0
  28. package/scripts/lib/explore-parallel-runner/mappers.ts +290 -0
  29. package/scripts/lib/explore-parallel-runner/synthesizer.ts +295 -0
  30. package/scripts/lib/explore-parallel-runner/types.ts +139 -0
  31. package/scripts/lib/harness/detect.ts +90 -0
  32. package/scripts/lib/harness/index.ts +64 -0
  33. package/scripts/lib/harness/tool-map.ts +142 -0
  34. package/scripts/lib/init-runner/index.ts +396 -0
  35. package/scripts/lib/init-runner/researchers.ts +245 -0
  36. package/scripts/lib/init-runner/scaffold.ts +224 -0
  37. package/scripts/lib/init-runner/synthesizer.ts +224 -0
  38. package/scripts/lib/init-runner/types.ts +143 -0
  39. package/scripts/lib/logger/index.ts +251 -0
  40. package/scripts/lib/logger/sinks.ts +269 -0
  41. package/scripts/lib/logger/types.ts +110 -0
  42. package/scripts/lib/pipeline-runner/human-gate.ts +134 -0
  43. package/scripts/lib/pipeline-runner/index.ts +527 -0
  44. package/scripts/lib/pipeline-runner/stage-handlers.ts +339 -0
  45. package/scripts/lib/pipeline-runner/state-machine.ts +144 -0
  46. package/scripts/lib/pipeline-runner/types.ts +183 -0
  47. package/scripts/lib/session-runner/errors.ts +406 -0
  48. package/scripts/lib/session-runner/index.ts +715 -0
  49. package/scripts/lib/session-runner/transcript.ts +189 -0
  50. package/scripts/lib/session-runner/types.ts +144 -0
  51. package/scripts/lib/tool-scoping/index.ts +219 -0
  52. package/scripts/lib/tool-scoping/parse-agent-tools.ts +207 -0
  53. package/scripts/lib/tool-scoping/stage-scopes.ts +139 -0
  54. package/scripts/lib/tool-scoping/types.ts +77 -0
@@ -0,0 +1,139 @@
1
+ // scripts/lib/explore-parallel-runner/types.ts — Plan 21-06 (SDK-18).
2
+ //
3
+ // Public type surface for the 4-mapper parallel explore runner. Consumers
4
+ // import from ./index.ts (the barrel); this file carries the type graph
5
+ // so mappers.ts / synthesizer.ts / index.ts can share a single source of
6
+ // truth without cyclic imports.
7
+ //
8
+ // Design notes:
9
+ // * `MapperName` is a closed union. Extending it requires touching the
10
+ // DEFAULT_MAPPERS table in index.ts, so a type-level addition forces
11
+ // a compile error on any registry-consuming site.
12
+ // * `ExploreRunnerOptions.runOverride` mirrors `SessionRunnerOptions.queryOverride`
13
+ // at a higher altitude — it replaces the whole `session-runner.run()`
14
+ // call per-mapper/per-synthesizer. Tests build a deterministic
15
+ // `runOverride` that returns canned `SessionResult`s without touching
16
+ // the Agent SDK.
17
+ // * All numeric fields in usage/outcome default to 0 on unpopulated
18
+ // sessions; we never surface `undefined` in `total_usage` math.
19
+ // * `parallel_count` + `serial_count` together === the number of mapper specs (`mappers.length`)
20
+ // (after parallelism_safe partitioning). Synthesizer is counted
21
+ // separately under `.synthesizer`.
22
+
23
+ import type {
24
+ BudgetCap,
25
+ SessionResult,
26
+ SessionRunnerOptions,
27
+ } from '../session-runner/types.ts';
28
+
29
+ /** Closed union of mapper roster. Changing this forces a compile-time
30
+ * touch of `DEFAULT_MAPPERS` in index.ts — a deliberate choke point
31
+ * so the roster stays the Phase-21 Locked 4. */
32
+ export type MapperName =
33
+ | 'token'
34
+ | 'component-taxonomy'
35
+ | 'a11y'
36
+ | 'visual-hierarchy';
37
+
38
+ /**
39
+ * A single mapper invocation specification. The runner does NOT own
40
+ * prompt construction — callers assemble the prompt body from their
41
+ * `DESIGN-CONTEXT.md` + roster scaffolding and pass it through here.
42
+ */
43
+ export interface MapperSpec {
44
+ /** Mapper identifier — also used in event payloads + log scopes. */
45
+ readonly name: MapperName;
46
+ /** Path to `agents/<name>.md`. Missing file is tolerated (stage default). */
47
+ readonly agentPath: string;
48
+ /** Expected mapper output file, e.g. `.design/map/token.md`. */
49
+ readonly outputPath: string;
50
+ /** Per-mapper prompt body passed to session-runner. */
51
+ readonly prompt: string;
52
+ }
53
+
54
+ /**
55
+ * Terminal record for a single mapper invocation. `output_exists` /
56
+ * `output_bytes` are captured AFTER the session terminates — if the
57
+ * mapper wrote its file but the session errored in cleanup, we still
58
+ * surface the file's presence.
59
+ *
60
+ * `error` is populated iff `status === 'error'`.
61
+ */
62
+ export interface MapperOutcome {
63
+ readonly name: MapperName;
64
+ readonly status: 'completed' | 'error' | 'skipped';
65
+ readonly output_exists: boolean;
66
+ readonly output_bytes: number;
67
+ readonly usage: {
68
+ readonly input_tokens: number;
69
+ readonly output_tokens: number;
70
+ readonly usd_cost: number;
71
+ };
72
+ readonly duration_ms: number;
73
+ readonly error?: { readonly code: string; readonly message: string };
74
+ }
75
+
76
+ /**
77
+ * Caller-facing run options for the explore runner. Most fields mirror
78
+ * session-runner's BudgetCap / turn caps but applied per-mapper.
79
+ *
80
+ * `runOverride`: when supplied, the runner invokes this instead of the
81
+ * real `session-runner.run()`. Tests build overrides that return canned
82
+ * `SessionResult`s without touching the Agent SDK.
83
+ */
84
+ export interface ExploreRunnerOptions {
85
+ /** Override the mapper roster. Defaults to DEFAULT_MAPPERS (the locked 4). */
86
+ readonly mappers?: readonly MapperSpec[];
87
+ /** Per-mapper hard budget cap. The same cap is applied to each mapper independently — it does NOT pool across mappers. */
88
+ readonly budget: BudgetCap;
89
+ /** Per-mapper turn cap. */
90
+ readonly maxTurnsPerMapper: number;
91
+ /** Max concurrent mappers. Defaults to 4 (the full locked roster). */
92
+ readonly concurrency?: number;
93
+ /** Prompt body for the synthesizer session. */
94
+ readonly synthesizerPrompt: string;
95
+ /** Budget cap for the synthesizer session. */
96
+ readonly synthesizerBudget: BudgetCap;
97
+ /** Turn cap for the synthesizer session. */
98
+ readonly synthesizerMaxTurns: number;
99
+ /**
100
+ * Replace the session-runner.run() call entirely (test injection). Every
101
+ * mapper, plus the synthesizer, consumes one invocation of this override.
102
+ */
103
+ readonly runOverride?: (
104
+ opts: SessionRunnerOptions,
105
+ ) => Promise<SessionResult>;
106
+ /** Current working directory used for path resolution (fixtures / agent files). */
107
+ readonly cwd?: string;
108
+ /** Override the file-stability polling interval (ms). Default 200. */
109
+ readonly pollIntervalMs?: number;
110
+ /** Override the file-watch timeout (ms). Default 600_000 (10 min). */
111
+ readonly timeoutMs?: number;
112
+ }
113
+
114
+ /**
115
+ * Terminal record for the whole runner invocation. `parallel_count` +
116
+ * `serial_count` === mappers.length (after parallelism_safe partitioning).
117
+ * `total_usage` aggregates mappers + synthesizer.
118
+ */
119
+ export interface ExploreRunnerResult {
120
+ readonly mappers: readonly MapperOutcome[];
121
+ readonly synthesizer: {
122
+ readonly status: 'completed' | 'error' | 'skipped' | 'timeout';
123
+ readonly output_path: string;
124
+ readonly usage: {
125
+ readonly input_tokens: number;
126
+ readonly output_tokens: number;
127
+ readonly usd_cost: number;
128
+ };
129
+ readonly files_fed: readonly string[];
130
+ readonly error?: { readonly code: string; readonly message: string };
131
+ };
132
+ readonly parallel_count: number;
133
+ readonly serial_count: number;
134
+ readonly total_usage: {
135
+ readonly input_tokens: number;
136
+ readonly output_tokens: number;
137
+ readonly usd_cost: number;
138
+ };
139
+ }
@@ -0,0 +1,90 @@
1
+ // scripts/lib/harness/detect.ts — Plan 21-10 (SDK-22 / SDK-23).
2
+ //
3
+ // Harness detection runtime. Inspects the process env (or an injected
4
+ // env map for tests) and reports which agent harness the current process
5
+ // is running inside: Claude Code, OpenAI Codex CLI, Google Gemini CLI,
6
+ // or `unknown` when no harness can be identified.
7
+ //
8
+ // Precedence (highest wins):
9
+ // 1. `GDD_HARNESS` — explicit override. Accepts 'claude-code' | 'codex'
10
+ // | 'gemini' | 'unknown' verbatim. Any other non-empty value → 'unknown'; an empty string is treated as unset.
11
+ // 2. `CLAUDECODE=1` OR `CLAUDE_CODE=1` → 'claude-code'.
12
+ // 3. `CODEX_CLI_VERSION` set (any non-empty value) → 'codex'.
13
+ // 4. `GEMINI_CLI_VERSION` set (any non-empty value) → 'gemini'.
14
+ // 5. Fallback → 'unknown'.
15
+ //
16
+ // This module is pure — no side effects, no caching. Callers that want
17
+ // process-wide caching go through `scripts/lib/harness/index.ts`, which
18
+ // layers a `currentHarness()` helper on top.
19
+
20
+ export type Harness = 'claude-code' | 'codex' | 'gemini' | 'unknown';
21
+
22
+ /** The four canonical harness identifiers this plugin recognizes. */
23
+ export const KNOWN_HARNESSES: readonly Harness[] = Object.freeze([
24
+ 'claude-code',
25
+ 'codex',
26
+ 'gemini',
27
+ 'unknown',
28
+ ]);
29
+
30
+ /**
31
+ * Detect which agent harness the current process is running inside.
32
+ *
33
+ * Reads the supplied env map (defaults to `process.env`). Precedence is
34
+ * documented at the top of this file — the explicit `GDD_HARNESS` override
35
+ * wins over implicit env-var detection to make tests and simulated fixtures
36
+ * deterministic.
37
+ */
38
+ export function detectHarness(env?: NodeJS.ProcessEnv): Harness {
39
+ const e: NodeJS.ProcessEnv = env ?? process.env;
40
+
41
+ // 1. Explicit override wins; an unset or empty GDD_HARNESS falls through to the checks below.
42
+ const override: string | undefined = e.GDD_HARNESS;
43
+ if (override !== undefined && override !== '') {
44
+ if (isHarness(override)) return override;
45
+ // Any other non-empty string → 'unknown' (override is present but invalid).
46
+ return 'unknown';
47
+ }
48
+
49
+ // 2. Claude Code: either CLAUDECODE or CLAUDE_CODE set to exactly "1".
50
+ if (e.CLAUDECODE === '1' || e.CLAUDE_CODE === '1') {
51
+ return 'claude-code';
52
+ }
53
+
54
+ // 3. Codex: CODEX_CLI_VERSION present (any non-empty value).
55
+ if (e.CODEX_CLI_VERSION !== undefined && e.CODEX_CLI_VERSION !== '') {
56
+ return 'codex';
57
+ }
58
+
59
+ // 4. Gemini: GEMINI_CLI_VERSION present (any non-empty value).
60
+ if (e.GEMINI_CLI_VERSION !== undefined && e.GEMINI_CLI_VERSION !== '') {
61
+ return 'gemini';
62
+ }
63
+
64
+ return 'unknown';
65
+ }
66
+
67
+ /**
68
+ * True for harnesses that this plugin fully supports (Claude Code, Codex,
69
+ * Gemini). False for `'unknown'`.
70
+ *
71
+ * Callers use this as a gate before invoking harness-specific code paths;
72
+ * an unknown harness falls back to CC-native tool names (see
73
+ * `tool-map.ts TOOL_MAPS.unknown`).
74
+ */
75
+ export function isSupportedHarness(h: Harness): boolean {
76
+ return h === 'claude-code' || h === 'codex' || h === 'gemini';
77
+ }
78
+
79
+ /**
80
+ * Narrow an arbitrary string to the `Harness` union. Not exported — used
81
+ * only by `detectHarness` to validate the `GDD_HARNESS` override.
82
+ */
83
+ function isHarness(s: string): s is Harness {
84
+ return (
85
+ s === 'claude-code' ||
86
+ s === 'codex' ||
87
+ s === 'gemini' ||
88
+ s === 'unknown'
89
+ );
90
+ }
@@ -0,0 +1,64 @@
1
+ // scripts/lib/harness/index.ts — Plan 21-10 (SDK-22 / SDK-23).
2
+ //
3
+ // Public API for the harness module. Re-exports the detect + tool-map
4
+ // surfaces and adds a `currentHarness()` helper that caches the
5
+ // first-call result for the life of the process.
6
+ //
7
+ // Why cache? Harness identity is a process-scoped invariant. The env
8
+ // vars that drive detection (CLAUDECODE, CLAUDE_CODE, CODEX_CLI_VERSION,
9
+ // GEMINI_CLI_VERSION, GDD_HARNESS) are set by the harness when it
10
+ // spawns us — they do not change mid-process. Repeated env reads are
11
+ // cheap but the cache avoids any chance of divergent reads if a
12
+ // downstream caller mutates process.env (tests sometimes do this, and
13
+ // we want `currentHarness()` to stay monotonic within a test unless
14
+ // `resetHarnessCache()` is called explicitly).
15
+ //
16
+ // Use `resetHarnessCache()` in test `beforeEach` to re-read env after
17
+ // mutating it.
18
+
19
+ import { detectHarness, isSupportedHarness, type Harness } from './detect.ts';
20
+
21
+ export { detectHarness, isSupportedHarness, type Harness } from './detect.ts';
22
+ export {
23
+ TOOL_MAPS,
24
+ mapTool,
25
+ reverseMapTool,
26
+ CC_TOOLS,
27
+ type CCTool,
28
+ } from './tool-map.ts';
29
+
30
+ let cached: Harness | undefined = undefined;
31
+
32
+ /**
33
+ * Cached harness lookup. On first call, reads `process.env` via
34
+ * `detectHarness()` and stores the result. Every subsequent call
35
+ * returns the cached value, regardless of later env mutations.
36
+ *
37
+ * Call `resetHarnessCache()` to force a re-read.
38
+ */
39
+ export function currentHarness(): Harness {
40
+ if (cached === undefined) {
41
+ cached = detectHarness(process.env);
42
+ }
43
+ return cached;
44
+ }
45
+
46
+ /**
47
+ * Clear the `currentHarness()` cache. Tests that mutate `process.env`
48
+ * between cases should call this in `beforeEach` (or equivalently)
49
+ * so each case sees a fresh detection.
50
+ */
51
+ export function resetHarnessCache(): void {
52
+ cached = undefined;
53
+ }
54
+
55
+ /**
56
+ * True when the currently detected harness exposes MCP protocol support.
57
+ * Used by gdd-sdk audit to decide whether to spawn the gdd-state MCP
58
+ * server or import handlers directly.
59
+ *
60
+ * Claude Code, Codex, and Gemini all speak MCP; only `'unknown'` does not. The answer is derived from the cached `currentHarness()` value.
61
+ */
62
+ export function harnessSupportsMCP(): boolean {
63
+ return isSupportedHarness(currentHarness());
64
+ }
@@ -0,0 +1,142 @@
1
+ // scripts/lib/harness/tool-map.ts — Plan 21-10 (SDK-22 / SDK-23).
2
+ //
3
+ // Cross-harness tool-name lookup table + helpers. Given a Claude Code
4
+ // tool name (`Read`, `Write`, `Edit`, etc.) and a harness identifier,
5
+ // return the native tool name on that harness. Also supports inverse
6
+ // mapping (harness-native name → CC name).
7
+ //
8
+ // The maps are frozen (Object.freeze at two levels) — consumers cannot
9
+ // accidentally mutate the tables. Any mutation attempt throws in strict
10
+ // mode, silently no-ops otherwise; tests assert frozenness explicitly
11
+ // to lock the invariant.
12
+ //
13
+ // Task spawning — the CC `Task` tool has no direct native equivalent on
14
+ // Codex or Gemini (both require spawning a nested CLI instance as a
15
+ // shell subprocess rather than a tool call). The map returns `null`
16
+ // for those slots; callers check for null and fall back to a
17
+ // `shell("npx gdd-sdk …")` invocation. See AGENTS.md / GEMINI.md.
18
+
19
+ import type { Harness } from './detect.ts';
20
+
21
+ /**
22
+ * Canonical Claude Code tool names the plugin references in skill prose.
23
+ * This is the shape against which per-harness maps are typed so that
24
+ * adding a new CC tool to the canonical set fails TSC on every harness
25
+ * map that forgets to include it.
26
+ */
27
+ export type CCTool =
28
+ | 'Read'
29
+ | 'Write'
30
+ | 'Edit'
31
+ | 'Bash'
32
+ | 'Grep'
33
+ | 'Glob'
34
+ | 'Task'
35
+ | 'WebSearch'
36
+ | 'WebFetch';
37
+
38
+ /** All nine CC tool names — useful for iteration in tests. */
39
+ export const CC_TOOLS: readonly CCTool[] = Object.freeze([
40
+ 'Read',
41
+ 'Write',
42
+ 'Edit',
43
+ 'Bash',
44
+ 'Grep',
45
+ 'Glob',
46
+ 'Task',
47
+ 'WebSearch',
48
+ 'WebFetch',
49
+ ]);
50
+
51
+ /**
52
+ * Per-harness lookup. Each entry maps every CC tool to its native name
53
+ * on that harness, or `null` when the harness has no direct equivalent
54
+ * (currently only `Task` on Codex + Gemini).
55
+ *
56
+ * The `unknown` row falls back to CC names — callers that cannot identify
57
+ * the harness get a reasonable default that works on Claude Code and
58
+ * fails loudly on any other harness (the harness will refuse an
59
+ * unrecognized tool call).
60
+ */
61
+ export const TOOL_MAPS: Readonly<Record<Harness, Readonly<Record<CCTool, string | null>>>> = Object.freeze({
62
+ 'claude-code': Object.freeze({
63
+ Read: 'Read',
64
+ Write: 'Write',
65
+ Edit: 'Edit',
66
+ Bash: 'Bash',
67
+ Grep: 'Grep',
68
+ Glob: 'Glob',
69
+ Task: 'Task',
70
+ WebSearch: 'WebSearch',
71
+ WebFetch: 'WebFetch',
72
+ }),
73
+ codex: Object.freeze({
74
+ Read: 'read_file',
75
+ Write: 'apply_patch',
76
+ Edit: 'apply_patch',
77
+ Bash: 'shell',
78
+ Grep: 'shell',
79
+ Glob: 'shell',
80
+ Task: null, // no native Task; use CLI subprocess
81
+ WebSearch: 'web_search',
82
+ WebFetch: 'shell',
83
+ }),
84
+ gemini: Object.freeze({
85
+ Read: 'read_file',
86
+ Write: 'write_file',
87
+ Edit: 'replace',
88
+ Bash: 'run_shell_command',
89
+ Grep: 'search_file_content',
90
+ Glob: 'glob',
91
+ Task: null, // no native Task; use CLI subprocess
92
+ WebSearch: 'google_web_search',
93
+ WebFetch: 'web_fetch',
94
+ }),
95
+ unknown: Object.freeze({
96
+ Read: 'Read',
97
+ Write: 'Write',
98
+ Edit: 'Edit',
99
+ Bash: 'Bash',
100
+ Grep: 'Grep',
101
+ Glob: 'Glob',
102
+ Task: 'Task',
103
+ WebSearch: 'WebSearch',
104
+ WebFetch: 'WebFetch',
105
+ }),
106
+ });
107
+
108
+ /**
109
+ * Return the harness-specific tool name for a CC tool. Returns `null`
110
+ * when the harness has no native equivalent — currently only `Task` on
111
+ * Codex + Gemini. Callers that receive `null` should fall back to a
112
+ * `shell`/`run_shell_command` invocation of `npx gdd-sdk …`.
113
+ */
114
+ export function mapTool(harness: Harness, ccTool: CCTool): string | null {
115
+ const row = TOOL_MAPS[harness];
116
+ // Every Harness key is present in TOOL_MAPS by construction (the type
117
+ // forces it). The lookup is annotated as `string | null | undefined`
118
+ // so it also type-checks if TSC treats the index as possibly missing;
119
+ // `?? null` folds a missing entry into the "no equivalent" result.
120
+ const native: string | null | undefined = row[ccTool];
121
+ return native ?? null;
122
+ }
123
+
124
+ /**
125
+ * Inverse of `mapTool` — given a harness-native tool name (e.g.
126
+ * `'read_file'`), return the CC tool it came from (`'Read'`). Returns
127
+ * `null` when the native name is not in the harness's map at all.
128
+ *
129
+ * Note: on Codex several CC tools share a native name (`Write`/`Edit` →
130
+ * `apply_patch`; `Bash`/`Grep`/`Glob`/`WebFetch` → `shell`). The reverse mapper
131
+ * returns the FIRST CC match walking in `CC_TOOLS` declaration order
132
+ * (`Read` → `Write` → `Edit` → …) — callers that need disambiguation
133
+ * between, e.g., create vs. update must consult the full forward map
134
+ * or inspect tool-call arguments.
135
+ */
136
+ export function reverseMapTool(harness: Harness, nativeName: string): CCTool | null {
137
+ const row = TOOL_MAPS[harness];
138
+ for (const cc of CC_TOOLS) {
139
+ if (row[cc] === nativeName) return cc;
140
+ }
141
+ return null;
142
+ }