@tekyzinc/gsd-t 3.16.12 → 3.18.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/CHANGELOG.md +61 -0
  2. package/README.md +13 -3
  3. package/bin/gsd-t-depgraph-validate.cjs +140 -0
  4. package/bin/gsd-t-economics.cjs +287 -0
  5. package/bin/gsd-t-file-disjointness.cjs +227 -0
  6. package/bin/gsd-t-in-session-usage.cjs +213 -0
  7. package/bin/gsd-t-orchestrator-config.cjs +100 -3
  8. package/bin/gsd-t-orchestrator.js +2 -1
  9. package/bin/gsd-t-parallel.cjs +382 -0
  10. package/bin/gsd-t-report-tokens.cjs +549 -0
  11. package/bin/gsd-t-task-graph.cjs +366 -0
  12. package/bin/gsd-t-token-capture.cjs +29 -14
  13. package/bin/gsd-t-token-dashboard.cjs +35 -0
  14. package/bin/gsd-t-tool-attribution.cjs +377 -0
  15. package/bin/gsd-t-tool-cost.cjs +195 -0
  16. package/bin/gsd-t-unattended-platform.cjs +7 -1
  17. package/bin/gsd-t-unattended.cjs +2 -0
  18. package/bin/gsd-t.js +155 -5
  19. package/bin/headless-auto-spawn.cjs +69 -49
  20. package/bin/headless-auto-spawn.js +18 -24
  21. package/bin/runway-estimator.cjs +212 -0
  22. package/bin/spawn-plan-derive.cjs +163 -0
  23. package/bin/spawn-plan-status-updater.cjs +292 -0
  24. package/bin/spawn-plan-writer.cjs +204 -0
  25. package/commands/gsd-t-debug.md +26 -7
  26. package/commands/gsd-t-execute.md +36 -28
  27. package/commands/gsd-t-help.md +11 -0
  28. package/commands/gsd-t-integrate.md +27 -7
  29. package/commands/gsd-t-quick.md +30 -13
  30. package/commands/gsd-t-scan.md +5 -5
  31. package/commands/gsd-t-unattended-watch.md +4 -3
  32. package/commands/gsd-t-unattended.md +9 -3
  33. package/commands/gsd-t-verify.md +5 -5
  34. package/commands/gsd-t-wave.md +21 -8
  35. package/commands/gsd.md +45 -3
  36. package/docs/GSD-T-README.md +43 -5
  37. package/docs/architecture.md +423 -3
  38. package/docs/requirements.md +203 -0
  39. package/package.json +1 -1
  40. package/scripts/gsd-t-calibration-hook.js +256 -0
  41. package/scripts/gsd-t-compact-detector.js +223 -0
  42. package/scripts/gsd-t-compaction-scanner.js +305 -0
  43. package/scripts/gsd-t-dashboard-autostart.cjs +172 -0
  44. package/scripts/gsd-t-dashboard-server.js +179 -0
  45. package/scripts/gsd-t-heartbeat.js +50 -2
  46. package/scripts/gsd-t-post-commit-spawn-plan.sh +86 -0
  47. package/scripts/gsd-t-transcript.html +546 -43
  48. package/scripts/hooks/gsd-t-in-session-usage-hook.js +84 -0
  49. package/scripts/spawn-plan-fmt-tokens.cjs +80 -0
  50. package/templates/CLAUDE-global.md +8 -3
  51. package/templates/CLAUDE-project.md +17 -14
  52. package/templates/hooks/post-commit-spawn-plan.sh +85 -0
package/CHANGELOG.md CHANGED
@@ -2,6 +2,67 @@
2
2
 
3
3
  All notable changes to GSD-T are documented here. Updated with each release.
4
4
 
5
+ ## [3.18.11] - 2026-04-23
6
+
7
+ ### Fixed
8
+
9
+ - **Flaky `m43-dashboard-autostart` test under load** — bumped `_isPortBusySync` spawnSync timeout from 2s → 10s. Under saturated full-suite execution the 2s budget could expire before the probe child even reported back, causing a falsely-free port reading and intermittent assertion failures. The 10s budget is comfortably above any observed real-world probe latency while still bounding hung-child cases.
10
+ - **Stale snapshot test in `m43-milestone-complete-detection`** — replaced the live-state assertion that hard-coded `M43=PARTITIONED` (true at the time the test was written, false ever since M43 completed) with an M42-only sanity check. M42 is the oldest stable terminal milestone and serves as a fixed anchor that won't go stale every release. The other 7 tests in the file already cover the actual `isMilestoneComplete` matcher logic via `withTmpProgress` fixtures.
11
+
12
+ ## [3.18.10] - 2026-04-23
13
+
14
+ ### Added — Cross-Domain & Cross-Task Parallelism (M44)
15
+
16
+ Task-level parallelism shipped to **both** execution modes (in-session and unattended) on equal footing, with mode-aware gating math. 8 of 9 domains landed (D1–D8 DONE; D9 parallelism-observability grafted as backlog #16 follow-up). 3 waves, 1903/1907 tests pass (4 pre-existing unrelated fails).
17
+
18
+ **Wave 1 foundation:**
19
+ - **D1 — Generic task-graph reader**: typed DAG + cycle detection + `gsd-t graph` CLI. 22/22 tests, contract v1.0.0.
20
+ - **D7 — Per-CW token attribution**: `cw_id` pass-through + post-spawn calibration hook. 19/19 tests; contracts metrics-schema v2.1.0 + compaction-events v1.1.0.
21
+
22
+ **Wave 2 parallel:**
23
+ - **D4 — Dep-graph veto gate**: refuses fan-out when deps unmet. 4/4 tasks, 13/13 tests.
24
+ - **D5 — File-disjointness prover**: union-find + git-history fallback. 4/4 tasks, 11/11 tests.
25
+ - **D6 — Pre-spawn economics estimator**: 3-tier corpus lookup (mode-aware 85%/60% thresholds) calibrated against 528-row corpus. 5/5 tasks, 9/9 tests, contract v1.0.0.
26
+
27
+ **Wave 3:**
28
+ - **D2 — `gsd-t parallel` CLI**: mode-aware gating math (in-session 85% + N=1 floor; unattended 60% + task_split signal). 5/5 tasks, 21/21 tests; wave-join-contract v1.0.0 → v1.1.0.
29
+ - **D8 — Spawn-plan-visibility**: right-side two-layer panel + `/api/spawn-plans` endpoint + SSE + post-commit token attribution hook. 7/7 tasks, 36/36 tests, contract v1.0.0.
30
+ - **D3 — Command-file integration**: additive "Optional — Parallel Dispatch (M44)" blocks in `execute`/`wave`/`quick`/`debug`/`integrate`. No hardcoded `--mode`; silent fallback to sequential. 5/5 tasks; smoke-test fixtures deferred to backlog #15.
31
+
32
+ **Mode contracts (NON-NEGOTIABLE):**
33
+ - **[in-session]** Speed + reduce compaction as much as possible. Hard rule: NEVER throw an interactive pause/resume prompt.
34
+ - **[unattended]** Run M1 → M10 end-to-end with zero human involvement and zero compaction. Per-worker CW headroom is the binding gate.
35
+
36
+ ## [3.17.10] - 2026-04-21
37
+
38
+ ### Added — Token Attribution & Always-Headless Inversion (M43)
39
+
40
+ Every token is now attributable to a specific tool / command / domain, and the framework is locked to a single rule: **the in-session channel is reserved for human↔Claude dialog. All tool-using work spawns. The visualizer is the watching surface.** No flags, no thresholds, no opt-outs — there is no "in-session mode" for commands to enter.
41
+
42
+ **Part A — Universal Token Attribution**
43
+
44
+ **In-session usage capture (D1)**: `bin/gsd-t-in-session-usage.cjs` exports `captureInSessionUsage({projectDir, sessionId, turnId, usage, model})` and `processHookPayload({projectDir, payload})`. Branch B locked: Stop hook triggers, Claude Code transcript (`~/.claude/projects/-.../{sessionId}.jsonl`) is the data source. Writes v2-schema JSONL rows with `sessionType: "in-session"` + distinct `turn_id` + parsed `input_tokens`/`output_tokens`/`cache_read_input_tokens`. Idempotent via transcript-line cursor. Live-validated: 523 rows from one 23-min session (`.gsd-t/.hook-probe/` evidence retained).
45
+
46
+ **Per-tool attribution (D2)**: `.gsd-t/contracts/tool-attribution-contract.md` v1.0.0 + `bin/gsd-t-tool-attribution.cjs` exports `joinTurnsAndEvents` / `attributeTurn` / `aggregateByTool|Command|Domain`. Output-byte ratio algorithm, 4 tie-breakers (zero-byte turn, missing tool_result, no tool calls, null usage). New CLI: `gsd-t tool-cost [--group-by tool|command|domain] [--since YYYY-MM-DD] [--milestone Mxx] [--format table|json]`. Perf gate: 30ms on 3k turns × 30k events fixture (budget 3s). `gsd-t tokens --show-tool-costs` optional integration adds "Top 10 tools by cost" section.
47
+
48
+ **Sink unification + schema v2 (D3)**: `.gsd-t/contracts/metrics-schema-contract.md` bumped v1 → v2 — adds optional `turn_id`, `session_id`, `sessionType`, `tool_attribution[]`, `compaction_pressure{}`. `recordSpawnRow` / `captureSpawn` pass-through preserves backward compat. `bin/gsd-t-token-regenerate-log.cjs` + `gsd-t tokens --regenerate-log` makes `.gsd-t/token-log.md` a regenerated view (streaming read + deterministic sort).
49
+
50
+ **Part B — Always-Headless Inversion (Channel Separation)**
51
+
52
+ **Default headless spawn (D4)**: `bin/headless-auto-spawn.cjs::shouldSpawnHeadless` collapsed to `() => true`. Removed low-water branch, context-meter-driven branching, `--in-session` opt-out parsing. Legacy `watch`/`inSession` params accepted-and-ignored with one-shot stderr deprecation warning. 7 command files stripped of spawn-mode branching (`execute`, `wave`, `integrate`, `quick`, `debug`, `verify`, `scan`). `/gsd` router preserves in-session classification only for dialog-only exploratory turns — all action turns spawn detached. `.gsd-t/contracts/headless-default-contract.md` bumped v1.0.0 → **v2.0.0** (breaking: flag removal). 40 matrix tests.
53
+
54
+ **Dialog-channel growth meter (D5)**: `bin/runway-estimator.cjs::estimateDialogGrowth({projectDir, sessionId, k = 5, modelContextCap = 200000})` returns `{slope, median_delta, latest_input_tokens, predicted_turns_to_compact, shouldWarn}`. Outlier-resistant median-of-deltas. When `shouldWarn=true`, `/gsd` router appends a one-line blockquote footer suggesting `/compact` or detached spawn. Pure read/warn — never refuses, never reroutes (there's nothing to reroute to under channel separation). Scope collapsed from originally-sketched circuit breaker; `.gsd-t/contracts/context-meter-contract.md` bumped v1.3.0 → v1.4.0 (additive subsection).
55
+
56
+ **Transcript viewer as primary surface (D6)**: `scripts/gsd-t-dashboard-server.js` gains `GET /transcript/:id/tool-cost` (D2-backed, 503 graceful fallback) + `GET /transcript/:id/usage` (per-turn JSONL rows). `scripts/gsd-t-transcript.html` gains collapsible Tool Cost sidebar panel with live SSE updates. `bin/headless-auto-spawn.cjs` prints `▶ Live transcript: http://127.0.0.1:{port}/transcript/{spawn-id}` on every spawn. New `scripts/gsd-t-dashboard-autostart.cjs` — `ensureDashboardRunning({projectDir})` port-probe + fork-detach + pid file, hooked into spawn start path (idempotent). `.gsd-t/contracts/dashboard-server-contract.md` bumped (routes + banner format + autostart sections).
57
+
58
+ **Tests**: 1708/1710 pass (2 pre-existing unrelated fails). Net additions across D1–D6: ~90 new test cases.
59
+
60
+ ## [3.16.10] - 2026-04-20
61
+
62
+ ### Added — Live Spawn Transcript Viewer (M42)
63
+
64
+ Per-spawn live transcript UI on `:7433`: stream-json tee (`bin/gsd-t-transcript-tee.cjs`), SSE route (`/transcript/:id/stream`), Claude-Code-style ndjson renderer (`scripts/gsd-t-transcript.html`), sidebar tree with parent-indent + status dots, per-spawn kill button (POST `/transcript/:id/kill`). 29 M42-specific tests; 1522/1522 suite. Intervene/SIGSTOP deferred to follow-up milestone.
65
+
5
66
  ## [3.15.10] - 2026-04-20
6
67
 
7
68
  ### Added — Universal Token Capture Across GSD-T (M41)
package/README.md CHANGED
@@ -14,10 +14,11 @@ A methodology for reliable, parallelizable development using Claude Code with op
14
14
  **Cross-project learning** — proven rules propagate to `~/.claude/metrics/` and sync across all registered projects via `update-all`. Rules validated in 3+ projects become universal; 5+ projects qualify for npm distribution. Cross-project signal comparison and global ELO rankings available via `gsd-t-metrics --cross-project` and `gsd-t-status`.
15
15
  **Stack Rules Engine** — auto-detects project tech stack (React, TypeScript, Node API, Python, Go, Rust) from manifest files and injects mandatory best-practice rules into subagent prompts at execute-time. Universal security rules always apply; stack-specific rules layer on top. Includes **design-to-code** rules for pixel-perfect frontend implementation from Figma, screenshots, or design images — with Figma MCP integration, design token extraction, stack capability evaluation, and mandatory visual verification: every screen is rendered in a real browser, screenshotted at mobile/tablet/desktop, and compared pixel-by-pixel against the Figma design. Auto-bootstraps during partition when design references are detected. Extensible: drop a `.md` file in `templates/stacks/` to add a new stack.
16
16
  **External Task Orchestrator + Streaming Watcher UI (M40, v3.14.10)** — JS orchestrator drives `claude -p` one task per spawn: short-lived, fresh context, architecturally compaction-free. Benchmarks 0.72× wall-clock vs in-session on 20-task/3-wave workloads. Paired with a zero-Claude-cost local streaming UI at `127.0.0.1:7842` that renders all workers' stream-json output as a continuous claude.ai-style feed — task/wave banners, duration + usage chips, token corner bar, localStorage filters, replay via `WS /feed?from=N`. Recovery: `--resume` reconciles interrupted runs using commit + progress.md evidence; ambiguous tasks (commit without progress entry) are flagged for operator triage, never silently claimed done. CLI: `gsd-t orchestrate`, `gsd-t benchmark-orchestrator`, `gsd-t stream-feed`. Contracts: `stream-json-sink-contract.md` v1.1.0, `wave-join-contract.md`, `completion-signal-contract.md`, `metrics-schema-contract.md`.
17
- **Headless-by-Default Spawn (M38, v3.12.10)** — long-running workflow commands (execute, wave, integrate, debug repair loops) spawn detached by default. The interactive session prints a launch banner, logs the event-stream path, and exits. Pass `--watch` to keep a live status block in the session (270s `ScheduleWakeup` ticks, cache-window-safe). Detached workers emit JSONL events to `.gsd-t/events/YYYY-MM-DD.jsonl` at every phase boundary — shared by watch command and dashboard. See `.gsd-t/contracts/headless-default-contract.md` v1.0.0 and `unattended-event-stream-contract.md` v1.0.0.
17
+ **Always-Headless Spawn (M43 D4, v3.16.x+) — Channel Separation** — every GSD-T command spawns detached, unconditionally. No `--watch`, no `--in-session`, no `--headless` opt-in, no context-meter threshold that reroutes. The dialog channel is reserved for human↔Claude conversation; every workflow turn is a detached headless child. Interactive session shows a launch banner + live-transcript URL + event-stream path, then exits; results surface via the read-back banner on the user's next message. Detached workers emit JSONL events to `.gsd-t/events/YYYY-MM-DD.jsonl` at every phase boundary — shared by dashboard and (historically) the watch command. The only in-session surface is the `/gsd` router (for dialog-only exploratory turns). See `.gsd-t/contracts/headless-default-contract.md` v2.0.0 and `unattended-event-stream-contract.md` v1.0.0.
18
+ **Live Transcript as Primary Surface (M43 D6, v3.16.13)** — every detached spawn prints a one-line banner (`▶ Live transcript: http://127.0.0.1:{port}/transcript/{id}`) pointing at a browser viewer that SSE-streams the child's stdout and renders a collapsible "Tool Cost" sidebar panel showing per-tool attributed tokens and cost (sourced from `/transcript/:id/tool-cost`, which proxies to the M43 D2 tool-attribution library). The dashboard server auto-starts (`scripts/gsd-t-dashboard-autostart.cjs`) idempotently on each spawn — a port probe backs off when a server is already running, otherwise a fork-detach writes `.gsd-t/.dashboard.pid`. Port is project-scoped via `projectScopedDefaultPort(projectDir)` so multi-project workflows don't clobber each other. Contract: `dashboard-server-contract.md` v1.2.0.
18
19
  - **Surgical model selection** — `bin/model-selector.js` assigns haiku/sonnet/opus per phase via a declarative rules table; `/advisor` escalation path with convention-based fallback.
19
20
  - **Per-spawn token telemetry** — `.gsd-t/token-metrics.jsonl` records one 18-field row per Task subagent spawn.
20
- **Context Meter (M34/M38)** — PostToolUse hook writes `.gsd-t/.context-meter-state.json` via local token estimation. Single-band model (`context-meter-contract.md` v1.3.0): one threshold (default 85%), one action — hand off to a detached headless spawn. The meter informs spawn-time routing, not in-flight pauses.
21
+ **Context Meter (M34/M38/M43 D4) — Observational Only** — PostToolUse hook writes `.gsd-t/.context-meter-state.json` via local token estimation. Under M43 D4 (channel-separation inversion, `headless-default-contract.md` v2.0.0) the meter is OBSERVATIONAL ONLY: the pct is recorded into the token-log `Ctx%` column on the next spawn, but no threshold gates any routing decision — every command spawns detached regardless. The `context-meter-contract.md` single-band model is preserved for the value itself; it no longer drives in-flight pauses or spawn-time rerouting.
21
22
  **Quality North Star** — projects define a `## Quality North Star` section in CLAUDE.md (1–3 sentences, e.g., "This is a published npm library. Every public API must be intuitive and backward-compatible."). `gsd-t-init` auto-detects preset (library/web-app/cli) from package.json signals; `gsd-t-setup` configures it for existing projects. Subagents read it as a quality lens; absent = silent skip (backward compatible).
22
23
  **Design Brief Artifact** — during partition, UI/frontend projects (React, Vue, Svelte, Flutter, Tailwind) automatically get `.gsd-t/contracts/design-brief.md` with color palette, typography, spacing system, component patterns, and tone/voice. Non-UI projects skip silently. User-customized briefs are preserved. Referenced in plan phase for visual consistency.
23
24
  **Design Verification Agent** — after QA passes on design-to-code projects, a dedicated verification agent opens a browser with both the built frontend AND the original design (Figma page, design image, or MCP screenshot) side-by-side for direct visual comparison. Produces a structured element-by-element comparison table (30+ rows) with specific design values vs. implementation values and MATCH/DEVIATION verdicts. An artifact gate enforces that the comparison table exists — missing it blocks completion. Separation of concerns: coding agents code, verification agents verify. Wired into execute (Step 5.25) and quick (Step 5.25). Only fires when `.gsd-t/contracts/design-contract.md` exists — non-design projects are unaffected.
@@ -101,8 +102,17 @@ gsd-t headless --debug-loop --max-iterations=10 # Cap at 10 iterations
101
102
  gsd-t headless --debug-loop --test-cmd="npm test" # Override test command
102
103
  gsd-t headless --debug-loop --fix-scope="src/auth/**" # Limit fix scope
103
104
  gsd-t headless --debug-loop --json --log # Structured output + per-iteration logs
105
+
106
+ # Parallel CLI (M44 D2 — task-level parallelism, mode-aware gating)
107
+ gsd-t parallel --help # Usage, flags, gates, contract ref
108
+ gsd-t parallel --dry-run # Print worker plan table + exit (no spawn)
109
+ gsd-t parallel --mode in-session --dry-run # 85% orchestrator-CW ceiling; N=1 floor
110
+ gsd-t parallel --mode unattended --dry-run # 60% per-worker ceiling; > 60% → task_split
111
+ gsd-t parallel --milestone M44 --domain m44-d2-parallel-cli --dry-run
104
112
  ```
105
113
 
114
+ `gsd-t parallel` consumes the M44 task-graph (D1) and applies three pre-spawn gates (D4 depgraph validation → D5 file-disjointness → D6 economics) followed by mode-aware headroom/split math. Extends — does not replace — the M40 orchestrator. Contract: `.gsd-t/contracts/wave-join-contract.md` v1.1.0.
115
+
106
116
  Each iteration runs as a fresh `claude -p` session. A cumulative debug ledger (`.gsd-t/debug-state.jsonl`) preserves hypothesis/fix/learning history across sessions. An anti-repetition preamble prevents retrying failed approaches.
107
117
 
108
118
  **Escalation tiers**: sonnet (iterations 1–5) → opus (6–15) → STOP with diagnostic summary (16–20)
@@ -235,7 +245,7 @@ This will replace changed command files, back up your CLAUDE.md if customized, a
235
245
  | **Discuss** | Explore design decisions | Both |
236
246
  | **Plan** | Create atomic task lists | Solo (always) |
237
247
  | **Impact** | Downstream effect analysis | Solo |
238
- | **Execute** | Build it | Both |
248
+ | **Execute** | Build it — task-level parallelism via `gsd-t parallel --help` (M44 D2/D3); conditional on >1 gate-passing task, falls back to sequential silently | Both |
239
249
  | **Test-Sync** | Maintain test coverage | Solo |
240
250
  | **Integrate** | Wire domains together | Solo (always) |
241
251
  | **Verify** | Quality gates | Both |
@@ -0,0 +1,140 @@
1
+ "use strict";
2
+
3
+ /**
4
+ * gsd-t-depgraph-validate — M44 D4
5
+ *
6
+ * Pre-spawn dependency gate. Consumes the DAG from `bin/gsd-t-task-graph.cjs`
7
+ * and filters candidate-ready tasks down to those whose declared `deps[]`
8
+ * are all in DONE status. Vetoed tasks (any unmet dep, or any dep
9
+ * referencing an unknown task id) are removed from the returned ready set
10
+ * AND appended to the event stream as `dep_gate_veto` records so that
11
+ * "why wasn't this task spawned?" is observable.
12
+ *
13
+ * Contract: .gsd-t/contracts/depgraph-validation-contract.md (v1.0.0)
14
+ *
15
+ * Hard rules (from constraints.md):
16
+ * - Zero external runtime deps (Node built-ins only)
17
+ * - Never throws on unmet deps; only throws on programming errors
18
+ * - Read-only on tasks.md / scope.md / contracts; only writes appended
19
+ * JSONL lines to .gsd-t/events/YYYY-MM-DD.jsonl
20
+ * - Synchronous; < 50 ms on realistic graphs
21
+ * - Mode-agnostic (no in-session vs unattended branching)
22
+ * - A task's dep is satisfied iff graph.byId[depId] exists AND
23
+ * byId[depId].status === 'done' (skipped/failed/pending all veto)
24
+ */
25
+
26
+ const fs = require("node:fs");
27
+ const path = require("node:path");
28
+
29
+ // ─── event stream writer (self-contained; no external deps) ───────────────
30
+
31
/**
 * Best-effort writer for the per-day event log.
 *
 * Serializes `event` as a single JSON line and appends it to
 * `<projectDir>/.gsd-t/events/<YYYY-MM-DD>.jsonl`, where the date is the
 * first ten characters of `event.ts`. The events directory is created when
 * it does not exist yet. Any failure (I/O error, missing `ts`,
 * unserializable event) is swallowed so that logging can never interrupt
 * the caller's control flow.
 *
 * @param {string} projectDir Project root that holds the `.gsd-t/` folder.
 * @param {{ts: string}} event Record to log; `ts` must be a string whose
 *   first ten characters are the day (an ISO-8601 timestamp).
 * @returns {void}
 */
function appendEvent(projectDir, event) {
  try {
    const logDir = path.join(projectDir, ".gsd-t", "events");
    fs.mkdirSync(logDir, { recursive: true });
    // The day bucket is taken verbatim from the timestamp prefix.
    const datePrefix = event.ts.slice(0, 10);
    const logPath = path.join(logDir, `${datePrefix}.jsonl`);
    fs.appendFileSync(logPath, `${JSON.stringify(event)}\n`);
  } catch {
    // Deliberately ignored: the dependency gate must not fail because the
    // event log could not be written.
  }
}
47
+
48
/**
 * Assemble the `dep_gate_veto` record for a task that was held back.
 *
 * The generic event fields this gate has no knowledge of (`command`,
 * `phase`, `agent_id`, `parent_agent_id`, `trace_id`, `model`) are emitted
 * as `null`. The gate-specific fields `task_id`, `domain` and `unmet_deps`
 * are appended after them. `unmet_deps` is a shallow copy, so later
 * mutation of the caller's array does not alter the event.
 *
 * @param {{id: string, domain: string}} task Task node that was vetoed.
 * @param {string[]} unmetDeps Ids of the dependencies that are not done.
 * @returns {object} Event record stamped with the current time (ISO-8601).
 */
function buildVetoEvent(task, unmetDeps) {
  const timestamp = new Date().toISOString();
  const reason = `unmet deps: ${unmetDeps.join(", ")}`;

  const baseFields = {
    ts: timestamp,
    event_type: "dep_gate_veto",
    command: null,
    phase: null,
    agent_id: null,
    parent_agent_id: null,
    trace_id: null,
    reasoning: reason,
    outcome: "deferred",
    model: null,
  };

  // Gate-specific additive fields follow the base schema fields.
  return {
    ...baseFields,
    task_id: task.id,
    domain: task.domain,
    unmet_deps: unmetDeps.slice(),
  };
}
71
+
72
+ // ─── Public API ───────────────────────────────────────────────────────────
73
+
74
/**
 * Filter the candidate-ready tasks down to those whose dependencies are
 * all satisfied.
 *
 * A dependency is satisfied only when `graph.byId[depId]` exists and its
 * `status` is exactly `"done"`; unknown ids and any other status count as
 * unmet. Each task with at least one unmet dependency is recorded in
 * `vetoed` and a `dep_gate_veto` event is appended to the event log.
 *
 * Candidate selection:
 *   - a non-empty `graph.ready` array of task ids is used as-is;
 *   - otherwise every entry of `graph.nodes` with status `"pending"` is
 *     considered (useful for hand-built fixture graphs);
 *   - otherwise there are no candidates.
 * NOTE(review): an EMPTY `graph.ready` array also takes the `graph.nodes`
 * fallback — confirm that this is intended for graphs whose ready mask was
 * computed and is genuinely empty.
 *
 * Unmet dependencies never cause a throw; only malformed input does.
 *
 * @param {{graph: object, projectDir: string}} opts `graph` is the DAG
 *   object; `projectDir` defaults to the current working directory.
 * @returns {{ready: object[], vetoed: {task: object, unmet_deps: string[]}[]}}
 *   Task nodes in candidate order, plus one veto entry per held-back task.
 * @throws {TypeError} When `opts` or `opts.graph` is not an object.
 */
function validateDepGraph(opts) {
  if (!opts || typeof opts !== "object") {
    throw new TypeError("validateDepGraph: opts must be an object");
  }
  const graph = opts.graph;
  if (!graph || typeof graph !== "object") {
    throw new TypeError("validateDepGraph: opts.graph must be the DAG object from buildTaskGraph");
  }

  const nodeIndex = graph.byId || Object.create(null);
  const eventRoot = opts.projectDir || process.cwd();

  const hasReadyMask = Array.isArray(graph.ready) && graph.ready.length > 0;
  let candidateIds = [];
  if (hasReadyMask) {
    candidateIds = [...graph.ready];
  } else if (Array.isArray(graph.nodes)) {
    candidateIds = graph.nodes
      .filter((node) => node && node.status === "pending")
      .map((node) => node.id);
  }

  const ready = [];
  const vetoed = [];

  for (const candidateId of candidateIds) {
    const node = nodeIndex[candidateId];
    if (!node) {
      // The ready mask names a task the index does not know; ignore it.
      continue;
    }

    const declared = Array.isArray(node.deps) ? node.deps : [];
    const missing = [];
    for (let i = 0; i < declared.length; i += 1) {
      const depId = declared[i];
      const depNode = nodeIndex[depId];
      const satisfied = Boolean(depNode) && depNode.status === "done";
      if (!satisfied) missing.push(depId);
    }

    if (missing.length > 0) {
      vetoed.push({ task: node, unmet_deps: missing });
      appendEvent(eventRoot, buildVetoEvent(node, missing));
      continue;
    }
    ready.push(node);
  }

  return { ready, vetoed };
}
134
+
135
+ module.exports = {
136
+ validateDepGraph,
137
+ // Exposed for unit tests only; not a stable public surface.
138
+ _buildVetoEvent: buildVetoEvent,
139
+ _appendEvent: appendEvent,
140
+ };
@@ -0,0 +1,287 @@
1
+ "use strict";
2
+
3
+ /**
4
+ * gsd-t-economics — M44 D6 (pre-spawn economics estimator)
5
+ *
6
+ * Contract: .gsd-t/contracts/economics-estimator-contract.md
7
+ *
8
+ * Hard invariants:
9
+ * - Zero external runtime deps (Node built-ins only).
10
+ * - Corpus loaded ONCE per projectDir on first use (lazy sync read, cached per process).
11
+ * - NEVER returns undefined — global median fallback guarantees a number.
12
+ * - D6 is a HINT only; D2 owns the final gate decision.
13
+ * - Event emission is best-effort; failures never fail the estimate.
14
+ */
15
+
16
+ const fs = require("node:fs");
17
+ const path = require("node:path");
18
+
19
+ // ─── Constants ────────────────────────────────────────────────────────────
20
+
21
+ /**
22
+ * Effective CW ceiling in tokens. Matches:
23
+ * - bin/token-budget.cjs (200000)
24
+ * - bin/context-meter-config.cjs (modelWindowSize: 200000)
25
+ * - bin/runway-estimator.cjs (DEFAULT_MODEL_CONTEXT_CAP = 200000)
26
+ */
27
+ const CW_CEILING_TOKENS = 200000;
28
+
29
+ /** Confidence tier cutoffs (exact-match row counts). */
30
+ const HIGH_CONFIDENCE_MIN = 5; // ≥5 exact matches
31
+ // MEDIUM: 1-4 exact matches.
32
+ // LOW: fuzzy match (domain-only or command-only).
33
+ // FALLBACK: global median.
34
+
35
+ /** Mode-specific gate thresholds (percent of CW ceiling). */
36
+ const IN_SESSION_PARALLEL_OK_PCT = 85;
37
+ const UNATTENDED_PARALLEL_OK_PCT = 60;
38
+ const UNATTENDED_SPLIT_PCT = 60;
39
+
40
+ // ─── Corpus loading (ONCE per projectDir) ─────────────────────────────────
41
+
42
+ const _corpusCache = new Map(); // projectDir → loaded corpus index
43
+
44
/**
 * Read and index the token-usage corpus of a project.
 *
 * The corpus file is `<projectDir>/.gsd-t/metrics/token-usage.jsonl`. A
 * missing or unreadable file is treated as an empty corpus; empty and
 * unparseable lines are skipped, as are lines that do not parse to an
 * object. The resulting index is memoized per `projectDir` for the lifetime
 * of the process, so the file is read at most once per directory (until the
 * cache is cleared).
 *
 * Missing `command` / `step` / `domain` values are bucketed under `"-"`.
 *
 * @param {string} projectDir Project root that holds the `.gsd-t/` folder.
 * @returns {{
 *   rows: object[],
 *   exact: Map<string, number[]>,     // "cmd|step|dom" → row totals
 *   byDomain: Map<string, number[]>,  // dom → row totals
 *   byCommand: Map<string, number[]>, // cmd → row totals
 *   globalMedian: number,
 *   globalPct: number,
 * }} Index over the corpus; `globalPct` is `globalMedian` expressed as a
 *   percentage of the context-window ceiling.
 */
function loadCorpus(projectDir) {
  if (_corpusCache.has(projectDir)) {
    return _corpusCache.get(projectDir);
  }

  const corpusFile = path.join(projectDir, ".gsd-t", "metrics", "token-usage.jsonl");
  let text;
  try {
    text = fs.readFileSync(corpusFile, "utf8");
  } catch {
    // No corpus yet (or unreadable): behave as if it were empty.
    text = "";
  }

  const rows = [];
  text.split(/\r?\n/).forEach((line) => {
    if (!line) return;
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      return; // malformed line — skip it
    }
    if (parsed && typeof parsed === "object") rows.push(parsed);
  });

  const exact = new Map();
  const byDomain = new Map();
  const byCommand = new Map();
  const allTotals = [];

  rows.forEach((row) => {
    const rowTotal = rowTotalTokens(row);
    const command = row.command || "-";
    const step = row.step || "-";
    const domain = row.domain || "-";

    pushMap(exact, `${command}|${step}|${domain}`, rowTotal);
    pushMap(byDomain, domain, rowTotal);
    pushMap(byCommand, command, rowTotal);
    allTotals.push(rowTotal);
  });

  const globalMedian = median(allTotals);
  const index = {
    rows,
    exact,
    byDomain,
    byCommand,
    globalMedian,
    globalPct: tokensToCwPct(globalMedian),
  };
  _corpusCache.set(projectDir, index);
  return index;
}
104
+
105
/**
 * Total token count of one corpus row:
 * `inputTokens + outputTokens + cacheReadInputTokens + cacheCreationInputTokens`.
 *
 * Each field is coerced to a finite number before it is added. Numeric
 * strings (e.g. `"100"`) are parsed; missing, `null`, non-numeric and
 * non-finite values contribute 0. Without this coercion a single string
 * field would turn the `+` chain into string concatenation and poison every
 * median computed from the corpus.
 *
 * @param {object} r Parsed corpus row.
 * @returns {number} Sum of the four token fields; always a finite number.
 */
function rowTotalTokens(r) {
  const toCount = (value) => {
    let parsed = NaN;
    if (typeof value === "number") {
      parsed = value;
    } else if (typeof value === "string" && value.trim() !== "") {
      parsed = Number(value);
    }
    return Number.isFinite(parsed) ? parsed : 0;
  };

  return toCount(r.inputTokens)
    + toCount(r.outputTokens)
    + toCount(r.cacheReadInputTokens)
    + toCount(r.cacheCreationInputTokens);
}
112
+
113
/**
 * Append `v` to the array stored under key `k` of the Map `m`, creating
 * the array when the key is not present yet.
 *
 * @param {Map<any, any[]>} m Multimap being built.
 * @param {*} k Bucket key.
 * @param {*} v Value to append.
 * @returns {void}
 */
function pushMap(m, k, v) {
  if (m.has(k)) {
    m.get(k).push(v);
  } else {
    m.set(k, [v]);
  }
}
117
+
118
/**
 * Median of a list of numbers.
 *
 * The input is copied before sorting, so the caller's array is left
 * untouched. For an even number of values the mean of the two middle
 * values is returned.
 *
 * @param {number[]} arr Values to summarize.
 * @returns {number} The median, or 0 when `arr` is empty or falsy.
 */
function median(arr) {
  if (!arr || arr.length === 0) {
    return 0;
  }
  const sorted = arr.slice();
  sorted.sort((left, right) => left - right); // numeric, ascending
  const count = sorted.length;
  const upper = Math.floor(count / 2);
  if (count % 2 === 1) {
    return sorted[upper];
  }
  return (sorted[upper - 1] + sorted[upper]) / 2;
}
124
+
125
/**
 * Express a token count as a percentage of `CW_CEILING_TOKENS`.
 *
 * @param {number} tokens Token count.
 * @returns {number} `tokens / CW_CEILING_TOKENS * 100`, or 0 when `tokens`
 *   is not a finite number or is not positive.
 */
function tokensToCwPct(tokens) {
  const usable = Number.isFinite(tokens) && tokens > 0;
  return usable ? (tokens / CW_CEILING_TOKENS) * 100 : 0;
}
129
+
130
+ // ─── Event emission (best-effort) ─────────────────────────────────────────
131
+
132
/**
 * Append one event as a JSON line to
 * `<projectDir>/.gsd-t/events/<YYYY-MM-DD>.jsonl`.
 *
 * The day is the UTC calendar date of `ev.ts`, or of the current time when
 * `ev.ts` is falsy. The events directory is created on demand. Emission is
 * best-effort: any error is caught and reported through the return value
 * instead of being thrown.
 *
 * @param {string} projectDir Project root that holds the `.gsd-t/` folder.
 * @param {object} ev Event record; `ev.ts` selects the day file.
 * @returns {boolean} `true` when the line was appended, `false` on any failure.
 */
function writeEconomicsEvent(projectDir, ev) {
  try {
    const eventsDir = path.join(projectDir, ".gsd-t", "events");
    fs.mkdirSync(eventsDir, { recursive: true });

    const stamp = new Date(ev.ts || Date.now());
    const twoDigits = (value) => String(value).padStart(2, "0");
    const dayName = [
      stamp.getUTCFullYear(),
      twoDigits(stamp.getUTCMonth() + 1), // getUTCMonth() is zero-based
      twoDigits(stamp.getUTCDate()),
    ].join("-");

    const target = path.join(eventsDir, `${dayName}.jsonl`);
    fs.appendFileSync(target, `${JSON.stringify(ev)}\n`, "utf8");
    return true;
  } catch {
    // Best-effort only: a logging failure must never fail the estimate.
    return false;
  }
}
148
+
149
+ // ─── Lookup algorithm ─────────────────────────────────────────────────────
150
+
151
/**
 * Estimate a task's token usage from the corpus index, trying the most
 * specific match first:
 *
 *   1. exact `command|step|domain` bucket → confidence `HIGH` when it has
 *      at least `HIGH_CONFIDENCE_MIN` rows, otherwise `MEDIUM`;
 *   2. domain-only bucket, then command-only bucket → `LOW`;
 *   3. the corpus-wide median → `FALLBACK` (with `matchedRows` 0).
 *
 * Missing task fields are treated as `"-"`; the `"-"` placeholder never
 * participates in the domain-only / command-only tiers.
 *
 * @param {{command?: string, step?: string, domain?: string}} taskNode
 * @param {{exact: Map, byDomain: Map, byCommand: Map, globalMedian: number}} corpus
 *   Index as produced by `loadCorpus`.
 * @returns {{estimatedTokens: number, matchedRows: number,
 *   confidence: 'HIGH'|'MEDIUM'|'LOW'|'FALLBACK'}}
 */
function lookupInCorpus(taskNode, corpus) {
  const node = taskNode || {};
  const cmd = node.command || "-";
  const step = node.step || "-";
  const dom = node.domain || "-";

  // Tier 1 — exact triplet.
  const exactHits = corpus.exact.get(`${cmd}|${step}|${dom}`);
  if (exactHits && exactHits.length > 0) {
    const isHigh = exactHits.length >= HIGH_CONFIDENCE_MIN;
    return {
      estimatedTokens: median(exactHits),
      matchedRows: exactHits.length,
      confidence: isHigh ? "HIGH" : "MEDIUM",
    };
  }

  // Tier 2 — single-field matches, domain before command.
  const fuzzyTiers = [
    { key: dom, pick: () => corpus.byDomain.get(dom) },
    { key: cmd, pick: () => corpus.byCommand.get(cmd) },
  ];
  for (const tier of fuzzyTiers) {
    if (!tier.key || tier.key === "-") continue;
    const hits = tier.pick();
    if (hits && hits.length > 0) {
      return {
        estimatedTokens: median(hits),
        matchedRows: hits.length,
        confidence: "LOW",
      };
    }
  }

  // Tier 3 — nothing matched; use the corpus-wide median.
  return {
    estimatedTokens: corpus.globalMedian,
    matchedRows: 0,
    confidence: "FALLBACK",
  };
}
207
+
208
+ // ─── Gate arithmetic ──────────────────────────────────────────────────────
209
+
210
/**
 * Turn an estimated context-window percentage into gate hints.
 *
 *   - `"unattended"`: parallel is OK at or below
 *     `UNATTENDED_PARALLEL_OK_PCT`; `split` is raised above
 *     `UNATTENDED_SPLIT_PCT`.
 *   - any other mode (treated as in-session): parallel is OK at or below
 *     `IN_SESSION_PARALLEL_OK_PCT`; `split` is never raised.
 *
 * `workerCount` is always 1 at this level, whatever the confidence tier;
 * `confidence` is accepted for interface compatibility but does not
 * currently influence the result.
 *
 * @param {'in-session'|'unattended'} mode
 * @param {number} estimatedCwPct Estimated footprint, percent of the CW ceiling.
 * @param {'HIGH'|'MEDIUM'|'LOW'|'FALLBACK'} confidence
 * @returns {{parallelOk: boolean, split: boolean, workerCount: number}}
 */
function decideGates(mode, estimatedCwPct, confidence) {
  const isUnattended = mode === "unattended";
  const ceilingPct = isUnattended
    ? UNATTENDED_PARALLEL_OK_PCT
    : IN_SESSION_PARALLEL_OK_PCT;

  return {
    parallelOk: estimatedCwPct <= ceilingPct,
    split: isUnattended && estimatedCwPct > UNATTENDED_SPLIT_PCT,
    workerCount: 1,
  };
}
230
+
231
+ // ─── Public API ───────────────────────────────────────────────────────────
232
+
233
/**
 * Estimate how much of the context window a task will consume and derive
 * mode-specific gate hints from that estimate.
 *
 * Steps: load (or reuse) the corpus index for `projectDir`, look the task
 * up in it, convert the token estimate to a percentage of the CW ceiling,
 * derive the gate hints, then append an `economics_decision` event to the
 * event log (best-effort — a failed write does not affect the result).
 *
 * @param {object} opts
 * @param {{id?:string, command?:string, step?:string, domain?:string}} opts.taskNode
 *   Task to estimate; defaults to an empty node.
 * @param {'in-session'|'unattended'} opts.mode Defaults to `"in-session"`.
 * @param {string} [opts.projectDir] Defaults to the current working directory.
 * @returns {{estimatedCwPct:number, parallelOk:boolean, split:boolean, workerCount:number, matchedRows:number, confidence:'HIGH'|'MEDIUM'|'LOW'|'FALLBACK'}}
 */
function estimateTaskFootprint(opts) {
  const options = opts || {};
  const taskNode = options.taskNode || {};
  const mode = options.mode || "in-session";
  const projectDir = options.projectDir || process.cwd();

  const match = lookupInCorpus(taskNode, loadCorpus(projectDir));
  const matchedRows = match.matchedRows;
  const confidence = match.confidence;
  const estimatedCwPct = tokensToCwPct(match.estimatedTokens);

  const gates = decideGates(mode, estimatedCwPct, confidence);
  const parallelOk = gates.parallelOk;
  const split = gates.split;

  // Record the decision; the boolean result is intentionally ignored.
  const decisionEvent = {
    type: "economics_decision",
    ts: new Date().toISOString(),
    task_id: taskNode.id || null,
    mode,
    estimatedCwPct,
    parallelOk,
    split,
    confidence,
    matchedRows,
  };
  writeEconomicsEvent(projectDir, decisionEvent);

  return {
    estimatedCwPct,
    parallelOk,
    split,
    workerCount: gates.workerCount,
    matchedRows,
    confidence,
  };
}
276
+
277
+ module.exports = {
278
+ estimateTaskFootprint,
279
+ // Internals exposed for unit tests + calibration tooling:
280
+ _CW_CEILING_TOKENS: CW_CEILING_TOKENS,
281
+ _loadCorpus: loadCorpus,
282
+ _lookupInCorpus: lookupInCorpus,
283
+ _rowTotalTokens: rowTotalTokens,
284
+ _median: median,
285
+ _tokensToCwPct: tokensToCwPct,
286
+ _resetCorpusCache: function resetCorpusCache() { _corpusCache.clear(); },
287
+ };