npm - @hegemonart/get-design-done - Versions diffs - 1.32.0 → 1.33.0 - Mend

@hegemonart/get-design-done 1.32.0 → 1.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/CHANGELOG.md +32 -0
package/NOTICE +43 -5
package/README.md +13 -0
package/package.json +2 -2
package/reference/schemas/pressure-scenario.schema.json +69 -0
package/scripts/lib/skill-behavior/runner.cjs +187 -0
package/scripts/lib/skill-behavior/stub-invoker.cjs +95 -0
package/scripts/lib/skill-behavior/telemetry.cjs +379 -0
package/scripts/lib/cli/index.ts +0 -29
package/scripts/lib/error-classifier.cjs +0 -29
package/scripts/lib/event-stream/index.ts +0 -29
package/scripts/lib/gdd-errors/index.ts +0 -29
package/scripts/lib/gdd-state/index.ts +0 -29
package/scripts/lib/iteration-budget.cjs +0 -29
package/scripts/lib/jittered-backoff.cjs +0 -29
package/scripts/lib/lockfile.cjs +0 -29
package/scripts/mcp-servers/gdd-mcp/server.ts +0 -35
package/scripts/mcp-servers/gdd-state/server.ts +0 -34

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -5,14 +5,14 @@
   },
   "metadata": {
     "description": "Get Design Done — 5-stage agent-orchestrated design pipeline with 9 connections, handoff-first workflow, bidirectional Figma write-back, 22+ specialized agents, queryable knowledge layer (intel store, dependency analysis, learnings extraction), and a self-improvement loop (reflector, frontmatter + budget feedback, global-skills layer). v1.20.0 ships the SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream, and resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) for rate-limit + 429 + context-overflow recovery. Full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows) and release automation (auto-tag + GitHub Release + release-time smoke test).",
-    "version": "1.32.0"
+    "version": "1.33.0"
   },
   "plugins": [
     {
       "name": "get-design-done",
       "source": "./",
       "description": "Agent-orchestrated 5-stage design pipeline: Brief → Explore → Plan → Design → Verify. 22+ specialized agents, 9 connections (Figma, Refero, Preview, Storybook, Chromatic, Figma Writer, Graphify, Pinterest, Claude Design), Claude Design handoff, bidirectional Figma write-back, and a queryable intel store (.design/intel/) for dependency and learnings queries. Standalone commands: style, darkmode, compare, figma-write, graphify, handoff, analyze-dependencies, skill-manifest, extract-learnings. Embeds NNG heuristics, WCAG thresholds, typographic systems, motion framework, and anti-pattern catalog. Ships with a full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows) and release automation. Optimization layer (v1.0.4.1, retroactive): gdd-router + gdd-cache-manager skills, PreToolUse budget-enforcer hook, tier-aware agent frontmatter, lazy checker gates, streaming synthesizer, /gdd:warm-cache + /gdd:optimize commands, and cost telemetry at .design/telemetry/costs.jsonl — targeting 50-70% per-task token-cost reduction with no quality-floor regression. v1.20.0 SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream at .design/telemetry/events.jsonl, resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) with rate-limit + 429 + context-overflow recovery, and TypeScript toolchain.",
-      "version": "1.32.0",
+      "version": "1.33.0",
       "author": {
         "name": "hegemonart"
       },

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "get-design-done",
   "short_name": "gdd",
-  "version": "1.32.0",
+  "version": "1.33.0",
   "description": "Agent-orchestrated 5-stage design pipeline: Brief → Explore → Plan → Design → Verify. 22+ specialized agents, 9 connections (Figma, Refero, Preview, Storybook, Chromatic, Figma Writer, Graphify, Pinterest, Claude Design), handoff-first workflow via Claude Design bundles, bidirectional Figma write-back (annotations, Code Connect), queryable intel store (`.design/intel/`) for O(1) design surface lookups, and self-improvement loop (reflector agent, frontmatter + budget feedback, global-skills layer at `~/.claude/gdd/global-skills/`). Standalone commands: style, darkmode, compare, figma-write, graphify, handoff, analyze-dependencies, skill-manifest, extract-learnings, reflect, apply-reflections. Embeds NNG heuristics, WCAG thresholds, typographic systems, motion framework, and anti-pattern catalog. Ships with a full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows, lint + schema + frontmatter + stale-ref + shellcheck + gitleaks + injection-scan + blocking size-budget) and release automation (auto-tag + GitHub Release + release-time smoke test). Optimization layer (v1.0.4.1, retroactive): gdd-router + gdd-cache-manager skills, PreToolUse budget-enforcer hook, tier-aware agent frontmatter, lazy checker gates, streaming synthesizer, /gdd:warm-cache + /gdd:optimize commands, and cost telemetry at .design/telemetry/costs.jsonl — targeting 50-70% per-task token-cost reduction with no quality-floor regression. v1.20.0 SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream at .design/telemetry/events.jsonl, resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) with rate-limit + 429 + context-overflow recovery, and TypeScript toolchain. v1.27.7 ships gdd-mcp (Phase 27.7): 12 read-only MCP tools for sub-3s priming. 
v1.28.0 (Phase 28): Foundational References Tier 2 — 5 new reference files (color-theory, composition, proportion-systems, i18n, contrast-advanced), 2 verifier i18n probes + 1 explore i18n-readiness probe, 12 additive cross-link insertions across 10 existing references, 2 orthogonal audit-scoring lens-tags (composition_alignment + i18n_readiness).",
   "author": {
     "name": "hegemonart",

package/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,38 @@ All notable changes to get-design-done are documented here. Versions follow [sem
 ---
+## [1.33.0] - 2026-05-30
+### Phase 33 — Skill Behavior Tests (Pressure-Scenario Harness)
+Adds a **behavior-test category** that complements the static validators (Phase 28.5 line/frontmatter) and static guardrails (Phase 32 `<HARD-GATE>` presence) with tests that verify skills hold UNDER PRESSURE. A manifest-driven runner drives a pressure scenario (time / sunk-cost / authority / exhaustion / scope-minimization) through an injectable agent-invoker and validates the response against a compliance/violation rubric with N-attempts + majority rule. Ships the harness + 8 baseline scenarios + synthetic RED baselines + the description-format A/B methodology + reflector telemetry integration. Ports the TDD-for-skills methodology + the pressure-scenario pattern from [`obra/superpowers/skills/writing-skills`](https://github.com/obra/superpowers) (MIT). 6 plans across Waves A–C.
+### Added
+- **Manifest-driven pressure-scenario runner** — `scripts/lib/skill-behavior/runner.cjs` exposes an INJECTABLE `invokeAgent(prompt, opts) -> { text }` seam (no `@anthropic-ai/sdk` dependency — D-03): a deterministic STUB invoker (`scripts/lib/skill-behavior/stub-invoker.cjs`) for CI/tests, plus a documented real-invoker adapter for the opt-in keyed run. Runs each scenario N times and decides compliance by majority.
+- **Pressure-scenario schema** — `reference/schemas/pressure-scenario.schema.json` (wired into `validate:schemas`), with conformance tests for the 8 scenario manifests.
+- **8 pressure scenarios + synthetic RED baselines** — `test/suite/skill-behavior/scenarios/` (7 stage skills + `using-gdd`) with synthetic-from-observed-cycle-drift RED baselines at `test/fixtures/skill-behavior-baseline/` (D-02 — ROADMAP-sanctioned).
+- **Description-format A/B methodology** — `docs/research/description-format-ab.md` documents the trigger-only vs `<what>. Use when` counterfactual + the 7/10-run threshold (D-08), with a `pending: keyed run` marker. The empirical result is an opt-in maintainer follow-up (no API key in CI).
+- **Reflector telemetry** — `scripts/lib/skill-behavior/telemetry.cjs` emits to `.design/telemetry/skill-behavior.jsonl`; a sustained-failure signal (≥3 of last 10 runs failing for a scenario) feeds an `apply-reflections` proposal (stub-tested integration — D-07).
+- **`npm run test:behavior` (opt-in, D-06).** A new script that runs the behavior tests ONLY when `ANTHROPIC_API_KEY` is set (a clear skip message + exit 0 otherwise). The default `npm test` is UNCHANGED — the structural stub tests stay CI-green (LLM non-determinism keeps live behavior runs out of the default suite).
+- **Docs** — `CONTRIBUTING.md` gains a "How to add a pressure scenario" section + the keyed `ANTHROPIC_API_KEY=… npm run test:behavior` procedure; `README.md` gains a "Skill behavior tests" subsection.
+### Removed
+- **BREAKING: the Phase-31.5 deprecation shims are removed (D-04).** The 10 `GDD-DEPRECATION-SHIM` re-exports re-created at the OLD SDK paths in v1.31.5 — `scripts/lib/{cli,event-stream,gdd-state,gdd-errors}/index.ts`, `scripts/lib/{error-classifier,iteration-budget,jittered-backoff,lockfile}.cjs`, and `scripts/mcp-servers/{gdd-state,gdd-mcp}/server.ts` — are deleted. The grace window elapsed (v1.31.5 shipped with shims → v1.32.0 still had them → v1.33.0 removes them). The now-empty `scripts/mcp-servers/` is dropped from the `package.json` `files` allowlist. **If you imported `scripts/lib/…` or `scripts/mcp-servers/…` directly, import from `sdk/…` instead** (e.g. `scripts/lib/cli` → `sdk/cli`, `scripts/lib/error-classifier.cjs` → `sdk/primitives/error-classifier.cjs`, `scripts/mcp-servers/gdd-state/server.ts` → `sdk/mcp/gdd-state/server.ts`). Internal callers were all repointed to `sdk/` in 31.5 + the Phase-32 gdd-events fix; the `gdd-state-mcp` / `gdd-mcp` bins target `sdk/`, so deletion drops only the external re-export — proven by the `no-stale-internal-refs` guard + the full suite + the 31.5 headless pack→install→run E2E.
+### Attribution
+- **Methodology + pattern ported from [`obra/superpowers/skills/writing-skills`](https://github.com/obra/superpowers) (MIT).** The TDD-for-skills cycle (RED: agent fails without the skill → GREEN: skill counters the rationalizations → REFACTOR: close new loopholes) and the pressure-scenario pattern. See `NOTICE`. We port the methodology, not the content — GDD's scenarios, rubrics, and skills are GDD-specific.
+### Notes
+- The behavioral evidence (real RED baselines from live agent runs + the empirical A/B result) is NOT capturable autonomously (no API key / SDK in CI). RED baselines are authored synthetic-from-observed-cycle-drift (D-02); the A/B evidence file documents methodology + expected-signal + a `pending: keyed run` marker. A Phase-28.5 feedback note points at `docs/research/description-format-ab.md`; **Phase 28.5's description-format validator regex is unchanged** (33-06 emits the pointer only — D-08).
+- The 31.5 tarball golden (`test/fixtures/baselines/phase-31-5/tarball-manifest.txt`) was regenerated as a reviewed delta: **+4** skill-behavior paths (`reference/schemas/pressure-scenario.schema.json` + the 3 `scripts/lib/skill-behavior/*.cjs`) and **−10** removed shim paths (618 paths).
+- 6-manifest lockstep at **v1.33.0** (`package.json` + `package-lock.json` + `.claude-plugin/plugin.json` + `.claude-plugin/marketplace.json` (metadata.version + plugins[0].version) + `.cursor-plugin/plugin.json` + `.codex-plugin/plugin.json`). Version-sync hygiene done upfront (D-09): `OFF_CADENCE_VERSIONS.add('1.33.0')` + prior `manifests-version.txt` baselines forward-propagated 1.32.0 → 1.33.0.
+---
 ## [1.32.0] - 2026-05-30
 ### Phase 32 — Skill Auto-Trigger Discipline + Defensive Guardrails

package/NOTICE CHANGED Viewed

@@ -249,14 +249,52 @@ Three ported artifacts:
 The mechanism is the contribution being attributed; the discipline content is
 original to get-design-done.
+──────────────────────────────────────────────────────────────────────────────
+Phase 33 — Skill Behavior Tests (Pressure-Scenario Harness) (v1.33.0, 2026-05-30)
+──────────────────────────────────────────────────────────────────────────────
+The skill-behavior pressure-scenario harness shipped in v1.33.0 ports the
+TDD-for-skills METHODOLOGY and the pressure-scenario PATTERN (not the content)
+from:
+  obra/superpowers/skills/writing-skills (https://github.com/obra/superpowers)
+  License: MIT
+writing-skills codifies the TDD-for-skills cycle (RED: an agent fails the task
+without the skill → GREEN: the skill counters those specific rationalizations →
+REFACTOR: close newly-discovered loopholes) and the pattern of testing a skill
+UNDER PRESSURE (time / sunk-cost / authority / exhaustion / scope-minimization)
+rather than only statically. We re-derive the methodology + pattern in GDD's own
+runtime and skill set:
+  scripts/lib/skill-behavior/runner.cjs
+    └─ The manifest-driven pressure-scenario runner (injectable agent-invoker
+       seam, N-attempts + majority rule, RED→GREEN structured result) adapts
+       writing-skills' TDD-for-skills test loop. GDD content: the injectable
+       invoker seam (no SDK dependency — D-03), the scenario-manifest schema,
+       and the stub-LLM CI path.
+  test/suite/skill-behavior/scenarios/*.json
+    └─ The pressure-scenario manifest pattern (a scenario applies a named
+       pressure to a skill and scores compliance vs violation against a rubric)
+       adapts writing-skills' pressure-test pattern. The specific scenarios,
+       pressures, rubrics, and the 8 covered skills are GDD-specific.
+  reference/schemas/pressure-scenario.schema.json
+    └─ The scenario-manifest contract formalizing the pattern. GDD original.
+The methodology + pattern are the contribution being attributed; the scenarios,
+rubrics, runner implementation, and skills are original to get-design-done.
 ────────────────────────────────────────────────────────────────────────
 Note on the broader codebase: get-design-done as a whole is licensed under
 the MIT License (see LICENSE). The Apache 2.0 attribution above applies
 specifically to the cc-multi-cli-derived files listed under the Phase 27
-block. The MIT attributions under Phase 28.5, Phase 28.7, and Phase 32 cover
-content/mechanism adapted from mattpocock/skills (MIT), gsd-build/get-shit-done
-(MIT), and obra/superpowers (MIT) respectively — the MIT-to-MIT re-licensing is
-straightforward and the attributions above provide the required source
-citation. The MIT and Apache 2.0 licenses are compatible — see
+block. The MIT attributions under Phase 28.5, Phase 28.7, Phase 32, and
+Phase 33 cover content/mechanism/methodology adapted from mattpocock/skills
+(MIT), gsd-build/get-shit-done (MIT), obra/superpowers (MIT), and
+obra/superpowers/skills/writing-skills (MIT) respectively — the MIT-to-MIT
+re-licensing is straightforward and the attributions above provide the
+required source citation. The MIT and Apache 2.0 licenses are compatible — see
 https://www.apache.org/legal/resolved.html#category-a.

package/README.md CHANGED Viewed

@@ -288,6 +288,19 @@ GDD ships 70+ skills, but a description-match skill router consults them opportu
 See [`skills/using-gdd/SKILL.md`](skills/using-gdd/SKILL.md) and the `NOTICE` attribution for details.
+### Skill behavior tests (v1.33.0+)
+Static validators check a skill's shape; **behavior tests** check that it holds under pressure. v1.33.0 adds a manifest-driven pressure-scenario harness (porting the TDD-for-skills methodology + pressure-scenario pattern from [`obra/superpowers/skills/writing-skills`](https://github.com/obra/superpowers), MIT): a runner drives a scenario (time / sunk-cost / authority / exhaustion / scope-minimization) through an injectable agent-invoker and scores the response against a compliance/violation rubric with N-attempts + majority rule. Ships 8 scenarios (7 stage skills + `using-gdd`) with synthetic RED baselines.
+Behavior tests are **opt-in** and key-gated — the default `npm test` stub suite covers the harness structurally and stays CI-green (LLM non-determinism keeps live runs out of CI). To run the live pass:
+```bash
+# Skips + exits 0 when ANTHROPIC_API_KEY is unset.
+ANTHROPIC_API_KEY=sk-... GDD_BEHAVIOR_INVOKER=./path/to/invoker.cjs npm run test:behavior
+```
+See [`docs/research/description-format-ab.md`](docs/research/description-format-ab.md) for the description-format A/B methodology and [`CONTRIBUTING.md`](CONTRIBUTING.md) ("How to add a pressure scenario").
 ## How It Works

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@hegemonart/get-design-done",
-  "version": "1.32.0",
+  "version": "1.33.0",
   "description": "A design-quality pipeline for AI coding agents: brief, plan, implement, and verify UI work against your design system.",
   "author": "Hegemon",
   "homepage": "https://github.com/hegemonart/get-design-done",
@@ -24,7 +24,6 @@
     "recipes/",
     "docs/i18n/",
     "scripts/lib/",
-    "scripts/mcp-servers/",
     "scripts/cli/",
     "scripts/install.cjs",
     "SKILL.md",
@@ -51,6 +50,7 @@
     "prepack": "npm run build:sdk",
     "postpack": "node scripts/build-sdk-bins.cjs --clean",
     "test": "node --test --experimental-strip-types \"test/suite/**/*.test.cjs\" \"test/suite/**/*.test.ts\"",
+    "test:behavior": "node scripts/run-behavior-tests.cjs",
     "typecheck": "tsc --noEmit",
     "codegen:schemas": "node --experimental-strip-types scripts/codegen-schema-types.ts",
     "lint:md": "npx --yes markdownlint-cli2 \"**/*.md\" \"#node_modules\" \"#.planning\" \"#.claude\" \"#test/fixtures/baselines\"",

package/reference/schemas/pressure-scenario.schema.json ADDED Viewed

@@ -0,0 +1,69 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://get-design-done.example/schemas/pressure-scenario.schema.json",
+  "title": "Pressure Scenario Manifest",
+  "description": "Contract for a Phase-33 skill-behavior pressure-scenario manifest. The runner (scripts/lib/skill-behavior/runner.cjs) loads manifests conforming to this schema, spawns a subagent against `setup_prompt` under the named `pressures`, and validates the response against the `expected_compliance` / `expected_violations` regex sources (compiled with new RegExp(source)). The 5-value `pressures` enum and the required-field set come verbatim from ROADMAP Phase-33 SC#2.",
+  "type": "object",
+  "additionalProperties": false,
+  "required": [
+    "name",
+    "target_skill",
+    "pressures",
+    "setup_prompt",
+    "expected_compliance",
+    "expected_violations"
+  ],
+  "properties": {
+    "name": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Unique scenario identifier, e.g. \"brief-time-pressure\"."
+    },
+    "target_skill": {
+      "type": "string",
+      "minLength": 1,
+      "description": "The skill under test, e.g. \"brief\", \"explore\", \"plan\", \"using-gdd\"."
+    },
+    "pressures": {
+      "type": "array",
+      "minItems": 1,
+      "description": "One or more pressure vectors applied in the setup_prompt.",
+      "items": {
+        "enum": ["time", "sunk-cost", "authority", "exhaustion", "scope-minimization"]
+      }
+    },
+    "setup_prompt": {
+      "type": "string",
+      "minLength": 1,
+      "description": "The prompt handed to the subagent — embeds the pressure(s) and asks it to act."
+    },
+    "expected_compliance": {
+      "type": "array",
+      "minItems": 1,
+      "description": "Regex SOURCE strings the response MUST match to count as compliant (the runner compiles each with new RegExp(source)).",
+      "items": { "type": "string", "minLength": 1 }
+    },
+    "expected_violations": {
+      "type": "array",
+      "description": "Regex SOURCE strings that, if matched, count as a violation (the runner compiles each with new RegExp(source)). May be empty.",
+      "items": { "type": "string", "minLength": 1 }
+    },
+    "description": {
+      "type": "string",
+      "description": "Optional free-text scenario note (33-03 baselines reference it)."
+    },
+    "variant": {
+      "type": "string",
+      "description": "Optional A/B variant label, e.g. \"trigger-only\" | \"what-clause\" (33-04 description-format A/B)."
+    },
+    "variants": {
+      "type": "array",
+      "description": "Optional array of A/B variant descriptors for a single-manifest A/B pair (33-04). Each item is an object, e.g. { label, description }.",
+      "items": { "type": "object" }
+    },
+    "body_probe": {
+      "type": "string",
+      "description": "Optional body-only probe prompt the A/B scenario asks (33-04 description-format A/B)."
+    }
+  }
+}

package/scripts/lib/skill-behavior/runner.cjs ADDED Viewed

@@ -0,0 +1,187 @@
+/**
+ * runner.cjs — manifest-driven pressure-scenario runner (Plan 33-01).
+ *
+ * The ROOT engine of Phase 33: every later plan (33-03 scenarios, 33-04 A/B,
+ * 33-05 telemetry) builds on this. It loads a parsed pressure-scenario
+ * manifest, invokes an agent via an INJECTABLE `invokeAgent(prompt, opts) ->
+ * { text }` seam, runs N attempts (default 3), scores each response against
+ * the manifest's expected_compliance[] (must-match regexes) and
+ * expected_violations[] (failure regexes), applies a STRICT majority rule
+ * (more than half of the attempts must pass — 2 of 3 at the default), and
+ * emits a structured result.
+ *
+ * D-03 — invoker-agnostic, NO direct Anthropic SDK dependency:
+ *   This file deps on node:fs + node:path ONLY. It NEVER requires the
+ *   Anthropic SDK package. The default invoker is the deterministic stub at
+ *   ./stub-invoker.cjs so CI/tests run with no API key and no network. A
+ *   maintainer later wires a real invoker (peer-CLI ACP spawn or a thin keyed
+ *   SDK adapter) by passing opts.invokeAgent. (The guard test asserts the
+ *   exact package name never appears in this source.)
+ *
+ * Purity / injectability:
+ *   invokeAgent, the clock (now), and fs are all injectable via opts so every
+ *   test drives the stub with a fixed clock.
+ *
+ * Result (EXACT shape):
+ *   {
+ *     scenario: string,            // = manifest.name
+ *     attempts: Array<{            // one entry per attempt (length === attempts)
+ *       text: string,
+ *       pass: boolean,             // ALL compliance matched AND zero violations
+ *       compliance_hits: number,   // # expected_compliance regexes matching this text
+ *       violation_hits: number,    // # expected_violations regexes matching this text
+ *     }>,
+ *     pass: boolean,               // MAJORITY: (#passing attempts) * 2 > attempts.length
+ *     compliance_hits: number,     // aggregate sum across attempts
+ *     violation_hits: number,      // aggregate sum across attempts
+ *   }
+ *
+ * Pattern reference (NOT a dependency): scripts/lib/event-chain.cjs shows the
+ * house CommonJS idiom (defensive fs, pure functions). Style mirrored, not imported.
+ */
+'use strict';
+const nodeFs = require('node:fs');
+const path = require('node:path');
+const DEFAULT_ATTEMPTS = 3; // attempts per scenario when opts.attempts is absent or not a positive integer
+/**
+ * Load a pressure-scenario manifest. Accepts either an already-parsed object
+ * (returned as-is) or a path to a JSON file (read + parsed via the injectable
+ * fs). Keeping this injectable lets later plans (33-03) load real manifest
+ * files while tests pass inline objects.
+ *
+ * Behavior notes:
+ *   * Any non-null value whose typeof is 'object' is returned untouched — no
+ *     schema validation happens here, and an array also takes this branch.
+ *   * A relative path is resolved against process.cwd(); an absolute path is
+ *     used unchanged.
+ *   * The file is read as UTF-8. Read and JSON.parse failures are not caught;
+ *     they propagate to the caller.
+ *
+ * @param {object | string} input  parsed manifest OR a path to a JSON manifest
+ * @param {{ fs?: typeof import('node:fs') }} [deps]  optional fs override; only
+ *   readFileSync is called on it
+ * @returns {object} the parsed manifest
+ * @throws {TypeError} when input is neither an object nor a string (null and
+ *   undefined included)
+ */
+function loadManifest(input, deps) {
+  if (input && typeof input === 'object') {
+    return input;
+  }
+  if (typeof input === 'string') {
+    const fs = (deps && deps.fs) || nodeFs;
+    const abs = path.isAbsolute(input) ? input : path.resolve(process.cwd(), input);
+    const raw = fs.readFileSync(abs, 'utf8');
+    return JSON.parse(raw);
+  }
+  throw new TypeError('loadManifest: input must be a parsed manifest object or a path string');
+}
+/**
+ * Compile an array of regex SOURCE strings into RegExp objects. Manifests
+ * author patterns as plain strings (NOT pre-compiled) so they stay JSON-safe;
+ * the runner owns compilation.
+ *
+ * Behavior notes:
+ *   * A non-array argument (including undefined) yields an empty array.
+ *   * Each element is coerced with String() before compilation, so non-string
+ *     entries are compiled from their string form rather than rejected.
+ *   * Patterns are compiled with NO flags: matching is case-sensitive, and
+ *     because there is no g/y flag, repeated .test() calls carry no lastIndex
+ *     state between attempts.
+ *   * An invalid source makes the RegExp constructor throw; that error is not
+ *     caught here and does not say which manifest field it came from.
+ *
+ * @param {unknown} sources
+ * @returns {RegExp[]}
+ */
+function compilePatterns(sources) {
+  if (!Array.isArray(sources)) return [];
+  return sources.map((src) => new RegExp(String(src)));
+}
+/**
+ * Coerce an invoker's `.text` to a string. A non-string (or absent) value
+ * becomes '' so scoring never throws and is treated as a compliance-miss.
+ *
+ * Only a truthy response whose `text` property is already a string is passed
+ * through; nothing is stringified. NOTE(review): a Promise returned by an
+ * async invoker has no string `text`, so it silently collapses to '' here —
+ * confirm that real invoker adapters return synchronously.
+ *
+ * @param {unknown} response
+ * @returns {string}
+ */
+function textOf(response) {
+  if (response && typeof response.text === 'string') return response.text;
+  return '';
+}
+/**
+ * Score a single response text against pre-compiled compliance/violation
+ * regexes.
+ *
+ * compliance_hits and violation_hits count how many DISTINCT patterns match the
+ * text (each pattern contributes at most 1), not how many times they match.
+ *
+ * NOTE(review): when complianceRes is empty the "all compliance matched" check
+ * is 0 === 0, so any text with no violation match passes — including the empty
+ * string. Nothing in this function enforces a non-empty compliance list.
+ *
+ * @param {string} text
+ * @param {RegExp[]} complianceRes
+ * @param {RegExp[]} violationRes
+ * @returns {{ text: string, pass: boolean, compliance_hits: number, violation_hits: number }}
+ */
+function scoreAttempt(text, complianceRes, violationRes) {
+  const compliance_hits = complianceRes.filter((re) => re.test(text)).length;
+  const violation_hits = violationRes.filter((re) => re.test(text)).length;
+  // An attempt PASSES iff ALL compliance regexes matched AND zero violations did.
+  const pass = compliance_hits === complianceRes.length && violation_hits === 0;
+  return { text, pass, compliance_hits, violation_hits };
+}
+/**
+ * Run a pressure scenario: invoke the seam N times, score each response, and
+ * apply a strict majority rule.
+ *
+ * Behavior notes:
+ *   * The invoker is called synchronously as invokeAgent(prompt, { scenario,
+ *     attempt }) with a zero-based attempt index; its return value is not
+ *     awaited. NOTE(review): an invoker that returns a Promise therefore scores
+ *     as empty text on every attempt — confirm real adapters are synchronous.
+ *   * An exception thrown by the invoker is caught and discarded; that attempt
+ *     is scored on ''. NOTE(review): if expected_compliance is missing or empty
+ *     (or every compliance pattern matches '') and no violation pattern matches
+ *     '', such an attempt still scores pass: true.
+ *   * opts.attempts is honored only when it is a positive integer; anything
+ *     else falls back to DEFAULT_ATTEMPTS.
+ *   * The majority test is passed * 2 > attempts, so a tie on an even attempt
+ *     count (e.g. 1 of 2) fails.
+ *   * now() is called once per attempt and its result is ignored; opts.fs is
+ *     accepted in the signature but not read by this function.
+ *   * A missing manifest yields scenario === undefined and an empty prompt.
+ *   * Both pattern lists are compiled before the first attempt, so an invalid
+ *     regex source throws before the invoker is ever called.
+ *
+ * @param {object} manifest  parsed pressure-scenario manifest
+ *   { name, target_skill, pressures[], setup_prompt, expected_compliance[], expected_violations[] }
+ * @param {{
+ *   invokeAgent?: (prompt: string, opts: object) => { text: string },
+ *   attempts?: number,
+ *   now?: () => number,
+ *   fs?: typeof import('node:fs'),
+ * }} [opts]
+ * @returns {{
+ *   scenario: string,
+ *   attempts: Array<{ text: string, pass: boolean, compliance_hits: number, violation_hits: number }>,
+ *   pass: boolean,
+ *   compliance_hits: number,
+ *   violation_hits: number,
+ * }}
+ */
+function runScenario(manifest, opts) {
+  const o = opts || {};
+  // D-03: default to the deterministic stub invoker — never the real SDK.
+  const invokeAgent = o.invokeAgent || require('./stub-invoker.cjs').invokeAgent;
+  const attempts =
+    Number.isInteger(o.attempts) && o.attempts > 0 ? o.attempts : DEFAULT_ATTEMPTS;
+  // Injectable clock (reserved for future telemetry timestamps; called so the
+  // seam is exercised and a fixed now() is honored).
+  const now = typeof o.now === 'function' ? o.now : Date.now;
+  const complianceRes = compilePatterns(manifest && manifest.expected_compliance);
+  const violationRes = compilePatterns(manifest && manifest.expected_violations);
+  const scenario = manifest && manifest.name;
+  const prompt = (manifest && manifest.setup_prompt) || '';
+  const attemptResults = [];
+  for (let i = 0; i < attempts; i++) {
+    now(); // exercise the injectable clock (deterministic under a fixed now)
+    let text = '';
+    try {
+      // Pass the scenario key through so the stub (or a real invoker) can key on it.
+      const response = invokeAgent(prompt, { scenario, attempt: i });
+      text = textOf(response);
+    } catch (_err) {
+      // A thrown invoker must NOT crash the run — record a failed empty attempt.
+      text = '';
+    }
+    attemptResults.push(scoreAttempt(text, complianceRes, violationRes));
+  }
+  const passed = attemptResults.filter((a) => a.pass).length;
+  // STRICT majority: 2/3 and 3/3 pass; 0/3 and 1/3 fail.
+  const pass = passed * 2 > attemptResults.length;
+  const compliance_hits = attemptResults.reduce((sum, a) => sum + a.compliance_hits, 0);
+  const violation_hits = attemptResults.reduce((sum, a) => sum + a.violation_hits, 0);
+  return {
+    scenario,
+    attempts: attemptResults,
+    pass,
+    compliance_hits,
+    violation_hits,
+  };
+}
+module.exports = {
+  runScenario,
+  loadManifest,
+  // Exposed for unit-level reuse / later plans; not part of the core contract. (textOf is not exported.)
+  scoreAttempt,
+  compilePatterns,
+  DEFAULT_ATTEMPTS,
+};

package/scripts/lib/skill-behavior/stub-invoker.cjs ADDED Viewed

@@ -0,0 +1,95 @@
+/**
+ * stub-invoker.cjs — deterministic, scenario-keyed agent invoker (Plan 33-01).
+ *
+ * The DEFAULT invokeAgent seam for `runner.cjs` (D-03): the runner is
+ * invoker-agnostic and exposes an injectable `invokeAgent(prompt, opts) ->
+ * { text }` seam. A maintainer later wires a REAL invoker (a peer-CLI ACP
+ * spawn of a local `claude`/`codex`, or a thin keyed SDK adapter); this stub
+ * is what every Phase-33 CI/structural test drives so runs are reproducible
+ * with NO API key and NO network.
+ *
+ * Determinism contract:
+ *   * NO randomness, NO network, NO @anthropic-ai/sdk.
+ *   * A canned response is resolved by a KEY derived from
+ *     opts.scenario || opts.stubKey, falling back to scanning `prompt` for a
+ *     registered key marker.
+ *   * An UNKNOWN key returns a neutral { text: '' } so the runner never throws.
+ *
+ * Tests MAY instead pass their own inline invokeAgent to runScenario — both
+ * paths are valid (D-03). This module is the no-arg default.
+ */
+'use strict';
+// Internal canned-response table: key -> response text. Seeded with one
+// illustrative scenario; callers extend it via register(). Module-level and mutable; also exported as _table.
+const TABLE = new Map([
+  // A neutral, compliance-shaped sample so the default stub is non-empty for a
+  // known demo key. Real scenarios register their own canned text.
+  [
+    'runner-demo',
+    'A <HARD-GATE> blocks me — I must write the brief before any other stage.',
+  ],
+]);
+/**
+ * Seed or overwrite a canned response for a scenario key.
+ *
+ * The entry is written into the module-level TABLE, so it stays registered for
+ * the lifetime of this module instance and replaces any earlier text stored
+ * under the same key. Text coercion: a string is stored as-is, null/undefined
+ * is stored as '', and any other value is stored as String(value).
+ *
+ * @param {string} key   scenario name / stub key
+ * @param {string} text  canned response text the stub returns for that key
+ * @returns {void}
+ * @throws {TypeError} when key is not a non-empty string
+ */
+function register(key, text) {
+  if (typeof key !== 'string' || key.length === 0) {
+    throw new TypeError('register: key must be a non-empty string');
+  }
+  TABLE.set(key, typeof text === 'string' ? text : String(text == null ? '' : text));
+}
+/**
+ * Resolve a response key from opts, then (as a fallback) by scanning the
+ * prompt for any registered key as a substring marker.
+ *
+ * Precedence: a non-empty string opts.scenario, then a non-empty string
+ * opts.stubKey, then the first TABLE key (in Map insertion order) that occurs
+ * as a substring of a non-empty prompt.
+ *
+ * NOTE(review): opts.scenario and opts.stubKey are returned WITHOUT checking
+ * that they are registered. A caller that always passes a scenario name
+ * therefore never reaches the stubKey or prompt-scan fallbacks, even when that
+ * name has no canned response.
+ *
+ * @param {string} prompt
+ * @param {{scenario?: string, stubKey?: string} | undefined} opts
+ * @returns {string | undefined}  the chosen key, or undefined when nothing matched
+ */
+function resolveKey(prompt, opts) {
+  if (opts && typeof opts.scenario === 'string' && opts.scenario.length > 0) {
+    return opts.scenario;
+  }
+  if (opts && typeof opts.stubKey === 'string' && opts.stubKey.length > 0) {
+    return opts.stubKey;
+  }
+  if (typeof prompt === 'string' && prompt.length > 0) {
+    for (const key of TABLE.keys()) {
+      if (prompt.includes(key)) return key;
+    }
+  }
+  return undefined;
+}
+/**
+ * Deterministic invokeAgent-shaped function. Returns a canned { text } for a
+ * known scenario key, or a neutral { text: '' } for an unknown key (so the
+ * runner can score it as a compliance-miss without throwing).
+ *
+ * The key comes from resolveKey(prompt, opts). A resolved key that is absent
+ * from TABLE is handled the same way as no key at all: empty text. A fresh
+ * result object is created on every call.
+ *
+ * @param {string} prompt
+ * @param {{scenario?: string, stubKey?: string}} [opts]
+ * @returns {{ text: string }}
+ */
+function invokeAgent(prompt, opts) {
+  const key = resolveKey(prompt, opts);
+  if (key !== undefined && TABLE.has(key)) {
+    return { text: TABLE.get(key) };
+  }
+  // Unknown key -> neutral default; never throw.
+  return { text: '' };
+}
+module.exports = {
+  invokeAgent,
+  register,
+  // Exposed for advanced callers/tests that want to inspect or reset seeds. This is the live Map, not a copy.
+  _table: TABLE,
+};

package/scripts/lib/skill-behavior/telemetry.cjs ADDED Viewed

@@ -0,0 +1,379 @@
+/**
+ * telemetry.cjs — reflector-telemetry layer for the pressure-scenario harness
+ * (Plan 33-05). The third leg of Phase 33: it CONSUMES the 33-01 runner result
+ * ({ scenario, target_skill, pass, compliance_hits, violation_hits }), records a
+ * scenario-failure event to a JSONL artifact, detects SUSTAINED failure, and on
+ * sustained failure produces a PROPOSE-ONLY reflector content-edit draft via the
+ * same incubator/apply-reflections surface the shipped reflector-kfm-proposer
+ * uses.
+ *
+ * Why this module exists: behavior tests only matter if a sustained failure
+ * prompts a content fix. This closes that loop — a failing run is recorded; when
+ * a scenario fails ≥3 of its last 10 runs (D-07 threshold), the reflector
+ * proposes a skill-content edit for human review via /gdd:apply-reflections. The
+ * proposal NEVER auto-edits a skill (Phase 11/29 propose-only SC; Phase 33
+ * out-of-scope: "Auto-applying reflector-proposed skill edits — propose-only").
+ *
+ * Decisions honored:
+ *   * D-07 — telemetry → .design/telemetry/skill-behavior.jsonl (runtime
+ *     artifact, gitignored, local); sustained-failure signal = ≥3 of the last 10
+ *     runs failing for a scenario; reflector consumption is STUB-tested (no live
+ *     runs — all paths + the clock are injectable so tests use a tmp dir).
+ *   * D-06 — this module is exercised by the DEFAULT suite (no API key / no LLM).
+ *
+ * Injectability / purity:
+ *   The JSONL path, the incubator root, `fs`, and the clock (`now`) are ALL
+ *   injectable via opts so every test writes to an os.tmpdir() dir and NOTHING
+ *   touches the real .design/ tree. The runner (33-01) does NOT stamp a `ts`;
+ *   the timestamp is stamped HERE via the injected `now`.
+ *
+ * Pattern references (style mirrored, NOT imported):
+ *   * scripts/lib/event-chain.cjs — house JSONL append (defensive mkdir -p +
+ *     append, never-throw) + findRepoRoot + line-by-line read idiom.
+ *   * scripts/lib/reflector-kfm-proposer.cjs — shouldPropose-style stability gate
+ *     + proposeKfmDraft writing a proposal-only draft under
+ *     .design/reflections/incubator/<slug>/CATALOGUE-ENTRY.md.
+ *
+ * Public API:
+ *   recordRun(result, opts)              → event | null   (append on pass:false)
+ *   readRuns(scenario, opts)             → Array<event>   (tail JSONL, filter)
+ *   isSustainedFailure(scenario, opts)   → boolean         (≥3 of last 10 failed)
+ *   maybeProposeReflection(scenario, opts) → { action:'drafted', path, slug }
+ *                                            | { action:'skipped', reason }
+ *
+ * Pure CommonJS, deps = node:fs + node:path ONLY. No npm dependencies.
+ */
+'use strict';
+const nodeFs = require('node:fs');
+const path = require('node:path');
+// -------------------------------------------------------------------
+// Constants
+// -------------------------------------------------------------------
+// Value stamped into the `event_type` field of every recorded event.
+const EVENT_TYPE = 'skill_behavior_failure';
+// Default artifact locations; joined onto the repo root when the caller does
+// not inject an explicit jsonlPath / incubatorRoot.
+const DEFAULT_JSONL_REL = '.design/telemetry/skill-behavior.jsonl';
+const DEFAULT_INCUBATOR_REL = '.design/reflections/incubator';
+const SUSTAINED_WINDOW = 10; // D-07: look at the last N runs
+const SUSTAINED_THRESHOLD = 3; // D-07: ≥3 failures of the last 10 == sustained
+// Prepended to the derived scenario slug to name a draft's incubator directory.
+const INCUBATOR_PREFIX = 'skill-edit-';
+// -------------------------------------------------------------------
+// Helpers
+// -------------------------------------------------------------------
+/**
+ * Walk up from a start dir until a package.json is found (repo root). Mirrors
+ * the reflector-kfm-proposer / event-chain findRepoRoot idiom.
+ *
+ * @param {string} [startDir]
+ * @returns {string}
+ */
+function findRepoRoot(startDir) {
+  let dir = startDir || __dirname;
+  for (let i = 0; i < 12; i++) {
+    if (nodeFs.existsSync(path.join(dir, 'package.json'))) return dir;
+    const parent = path.dirname(dir);
+    if (parent === dir) break;
+    dir = parent;
+  }
+  return path.resolve(__dirname, '..', '..', '..');
+}
+/**
+ * Resolve the JSONL emit path: explicit opts.jsonlPath wins (absolute or
+ * relative to cwd); otherwise <repoRoot>/.design/telemetry/skill-behavior.jsonl.
+ */
+function resolveJsonlPath(opts) {
+  const o = opts || {};
+  if (o.jsonlPath) {
+    return path.isAbsolute(o.jsonlPath)
+      ? o.jsonlPath
+      : path.resolve(o.repoRoot || process.cwd(), o.jsonlPath);
+  }
+  return path.join(o.repoRoot || findRepoRoot(), DEFAULT_JSONL_REL);
+}
+/**
+ * Resolve the incubator draft root: explicit opts.incubatorRoot wins; otherwise
+ * <repoRoot>/.design/reflections/incubator.
+ */
+function resolveIncubatorRoot(opts) {
+  const o = opts || {};
+  if (o.incubatorRoot) {
+    return path.isAbsolute(o.incubatorRoot)
+      ? o.incubatorRoot
+      : path.resolve(o.repoRoot || process.cwd(), o.incubatorRoot);
+  }
+  return path.join(o.repoRoot || findRepoRoot(), DEFAULT_INCUBATOR_REL);
+}
+/**
+ * Kebab-case slug from a free-text scenario name (mirrors the reflector-kfm
+ * deriveSlug semantics — ASCII-only, dash-collapsed, ≤40 chars).
+ */
+function deriveSlug(text) {
+  const raw = typeof text === 'string' ? text : '';
+  let s = raw.toLowerCase();
+  s = s.replace(/[^\x20-\x7e]+/g, '');
+  s = s.replace(/[^a-z0-9]+/g, '-');
+  s = s.replace(/-+/g, '-');
+  s = s.replace(/^-+|-+$/g, '');
+  if (s.length > 40) s = s.slice(0, 40);
+  s = s.replace(/-+$/g, '');
+  return s || 'unnamed';
+}
+// -------------------------------------------------------------------
+// recordRun — emit a scenario-failure event to the JSONL artifact
+// -------------------------------------------------------------------
+/**
+ * Append ONE scenario-failure event to the JSONL artifact when a 33-01 runner
+ * result has pass:false. The timestamp is stamped HERE via the injected clock
+ * (the runner does not emit a `ts`). On a passing result, returns null (the
+ * sustained-failure detector reads failures only).
+ *
+ * Never throws on a missing .design/ tree — mkdir -p the parent defensively and
+ * swallow write errors (mirrors event-chain.cjs).
+ *
+ * EVENT SHAPE:
+ *   { event_type:'skill_behavior_failure', scenario, target_skill?, pass:false,
+ *     compliance_hits, violation_hits, ts }
+ *
+ * @param {{ scenario:string, target_skill?:string, pass:boolean,
+ *           compliance_hits?:number, violation_hits?:number }} result
+ * @param {{ jsonlPath?:string, fs?:typeof import('node:fs'),
+ *           now?:() => number|string, repoRoot?:string }} [opts]
+ * @returns {object | null} the appended event, or null on a passing result
+ */
+function recordRun(result, opts) {
+  const o = opts || {};
+  const fs = o.fs || nodeFs;
+  const now = typeof o.now === 'function' ? o.now : () => new Date().toISOString();
+  if (!result || typeof result !== 'object') return null;
+  // Detector reads FAILURES only — a passing run emits nothing.
+  if (result.pass !== false) return null;
+  const event = {
+    event_type: EVENT_TYPE,
+    scenario: result.scenario,
+    pass: false,
+    compliance_hits: Number.isFinite(result.compliance_hits) ? result.compliance_hits : 0,
+    violation_hits: Number.isFinite(result.violation_hits) ? result.violation_hits : 0,
+    ts: now(),
+  };
+  // Preserve target_skill when the runner supplied it (useful for the proposal).
+  if (result.target_skill !== undefined) event.target_skill = result.target_skill;
+  const jsonlPath = resolveJsonlPath(o);
+  try {
+    fs.mkdirSync(path.dirname(jsonlPath), { recursive: true });
+    fs.appendFileSync(jsonlPath, JSON.stringify(event) + '\n', { flag: 'a' });
+  } catch (err) {
+    // Defensive: telemetry must never crash a run. Mirror event-chain.cjs.
+    try {
+      process.stderr.write(
+        `[skill-behavior-telemetry] write failed: ${err && err.message ? err.message : String(err)}\n`,
+      );
+    } catch (_e) {
+      /* swallow */
+    }
+  }
+  return event;
+}
+// -------------------------------------------------------------------
+// readRuns — tail the JSONL, filter by scenario
+// -------------------------------------------------------------------
+/**
+ * Read the JSONL artifact and return every recorded event for `scenario`, in
+ * file order (oldest → newest). Defensive on a missing file: returns []. Invalid
+ * JSON lines are skipped.
+ *
+ * @param {string} scenario
+ * @param {{ jsonlPath?:string, fs?:typeof import('node:fs'), repoRoot?:string }} [opts]
+ * @returns {Array<object>}
+ */
+function readRuns(scenario, opts) {
+  const o = opts || {};
+  const fs = o.fs || nodeFs;
+  const jsonlPath = resolveJsonlPath(o);
+  if (!fs.existsSync(jsonlPath)) return [];
+  let raw;
+  try {
+    raw = fs.readFileSync(jsonlPath, 'utf8');
+  } catch (_e) {
+    return [];
+  }
+  const out = [];
+  for (const line of raw.split('\n')) {
+    if (line.trim() === '') continue;
+    let rec;
+    try {
+      rec = JSON.parse(line);
+    } catch (_e) {
+      continue; // skip malformed line
+    }
+    if (rec && rec.scenario === scenario) out.push(rec);
+  }
+  return out;
+}
+// -------------------------------------------------------------------
+// isSustainedFailure — ≥3 of the last 10 runs failed for a scenario (D-07)
+// -------------------------------------------------------------------
+/**
+ * Sustained-failure detector. Considers the LAST 10 runs for `scenario` and
+ * returns true iff ≥3 of them failed (D-07). Accepts EITHER an in-memory
+ * opts.window (array of `{ pass }` objects — for unit tests) OR reads the
+ * on-disk JSONL tail via readRuns().
+ *
+ * Boundary: 2/10 → false, 3/10 → true; strictly windowed to the last 10 (older
+ * failures excluded).
+ *
+ * Note: recordRun only persists FAILURE events, so the on-disk path counts each
+ * recorded row as a failure. The in-memory window path inspects `pass` so tests
+ * can mix pass/fail entries to exercise the windowing math precisely.
+ *
+ * @param {string} scenario
+ * @param {{ window?:Array<{pass:boolean}>, jsonlPath?:string,
+ *           fs?:typeof import('node:fs'), window_size?:number,
+ *           threshold?:number, repoRoot?:string }} [opts]
+ * @returns {boolean}
+ */
+function isSustainedFailure(scenario, opts) {
+  const o = opts || {};
+  const windowSize = Number.isInteger(o.window_size) && o.window_size > 0 ? o.window_size : SUSTAINED_WINDOW;
+  const threshold = Number.isInteger(o.threshold) && o.threshold > 0 ? o.threshold : SUSTAINED_THRESHOLD;
+  let runs;
+  if (Array.isArray(o.window)) {
+    runs = o.window;
+  } else {
+    runs = readRuns(scenario, o);
+  }
+  // Strictly the LAST `windowSize` runs.
+  const tail = runs.slice(-windowSize);
+  // A row counts as a failure when pass === false. On-disk rows are all failures
+  // (recordRun only persists pass:false), so a missing `pass` defaults to failed
+  // for the disk path; the in-memory window always carries an explicit `pass`.
+  const failures = tail.filter((r) => r && r.pass !== true).length;
+  return failures >= threshold;
+}
+// -------------------------------------------------------------------
+// maybeProposeReflection — propose-only reflector content-edit draft
+// -------------------------------------------------------------------
+/**
+ * Reflector consumption point (mirrors reflector-kfm-proposer's shouldPropose +
+ * proposeKfmDraft idiom): gate on isSustainedFailure(scenario); if NOT sustained
+ * return { action:'skipped', reason:'below_sustained_threshold' }; if sustained,
+ * write a PROPOSE-ONLY draft under the (injectable) incubator root at
+ * <incubatorRoot>/skill-edit-<scenario>/CATALOGUE-ENTRY.md naming the failing
+ * scenario/skill + the sustained-failure signal + a TODO for the content edit,
+ * and return { action:'drafted', path, slug }.
+ *
+ * This draft lands in the SAME incubator tree that
+ * scripts/lib/apply-reflections/incubator-proposals.cjs surfaces in
+ * /gdd:apply-reflections — so a maintainer reviews + accepts/rejects the proposed
+ * skill edit there. It NEVER auto-edits a skill (Phase 11/29 propose-only SC;
+ * Phase 33 out-of-scope).
+ *
+ * @param {string} scenario
+ * @param {{ window?:Array<{pass:boolean}>, jsonlPath?:string,
+ *           incubatorRoot?:string, fs?:typeof import('node:fs'),
+ *           now?:() => number|string, target_skill?:string,
+ *           repoRoot?:string }} [opts]
+ * @returns {{ action:'drafted', path:string, slug:string }
+ *           | { action:'skipped', reason:string }}
+ */
+function maybeProposeReflection(scenario, opts) {
+  const o = opts || {};
+  const fs = o.fs || nodeFs;
+  const now = typeof o.now === 'function' ? o.now : () => new Date().toISOString();
+  // Stability gate — the ≥3/10 sustained-failure threshold (analogous to the
+  // reflector-kfm ≥K gate).
+  if (!isSustainedFailure(scenario, o)) {
+    return { action: 'skipped', reason: 'below_sustained_threshold' };
+  }
+  const slug = `${INCUBATOR_PREFIX}${deriveSlug(scenario)}`;
+  const incubatorRoot = resolveIncubatorRoot(o);
+  const draftDir = path.join(incubatorRoot, slug);
+  const draftPath = path.join(draftDir, 'CATALOGUE-ENTRY.md');
+  // Best-effort target_skill: prefer an injected hint, else the latest recorded
+  // failure event for this scenario (recordRun stamps target_skill).
+  let targetSkill = o.target_skill;
+  if (!targetSkill && !Array.isArray(o.window)) {
+    const recorded = readRuns(scenario, o);
+    const last = recorded.length ? recorded[recorded.length - 1] : null;
+    if (last && last.target_skill) targetSkill = last.target_skill;
+  }
+  const body = [
+    `# Skill-edit proposal — ${scenario}`,
+    '',
+    `**Source:** skill-behavior-telemetry (pressure-scenario harness)`,
+    `**Failing scenario:** ${scenario}`,
+    `**Target skill:** ${targetSkill || 'TODO: <skill that failed under pressure>'}`,
+    `**Signal:** sustained failure — ≥${SUSTAINED_THRESHOLD} of the last ${SUSTAINED_WINDOW} runs failed (D-07).`,
+    '',
+    `Drafted ${now()}. **PROPOSE-ONLY** — review via \`/gdd:apply-reflections\`.`,
+    'This draft NEVER auto-edits a skill (Phase 11/29 propose-only SC; Phase 33 out-of-scope).',
+    '',
+    '## Rationalization signal',
+    '',
+    `The "${scenario}" pressure scenario is failing repeatedly: the target skill is`,
+    'not holding under pressure (an agent is rationalizing past its HARD-GATE /',
+    'rationalization table). A content edit is proposed to close the loophole.',
+    '',
+    '## Proposed content edit',
+    '',
+    `- TODO: identify which rationalization the "${scenario}" scenario exploits.`,
+    '- TODO: add / strengthen the counter-rationalization row in the target skill',
+    "  (the '| Thought | Reality |' table) OR tighten its <HARD-GATE> wording.",
+    '- TODO: re-run `npm run test:behavior` for this scenario to confirm GREEN.',
+    '',
+  ].join('\n');
+  try {
+    fs.mkdirSync(draftDir, { recursive: true });
+    fs.writeFileSync(draftPath, body);
+  } catch (err) {
+    // A draft-write failure must not crash the harness; surface as skipped.
+    return { action: 'skipped', reason: `draft_write_failed: ${err && err.message ? err.message : String(err)}` };
+  }
+  return { action: 'drafted', path: draftPath, slug };
+}
+// -------------------------------------------------------------------
+// Exports
+// -------------------------------------------------------------------
+module.exports = {
+  // Public API: record a failing run, read them back, gate, and draft.
+  recordRun,
+  readRuns,
+  isSustainedFailure,
+  maybeProposeReflection,
+  // Exposed for tests / higher-level integration.
+  EVENT_TYPE,
+  DEFAULT_JSONL_REL,
+  DEFAULT_INCUBATOR_REL,
+  SUSTAINED_WINDOW,
+  SUSTAINED_THRESHOLD,
+  // Internal helpers re-exported under underscore-prefixed names.
+  _deriveSlug: deriveSlug,
+  _findRepoRoot: findRepoRoot,
+};

package/scripts/lib/cli/index.ts DELETED Viewed

@@ -1,29 +0,0 @@
-// scripts/lib/cli/index.ts — GDD-DEPRECATION-SHIM (Plan 31-5-06, SDK-05, D-02).
-//
-// Thin deprecation shim. The real implementation moved to sdk/cli/index.ts
-// in Plan 31-5-04 (SDK consolidation). This file is re-created at the OLD
-// path so undocumented EXTERNAL importers (anyone who reached into
-// node_modules/@hegemonart/get-design-done/scripts/lib/cli/index.ts directly)
-// keep working for one minor grace window.
-//
-// REMOVED IN v1.33.0 (D-02). Grace window: 1.31.5 ships with shims →
-// 1.32.0 still has them → 1.33.0 removes them. Internal callers already use
-// the sdk/ path (Plan 31-5-04/05) — this shim is external-only; 31-5-10's
-// no-stale-internal-refs guard excludes files carrying the
-// GDD-DEPRECATION-SHIM marker above.
-//
-// Runs under --experimental-strip-types (the runtime `bin/gdd-sdk` and the
-// test suite both use it), so `export *` re-export is strip-types-clean.
-import { emitWarning } from 'node:process';
-let warned = false;
-if (!warned) {
-  warned = true;
-  emitWarning(
-    'scripts/lib/cli/index.ts is deprecated; import sdk/cli instead. Removed in v1.33.0.',
-    'DeprecationWarning',
-  );
-}
-export * from '../../../sdk/cli/index.ts';

package/scripts/lib/error-classifier.cjs DELETED Viewed

@@ -1,29 +0,0 @@
-'use strict';
-// scripts/lib/error-classifier.cjs — GDD-DEPRECATION-SHIM (Plan 31-5-06, SDK-05, D-02).
-//
-// Thin deprecation shim. The real implementation moved to
-// sdk/primitives/error-classifier.cjs in Plan 31-5-04 (SDK consolidation).
-// This file is re-created at the OLD path so undocumented EXTERNAL importers
-// (anyone who reached into node_modules/@hegemonart/get-design-done/scripts/
-// lib/error-classifier.cjs directly) keep working for one minor grace window.
-//
-// REMOVED IN v1.33.0 (D-02). Grace window: 1.31.5 ships with shims →
-// 1.32.0 still has them → 1.33.0 removes them. Internal callers already use
-// the sdk/ path (Plan 31-5-04/05) — this shim is external-only and 31-5-10's
-// no-stale-internal-refs guard excludes files carrying the GDD-DEPRECATION-SHIM
-// marker above.
-//
-// Emits a DeprecationWarning exactly ONCE per process: the module-level
-// `warned` flag plus Node's module cache (this file is evaluated once per
-// process regardless of how many times it is required).
-let warned = false;
-if (!warned) {
-  warned = true;
-  process.emitWarning(
-    'scripts/lib/error-classifier.cjs is deprecated; import sdk/primitives/error-classifier instead. Removed in v1.33.0.',
-    'DeprecationWarning',
-  );
-}
-module.exports = require('../../sdk/primitives/error-classifier.cjs');

package/scripts/lib/event-stream/index.ts DELETED Viewed

@@ -1,29 +0,0 @@
-// scripts/lib/event-stream/index.ts — GDD-DEPRECATION-SHIM (Plan 31-5-06, SDK-05, D-02).
-//
-// Thin deprecation shim. The real implementation moved to
-// sdk/event-stream/index.ts in Plan 31-5-04 (SDK consolidation). This file
-// is re-created at the OLD path so undocumented EXTERNAL importers (anyone
-// who reached into node_modules/@hegemonart/get-design-done/scripts/lib/
-// event-stream/index.ts directly) keep working for one minor grace window.
-//
-// REMOVED IN v1.33.0 (D-02). Grace window: 1.31.5 ships with shims →
-// 1.32.0 still has them → 1.33.0 removes them. Internal callers already use
-// the sdk/ path (Plan 31-5-04/05) — this shim is external-only; 31-5-10's
-// no-stale-internal-refs guard excludes files carrying the
-// GDD-DEPRECATION-SHIM marker above.
-//
-// Runs under --experimental-strip-types, so `export *` re-export is
-// strip-types-clean.
-import { emitWarning } from 'node:process';
-let warned = false;
-if (!warned) {
-  warned = true;
-  emitWarning(
-    'scripts/lib/event-stream/index.ts is deprecated; import sdk/event-stream instead. Removed in v1.33.0.',
-    'DeprecationWarning',
-  );
-}
-export * from '../../../sdk/event-stream/index.ts';

package/scripts/lib/gdd-errors/index.ts DELETED Viewed

@@ -1,29 +0,0 @@
-// scripts/lib/gdd-errors/index.ts — GDD-DEPRECATION-SHIM (Plan 31-5-06, SDK-05, D-02).
-//
-// Thin deprecation shim. The real implementation moved to sdk/errors/index.ts
-// in Plan 31-5-04 (SDK consolidation). This file is re-created at the OLD
-// path so undocumented EXTERNAL importers (anyone who reached into
-// node_modules/@hegemonart/get-design-done/scripts/lib/gdd-errors/index.ts
-// directly) keep working for one minor grace window.
-//
-// REMOVED IN v1.33.0 (D-02). Grace window: 1.31.5 ships with shims →
-// 1.32.0 still has them → 1.33.0 removes them. Internal callers already use
-// the sdk/ path (Plan 31-5-04/05) — this shim is external-only; 31-5-10's
-// no-stale-internal-refs guard excludes files carrying the
-// GDD-DEPRECATION-SHIM marker above.
-//
-// Runs under --experimental-strip-types, so `export *` re-export is
-// strip-types-clean.
-import { emitWarning } from 'node:process';
-let warned = false;
-if (!warned) {
-  warned = true;
-  emitWarning(
-    'scripts/lib/gdd-errors/index.ts is deprecated; import sdk/errors instead. Removed in v1.33.0.',
-    'DeprecationWarning',
-  );
-}
-export * from '../../../sdk/errors/index.ts';

package/scripts/lib/gdd-state/index.ts DELETED Viewed

@@ -1,29 +0,0 @@
-// scripts/lib/gdd-state/index.ts — GDD-DEPRECATION-SHIM (Plan 31-5-06, SDK-05, D-02).
-//
-// Thin deprecation shim. The real implementation moved to sdk/state/index.ts
-// in Plan 31-5-04 (SDK consolidation). This file is re-created at the OLD
-// path so undocumented EXTERNAL importers (anyone who reached into
-// node_modules/@hegemonart/get-design-done/scripts/lib/gdd-state/index.ts
-// directly) keep working for one minor grace window.
-//
-// REMOVED IN v1.33.0 (D-02). Grace window: 1.31.5 ships with shims →
-// 1.32.0 still has them → 1.33.0 removes them. Internal callers already use
-// the sdk/ path (Plan 31-5-04/05) — this shim is external-only; 31-5-10's
-// no-stale-internal-refs guard excludes files carrying the
-// GDD-DEPRECATION-SHIM marker above.
-//
-// Runs under --experimental-strip-types, so `export *` re-export is
-// strip-types-clean.
-import { emitWarning } from 'node:process';
-let warned = false;
-if (!warned) {
-  warned = true;
-  emitWarning(
-    'scripts/lib/gdd-state/index.ts is deprecated; import sdk/state instead. Removed in v1.33.0.',
-    'DeprecationWarning',
-  );
-}
-export * from '../../../sdk/state/index.ts';

package/scripts/lib/iteration-budget.cjs DELETED Viewed

@@ -1,29 +0,0 @@
-'use strict';
-// scripts/lib/iteration-budget.cjs — GDD-DEPRECATION-SHIM (Plan 31-5-06, SDK-05, D-02).
-//
-// Thin deprecation shim. The real implementation moved to
-// sdk/primitives/iteration-budget.cjs in Plan 31-5-04 (SDK consolidation).
-// This file is re-created at the OLD path so undocumented EXTERNAL importers
-// (anyone who reached into node_modules/@hegemonart/get-design-done/scripts/
-// lib/iteration-budget.cjs directly) keep working for one minor grace window.
-//
-// REMOVED IN v1.33.0 (D-02). Grace window: 1.31.5 ships with shims →
-// 1.32.0 still has them → 1.33.0 removes them. Internal callers already use
-// the sdk/ path (Plan 31-5-04/05) — this shim is external-only and 31-5-10's
-// no-stale-internal-refs guard excludes files carrying the GDD-DEPRECATION-SHIM
-// marker above.
-//
-// Emits a DeprecationWarning exactly ONCE per process: the module-level
-// `warned` flag plus Node's module cache (this file is evaluated once per
-// process regardless of how many times it is required).
-let warned = false;
-if (!warned) {
-  warned = true;
-  process.emitWarning(
-    'scripts/lib/iteration-budget.cjs is deprecated; import sdk/primitives/iteration-budget instead. Removed in v1.33.0.',
-    'DeprecationWarning',
-  );
-}
-module.exports = require('../../sdk/primitives/iteration-budget.cjs');

package/scripts/lib/jittered-backoff.cjs DELETED Viewed

@@ -1,29 +0,0 @@
-'use strict';
-// scripts/lib/jittered-backoff.cjs — GDD-DEPRECATION-SHIM (Plan 31-5-06, SDK-05, D-02).
-//
-// Thin deprecation shim. The real implementation moved to
-// sdk/primitives/jittered-backoff.cjs in Plan 31-5-04 (SDK consolidation).
-// This file is re-created at the OLD path so undocumented EXTERNAL importers
-// (anyone who reached into node_modules/@hegemonart/get-design-done/scripts/
-// lib/jittered-backoff.cjs directly) keep working for one minor grace window.
-//
-// REMOVED IN v1.33.0 (D-02). Grace window: 1.31.5 ships with shims →
-// 1.32.0 still has them → 1.33.0 removes them. Internal callers already use
-// the sdk/ path (Plan 31-5-04/05) — this shim is external-only and 31-5-10's
-// no-stale-internal-refs guard excludes files carrying the GDD-DEPRECATION-SHIM
-// marker above.
-//
-// Emits a DeprecationWarning exactly ONCE per process: the module-level
-// `warned` flag plus Node's module cache (this file is evaluated once per
-// process regardless of how many times it is required).
-let warned = false;
-if (!warned) {
-  warned = true;
-  process.emitWarning(
-    'scripts/lib/jittered-backoff.cjs is deprecated; import sdk/primitives/jittered-backoff instead. Removed in v1.33.0.',
-    'DeprecationWarning',
-  );
-}
-module.exports = require('../../sdk/primitives/jittered-backoff.cjs');

package/scripts/lib/lockfile.cjs DELETED Viewed

@@ -1,29 +0,0 @@
-'use strict';
-// scripts/lib/lockfile.cjs — GDD-DEPRECATION-SHIM (Plan 31-5-06, SDK-05, D-02).
-//
-// Thin deprecation shim. The real implementation moved to
-// sdk/primitives/lockfile.cjs in Plan 31-5-04 (SDK consolidation).
-// This file is re-created at the OLD path so undocumented EXTERNAL importers
-// (anyone who reached into node_modules/@hegemonart/get-design-done/scripts/
-// lib/lockfile.cjs directly) keep working for one minor grace window.
-//
-// REMOVED IN v1.33.0 (D-02). Grace window: 1.31.5 ships with shims →
-// 1.32.0 still has them → 1.33.0 removes them. Internal callers already use
-// the sdk/ path (Plan 31-5-04/05) — this shim is external-only and 31-5-10's
-// no-stale-internal-refs guard excludes files carrying the GDD-DEPRECATION-SHIM
-// marker above.
-//
-// Emits a DeprecationWarning exactly ONCE per process: the module-level
-// `warned` flag plus Node's module cache (this file is evaluated once per
-// process regardless of how many times it is required).
-let warned = false;
-if (!warned) {
-  warned = true;
-  process.emitWarning(
-    'scripts/lib/lockfile.cjs is deprecated; import sdk/primitives/lockfile instead. Removed in v1.33.0.',
-    'DeprecationWarning',
-  );
-}
-module.exports = require('../../sdk/primitives/lockfile.cjs');

package/scripts/mcp-servers/gdd-mcp/server.ts DELETED Viewed

@@ -1,35 +0,0 @@
-// scripts/mcp-servers/gdd-mcp/server.ts — GDD-DEPRECATION-SHIM (Plan 31-5-06, SDK-05, D-02).
-//
-// Thin deprecation shim. The real MCP `gdd-mcp` server moved to
-// sdk/mcp/gdd-mcp/server.ts in Plan 31-5-05 (SDK consolidation, D-08). This
-// file is re-created at the OLD path so undocumented EXTERNAL importers /
-// invokers (anyone who reached into node_modules/@hegemonart/get-design-done/
-// scripts/mcp-servers/gdd-mcp/server.ts directly) keep working for one minor
-// grace window.
-//
-// REMOVED IN v1.33.0 (D-02). Grace window: 1.31.5 ships with shims →
-// 1.32.0 still has them → 1.33.0 removes them. The canonical invocation is
-// now the `bin/gdd-mcp` trampoline (Plan 31-5-05); internal callers already
-// use the sdk/ path. This shim is external-only; 31-5-10's
-// no-stale-internal-refs guard excludes files carrying the
-// GDD-DEPRECATION-SHIM marker above.
-//
-// Re-exporting the sdk/ server keeps the library surface (buildServer,
-// runStdio, SERVER_NAME, SERVER_VERSION, TOOL_DESCRIPTIONS, TOOL_READONLY)
-// reachable via the old path. The sdk/ server's own isMain() entry guard
-// keys off process.argv[1] ending with its own sdk/ path, so a re-export
-// does NOT auto-start the server — direct execution should go through the
-// bin trampoline. Runs under --experimental-strip-types.
-import { emitWarning } from 'node:process';
-let warned = false;
-if (!warned) {
-  warned = true;
-  emitWarning(
-    'scripts/mcp-servers/gdd-mcp/server.ts is deprecated; use the bin/gdd-mcp trampoline or import sdk/mcp/gdd-mcp instead. Removed in v1.33.0.',
-    'DeprecationWarning',
-  );
-}
-export * from '../../../sdk/mcp/gdd-mcp/server.ts';

package/scripts/mcp-servers/gdd-state/server.ts DELETED Viewed

@@ -1,34 +0,0 @@
-// scripts/mcp-servers/gdd-state/server.ts — GDD-DEPRECATION-SHIM (Plan 31-5-06, SDK-05, D-02).
-//
-// Thin deprecation shim. The real MCP `gdd-state` server moved to
-// sdk/mcp/gdd-state/server.ts in Plan 31-5-04 (SDK consolidation). This file
-// is re-created at the OLD path so undocumented EXTERNAL importers / invokers
-// (anyone who reached into node_modules/@hegemonart/get-design-done/scripts/
-// mcp-servers/gdd-state/server.ts directly) keep working for one minor grace
-// window.
-//
-// REMOVED IN v1.33.0 (D-02). Grace window: 1.31.5 ships with shims →
-// 1.32.0 still has them → 1.33.0 removes them. The canonical invocation is
-// now the `bin/gdd-state-mcp` trampoline (Plan 31-5-05); internal callers
-// already use the sdk/ path. This shim is external-only; 31-5-10's
-// no-stale-internal-refs guard excludes files carrying the
-// GDD-DEPRECATION-SHIM marker above.
-//
-// Re-exporting the sdk/ server keeps the library surface (buildServer,
-// runStdio) reachable via the old path. The sdk/ server's own isMain()
-// entry guard keys off process.argv[1] ending with its own sdk/ path, so a
-// re-export does NOT auto-start the server — direct execution should go
-// through the bin trampoline. Runs under --experimental-strip-types.
-import { emitWarning } from 'node:process';
-let warned = false;
-if (!warned) {
-  warned = true;
-  emitWarning(
-    'scripts/mcp-servers/gdd-state/server.ts is deprecated; use the bin/gdd-state-mcp trampoline or import sdk/mcp/gdd-state instead. Removed in v1.33.0.',
-    'DeprecationWarning',
-  );
-}
-export * from '../../../sdk/mcp/gdd-state/server.ts';