npm - @hegemonart/get-design-done - Version diff - 1.59.4 → 1.59.5 - Mend

@hegemonart/get-design-done 1.59.4 → 1.59.5

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.

Files changed (10)

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/CHANGELOG.md +30 -0
package/hooks/budget-enforcer.ts +134 -7
package/package.json +1 -1
package/reference/runtime-models.md +15 -15
package/reference/schemas/generated.d.ts +4 -0
package/reference/schemas/runtime-models.schema.json +5 -0
package/scripts/lib/bandit-router/integration.cjs +38 -0
package/scripts/lib/install/installer.cjs +133 -1

package/.claude-plugin/marketplace.json CHANGED

@@ -5,14 +5,14 @@
   },
   "metadata": {
     "description": "Get Design Done — 5-stage agent-orchestrated design pipeline (Brief → Explore → Plan → Design → Verify) for AI coding agents. 64 agents, 95 skills, 39 connection integrations, two MCP servers, opt-in SQLite state backbone, bidirectional Figma write-back, and a reflector-driven self-improvement loop. Cross-runtime install for Claude Code, Codex, Cursor, OpenCode, Gemini, and more.",
-    "version": "1.59.4"
+    "version": "1.59.5"
   },
   "plugins": [
     {
       "name": "get-design-done",
       "source": "./",
       "description": "Agent-orchestrated 5-stage design pipeline (Brief → Explore → Plan → Design → Verify) for AI coding agents. 64 specialized agents, 95 skills, 39 connection integrations (Figma, Refero, Preview, Storybook, Chromatic, Graphify, Linear, Jira, Notion, …), bidirectional Figma write-back, queryable intel store, opt-in SQLite state backbone, and a reflector-driven self-improvement loop. Two MCP servers (gdd-state for typed STATE mutators, gdd-mcp for 13 read-only project-priming tools), tier-aware routing with cost telemetry, and defense-in-depth hooks (protected paths, MCP circuit breaker, injection scanner, budget enforcer). Cross-runtime install for Claude Code, Codex, Cursor, OpenCode, Gemini, Copilot, and more.",
-      "version": "1.59.4",
+      "version": "1.59.5",
       "author": {
         "name": "hegemonart"
       },

package/.claude-plugin/plugin.json CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "get-design-done",
   "short_name": "gdd",
-  "version": "1.59.4",
+  "version": "1.59.5",
   "description": "Agent-orchestrated 5-stage design pipeline (Brief → Explore → Plan → Design → Verify) for AI coding agents. 64 specialized agents, 95 skills, 39 connection integrations (Figma, Refero, Preview, Storybook, Chromatic, Graphify, Linear, Jira, Notion, …), bidirectional Figma write-back, queryable intel store for O(1) design-surface lookups, opt-in SQLite state backbone, and a reflector-driven self-improvement loop. Two MCP servers (`gdd-state` for typed STATE mutators, `gdd-mcp` for 13 read-only project-priming tools), tier-aware agent routing with cost telemetry, defense-in-depth hooks (protected paths, MCP circuit breaker, injection scanner, budget enforcer), and a cross-runtime install layer for Claude Code, Codex, Cursor, OpenCode, Gemini, Copilot, and more.",
   "author": {
     "name": "hegemonart",

package/CHANGELOG.md CHANGED

@@ -4,6 +4,36 @@ All notable changes to get-design-done are documented here. Versions follow [sem
 ---
+## [1.59.5] - 2026-06-05
+Fifth point release of the **v1.59 "Audit Closeout & Honesty Pass"** milestone. Batch H polish + runtime-model provenance.
+### Added
+- **Risk-calibration now learns from the bandit feedback loop.** The bandit's post-spawn `recordOutcome` also updates the per-agent calibration table (best-effort), so calibration tracks the same signal that drives routing. (H2)
+- **Runtime-model provenance guard.** `budget-enforcer` no longer applies a HARD budget cap from a BYOK / unverified runtime-model row (it degrades to advisory), and the schema records the provenance. This closes the risk of an unverified placeholder tier hard-blocking a user. (P1)
+### Fixed
+- **Cursor installs no longer drop co-located skill reference files.** `installMultiArtifact` now carries a skill's sibling `*-procedure.md` reference files alongside `SKILL.md` for Cursor's flat layout, with symmetric uninstall cleanup. (H6)
+### Hardening
+- Pinned the state backup-guard rotation cap (10 slots) + non-empty corruption check with a dedicated test, and verified `state-store.migrate()` is async with complete JSDoc. (H5/H7)
+### Notes
+- Batch-H item H8 (the `composes_with` composition-graph backfill) is owned by Phase 58, not this release; no double-implementation here.
+- Documented follow-ups (each a larger-than-a-slice scope cut): calibration `detectDrift` to reflector consumption, SQLite-header corruption detection in the backup guard, the cursor sibling-carry generalized to all flat-layout runtimes, and the structural `status:` key on runtime-model entries (currently blocked by the parser's allowed-key enforcement).
+### Breaking changes
+None.
+5,070/5,070 tests pass.
+---
 ## [1.59.4] - 2026-06-04
 Fourth point release of the **v1.59 "Audit Closeout & Honesty Pass"** milestone. Skill-surface + build hygiene.

package/hooks/budget-enforcer.ts CHANGED

@@ -191,6 +191,21 @@ const tierResolver = nodeRequire(
   '../scripts/lib/tier-resolver.cjs',
 ) as TierResolverModule;
+// Phase 59.5 P1: runtime-models parser for the BYOK/unverified provenance
+// guard. We read the parsed runtime rows to learn a runtime's `status`
+// ("verified" | "byok" | "unverified"). The parser is pure + never invoked
+// for its model-resolution side here; only to classify the runtime so an
+// unverified row never drives a HARD budget cap. Soft-imported defensively:
+// any parser failure degrades to the built-in verified allowlist below.
+interface RuntimeModelsParserModule {
+  parseRuntimeModels(opts?: { cwd?: string }): {
+    runtimes: Array<{ id: string; status?: string }>;
+  };
+}
+const runtimeModelsParser = nodeRequire(
+  '../scripts/lib/install/parse-runtime-models.cjs',
+) as RuntimeModelsParserModule;
 // Plan 33.6-03 (SC#6, D-08, D-12): OpenRouter tier-resolver adapter. When the
 // user opts in (`.design/config.json#openrouter_enabled: true` OR
 // `OPENROUTER_API_KEY` present), the hook consults this adapter FIRST for a
@@ -506,6 +521,75 @@ export function loadBudget(): ResolvedBudget {
   }
 }
+// ── runtime provenance status (Phase 59.5 P1) ───────────────────────────────
+/**
+ * Phase 59.5 P1: provenance confidence of a runtime's tier→model row, as
+ * documented in reference/runtime-models.md and enumerated by
+ * reference/schemas/runtime-models.schema.json#status.
+ */
+export type RuntimeStatus = 'verified' | 'byok' | 'unverified';
+/**
+ * Built-in verified allowlist: the 4 runtimes whose tier maps are confirmed
+ * against runtime-author docs (the runtime-models.md banner: "4 of 14 ...
+ * verified (claude, codex, gemini, qwen)"). Used as the fallback classifier
+ * when the parsed row carries no structured `status` field yet (the markdown
+ * JSON blocks do not emit `status` at the time of this plan; the schema is
+ * ready, the parser wiring is a deferred follow-up). Once a row DOES carry
+ * `status`, the parsed value takes precedence over this allowlist.
+ */
+const VERIFIED_RUNTIME_IDS: ReadonlySet<string> = new Set([
+  'claude',
+  'codex',
+  'gemini',
+  'qwen',
+]);
+/** Per-process memo of runtime-id → parsed `status` (null until first read). */
+let _runtimeStatusMap: Map<string, RuntimeStatus> | null = null;
+function isRuntimeStatus(v: unknown): v is RuntimeStatus {
+  return v === 'verified' || v === 'byok' || v === 'unverified';
+}
+/**
+ * Resolve a runtime's provenance status. Reads the parsed runtime-models
+ * doc once per process; if a row carries a structured `status` it wins,
+ * otherwise the built-in verified allowlist decides (verified vs unverified).
+ * Fail-open: any parser error → allowlist-only classification. Never throws.
+ *
+ * @param runtimeId runtime id (e.g. 'claude', 'cline'); falsy → 'unverified'.
+ */
+export function runtimeStatus(runtimeId: string | null | undefined): RuntimeStatus {
+  if (typeof runtimeId !== 'string' || runtimeId.length === 0) {
+    return 'unverified';
+  }
+  if (_runtimeStatusMap === null) {
+    _runtimeStatusMap = new Map();
+    try {
+      const parsed = runtimeModelsParser.parseRuntimeModels({ cwd: process.cwd() });
+      const rows = Array.isArray(parsed?.runtimes) ? parsed.runtimes : [];
+      for (const row of rows) {
+        if (row && typeof row.id === 'string' && isRuntimeStatus(row.status)) {
+          _runtimeStatusMap.set(row.id, row.status);
+        }
+      }
+    } catch {
+      // Fail open: parser error degrades to the verified allowlist below.
+    }
+  }
+  const parsedStatus = _runtimeStatusMap.get(runtimeId);
+  if (parsedStatus !== undefined) return parsedStatus;
+  return VERIFIED_RUNTIME_IDS.has(runtimeId) ? 'verified' : 'unverified';
+}
+/** True when the runtime row must NOT drive a HARD budget cap (P1 guard). */
+export function isUnverifiedRuntime(runtimeId: string | null | undefined): boolean {
+  const s = runtimeStatus(runtimeId);
+  return s === 'byok' || s === 'unverified';
+}
 // ── cumulative phase spend (WR-02) ──────────────────────────────────────────
 /**
@@ -1138,7 +1222,40 @@ export async function main(): Promise<void> {
   // no router decision is supplied, behavior is identical to pre-25.
   const perSpawnCap = resolvePerSpawnCap(budget, complexityClass);
-  if (budget.enforcement_mode === 'enforce') {
+  // ── Phase 59.5 P1: BYOK/unverified provenance guard ────────────────────────
+  //
+  // Resolve the runtime id (router-supplied `runtime`, else env detection,
+  // else 'claude', same precedence the cost-recording block uses below) so we
+  // can consult its runtime-models provenance `status` BEFORE the hard-cap
+  // branches. When the runtime row is byok/unverified the resolved per-runtime
+  // model is best-effort (the user's actual provider may diverge from the
+  // Anthropic-default fill), so an estimated cost computed against it must NOT
+  // hard-block the user. We degrade enforce-mode to advisory ('warn') for THIS
+  // spawn only: the per-spawn + per-phase 100% caps stop blocking and surface a
+  // stderr warning instead, while the 80% auto-downgrade still applies (a tier
+  // downgrade is non-blocking and strictly cheaper, so it is safe to keep).
+  // Verified runtimes (claude/codex/gemini/qwen) are unaffected (full hard
+  // enforcement). The project-level cap above is intentionally NOT degraded: it
+  // is governed by total ledger spend, not a per-runtime resolved model.
+  const guardRuntimeId =
+    (typeof routerDecision?.runtime === 'string' && routerDecision.runtime.length > 0
+      ? routerDecision.runtime
+      : runtimeDetect.detect()) ?? 'claude';
+  const runtimeIsUnverified = isUnverifiedRuntime(guardRuntimeId);
+  const effectiveEnforcementMode: ResolvedBudget['enforcement_mode'] =
+    budget.enforcement_mode === 'enforce' && runtimeIsUnverified
+      ? 'warn'
+      : budget.enforcement_mode;
+  if (budget.enforcement_mode === 'enforce' && runtimeIsUnverified) {
+    process.stderr.write(
+      `gdd-budget-enforcer WARN: runtime '${guardRuntimeId}' has provenance status ` +
+        `'${runtimeStatus(guardRuntimeId)}' (BYOK/unverified tier→model row); ` +
+        `hard budget caps degraded to advisory for this spawn so an unverified ` +
+        `cost estimate never hard-blocks you.\n`,
+    );
+  }
+  if (effectiveEnforcementMode === 'enforce') {
     // Branch C: 100% per-spawn cap hard block (class-specific or per_task).
     if (estCost >= perSpawnCap) {
       writeTelemetry({
@@ -1202,12 +1319,24 @@ export async function main(): Promise<void> {
       toolInput._tier_override = 'haiku';
       toolInput._tier_downgraded = true;
     }
-  } else if (budget.enforcement_mode === 'warn') {
+  } else if (effectiveEnforcementMode === 'warn') {
     if (estCost >= perSpawnCap) {
       process.stderr.write(
         `gdd-budget-enforcer WARN: per-spawn cap will be exceeded ($${estCost.toFixed(4)} >= $${perSpawnCap})\n`,
       );
     }
+    // Phase 59.5 P1: when enforce was degraded to advisory for a byok/unverified
+    // runtime, also surface the per-phase breach that the hard branch above
+    // would otherwise have reported (it is skipped for unverified runtimes).
+    if (
+      budget.enforcement_mode === 'enforce' &&
+      phaseSpend + estCost >= budget.per_phase_cap_usd
+    ) {
+      process.stderr.write(
+        `gdd-budget-enforcer WARN: per-phase cap will be exceeded for ${phase} ` +
+          `($${(phaseSpend + estCost).toFixed(4)} >= $${budget.per_phase_cap_usd.toFixed(2)})\n`,
+      );
+    }
   }
   // enforcement_mode === 'log': telemetry only.
@@ -1230,11 +1359,9 @@ export async function main(): Promise<void> {
     toolInput._tier_override ?? toolInput._default_tier ?? 'sonnet';
   // Runtime tag: prefer the router's explicit `runtime` (D-08) field;
   // fall back to env-var detection; default to 'claude' since the .ts
-  // hook itself only runs inside Claude Code.
-  const runtimeId =
-    (typeof routerDecision?.runtime === 'string' && routerDecision.runtime.length > 0
-      ? routerDecision.runtime
-      : runtimeDetect.detect()) ?? 'claude';
+  // hook itself only runs inside Claude Code. Reuse the id already resolved
+  // for the Phase 59.5 P1 provenance guard above (single resolution source).
+  const runtimeId = guardRuntimeId;
   // ── Plan 27.5-02 — bandit consultation ────────────────────────────────────
   //

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@hegemonart/get-design-done",
-  "version": "1.59.4",
+  "version": "1.59.5",
   "description": "A design-quality pipeline for AI coding agents: brief, explore, plan, design, and verify UI work against your design system.",
   "author": "Hegemon",
   "homepage": "https://github.com/hegemonart/get-design-done",

package/reference/runtime-models.md CHANGED

@@ -10,7 +10,7 @@ Single canonical map from Anthropic tier names (`opus|sonnet|haiku`) and runtime
 >
 > Unverified: kilo, copilot, cursor, windsurf, antigravity, augment, trae, codebuddy, cline, opencode.
 >
-> The schema (`reference/schemas/runtime-models.schema.json`) explicitly accepts the placeholder marker so the file ships shape-valid; the unverified-ness is a content gap, not a structural defect.
+> **Provenance status field.** Each row below is annotated with a structured `status` in its section heading: `verified` (confirmed against runtime-author docs), `byok` (BYOK / multi-provider, where the user-configured model may diverge from the Anthropic-default fill), or `unverified` (placeholder fill pending researcher confirmation). The schema (`reference/schemas/runtime-models.schema.json`) accepts an optional `status` enum of exactly these three values, so verified rows MAY omit it and remain shape-valid. The `hooks/budget-enforcer.ts` guard consults this status (or its built-in verified allowlist) so a `byok`/`unverified` row never drives a HARD budget cap: it degrades to advisory enforcement for that spawn. This makes the unverified-ness machine-readable, not just a content gap in prose.
 This file is parsed by `scripts/lib/install/parse-runtime-models.cjs` and consumed by:
@@ -36,7 +36,7 @@ This file is parsed by `scripts/lib/install/parse-runtime-models.cjs` and consum
 ---
-## claude - Claude Code
+## claude - Claude Code (status: verified)
 Anthropic's first-party runtime. Public tier docs at https://docs.anthropic.com/en/docs/about-claude/models. Seed picks per CONTEXT.md D-02.
@@ -66,7 +66,7 @@ Anthropic's first-party runtime. Public tier docs at https://docs.anthropic.com/
 ---
-## codex - OpenAI Codex CLI
+## codex - OpenAI Codex CLI (status: verified)
 OpenAI's Codex CLI runtime. Public tier docs at https://platform.openai.com/docs/models. Seed picks per CONTEXT.md D-02.
@@ -96,7 +96,7 @@ OpenAI's Codex CLI runtime. Public tier docs at https://platform.openai.com/docs
 ---
-## gemini - Gemini CLI
+## gemini - Gemini CLI (status: verified)
 Google's Gemini CLI runtime. Public tier docs at https://ai.google.dev/gemini-api/docs/models. Seed picks per CONTEXT.md D-02.
@@ -126,7 +126,7 @@ Google's Gemini CLI runtime. Public tier docs at https://ai.google.dev/gemini-ap
 ---
-## qwen - Qwen Code
+## qwen - Qwen Code (status: verified)
 Alibaba's Qwen Code runtime. Public tier docs at https://github.com/QwenLM/qwen-code. Seed picks per CONTEXT.md D-02.
@@ -156,7 +156,7 @@ Alibaba's Qwen Code runtime. Public tier docs at https://github.com/QwenLM/qwen-
 ---
-## kilo - Kilo Code
+## kilo - Kilo Code (status: byok)
 Kilo Code adapter - multi-provider, Anthropic-default fill until runtime-author docs confirm. Researcher fill needed (CONTEXT.md D-02).
@@ -186,7 +186,7 @@ Kilo Code adapter - multi-provider, Anthropic-default fill until runtime-author
 ---
-## copilot - GitHub Copilot CLI
+## copilot - GitHub Copilot CLI (status: byok)
 GitHub Copilot CLI - multi-provider routing under the hood. Researcher fill needed (CONTEXT.md D-02).
@@ -216,7 +216,7 @@ GitHub Copilot CLI - multi-provider routing under the hood. Researcher fill need
 ---
-## cursor - Cursor
+## cursor - Cursor (status: byok)
 Cursor IDE/CLI - multi-provider routing. Researcher fill needed (CONTEXT.md D-02).
@@ -246,7 +246,7 @@ Cursor IDE/CLI - multi-provider routing. Researcher fill needed (CONTEXT.md D-02
 ---
-## windsurf - Windsurf
+## windsurf - Windsurf (status: byok)
 Windsurf (formerly Codeium) - multi-provider Cascade router. Researcher fill needed (CONTEXT.md D-02).
@@ -276,7 +276,7 @@ Windsurf (formerly Codeium) - multi-provider Cascade router. Researcher fill nee
 ---
-## antigravity - Antigravity
+## antigravity - Antigravity (status: unverified)
 Antigravity - Google's agentic coding platform. Researcher fill needed (CONTEXT.md D-02).
@@ -306,7 +306,7 @@ Antigravity - Google's agentic coding platform. Researcher fill needed (CONTEXT.
 ---
-## augment - Augment
+## augment - Augment (status: byok)
 Augment Code - multi-provider agentic IDE. Researcher fill needed (CONTEXT.md D-02).
@@ -336,7 +336,7 @@ Augment Code - multi-provider agentic IDE. Researcher fill needed (CONTEXT.md D-
 ---
-## trae - Trae
+## trae - Trae (status: unverified)
 Trae - single-model session runtime per CONTEXT.md D-02 example. `single_tier: true` annotates the row. Researcher fill needed.
@@ -367,7 +367,7 @@ Trae - single-model session runtime per CONTEXT.md D-02 example. `single_tier: t
 ---
-## codebuddy - CodeBuddy
+## codebuddy - CodeBuddy (status: byok)
 CodeBuddy (Tencent) - multi-provider routing. Researcher fill needed (CONTEXT.md D-02).
@@ -397,7 +397,7 @@ CodeBuddy (Tencent) - multi-provider routing. Researcher fill needed (CONTEXT.md
 ---
-## cline - Cline
+## cline - Cline (status: byok)
 Cline (formerly Claude Dev) - multi-provider VS Code agent. Researcher fill needed (CONTEXT.md D-02).
@@ -427,7 +427,7 @@ Cline (formerly Claude Dev) - multi-provider VS Code agent. Researcher fill need
 ---
-## opencode - OpenCode
+## opencode - OpenCode (status: byok)
 OpenCode - open-source AI coding agent, BYOK multi-provider. Researcher fill needed (CONTEXT.md D-02).

package/reference/schemas/generated.d.ts CHANGED

@@ -1042,6 +1042,10 @@ export interface RuntimeEntry {
    * When true, the runtime exposes a single model that maps to all three tiers (D-02). Downstream consumers (router, budget-enforcer) may render a UI affordance noting tier-selection has no cost effect for this runtime.
    */
   single_tier?: boolean;
+  /**
+   * Provenance confidence of this runtime's tier map. 'verified' = confirmed against runtime-author docs (claude, codex, gemini, qwen). 'byok' = BYOK / multi-provider runtime whose user-configured model may diverge from the Anthropic-default fill. 'unverified' = placeholder fill pending researcher confirmation. Optional: rows omitting this field are treated as unverified-unless-stated by consumers, and verified rows MAY omit it. The budget-enforcer guard reads this (or its built-in verified allowlist) so a byok/unverified row never drives a HARD budget cap (degrades to advisory).
+   */
+  status?: 'verified' | 'byok' | 'unverified';
   /**
    * Map of canonical Anthropic tier names (D-03) to the runtime's concrete model identifier. All three keys are required even when single_tier=true (assign the same model three times).
    */

package/reference/schemas/runtime-models.schema.json CHANGED

@@ -47,6 +47,11 @@
           "type": "boolean",
           "description": "When true, the runtime exposes a single model that maps to all three tiers (D-02). Downstream consumers (router, budget-enforcer) may render a UI affordance noting tier-selection has no cost effect for this runtime."
         },
+        "status": {
+          "type": "string",
+          "enum": ["verified", "byok", "unverified"],
+          "description": "Provenance confidence of this runtime's tier map. 'verified' = confirmed against runtime-author docs (claude, codex, gemini, qwen). 'byok' = BYOK / multi-provider runtime whose user-configured model may diverge from the Anthropic-default fill. 'unverified' = placeholder fill pending researcher confirmation. Optional: rows omitting this field are treated as unverified-unless-stated by consumers, and verified rows MAY omit it. The budget-enforcer guard reads this (or its built-in verified allowlist) so a byok/unverified row never drives a HARD budget cap (degrades to advisory)."
+        },
         "tier_to_model": {
           "type": "object",
           "additionalProperties": false,

package/scripts/lib/bandit-router/integration.cjs CHANGED

@@ -35,6 +35,12 @@
 const banditRouter = require('../bandit-router.cjs');
 const adaptiveModeLib = require('../adaptive-mode.cjs');
+// Phase 56 (CAL-01) per-agent risk calibration. recordOutcome feeds the same
+// {agent, status} signal it gives the bandit into this table so calibration
+// learns from the post-spawn outcome too. Lazy-tolerant: the call is wrapped in
+// its own best-effort try/catch (D-04) so a calibration write can never break
+// the bandit path.
+const calibration = require('../risk/calibration.cjs');
 const DELEGATE_NONE = banditRouter.DELEGATE_NONE; // 'none'
 const VALID_DELEGATES = banditRouter.DEFAULT_DELEGATES; // ['none','gemini','codex','cursor','copilot','qwen']
@@ -299,6 +305,38 @@ function recordOutcome(input) {
     }
   }
+  // CAL-01: also fold the same outcome into the per-agent risk calibration
+  // table so the calibration layer (compute-risk feedback) learns from the
+  // identical post-spawn signal the bandit just saw. Independent best-effort
+  // try/catch (D-04): a calibration write failure must NEVER throw into or
+  // break the bandit path above. The bandit signal carries no emitted risk
+  // score, so `risk` degrades to 0 via normalizeRecord; status drives the
+  // correctness axis (completed → applied-correct, anything else → not-correct).
+  // Writes to calibration.DEFAULT_CALIBRATION_PATH ('.design/telemetry/
+  // calibration.json') under baseDir — the module's own canonical location.
+  try {
+    calibration.updateCalibration(
+      input.agent,
+      {
+        accepted: true,
+        post_apply_correct: input.status === 'completed',
+      },
+      { root: input.baseDir, baseDir: input.baseDir },
+    );
+  } catch (err) {
+    if (process.env.GDD_BANDIT_DEBUG === '1') {
+      try {
+        process.stderr.write(
+          '[bandit-integration] recordOutcome calibration swallowed: ' +
+            (err && err.message ? err.message : String(err)) +
+            '\n',
+        );
+      } catch {
+        /* swallow */
+      }
+    }
+  }
   return undefined;
 }

package/scripts/lib/install/installer.cjs CHANGED

@@ -334,6 +334,58 @@ function listSourceSkills(skillsRoot) {
     });
 }
+/**
+ * Enumerate co-located sibling `*.md` reference files for a skill.
+ *
+ * A skill source directory may ship reference files next to SKILL.md
+ * (e.g. `<name>-procedure.md`, `<name>-rules.md`, `cache-policy.md`).
+ * SKILL.md references these via relative links; if they are not installed
+ * the links resolve to nothing. This returns the top-level sibling `.md`
+ * files only (NOT SKILL.md itself, NOT files in nested subdirectories).
+ *
+ * Best-effort: any fs error yields an empty list (never throws). A single
+ * unreadable skill dir must not crash the whole install.
+ *
+ * @param {string} skillSrcDir  absolute path to `<skillsRoot>/<name>`
+ * @returns {string[]} basenames of sibling `.md` files (excluding SKILL.md)
+ */
+function listSiblingRefFiles(skillSrcDir) {
+  let entries;
+  try {
+    entries = fs.readdirSync(skillSrcDir, { withFileTypes: true });
+  } catch {
+    return [];
+  }
+  return entries
+    .filter((ent) => {
+      if (!ent.isFile()) return false;
+      if (ent.name === 'SKILL.md') return false;
+      return ent.name.toLowerCase().endsWith('.md');
+    })
+    .map((ent) => ent.name);
+}
+/**
+ * Wrap a passthrough sibling reference file's content with a plugin
+ * fingerprint header so foreign-file protection + uninstall can recognize
+ * it as plugin-owned. Idempotent: re-wrapping a file that already carries
+ * the fingerprint returns it unchanged.
+ *
+ * The fingerprint matches `merge.cjs#GDD_ADAPTER_FINGERPRINT`, the same
+ * marker every SKILL converter injects via `shared.ensureAdapterHeader`,
+ * so `isPluginOwned` treats the sibling as owned.
+ *
+ * @param {string} raw  source sibling file content
+ * @returns {string}
+ */
+function fingerprintSiblingRef(raw) {
+  const text = typeof raw === 'string' ? raw : '';
+  if (isPluginOwned(text)) return text;
+  const header =
+    '<!-- gdd: auto-generated from Claude SKILL.md. Reference adapter -->\n\n';
+  return header + text;
+}
 /**
  * Install all artifacts for a `multi-artifact` runtime.
  *
@@ -395,6 +447,48 @@ function installMultiArtifact(runtime, configDir, dryRun, opts) {
         action: writeResult.action,
         ...(writeResult.reason ? { reason: writeResult.reason } : {}),
       });
+      // Batch H6: carry co-located sibling `*.md` reference files alongside
+      // SKILL.md. The skills layout only stages SKILL.md per skill, so
+      // reference siblings (e.g. `<name>-procedure.md`) are otherwise lost.
+      // Scoped to cursor (the audited flat-layout runtime); other runtimes
+      // keep their prior single-SKILL.md behavior. Siblings are passthrough
+      // copies fingerprinted so foreign-file protection + uninstall treat
+      // them as plugin-owned. Broader skillsKind-runtime carry is deferred
+      // (see converters/cursor.cjs KNOWN LIMITATION).
+      if (kind.kind === 'skills' && runtime.id === 'cursor' && item.srcPath) {
+        const skillSrcDir = path.dirname(item.srcPath);
+        const skillDestDir = path.dirname(destPath);
+        for (const sibling of listSiblingRefFiles(skillSrcDir)) {
+          let rawSibling;
+          try {
+            rawSibling = fs.readFileSync(
+              path.join(skillSrcDir, sibling),
+              'utf8',
+            );
+          } catch (err) {
+            perFile.push({
+              kind: 'skill-ref',
+              path: path.join(skillDestDir, sibling),
+              action: 'skipped-foreign',
+              reason: `Could not read sibling ${sibling}: ${err.message}`,
+            });
+            continue;
+          }
+          const siblingDest = path.join(skillDestDir, sibling);
+          const siblingWrite = writeFingerprinted(
+            siblingDest,
+            fingerprintSiblingRef(rawSibling),
+            dryRun,
+          );
+          perFile.push({
+            kind: 'skill-ref',
+            path: siblingDest,
+            action: siblingWrite.action,
+            ...(siblingWrite.reason ? { reason: siblingWrite.reason } : {}),
+          });
+        }
+      }
     }
   }
@@ -489,7 +583,45 @@ function uninstallMultiArtifact(runtime, configDir, dryRun, opts) {
       // If we removed a SKILL.md, remember to trim its now-empty parent.
       if (kind.kind === 'skills') {
-        skillDirsToTrim.push(path.dirname(destPath));
+        const skillDestDir = path.dirname(destPath);
+        skillDirsToTrim.push(skillDestDir);
+        // Batch H6: symmetric cleanup for the sibling reference files the
+        // cursor install carries alongside SKILL.md. Remove only the
+        // plugin-owned siblings so a now-empty dir can be trimmed below;
+        // user-authored siblings are left in place (foreign-file discipline).
+        if (runtime.id === 'cursor') {
+          for (const sibling of listSiblingRefFiles(skillDestDir)) {
+            const siblingPath = path.join(skillDestDir, sibling);
+            let siblingContent;
+            try {
+              siblingContent = fs.readFileSync(siblingPath, 'utf8');
+            } catch (err) {
+              perFile.push({
+                kind: 'skill-ref',
+                path: siblingPath,
+                action: 'skipped-foreign',
+                reason: `Could not read sibling ${sibling}: ${err.message}`,
+              });
+              continue;
+            }
+            if (!isPluginOwned(siblingContent)) {
+              perFile.push({
+                kind: 'skill-ref',
+                path: siblingPath,
+                action: 'skipped-foreign',
+                reason: `Existing ${sibling} was not authored by this plugin; not removing.`,
+              });
+              continue;
+            }
+            if (!dryRun) fs.unlinkSync(siblingPath);
+            perFile.push({
+              kind: 'skill-ref',
+              path: siblingPath,
+              action: 'removed',
+            });
+          }
+        }
       }
     }
   }