@hegemonart/get-design-done 1.54.0 → 1.56.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/CHANGELOG.md +92 -0
  4. package/README.md +6 -0
  5. package/SKILL.md +1 -0
  6. package/agents/design-fixer.md +16 -0
  7. package/bin/gdd-dashboard +91 -0
  8. package/dist/claude-code/.claude/skills/override/SKILL.md +86 -0
  9. package/hooks/gdd-decision-injector.js +58 -0
  10. package/hooks/gdd-fact-force.js +345 -0
  11. package/hooks/gdd-risk-gate.js +406 -0
  12. package/hooks/hooks.json +18 -0
  13. package/package.json +2 -1
  14. package/reference/schemas/events.schema.json +61 -1
  15. package/reference/skill-graph.md +2 -1
  16. package/scripts/lib/dashboard/graph-html.cjs +0 -0
  17. package/scripts/lib/health-mirror/index.cjs +146 -1
  18. package/scripts/lib/manifest/skills.json +8 -0
  19. package/scripts/lib/risk/calibration.cjs +385 -0
  20. package/scripts/lib/risk/compute-risk.cjs +229 -0
  21. package/scripts/lib/risk/consumers.cjs +211 -0
  22. package/scripts/lib/risk/override.cjs +87 -0
  23. package/scripts/lib/risk/route.cjs +59 -0
  24. package/scripts/lib/risk/tables.cjs +221 -0
  25. package/sdk/cli/commands/dashboard.ts +419 -0
  26. package/sdk/cli/index.js +253 -2
  27. package/sdk/cli/index.ts +7 -0
  28. package/sdk/dashboard/data/_pkg-root.cjs +92 -0
  29. package/sdk/dashboard/data/cost-aggregator.cjs +187 -0
  30. package/sdk/dashboard/data/discovery.cjs +297 -0
  31. package/sdk/dashboard/data/risk-surface.cjs +136 -0
  32. package/sdk/dashboard/data/source.cjs +576 -0
  33. package/sdk/dashboard/tui/ansi.cjs +355 -0
  34. package/sdk/dashboard/tui/index.cjs +778 -0
  35. package/sdk/mcp/gdd-mcp/server.js +70 -0
  36. package/skills/override/SKILL.md +86 -0
@@ -8,7 +8,7 @@
8
8
  // Surface:
9
9
  // async getHealthChecks(rootDir) → { checks: HealthCheck[] }
10
10
  //
11
- // The 9 checks (in stable order) are:
11
+ // The 10 checks (in stable order) are:
12
12
  // 1. claude_md — CLAUDE.md presence
13
13
  // 2. planning_dir — .planning/ presence
14
14
  // 3. design_dir — .design/ presence
@@ -18,6 +18,7 @@
18
18
  // 7. skill_discipline — using-gdd bootstrap + SessionStart inject (Plan 32-07)
19
19
  // 8. harness_freshness — per-harness last_verified age (Phase 44)
20
20
  // 9. stack_addendums — Phase 54 coverage: N/M detected stacks have addendums
21
+ // 10. dashboard_reachable — Phase 55: bin/gdd-dashboard on disk + data plane loads
21
22
  //
22
23
  // Check 5 was added in Plan 30-06 — surfaces the report-issue kill-switch
23
24
  // (env or config disable) so users can verify why the command is
@@ -48,6 +49,22 @@
48
49
  // inject-using-gdd entry)
49
50
  // status: 'ok' when ready, 'warn' otherwise. PURE read-only (rootDir-relative
50
51
  // file + JSON inspection only) — NEVER throws, NEVER networks.
52
+ //
53
+ // Check 10 was added in Phase 55 — surfaces whether the GDD dashboard is
54
+ // reachable so a user running /gdd:health knows the `gdd dashboard` entrypoint
55
+ // is wired. GRACEFUL-ABSENT by design (D-8 risk surfacing precedent): the
56
+ // dashboard is an opt-in, read-only surface that also works via file-scrape, so
57
+ // a missing bin or absent data plane is a 'warn' (actionable note), NEVER a
58
+ // hard 'fail'. The status is 'ok' when BOTH the bin/gdd-dashboard trampoline
59
+ // resolves on disk (located via a package-root walk-up — the Phase 53/54 lesson,
60
+ // NEVER a fixed __dirname jump) AND the dashboard data plane module
61
+ // (sdk/dashboard/data/source.cjs) loads + exposes loadDashboardModel. The detail
62
+ // line is one of:
63
+ // - "dashboard: bin/gdd-dashboard present; data plane ok"
64
+ // - "dashboard: bin missing" (trampoline not on disk)
65
+ // - "dashboard: data plane unavailable" (bin present; source.cjs absent, fails to load, or lacks loadDashboardModel)
66
+ // - "dashboard: bin missing; data plane unavailable"
67
+ // PURE read-only (fs.statSync + a wrapped require) — NEVER throws, NEVER networks.
51
68
 
52
69
  const fs = require('node:fs');
53
70
  const path = require('node:path');
@@ -310,6 +327,39 @@ async function getHealthChecks(rootDir) {
310
327
  checks.push({ name: 'stack_addendums', status, detail });
311
328
  }
312
329
 
330
+ // 10. dashboard_reachable — Phase 55. GRACEFUL-ABSENT: reports whether the
331
+ // GDD dashboard entrypoint is wired (bin/gdd-dashboard on disk) AND its data
332
+ // plane module loads. NEVER 'fail' — a missing bin is a 'warn' note because
333
+ // the dashboard is opt-in and also works via file-scrape. PURE read-only
334
+ // (fs.statSync + a wrapped require); NEVER throws, NEVER networks.
335
+ {
336
+ let status;
337
+ let detail;
338
+ try {
339
+ const gddRoot = resolveDashboardRoot(rootDir);
340
+ const binPresent = dashboardBinResolves(gddRoot);
341
+ const dataPlaneOk = dashboardDataPlaneLoads(gddRoot);
342
+ if (binPresent && dataPlaneOk) {
343
+ status = 'ok';
344
+ detail = 'dashboard: bin/gdd-dashboard present; data plane ok';
345
+ } else {
346
+ status = 'warn';
347
+ if (!binPresent && !dataPlaneOk) {
348
+ detail = 'dashboard: bin missing; data plane unavailable';
349
+ } else if (!binPresent) {
350
+ detail = 'dashboard: bin missing';
351
+ } else {
352
+ detail = 'dashboard: data plane unavailable';
353
+ }
354
+ }
355
+ } catch {
356
+ // Absolute safety net — the health probe must never crash on this check.
357
+ status = 'warn';
358
+ detail = 'dashboard: unavailable';
359
+ }
360
+ checks.push({ name: 'dashboard_reachable', status, detail });
361
+ }
362
+
313
363
  return { checks };
314
364
  }
315
365
 
@@ -402,4 +452,99 @@ function figmaVariablesBlockedLocally(rootDir) {
402
452
  }
403
453
  }
404
454
 
455
/**
 * Find the GDD package root by climbing from `startDir` toward the filesystem
 * root. A directory qualifies when its package.json parses and its `name` is
 * either the bare "get-design-done" or any scoped "<scope>/get-design-done".
 * The climb inspects at most 12 directories and stops early at the filesystem
 * root. Every failure (unreadable or garbage package.json, invalid `startDir`)
 * is swallowed, so this never throws.
 *
 * @param {string} startDir directory to start climbing from
 * @returns {string|null} absolute package-root dir, or null when none is found
 */
function findGddPackageRoot(startDir) {
  const MAX_LEVELS = 12;
  const isGddName = (name) =>
    typeof name === 'string' &&
    (name === 'get-design-done' || /\/get-design-done$/.test(name));

  try {
    let current = path.resolve(startDir);
    for (let remaining = MAX_LEVELS; remaining > 0; remaining -= 1) {
      try {
        const manifestText = fs.readFileSync(path.join(current, 'package.json'), 'utf8');
        const manifest = JSON.parse(manifestText);
        if (manifest && isGddName(manifest.name)) {
          return current;
        }
      } catch {
        // Missing or unparseable package.json here: try the parent instead.
      }
      const up = path.dirname(current);
      if (up === current) {
        // dirname() is a fixed point only at the filesystem root.
        return null;
      }
      current = up;
    }
  } catch {
    // e.g. a non-string startDir makes path.resolve throw; report "not found".
  }
  return null;
}
490
+
491
/**
 * Choose the GDD package root that the dashboard probe inspects.
 *
 * The project root under inspection is tried first: if `rootDir` lies inside a
 * directory tree whose package.json carries the GDD name, that tree wins and
 * no other location is consulted. Only when that lookup finds nothing does the
 * search restart from this module's own directory (`__dirname`). Both lookups
 * use the same bounded walk-up (findGddPackageRoot).
 *
 * @param {string} rootDir project root passed to getHealthChecks
 * @returns {string|null} the resolved GDD root, or null when neither lookup succeeds
 */
function resolveDashboardRoot(rootDir) {
  return findGddPackageRoot(rootDir) || findGddPackageRoot(__dirname);
}
511
+
512
/**
 * Report whether `<gddRoot>/bin/gdd-dashboard` exists as a regular file.
 * Uses `fs.statSync`, which follows symlinks, so a symlink pointing at a file
 * counts as present while a directory of that name does not. Any stat failure
 * (missing path, permissions) is reported as "absent"; this never throws.
 *
 * @param {string|null} gddRoot GDD package root, or null when unresolved
 * @returns {boolean} true iff bin/gdd-dashboard is a file on disk
 */
function dashboardBinResolves(gddRoot) {
  let present = false;
  if (gddRoot) {
    const binPath = path.join(gddRoot, 'bin', 'gdd-dashboard');
    try {
      present = fs.statSync(binPath).isFile();
    } catch {
      present = false;
    }
  }
  return present;
}
529
+
530
/**
 * Report whether the dashboard data-plane module under `gddRoot`
 * (sdk/dashboard/data/source.cjs) can be required and exports a
 * `loadDashboardModel` function. A missing module, an error thrown while
 * loading it, or a module without that export all yield false; this never
 * throws.
 *
 * @param {string|null} gddRoot GDD package root, or null when unresolved
 * @returns {boolean} true iff source.cjs loads and exposes loadDashboardModel
 */
function dashboardDataPlaneLoads(gddRoot) {
  if (!gddRoot) {
    return false;
  }
  const modulePath = path.join(gddRoot, 'sdk', 'dashboard', 'data', 'source.cjs');
  try {
    const dataPlane = require(modulePath);
    return Boolean(dataPlane) && typeof dataPlane.loadDashboardModel === 'function';
  } catch {
    // Any load failure degrades to "unavailable" rather than crashing the probe.
    return false;
  }
}
549
+
405
550
  module.exports = { getHealthChecks };
@@ -319,6 +319,14 @@
319
319
  "user_invocable": true,
320
320
  "tools": "Read, Bash, Grep, Write"
321
321
  },
322
+ {
323
+ "name": "override",
324
+ "description": "Escalation surface for a risk-blocked action or a fact-force gate. Use when the Phase 56 risk gate blocked a writer action (suggested_action=block) and a reviewer has signed off, or when the first-write fact-force gate is holding a file you have legitimately reviewed. Activates for requests involving overriding a blocked edit, approving a high-risk change, or clearing a fact-force hold on a path.",
325
+ "argument_hint": "<finding-id | factforce <path>> [--approver <who>] [--reason <text>]",
326
+ "user_invocable": true,
327
+ "tools": "Read, Write, Bash, Grep, Glob",
328
+ "registered_in_phase": "56"
329
+ },
322
330
  {
323
331
  "name": "pause",
324
332
  "description": "Write a numbered checkpoint so work can resume in a new session without re-running completed stages.",
@@ -0,0 +1,385 @@
1
+ 'use strict';
2
+ /**
3
+ * scripts/lib/risk/calibration.cjs — Phase 56 (CAL-01) per-agent risk
4
+ * calibration + the bandit reward bridge.
5
+ *
6
+ * The risk scorer (scripts/lib/risk/compute-risk.cjs) is a STATIC table-driven
7
+ * model — it cannot learn that a particular writer agent chronically under- or
8
+ * over-scores its own actions. This module is the feedback layer: it records
9
+ * per-agent outcomes in a rolling-50 window, derives three calibration
10
+ * statistics, flags drift, and feeds a reward signal into the Phase 23.5
11
+ * Thompson-sampling bandit (scripts/lib/bandit-router.cjs) so the adaptive
12
+ * router can react to a mis-calibrated agent over time.
13
+ *
14
+ * Persistence:
15
+ * .design/telemetry/calibration.json
16
+ * {
17
+ * schema_version: '56.0',
18
+ * generated_at: ISO,
19
+ * agents: {
20
+ * "<agent>": {
21
+ * window: [ { risk, accepted, user_undo, post_apply_correct }, … ≤50 ],
22
+ * mean_risk_emitted: number, // mean(window.risk)
23
+ * override_rate: number, // P(rejected OR undone)
24
+ * post_apply_correctness: number // P(correct | applied)
25
+ * }, …
26
+ * }
27
+ * }
28
+ * Atomic .tmp + rename (mirrors instinct-store.save / ds-arms.save). The
29
+ * `.design/` tree is gitignored + worktree-local (R5).
30
+ *
31
+ * Purity contract:
32
+ * - detectDrift + riskReward are PURE (no I/O, no Date.now / Math.random;
33
+ * the DRIFT thresholds are frozen). Deterministic for the suite.
34
+ * - updateCalibration reads/writes the FS, but only via the injected
35
+ * `{root}` (or `file`) so tests run hermetically under a tmpdir. The only
36
+ * non-determinism is `generated_at` (an ISO stamp), which callers can pin
37
+ * via opts.now.
38
+ * - recordRiskOutcome calls bandit-router.update BEST-EFFORT — it never
39
+ * throws (a telemetry write must never break a hook / agent turn).
40
+ *
41
+ * Zero new dependency. CommonJS to match the scripts/lib/ siblings.
42
+ */
43
+
44
+ const fs = require('node:fs');
45
+ const path = require('node:path');
46
+
47
/** Schema tag written into the calibration store on every save. */
const SCHEMA_VERSION = '56.0';

/** Default store location, resolved against the project root. */
const DEFAULT_CALIBRATION_PATH = '.design/telemetry/calibration.json';

/** Rolling window length (CAL-01): only the newest 50 outcomes per agent are kept. */
const WINDOW_SIZE = 50;

/**
 * Frozen drift bands consumed by detectDrift.
 *
 *   under_scoring — mean emitted risk is LOW while the override rate is HIGH:
 *                   the scores understate the danger.
 *   over_scoring  — mean emitted risk is HIGH while applied actions are almost
 *                   always correct and overrides are rare: the scores are
 *                   needlessly alarmist.
 */
const DRIFT = Object.freeze({
  under_scoring: Object.freeze({
    mean_risk_max: 0.35,
    override_rate_min: 0.30,
  }),
  over_scoring: Object.freeze({
    mean_risk_min: 0.65,
    correctness_min: 0.90,
    override_rate_max: 0.10,
  }),
});
70
+
71
/**
 * Clamp a value into the closed interval [0, 1].
 *
 * Non-number inputs and NaN collapse to 0. Infinite values are clamped like
 * any other out-of-range number: -Infinity -> 0, +Infinity -> 1.
 *
 * @param {number} n
 * @returns {number} n limited to [0, 1]
 */
function clamp01(n) {
  const usable = typeof n === 'number' && !Number.isNaN(n);
  if (!usable) {
    return 0;
  }
  return n < 0 ? 0 : n > 1 ? 1 : n;
}
82
+
83
/**
 * Work out where the calibration store lives on disk.
 *
 * An absolute `opts.file` is used verbatim. A relative `opts.file`, or the
 * default store path when no file is given, is resolved against the first
 * defined base among `opts.root`, `opts.baseDir`, and the current working
 * directory.
 *
 * @param {{file?:string, root?:string, baseDir?:string}} [opts]
 * @returns {{file:string, dir:string}} the store path and its parent directory
 */
function paths(opts = {}) {
  // Lazy so process.cwd() is consulted only when a base is actually needed.
  const baseDir = () => opts.root ?? opts.baseDir ?? process.cwd();
  const target = opts.file || DEFAULT_CALIBRATION_PATH;
  const file = path.isAbsolute(target) ? target : path.resolve(baseDir(), target);
  return { file, dir: path.dirname(file) };
}
101
+
102
/**
 * Load the calibration store, falling back to a fresh empty envelope when the
 * file is absent, unreadable, not valid JSON, or structurally unusable.
 *
 * `agents` must be a plain keyed object. An array is rejected too: agent names
 * are string keys, and JSON.stringify drops non-index properties of an array,
 * so accepting `"agents": []` would make every later write silently lose its
 * data.
 *
 * @param {{file?:string, root?:string, baseDir?:string}} [opts]
 * @returns {{schema_version:string, generated_at?:string, agents:object}}
 */
function load(opts = {}) {
  const { file } = paths(opts);
  const fresh = () => ({ schema_version: SCHEMA_VERSION, agents: {} });

  if (!fs.existsSync(file)) {
    return fresh();
  }
  try {
    const data = JSON.parse(fs.readFileSync(file, 'utf8'));
    const agents = data && typeof data === 'object' ? data.agents : null;
    if (agents === null || typeof agents !== 'object' || Array.isArray(agents)) {
      return fresh();
    }
    return data;
  } catch {
    // Corrupt or unreadable store: start over rather than break the caller.
    return fresh();
  }
}
122
+
123
/**
 * Persist the calibration store atomically: write `<file>.tmp`, then rename it
 * over the target. The parent directory is created if needed.
 *
 * Side effect: `store` is mutated in place; `schema_version` is set to the
 * current schema and `generated_at` to `opts.now` (a Date or a pre-formatted
 * string) or, when absent, the current time.
 *
 * If the write or the rename fails, the temp file is removed (best-effort) so
 * a failed save leaves no stale `.tmp` behind, and the original error is
 * rethrown unchanged.
 *
 * @param {object} store
 * @param {{file?:string, root?:string, baseDir?:string, now?:string|Date}} [opts]
 * @returns {string} absolute path written
 * @throws whatever the underlying fs call or JSON.stringify threw
 */
function save(store, opts = {}) {
  const { file, dir } = paths(opts);
  fs.mkdirSync(dir, { recursive: true });
  store.schema_version = SCHEMA_VERSION;
  store.generated_at =
    opts.now instanceof Date
      ? opts.now.toISOString()
      : typeof opts.now === 'string'
        ? opts.now
        : new Date().toISOString();
  const tmp = file + '.tmp';
  try {
    fs.writeFileSync(tmp, JSON.stringify(store, null, 2) + '\n');
    fs.renameSync(tmp, file);
  } catch (err) {
    try {
      fs.rmSync(tmp, { force: true });
    } catch {
      // Cleanup is best-effort; the original failure is the one worth reporting.
    }
    throw err;
  }
  return file;
}
144
+
145
/**
 * Turn a raw outcome into the canonical window record, degrading unknown or
 * missing fields safely:
 *
 *   risk               -> number clamped to [0, 1]; anything non-numeric -> 0
 *   accepted           -> boolean; only `undefined` defaults to true (any other
 *                         value is coerced with Boolean)
 *   user_undo          -> boolean; defaults to false
 *   post_apply_correct -> boolean, or null when undefined/null ("not yet known")
 *
 * A non-object `record` is treated as an empty record.
 *
 * @param {object} record
 * @returns {{risk:number, accepted:boolean, user_undo:boolean, post_apply_correct:(boolean|null)}}
 */
function normalizeRecord(record) {
  const source = record && typeof record === 'object' ? record : {};
  const {
    risk,
    accepted,
    user_undo: undone,
    post_apply_correct: verdict,
  } = source;

  return {
    risk: clamp01(typeof risk === 'number' ? risk : 0),
    accepted: accepted === undefined ? true : Boolean(accepted),
    user_undo: Boolean(undone),
    // `== null` matches both null and undefined: the verdict is still pending.
    post_apply_correct: verdict == null ? null : Boolean(verdict),
  };
}
170
+
171
/**
 * Derive the three rolling statistics from a window of normalized records.
 *
 *   mean_risk_emitted      = average of `risk`                      (0 if empty)
 *   override_rate          = share of records that were rejected
 *                            (`!accepted`) or undone (`user_undo`)  (0 if empty)
 *   post_apply_correctness = among applied records (accepted and not undone)
 *                            whose `post_apply_correct` is resolved (non-null),
 *                            the share that are `true`. Reads as 1 when there
 *                            are no such records.
 *
 * A non-array `window` is treated as empty.
 *
 * @param {Array} window normalized records
 * @returns {{mean_risk_emitted:number, override_rate:number, post_apply_correctness:number}}
 */
function computeStats(window) {
  const records = Array.isArray(window) ? window : [];
  const total = records.length;
  if (total === 0) {
    return { mean_risk_emitted: 0, override_rate: 0, post_apply_correctness: 1 };
  }

  let totalRisk = 0;
  let vetoedCount = 0;
  let judgedCount = 0;
  let judgedCorrect = 0;

  for (const entry of records) {
    totalRisk += entry.risk;
    if (!entry.accepted || entry.user_undo) {
      // Rejected or undone: counts as an override and is never "applied".
      vetoedCount += 1;
    } else if (entry.post_apply_correct !== null) {
      // Applied with a resolved verdict: feeds the correctness rate.
      judgedCount += 1;
      if (entry.post_apply_correct === true) {
        judgedCorrect += 1;
      }
    }
  }

  const correctness = judgedCount === 0 ? 1 : judgedCorrect / judgedCount;
  return {
    mean_risk_emitted: totalRisk / total,
    override_rate: vetoedCount / total,
    post_apply_correctness: correctness,
  };
}
213
+
214
/**
 * Append one risk outcome to `agent`'s rolling window and persist the result.
 *
 * Steps: load the store, re-normalize the agent's existing window, append the
 * normalized `record`, keep only the newest WINDOW_SIZE entries, recompute the
 * three statistics, and save the store.
 *
 * @param {string} agent the writer agent the assessment scored (e.g. 'design-fixer')
 * @param {{risk?:number, accepted?:boolean, user_undo?:boolean, post_apply_correct?:(boolean|null)}} record
 * @param {{file?:string, root?:string, baseDir?:string, now?:string|Date}} [opts]
 * @returns {{agent:string, stats:{mean_risk_emitted:number, override_rate:number, post_apply_correctness:number}, windowSize:number, path:string}}
 * @throws {TypeError} when `agent` is not a non-empty string
 */
function updateCalibration(agent, record, opts = {}) {
  const agentIsValid = typeof agent === 'string' && agent.length > 0;
  if (!agentIsValid) {
    throw new TypeError('updateCalibration: agent (non-empty string) required');
  }

  const store = load(opts);
  if (!store.agents || typeof store.agents !== 'object') {
    store.agents = {};
  }

  const existing = store.agents[agent];
  const history =
    existing && Array.isArray(existing.window) ? existing.window.map(normalizeRecord) : [];

  // Append the new outcome, then trim from the front to the newest WINDOW_SIZE.
  const window = [...history, normalizeRecord(record)].slice(-WINDOW_SIZE);

  const stats = computeStats(window);
  const { mean_risk_emitted, override_rate, post_apply_correctness } = stats;
  store.agents[agent] = {
    window,
    mean_risk_emitted,
    override_rate,
    post_apply_correctness,
  };

  const writtenPath = save(store, opts);
  return { agent, stats, windowSize: window.length, path: writtenPath };
}
250
+
251
/**
 * Classify calibration drift from an agent's rolling statistics. Performs no
 * I/O and reads only its arguments (plus the frozen DRIFT defaults).
 *
 *   under_scoring: mean_risk_emitted < mean_risk_max
 *                  && override_rate > override_rate_min
 *   over_scoring:  mean_risk_emitted > mean_risk_min
 *                  && post_apply_correctness > correctness_min
 *                  && override_rate < override_rate_max
 *   otherwise:     'none'
 *
 * All comparisons are strict, so a value sitting exactly on a threshold does
 * not trip the band. under_scoring is evaluated first. Any statistic that is
 * missing or not a number is read as 0; a `cfg` lacking one of the two bands
 * falls back to the DRIFT default for that band.
 *
 * @param {{mean_risk_emitted?:number, override_rate?:number, post_apply_correctness?:number}} stats
 * @param {object} [cfg] defaults to the frozen DRIFT thresholds
 * @returns {'under_scoring'|'over_scoring'|'none'}
 */
function detectDrift(stats, cfg = DRIFT) {
  const numberOrZero = (value) => (typeof value === 'number' ? value : 0);
  const source = stats && typeof stats === 'object' ? stats : {};

  const meanRisk = numberOrZero(source.mean_risk_emitted);
  const overrideRate = numberOrZero(source.override_rate);
  const correctness = numberOrZero(source.post_apply_correctness);

  const underBand = cfg && cfg.under_scoring ? cfg.under_scoring : DRIFT.under_scoring;
  const overBand = cfg && cfg.over_scoring ? cfg.over_scoring : DRIFT.over_scoring;

  const scoresTooLow =
    meanRisk < underBand.mean_risk_max && overrideRate > underBand.override_rate_min;
  if (scoresTooLow) {
    return 'under_scoring';
  }

  const scoresTooHigh =
    meanRisk > overBand.mean_risk_min &&
    correctness > overBand.correctness_min &&
    overrideRate < overBand.override_rate_max;
  return scoresTooHigh ? 'over_scoring' : 'none';
}
288
+
289
/**
 * Convert a single risk outcome into a bandit reward in [0, 1]. Performs no
 * I/O and depends only on its argument.
 *
 * Rules:
 *   - `accepted === false` or `user_undo === true` -> 0. The user vetoed the
 *     action, so it earns nothing regardless of risk. Both checks are strict:
 *     only the literal booleans trigger them.
 *   - anything else -> clamp01(1 - 0.5 * risk), with `risk` itself clamped to
 *     [0, 1] first (non-numeric risk counts as 0). The reward therefore falls
 *     linearly from 1.0 at risk 0 to 0.5 at risk 1.
 *
 * Examples:
 *   {accepted:true,  risk:0.2}                 -> 0.9
 *   {accepted:false, risk:0.2}                 -> 0
 *   {accepted:true,  risk:0.9}                 -> 0.55
 *   {accepted:true,  risk:0.0, user_undo:true} -> 0
 *
 * @param {{accepted?:boolean, risk?:number, user_undo?:boolean}} input
 * @returns {number} reward in [0, 1]
 */
function riskReward(input) {
  const outcome = input && typeof input === 'object' ? input : {};
  const vetoed = outcome.accepted === false || outcome.user_undo === true;
  if (vetoed) {
    return 0;
  }
  const carriedRisk = clamp01(typeof outcome.risk === 'number' ? outcome.risk : 0);
  return clamp01(1 - 0.5 * carriedRisk);
}
317
+
318
/**
 * Best-effort bridge from a risk outcome to the bandit router: compute the
 * reward with riskReward and pass it to `bandit.update(...)`.
 *
 * The reward is always computed and returned. The bandit write is skipped,
 * with `recorded:false` and a `reason`, when:
 *   - `agent` is missing or not a non-empty string;
 *   - `bin` or `tier` is missing or not a non-empty string (no arm can be
 *     addressed, and nothing is guessed);
 *   - loading or calling the bandit throws; the error message becomes `reason`.
 *
 * The bandit is `input.bandit` when provided; otherwise '../bandit-router.cjs'
 * is required lazily inside the try block, so a missing or broken module is
 * reported rather than thrown. `update` receives
 * `{ agent, bin, tier, reward, baseDir, posteriorPath }`, where `baseDir`
 * falls back to `input.root`.
 *
 * @param {{
 *   agent: string,
 *   bin?: string,
 *   tier?: string,
 *   accepted?: boolean,
 *   risk?: number,
 *   user_undo?: boolean,
 *   bandit?: object,
 *   root?: string, baseDir?: string, posteriorPath?: string,
 * }} input
 * @returns {{recorded:boolean, reward:number, reason?:string}}
 */
function recordRiskOutcome(input) {
  const reward = riskReward(input || {});
  const skipped = (reason) => ({ recorded: false, reward, reason });
  const isNonEmptyString = (value) => typeof value === 'string' && value.length > 0;

  try {
    const ctx = input && typeof input === 'object' ? input : {};
    if (!isNonEmptyString(ctx.agent)) {
      return skipped('agent required for bandit update');
    }
    if (!isNonEmptyString(ctx.bin) || !isNonEmptyString(ctx.tier)) {
      // No routing context means no addressable arm; skip instead of guessing.
      return skipped('bin+tier required for bandit update');
    }

    // Required lazily so a missing or broken bandit module lands in the catch below.
    const router = ctx.bandit || require('../bandit-router.cjs');
    const { agent, bin, tier, posteriorPath } = ctx;
    router.update({
      agent,
      bin,
      tier,
      reward,
      baseDir: ctx.baseDir ?? ctx.root,
      posteriorPath,
    });
    return { recorded: true, reward };
  } catch (err) {
    return skipped(err && err.message ? err.message : String(err));
  }
}
369
+
370
+ module.exports = {
371
+ updateCalibration,
372
+ detectDrift,
373
+ riskReward,
374
+ recordRiskOutcome,
375
+ // Exposed for tests + sibling reuse.
376
+ computeStats,
377
+ normalizeRecord,
378
+ load,
379
+ save,
380
+ clamp01,
381
+ DRIFT,
382
+ WINDOW_SIZE,
383
+ SCHEMA_VERSION,
384
+ DEFAULT_CALIBRATION_PATH,
385
+ };