@hegemonart/get-design-done 1.55.0 → 1.57.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +90 -0
- package/README.md +6 -0
- package/SKILL.md +2 -0
- package/agents/design-fixer.md +16 -0
- package/dist/claude-code/.claude/skills/override/SKILL.md +86 -0
- package/dist/claude-code/.claude/skills/state/SKILL.md +106 -0
- package/hooks/gdd-decision-injector.js +58 -0
- package/hooks/gdd-fact-force.js +434 -0
- package/hooks/gdd-risk-gate.js +406 -0
- package/hooks/hooks.json +18 -0
- package/package.json +1 -1
- package/reference/schemas/events.schema.json +61 -1
- package/reference/skill-graph.md +3 -1
- package/scripts/lib/manifest/skills.json +16 -0
- package/scripts/lib/risk/calibration.cjs +385 -0
- package/scripts/lib/risk/compute-risk.cjs +229 -0
- package/scripts/lib/risk/consumers.cjs +211 -0
- package/scripts/lib/risk/override.cjs +87 -0
- package/scripts/lib/risk/route.cjs +59 -0
- package/scripts/lib/risk/tables.cjs +221 -0
- package/scripts/lib/state/migrate-to-sqlite.cjs +664 -0
- package/scripts/lib/state/query-surface.cjs +391 -0
- package/scripts/lib/state/render-markdown.cjs +717 -0
- package/scripts/lib/state/state-backend.cjs +345 -0
- package/scripts/lib/state/state-store.cjs +735 -0
- package/sdk/cli/index.js +193 -96
- package/sdk/dashboard/data/source.cjs +44 -5
- package/sdk/mcp/gdd-state/server.js +127 -30
- package/sdk/mcp/gdd-state/tools/get.ts +8 -0
- package/sdk/state/index.ts +267 -13
- package/sdk/state/lockfile.ts +48 -0
- package/sdk/state/schema.sql +218 -0
- package/skills/override/SKILL.md +86 -0
- package/skills/state/SKILL.md +106 -0
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
/**
|
|
3
|
+
* scripts/lib/risk/calibration.cjs — Phase 56 (CAL-01) per-agent risk
|
|
4
|
+
* calibration + the bandit reward bridge.
|
|
5
|
+
*
|
|
6
|
+
* The risk scorer (scripts/lib/risk/compute-risk.cjs) is a STATIC table-driven
|
|
7
|
+
* model — it cannot learn that a particular writer agent chronically under- or
|
|
8
|
+
* over-scores its own actions. This module is the feedback layer: it records
|
|
9
|
+
* per-agent outcomes in a rolling-50 window, derives three calibration
|
|
10
|
+
* statistics, flags drift, and feeds a reward signal into the Phase 23.5
|
|
11
|
+
* Thompson-sampling bandit (scripts/lib/bandit-router.cjs) so the adaptive
|
|
12
|
+
* router can react to a mis-calibrated agent over time.
|
|
13
|
+
*
|
|
14
|
+
* Persistence:
|
|
15
|
+
* .design/telemetry/calibration.json
|
|
16
|
+
* {
|
|
17
|
+
* schema_version: '56.0',
|
|
18
|
+
* generated_at: ISO,
|
|
19
|
+
* agents: {
|
|
20
|
+
* "<agent>": {
|
|
21
|
+
* window: [ { risk, accepted, user_undo, post_apply_correct }, … ≤50 ],
|
|
22
|
+
* mean_risk_emitted: number, // mean(window.risk)
|
|
23
|
+
* override_rate: number, // P(rejected OR undone)
|
|
24
|
+
* post_apply_correctness: number // P(correct | applied)
|
|
25
|
+
* }, …
|
|
26
|
+
* }
|
|
27
|
+
* }
|
|
28
|
+
* Atomic .tmp + rename (mirrors instinct-store.save / ds-arms.save). The
|
|
29
|
+
* `.design/` tree is gitignored + worktree-local (R5).
|
|
30
|
+
*
|
|
31
|
+
* Purity contract:
|
|
32
|
+
* - detectDrift + riskReward are PURE (no I/O, no Date.now / Math.random;
|
|
33
|
+
* the DRIFT thresholds are frozen). Deterministic for the suite.
|
|
34
|
+
* - updateCalibration reads/writes the FS, but only via the injected
|
|
35
|
+
* `{root}` (or `file`) so tests run hermetically under a tmpdir. The only
|
|
36
|
+
* non-determinism is `generated_at` (an ISO stamp), which callers can pin
|
|
37
|
+
* via opts.now.
|
|
38
|
+
* - recordRiskOutcome calls bandit-router.update BEST-EFFORT — it never
|
|
39
|
+
* throws (a telemetry write must never break a hook / agent turn).
|
|
40
|
+
*
|
|
41
|
+
* Zero new dependency. CommonJS to match the scripts/lib/ siblings.
|
|
42
|
+
*/
|
|
43
|
+
|
|
44
|
+
const fs = require('node:fs');
|
|
45
|
+
const path = require('node:path');
|
|
46
|
+
|
|
47
|
+
const SCHEMA_VERSION = '56.0';
|
|
48
|
+
const DEFAULT_CALIBRATION_PATH = '.design/telemetry/calibration.json';
|
|
49
|
+
|
|
50
|
+
/** Rolling window length (CAL-01): keep the last 50 outcomes per agent. */
|
|
51
|
+
const WINDOW_SIZE = 50;
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Drift thresholds (frozen). detectDrift compares the rolling stats against
|
|
55
|
+
* these bands:
|
|
56
|
+
* under_scoring — the agent emits LOW risk yet the user overrides OFTEN:
|
|
57
|
+
* the scores are too tame (false sense of safety).
|
|
58
|
+
* over_scoring — the agent emits HIGH risk yet applied actions are almost
|
|
59
|
+
* always correct AND the user rarely overrides: the scores
|
|
60
|
+
* are too alarmist (friction without payoff).
|
|
61
|
+
*/
|
|
62
|
+
const DRIFT = Object.freeze({
|
|
63
|
+
under_scoring: Object.freeze({ mean_risk_max: 0.35, override_rate_min: 0.30 }),
|
|
64
|
+
over_scoring: Object.freeze({
|
|
65
|
+
mean_risk_min: 0.65,
|
|
66
|
+
correctness_min: 0.90,
|
|
67
|
+
override_rate_max: 0.10,
|
|
68
|
+
}),
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
/**
 * Constrain a value to the closed interval [0, 1].
 *
 * Values that are not of type `number`, and NaN, collapse to 0. Infinite
 * inputs are clamped like any other out-of-range number: -Infinity -> 0 and
 * +Infinity -> 1.
 * @param {number} n
 * @returns {number}
 */
function clamp01(n) {
  const isUsable = typeof n === 'number' && !Number.isNaN(n);
  if (!isUsable) return 0;
  return n < 0 ? 0 : n > 1 ? 1 : n;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Work out where the calibration store lives on disk.
 *
 * An absolute `opts.file` is returned untouched. A relative `opts.file`, or
 * DEFAULT_CALIBRATION_PATH when no file is given, is resolved against
 * `opts.root`, then `opts.baseDir`, then the current working directory.
 * @param {{file?:string, root?:string, baseDir?:string}} [opts]
 * @returns {{file:string, dir:string}} the store path and its parent directory
 */
function paths(opts = {}) {
  const wantsExplicit = Boolean(opts.file);
  let file;
  if (wantsExplicit && path.isAbsolute(opts.file)) {
    file = opts.file;
  } else {
    const anchor = opts.root ?? opts.baseDir ?? process.cwd();
    file = path.resolve(anchor, wantsExplicit ? opts.file : DEFAULT_CALIBRATION_PATH);
  }
  return { file, dir: path.dirname(file) };
}
|
|
101
|
+
|
|
102
|
+
/**
 * Read the calibration store from disk.
 *
 * A fresh, empty envelope is returned whenever the file is missing, cannot be
 * read or parsed, or does not carry a usable `agents` map. `agents` must be a
 * plain (non-array) object: an array would accept `agents[name] = …`
 * assignments, but JSON.stringify ignores non-index keys on arrays, so every
 * later update would be silently dropped when the store is saved.
 * @param {{file?:string, root?:string, baseDir?:string}} [opts]
 * @returns {{schema_version:string, generated_at?:string, agents:object}}
 */
function load(opts = {}) {
  const { file } = paths(opts);
  const fresh = { schema_version: SCHEMA_VERSION, agents: {} };
  let data;
  try {
    // Reading directly (instead of checking existence first) also covers a
    // file that disappears between the check and the read.
    data = JSON.parse(fs.readFileSync(file, 'utf8'));
  } catch {
    return fresh;
  }
  const agents = data && typeof data === 'object' ? data.agents : null;
  if (!agents || typeof agents !== 'object' || Array.isArray(agents)) {
    return fresh;
  }
  return data;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Write the calibration store to disk via a sibling `.tmp` file that is then
 * renamed over the destination.
 *
 * The passed `store` object is stamped in place with `schema_version` and
 * `generated_at` before it is serialised. `opts.now` pins the timestamp: a
 * Date is converted with toISOString(), a string is used verbatim, anything
 * else falls back to the current time.
 * @param {object} store
 * @param {{file?:string, root?:string, baseDir?:string, now?:string|Date}} [opts]
 * @returns {string} the path that was written
 */
function save(store, opts = {}) {
  const { file, dir } = paths(opts);
  fs.mkdirSync(dir, { recursive: true });

  store.schema_version = SCHEMA_VERSION;

  let stamp;
  if (opts.now instanceof Date) {
    stamp = opts.now.toISOString();
  } else if (typeof opts.now === 'string') {
    stamp = opts.now;
  } else {
    stamp = new Date().toISOString();
  }
  store.generated_at = stamp;

  // Stage the payload next to the target, then swap it into place.
  const staging = file + '.tmp';
  const payload = JSON.stringify(store, null, 2) + '\n';
  fs.writeFileSync(staging, payload);
  fs.renameSync(staging, file);
  return file;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Coerce a raw outcome into the canonical window record. Unknown or missing
 * fields degrade safely:
 *   risk               -> clamp01(number); non-numbers become 0
 *   accepted           -> boolean; undefined OR null defaults to true, because
 *                         only an explicit reject should count as a reject
 *                         (null means "unknown", as it does for
 *                         post_apply_correct)
 *   user_undo          -> boolean (default false)
 *   post_apply_correct -> boolean | null (null = "not yet known")
 * A non-object `record` is treated as an empty outcome.
 * @param {object} record
 * @returns {{risk:number, accepted:boolean, user_undo:boolean, post_apply_correct:(boolean|null)}}
 */
function normalizeRecord(record) {
  const r = record && typeof record === 'object' ? record : {};
  // `== null` deliberately matches both undefined and null.
  const accepted = r.accepted == null ? true : Boolean(r.accepted);
  const correctness = r.post_apply_correct == null ? null : Boolean(r.post_apply_correct);
  return {
    risk: clamp01(typeof r.risk === 'number' ? r.risk : 0),
    accepted,
    user_undo: Boolean(r.user_undo),
    post_apply_correct: correctness,
  };
}
|
|
170
|
+
|
|
171
|
+
/**
 * Derive the three rolling statistics from a window of normalized records.
 *
 *   mean_risk_emitted      = average of `risk`                (0 when empty)
 *   override_rate          = share of records that were rejected or undone
 *                            (0 when empty)
 *   post_apply_correctness = share of correct records among those that were
 *                            applied (accepted and not undone) AND have a
 *                            resolved, non-null `post_apply_correct`; reads
 *                            as 1 when there are no such records
 *
 * A non-array `window` is treated as an empty window.
 * @param {Array} window normalized records
 * @returns {{mean_risk_emitted:number, override_rate:number, post_apply_correctness:number}}
 */
function computeStats(window) {
  const records = Array.isArray(window) ? window : [];
  const total = records.length;
  if (total === 0) {
    return { mean_risk_emitted: 0, override_rate: 0, post_apply_correctness: 1 };
  }

  const riskTotal = records.reduce((sum, rec) => sum + rec.risk, 0);
  const overrideCount = records.filter((rec) => !rec.accepted || rec.user_undo).length;
  // Only applied records with a known verdict feed the correctness rate.
  const judged = records.filter(
    (rec) => rec.accepted && !rec.user_undo && rec.post_apply_correct !== null,
  );
  const correctCount = judged.filter((rec) => rec.post_apply_correct === true).length;

  return {
    mean_risk_emitted: riskTotal / total,
    override_rate: overrideCount / total,
    post_apply_correctness: judged.length === 0 ? 1 : correctCount / judged.length,
  };
}
|
|
213
|
+
|
|
214
|
+
/**
 * Append one risk outcome to `agent`'s rolling window, trim the window to the
 * newest WINDOW_SIZE entries, recompute the statistics and persist the store.
 *
 * Previously stored window entries are re-normalized before the new record is
 * added.
 *
 * @param {string} agent the writer agent the assessment scored (e.g. 'design-fixer')
 * @param {{risk?:number, accepted?:boolean, user_undo?:boolean, post_apply_correct?:boolean}} record
 * @param {{file?:string, root?:string, baseDir?:string, now?:string|Date}} [opts]
 * @returns {{agent:string, stats:{mean_risk_emitted:number, override_rate:number, post_apply_correctness:number}, windowSize:number, path:string}}
 * @throws {TypeError} when `agent` is not a non-empty string
 */
function updateCalibration(agent, record, opts = {}) {
  const hasAgent = typeof agent === 'string' && agent.length > 0;
  if (!hasAgent) {
    throw new TypeError('updateCalibration: agent (non-empty string) required');
  }

  const store = load(opts);
  if (!store.agents || typeof store.agents !== 'object') {
    store.agents = {};
  }

  const existing = store.agents[agent];
  const history = existing && Array.isArray(existing.window) ? existing.window.map(normalizeRecord) : [];
  history.push(normalizeRecord(record));

  // Rolling window: drop the oldest entries once the cap is exceeded.
  const overflow = history.length - WINDOW_SIZE;
  const window = overflow > 0 ? history.slice(overflow) : history;

  const stats = computeStats(window);
  const { mean_risk_emitted, override_rate, post_apply_correctness } = stats;
  store.agents[agent] = { window, mean_risk_emitted, override_rate, post_apply_correctness };

  const written = save(store, opts);
  return { agent, stats, windowSize: window.length, path: written };
}
|
|
250
|
+
|
|
251
|
+
/**
 * Classify calibration drift from an agent's rolling statistics. Performs no
 * I/O and reads nothing but its arguments and the DRIFT defaults.
 *
 *   under_scoring: mean_risk_emitted < mean_risk_max  AND override_rate > override_rate_min
 *   over_scoring:  mean_risk_emitted > mean_risk_min  AND post_apply_correctness > correctness_min
 *                  AND override_rate < override_rate_max
 *   otherwise:     'none'
 *
 * All comparisons are strict, so a value sitting exactly on a threshold does
 * not trip the band. under_scoring is evaluated first. Missing or non-numeric
 * statistics are read as 0; a band absent from `cfg` falls back to the DRIFT
 * default for that band.
 *
 * @param {{mean_risk_emitted?:number, override_rate?:number, post_apply_correctness?:number}} stats
 * @param {object} [cfg] defaults to the frozen DRIFT thresholds
 * @returns {'under_scoring'|'over_scoring'|'none'}
 */
function detectDrift(stats, cfg = DRIFT) {
  const source = stats && typeof stats === 'object' ? stats : {};
  const numberOrZero = (value) => (typeof value === 'number' ? value : 0);

  const mean = numberOrZero(source.mean_risk_emitted);
  const override = numberOrZero(source.override_rate);
  const correct = numberOrZero(source.post_apply_correctness);

  const underBand = (cfg && cfg.under_scoring) || DRIFT.under_scoring;
  const overBand = (cfg && cfg.over_scoring) || DRIFT.over_scoring;

  const tooTame = mean < underBand.mean_risk_max && override > underBand.override_rate_min;
  if (tooTame) return 'under_scoring';

  const tooAlarmist =
    mean > overBand.mean_risk_min &&
    correct > overBand.correctness_min &&
    override < overBand.override_rate_max;
  return tooAlarmist ? 'over_scoring' : 'none';
}
|
|
288
|
+
|
|
289
|
+
/**
 * Translate a single risk outcome into a bandit reward in [0, 1]. Performs no
 * I/O and depends only on its argument.
 *
 *   - `accepted === false` or `user_undo === true`  -> 0 (the user vetoed the
 *     action, so no credit is given whatever the risk was)
 *   - anything else                                 -> clamp01(1 - 0.5 * risk)
 *     so the reward falls linearly from 1 (risk 0) to 0.5 (risk 1)
 *
 * A missing or non-numeric `risk` counts as 0; a non-object input is treated
 * as an empty outcome.
 *
 * Examples:
 *   {accepted:true,  risk:0.5}                  -> 0.75
 *   {accepted:false, risk:0.5}                  -> 0
 *   {accepted:true,  risk:1}                    -> 0.5
 *   {accepted:true,  risk:0, user_undo:true}    -> 0
 *
 * @param {{accepted?:boolean, risk?:number, user_undo?:boolean}} input
 * @returns {number} reward in [0, 1]
 */
function riskReward(input) {
  const outcome = input && typeof input === 'object' ? input : {};
  const vetoed = outcome.accepted === false || outcome.user_undo === true;
  if (vetoed) return 0;

  const carriedRisk = clamp01(typeof outcome.risk === 'number' ? outcome.risk : 0);
  return clamp01(1 - 0.5 * carriedRisk);
}
|
|
317
|
+
|
|
318
|
+
/**
 * Best-effort bridge from a risk outcome to the bandit: compute the reward
 * with riskReward() and hand it to the bandit's `update()`.
 *
 * Everything after the reward computation runs inside a try/catch, so a
 * missing bandit module or a throwing `update()` is reported as
 * `{ recorded:false, reward, reason }` instead of propagating.
 *
 * The bandit arm is addressed by `(agent, bin, tier)`. If any of the three is
 * missing or not a non-empty string, no update is attempted and the reason
 * says which part was missing; nothing is guessed.
 *
 * @param {{
 *   agent: string,
 *   bin?: string,
 *   tier?: string,
 *   accepted?: boolean,
 *   risk?: number,
 *   user_undo?: boolean,
 *   bandit?: object,            // injectable; otherwise '../bandit-router.cjs' is required lazily
 *   root?: string, baseDir?: string, posteriorPath?: string,
 * }} input
 * @returns {{recorded:boolean, reward:number, reason?:string}}
 */
function recordRiskOutcome(input) {
  const reward = riskReward(input || {});
  const skipped = (reason) => ({ recorded: false, reward, reason });
  const isFilled = (value) => typeof value === 'string' && value.length > 0;

  try {
    const ctx = input && typeof input === 'object' ? input : {};
    if (!isFilled(ctx.agent)) {
      return skipped('agent required for bandit update');
    }
    if (!isFilled(ctx.bin) || !isFilled(ctx.tier)) {
      // No routing context means no addressable arm.
      return skipped('bin+tier required for bandit update');
    }

    // Resolved lazily so a missing or broken module lands in the catch below.
    const router = ctx.bandit || require('../bandit-router.cjs');
    const { agent, bin, tier, posteriorPath } = ctx;
    router.update({
      agent,
      bin,
      tier,
      reward,
      baseDir: ctx.baseDir ?? ctx.root,
      posteriorPath,
    });
    return { recorded: true, reward };
  } catch (err) {
    return skipped(err && err.message ? err.message : String(err));
  }
}
|
|
369
|
+
|
|
370
|
+
module.exports = {
|
|
371
|
+
updateCalibration,
|
|
372
|
+
detectDrift,
|
|
373
|
+
riskReward,
|
|
374
|
+
recordRiskOutcome,
|
|
375
|
+
// Exposed for tests + sibling reuse.
|
|
376
|
+
computeStats,
|
|
377
|
+
normalizeRecord,
|
|
378
|
+
load,
|
|
379
|
+
save,
|
|
380
|
+
clamp01,
|
|
381
|
+
DRIFT,
|
|
382
|
+
WINDOW_SIZE,
|
|
383
|
+
SCHEMA_VERSION,
|
|
384
|
+
DEFAULT_CALIBRATION_PATH,
|
|
385
|
+
};
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
/**
|
|
3
|
+
* scripts/lib/risk/compute-risk.cjs — PURE, deterministic action-risk scorer
|
|
4
|
+
* for the Phase 56 risk gate.
|
|
5
|
+
*
|
|
6
|
+
* NO I/O. NO Date.now / Math.random. Given the same (tool_name, input,
|
|
7
|
+
* thresholds) it always returns the same result. Frozen static tables live in
|
|
8
|
+
* ./tables.cjs; config overrides are merged by the HOOK (which reads
|
|
9
|
+
* .design/config.json and passes the merged thresholds/tables in) — this
|
|
10
|
+
* module stays side-effect-free so the routing matrix is unit-testable.
|
|
11
|
+
*
|
|
12
|
+
* Contract:
|
|
13
|
+
* computeRisk(tool_name, input, thresholds = THRESHOLDS, tables = defaults)
|
|
14
|
+
* -> { score:0..1, reasons:string[], suggested_action, breakdown }
|
|
15
|
+
*
|
|
16
|
+
* score = clamp01( base * fileMult + fileAdd + sum(inputAdds) )
|
|
17
|
+
* suggested_action in 'allow' | 'review' | 'require_confirmation' | 'block'
|
|
18
|
+
*
|
|
19
|
+
* loadRiskConfig(cwd) is provided (mirrors blast-radius.loadConfig) so the hook
|
|
20
|
+
* can read `.design/config.json#risk.{thresholds, base_tool_extra,
|
|
21
|
+
* file_sensitivity_extra, input_pattern_extra}` and EXTEND the defaults
|
|
22
|
+
* (extend-only — protected-paths discipline). computeRisk itself never calls it.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
const fs = require('fs');
|
|
26
|
+
const path = require('path');
|
|
27
|
+
|
|
28
|
+
const TABLES = require('./tables.cjs');
|
|
29
|
+
const { BASE_TOOL_RISK, FILE_SENSITIVITY, INPUT_PATTERN_RISK, THRESHOLDS } = TABLES;
|
|
30
|
+
|
|
31
|
+
/**
 * Pin a number into [0, 1]. Non-numbers and NaN yield 0; values below 0 yield
 * 0 and values above 1 yield 1 (so -Infinity -> 0 and +Infinity -> 1).
 * @param {number} n
 * @returns {number}
 */
function clamp01(n) {
  const unusable = typeof n !== 'number' || Number.isNaN(n);
  if (unusable || n < 0) {
    return 0;
  }
  return n > 1 ? 1 : n;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Normalise a path for table matching: stringify it (null/undefined become
 * the empty string), turn every backslash into a forward slash, and strip a
 * single leading "./".
 * @param {*} p
 * @returns {string}
 */
function normPath(p) {
  const text = p == null ? '' : String(p);
  const forwardSlashed = text.replace(/\\/g, '/');
  return forwardSlashed.replace(/^\.\//, '');
}
|
|
41
|
+
|
|
42
|
+
/**
 * pathsFor(tool, input) — collect the file paths a tool action touches.
 *
 * Any string-valued `file_path`, `notebook_path` and `path` fields of `input`
 * are taken (in that order). For the `Bash` tool, path-looking tokens pulled
 * from `input.command` by extractBashPaths are appended. Every path goes
 * through normPath; empty results are dropped and duplicates removed, keeping
 * first-seen order. A missing or non-object `input` yields an empty list.
 * @param {string} tool
 * @param {object} input
 * @returns {string[]}
 */
function pathsFor(tool, input) {
  if (!input || typeof input !== 'object') return [];

  const collected = ['file_path', 'notebook_path', 'path']
    .map((key) => input[key])
    .filter((value) => typeof value === 'string')
    .map((value) => normPath(value));

  const isBashCommand = tool === 'Bash' && typeof input.command === 'string';
  if (isBashCommand) {
    for (const token of extractBashPaths(input.command)) {
      collected.push(normPath(token));
    }
  }

  return [...new Set(collected.filter(Boolean))];
}
|
|
60
|
+
|
|
61
|
+
// Best-effort path extraction from a shell command. The command is split on
// whitespace, one leading and one trailing quote character are peeled off each
// token, and a token is kept only when it
//   - is non-empty and does not start with "-" (not an option flag),
//   - contains none of the shell-operator / glob characters |;&$`(){}<>*?!, and
//   - either contains a "/" or ends in a dot followed by 1–8 alphanumerics.
function extractBashPaths(command) {
  return String(command)
    .split(/\s+/)
    .map((raw) => raw.replace(/^['"]|['"]$/g, ''))
    .filter((token) => {
      if (!token || token.startsWith('-')) return false;
      if (/[|;&$`(){}<>*?!]/.test(token)) return false;
      return token.includes('/') || /\.[A-Za-z0-9]{1,8}$/.test(token);
    });
}
|
|
75
|
+
|
|
76
|
+
/**
 * pickMaxFileSensitivity(paths, table) — choose the single table entry with
 * the highest weight among those whose `test` pattern matches at least one of
 * the touched paths. An entry's weight is `mult + add`, where a non-numeric
 * `mult` counts as 1 and a non-numeric `add` counts as 0. On equal weights the
 * entry that comes first in the table is kept.
 *
 * Returns `{ mult, add, label }` of the winner (with the same numeric
 * defaults), or `{ mult:1, add:0, label:null }` when nothing matches.
 */
function pickMaxFileSensitivity(paths, table) {
  const numberOr = (value, fallback) => (typeof value === 'number' ? value : fallback);
  // One matching path is enough for an entry to compete.
  const touches = (entry) => {
    for (const candidate of paths) {
      if (entry.test.test(candidate)) return true;
    }
    return false;
  };

  let winner = null;
  let winnerWeight = -Infinity;
  for (const entry of table) {
    if (!touches(entry)) continue;
    const weight = numberOr(entry.mult, 1) + numberOr(entry.add, 0);
    if (weight > winnerWeight) {
      winnerWeight = weight;
      winner = entry;
    }
  }

  if (!winner) return { mult: 1, add: 0, label: null };
  return { mult: numberOr(winner.mult, 1), add: numberOr(winner.add, 0), label: winner.label };
}
|
|
101
|
+
|
|
102
|
+
/**
 * Map a score onto the gate action. The bands are checked from most to least
 * severe and the first one whose threshold the score reaches (>=) wins;
 * a score below every threshold yields 'allow'. A falsy `thresholds` falls
 * back to THRESHOLDS.
 * @param {number} score
 * @param {{review:number, require_confirmation:number, block:number}} [thresholds]
 * @returns {'allow'|'review'|'require_confirmation'|'block'}
 */
function actionFor(score, thresholds) {
  const bands = thresholds || THRESHOLDS;
  const ladder = ['block', 'require_confirmation', 'review'];
  for (const action of ladder) {
    if (score >= bands[action]) return action;
  }
  return 'allow';
}
|
|
109
|
+
|
|
110
|
+
/**
 * computeRisk — score a tool action against the risk tables.
 *
 *   score = clamp01( base * file.mult + file.add + sum(input addends) )
 *
 * Steps:
 *   1. base   — the table value for `tool_name`, or the table's `__default`
 *               when the tool has no numeric entry.
 *   2. file   — the highest-weight FILE_SENSITIVITY entry matching any path
 *               returned by pathsFor (neutral x1 + 0 when none matches).
 *   3. inputs — every INPUT_PATTERN_RISK entry, in table order, whose `when`
 *               predicate returns truthy contributes its `add` (a number, or a
 *               function of `(hit, tool_name, input)`). A predicate that
 *               throws counts as "no hit"; non-finite or zero addends are
 *               ignored.
 *   4. The clamped score is mapped to an action with actionFor.
 *
 * `reasons` lists a human-readable line per contribution; `breakdown` exposes
 * the intermediate values.
 *
 * @param {string} tool_name
 * @param {object} input         tool_input (Edit/Write/MultiEdit/Bash/...)
 * @param {object} [thresholds]  defaults to THRESHOLDS
 * @param {object} [tables]      { BASE_TOOL_RISK, FILE_SENSITIVITY, INPUT_PATTERN_RISK }; any missing table falls back to the module default
 * @returns {{score:number, reasons:string[], suggested_action:string, breakdown:object}}
 */
function computeRisk(tool_name, input, thresholds = THRESHOLDS, tables) {
  const supplied = tables || {};
  const baseTbl = supplied.BASE_TOOL_RISK || BASE_TOOL_RISK;
  const fileTbl = supplied.FILE_SENSITIVITY || FILE_SENSITIVITY;
  const inputTbl = supplied.INPUT_PATTERN_RISK || INPUT_PATTERN_RISK;

  // 1. Base tool risk.
  const hasOwnBase = typeof baseTbl[tool_name] === 'number';
  const base = hasOwnBase ? baseTbl[tool_name] : baseTbl.__default;
  const reasons = [`base:${tool_name}=${round(base)}`];

  // 2. File sensitivity across every touched path.
  const paths = pathsFor(tool_name, input);
  const sensitivity = pickMaxFileSensitivity(paths, fileTbl);
  if (sensitivity.label) {
    reasons.push(`file:${sensitivity.label}(x${sensitivity.mult}+${sensitivity.add})`);
  }

  // 3. Input-pattern addends, in table order.
  const inputAdds = [];
  for (const entry of inputTbl) {
    let hit = false;
    try {
      hit = entry.when(tool_name, input);
    } catch {
      hit = false;
    }
    if (!hit) continue;

    const candidate = typeof entry.add === 'function' ? entry.add(hit, tool_name, input) : entry.add;
    const usable = typeof candidate === 'number' && Number.isFinite(candidate) && candidate !== 0;
    if (!usable) continue;

    inputAdds.push({ label: entry.label, add: candidate });
    reasons.push(`input:${entry.label}=+${round(candidate)}`);
  }
  const inputAddSum = inputAdds.reduce((sum, item) => sum + item.add, 0);

  // 4. Combine, clamp, classify.
  const rawScore = base * sensitivity.mult + sensitivity.add + inputAddSum;
  const score = clamp01(rawScore);
  const suggested_action = actionFor(score, thresholds);

  return {
    score,
    reasons,
    suggested_action,
    breakdown: {
      base,
      tool: tool_name,
      paths,
      file: { mult: sensitivity.mult, add: sensitivity.add, label: sensitivity.label },
      inputAdds,
      inputAddSum: round3(inputAddSum),
      raw: round3(rawScore),
      thresholds,
    },
  };
}
|
|
177
|
+
|
|
178
|
+
/** Round to two decimal places (used for the human-readable reason strings). */
function round(n) {
  const hundredths = Math.round(n * 100);
  return hundredths / 100;
}
|
|
181
|
+
/** Round to three decimal places (used for the numeric breakdown fields). */
function round3(n) {
  const thousandths = Math.round(n * 1000);
  return thousandths / 1000;
}
|
|
184
|
+
|
|
185
|
+
// ── Config loader (used by the HOOK, not by computeRisk) ────────────────────
// Reads `<cwd>/.design/config.json` (cwd defaults to process.cwd()) and returns
// the `risk` section in a normalised shape:
//   thresholds             — each of review / require_confirmation / block is
//                            taken from the file only when it is a finite
//                            number in [0, 1]; otherwise the THRESHOLDS default
//   base_tool_extra        — the configured object, or {}
//   file_sensitivity_extra — the configured array, or []
//   input_pattern_extra    — the configured array, or []
// A missing, unreadable or unparsable file yields the defaults. This is the
// only function in the module that touches the filesystem.
function loadRiskConfig(cwd) {
  const configPath = path.join(cwd || process.cwd(), '.design', 'config.json');

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(configPath, 'utf8'));
  } catch {
    parsed = {};
  }

  const risk = (parsed && typeof parsed === 'object' && parsed.risk) || {};
  const objectOrEmpty = (value) => (typeof value === 'object' && value) || {};
  const arrayOrEmpty = (value) => (Array.isArray(value) ? value : []);

  const configured = objectOrEmpty(risk.thresholds);
  const thresholds = Object.fromEntries(
    ['review', 'require_confirmation', 'block'].map((key) => [
      key,
      numOrInRange(configured[key], THRESHOLDS[key]),
    ]),
  );

  return {
    thresholds,
    // Extras only ever add to the built-in tables; merging is left to the caller.
    base_tool_extra: objectOrEmpty(risk.base_tool_extra),
    file_sensitivity_extra: arrayOrEmpty(risk.file_sensitivity_extra),
    input_pattern_extra: arrayOrEmpty(risk.input_pattern_extra),
  };
}
|
|
208
|
+
|
|
209
|
+
/**
 * Return `v` when it is a finite number within [0, 1] (bounds included);
 * otherwise return `fallback`.
 * @param {*} v
 * @param {number} fallback
 * @returns {number}
 */
function numOrInRange(v, fallback) {
  const acceptable = typeof v === 'number' && Number.isFinite(v) && v >= 0 && v <= 1;
  return acceptable ? v : fallback;
}
|
|
213
|
+
|
|
214
|
+
module.exports = {
|
|
215
|
+
computeRisk,
|
|
216
|
+
// helpers exported for the hook + tests
|
|
217
|
+
pathsFor,
|
|
218
|
+
pickMaxFileSensitivity,
|
|
219
|
+
actionFor,
|
|
220
|
+
clamp01,
|
|
221
|
+
loadRiskConfig,
|
|
222
|
+
_extractBashPaths: extractBashPaths,
|
|
223
|
+
// re-export the tables so consumers (B/C/D) can `require('./compute-risk')`
|
|
224
|
+
// and get THRESHOLDS without a second import.
|
|
225
|
+
THRESHOLDS,
|
|
226
|
+
BASE_TOOL_RISK,
|
|
227
|
+
FILE_SENSITIVITY,
|
|
228
|
+
INPUT_PATTERN_RISK,
|
|
229
|
+
};
|