thumbgate 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63):
  1. package/.claude-plugin/README.md +4 -4
  2. package/.claude-plugin/marketplace.json +1 -1
  3. package/.claude-plugin/plugin.json +1 -1
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +48 -16
  6. package/adapters/README.md +1 -1
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/codex/config.toml +2 -2
  9. package/adapters/mcp/server-stdio.js +11 -8
  10. package/adapters/opencode/opencode.json +1 -1
  11. package/bin/cli.js +20 -11
  12. package/config/github-about.json +1 -1
  13. package/config/model-tiers.json +11 -0
  14. package/package.json +22 -11
  15. package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +1 -1
  16. package/plugins/claude-codex-bridge/.mcp.json +1 -1
  17. package/plugins/codex-profile/.codex-plugin/plugin.json +1 -1
  18. package/plugins/codex-profile/.mcp.json +1 -1
  19. package/plugins/codex-profile/INSTALL.md +1 -1
  20. package/plugins/codex-profile/README.md +1 -1
  21. package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +1 -1
  22. package/plugins/cursor-marketplace/README.md +2 -2
  23. package/plugins/cursor-marketplace/commands/capture-feedback.md +2 -2
  24. package/plugins/cursor-marketplace/rules/feedback-capture.mdc +3 -3
  25. package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +3 -2
  26. package/plugins/opencode-profile/INSTALL.md +1 -1
  27. package/public/compare.html +302 -0
  28. package/public/guide.html +4 -4
  29. package/public/index.html +77 -38
  30. package/public/learn/ai-agent-persistent-memory.html +1 -0
  31. package/public/lessons.html +325 -17
  32. package/scripts/__pycache__/train_from_feedback.cpython-312.pyc +0 -0
  33. package/scripts/ai-search-visibility.js +142 -0
  34. package/scripts/audit-trail.js +6 -0
  35. package/scripts/capture-railway-diagnostics.sh +97 -0
  36. package/scripts/changeset-check.js +372 -0
  37. package/scripts/check-congruence.js +8 -5
  38. package/scripts/claude-feedback-sync.js +320 -0
  39. package/scripts/cli-telemetry.js +4 -1
  40. package/scripts/computer-use-firewall.js +45 -15
  41. package/scripts/contextfs.js +32 -23
  42. package/scripts/dashboard.js +84 -0
  43. package/scripts/docker-sandbox-planner.js +208 -0
  44. package/scripts/feedback-loop.js +16 -0
  45. package/scripts/github-about.js +56 -0
  46. package/scripts/intervention-policy.js +696 -0
  47. package/scripts/local-model-profile.js +18 -2
  48. package/scripts/model-tier-router.js +10 -1
  49. package/scripts/operational-integrity.js +361 -32
  50. package/scripts/prove-adapters.js +1 -0
  51. package/scripts/prove-automation.js +2 -2
  52. package/scripts/prove-packaged-runtime.js +260 -0
  53. package/scripts/prove-runtime.js +13 -0
  54. package/scripts/published-cli.js +10 -1
  55. package/scripts/rate-limiter.js +3 -3
  56. package/scripts/statusline-links.js +238 -0
  57. package/scripts/statusline-local-stats.js +2 -0
  58. package/scripts/statusline.sh +200 -10
  59. package/scripts/sync-github-about.js +7 -4
  60. package/scripts/tool-registry.js +2 -2
  61. package/scripts/workflow-sentinel.js +197 -39
  62. package/skills/thumbgate/SKILL.md +1 -1
  63. package/src/api/server.js +12 -1
@@ -0,0 +1,696 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ const fs = require('fs');
5
+ const path = require('path');
6
+ const { resolveFeedbackDir } = require('./feedback-paths');
7
+
8
// Intervention labels the policy can emit. The array order is also the
// iteration order used by every per-label loop in this file.
const LABELS = ['allow', 'recall', 'verify', 'warn', 'deny'];

// Milliseconds in one day.
const DAY_MS = 1000 * 60 * 60 * 24;

// Holdout sizing knobs consumed by splitExamples().
const DEFAULT_HOLDOUT_RATIO = 0.2;
const MIN_HOLDOUT_EXAMPLES = 5;

// Smallest example count for which the policy is considered usable.
const MIN_TRAINING_EXAMPLES = 8;

// Default cap on the number of distinct tokens tokenizeText() returns.
const MAX_TEXT_TOKENS = 24;

// File name of the persisted model inside the feedback directory.
const MODEL_FILENAME = 'intervention-policy.json';
15
+
16
// Ordered path-prefix rules used by classifySurface(); the first rule whose
// pattern matches the (forward-slash, "./"-stripped) path decides the surface.
const SURFACE_RULES = [
  {
    key: 'policy',
    pattern: /^(?:AGENTS\.md|CLAUDE(?:\.local)?\.md|GEMINI\.md|config\/gates\/|config\/mcp-allowlists\.json|scripts\/tool-registry\.js)/i,
  },
  {
    key: 'release',
    pattern: /^(?:package\.json|package-lock\.json|server\.json|\.github\/workflows\/|scripts\/publish-decision\.js|scripts\/pr-manager\.js)/i,
  },
  {
    key: 'runtime',
    pattern: /^(?:scripts\/|src\/api\/|adapters\/mcp\/)/i,
  },
  {
    key: 'tests',
    pattern: /^(?:tests\/|proof\/)/i,
  },
  {
    key: 'docs',
    pattern: /^(?:docs\/|README\.md|CHANGELOG\.md|WORKFLOW\.md)/i,
  },
  {
    key: 'public',
    pattern: /^(?:public\/|\.well-known\/)/i,
  },
];
24
+
25
// Words tokenizeText() drops because they carry no signal for the classifier:
// common English filler plus vocabulary that appears in nearly every record.
const TEXT_STOPWORDS = new Set([
  // generic filler
  'the', 'and', 'for', 'that', 'this', 'with', 'from', 'have', 'has', 'had',
  'were', 'been', 'into', 'your', 'their', 'about', 'after', 'before', 'while',
  'then', 'than', 'just', 'very', 'more', 'when', 'what', 'which', 'would',
  'could', 'should', 'again', 'same',
  // domain boilerplate
  'tool', 'action', 'agent', 'workflow',
  'thumbs', 'positive', 'negative', 'signal', 'recorded',
]);
32
+
33
/**
 * Builds the path of the persisted policy model.
 *
 * @param {string} [feedbackDir] - Passed to resolveFeedbackDir() as `feedbackDir`.
 * @returns {string} The resolved directory joined with MODEL_FILENAME.
 */
function modelPathFor(feedbackDir) {
  const baseDir = resolveFeedbackDir({ feedbackDir });
  return path.join(baseDir, MODEL_FILENAME);
}
36
+
37
/**
 * Reads a JSON Lines file into an array of parsed values.
 *
 * Lines that fail to parse, and lines whose parsed value is falsy, are
 * skipped. A missing or blank file yields an empty array.
 *
 * @param {string} filePath - Path of the .jsonl file.
 * @returns {Array<*>} Parsed records in file order.
 */
function readJSONL(filePath) {
  if (!fs.existsSync(filePath)) {
    return [];
  }

  const content = fs.readFileSync(filePath, 'utf8').trim();
  if (!content) {
    return [];
  }

  const records = [];
  for (const line of content.split('\n')) {
    let parsed = null;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Best-effort reader: a malformed line is ignored, not fatal.
      parsed = null;
    }
    if (parsed) {
      records.push(parsed);
    }
  }
  return records;
}
52
+
53
/**
 * Divides without ever dividing by a falsy denominator.
 *
 * @param {number} numerator
 * @param {number} denominator
 * @returns {number} `numerator / denominator`, or 0 when the denominator is falsy.
 */
function safeRate(numerator, denominator) {
  return denominator ? numerator / denominator : 0;
}
57
+
58
/**
 * Canonicalizes free text for tokenization.
 *
 * Lower-cases the input, replaces the path segment following `/users/` with
 * `redacted`, turns every run of characters outside `a-z 0-9 / _ -` into one
 * space, and trims the result. Falsy input becomes the empty string.
 *
 * @param {*} value - Anything; coerced with String().
 * @returns {string} The normalized text.
 */
function normalizeText(value) {
  const lowered = String(value || '').toLowerCase();
  const redacted = lowered.replace(/\/users\/[^\s/]+/g, '/users/redacted');
  const collapsed = redacted.replace(/[^a-z0-9/_-]+/g, ' ');
  return collapsed.trim();
}
65
+
66
/**
 * Splits text into distinct, meaningful tokens.
 *
 * The text is normalized with normalizeText(); words shorter than three
 * characters and words in TEXT_STOPWORDS are dropped; duplicates are removed
 * keeping first-occurrence order; at most `limit` tokens are returned.
 *
 * @param {*} value - Text to tokenize; falsy input yields `[]`.
 * @param {number} [limit=MAX_TEXT_TOKENS] - Maximum number of tokens returned.
 * @returns {string[]} Unique tokens in order of first appearance.
 */
function tokenizeText(value, limit = MAX_TEXT_TOKENS) {
  if (!value) {
    return [];
  }

  const kept = new Set();
  for (const word of normalizeText(value).split(/\s+/)) {
    if (word.length < 3 || TEXT_STOPWORDS.has(word)) {
      continue;
    }
    kept.add(word);
  }
  return Array.from(kept).slice(0, limit);
}
73
+
74
/**
 * Adds one normalized feature token to a Set.
 *
 * The token goes through normalizeText() and any whitespace runs left behind
 * are replaced with `_`. Tokens that normalize to the empty string are ignored.
 *
 * @param {Set<string>} tokens - Destination set (mutated).
 * @param {*} token - Raw token.
 * @returns {void}
 */
function pushToken(tokens, token) {
  const cleaned = normalizeText(token);
  if (cleaned) {
    tokens.add(cleaned.replace(/\s+/g, '_'));
  }
}
79
+
80
/**
 * Tokenizes free text and adds each token to the set as `<prefix>:<token>`.
 *
 * @param {Set<string>} tokens - Destination set (mutated through pushToken()).
 * @param {string} prefix - Namespace placed before each token.
 * @param {*} value - Text handed to tokenizeText().
 * @param {number} [limit=6] - Maximum number of text tokens to add.
 * @returns {void}
 */
function pushTextTokens(tokens, prefix, value, limit = 6) {
  tokenizeText(value, limit).forEach((word) => {
    pushToken(tokens, `${prefix}:${word}`);
  });
}
85
+
86
/**
 * Maps the various spellings of a thumbs signal onto `positive`/`negative`.
 *
 * @param {*} value - Raw signal value.
 * @returns {string} `'positive'`, `'negative'`, the normalized text when it is
 *   some other non-empty value, or `'unknown'` when it normalizes to nothing.
 */
function normalizeSignal(value) {
  const text = normalizeText(value);
  switch (text) {
    case 'up':
    case 'positive':
    case 'thumbsup':
    case 'thumbs_up':
    case 'thumbs-up':
      return 'positive';
    case 'down':
    case 'negative':
    case 'thumbsdown':
    case 'thumbs_down':
    case 'thumbs-down':
      return 'negative';
    default:
      return text || 'unknown';
  }
}
92
+
93
/**
 * Classifies a repository-relative path into a surface bucket.
 *
 * Backslashes are converted to `/` and a leading `./` is removed before the
 * path is tested against SURFACE_RULES in order.
 *
 * @param {*} filePath - Path to classify.
 * @returns {string} `'unknown'` for an empty path, the key of the first
 *   matching rule, or `'product'` when no rule matches.
 */
function classifySurface(filePath) {
  const cleaned = String(filePath || '').replace(/\\/g, '/').replace(/^\.\//, '');
  if (cleaned === '') {
    return 'unknown';
  }
  const matched = SURFACE_RULES.find((rule) => rule.pattern.test(cleaned));
  return matched ? matched.key : 'product';
}
101
+
102
/**
 * Derives classifier tokens from a shell command string.
 *
 * Emits coarse `cmd:*` markers for recognised command families (push, force
 * push, PR create/merge, publish, destructive delete, verification, release,
 * docs), followed by up to eight `cmdtok:*` tokens from the command text.
 *
 * @param {*} command - Raw command line; falsy or punctuation-only input yields `[]`.
 * @returns {string[]} Tokens in insertion order, already normalized by pushToken().
 */
function extractCommandTokens(command) {
  const tokens = new Set();
  const text = normalizeText(command);
  if (!text) return [...tokens];

  // normalizeText() replaces ':' with a space, so colon-bearing script names
  // (`prove:*`, `self-heal:check`) can never be found in `text`. Match those
  // against the lower-cased raw command instead.
  const rawCommand = String(command || '').toLowerCase();
  const isVerification =
    /\b(test|jest|vitest|coverage)\b/.test(text) ||
    /\bprove:|\bself-heal:check\b/.test(rawCommand);

  if (/\bgit push\b/.test(text)) pushToken(tokens, 'cmd:git_push');
  if (/\bgit push\b.*(?:--force|-f)\b/.test(text)) pushToken(tokens, 'cmd:force_push');
  if (/\bgh pr create\b/.test(text)) pushToken(tokens, 'cmd:pr_create');
  if (/\bgh pr merge\b/.test(text)) pushToken(tokens, 'cmd:pr_merge');
  if (/\bnpm publish\b|\byarn publish\b|\bpnpm publish\b/.test(text)) pushToken(tokens, 'cmd:publish');
  if (/\brm -rf\b/.test(text)) pushToken(tokens, 'cmd:destructive_delete');
  if (isVerification) pushToken(tokens, 'cmd:verification');
  if (/\b(deploy|release|tag)\b/.test(text)) pushToken(tokens, 'cmd:release');
  if (/\b(readme|docs?)\b/.test(text)) pushToken(tokens, 'cmd:docs');

  pushTextTokens(tokens, 'cmdtok', text, 8);
  return [...tokens];
}
120
+
121
/**
 * Derives classifier tokens from a single file path.
 *
 * Emits `surface:<bucket>` from classifySurface(), `path:<first segment>`, and
 * `ext:<extension>` when the path has one.
 *
 * @param {*} filePath - Path; backslashes become `/` and a leading `./` is dropped.
 * @returns {string[]} Tokens in insertion order; `[]` for an empty path.
 */
function extractFileTokens(filePath) {
  const collected = new Set();
  const cleaned = String(filePath || '').replace(/\\/g, '/').replace(/^\.\//, '');
  if (cleaned) {
    pushToken(collected, `surface:${classifySurface(cleaned)}`);

    const [firstSegment] = cleaned.split('/');
    if (firstSegment) {
      pushToken(collected, `path:${firstSegment}`);
    }

    const extension = path.extname(cleaned).replace('.', '');
    if (extension) {
      pushToken(collected, `ext:${extension}`);
    }
  }
  return Array.from(collected);
}
133
+
134
/**
 * Flattens raw feature parts into a de-duplicated, normalized token list.
 *
 * @param {Array<*>} [parts=[]] - Tokens and/or arrays of tokens (flattened one
 *   level); falsy members are skipped.
 * @returns {string[]} Unique tokens in first-seen order, normalized by pushToken().
 */
function buildFeatureTokens(parts = []) {
  const unique = new Set();
  parts
    .flat()
    .filter(Boolean)
    .forEach((part) => pushToken(unique, part));
  return Array.from(unique);
}
141
+
142
/**
 * Reads and parses a JSON file, tolerating absence and corruption.
 *
 * @param {string} filePath - Path of the JSON file.
 * @returns {*} The parsed value, or `null` when the file does not exist or
 *   cannot be read or parsed.
 */
function maybeReadJson(filePath) {
  if (fs.existsSync(filePath)) {
    try {
      const contents = fs.readFileSync(filePath, 'utf8');
      return JSON.parse(contents);
    } catch {
      // Unreadable or malformed file: fall through to the null result.
    }
  }
  return null;
}
150
+
151
/**
 * Formats a timestamp as a `YYYY-MM-DD` key using local-time calendar fields.
 *
 * @param {Date|string|number} value - A Date, or anything `new Date()` accepts.
 * @returns {string|null} The day key, or `null` when the value is not a valid date.
 */
function toLocalDayKey(value) {
  const date = value instanceof Date ? value : new Date(value);
  if (Number.isNaN(date.getTime())) {
    return null;
  }
  const twoDigits = (part) => String(part).padStart(2, '0');
  return [
    date.getFullYear(),
    twoDigits(date.getMonth() + 1),
    twoDigits(date.getDate()),
  ].join('-');
}
159
+
160
/**
 * Tells whether text mentions testing or verification vocabulary.
 *
 * @param {string} text - Text to scan (case-insensitive, whole words).
 * @returns {boolean} True when any verification keyword is present.
 */
function detectVerificationSignal(text) {
  const verificationWords = /\b(test|tests|verify|verified|verification|coverage|proof|failing|failed|assert|ci)\b/i;
  return verificationWords.test(text);
}
163
+
164
/**
 * Tells whether text mentions blocking, release, secret or destructive vocabulary.
 *
 * @param {string} text - Text to scan (case-insensitive, whole words/phrases).
 * @returns {boolean} True when any deny-style keyword is present.
 */
function detectDenySignal(text) {
  const denyWords = /\b(block|blocked|deny|denied|force push|protected|publish|release|secret|security|credential|rm -rf|destructive)\b/i;
  return denyWords.test(text);
}
167
+
168
/**
 * Tells whether text mentions repetition, history or lesson-recall vocabulary.
 *
 * @param {string} text - Text to scan (case-insensitive, whole words/phrases).
 * @returns {boolean} True when any recall-style keyword is present.
 */
function detectRecallSignal(text) {
  const recallWords = /\b(recall|lesson|again|repeat|repeated|same pattern|same mistake|prior|history|context|retrieve_lessons)\b/i;
  return recallWords.test(text);
}
171
+
172
/**
 * Chooses the training label for a feedback-log entry.
 *
 * Positive signals map to `allow`; anything that is not clearly negative maps
 * to `warn`. Negative entries are inspected in priority order: verification
 * wording (or a `verification` critical step) gives `verify`; deny-style
 * wording gives `warn`; recall wording or a repeat/lesson tag gives `recall`;
 * everything else gives `warn`.
 *
 * @param {object} entry - Feedback record.
 * @returns {string} One of `'allow'`, `'verify'`, `'recall'`, `'warn'`.
 */
function deriveLabelFromFeedback(entry) {
  switch (normalizeSignal(entry.signal || entry.feedback)) {
    case 'positive':
      return 'allow';
    case 'negative':
      break;
    default:
      return 'warn';
  }

  const diagnosis = entry.diagnosis || {};
  const lowerTags = Array.isArray(entry.tags)
    ? entry.tags.map((tag) => String(tag).toLowerCase())
    : [];

  // All free-text evidence attached to the entry, joined for keyword scans.
  const evidence = [
    entry.context,
    entry.whatWentWrong,
    entry.what_went_wrong,
    entry.whatToChange,
    entry.what_to_change,
    diagnosis.rootCauseCategory,
    diagnosis.criticalFailureStep,
    lowerTags.join(' '),
  ].filter(Boolean).join(' ');

  const failedAtVerification =
    String(diagnosis.criticalFailureStep || '').toLowerCase() === 'verification';
  if (detectVerificationSignal(evidence) || failedAtVerification) {
    return 'verify';
  }

  // Deny-style wording in feedback is labelled 'warn' here, and is checked
  // before the recall rule.
  if (detectDenySignal(evidence)) {
    return 'warn';
  }

  const tagSuggestsRecall = lowerTags.some((tag) => tag.includes('repeat') || tag.includes('lesson'));
  if (detectRecallSignal(evidence) || tagSuggestsRecall) {
    return 'recall';
  }

  return 'warn';
}
201
+
202
/**
 * Chooses the training label for an audit-trail entry.
 *
 * @param {object} entry - Audit record with a `decision` field.
 * @returns {string|null} The normalized decision when it is `allow`, `deny`
 *   or `warn`; otherwise `null`.
 */
function deriveLabelFromAudit(entry) {
  const decision = normalizeText(entry.decision);
  const recognised = ['allow', 'deny', 'warn'];
  return recognised.includes(decision) ? decision : null;
}
209
+
210
/**
 * Chooses the training label for a diagnostic-log entry.
 *
 * Checked in order: deny wording or a `guardrail_triggered` root cause gives
 * `deny`; a `verification` step or verification wording gives `verify`; recall
 * wording or an intent/context/plan/memory/retrieval root cause gives
 * `recall`; anything else gives `warn`.
 *
 * @param {object} entry - Diagnostic record.
 * @returns {string} One of `'deny'`, `'verify'`, `'recall'`, `'warn'`.
 */
function deriveLabelFromDiagnostic(entry) {
  const diagnosis = entry && entry.diagnosis ? entry.diagnosis : {};
  const rootCause = String(diagnosis.rootCauseCategory || '').toLowerCase();
  const step = String(diagnosis.criticalFailureStep || entry.step || '').toLowerCase();
  const evidence = [rootCause, step, entry.context].filter(Boolean).join(' ');

  const isDeny = detectDenySignal(evidence) || rootCause === 'guardrail_triggered';
  if (isDeny) {
    return 'deny';
  }

  const isVerify = step === 'verification' || detectVerificationSignal(evidence);
  if (isVerify) {
    return 'verify';
  }

  const isRecall = detectRecallSignal(evidence) || /intent|context|plan|memory|retrieval/.test(rootCause);
  return isRecall ? 'recall' : 'warn';
}
221
+
222
/**
 * Converts a feedback-log entry into a labelled training example.
 *
 * Tokens cover the record kind, signal, tool, skill, diagnosis fields,
 * command markers from `context`, tags, file paths from `richContext`,
 * violated constraints, and up to ten free-text words.
 *
 * @param {object} entry - Feedback record.
 * @returns {{id: *, source: string, label: string, timestamp: string, tokens: string[]}|null}
 *   The example, or `null` when no tokens were produced. A missing timestamp
 *   is replaced with the current time.
 */
function buildFeedbackExample(entry) {
  const diagnosis = entry.diagnosis || {};
  const toolName = entry.toolName || entry.tool_name || diagnosis.toolName || 'unknown';

  const tagTokens = Array.isArray(entry.tags)
    ? entry.tags.map((tag) => `tag:${String(tag).toLowerCase()}`)
    : [];

  const fileTokens = entry.richContext && Array.isArray(entry.richContext.filePaths)
    ? entry.richContext.filePaths.flatMap((filePath) => extractFileTokens(filePath))
    : [];

  const narrative = [
    entry.context,
    entry.whatWentWrong,
    entry.what_went_wrong,
    entry.whatToChange,
    entry.what_to_change,
  ].filter(Boolean).join(' ');
  const textTokens = tokenizeText(narrative, 10).map((token) => `text:${token}`);

  const tokens = buildFeatureTokens([
    `kind:feedback`,
    `signal:${normalizeSignal(entry.signal || entry.feedback)}`,
    `tool:${toolName}`,
    entry.skill ? `skill:${entry.skill}` : null,
    diagnosis.rootCauseCategory ? `root:${diagnosis.rootCauseCategory}` : null,
    diagnosis.criticalFailureStep ? `step:${diagnosis.criticalFailureStep}` : null,
    ...extractCommandTokens(entry.context || ''),
    ...tagTokens,
    ...fileTokens,
    ...pushableDiagnosisViolationTokens(diagnosis),
    ...textTokens,
  ]);

  if (tokens.length === 0) {
    return null;
  }

  return {
    id: entry.id || null,
    source: 'feedback',
    label: deriveLabelFromFeedback(entry),
    timestamp: entry.timestamp || new Date().toISOString(),
    tokens,
  };
}
257
+
258
/**
 * Lists `constraint:<id>` tokens for the violations recorded on a diagnosis.
 *
 * @param {object} [diagnosis={}] - Diagnosis object; `violations` is used only
 *   when it is an array.
 * @returns {string[]} One raw (not yet normalized) token per violation that
 *   has a truthy `constraintId`.
 */
function pushableDiagnosisViolationTokens(diagnosis = {}) {
  if (!Array.isArray(diagnosis.violations)) {
    return [];
  }
  return diagnosis.violations
    .filter((violation) => violation && violation.constraintId)
    .map((violation) => `constraint:${violation.constraintId}`);
}
268
+
269
/**
 * Converts an audit-trail entry into a labelled training example.
 *
 * Entries whose decision is not allow/deny/warn are skipped. Tokens cover the
 * decision, tool, gate, source, severity, command markers, changed-file
 * features and up to eight words of the audit message.
 *
 * @param {object} entry - Audit record.
 * @returns {{id: *, source: string, label: string, timestamp: string, tokens: string[]}|null}
 *   The example, or `null` when the entry has no usable decision. A missing
 *   timestamp is replaced with the current time.
 */
function buildAuditExample(entry) {
  const label = deriveLabelFromAudit(entry);
  if (!label) {
    return null;
  }

  const toolInput = entry.toolInput && typeof entry.toolInput === 'object' ? entry.toolInput : {};

  // File paths may arrive under snake_case or camelCase keys, as lists or
  // as a single string; gather every variant.
  const changedFiles = [
    ...(Array.isArray(toolInput.changed_files) ? toolInput.changed_files : []),
    ...(Array.isArray(toolInput.changedFiles) ? toolInput.changedFiles : []),
    ...(typeof toolInput.filePath === 'string' ? [toolInput.filePath] : []),
    ...(typeof toolInput.file_path === 'string' ? [toolInput.file_path] : []),
  ].filter(Boolean);

  const messageTokens = tokenizeText(entry.message || '', 8).map((token) => `msg:${token}`);

  const tokens = buildFeatureTokens([
    'kind:audit',
    `decision:${entry.decision || 'allow'}`,
    `tool:${entry.toolName || 'unknown'}`,
    entry.gateId ? `gate:${entry.gateId}` : null,
    entry.source ? `source:${entry.source}` : null,
    entry.severity ? `severity:${entry.severity}` : null,
    ...extractCommandTokens(toolInput.command || ''),
    ...changedFiles.flatMap((filePath) => extractFileTokens(filePath)),
    ...messageTokens,
  ]);

  return {
    id: entry.id || null,
    source: 'audit',
    label,
    timestamp: entry.timestamp || new Date().toISOString(),
    tokens,
  };
}
299
+
300
/**
 * Converts a diagnostic-log entry into a labelled training example.
 *
 * Tokens cover the source, diagnosis root cause and step, violated
 * constraints, metadata tags and skill, and up to ten words of context.
 *
 * @param {object} entry - Diagnostic record.
 * @returns {{id: *, source: string, label: string, timestamp: string, tokens: string[]}|null}
 *   The example, or `null` when no tokens were produced. A missing timestamp
 *   is replaced with the current time.
 */
function buildDiagnosticExample(entry) {
  const label = deriveLabelFromDiagnostic(entry);
  const diagnosis = entry.diagnosis || {};
  const metadata = entry.metadata;

  const tagTokens = metadata && Array.isArray(metadata.tags)
    ? metadata.tags.map((tag) => `tag:${String(tag).toLowerCase()}`)
    : [];
  const skillTokens = metadata && metadata.skill ? [`skill:${metadata.skill}`] : [];
  const contextTokens = tokenizeText(entry.context || '', 10).map((token) => `ctx:${token}`);

  const tokens = buildFeatureTokens([
    'kind:diagnostic',
    entry.source ? `source:${entry.source}` : null,
    diagnosis.rootCauseCategory ? `root:${diagnosis.rootCauseCategory}` : null,
    diagnosis.criticalFailureStep ? `step:${diagnosis.criticalFailureStep}` : null,
    ...pushableDiagnosisViolationTokens(diagnosis),
    ...tagTokens,
    ...skillTokens,
    ...contextTokens,
  ]);

  if (tokens.length === 0) {
    return null;
  }

  return {
    id: entry.id || null,
    source: 'diagnostic',
    label,
    timestamp: entry.timestamp || new Date().toISOString(),
    tokens,
  };
}
325
+
326
/**
 * Loads every log in a feedback directory and turns it into training examples.
 *
 * Reads `feedback-log.jsonl`, `audit-trail.jsonl` and `diagnostic-log.jsonl`,
 * converts each record with the matching builder, and returns the examples
 * sorted oldest-first so that splitExamples() can hold out the newest ones.
 *
 * @param {string} [feedbackDir] - Passed to resolveFeedbackDir() as `feedbackDir`.
 * @returns {{examples: object[], sourceCounts: {feedback: number, audit: number, diagnostic: number}}}
 *   The sorted examples and how many came from each log.
 */
function buildExamplesFromFeedbackDir(feedbackDir) {
  const resolvedDir = resolveFeedbackDir({ feedbackDir });
  const sources = [
    ['feedback', 'feedback-log.jsonl', buildFeedbackExample],
    ['audit', 'audit-trail.jsonl', buildAuditExample],
    ['diagnostic', 'diagnostic-log.jsonl', buildDiagnosticExample],
  ];

  const examples = [];
  const sourceCounts = { feedback: 0, audit: 0, diagnostic: 0 };

  for (const [source, fileName, buildExample] of sources) {
    for (const entry of readJSONL(path.join(resolvedDir, fileName))) {
      const example = buildExample(entry);
      if (!example) continue;
      sourceCounts[source] += 1;
      examples.push(example);
    }
  }

  // Timestamps come straight from log records and may be ISO strings, epoch
  // numbers, or garbage. A comparator must never return NaN (the resulting
  // order would be implementation-defined), so unparseable values sort as
  // epoch 0. Array.prototype.sort is stable, so ties keep their load order.
  const toMillis = (value) => {
    const millis = new Date(value).getTime();
    return Number.isNaN(millis) ? 0 : millis;
  };
  examples.sort((left, right) => toMillis(left.timestamp) - toMillis(right.timestamp));

  return {
    examples,
    sourceCounts,
  };
}
363
+
364
/**
 * Splits examples into a training prefix and a holdout suffix.
 *
 * With fewer than `2 * MIN_HOLDOUT_EXAMPLES` examples everything is used for
 * training. Otherwise the tail is held out: at least MIN_HOLDOUT_EXAMPLES or
 * DEFAULT_HOLDOUT_RATIO of the data, but never so much that fewer than
 * MIN_TRAINING_EXAMPLES remain for training.
 *
 * @param {object[]} examples - Examples in the order they should be split.
 * @returns {{train: object[], holdout: object[]}} New arrays; the input is not mutated.
 */
function splitExamples(examples) {
  const total = examples.length;
  if (total < MIN_HOLDOUT_EXAMPLES * 2) {
    return { train: examples.slice(), holdout: [] };
  }

  const desiredHoldout = Math.max(MIN_HOLDOUT_EXAMPLES, Math.floor(total * DEFAULT_HOLDOUT_RATIO));
  const boundary = Math.max(MIN_TRAINING_EXAMPLES, total - desiredHoldout);
  const train = examples.slice(0, boundary);
  const holdout = examples.slice(boundary);
  return { train, holdout };
}
375
+
376
/**
 * Creates an untrained model with every counter at zero.
 *
 * @returns {object} A fresh model: per-label example counts, per-label token
 *   totals, per-label token count maps, zeroed metrics, empty `sourceCounts`
 *   and a null `updatedAt`.
 */
function createEmptyModel() {
  // Build each per-label table independently so no two share an object.
  const perLabel = (makeValue) => Object.fromEntries(LABELS.map((label) => [label, makeValue()]));

  return {
    version: 1,
    modelType: 'multinomial_naive_bayes',
    labels: LABELS.slice(),
    exampleCount: 0,
    labelCounts: perLabel(() => 0),
    labelTokenTotals: perLabel(() => 0),
    labelTokenCounts: perLabel(() => ({})),
    vocabularySize: 0,
    metrics: {
      trainingAccuracy: 0,
      holdoutAccuracy: 0,
      holdoutSize: 0,
    },
    sourceCounts: {},
    updatedAt: null,
  };
}
395
+
396
/**
 * Accumulates per-label example and token counts from training examples.
 *
 * Examples whose label is not in LABELS are counted under `warn`. Tokens are
 * stringified and trimmed; blank tokens are skipped.
 *
 * @param {Iterable<{label: string, tokens?: Iterable<*>}>} examples - Training data.
 * @returns {object} A model from createEmptyModel() with counts and
 *   `vocabularySize` filled in; metrics are left at their defaults.
 */
function fitNaiveBayes(examples) {
  const model = createEmptyModel();
  const seenTokens = new Set();

  for (const { label: rawLabel, tokens } of examples) {
    const label = LABELS.includes(rawLabel) ? rawLabel : 'warn';
    const tokenCounts = model.labelTokenCounts[label];

    model.exampleCount += 1;
    model.labelCounts[label] += 1;

    for (const rawToken of tokens || []) {
      const token = String(rawToken || '').trim();
      if (token === '') {
        continue;
      }
      seenTokens.add(token);
      tokenCounts[token] = (tokenCounts[token] || 0) + 1;
      model.labelTokenTotals[label] += 1;
    }
  }

  model.vocabularySize = seenTokens.size;
  return model;
}
417
+
418
/**
 * Scores a token set against the model and ranks the intervention labels.
 *
 * Uses multinomial naive Bayes with add-one smoothing: for each label the
 * score is the log prior plus, for every in-vocabulary token,
 * `log((count + 1) / (labelTokenTotal + vocabularySize))`. The term is applied
 * to EVERY label, including labels that never saw the token. Adding it only
 * for labels that had seen the token would make evidence for a label lower
 * its score relative to labels with no evidence at all.
 *
 * Tokens seen under no label are ignored: they carry no evidence and would
 * only penalize labels in proportion to their size.
 *
 * @param {object} model - Model from fitNaiveBayes() or loaded from disk. Missing
 *   `labelTokenTotals` / `vocabularySize` are recomputed from `labelTokenCounts`.
 * @param {Iterable<string>} [tokens] - Candidate feature tokens; duplicates count once.
 * @returns {{label: string, confidence: number, probabilities: Object<string, number>}}
 *   The best label, its probability, and all label probabilities in ranked
 *   order, each rounded to four decimals. Ties resolve in LABELS order.
 */
function scoreExample(model, tokens) {
  const totalExamples = Math.max(1, model.exampleCount || 0);
  const labelCounts = model.labelCounts || {};
  const labelTokenCounts = model.labelTokenCounts || {};
  const labelTokenTotals = model.labelTokenTotals || {};
  const hasOwn = Object.prototype.hasOwnProperty;

  // Own-property lookups only, so tokens such as "constructor" or "__proto__"
  // can never resolve to something inherited from Object.prototype.
  const countOf = (label, token) => {
    const counts = labelTokenCounts[label];
    if (!counts || !hasOwn.call(counts, token)) return 0;
    return Number(counts[token]) || 0;
  };

  const tokenTotalOf = (label) => {
    const stored = Number(labelTokenTotals[label]);
    if (Number.isFinite(stored) && stored > 0) return stored;
    return Object.values(labelTokenCounts[label] || {})
      .reduce((sum, value) => sum + (Number(value) || 0), 0);
  };

  let vocabularySize = Number(model.vocabularySize) || 0;
  if (vocabularySize <= 0) {
    const vocabulary = new Set();
    for (const label of LABELS) {
      for (const token of Object.keys(labelTokenCounts[label] || {})) vocabulary.add(token);
    }
    vocabularySize = vocabulary.size;
  }
  vocabularySize = Math.max(1, vocabularySize);

  const knownTokens = [...new Set(tokens || [])]
    .filter((token) => LABELS.some((label) => countOf(label, token) > 0));

  const raw = {};
  for (const label of LABELS) {
    const labelCount = labelCounts[label] || 0;
    const denominator = tokenTotalOf(label) + vocabularySize;
    let score = Math.log((labelCount + 1) / (totalExamples + LABELS.length));
    for (const token of knownTokens) {
      score += Math.log((countOf(label, token) + 1) / denominator);
    }
    raw[label] = score;
  }

  // Softmax over the log scores; subtracting the max keeps exp() in range.
  const maxScore = Math.max(...Object.values(raw));
  const exps = Object.fromEntries(Object.entries(raw).map(([label, score]) => [label, Math.exp(score - maxScore)]));
  const total = Object.values(exps).reduce((sum, value) => sum + value, 0) || 1;
  const probabilities = Object.fromEntries(Object.entries(exps).map(([label, value]) => [label, value / total]));
  const ranked = [...LABELS].sort((left, right) => probabilities[right] - probabilities[left]);

  return {
    label: ranked[0],
    confidence: Number((probabilities[ranked[0]] || 0).toFixed(4)),
    probabilities: Object.fromEntries(ranked.map((label) => [label, Number((probabilities[label] || 0).toFixed(4))])),
  };
}
449
+
450
/**
 * Measures how often the model predicts the label attached to each example.
 *
 * Labels outside LABELS are evaluated as `warn`, mirroring how fitNaiveBayes()
 * counts them during training. Without that coercion such an example would
 * crash the evaluation even though training accepts it.
 *
 * @param {object} model - Model passed to scoreExample().
 * @param {Array<{label: string, tokens: string[]}>} examples - Labelled examples.
 * @returns {{accuracy: number, total: number, correct: number, labelMetrics: Object<string, {total: number, correct: number, accuracy: number}>}}
 *   Overall and per-label accuracy (rounded to four decimals). All zeros when
 *   `examples` is empty or not an array.
 */
function evaluateModel(model, examples) {
  const labelMetrics = Object.fromEntries(LABELS.map((label) => [label, { total: 0, correct: 0, accuracy: 0 }]));

  if (!Array.isArray(examples) || examples.length === 0) {
    return {
      accuracy: 0,
      total: 0,
      correct: 0,
      labelMetrics,
    };
  }

  let correct = 0;
  for (const example of examples) {
    const expected = LABELS.includes(example.label) ? example.label : 'warn';
    const prediction = scoreExample(model, example.tokens);
    labelMetrics[expected].total += 1;
    if (prediction.label === expected) {
      correct += 1;
      labelMetrics[expected].correct += 1;
    }
  }

  for (const label of LABELS) {
    const metric = labelMetrics[label];
    metric.accuracy = Number(safeRate(metric.correct, metric.total).toFixed(4));
  }

  return {
    accuracy: Number(safeRate(correct, examples.length).toFixed(4)),
    total: examples.length,
    correct,
    labelMetrics,
  };
}
484
+
485
/**
 * Lists the most frequent tokens recorded for each label.
 *
 * @param {object} model - Model with a `labelTokenCounts` table.
 * @param {number} [limit=5] - Maximum tokens per label.
 * @returns {Object<string, Array<{token: string, count: number}>>} For every
 *   label, tokens ordered by descending count, then by `localeCompare` on the
 *   token text.
 */
function summarizeTopTokens(model, limit = 5) {
  const byCountThenToken = ([tokenA, countA], [tokenB, countB]) =>
    countB - countA || tokenA.localeCompare(tokenB);

  return Object.fromEntries(
    LABELS.map((label) => {
      const top = Object.entries(model.labelTokenCounts[label] || {})
        .sort(byCountThenToken)
        .slice(0, limit)
        .map(([token, count]) => ({ token, count }));
      return [label, top];
    }),
  );
}
495
+
496
/**
 * Trains the intervention policy and attaches evaluation metrics.
 *
 * Two models are fitted: one on the training split only, used to measure
 * training and holdout accuracy, and one on all examples, which is the model
 * returned.
 *
 * @param {object[]} examples - Labelled examples in split order.
 * @param {{topTokenLimit?: number}} [options={}] - `topTokenLimit` caps the
 *   tokens listed per label (falls back to 4 when falsy).
 * @returns {object} The all-data model with `metrics`, `labelMetrics`,
 *   `topTokens` and `updatedAt` populated.
 */
function trainInterventionPolicy(examples, options = {}) {
  const { train, holdout } = splitExamples(examples);
  const evaluationModel = fitNaiveBayes(train);
  const trainingMetrics = evaluateModel(evaluationModel, train);
  const holdoutMetrics = evaluateModel(evaluationModel, holdout);

  const deployedModel = fitNaiveBayes(examples);
  deployedModel.metrics = {
    trainingAccuracy: trainingMetrics.accuracy,
    holdoutAccuracy: holdoutMetrics.accuracy,
    holdoutSize: holdoutMetrics.total,
    trainingExamples: trainingMetrics.total,
  };

  // Report holdout per-label metrics when a holdout exists, else training ones.
  const reported = holdoutMetrics.total > 0 ? holdoutMetrics : trainingMetrics;
  deployedModel.labelMetrics = reported.labelMetrics;
  deployedModel.topTokens = summarizeTopTokens(deployedModel, options.topTokenLimit || 4);
  deployedModel.updatedAt = new Date().toISOString();

  return deployedModel;
}
515
+
516
/**
 * Writes the model to disk as pretty-printed JSON with a trailing newline.
 *
 * The parent directory is created (recursively) when missing.
 *
 * @param {object} model - Model to persist.
 * @param {string} [feedbackDir] - Passed to modelPathFor().
 * @returns {string} The path the model was written to.
 */
function saveInterventionPolicy(model, feedbackDir) {
  const destination = modelPathFor(feedbackDir);
  const serialized = `${JSON.stringify(model, null, 2)}\n`;
  fs.mkdirSync(path.dirname(destination), { recursive: true });
  fs.writeFileSync(destination, serialized);
  return destination;
}
522
+
523
/**
 * Loads the persisted model, if any.
 *
 * @param {string} [feedbackDir] - Passed to modelPathFor().
 * @returns {object|null} The parsed model, or `null` when the file is missing
 *   or cannot be parsed.
 */
function loadInterventionPolicy(feedbackDir) {
  // maybeReadJson() already returns null for a missing file.
  return maybeReadJson(modelPathFor(feedbackDir));
}
528
+
529
/**
 * Rebuilds the examples from disk, trains the policy and saves it.
 *
 * @param {string} [feedbackDir] - Passed to resolveFeedbackDir() as `feedbackDir`.
 * @param {object} [options={}] - Forwarded to trainInterventionPolicy().
 * @returns {{model: object, modelPath: string, examples: object[], sourceCounts: object}}
 *   The trained model (with `sourceCounts` attached), where it was written,
 *   and the data it was trained on.
 */
function trainAndPersistInterventionPolicy(feedbackDir, options = {}) {
  const resolvedDir = resolveFeedbackDir({ feedbackDir });
  const dataset = buildExamplesFromFeedbackDir(resolvedDir);

  const model = trainInterventionPolicy(dataset.examples, options);
  model.sourceCounts = dataset.sourceCounts;

  return {
    model,
    modelPath: saveInterventionPolicy(model, resolvedDir),
    examples: dataset.examples,
    sourceCounts: dataset.sourceCounts,
  };
}
542
+
543
/**
 * Describes a pending runtime action as classifier tokens.
 *
 * @param {object} [params={}] - Runtime context. Fields read: `toolName`,
 *   `riskBand`, `command`, `affectedFiles`, `taskScopeViolation`,
 *   `integrity.blockers`, `memoryGuard`, `blastRadius`, `protectedSurface`.
 * @returns {{tokens: string[], metadata: {toolName: string, affectedFiles: Array, blockerCount: number}}}
 *   Normalized tokens plus a small metadata summary.
 */
function buildRuntimeCandidate(params = {}) {
  const affectedFiles = Array.isArray(params.affectedFiles) ? params.affectedFiles : [];
  const blockers = params.integrity && Array.isArray(params.integrity.blockers) ? params.integrity.blockers : [];
  const memoryGuard = params.memoryGuard || {};
  const blastRadius = params.blastRadius || {};
  const protectedSurface = params.protectedSurface || {};
  const toolName = params.toolName || 'unknown';

  // Shape of the change: how many surfaces and how many files it touches.
  let surfaceShape = 'shape:single_surface';
  if (blastRadius.surfaceCount >= 3) {
    surfaceShape = 'shape:multi_surface';
  } else if (blastRadius.surfaceCount >= 2) {
    surfaceShape = 'shape:two_surface';
  }

  let fileShape = 'shape:no_files';
  if (affectedFiles.length >= 4) {
    fileShape = 'shape:multi_file';
  } else if (affectedFiles.length > 0) {
    fileShape = 'shape:small_file_set';
  }

  const hasUnapprovedProtected = (protectedSurface.unapprovedProtectedFiles || []).length > 0;
  const hasReleaseSensitive = (blastRadius.releaseSensitiveFiles || []).length > 0;

  const tokens = buildFeatureTokens([
    'kind:runtime',
    `tool:${toolName}`,
    params.riskBand ? `risk:${params.riskBand}` : null,
    blastRadius.severity ? `blast:${blastRadius.severity}` : null,
    memoryGuard.mode ? `memory:${memoryGuard.mode}` : null,
    params.taskScopeViolation ? `scope:${params.taskScopeViolation.reasonCode || 'violation'}` : null,
    surfaceShape,
    fileShape,
    hasUnapprovedProtected ? 'protected:unapproved' : null,
    hasReleaseSensitive ? 'release:sensitive' : null,
    ...extractCommandTokens(params.command || ''),
    ...affectedFiles.flatMap((filePath) => extractFileTokens(filePath)),
    // Only the first six blockers contribute tokens.
    ...blockers.slice(0, 6).map((blocker) => `blocker:${blocker.code || 'unknown'}`),
    ...tokenizeText(memoryGuard.reason || '', 6).map((token) => `memorytok:${token}`),
  ]);

  return {
    tokens,
    metadata: {
      toolName,
      affectedFiles,
      blockerCount: blockers.length,
    },
  };
}
575
+
576
/**
 * Recommends an intervention label for a runtime action.
 *
 * Uses `options.model` or the persisted model. When neither exists and the
 * logs hold at least MIN_TRAINING_EXAMPLES examples, a model is trained and
 * persisted on the spot. If there is still no model, or it was trained on too
 * few examples, a disabled result is returned.
 *
 * @param {object} [params={}] - Runtime context for buildRuntimeCandidate();
 *   `params.feedbackDir` is the fallback feedback directory.
 * @param {{feedbackDir?: string, model?: object, candidate?: object}} [options={}]
 *   Overrides for the directory, the model and the prebuilt candidate.
 * @returns {object} `{enabled: false, reason, exampleCount, candidate}` when
 *   the policy is unusable; otherwise `{enabled: true, candidate, prediction,
 *   metrics, topTokens, updatedAt, exampleCount}`.
 */
function getInterventionRecommendation(params = {}, options = {}) {
  const resolvedDir = resolveFeedbackDir({ feedbackDir: options.feedbackDir || params.feedbackDir });
  let model = options.model || loadInterventionPolicy(resolvedDir);
  const candidate = options.candidate || buildRuntimeCandidate(params);

  if (!model) {
    // Bootstrap: train only when the logs already hold enough examples.
    const { examples } = buildExamplesFromFeedbackDir(resolvedDir);
    if (examples.length >= MIN_TRAINING_EXAMPLES) {
      model = trainAndPersistInterventionPolicy(resolvedDir).model;
    }
  }

  const knownExamples = Number(model && model.exampleCount || 0);
  if (!model || knownExamples < MIN_TRAINING_EXAMPLES) {
    return {
      enabled: false,
      reason: 'insufficient_training_examples',
      exampleCount: knownExamples,
      candidate,
    };
  }

  const prediction = scoreExample(model, candidate.tokens);
  const labelTopTokens = model.topTokens && model.topTokens[prediction.label];

  return {
    enabled: true,
    candidate,
    prediction,
    metrics: model.metrics || {},
    topTokens: labelTopTokens ? labelTopTokens : [],
    updatedAt: model.updatedAt || null,
    exampleCount: model.exampleCount || 0,
  };
}
611
+
612
/**
 * Buckets examples per local calendar day for the most recent days.
 *
 * @param {Array<{timestamp: *, label: string}>} examples - Examples to count;
 *   those with an unparseable timestamp are ignored.
 * @param {number} [dayCount=14] - Number of days to report, ending today.
 * @returns {Array<object>} One record per day, oldest first, shaped
 *   `{dayKey, allow, recall, verify, warn, deny, total}`; days without
 *   examples are reported as zeros.
 */
function computeDailySeries(examples, dayCount = 14) {
  const blankRecord = () => Object.fromEntries([...LABELS, 'total'].map((label) => [label, 0]));

  const midnightToday = new Date();
  midnightToday.setHours(0, 0, 0, 0);

  const buckets = new Map();
  for (const example of examples) {
    const key = toLocalDayKey(example.timestamp);
    if (!key) {
      continue;
    }
    let bucket = buckets.get(key);
    if (!bucket) {
      bucket = blankRecord();
      buckets.set(key, bucket);
    }
    bucket[example.label] += 1;
    bucket.total += 1;
  }

  const series = [];
  for (let daysAgo = dayCount - 1; daysAgo >= 0; daysAgo -= 1) {
    // setDate() handles month and year rollover for us.
    const day = new Date(midnightToday);
    day.setDate(midnightToday.getDate() - daysAgo);
    const dayKey = toLocalDayKey(day);
    series.push({ dayKey, ...(buckets.get(dayKey) || blankRecord()) });
  }
  return series;
}
637
+
638
/**
 * Produces a reporting summary of the policy and its recent label activity.
 *
 * Uses the persisted model when present; otherwise trains one in memory from
 * the current logs (without saving it).
 *
 * @param {string} [feedbackDir] - Passed to resolveFeedbackDir() as `feedbackDir`.
 * @param {{dayCount?: number}} [options={}] - `dayCount` sets the length of
 *   the daily series (falls back to 14 when falsy).
 * @returns {object} Model metadata, metrics, source counts, top tokens, the
 *   daily series, totals over the last seven reported days (`recent`) and
 *   `nonAllowRate`, the share of those recent examples not labelled `allow`.
 */
function getInterventionPolicySummary(feedbackDir, options = {}) {
  const resolvedDir = resolveFeedbackDir({ feedbackDir });
  const { examples, sourceCounts } = buildExamplesFromFeedbackDir(resolvedDir);
  const model = loadInterventionPolicy(resolvedDir) || trainInterventionPolicy(examples);
  const daily = computeDailySeries(examples, options.dayCount || 14);

  // Sum the last seven reported days into a single record.
  const recent = Object.fromEntries([...LABELS, 'total'].map((label) => [label, 0]));
  for (const day of daily.slice(-7)) {
    for (const label of LABELS) {
      recent[label] += day[label] || 0;
    }
    recent.total += day.total || 0;
  }

  const recentNonAllow =
    (recent.recall || 0) + (recent.verify || 0) + (recent.warn || 0) + (recent.deny || 0);

  return {
    enabled: Number(model.exampleCount || 0) >= MIN_TRAINING_EXAMPLES,
    modelType: model.modelType,
    exampleCount: model.exampleCount || 0,
    updatedAt: model.updatedAt || null,
    labelCounts: { ...(model.labelCounts || {}) },
    metrics: model.metrics || {},
    sourceCounts,
    topTokens: model.topTokens || {},
    daily,
    recent,
    nonAllowRate: Number(safeRate(recentNonAllow, recent.total || 0).toFixed(4)),
  };
}
669
+
670
+ module.exports = {
671
+ LABELS,
672
+ MIN_TRAINING_EXAMPLES,
673
+ buildExamplesFromFeedbackDir,
674
+ buildRuntimeCandidate,
675
+ createEmptyModel,
676
+ getInterventionPolicySummary,
677
+ getInterventionRecommendation,
678
+ loadInterventionPolicy,
679
+ modelPathFor,
680
+ predictIntervention: scoreExample,
681
+ readJSONL,
682
+ saveInterventionPolicy,
683
+ trainAndPersistInterventionPolicy,
684
+ trainInterventionPolicy,
685
+ };
686
+
687
/**
 * Tells whether this file is the script Node was asked to run.
 *
 * @returns {boolean} True when `process.argv[1]`, resolved to an absolute
 *   path, equals this file's `__filename`.
 */
function isDirectExecution() {
  const entryScript = Array.isArray(process.argv) ? process.argv[1] : undefined;
  return Boolean(entryScript) && path.resolve(entryScript) === __filename;
}
691
+
692
// CLI entry point: train from the directory given as the first argument (or
// the default feedback directory), persist the model, and print the result.
if (isDirectExecution()) {
  const targetDir = process.argv[2] || resolveFeedbackDir();
  const outcome = trainAndPersistInterventionPolicy(targetDir);
  const report = { modelPath: outcome.modelPath, model: outcome.model };
  process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
}