thumbgate 1.5.0 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,354 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * Decision Trace — full observability for gate evaluations.
6
+ *
7
+ * Inspired by Ethan Mollick's observation that operators need to *see* what
8
+ * the agent was thinking when it made a decision. ThumbGate already captures
9
+ * what was blocked; Decision Trace adds:
10
+ *
11
+ * 1. Full audit of every evaluation (passes, blocks, AND near-misses)
12
+ * 2. Near-miss detection: constraints that almost matched
13
+ * 3. Session trace summaries: single-glance safety posture view
14
+ *
15
+ * Near-miss heuristic: extract literal tokens from a regex deny pattern,
16
+ * count how many appear in the input. If at least 50% (but not all) of them
17
+ * appear and the full regex doesn't match, flag as near-miss.
18
+ */
19
+
20
+ const crypto = require('node:crypto');
21
+ const path = require('node:path');
22
+ const { readJsonl, appendJsonl } = require('./fs-utils');
23
+ const { resolveFeedbackDir } = require('./feedback-paths');
24
+ const {
25
+ evaluateConstraints,
26
+ evaluateInvariants,
27
+ loadSpecDir,
28
+ } = require('./spec-gate');
29
+
30
+ const TRACE_FILE = 'decision-trace.jsonl'; // Log file name; getTracePath() joins it onto the feedback directory.
31
+ const NEAR_MISS_THRESHOLD = 0.5; // Inclusive lower bound on the matched-token fraction for a near-miss (see computeNearMiss).
32
+
33
+ // ---------------------------------------------------------------------------
34
+ // Near-Miss Detection
35
+ // ---------------------------------------------------------------------------
36
+
/**
 * Extract human-readable literal tokens from a regex deny pattern.
 *
 * Regex syntax (counted quantifiers, shorthand character classes and the
 * remaining metacharacters) is replaced with spaces; what is left is split on
 * whitespace. Tokens shorter than two characters are dropped and the rest are
 * lower-cased.
 *
 * @param {string|RegExp} pattern - Regex source text, or a RegExp whose
 *   `source` is used.
 * @returns {string[]} Lower-cased literal tokens. Empty when the pattern is
 *   missing, empty, not a string/RegExp, or contains no usable literals.
 */
function extractLiteralTokens(pattern) {
  const source = pattern instanceof RegExp ? pattern.source : pattern;
  if (typeof source !== 'string' || source.length === 0) {
    // A constraint without a usable deny pattern simply has no tokens.
    return [];
  }

  const cleaned = source
    // Counted quantifiers such as {3} or {2,5} are syntax, not literal text.
    // Drop them whole so their digits never surface as a bogus token ("2,5").
    .replace(/\{\d+(?:,\d*)?\}/g, ' ')
    .replace(/\\[sdwbSDWB]/g, ' ') // shorthand character classes
    .replace(/[.*+?^${}()|[\]\\]/g, ' ') // remaining metacharacters
    .trim();

  return cleaned
    .split(/\s+/)
    .filter((token) => token.length >= 2) // single characters are too noisy
    .map((token) => token.toLowerCase());
}
54
+
/**
 * Score how close an input came to a constraint's deny pattern.
 *
 * The score is the fraction of the pattern's literal tokens (as returned by
 * extractLiteralTokens) that occur, case-insensitively, as substrings of the
 * input. A near-miss is a score of at least NEAR_MISS_THRESHOLD but below 1.
 *
 * @param {{deny?: *}} constraint - Constraint whose `deny` pattern is scored.
 * @param {*} inputText - Text to search; null/undefined is treated as empty.
 * @returns {{isNearMiss: boolean, score: number, matchedTokens: number, totalTokens: number}}
 *   `score` is rounded to two decimals. All-zero result when the constraint
 *   has no deny pattern or the pattern yields no tokens.
 */
function computeNearMiss(constraint, inputText) {
  const deny = constraint == null ? undefined : constraint.deny;
  // Without a deny pattern there is nothing to tokenize.
  const tokens = deny == null ? [] : extractLiteralTokens(deny);
  if (tokens.length === 0) {
    return { isNearMiss: false, score: 0, matchedTokens: 0, totalTokens: 0 };
  }

  // String(undefined) / String(null) would produce the words "undefined" /
  // "null", which could spuriously match tokens; treat absent input as empty.
  const haystack = inputText == null ? '' : String(inputText).toLowerCase();
  const matched = tokens.filter((token) => haystack.includes(token)).length;

  const score = matched / tokens.length;
  return {
    isNearMiss: score >= NEAR_MISS_THRESHOLD && score < 1.0,
    score: Math.round(score * 100) / 100,
    matchedTokens: matched,
    totalTokens: tokens.length,
  };
}
79
+
80
+ // ---------------------------------------------------------------------------
81
+ // Trace Evaluation
82
+ // ---------------------------------------------------------------------------
83
+
/**
 * Assemble the text that near-miss detection searches.
 *
 * Joins the truthy values among command, content and tool (in that order)
 * with single spaces.
 *
 * @param {{tool?: *, command?: *, content?: *}} [context] - Evaluation context.
 * @returns {string} Space-joined text; empty when no field is truthy.
 */
function buildCombinedInput({ tool, command, content } = {}) {
  const pieces = [];
  for (const piece of [command, content, tool]) {
    if (piece) {
      pieces.push(piece);
    }
  }
  return pieces.join(' ');
}
90
+
/**
 * Evaluate every spec against a context and return a full decision trace.
 *
 * Each constraint result is annotated with near-miss data; scoring is only
 * attempted for constraints that passed and whose definition is found on the
 * spec by id. Invariant results carry neutral near-miss fields.
 *
 * @param {Iterable<object>} specs - Specs to evaluate.
 * @param {object} [context] - Evaluation context (tool, command, content, action).
 * @returns {object} Trace with traceId, timestamp, allowed, results, the
 *   blocked / nearMisses / passed partitions, counts, and a truncated context.
 */
function traceEvaluation(specs, context = {}) {
  const createdAt = Date.now();
  const suffix = crypto.randomBytes(4).toString('hex');
  const traceId = `trace_${createdAt}_${suffix}`;
  const timestamp = new Date().toISOString();
  const combinedInput = buildCombinedInput(context);

  const results = [];
  for (const spec of specs) {
    const constraintOutcomes = evaluateConstraints(spec, context);
    const invariantOutcomes = evaluateInvariants(spec, context);

    for (const outcome of constraintOutcomes) {
      const definition = spec.constraints.find((candidate) => candidate.id === outcome.constraintId);
      // Blocked constraints matched outright, so only passing ones are scored.
      const scored = outcome.passed && definition
        ? computeNearMiss(definition, combinedInput)
        : { isNearMiss: false, score: 0, matchedTokens: 0, totalTokens: 0 };

      results.push({
        ...outcome,
        nearMiss: scored.isNearMiss,
        nearMissScore: scored.score,
        nearMissDetail: scored.isNearMiss ? scored : null,
      });
    }

    // Invariants have no near-miss concept; give them neutral fields.
    for (const outcome of invariantOutcomes) {
      results.push({ ...outcome, nearMiss: false, nearMissScore: 0, nearMissDetail: null });
    }
  }

  // Partition the annotated results in a single pass.
  const blocked = [];
  const nearMisses = [];
  const passed = [];
  for (const result of results) {
    if (!result.passed) blocked.push(result);
    if (result.nearMiss) nearMisses.push(result);
    if (result.passed && !result.nearMiss) passed.push(result);
  }

  const counts = {
    total: results.length,
    blocked: blocked.length,
    nearMiss: nearMisses.length,
    passed: passed.length,
  };

  return {
    traceId,
    timestamp,
    allowed: blocked.length === 0,
    results,
    blocked,
    nearMisses,
    passed,
    counts,
    context: {
      tool: context.tool || null,
      command: truncate(context.command, 200),
      action: truncate(context.action, 200),
    },
  };
}
158
+
159
+ // ---------------------------------------------------------------------------
160
+ // Trace Persistence
161
+ // ---------------------------------------------------------------------------
162
+
/**
 * Resolve the path of the decision trace log.
 *
 * @param {{feedbackDir?: string}} [options] - A truthy `feedbackDir` overrides
 *   the directory returned by resolveFeedbackDir().
 * @returns {string} The chosen directory joined with TRACE_FILE.
 */
function getTracePath({ feedbackDir } = {}) {
  return path.join(feedbackDir || resolveFeedbackDir(), TRACE_FILE);
}
167
+
/**
 * Append a condensed form of a trace to the trace log.
 *
 * The stored entry keeps the identifiers, verdict, counts and context, plus
 * summarized blocked and near-miss results.
 *
 * @param {object} trace - Trace as produced by traceEvaluation().
 * @param {object} [options] - Forwarded to getTracePath().
 * @returns {object} The entry that was handed to appendJsonl().
 */
function recordTrace(trace, options = {}) {
  const { traceId, timestamp, allowed, counts, context } = trace;
  const blocked = trace.blocked.map(summarizeResult);
  const nearMisses = trace.nearMisses.map(summarizeResult);

  const entry = { traceId, timestamp, allowed, counts, blocked, nearMisses, context };
  const target = getTracePath(options);
  appendJsonl(target, entry);
  return entry;
}
181
+
/**
 * Read the recorded trace entries from the trace log.
 *
 * @param {object} [options] - Forwarded to getTracePath().
 * @returns {*} Whatever readJsonl() returns for the trace log path.
 */
function loadTraces(options = {}) {
  const tracePath = getTracePath(options);
  return readJsonl(tracePath);
}
185
+
/**
 * Reduce an annotated evaluation result to the fields kept in the trace log.
 *
 * @param {object} r - Constraint or invariant result.
 * @returns {{specName: *, id: *, type: *, reason: *, severity: *, nearMissScore: number}}
 *   `id` is the constraint id when truthy, otherwise the invariant id;
 *   `nearMissScore` falls back to 0 when falsy.
 */
function summarizeResult(r) {
  const { specName, constraintId, invariantId, type, reason, severity, nearMissScore } = r;
  return {
    specName,
    id: constraintId || invariantId,
    type,
    reason,
    severity,
    nearMissScore: nearMissScore || 0,
  };
}
196
+
197
+ // ---------------------------------------------------------------------------
198
+ // Session Trace Summary
199
+ // ---------------------------------------------------------------------------
200
+
/**
 * Summarize recorded traces into a single-glance safety posture.
 *
 * Sums each trace's `counts`, tallies blocks per spec and per constraint,
 * and tracks the count and highest score of near-misses per constraint.
 * Null/undefined trace entries are skipped, and a non-array `traces` is
 * treated as empty, so a damaged or missing log cannot crash the summary.
 *
 * @param {Array<object>} traces - Trace entries, e.g. from loadTraces().
 * @returns {object} Totals, `blockRate` / `nearMissRate` as whole percentages
 *   of total checks, `safetyPosture`, and top-10 lists of blocked specs,
 *   blocked constraints and near-misses (each sorted by count, descending).
 */
function summarizeSessionTraces(traces) {
  // A JSONL line containing `null` parses successfully, so filter such entries
  // out instead of dereferencing them.
  const entries = Array.isArray(traces) ? traces.filter((trace) => trace != null) : [];

  let totalChecks = 0;
  let totalBlocked = 0;
  let totalNearMisses = 0;
  let totalPassed = 0;

  const blocksBySpec = new Map();
  const blocksByConstraint = new Map();
  const nearMissByConstraint = new Map();

  const increment = (tally, key) => tally.set(key, (tally.get(key) || 0) + 1);

  for (const trace of entries) {
    const counts = trace.counts || {};
    totalChecks += counts.total || 0;
    totalBlocked += counts.blocked || 0;
    totalNearMisses += counts.nearMiss || 0;
    totalPassed += counts.passed || 0;

    for (const block of trace.blocked || []) {
      increment(blocksBySpec, block.specName || 'unknown');
      increment(blocksByConstraint, block.id || 'unknown');
    }

    for (const nm of trace.nearMisses || []) {
      const key = nm.id || 'unknown';
      const stats = nearMissByConstraint.get(key) || { count: 0, maxScore: 0 };
      stats.count += 1;
      stats.maxScore = Math.max(stats.maxScore, nm.nearMissScore || 0);
      nearMissByConstraint.set(key, stats);
    }
  }

  // Whole-number percentage of all checks; 0 when nothing was checked.
  const percentOfChecks = (count) =>
    (totalChecks > 0 ? Math.round((count / totalChecks) * 100) : 0);

  return {
    totalEvaluations: entries.length,
    totalChecks,
    totalBlocked,
    totalNearMisses,
    totalPassed,
    blockRate: percentOfChecks(totalBlocked),
    nearMissRate: percentOfChecks(totalNearMisses),
    safetyPosture: computeSafetyPosture(totalBlocked, totalNearMisses, totalChecks),
    topBlockedSpecs: mapToSorted(blocksBySpec, 'name', 'count'),
    topBlockedConstraints: mapToSorted(blocksByConstraint, 'id', 'count'),
    topNearMisses: Array.from(nearMissByConstraint.entries())
      .sort(([, a], [, b]) => b.count - a.count)
      .slice(0, 10)
      .map(([id, data]) => ({ id, count: data.count, maxScore: data.maxScore })),
  };
}
255
+
/**
 * Render a session summary as plain text for the terminal.
 *
 * Three header lines are always emitted; the "Top Blocked" and
 * "Top Near-Misses" sections appear only when their lists are non-empty.
 *
 * @param {object} summary - Summary as produced by summarizeSessionTraces().
 * @returns {string} Newline-joined report.
 */
function formatTraceSummary(summary) {
  const {
    safetyPosture,
    totalEvaluations,
    totalChecks,
    totalBlocked,
    blockRate,
    totalNearMisses,
    nearMissRate,
    totalPassed,
    topBlockedConstraints,
    topNearMisses,
  } = summary;

  const report = [
    `Safety Posture: ${safetyPosture.toUpperCase()}`,
    `Evaluations: ${totalEvaluations} | Checks: ${totalChecks}`,
    `Blocked: ${totalBlocked} (${blockRate}%) | Near-Misses: ${totalNearMisses} (${nearMissRate}%) | Passed: ${totalPassed}`,
  ];

  if (topBlockedConstraints.length > 0) {
    const rows = topBlockedConstraints.map(({ id, count }) => ` - ${id}: ${count}x`);
    report.push('', 'Top Blocked:', ...rows);
  }

  if (topNearMisses.length > 0) {
    const rows = topNearMisses.map(
      ({ id, count, maxScore }) => ` - ${id}: ${count}x (max score: ${maxScore})`,
    );
    report.push('', 'Top Near-Misses:', ...rows);
  }

  return report.join('\n');
}
283
+
284
+ // ---------------------------------------------------------------------------
285
+ // Helpers
286
+ // ---------------------------------------------------------------------------
287
+
/**
 * Classify overall safety from aggregate counts.
 *
 * @param {number} blocked - Number of blocked checks.
 * @param {number} nearMisses - Number of near-miss checks.
 * @param {number} total - Number of checks evaluated.
 * @returns {string} 'unknown' when total is exactly 0; otherwise 'critical'
 *   if anything was blocked, 'cautious' if there were near-misses, else 'clean'.
 */
function computeSafetyPosture(blocked, nearMisses, total) {
  let posture = 'clean';
  if (total === 0) {
    posture = 'unknown';
  } else if (blocked > 0) {
    posture = 'critical';
  } else if (nearMisses > 0) {
    posture = 'cautious';
  }
  return posture;
}
294
+
/**
 * Convert a Map of numeric tallies into a top-10 list of small objects.
 *
 * @param {Map<*, number>} map - Tallies keyed by name or id.
 * @param {string} keyName - Property name that receives each map key.
 * @param {string} valueName - Property name that receives each map value.
 * @returns {Array<object>} At most ten entries, highest value first.
 */
function mapToSorted(map, keyName, valueName) {
  const ranked = [...map.entries()].sort((left, right) => right[1] - left[1]);
  const top = [];
  for (const [key, value] of ranked.slice(0, 10)) {
    top.push({ [keyName]: key, [valueName]: value });
  }
  return top;
}
301
+
/**
 * Stringify, trim and cap a value for storage in a trace.
 *
 * @param {*} value - Value to shorten.
 * @param {number} maxLength - Maximum number of characters kept.
 * @returns {string|null} The trimmed text cut to maxLength, or null when the
 *   value is null/undefined or trims to an empty string.
 */
function truncate(value, maxLength) {
  if (value == null) {
    return null;
  }
  const trimmed = String(value).trim();
  if (trimmed === '') {
    return null;
  }
  return trimmed.slice(0, maxLength);
}
307
+
308
+ // ---------------------------------------------------------------------------
309
+ // CLI
310
+ // ---------------------------------------------------------------------------
311
+
/**
 * Tell whether this file is the script named on the command line.
 *
 * @param {string[]} [argv] - Argument vector; defaults to process.argv.
 * @returns {boolean} True when argv[1], resolved to an absolute path, equals
 *   this module's __filename; false when argv[1] is missing or empty.
 */
function isCliInvocation(argv = process.argv) {
  const invokedPath = argv[1];
  if (!invokedPath) {
    return false;
  }
  return path.resolve(invokedPath) === __filename;
}
316
+
// Command-line entry point: `summary` (default), `json`, or `eval <command>`.
if (isCliInvocation()) {
  const command = process.argv[2] || 'summary';

  switch (command) {
    case 'summary':
    case 'json': {
      // Both report commands aggregate the same way and differ only in rendering.
      const summary = summarizeSessionTraces(loadTraces());
      const output = command === 'json'
        ? JSON.stringify(summary, null, 2)
        : formatTraceSummary(summary);
      console.log(output);
      break;
    }
    case 'eval': {
      // Evaluate the current specs against a test command given as argv[3].
      const testCommand = process.argv[3] || '';
      const specs = loadSpecDir();
      const { allowed, counts, blocked, nearMisses } = traceEvaluation(specs, {
        command: testCommand,
        action: testCommand,
      });
      const report = {
        allowed,
        counts,
        blocked: blocked.map(summarizeResult),
        nearMisses: nearMisses.map(summarizeResult),
      };
      console.log(JSON.stringify(report, null, 2));
      break;
    }
    default:
      console.error(`Unknown command: ${command}. Use: summary, json, eval`);
      process.exit(1);
  }
}
344
+
345
+ module.exports = { // Public API of the decision-trace module.
346
+ NEAR_MISS_THRESHOLD,
347
+ computeNearMiss,
348
+ extractLiteralTokens,
349
+ formatTraceSummary,
350
+ loadTraces,
351
+ recordTrace,
352
+ summarizeSessionTraces,
353
+ traceEvaluation,
354
+ };
@@ -1393,8 +1393,6 @@ function analyzeFeedback(logPath) {
1393
1393
  let totalNegative = 0;
1394
1394
 
1395
1395
  for (const entry of entries) {
1396
- if (isAuditTrailEntry(entry)) continue;
1397
-
1398
1396
  if (entry.signal === 'positive') totalPositive++;
1399
1397
  if (entry.signal === 'negative') totalNegative++;
1400
1398
 
@@ -1428,8 +1426,7 @@ function analyzeFeedback(logPath) {
1428
1426
 
1429
1427
  const total = totalPositive + totalNegative;
1430
1428
  const approvalRate = total > 0 ? Math.round((totalPositive / total) * 1000) / 1000 : 0;
1431
- const realEntries = entries.filter((entry) => !isAuditTrailEntry(entry));
1432
- const recent = realEntries.slice(-20);
1429
+ const recent = entries.slice(-20);
1433
1430
  const recentPos = recent.filter((e) => e.signal === 'positive').length;
1434
1431
  const recentRate = recent.length > 0 ? Math.round((recentPos / recent.length) * 1000) / 1000 : 0;
1435
1432
 
@@ -1438,7 +1435,7 @@ function analyzeFeedback(logPath) {
1438
1435
  const SEVEN_DAYS_MS = 7 * 24 * 60 * 60 * 1000;
1439
1436
  const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;
1440
1437
  const windowStats = { '7d': { total: 0, positive: 0 }, '30d': { total: 0, positive: 0 } };
1441
- for (const entry of realEntries) {
1438
+ for (const entry of entries) {
1442
1439
  const ts = entry.timestamp ? new Date(entry.timestamp).getTime() : 0;
1443
1440
  const age = now - ts;
1444
1441
  if (age <= SEVEN_DAYS_MS) {
@@ -1701,12 +1698,11 @@ function writePreventionRules(filePath, minOccurrences = 2) {
1701
1698
  function feedbackSummary(recentN = 20, options = {}) {
1702
1699
  const { FEEDBACK_LOG_PATH } = getFeedbackPaths(options);
1703
1700
  const entries = readJSONL(FEEDBACK_LOG_PATH);
1704
- const realEntries = entries.filter((entry) => !isAuditTrailEntry(entry));
1705
- if (realEntries.length === 0) {
1701
+ if (entries.length === 0) {
1706
1702
  return '## Feedback Summary\nNo feedback recorded yet.';
1707
1703
  }
1708
1704
 
1709
- const recent = realEntries.slice(-recentN);
1705
+ const recent = entries.slice(-recentN);
1710
1706
  const positive = recent.filter((e) => e.signal === 'positive').length;
1711
1707
  const negative = recent.filter((e) => e.signal === 'negative').length;
1712
1708
  const pct = Math.round((positive / recent.length) * 100);