thumbgate 1.22.0 → 1.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +1 -0
- package/adapters/chatgpt/openapi.yaml +10 -0
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +1 -1
- package/adapters/opencode/opencode.json +1 -1
- package/bin/cli.js +194 -30
- package/openapi/openapi.yaml +10 -0
- package/package.json +13 -3
- package/public/agents-cost-savings.html +151 -0
- package/public/ai-malpractice-prevention.html +183 -0
- package/public/codex-plugin.html +1 -1
- package/public/index.html +3 -3
- package/public/numbers.html +2 -2
- package/public/pricing.html +1 -1
- package/scripts/cli-telemetry.js +6 -1
- package/scripts/gates-engine.js +119 -6
- package/scripts/meta-agent-loop.js +32 -0
- package/scripts/pro-local-dashboard.js +4 -4
- package/scripts/rate-limiter.js +7 -1
- package/scripts/self-healing-check.js +193 -0
- package/scripts/silent-failure-cluster.js +512 -0
- package/scripts/telemetry-analytics.js +38 -0
- package/src/api/server.js +252 -36
|
@@ -0,0 +1,512 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Silent-Failure Clustering — Unsupervised candidate source for the meta-agent loop
|
|
6
|
+
*
|
|
7
|
+
* Off by default. Enabled with: THUMBGATE_SILENT_FAILURE_CLUSTERING=1
|
|
8
|
+
*
|
|
9
|
+
* Problem: ThumbGate's HITL loop only learns from explicit thumbs-down. Tool calls
|
|
10
|
+
* that fail without user feedback (exit_code != 0, regex-matched error in output,
|
|
11
|
+
* agent silently recovers) are invisible to `auto-promote-gates.js`. This module
|
|
12
|
+
* mines those silent failures from the JSONL conversation logs, clusters them by
|
|
13
|
+
* (tool, normalized-arg-signature), and emits candidate prevention rules that
|
|
14
|
+
* flow through the EXISTING meta-agent-loop fp-rate eval — never bypassed.
|
|
15
|
+
*
|
|
16
|
+
* Pipeline:
|
|
17
|
+
* 1. Reuse `discoverConversationLogs` from `self-distill-agent.js` to find logs
|
|
18
|
+
* 2. Read each JSONL line; extract tool calls (Bash, Edit, Write, …) with their args
|
|
19
|
+
* and adjacent tool_result entries that carry exit_code / error text
|
|
20
|
+
* 3. Filter to "failed" calls (exit_code != 0 OR matches one of ERROR_PATTERNS,
|
|
21
|
+
* mirroring `self-distill-agent.js`)
|
|
22
|
+
* 4. Drop any call whose timestamp is within ±5min of a feedback-log entry —
|
|
23
|
+
* those are already in the HITL loop and would double-count
|
|
24
|
+
* 5. Normalize args: absolute paths → `<HOME>/…`, redact secrets per the
|
|
25
|
+
* canonical regex set in `~/.claude/hooks/daily-log-append.sh`
|
|
26
|
+
* 6. Cluster by exact tuple `(tool, normalized-arg-signature)`, min size 3
|
|
27
|
+
* 7. Emit each cluster as a candidate with `origin: 'silent-failure-cluster'`
|
|
28
|
+
* so meta-agent-loop tags it for downstream precision measurement
|
|
29
|
+
*
|
|
30
|
+
* Known limitations (locked in by the spec):
|
|
31
|
+
* - Only worthwhile on workspaces generating ≥ 50 tool calls/day. Surfaces
|
|
32
|
+
* "insufficient data, skipped" cleanly rather than emitting noise.
|
|
33
|
+
* - Cluster ≠ bad; we rely on the exit_code / ERROR_PATTERNS filter to make
|
|
34
|
+
* a cluster a *failure* cluster.
|
|
35
|
+
* - No drift detection. If tools change, old clusters pollute. Out of scope for v1.
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
const fs = require('fs');
|
|
39
|
+
const os = require('os');
|
|
40
|
+
const path = require('path');
|
|
41
|
+
|
|
42
|
+
const {
|
|
43
|
+
discoverConversationLogs,
|
|
44
|
+
} = require('./self-distill-agent');
|
|
45
|
+
|
|
46
|
+
// Mirrors self-distill-agent.js ERROR_PATTERNS exactly. self-distill does NOT
|
|
47
|
+
// export this constant; duplicating here is the smallest-surface choice that
|
|
48
|
+
// keeps both modules independently testable. If self-distill ever exports it,
|
|
49
|
+
// switch to the import.
|
|
50
|
+
// Output patterns that mark a tool result as failed. None of these carry the
// `g` flag, so `.test()` is stateless and the list can be shared safely.
const ERROR_PATTERNS = [
  // "Error:" prefix in any letter case.
  /\bError:/i,
  // Test-runner style markers (case-sensitive).
  /\bFAIL\b/,
  /\bnot ok\b/,
  // "exit code != 0" or "exit code <non-zero number>".
  /exit code\s*(?:!=\s*0|[1-9]\d*)/i,
  // Upper-case ERROR as a whole word.
  /\bERROR\b/,
  // Common JavaScript exception names.
  /\bTypeError\b/,
  /\bReferenceError\b/,
  /\bSyntaxError\b/,
  // Free-text failure phrases.
  /\bcommand failed\b/i,
  /\bexited with\s+[1-9]/i,
];
|
|
62
|
+
|
|
63
|
+
// Home directory used by normalizePaths(): HOME, then USERPROFILE, then
// os.homedir(); an empty string disables the exact-prefix replacement.
const HOME = process.env.HOME || process.env.USERPROFILE || os.homedir() || '';

// A signature must repeat at least this many times to be emitted as a cluster.
const MIN_CLUSTER_SIZE = 3;
// Below this many paired tool calls the run reports "insufficient-data".
const MIN_DAILY_CALLS_FOR_USEFUL_CLUSTERING = 50;
// Failures this close to a feedback-log entry are treated as already handled.
const FEEDBACK_PROXIMITY_WINDOW_MS = 5 * 60 * 1000; // ±5 min
|
|
68
|
+
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
// Redaction — keep in sync with ~/.claude/hooks/daily-log-append.sh
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
// Ordered redaction rules applied by redactSecrets(). Every `re` carries the
// `g` flag so that all occurrences in a string are replaced.
const SECRET_PATTERNS = [
  // Stripe + GitHub + Slack + AWS + Google + npm + Anthropic keys
  {
    re: /(sk_live_|sk_test_|rk_live_|rk_test_|ghp_|gho_|ghu_|ghs_|ghr_|github_pat_|xoxb-|xoxp-|xapp-|AKIA|AIza|npm_|sk-ant-[A-Za-z0-9]*-?|sk-proj-|sk-svcacct-)[A-Za-z0-9_-]{8,}/g,
    replacement: '[REDACTED]',
  },
  // JWT (3 base64url segments)
  { re: /eyJ[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}/g, replacement: '[REDACTED-JWT]' },
  // Slack webhook. The character class includes lower-case letters: with an
  // upper-case-only class the match stopped at the first lower-case character
  // and the rest of the token stayed in the output.
  // NOTE(review): this is broader than the previous pattern — mirror the change
  // in ~/.claude/hooks/daily-log-append.sh if the two must stay identical.
  { re: /https:\/\/hooks\.slack\.com\/services\/[A-Za-z0-9/]+/g, replacement: '[REDACTED-SLACK-WEBHOOK]' },
  // Private key header (only the BEGIN line is replaced, not the key body)
  { re: /-----BEGIN [A-Z ]*PRIVATE KEY-----/g, replacement: '[REDACTED-PRIVATE-KEY-HEADER]' },
];

/**
 * Replace every secret-looking substring of `text` with a placeholder.
 *
 * @param {*} text - value to scrub; null/undefined become '', anything else is
 *   converted with String().
 * @returns {string} the text with each SECRET_PATTERNS match replaced by that
 *   rule's replacement, rules applied in array order.
 */
function redactSecrets(text) {
  let out = String(text == null ? '' : text);
  for (const { re, replacement } of SECRET_PATTERNS) {
    // String.prototype.replace with a global regex replaces all matches and
    // does not depend on the regex's previous lastIndex.
    out = out.replace(re, replacement);
  }
  return out;
}
|
|
94
|
+
|
|
95
|
+
// ---------------------------------------------------------------------------
|
|
96
|
+
// Path normalization
|
|
97
|
+
// ---------------------------------------------------------------------------
|
|
98
|
+
|
|
99
|
+
/**
 * Collapse machine-specific path fragments into stable placeholders so that the
 * same command run by different users or in different temp dirs normalizes to
 * the same text.
 *
 * - the exact HOME prefix, `/Users/<name>` and `/home/<name>` become `<HOME>`
 * - `/tmp/<entry>` and `/private/tmp/<entry>` become `/tmp/<X>`
 *
 * @param {*} text - value to normalize; null/undefined become ''.
 * @returns {string} the normalized text.
 */
function normalizePaths(text) {
  let out = String(text == null ? '' : text);
  // A HOME that is only a root separator would match every separator in the
  // text and shred all paths, so the exact-prefix replacement is skipped then.
  if (HOME && HOME !== '/' && HOME !== '\\') {
    // Replace exact HOME prefix
    out = out.split(HOME).join('<HOME>');
  }
  // Replace generic /Users/<name>/... and /home/<name>/... that don't match this HOME
  out = out.replace(/\/Users\/[^/\s"']+/g, '<HOME>');
  out = out.replace(/\/home\/[^/\s"']+/g, '<HOME>');
  // /private/tmp must be rewritten BEFORE /tmp: otherwise the /tmp rule turns
  // "/private/tmp/foo" into "/private/tmp/<X>", which the /private/tmp rule can
  // no longer match ("<" is outside its character class).
  out = out.replace(/\/private\/tmp\/[A-Za-z0-9._-]+/g, '/tmp/<X>'); // NOSONAR — regex on strings, not filesystem
  out = out.replace(/\/tmp\/[A-Za-z0-9._-]+/g, '/tmp/<X>'); // NOSONAR
  return out;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Produce the canonical text used in cluster signatures and excerpts.
 *
 * @param {*} value - raw text (or any value the two helpers accept).
 * @returns {string} the value with secrets redacted and paths normalized.
 */
function normalizeForSignature(value) {
  // Redaction goes first because some secrets contain path-like characters;
  // path normalization then runs on the already-scrubbed text.
  const scrubbed = redactSecrets(value);
  const portable = normalizePaths(scrubbed);
  return portable;
}
|
|
117
|
+
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
// JSONL parsing
|
|
120
|
+
// ---------------------------------------------------------------------------
|
|
121
|
+
|
|
122
|
+
/**
 * Best-effort JSONL reader: never throws, returns what could be parsed.
 *
 * @param {string} filePath - path of the JSONL file.
 * @returns {Array} parsed records in file order. Returns [] when the path is
 *   falsy, the file does not exist, cannot be read, or is blank. Lines that are
 *   not valid JSON, and lines whose parsed value is falsy (null, 0, false, ""),
 *   are dropped.
 */
function readJsonlSafe(filePath) {
  if (!filePath || !fs.existsSync(filePath)) return [];

  let contents;
  try {
    contents = fs.readFileSync(filePath, 'utf-8');
  } catch {
    // Unreadable file: treated the same as a missing one.
    return [];
  }
  if (!contents.trim()) return [];

  const records = [];
  for (const line of contents.split('\n')) {
    if (!line) continue;
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Skip a malformed line instead of discarding the whole file.
      continue;
    }
    if (parsed) records.push(parsed);
  }
  return records;
}
|
|
142
|
+
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
// Tool-call + failure extraction
|
|
145
|
+
// ---------------------------------------------------------------------------
|
|
146
|
+
|
|
147
|
+
/**
 * Convert one parsed transcript entry into zero or more normalized tool events.
 *
 * Two input shapes are understood:
 *   (A) Claude Code transcript format:
 *       { type:"assistant", message:{ content:[ { type:"tool_use", name, input, id } ] } }
 *       { type:"user", message:{ content:[ { type:"tool_result", tool_use_id, content, is_error } ] }, toolUseResult: {...} }
 *   (B) Simplified test fixture format:
 *       { type:"tool_call", tool, args, timestamp }
 *       { type:"tool_result", tool_use_id, exit_code, output, timestamp }
 *
 * Emitted events:
 *   { kind:'call',   tool, args, callId, timestamp }
 *   { kind:'result', callId, exitCode, output, isError, timestamp }
 *
 * @param {object} entry - one parsed JSONL record.
 * @returns {object[]} normalized events; [] for non-objects and for entries
 *   that match neither shape.
 */
function extractToolEvents(entry) {
  if (!entry || typeof entry !== 'object') return [];

  const timestamp = entry.timestamp || entry.ts || null;

  // First numeric spelling wins (snake_case before camelCase); otherwise null.
  const pickExitCode = (snake, camel) => {
    if (typeof snake === 'number') return snake;
    if (typeof camel === 'number') return camel;
    return null;
  };

  // tool_result content is either a string or an array of strings / {text} parts.
  const flattenContent = (content) => {
    if (typeof content === 'string') return content;
    if (!Array.isArray(content)) return '';
    return content
      .map((chunk) => {
        if (typeof chunk === 'string') return chunk;
        return (chunk && chunk.text) || '';
      })
      .join('\n');
  };

  const message = entry.message;
  const parts = message && Array.isArray(message.content) ? message.content : null;

  switch (entry.type) {
    // Shape (B) — test fixture / simplified
    case 'tool_call': {
      if (!entry.tool) return [];
      return [{
        kind: 'call',
        tool: String(entry.tool),
        args: entry.args || entry.input || {},
        callId: entry.callId || entry.id || null,
        timestamp,
      }];
    }
    case 'tool_result': {
      const callId = entry.tool_use_id || entry.callId;
      if (!callId) return [];
      return [{
        kind: 'result',
        callId,
        exitCode: pickExitCode(entry.exit_code, entry.exitCode),
        output: String(entry.output || entry.content || ''),
        isError: Boolean(entry.is_error || entry.isError),
        timestamp,
      }];
    }
    // Shape (A) — Claude Code transcript
    case 'assistant': {
      if (!parts) return [];
      return parts
        .filter((part) => part && part.type === 'tool_use' && part.name)
        .map((part) => ({
          kind: 'call',
          tool: String(part.name),
          args: part.input || {},
          callId: part.id || null,
          timestamp,
        }));
    }
    case 'user': {
      if (!parts) return [];
      // The entry-level toolUseResult is applied to every tool_result part.
      const tur = entry.toolUseResult || {};
      // Non-empty stderr with no `interrupted` field: the exit code is
      // reported as null regardless of any exit_code/exitCode present.
      const stderrWithoutInterruptFlag = typeof tur.stderr === 'string'
        && tur.stderr.length > 0
        && typeof tur.interrupted === 'undefined';
      const exitCode = stderrWithoutInterruptFlag
        ? null
        : pickExitCode(tur.exit_code, tur.exitCode);
      return parts
        .filter((part) => part && part.type === 'tool_result')
        .map((part) => ({
          kind: 'result',
          callId: part.tool_use_id || null,
          exitCode,
          output: flattenContent(part.content),
          isError: Boolean(part.is_error),
          timestamp,
        }));
    }
    default:
      return [];
  }
}
|
|
230
|
+
|
|
231
|
+
/**
 * Pair tool calls with their results by callId.
 *
 * Calls without a callId, calls that never receive a result, and results whose
 * callId matches no call carry no usable failure signal and are skipped.
 * When several calls share a callId, the last one wins.
 *
 * @param {object[]} events - normalized events with `kind` 'call' or 'result'.
 *   A non-array value is treated as an empty list; null/undefined elements
 *   are ignored.
 * @returns {{call: object, result: object}[]} one pair per matched result, in
 *   the order the results appear in `events`.
 */
function pairCallsWithResults(events) {
  const list = Array.isArray(events) ? events : [];

  // First pass indexes every identifiable call so that a result can be matched
  // even if it appears before its call in the event list.
  const callsById = new Map();
  for (const event of list) {
    if (event && event.kind === 'call' && event.callId) {
      callsById.set(event.callId, event);
    }
  }

  const paired = [];
  for (const event of list) {
    if (!event || event.kind !== 'result' || !event.callId) continue;
    const call = callsById.get(event.callId);
    if (call) paired.push({ call, result: event });
  }
  return paired;
}
|
|
253
|
+
|
|
254
|
+
/**
 * Decide whether a paired tool call counts as a failure.
 *
 * A pair is failed when its result is flagged `isError === true`, has a
 * numeric non-zero `exitCode`, or its output matches any ERROR_PATTERNS entry.
 *
 * @param {{call: object, result: object}} pair
 * @returns {boolean} true for a failed call; false when there is no result or
 *   no failure signal.
 */
function isFailedCall(pair) {
  const outcome = pair.result;
  if (!outcome) return false;

  const flaggedAsError = outcome.isError === true;
  const nonZeroExit = typeof outcome.exitCode === 'number' && outcome.exitCode !== 0;
  if (flaggedAsError || nonZeroExit) return true;

  // Last resort: scan the output text for known error markers.
  const text = String(outcome.output || '');
  return ERROR_PATTERNS.some((pattern) => pattern.test(text));
}
|
|
265
|
+
|
|
266
|
+
// ---------------------------------------------------------------------------
|
|
267
|
+
// Feedback-log proximity filter
|
|
268
|
+
// ---------------------------------------------------------------------------
|
|
269
|
+
|
|
270
|
+
/**
 * Load the feedback log and return the times of its entries.
 *
 * @param {string} feedbackLogPath - JSONL file read via readJsonlSafe().
 * @returns {number[]} Date.parse() results for each entry's `timestamp` (or
 *   `ts`), ascending. Entries without a timestamp, or whose timestamp does not
 *   parse to a finite number, are left out.
 */
function loadFeedbackTimestamps(feedbackLogPath) {
  return readJsonlSafe(feedbackLogPath)
    .map((record) => record && (record.timestamp || record.ts))
    .filter(Boolean)
    .map((stamp) => Date.parse(stamp))
    .filter((ms) => Number.isFinite(ms))
    .sort((earlier, later) => earlier - later);
}
|
|
281
|
+
|
|
282
|
+
/**
 * Report whether any feedback entry lies within `windowMs` of a timestamp.
 *
 * @param {string} timestampIso - time of the tool call, parsed with Date.parse().
 * @param {number[]} feedbackTimestamps - feedback times in epoch milliseconds.
 * @param {number} [windowMs] - inclusive distance on either side.
 * @returns {boolean} false when the timestamp is missing or unparseable, or
 *   when there are no feedback timestamps.
 */
function hasAdjacentFeedback(timestampIso, feedbackTimestamps, windowMs = FEEDBACK_PROXIMITY_WINDOW_MS) {
  const nothingToCompare = !timestampIso || !feedbackTimestamps || feedbackTimestamps.length === 0;
  if (nothingToCompare) return false;

  const callTime = Date.parse(timestampIso);
  if (!Number.isFinite(callTime)) return false;

  // Plain scan over every feedback time; no early exit based on ordering.
  for (const feedbackTime of feedbackTimestamps) {
    const distance = Math.abs(feedbackTime - callTime);
    if (distance <= windowMs) return true;
  }
  return false;
}
|
|
293
|
+
|
|
294
|
+
// ---------------------------------------------------------------------------
|
|
295
|
+
// Signature + clustering
|
|
296
|
+
// ---------------------------------------------------------------------------
|
|
297
|
+
|
|
298
|
+
/**
 * Build the stable string signature used to bucket failures of one tool.
 *
 * - Bash with a string `command`: `Bash:` + first 160 chars of the normalized command.
 * - Read / Edit / Write with a string `file_path`: `<tool>:` + normalized path.
 * - Anything else: `<tool>:` + `key=value` pairs joined with `|`, keys sorted,
 *   each normalized value cut to 80 chars (non-strings are JSON-stringified).
 *
 * @param {string} tool - tool name.
 * @param {object} args - tool arguments; may be null/undefined.
 * @returns {string} the signature, or `<tool>:<unserializable>` if building the
 *   generic form throws.
 */
function argsToSignature(tool, args) {
  const canonical = (value) => normalizeForSignature(String(value == null ? '' : value));
  const hasArgs = Boolean(args);

  if (tool === 'Bash' && hasArgs && typeof args.command === 'string') {
    const command = canonical(args.command);
    return `Bash:${command.slice(0, 160)}`;
  }

  const isFileTool = ['Read', 'Edit', 'Write'].includes(tool);
  if (isFileTool && hasArgs && typeof args.file_path === 'string') {
    return `${tool}:${canonical(args.file_path)}`;
  }

  // Generic fallback — sorted keys, normalized values
  try {
    const fields = [];
    for (const key of Object.keys(args || {}).sort()) {
      const raw = args[key];
      const text = typeof raw === 'string' ? raw : JSON.stringify(raw);
      fields.push(`${key}=${canonical(text).slice(0, 80)}`);
    }
    return `${tool}:${fields.join('|')}`;
  } catch {
    // e.g. JSON.stringify throwing on a circular value
    return `${tool}:<unserializable>`;
  }
}
|
|
322
|
+
|
|
323
|
+
/**
 * Group failures by exact signature and keep the groups that are large enough.
 *
 * @param {{tool: string, args: object, output: string}[]} failures
 * @param {object} [options]
 * @param {number} [options.minClusterSize] - groups smaller than this are dropped.
 * @returns {{signature: string, tool: string, size: number, sampleArgs: string,
 *   sampleOutput: string}[]} clusters, largest first. The sample fields come
 *   from the first member of each group and are normalized and cut to 200 chars.
 */
function clusterFailures(failures, { minClusterSize = MIN_CLUSTER_SIZE } = {}) {
  const membersBySignature = new Map();
  for (const failure of failures) {
    const key = argsToSignature(failure.tool, failure.args);
    const bucket = membersBySignature.get(key);
    if (bucket) {
      bucket.push(failure);
    } else {
      membersBySignature.set(key, [failure]);
    }
  }

  return Array.from(membersBySignature)
    .filter(([, members]) => !(members.length < minClusterSize))
    .map(([signature, members]) => {
      // The first member supplies the excerpt shown in the rule message.
      const [sample] = members;
      return {
        signature,
        tool: sample.tool,
        size: members.length,
        // Keep redacted+normalized excerpts only.
        sampleArgs: normalizeForSignature(JSON.stringify(sample.args || {})).slice(0, 200),
        sampleOutput: normalizeForSignature(String(sample.output || '')).slice(0, 200),
      };
    })
    .sort((left, right) => right.size - left.size);
}
|
|
346
|
+
|
|
347
|
+
// ---------------------------------------------------------------------------
|
|
348
|
+
// Candidate emission — same shape as meta-agent-loop.js candidates
|
|
349
|
+
// ---------------------------------------------------------------------------
|
|
350
|
+
|
|
351
|
+
/**
 * Turn one failure cluster into a candidate prevention rule.
 *
 * The `pattern` is regex source text: up to three keywords from the signature
 * body joined with `.*`, or the escaped first 60 chars of the body when fewer
 * than two keywords qualify. It is never empty, because an empty pattern
 * compiles to a regex that matches every input.
 *
 * @param {{signature: string, tool: string, size: number, sampleArgs?: string,
 *   sampleOutput?: string}} cluster
 * @returns {{pattern: string, action: string, message: string, severity: string,
 *   rationale: string, source: string, origin: string, clusterSize: number,
 *   clusterSignature: string}}
 */
function candidateFromCluster(cluster) {
  // Escape regex metacharacters so signature text is matched literally.
  const escape = (s) => String(s).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Drop the leading "<tool>:" prefix; the rest is the normalized args text.
  const signature = String(cluster.signature == null ? '' : cluster.signature);
  const sigBody = signature.replace(/^[^:]+:/, '');

  // Keywords: tokens of 4+ chars that are not purely numeric, with the angle
  // brackets of placeholders such as <HOME> stripped; at most three are used.
  const words = sigBody
    .split(/[\s/|=]+/)
    .map((w) => w.replace(/[<>]/g, '').trim())
    .filter((w) => w.length >= 4 && !/^[0-9]+$/.test(w))
    .slice(0, 3);

  let pattern = words.length >= 2
    ? words.map(escape).join('.*')
    : escape(sigBody.slice(0, 60));

  if (pattern.trim() === '') {
    // A tool invoked with no arguments has an empty signature body. Fall back
    // to the tool name, then to the whole signature; if both are empty use
    // `(?!)`, an empty negative lookahead that can never match.
    pattern = escape(cluster.tool || signature) || '(?!)';
  }

  // Coerce the excerpts so a cluster without samples does not throw.
  const sampleOutput = String(cluster.sampleOutput == null ? '' : cluster.sampleOutput);
  const sampleArgs = String(cluster.sampleArgs == null ? '' : cluster.sampleArgs);

  return {
    pattern,
    action: 'warn',
    message: `Silent-failure cluster (${cluster.size}× ${cluster.tool}): ${sampleOutput.slice(0, 100) || sampleArgs.slice(0, 100)}`,
    severity: 'medium',
    rationale: `Observed ${cluster.size} silent failures matching ${cluster.tool} signature; never thumbed-down by user.`,
    source: 'silent-failure-cluster',
    origin: 'silent-failure-cluster',
    clusterSize: cluster.size,
    clusterSignature: cluster.signature,
  };
}
|
|
379
|
+
|
|
380
|
+
// ---------------------------------------------------------------------------
|
|
381
|
+
// Main entry point
|
|
382
|
+
// ---------------------------------------------------------------------------
|
|
383
|
+
|
|
384
|
+
/**
 * Mine conversation logs for silent tool failures and turn recurring ones
 * into candidate rules.
 *
 * For every log: parse it, extract tool events, pair calls with results, keep
 * the failed pairs, and drop those with a feedback-log entry nearby. The
 * remaining failures are clustered and each cluster becomes one candidate.
 *
 * `minDailyCalls` is compared against the total number of paired calls across
 * all logs read in this run (no per-day bucketing happens here); below it, the
 * result has no candidates and `stats.skippedReason` explains why.
 *
 * @param {object} opts
 * @param {string[]} [opts.logPaths] — override conversation-log discovery (tests)
 * @param {string} [opts.feedbackLogPath] — feedback-log.jsonl to exclude HITL'd calls
 * @param {number} [opts.minClusterSize]
 * @param {number} [opts.minDailyCalls]
 * @returns {{
 *   candidates: object[],
 *   stats: {
 *     totalToolCalls: number,
 *     failedCalls: number,
 *     filteredByFeedback: number,
 *     clusters: number,
 *     skippedReason: string|null
 *   }
 * }}
 */
function generateSilentFailureCandidates(opts = {}) {
  const {
    logPaths = discoverConversationLogs({ limit: 50 }),
    feedbackLogPath = null,
    minClusterSize = MIN_CLUSTER_SIZE,
    minDailyCalls = MIN_DAILY_CALLS_FOR_USEFUL_CLUSTERING,
  } = opts;

  const stats = {
    totalToolCalls: 0,
    failedCalls: 0,
    filteredByFeedback: 0,
    clusters: 0,
    skippedReason: null,
  };

  // Without a feedback log the proximity filter has nothing to match against.
  const feedbackTimestamps = feedbackLogPath ? loadFeedbackTimestamps(feedbackLogPath) : [];

  const silentFailures = [];
  for (const logPath of logPaths) {
    // Pairing is done per log file, so callIds are never matched across logs.
    const events = readJsonlSafe(logPath).flatMap((entry) => extractToolEvents(entry));
    const pairs = pairCallsWithResults(events);
    stats.totalToolCalls += pairs.length;

    for (const pair of pairs) {
      if (!isFailedCall(pair)) continue;
      stats.failedCalls += 1;

      // Prefer the call's own timestamp; fall back to the result's.
      const timestamp = pair.call.timestamp || pair.result.timestamp;
      if (hasAdjacentFeedback(timestamp, feedbackTimestamps)) {
        stats.filteredByFeedback += 1;
      } else {
        silentFailures.push({
          tool: pair.call.tool,
          args: pair.call.args,
          output: pair.result.output,
          timestamp,
        });
      }
    }
  }

  // Insufficient-data path — emit empty cluster set cleanly.
  const hasEnoughData = !(stats.totalToolCalls < minDailyCalls);
  if (!hasEnoughData) {
    stats.skippedReason = `insufficient-data: ${stats.totalToolCalls} tool calls < ${minDailyCalls} threshold`;
    return { candidates: [], stats };
  }

  const clusters = clusterFailures(silentFailures, { minClusterSize });
  stats.clusters = clusters.length;

  return {
    candidates: clusters.map((cluster) => candidateFromCluster(cluster)),
    stats,
  };
}
|
|
458
|
+
|
|
459
|
+
// ---------------------------------------------------------------------------
|
|
460
|
+
// CLI
|
|
461
|
+
// ---------------------------------------------------------------------------
|
|
462
|
+
|
|
463
|
+
/**
 * CLI entry point. Does nothing but print a notice unless
 * THUMBGATE_SILENT_FAILURE_CLUSTERING is exactly "1"; otherwise runs the
 * candidate generation and prints the result as pretty-printed JSON on stdout.
 *
 * Declared async so that synchronous throws surface as a rejected promise to
 * the caller's `.catch()`.
 */
async function main() {
  const enabled = process.env.THUMBGATE_SILENT_FAILURE_CLUSTERING === '1';
  if (!enabled) {
    process.stdout.write('silent-failure-cluster: disabled (set THUMBGATE_SILENT_FAILURE_CLUSTERING=1 to enable)\n');
    return;
  }

  // Loaded lazily so the disabled path never touches the feedback-paths module.
  const { resolveFeedbackDir } = require('./feedback-paths');

  let feedbackLogPath = null;
  try {
    feedbackLogPath = path.join(resolveFeedbackDir(), 'feedback-log.jsonl');
  } catch {
    // running outside a configured feedback dir — fine, just skip the proximity filter
  }

  const { candidates, stats } = generateSilentFailureCandidates({ feedbackLogPath });
  const report = {
    enabled: true,
    candidateCount: candidates.length,
    stats,
    candidates,
  };
  process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
}
|
|
485
|
+
|
|
486
|
+
if (require.main === module) {
|
|
487
|
+
main().catch((err) => {
|
|
488
|
+
process.stderr.write(`silent-failure-cluster failed: ${err.message}\n`);
|
|
489
|
+
process.exitCode = 1;
|
|
490
|
+
});
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
module.exports = {
|
|
494
|
+
generateSilentFailureCandidates,
|
|
495
|
+
// exported for testing
|
|
496
|
+
redactSecrets,
|
|
497
|
+
normalizePaths,
|
|
498
|
+
normalizeForSignature,
|
|
499
|
+
extractToolEvents,
|
|
500
|
+
pairCallsWithResults,
|
|
501
|
+
isFailedCall,
|
|
502
|
+
hasAdjacentFeedback,
|
|
503
|
+
loadFeedbackTimestamps,
|
|
504
|
+
argsToSignature,
|
|
505
|
+
clusterFailures,
|
|
506
|
+
candidateFromCluster,
|
|
507
|
+
readJsonlSafe,
|
|
508
|
+
ERROR_PATTERNS,
|
|
509
|
+
MIN_CLUSTER_SIZE,
|
|
510
|
+
MIN_DAILY_CALLS_FOR_USEFUL_CLUSTERING,
|
|
511
|
+
FEEDBACK_PROXIMITY_WINDOW_MS,
|
|
512
|
+
};
|
|
@@ -313,6 +313,12 @@ function sanitizeTelemetryPayload(payload = {}, headers = {}) {
|
|
|
313
313
|
pipelineStatus: pickFirstText(raw.pipelineStatus, raw.workflowSprintStatus, raw.status),
|
|
314
314
|
reasonCode,
|
|
315
315
|
reasonDetail: pickFirstText(raw.reasonDetail, raw.reasonText, raw.otherReason, raw.notes),
|
|
316
|
+
integration: pickFirstText(raw.integration, raw.actionIntegration),
|
|
317
|
+
actionOperation: pickFirstText(raw.actionOperation, raw.operationId),
|
|
318
|
+
endpoint: pickFirstText(raw.endpoint, raw.apiPath),
|
|
319
|
+
decisionMode: pickFirstText(raw.decisionMode, raw.executionMode),
|
|
320
|
+
actionStatus: pickFirstText(raw.actionStatus, raw.status),
|
|
321
|
+
accepted: raw.accepted === undefined || raw.accepted === null ? null : Boolean(raw.accepted),
|
|
316
322
|
pricingInterest: pickFirstText(raw.pricingInterest, raw.interestLevel),
|
|
317
323
|
seoQuery: pickFirstText(raw.seoQuery, raw.query),
|
|
318
324
|
seoSurface: pickFirstText(raw.seoSurface, raw.searchSurface, raw.surface),
|
|
@@ -526,6 +532,12 @@ function getTelemetrySummary(feedbackDir, options = {}) {
|
|
|
526
532
|
let exitEngagementMsCount = 0;
|
|
527
533
|
let exitScrollPercentTotal = 0;
|
|
528
534
|
let exitScrollPercentCount = 0;
|
|
535
|
+
let chatgptActionCalls = 0;
|
|
536
|
+
let chatgptActionAccepted = 0;
|
|
537
|
+
const chatgptActionsByOperation = {};
|
|
538
|
+
const chatgptActionsByEndpoint = {};
|
|
539
|
+
const chatgptActionsByStatus = {};
|
|
540
|
+
const chatgptActionsByDecisionMode = {};
|
|
529
541
|
|
|
530
542
|
for (const entry of events) {
|
|
531
543
|
incrementCounter(byClientType, entry.clientType || entry.client || 'unknown');
|
|
@@ -714,6 +726,18 @@ function getTelemetrySummary(feedbackDir, options = {}) {
|
|
|
714
726
|
}
|
|
715
727
|
}
|
|
716
728
|
|
|
729
|
+
if (
|
|
730
|
+
String(entry.eventType || entry.event || '').startsWith('chatgpt_action_') ||
|
|
731
|
+
entry.integration === 'chatgpt_gpt'
|
|
732
|
+
) {
|
|
733
|
+
chatgptActionCalls += 1;
|
|
734
|
+
if (entry.accepted === true) chatgptActionAccepted += 1;
|
|
735
|
+
incrementCounter(chatgptActionsByOperation, entry.actionOperation);
|
|
736
|
+
incrementCounter(chatgptActionsByEndpoint, entry.endpoint);
|
|
737
|
+
incrementCounter(chatgptActionsByStatus, entry.actionStatus);
|
|
738
|
+
incrementCounter(chatgptActionsByDecisionMode, entry.decisionMode);
|
|
739
|
+
}
|
|
740
|
+
|
|
717
741
|
if ((entry.clientType || entry.client) === 'cli') {
|
|
718
742
|
if (entry.installId) cliInstalls.add(entry.installId);
|
|
719
743
|
incrementCounter(cliByPlatform, entry.platform);
|
|
@@ -770,6 +794,7 @@ function getTelemetrySummary(feedbackDir, options = {}) {
|
|
|
770
794
|
landingViews: pageViews,
|
|
771
795
|
installCopies,
|
|
772
796
|
gptOpens,
|
|
797
|
+
gptActionCalls: chatgptActionCalls,
|
|
773
798
|
checkoutStarts,
|
|
774
799
|
checkoutInterstitialViews,
|
|
775
800
|
checkoutInterstitialClicks,
|
|
@@ -777,6 +802,7 @@ function getTelemetrySummary(feedbackDir, options = {}) {
|
|
|
777
802
|
proConversions,
|
|
778
803
|
landingToInstallCopyRate: safeRate(installCopies, pageViews),
|
|
779
804
|
landingToGptOpenRate: safeRate(gptOpens, pageViews),
|
|
805
|
+
gptOpenToActionRate: safeRate(chatgptActionCalls, gptOpens),
|
|
780
806
|
landingToCheckoutRate: safeRate(checkoutStarts, pageViews),
|
|
781
807
|
checkoutInterstitialClickRate: safeRate(checkoutInterstitialClicks, checkoutInterstitialViews),
|
|
782
808
|
checkoutInterstitialProConfirmRate: safeRate(checkoutInterstitialProConfirms, checkoutInterstitialViews),
|
|
@@ -835,6 +861,17 @@ function getTelemetrySummary(feedbackDir, options = {}) {
|
|
|
835
861
|
byPlatform: cliByPlatform,
|
|
836
862
|
byVersion: cliByVersion,
|
|
837
863
|
},
|
|
864
|
+
chatgpt: {
|
|
865
|
+
gptOpens,
|
|
866
|
+
actionCalls: chatgptActionCalls,
|
|
867
|
+
acceptedActionCalls: chatgptActionAccepted,
|
|
868
|
+
openToActionRate: safeRate(chatgptActionCalls, gptOpens),
|
|
869
|
+
acceptedActionRate: safeRate(chatgptActionAccepted, chatgptActionCalls),
|
|
870
|
+
byOperation: chatgptActionsByOperation,
|
|
871
|
+
byEndpoint: chatgptActionsByEndpoint,
|
|
872
|
+
byStatus: chatgptActionsByStatus,
|
|
873
|
+
byDecisionMode: chatgptActionsByDecisionMode,
|
|
874
|
+
},
|
|
838
875
|
marketing: {
|
|
839
876
|
pageViewsBySource,
|
|
840
877
|
pageViewsByCampaign,
|
|
@@ -929,6 +966,7 @@ function getTelemetryAnalytics(feedbackDir, options = {}) {
|
|
|
929
966
|
byClientType: summary.byClientType,
|
|
930
967
|
byEventType: summary.byEventType,
|
|
931
968
|
conversionFunnel: summary.conversionFunnel,
|
|
969
|
+
chatgpt: summary.chatgpt,
|
|
932
970
|
visitors: {
|
|
933
971
|
totalEvents: summary.web.totalEvents,
|
|
934
972
|
uniqueVisitors: summary.web.uniqueVisitors,
|