thumbgate 1.26.0 → 1.26.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,121 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // repeat-metric — first-class "repeat-attempts blocked before execution" metric
5
+ //
6
+ // This module exposes data ThumbGate already collects in gate-stats state. It
7
+ // does NOT write to disk; it is a pure function over gates-engine.loadStats().
8
+ //
9
+ // The headline number is stats.recurringBlocks — incremented by recordStat()
10
+ // in gates-engine.js every time the SAME gateId fires twice within one session
11
+ // bucket. That is exactly "a pre-action gate fire that stopped a tool call the
12
+ // agent had already been blocked on", i.e. a repeat attempt prevented before it
13
+ // could round-trip and execute.
14
+ // ---------------------------------------------------------------------------
15
+
16
+ const gatesEngine = require('./gates-engine');
17
+
18
/**
 * Derive a per-gate { firstBlocks, repeatBlocks } split from the raw stats.
 *
 * For each gate id found in either stats.sessionFiredGates or stats.byGate:
 *   firstBlocks  = number of session buckets in stats.sessionFiredGates whose
 *                  entry for that gate is truthy
 *   repeatBlocks = (blocked + warned from stats.byGate[gateId]) - firstBlocks,
 *                  clamped to >= 0
 *
 * Hardening against partially-written / legacy state:
 *   - counters are coerced to finite, non-negative numbers, so string counters
 *     such as "3" are summed instead of concatenated and junk counts as 0;
 *   - containers that are not plain objects (null, strings, arrays, ...) are
 *     treated as empty instead of having their indices read as gate ids;
 *   - gate ids that collide with Object.prototype members ("constructor",
 *     "toString", "__proto__", ...) are counted like any other id.
 *
 * @param {object} stats raw object returned by gates-engine.loadStats()
 * @returns {Object<string,{firstBlocks:number, repeatBlocks:number}>}
 */
function computeByGateSplit(stats) {
  // Anything that is not a non-null, non-array object is treated as empty.
  const asRecord = (value) =>
    (value !== null && typeof value === 'object' && !Array.isArray(value) ? value : {});

  // Coerce a stored counter to a finite, non-negative number (junk => 0).
  const toCount = (value) => {
    const n = Number(value);
    return Number.isFinite(n) && n > 0 ? n : 0;
  };

  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  const root = asRecord(stats);
  const sessionFiredGates = asRecord(root.sessionFiredGates);
  const rawByGate = asRecord(root.byGate);

  // Count the session buckets each gate fired in => firstBlocks.
  // A Map is used so that ids like "constructor" never read inherited
  // Object.prototype members as if they were an existing count.
  const firstBlocksByGate = new Map();
  for (const bucket of Object.values(sessionFiredGates)) {
    for (const [gateId, fired] of Object.entries(asRecord(bucket))) {
      if (fired) {
        firstBlocksByGate.set(gateId, (firstBlocksByGate.get(gateId) || 0) + 1);
      }
    }
  }

  // Union of every gate id known from either source.
  const gateIds = new Set([
    ...Object.keys(rawByGate),
    ...firstBlocksByGate.keys(),
  ]);

  const byGate = {};
  for (const gateId of gateIds) {
    // Own-property check: ids only seen in sessionFiredGates must not pick up
    // inherited members of rawByGate.
    const gateStat = hasOwn(rawByGate, gateId) ? asRecord(rawByGate[gateId]) : {};
    const totalFires = toCount(gateStat.blocked) + toCount(gateStat.warned);
    const firstBlocks = firstBlocksByGate.get(gateId) || 0;
    // Repeat fires are total fires beyond the first fire per session bucket.
    const repeatBlocks = Math.max(0, totalFires - firstBlocks);
    // defineProperty (not assignment) so a "__proto__" gate id becomes an
    // ordinary own entry instead of replacing the result's prototype.
    Object.defineProperty(byGate, gateId, {
      value: { firstBlocks, repeatBlocks },
      enumerable: true,
      writable: true,
      configurable: true,
    });
  }

  return byGate;
}
68
+
69
/**
 * Build the "repeat attempts blocked before execution" metric.
 *
 * Reads the stats via gatesEngine.loadStats(); this function itself performs
 * no writes. If loadStats() throws or returns a falsy value, an empty stats
 * object is used, so every counter reports 0 and byGate is empty.
 *
 * @returns {{
 *   repeatBlocksBeforeExecution: number,
 *   recurringBlocks: number,
 *   totalBlocked: number,
 *   byGate: Object<string,{firstBlocks:number, repeatBlocks:number}>
 * }}
 */
function computeRepeatMetric() {
  let snapshot = {};
  try {
    snapshot = gatesEngine.loadStats() || {};
  } catch (_) {
    // Best-effort read: a failing loadStats() leaves the empty snapshot in place.
  }

  const repeatCount = Number(snapshot.recurringBlocks || 0);
  const blockedCount = Number(snapshot.blocked || 0);
  const perGate = computeByGateSplit(snapshot);

  // repeatBlocksBeforeExecution is the headline figure; recurringBlocks
  // carries the same value under the raw stat's own name.
  return {
    repeatBlocksBeforeExecution: repeatCount,
    recurringBlocks: repeatCount,
    totalBlocked: blockedCount,
    byGate: perGate,
  };
}
101
+
102
/**
 * Return a shallow copy of a gate-stats object with the repeat metric attached
 * under the `repeat` key. The input object is never mutated.
 *
 * A falsy or non-object argument is treated as an empty object, so the result
 * then contains only `repeat`.
 *
 * @param {object} gateStatsObject gate-stats object to extend
 * @returns {object} copy of gateStatsObject with `.repeat`
 */
function mergeRepeatMetricIntoGateStats(gateStatsObject) {
  let source = {};
  if (gateStatsObject && typeof gateStatsObject === 'object') {
    source = gateStatsObject;
  }

  const repeat = computeRepeatMetric();
  return Object.assign({}, source, { repeat });
}
116
+
117
+ module.exports = { // Public API: the three functions defined in this module.
118
+ computeRepeatMetric,
119
+ mergeRepeatMetricIntoGateStats,
120
+ computeByGateSplit,
121
+ };
@@ -4,7 +4,14 @@
4
4
  /**
5
5
  * Silent-Failure Clustering — Unsupervised candidate source for the meta-agent loop
6
6
  *
7
- * Off by default. Enabled with: THUMBGATE_SILENT_FAILURE_CLUSTERING=1
7
+ * Default-ON since 2026-05-21. Opt out with: THUMBGATE_SILENT_FAILURE_CLUSTERING=0
8
+ * (or set NODE_ENV=test to skip in test runs). Was opt-in for the initial
9
+ * landing of PR #2285; flipped to default-on because the entire point is to
10
+ * cover the case where users never give thumbs-down — keeping it opt-in
11
+ * means lazy users (the ones who need it most) never benefit. Bounded risk:
12
+ * candidates still flow through meta-agent-loop's existing fp-rate eval, so
13
+ * a noisy cluster can't auto-promote to a real gate without passing the
14
+ * same precision/recall thresholds as LLM-generated candidates.
8
15
  *
9
16
  * Problem: ThumbGate's HITL loop only learns from explicit thumbs-down. Tool calls
10
17
  * that fail without user feedback (exit_code != 0, regex-matched error in output,
@@ -460,9 +467,20 @@ function generateSilentFailureCandidates(opts = {}) {
460
467
  // CLI
461
468
  // ---------------------------------------------------------------------------
462
469
 
470
/**
 * Resolve whether silent-failure clustering is enabled. Default ON.
 *
 * Opt-outs, checked in this order:
 *   1. NODE_ENV === 'test' always disables.
 *   2. THUMBGATE_SILENT_FAILURE_CLUSTERING set to "0", "false", "off" or "no"
 *      disables. The comparison ignores case and surrounding whitespace, and
 *      non-string values (e.g. the number 0 or boolean false passed by a
 *      caller-built env object) are stringified before comparison.
 * Any other value, including unset or empty, leaves the feature enabled.
 *
 * @param {Object<string, *>} [env=process.env] environment to read; a null
 *   value falls back to process.env
 * @returns {boolean} true when clustering should run
 */
function isSilentFailureClusteringEnabled(env = process.env) {
  // The default parameter only covers `undefined`; guard `null` as well.
  const source = env || process.env;
  if (source.NODE_ENV === 'test') return false;

  const rawFlag = source.THUMBGATE_SILENT_FAILURE_CLUSTERING;
  // String() first so numeric/boolean values neither throw on .toLowerCase()
  // nor get mistaken for "unset" by a truthiness check.
  const flag = rawFlag == null ? '' : String(rawFlag).trim().toLowerCase();

  const optOutValues = ['0', 'false', 'off', 'no'];
  return !optOutValues.includes(flag);
}
480
+
463
481
  async function main() {
464
- if (process.env.THUMBGATE_SILENT_FAILURE_CLUSTERING !== '1') {
465
- process.stdout.write('silent-failure-cluster: disabled (set THUMBGATE_SILENT_FAILURE_CLUSTERING=1 to enable)\n');
482
+ if (!isSilentFailureClusteringEnabled()) {
483
+ process.stdout.write('silent-failure-cluster: disabled (THUMBGATE_SILENT_FAILURE_CLUSTERING=0 or NODE_ENV=test)\n');
466
484
  return;
467
485
  }
468
486
 
@@ -492,6 +510,7 @@ if (require.main === module) {
492
510
 
493
511
  module.exports = {
494
512
  generateSilentFailureCandidates,
513
+ isSilentFailureClusteringEnabled,
495
514
  // exported for testing
496
515
  redactSecrets,
497
516
  normalizePaths,
@@ -895,6 +895,56 @@ const TOOLS = [
895
895
  },
896
896
  },
897
897
  }),
898
+ destructiveTool({
899
+ name: 'detect_noop',
900
+ title: 'Detect No-op Action',
901
+ description: 'Detect whether a tool call was a no-op (state unchanged) or identical to a prior attempt in the session — a cheap repeat-loop signal. Records the action attempt state for repeat detection.',
902
+ inputSchema: {
903
+ type: 'object',
904
+ required: ['actionId'],
905
+ properties: {
906
+ actionId: { type: 'string', description: 'Stable identifier for the action being checked (e.g. the file path or command being attempted)' },
907
+ kind: { type: 'string', enum: ['file', 'command'], description: 'Action kind: file edit/write or command execution' },
908
+ filePath: { type: 'string', description: 'Path of the file the action targets (file kind)' },
909
+ beforeContent: { type: 'string', description: 'File content before the action (file kind)' },
910
+ afterContent: { type: 'string', description: 'File content after the action (file kind)' },
911
+ exitCode: { type: 'number', description: 'Command exit code (command kind)' },
912
+ stdout: { type: 'string', description: 'Command stdout (command kind)' },
913
+ stderr: { type: 'string', description: 'Command stderr (command kind)' },
914
+ sessionId: { type: 'string', description: 'Optional session id used to scope repeat-attempt detection' },
915
+ },
916
+ },
917
+ }),
918
+ destructiveTool({
919
+ name: 'record_action_receipt',
920
+ title: 'Record Action Receipt',
921
+ description: 'Pair a tracked tool call with its outcome (diff, exit code, test result) so a promoted lesson encodes "this action -> this outcome", not just a thumbs signal. Appends to the action-receipts log.',
922
+ inputSchema: {
923
+ type: 'object',
924
+ required: ['actionId'],
925
+ properties: {
926
+ actionId: { type: 'string', description: 'Identifier of the tracked action this receipt pairs with' },
927
+ toolName: { type: 'string', description: 'Name of the tool that was invoked' },
928
+ toolInput: { type: 'object', description: 'Structured input the tool was called with' },
929
+ diff: { type: 'string', description: 'Optional unified diff or change summary produced by the action' },
930
+ exitCode: { type: 'number', description: 'Optional command exit code outcome' },
931
+ testOutcome: { type: 'string', description: 'Optional test outcome (e.g. passed, failed, 12/12)' },
932
+ stateHash: { type: 'string', description: 'Optional post-action state hash (from detect_noop)' },
933
+ },
934
+ },
935
+ }),
936
+ readOnlyTool({
937
+ name: 'get_action_receipts',
938
+ title: 'Get Action Receipts',
939
+ description: 'Read outcome-paired action receipts. Returns the receipt for a specific actionId, or the most recent receipts when no actionId is given.',
940
+ inputSchema: {
941
+ type: 'object',
942
+ properties: {
943
+ actionId: { type: 'string', description: 'Optional action id to fetch the matching receipt for' },
944
+ limit: { type: 'number', description: 'Max number of recent receipts to return when no actionId is given (default 20)' },
945
+ },
946
+ },
947
+ }),
898
948
  readOnlyTool({
899
949
  name: 'verify_claim',
900
950
  description: 'Check whether a claim has enough tracked evidence before the agent asserts it.',