thumbgate 1.15.0 → 1.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/.claude-plugin/marketplace.json +6 -6
  2. package/.claude-plugin/plugin.json +3 -3
  3. package/.well-known/llms.txt +5 -5
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +59 -35
  6. package/adapters/chatgpt/openapi.yaml +118 -2
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/mcp/server-stdio.js +210 -84
  9. package/adapters/opencode/opencode.json +1 -1
  10. package/bench/prompt-eval-suite.json +5 -1
  11. package/bin/cli.js +157 -8
  12. package/config/evals/agent-safety-eval.json +338 -22
  13. package/config/gates/routine.json +43 -0
  14. package/config/github-about.json +3 -3
  15. package/config/model-candidates.json +131 -0
  16. package/openapi/openapi.yaml +118 -2
  17. package/package.json +57 -49
  18. package/public/blog.html +7 -7
  19. package/public/codex-plugin.html +6 -6
  20. package/public/compare.html +29 -23
  21. package/public/dashboard.html +82 -10
  22. package/public/guide.html +28 -28
  23. package/public/index.html +216 -98
  24. package/public/learn.html +50 -22
  25. package/public/lessons.html +1 -1
  26. package/public/numbers.html +17 -17
  27. package/public/pro.html +82 -18
  28. package/scripts/agent-audit-trace.js +55 -0
  29. package/scripts/agent-memory-lifecycle.js +96 -0
  30. package/scripts/agent-readiness-plan.js +118 -0
  31. package/scripts/agentic-data-pipeline.js +21 -1
  32. package/scripts/agents-sdk-sandbox-plan.js +57 -0
  33. package/scripts/ai-org-governance.js +98 -0
  34. package/scripts/ai-search-distribution.js +43 -0
  35. package/scripts/artifact-agent-plan.js +81 -0
  36. package/scripts/billing.js +27 -8
  37. package/scripts/cli-schema.js +18 -2
  38. package/scripts/code-mode-mcp-plan.js +71 -0
  39. package/scripts/context-engine.js +1 -2
  40. package/scripts/context-manager.js +4 -1
  41. package/scripts/dashboard-render-spec.js +1 -1
  42. package/scripts/dashboard.js +275 -9
  43. package/scripts/decision-journal.js +13 -3
  44. package/scripts/document-workflow-governance.js +62 -0
  45. package/scripts/enterprise-agent-rollout.js +34 -0
  46. package/scripts/experience-replay-governance.js +69 -0
  47. package/scripts/export-hf-dataset.js +1 -1
  48. package/scripts/feedback-loop.js +92 -4
  49. package/scripts/feedback-to-rules.js +17 -23
  50. package/scripts/gates-engine.js +4 -6
  51. package/scripts/growth-campaigns.js +49 -0
  52. package/scripts/harness-selector.js +16 -4
  53. package/scripts/hybrid-supervisor-agent.js +64 -0
  54. package/scripts/inference-cache-policy.js +72 -0
  55. package/scripts/inference-economics.js +53 -0
  56. package/scripts/internal-agent-bootstrap.js +12 -2
  57. package/scripts/knowledge-layer-plan.js +108 -0
  58. package/scripts/lesson-inference.js +183 -44
  59. package/scripts/lesson-search.js +4 -1
  60. package/scripts/llm-client.js +157 -26
  61. package/scripts/mailer/resend-mailer.js +112 -1
  62. package/scripts/mcp-transport-strategy.js +66 -0
  63. package/scripts/memory-store-governance.js +60 -0
  64. package/scripts/meta-agent-loop.js +7 -13
  65. package/scripts/model-access-eligibility.js +38 -0
  66. package/scripts/model-migration-readiness.js +55 -0
  67. package/scripts/operational-integrity.js +96 -3
  68. package/scripts/otel-declarative-config.js +56 -0
  69. package/scripts/perplexity-client.js +1 -1
  70. package/scripts/post-training-governance.js +34 -0
  71. package/scripts/private-core-boundary.js +72 -0
  72. package/scripts/production-agent-readiness.js +40 -0
  73. package/scripts/prompt-eval.js +564 -32
  74. package/scripts/prompt-programs.js +93 -0
  75. package/scripts/provider-action-normalizer.js +585 -0
  76. package/scripts/scaling-law-claims.js +60 -0
  77. package/scripts/security-scanner.js +1 -1
  78. package/scripts/self-distill-agent.js +7 -32
  79. package/scripts/seo-gsd.js +232 -55
  80. package/scripts/skill-rag-router.js +53 -0
  81. package/scripts/spec-gate.js +1 -1
  82. package/scripts/student-consistent-training.js +73 -0
  83. package/scripts/synthetic-data-provenance.js +98 -0
  84. package/scripts/task-context-result.js +81 -0
  85. package/scripts/telemetry-analytics.js +149 -0
  86. package/scripts/thompson-sampling.js +2 -2
  87. package/scripts/token-savings.js +7 -6
  88. package/scripts/token-tco.js +46 -0
  89. package/scripts/tool-registry.js +63 -3
  90. package/scripts/verification-loop.js +10 -1
  91. package/scripts/verifier-scoring.js +71 -0
  92. package/scripts/workflow-sentinel.js +284 -28
  93. package/scripts/workspace-agent-routines.js +118 -0
  94. package/src/api/server.js +381 -120
  95. package/scripts/analytics-report.js +0 -328
  96. package/scripts/autonomous-workflow.js +0 -377
  97. package/scripts/billing-setup.js +0 -109
  98. package/scripts/creator-campaigns.js +0 -239
  99. package/scripts/cross-encoder-reranker.js +0 -235
  100. package/scripts/daemon-manager.js +0 -108
  101. package/scripts/decision-trace.js +0 -354
  102. package/scripts/delegation-runtime.js +0 -896
  103. package/scripts/dispatch-brief.js +0 -159
  104. package/scripts/distribution-surfaces.js +0 -110
  105. package/scripts/feedback-history-distiller.js +0 -382
  106. package/scripts/funnel-analytics.js +0 -35
  107. package/scripts/history-distiller.js +0 -200
  108. package/scripts/hosted-job-launcher.js +0 -256
  109. package/scripts/intent-router.js +0 -392
  110. package/scripts/lesson-reranker.js +0 -263
  111. package/scripts/lesson-retrieval.js +0 -148
  112. package/scripts/managed-lesson-agent.js +0 -183
  113. package/scripts/operational-dashboard.js +0 -103
  114. package/scripts/operational-summary.js +0 -129
  115. package/scripts/operator-artifacts.js +0 -608
  116. package/scripts/optimize-context.js +0 -17
  117. package/scripts/org-dashboard.js +0 -206
  118. package/scripts/partner-orchestration.js +0 -146
  119. package/scripts/predictive-insights.js +0 -356
  120. package/scripts/pulse.js +0 -80
  121. package/scripts/reflector-agent.js +0 -221
  122. package/scripts/sales-pipeline.js +0 -681
  123. package/scripts/session-episode-store.js +0 -329
  124. package/scripts/session-health-sensor.js +0 -242
  125. package/scripts/session-report.js +0 -120
  126. package/scripts/swarm-coordinator.js +0 -81
  127. package/scripts/tool-kpi-tracker.js +0 -12
  128. package/scripts/webhook-delivery.js +0 -62
  129. package/scripts/workflow-sprint-intake.js +0 -475
@@ -1,354 +0,0 @@
1
- #!/usr/bin/env node
2
- 'use strict';
3
-
4
- /**
5
- * Decision Trace — full observability for gate evaluations.
6
- *
7
- * Inspired by Ethan Mollick's observation that operators need to *see* what
8
- * the agent was thinking when it made a decision. ThumbGate already captures
9
- * what was blocked; Decision Trace adds:
10
- *
11
- * 1. Full audit of every evaluation (passes, blocks, AND near-misses)
12
- * 2. Near-miss detection: constraints that almost matched
13
- * 3. Session trace summaries: single-glance safety posture view
14
- *
15
- * Near-miss heuristic: extract literal tokens from a regex deny pattern,
16
- * count how many appear in the input. If >50% match but the full regex
17
- * doesn't, flag as near-miss.
18
- */
19
-
20
- const crypto = require('node:crypto');
21
- const path = require('node:path');
22
- const { readJsonl, appendJsonl } = require('./fs-utils');
23
- const { resolveFeedbackDir } = require('./feedback-paths');
24
- const {
25
- evaluateConstraints,
26
- evaluateInvariants,
27
- loadSpecDir,
28
- } = require('./spec-gate');
29
-
30
- const TRACE_FILE = 'decision-trace.jsonl';
31
- const NEAR_MISS_THRESHOLD = 0.5;
32
-
33
- // ---------------------------------------------------------------------------
34
- // Near-Miss Detection
35
- // ---------------------------------------------------------------------------
36
-
37
- /**
38
- * Extract literal tokens from a regex pattern.
39
- * Strips metacharacters and splits on boundaries to find human-readable tokens.
40
- */
41
- function extractLiteralTokens(pattern) {
42
- // Remove common regex metacharacters and quantifiers
43
- const cleaned = pattern
44
- .replace(/\\[sdwbSDWB]/g, ' ') // char classes
45
- .replace(/[.*+?^${}()|[\]\\]/g, ' ') // metacharacters
46
- .replace(/\s+/g, ' ')
47
- .trim();
48
-
49
- return cleaned
50
- .split(/\s+/)
51
- .filter((t) => t.length >= 2) // ignore single chars
52
- .map((t) => t.toLowerCase());
53
- }
54
-
55
- /**
56
- * Compute near-miss score for a constraint against input text.
57
- * Returns { isNearMiss, score, matchedTokens, totalTokens }.
58
- */
59
- function computeNearMiss(constraint, inputText) {
60
- const tokens = extractLiteralTokens(constraint.deny);
61
- if (tokens.length === 0) {
62
- return { isNearMiss: false, score: 0, matchedTokens: 0, totalTokens: 0 };
63
- }
64
-
65
- const lower = String(inputText).toLowerCase();
66
- let matched = 0;
67
- for (const token of tokens) {
68
- if (lower.includes(token)) matched++;
69
- }
70
-
71
- const score = matched / tokens.length;
72
- return {
73
- isNearMiss: score >= NEAR_MISS_THRESHOLD && score < 1.0,
74
- score: Math.round(score * 100) / 100,
75
- matchedTokens: matched,
76
- totalTokens: tokens.length,
77
- };
78
- }
79
-
80
- // ---------------------------------------------------------------------------
81
- // Trace Evaluation
82
- // ---------------------------------------------------------------------------
83
-
84
- /**
85
- * Build the combined input text used for near-miss detection.
86
- */
87
- function buildCombinedInput({ tool, command, content } = {}) {
88
- return [command, content, tool].filter(Boolean).join(' ');
89
- }
90
-
91
- /**
92
- * Evaluate specs with full trace: passes, blocks, and near-misses.
93
- */
94
- function traceEvaluation(specs, context = {}) {
95
- const traceId = `trace_${Date.now()}_${crypto.randomBytes(4).toString('hex')}`;
96
- const timestamp = new Date().toISOString();
97
- const combinedInput = buildCombinedInput(context);
98
- const results = [];
99
-
100
- for (const spec of specs) {
101
- const constraintResults = evaluateConstraints(spec, context);
102
- const invariantResults = evaluateInvariants(spec, context);
103
-
104
- // Annotate constraint results with near-miss info
105
- for (const cr of constraintResults) {
106
- const constraint = spec.constraints.find((c) => c.id === cr.constraintId);
107
- let nearMiss = { isNearMiss: false, score: 0, matchedTokens: 0, totalTokens: 0 };
108
-
109
- if (cr.passed && constraint) {
110
- // Only compute near-miss for constraints that passed (weren't blocked)
111
- nearMiss = computeNearMiss(constraint, combinedInput);
112
- }
113
-
114
- results.push({
115
- ...cr,
116
- nearMiss: nearMiss.isNearMiss,
117
- nearMissScore: nearMiss.score,
118
- nearMissDetail: nearMiss.isNearMiss ? nearMiss : null,
119
- });
120
- }
121
-
122
- // Invariant results (no near-miss concept for invariants)
123
- for (const ir of invariantResults) {
124
- results.push({
125
- ...ir,
126
- nearMiss: false,
127
- nearMissScore: 0,
128
- nearMissDetail: null,
129
- });
130
- }
131
- }
132
-
133
- const blocked = results.filter((r) => !r.passed);
134
- const nearMisses = results.filter((r) => r.nearMiss);
135
- const passed = results.filter((r) => r.passed && !r.nearMiss);
136
-
137
- return {
138
- traceId,
139
- timestamp,
140
- allowed: blocked.length === 0,
141
- results,
142
- blocked,
143
- nearMisses,
144
- passed,
145
- counts: {
146
- total: results.length,
147
- blocked: blocked.length,
148
- nearMiss: nearMisses.length,
149
- passed: passed.length,
150
- },
151
- context: {
152
- tool: context.tool || null,
153
- command: truncate(context.command, 200),
154
- action: truncate(context.action, 200),
155
- },
156
- };
157
- }
158
-
159
- // ---------------------------------------------------------------------------
160
- // Trace Persistence
161
- // ---------------------------------------------------------------------------
162
-
163
- function getTracePath({ feedbackDir } = {}) {
164
- const dir = feedbackDir || resolveFeedbackDir();
165
- return path.join(dir, TRACE_FILE);
166
- }
167
-
168
- function recordTrace(trace, options = {}) {
169
- const entry = {
170
- traceId: trace.traceId,
171
- timestamp: trace.timestamp,
172
- allowed: trace.allowed,
173
- counts: trace.counts,
174
- blocked: trace.blocked.map(summarizeResult),
175
- nearMisses: trace.nearMisses.map(summarizeResult),
176
- context: trace.context,
177
- };
178
- appendJsonl(getTracePath(options), entry);
179
- return entry;
180
- }
181
-
182
- function loadTraces(options = {}) {
183
- return readJsonl(getTracePath(options));
184
- }
185
-
186
- function summarizeResult(r) {
187
- return {
188
- specName: r.specName,
189
- id: r.constraintId || r.invariantId,
190
- type: r.type,
191
- reason: r.reason,
192
- severity: r.severity,
193
- nearMissScore: r.nearMissScore || 0,
194
- };
195
- }
196
-
197
- // ---------------------------------------------------------------------------
198
- // Session Trace Summary
199
- // ---------------------------------------------------------------------------
200
-
201
- /**
202
- * Summarize all traces from a session into a single-glance safety posture.
203
- */
204
- function summarizeSessionTraces(traces) {
205
- let totalEvaluations = traces.length;
206
- let totalChecks = 0;
207
- let totalBlocked = 0;
208
- let totalNearMisses = 0;
209
- let totalPassed = 0;
210
-
211
- const blocksBySpec = new Map();
212
- const blocksByConstraint = new Map();
213
- const nearMissByConstraint = new Map();
214
-
215
- for (const trace of traces) {
216
- const counts = trace.counts || {};
217
- totalChecks += counts.total || 0;
218
- totalBlocked += counts.blocked || 0;
219
- totalNearMisses += counts.nearMiss || 0;
220
- totalPassed += counts.passed || 0;
221
-
222
- for (const block of trace.blocked || []) {
223
- const specKey = block.specName || 'unknown';
224
- blocksBySpec.set(specKey, (blocksBySpec.get(specKey) || 0) + 1);
225
- const cKey = block.id || 'unknown';
226
- blocksByConstraint.set(cKey, (blocksByConstraint.get(cKey) || 0) + 1);
227
- }
228
-
229
- for (const nm of trace.nearMisses || []) {
230
- const cKey = nm.id || 'unknown';
231
- const existing = nearMissByConstraint.get(cKey) || { count: 0, maxScore: 0 };
232
- existing.count += 1;
233
- existing.maxScore = Math.max(existing.maxScore, nm.nearMissScore || 0);
234
- nearMissByConstraint.set(cKey, existing);
235
- }
236
- }
237
-
238
- return {
239
- totalEvaluations,
240
- totalChecks,
241
- totalBlocked,
242
- totalNearMisses,
243
- totalPassed,
244
- blockRate: totalChecks > 0 ? Math.round((totalBlocked / totalChecks) * 100) : 0,
245
- nearMissRate: totalChecks > 0 ? Math.round((totalNearMisses / totalChecks) * 100) : 0,
246
- safetyPosture: computeSafetyPosture(totalBlocked, totalNearMisses, totalChecks),
247
- topBlockedSpecs: mapToSorted(blocksBySpec, 'name', 'count'),
248
- topBlockedConstraints: mapToSorted(blocksByConstraint, 'id', 'count'),
249
- topNearMisses: Array.from(nearMissByConstraint.entries())
250
- .sort(([, a], [, b]) => b.count - a.count)
251
- .slice(0, 10)
252
- .map(([id, data]) => ({ id, count: data.count, maxScore: data.maxScore })),
253
- };
254
- }
255
-
256
- /**
257
- * Format a trace summary as human-readable text.
258
- */
259
- function formatTraceSummary(summary) {
260
- const lines = [];
261
- lines.push(`Safety Posture: ${summary.safetyPosture.toUpperCase()}`);
262
- lines.push(`Evaluations: ${summary.totalEvaluations} | Checks: ${summary.totalChecks}`);
263
- lines.push(`Blocked: ${summary.totalBlocked} (${summary.blockRate}%) | Near-Misses: ${summary.totalNearMisses} (${summary.nearMissRate}%) | Passed: ${summary.totalPassed}`);
264
-
265
- if (summary.topBlockedConstraints.length > 0) {
266
- lines.push('');
267
- lines.push('Top Blocked:');
268
- for (const c of summary.topBlockedConstraints) {
269
- lines.push(` - ${c.id}: ${c.count}x`);
270
- }
271
- }
272
-
273
- if (summary.topNearMisses.length > 0) {
274
- lines.push('');
275
- lines.push('Top Near-Misses:');
276
- for (const nm of summary.topNearMisses) {
277
- lines.push(` - ${nm.id}: ${nm.count}x (max score: ${nm.maxScore})`);
278
- }
279
- }
280
-
281
- return lines.join('\n');
282
- }
283
-
284
- // ---------------------------------------------------------------------------
285
- // Helpers
286
- // ---------------------------------------------------------------------------
287
-
288
- function computeSafetyPosture(blocked, nearMisses, total) {
289
- if (total === 0) return 'unknown';
290
- if (blocked > 0) return 'critical';
291
- if (nearMisses > 0) return 'cautious';
292
- return 'clean';
293
- }
294
-
295
- function mapToSorted(map, keyName, valueName) {
296
- return Array.from(map.entries())
297
- .sort(([, a], [, b]) => b - a)
298
- .slice(0, 10)
299
- .map(([k, v]) => ({ [keyName]: k, [valueName]: v }));
300
- }
301
-
302
- function truncate(value, maxLength) {
303
- if (value === undefined || value === null) return null;
304
- const text = String(value).trim();
305
- return text ? text.slice(0, maxLength) : null;
306
- }
307
-
308
- // ---------------------------------------------------------------------------
309
- // CLI
310
- // ---------------------------------------------------------------------------
311
-
312
- function isCliInvocation(argv = process.argv) {
313
- const invokedPath = argv[1];
314
- return invokedPath ? path.resolve(invokedPath) === __filename : false;
315
- }
316
-
317
- if (isCliInvocation()) {
318
- const command = process.argv[2] || 'summary';
319
-
320
- if (command === 'summary') {
321
- const traces = loadTraces();
322
- const summary = summarizeSessionTraces(traces);
323
- console.log(formatTraceSummary(summary));
324
- } else if (command === 'json') {
325
- const traces = loadTraces();
326
- const summary = summarizeSessionTraces(traces);
327
- console.log(JSON.stringify(summary, null, 2));
328
- } else if (command === 'eval') {
329
- // Evaluate current specs against a test command
330
- const testCommand = process.argv[3] || '';
331
- const specs = loadSpecDir();
332
- const trace = traceEvaluation(specs, { command: testCommand, action: testCommand });
333
- console.log(JSON.stringify({
334
- allowed: trace.allowed,
335
- counts: trace.counts,
336
- blocked: trace.blocked.map(summarizeResult),
337
- nearMisses: trace.nearMisses.map(summarizeResult),
338
- }, null, 2));
339
- } else {
340
- console.error(`Unknown command: ${command}. Use: summary, json, eval`);
341
- process.exit(1);
342
- }
343
- }
344
-
345
- module.exports = {
346
- NEAR_MISS_THRESHOLD,
347
- computeNearMiss,
348
- extractLiteralTokens,
349
- formatTraceSummary,
350
- loadTraces,
351
- recordTrace,
352
- summarizeSessionTraces,
353
- traceEvaluation,
354
- };