thumbgate 1.14.1 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/.claude-plugin/marketplace.json +6 -6
  2. package/.claude-plugin/plugin.json +3 -3
  3. package/.well-known/llms.txt +5 -5
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +60 -35
  6. package/adapters/chatgpt/openapi.yaml +118 -2
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/mcp/server-stdio.js +217 -84
  9. package/adapters/opencode/opencode.json +1 -1
  10. package/bench/prompt-eval-suite.json +5 -1
  11. package/bin/cli.js +211 -8
  12. package/config/enforcement.json +59 -7
  13. package/config/evals/agent-safety-eval.json +338 -22
  14. package/config/gates/default.json +33 -0
  15. package/config/gates/routine.json +43 -0
  16. package/config/github-about.json +3 -3
  17. package/config/mcp-allowlists.json +4 -0
  18. package/config/merge-quality-checks.json +2 -1
  19. package/config/model-candidates.json +131 -0
  20. package/openapi/openapi.yaml +118 -2
  21. package/package.json +70 -51
  22. package/public/blog.html +7 -7
  23. package/public/codex-plugin.html +13 -7
  24. package/public/compare.html +29 -23
  25. package/public/dashboard.html +105 -12
  26. package/public/guide.html +28 -28
  27. package/public/index.html +233 -97
  28. package/public/learn.html +87 -20
  29. package/public/lessons.html +26 -2
  30. package/public/numbers.html +271 -0
  31. package/public/pro.html +89 -19
  32. package/scripts/agent-audit-trace.js +55 -0
  33. package/scripts/agent-memory-lifecycle.js +96 -0
  34. package/scripts/agent-readiness-plan.js +118 -0
  35. package/scripts/agentic-data-pipeline.js +21 -1
  36. package/scripts/agents-sdk-sandbox-plan.js +57 -0
  37. package/scripts/ai-org-governance.js +98 -0
  38. package/scripts/ai-search-distribution.js +43 -0
  39. package/scripts/artifact-agent-plan.js +81 -0
  40. package/scripts/billing.js +27 -8
  41. package/scripts/cli-feedback.js +2 -1
  42. package/scripts/cli-schema.js +60 -5
  43. package/scripts/code-mode-mcp-plan.js +71 -0
  44. package/scripts/commercial-offer.js +1 -1
  45. package/scripts/context-engine.js +1 -2
  46. package/scripts/context-manager.js +4 -1
  47. package/scripts/contextfs.js +214 -32
  48. package/scripts/dashboard-render-spec.js +1 -1
  49. package/scripts/dashboard.js +275 -9
  50. package/scripts/decision-journal.js +13 -3
  51. package/scripts/document-workflow-governance.js +62 -0
  52. package/scripts/enterprise-agent-rollout.js +34 -0
  53. package/scripts/experience-replay-governance.js +69 -0
  54. package/scripts/export-hf-dataset.js +1 -1
  55. package/scripts/feedback-loop.js +141 -9
  56. package/scripts/feedback-to-rules.js +17 -23
  57. package/scripts/gates-engine.js +4 -6
  58. package/scripts/growth-campaigns.js +49 -0
  59. package/scripts/harness-selector.js +145 -1
  60. package/scripts/hybrid-supervisor-agent.js +64 -0
  61. package/scripts/inference-cache-policy.js +72 -0
  62. package/scripts/inference-economics.js +53 -0
  63. package/scripts/internal-agent-bootstrap.js +12 -2
  64. package/scripts/knowledge-layer-plan.js +108 -0
  65. package/scripts/lesson-canonical.js +181 -0
  66. package/scripts/lesson-db.js +71 -10
  67. package/scripts/lesson-inference.js +183 -44
  68. package/scripts/lesson-search.js +4 -1
  69. package/scripts/lesson-synthesis.js +23 -2
  70. package/scripts/llm-client.js +157 -26
  71. package/scripts/mailer/resend-mailer.js +112 -1
  72. package/scripts/mcp-transport-strategy.js +66 -0
  73. package/scripts/memory-store-governance.js +60 -0
  74. package/scripts/meta-agent-loop.js +7 -13
  75. package/scripts/model-access-eligibility.js +38 -0
  76. package/scripts/model-migration-readiness.js +55 -0
  77. package/scripts/native-messaging-audit.js +514 -0
  78. package/scripts/operational-integrity.js +96 -3
  79. package/scripts/otel-declarative-config.js +56 -0
  80. package/scripts/perplexity-client.js +1 -1
  81. package/scripts/post-training-governance.js +34 -0
  82. package/scripts/pr-manager.js +47 -7
  83. package/scripts/private-core-boundary.js +72 -0
  84. package/scripts/production-agent-readiness.js +40 -0
  85. package/scripts/profile-router.js +16 -1
  86. package/scripts/prompt-eval.js +564 -32
  87. package/scripts/prompt-programs.js +93 -0
  88. package/scripts/provider-action-normalizer.js +585 -0
  89. package/scripts/rule-validator.js +285 -0
  90. package/scripts/scaling-law-claims.js +60 -0
  91. package/scripts/security-scanner.js +1 -1
  92. package/scripts/self-distill-agent.js +7 -32
  93. package/scripts/seo-gsd.js +400 -43
  94. package/scripts/skill-rag-router.js +53 -0
  95. package/scripts/spec-gate.js +1 -1
  96. package/scripts/student-consistent-training.js +73 -0
  97. package/scripts/synthetic-data-provenance.js +98 -0
  98. package/scripts/task-context-result.js +81 -0
  99. package/scripts/telemetry-analytics.js +149 -0
  100. package/scripts/thompson-sampling.js +2 -2
  101. package/scripts/token-savings.js +7 -6
  102. package/scripts/token-tco.js +46 -0
  103. package/scripts/tool-registry.js +75 -3
  104. package/scripts/verification-loop.js +10 -1
  105. package/scripts/verifier-scoring.js +71 -0
  106. package/scripts/workflow-sentinel.js +284 -28
  107. package/scripts/workspace-agent-routines.js +118 -0
  108. package/skills/thumbgate/SKILL.md +1 -1
  109. package/src/api/server.js +434 -120
  110. package/.claude-plugin/README.md +0 -170
  111. package/adapters/README.md +0 -12
  112. package/scripts/analytics-report.js +0 -328
  113. package/scripts/autonomous-workflow.js +0 -377
  114. package/scripts/billing-setup.js +0 -109
  115. package/scripts/creator-campaigns.js +0 -239
  116. package/scripts/cross-encoder-reranker.js +0 -235
  117. package/scripts/daemon-manager.js +0 -108
  118. package/scripts/decision-trace.js +0 -354
  119. package/scripts/delegation-runtime.js +0 -896
  120. package/scripts/dispatch-brief.js +0 -159
  121. package/scripts/distribution-surfaces.js +0 -110
  122. package/scripts/feedback-history-distiller.js +0 -382
  123. package/scripts/funnel-analytics.js +0 -35
  124. package/scripts/history-distiller.js +0 -200
  125. package/scripts/hosted-job-launcher.js +0 -256
  126. package/scripts/intent-router.js +0 -392
  127. package/scripts/lesson-reranker.js +0 -263
  128. package/scripts/lesson-retrieval.js +0 -148
  129. package/scripts/managed-lesson-agent.js +0 -183
  130. package/scripts/operational-dashboard.js +0 -103
  131. package/scripts/operational-summary.js +0 -129
  132. package/scripts/operator-artifacts.js +0 -608
  133. package/scripts/optimize-context.js +0 -17
  134. package/scripts/org-dashboard.js +0 -206
  135. package/scripts/partner-orchestration.js +0 -146
  136. package/scripts/predictive-insights.js +0 -356
  137. package/scripts/pulse.js +0 -80
  138. package/scripts/reflector-agent.js +0 -221
  139. package/scripts/sales-pipeline.js +0 -681
  140. package/scripts/session-episode-store.js +0 -329
  141. package/scripts/session-health-sensor.js +0 -242
  142. package/scripts/session-report.js +0 -120
  143. package/scripts/swarm-coordinator.js +0 -81
  144. package/scripts/tool-kpi-tracker.js +0 -12
  145. package/scripts/webhook-delivery.js +0 -62
  146. package/scripts/workflow-sprint-intake.js +0 -475
  147. package/skills/agent-memory/SKILL.md +0 -97
  148. package/skills/solve-architecture-autonomy/SKILL.md +0 -17
  149. package/skills/solve-architecture-autonomy/tool.js +0 -33
  150. package/skills/thumbgate-feedback/SKILL.md +0 -49
@@ -1,354 +0,0 @@
1
- #!/usr/bin/env node
2
- 'use strict';
3
-
4
- /**
5
- * Decision Trace — full observability for gate evaluations.
6
- *
7
- * Inspired by Ethan Mollick's observation that operators need to *see* what
8
- * the agent was thinking when it made a decision. ThumbGate already captures
9
- * what was blocked; Decision Trace adds:
10
- *
11
- * 1. Full audit of every evaluation (passes, blocks, AND near-misses)
12
- * 2. Near-miss detection: constraints that almost matched
13
- * 3. Session trace summaries: single-glance safety posture view
14
- *
15
- * Near-miss heuristic: extract literal tokens from a regex deny pattern,
16
- * count how many appear in the input. If at least 50% (but not all) match and the full regex
17
- * doesn't, flag as near-miss.
18
- */
19
-
20
- const crypto = require('node:crypto');
21
- const path = require('node:path');
22
- const { readJsonl, appendJsonl } = require('./fs-utils');
23
- const { resolveFeedbackDir } = require('./feedback-paths');
24
- const {
25
- evaluateConstraints,
26
- evaluateInvariants,
27
- loadSpecDir,
28
- } = require('./spec-gate');
29
-
/** File name of the JSONL trace log; getTracePath joins it onto the feedback directory. */
const TRACE_FILE = 'decision-trace.jsonl';

/** Lowest token-match ratio (inclusive) at which computeNearMiss flags a near-miss. */
const NEAR_MISS_THRESHOLD = 0.5;
32
-
33
- // ---------------------------------------------------------------------------
34
- // Near-Miss Detection
35
- // ---------------------------------------------------------------------------
36
-
/**
 * Extract the human-readable literal tokens from a regex deny pattern.
 *
 * This is a heuristic, not a regex parser: regex syntax is blanked out and
 * whatever text remains is split on whitespace. Tokens shorter than two
 * characters are dropped and the rest are lower-cased. Duplicate tokens are
 * kept, in pattern order.
 *
 * @param {string|RegExp} pattern - Regex source text, or a RegExp whose
 *   `.source` is used.
 * @returns {string[]} Lower-cased literal tokens; an empty array when
 *   `pattern` is neither a string nor a RegExp.
 */
function extractLiteralTokens(pattern) {
  const source = pattern instanceof RegExp ? pattern.source : pattern;
  if (typeof source !== 'string') return [];

  // Letters that, after a backslash, stand for a class, boundary or control
  // character rather than for themselves.
  const classEscapes = 'sdwbSDWBnrtfv';
  const metacharacters = '.*+?^${}()|[]\\';

  return source
    // Consume escapes pairwise so an escaped backslash followed by a letter
    // (`\\server`) is not misread as a class escape. Escaped metacharacters
    // are blanked here so `\(` can never start a group prefix below.
    .replace(/\\([\s\S])/g, (_, escaped) =>
      (classEscapes.includes(escaped) || metacharacters.includes(escaped) ? ' ' : ` ${escaped}`))
    // Group prefixes: (?: (?= (?! (?<= (?<! (?<name> — otherwise ":" or
    // "<name>" would be glued onto the following literal.
    .replace(/\(\?(?:<[=!]|<[^>]+>|[:=!])/g, ' ')
    // Counted quantifiers such as {3} or {2,5} are syntax, not literals.
    .replace(/\{\d+(?:,\d*)?\}/g, ' ')
    // Remaining metacharacters.
    .replace(/[.*+?^${}()|[\]\\]/g, ' ')
    .split(/\s+/)
    .filter((token) => token.length >= 2) // ignore empty strings and single chars
    .map((token) => token.toLowerCase());
}
54
-
/**
 * Score how close an input came to matching a constraint's deny pattern.
 *
 * The score is the fraction of the pattern's literal tokens (from
 * extractLiteralTokens) that occur, case-insensitively, as substrings of the
 * input. A near-miss is a score of at least NEAR_MISS_THRESHOLD but below 1.0;
 * a score of 1.0 is not flagged.
 *
 * @param {{deny?: (string|RegExp)}} constraint - Constraint whose `deny`
 *   pattern is examined.
 * @param {*} inputText - Text to search; null/undefined is treated as empty.
 * @returns {{isNearMiss: boolean, score: number, matchedTokens: number, totalTokens: number}}
 *   `score` is rounded to two decimals. All-zero result when there is no
 *   usable pattern or the pattern yields no tokens.
 */
function computeNearMiss(constraint, inputText) {
  const noMatch = { isNearMiss: false, score: 0, matchedTokens: 0, totalTokens: 0 };

  const deny = constraint == null ? undefined : constraint.deny;
  const pattern = deny instanceof RegExp ? deny.source : deny;
  if (typeof pattern !== 'string') return noMatch;

  const tokens = extractLiteralTokens(pattern);
  if (tokens.length === 0) return noMatch;

  // String(undefined) is "undefined": without this guard a missing input
  // would "match" patterns that happen to contain that word.
  const haystack = inputText == null ? '' : String(inputText).toLowerCase();
  const matchedTokens = tokens.filter((token) => haystack.includes(token)).length;

  const ratio = matchedTokens / tokens.length;
  return {
    isNearMiss: ratio >= NEAR_MISS_THRESHOLD && ratio < 1.0,
    score: Math.round(ratio * 100) / 100,
    matchedTokens,
    totalTokens: tokens.length,
  };
}
79
-
80
- // ---------------------------------------------------------------------------
81
- // Trace Evaluation
82
- // ---------------------------------------------------------------------------
83
-
/**
 * Assemble the text that near-miss detection searches.
 *
 * @param {{tool?: *, command?: *, content?: *}} [context] - Evaluation context.
 * @returns {string} The truthy values among command, content and tool, in
 *   that order, joined by single spaces ('' when none are set).
 */
function buildCombinedInput({ tool, command, content } = {}) {
  const parts = [];
  for (const part of [command, content, tool]) {
    if (part) {
      parts.push(part);
    }
  }
  return parts.join(' ');
}
90
-
/**
 * Evaluate every spec against a context and keep the full picture: blocks,
 * near-misses and clean passes.
 *
 * Each spec is run through evaluateConstraints and evaluateInvariants. A
 * constraint result that passed is additionally scored with computeNearMiss
 * against the combined input text; invariant results never carry near-miss
 * data.
 *
 * @param {Iterable<object>} specs - Specs to evaluate; each is expected to
 *   expose a `constraints` array whose items have an `id`.
 * @param {object} [context] - Evaluation context (tool, command, content, action).
 * @returns {object} Trace with `traceId`, `timestamp`, `allowed` (true when
 *   nothing was blocked), the annotated `results`, the `blocked` /
 *   `nearMisses` / `passed` partitions, their `counts`, and a `context`
 *   snapshot whose command and action are truncated to 200 characters.
 */
function traceEvaluation(specs, context = {}) {
  const traceId = `trace_${Date.now()}_${crypto.randomBytes(4).toString('hex')}`;
  const timestamp = new Date().toISOString();
  const nearMissInput = buildCombinedInput(context);
  const results = [];

  for (const spec of specs) {
    const constraintOutcomes = evaluateConstraints(spec, context);
    const invariantOutcomes = evaluateInvariants(spec, context);

    for (const outcome of constraintOutcomes) {
      const definition = spec.constraints.find((candidate) => candidate.id === outcome.constraintId);
      // Only constraints that let the action through can be near-misses.
      const scored = outcome.passed && definition
        ? computeNearMiss(definition, nearMissInput)
        : { isNearMiss: false, score: 0, matchedTokens: 0, totalTokens: 0 };

      results.push({
        ...outcome,
        nearMiss: scored.isNearMiss,
        nearMissScore: scored.score,
        nearMissDetail: scored.isNearMiss ? scored : null,
      });
    }

    // Invariants have no near-miss concept; fill the fields with neutral values.
    for (const outcome of invariantOutcomes) {
      results.push({ ...outcome, nearMiss: false, nearMissScore: 0, nearMissDetail: null });
    }
  }

  const blocked = [];
  const nearMisses = [];
  const passed = [];
  for (const result of results) {
    if (!result.passed) blocked.push(result);
    if (result.nearMiss) nearMisses.push(result);
    if (result.passed && !result.nearMiss) passed.push(result);
  }

  return {
    traceId,
    timestamp,
    allowed: blocked.length === 0,
    results,
    blocked,
    nearMisses,
    passed,
    counts: {
      total: results.length,
      blocked: blocked.length,
      nearMiss: nearMisses.length,
      passed: passed.length,
    },
    context: {
      tool: context.tool || null,
      command: truncate(context.command, 200),
      action: truncate(context.action, 200),
    },
  };
}
158
-
159
- // ---------------------------------------------------------------------------
160
- // Trace Persistence
161
- // ---------------------------------------------------------------------------
162
-
/**
 * Locate the decision-trace log file.
 *
 * @param {{feedbackDir?: string}} [options] - `feedbackDir` overrides the
 *   directory; when falsy, resolveFeedbackDir() supplies it.
 * @returns {string} The directory joined with TRACE_FILE.
 */
function getTracePath({ feedbackDir } = {}) {
  return path.join(feedbackDir || resolveFeedbackDir(), TRACE_FILE);
}
167
-
/**
 * Append a compact form of a trace to the trace log.
 *
 * Only the identifying fields, counts, context and the summarized blocked and
 * near-miss results are written; the full `results` and `passed` lists are
 * left out.
 *
 * @param {object} trace - A trace as produced by traceEvaluation.
 * @param {object} [options] - Forwarded to getTracePath.
 * @returns {object} The entry that was appended.
 */
function recordTrace(trace, options = {}) {
  const { traceId, timestamp, allowed, counts, blocked, nearMisses, context } = trace;
  const entry = {
    traceId,
    timestamp,
    allowed,
    counts,
    blocked: blocked.map(summarizeResult),
    nearMisses: nearMisses.map(summarizeResult),
    context,
  };

  const logPath = getTracePath(options);
  appendJsonl(logPath, entry);
  return entry;
}
181
-
/**
 * Read back the trace log.
 *
 * @param {object} [options] - Forwarded to getTracePath.
 * @returns {*} Whatever readJsonl returns for the trace file.
 */
function loadTraces(options = {}) {
  const logPath = getTracePath(options);
  return readJsonl(logPath);
}
185
-
/**
 * Reduce an evaluation result to the fields kept in persisted traces and CLI
 * output.
 *
 * @param {object} r - Annotated constraint or invariant result.
 * @returns {{specName: *, id: *, type: *, reason: *, severity: *, nearMissScore: number}}
 *   `id` is the constraint id, falling back to the invariant id;
 *   `nearMissScore` falls back to 0.
 */
function summarizeResult(r) {
  const { specName, constraintId, invariantId, type, reason, severity, nearMissScore } = r;
  return {
    specName,
    id: constraintId || invariantId,
    type,
    reason,
    severity,
    nearMissScore: nearMissScore || 0,
  };
}
196
-
197
- // ---------------------------------------------------------------------------
198
- // Session Trace Summary
199
- // ---------------------------------------------------------------------------
200
-
/**
 * Roll recorded traces up into a single-glance safety posture.
 *
 * Traces normally come from a persisted JSONL log, so the input is treated
 * defensively: a null/undefined list counts as empty, and entries (or
 * blocked / near-miss items) that are not objects are skipped instead of
 * crashing the summary.
 *
 * @param {?Iterable<object>} traces - Trace entries as written by recordTrace.
 * @returns {object} Totals (`totalEvaluations` counts usable entries),
 *   `blockRate` / `nearMissRate` as whole percentages of all checks,
 *   `safetyPosture`, and up to ten entries each in `topBlockedSpecs`,
 *   `topBlockedConstraints` and `topNearMisses`.
 */
function summarizeSessionTraces(traces) {
  const isRecord = (value) => value !== null && typeof value === 'object';
  const recordsOf = (value) => (value == null ? [] : Array.from(value).filter(isRecord));

  const entries = recordsOf(traces);
  let totalChecks = 0;
  let totalBlocked = 0;
  let totalNearMisses = 0;
  let totalPassed = 0;

  const blocksBySpec = new Map();
  const blocksByConstraint = new Map();
  const nearMissByConstraint = new Map();

  for (const trace of entries) {
    const counts = trace.counts || {};
    totalChecks += counts.total || 0;
    totalBlocked += counts.blocked || 0;
    totalNearMisses += counts.nearMiss || 0;
    totalPassed += counts.passed || 0;

    for (const block of recordsOf(trace.blocked)) {
      const specKey = block.specName || 'unknown';
      blocksBySpec.set(specKey, (blocksBySpec.get(specKey) || 0) + 1);
      const constraintKey = block.id || 'unknown';
      blocksByConstraint.set(constraintKey, (blocksByConstraint.get(constraintKey) || 0) + 1);
    }

    for (const nearMiss of recordsOf(trace.nearMisses)) {
      const constraintKey = nearMiss.id || 'unknown';
      const tally = nearMissByConstraint.get(constraintKey) || { count: 0, maxScore: 0 };
      tally.count += 1;
      tally.maxScore = Math.max(tally.maxScore, nearMiss.nearMissScore || 0);
      nearMissByConstraint.set(constraintKey, tally);
    }
  }

  return {
    totalEvaluations: entries.length,
    totalChecks,
    totalBlocked,
    totalNearMisses,
    totalPassed,
    blockRate: totalChecks > 0 ? Math.round((totalBlocked / totalChecks) * 100) : 0,
    nearMissRate: totalChecks > 0 ? Math.round((totalNearMisses / totalChecks) * 100) : 0,
    safetyPosture: computeSafetyPosture(totalBlocked, totalNearMisses, totalChecks),
    topBlockedSpecs: mapToSorted(blocksBySpec, 'name', 'count'),
    topBlockedConstraints: mapToSorted(blocksByConstraint, 'id', 'count'),
    topNearMisses: Array.from(nearMissByConstraint.entries())
      .sort(([, a], [, b]) => b.count - a.count)
      .slice(0, 10)
      .map(([id, data]) => ({ id, count: data.count, maxScore: data.maxScore })),
  };
}
255
-
/**
 * Render a session summary as plain text for the terminal.
 *
 * Always emits three header lines (posture, evaluation/check totals,
 * blocked / near-miss / passed totals). A "Top Blocked:" and a
 * "Top Near-Misses:" section follow, each preceded by a blank line, only when
 * the corresponding list is non-empty.
 *
 * @param {object} summary - A summary as produced by summarizeSessionTraces.
 * @returns {string} Newline-joined report.
 */
function formatTraceSummary(summary) {
  const header = [
    `Safety Posture: ${summary.safetyPosture.toUpperCase()}`,
    `Evaluations: ${summary.totalEvaluations} | Checks: ${summary.totalChecks}`,
    `Blocked: ${summary.totalBlocked} (${summary.blockRate}%) | Near-Misses: ${summary.totalNearMisses} (${summary.nearMissRate}%) | Passed: ${summary.totalPassed}`,
  ];

  const blockedSection = summary.topBlockedConstraints.length > 0
    ? ['', 'Top Blocked:', ...Array.from(summary.topBlockedConstraints, (c) => ` - ${c.id}: ${c.count}x`)]
    : [];

  const nearMissSection = summary.topNearMisses.length > 0
    ? ['', 'Top Near-Misses:', ...Array.from(summary.topNearMisses, (nm) => ` - ${nm.id}: ${nm.count}x (max score: ${nm.maxScore})`)]
    : [];

  return [...header, ...blockedSection, ...nearMissSection].join('\n');
}
283
-
284
- // ---------------------------------------------------------------------------
285
- // Helpers
286
- // ---------------------------------------------------------------------------
287
-
/**
 * Classify a session from its totals.
 *
 * @param {number} blocked - Number of blocked checks.
 * @param {number} nearMisses - Number of near-miss checks.
 * @param {number} total - Number of checks overall.
 * @returns {'unknown'|'critical'|'cautious'|'clean'} 'unknown' when nothing
 *   was checked; otherwise 'critical' if anything was blocked, 'cautious' if
 *   there were only near-misses, else 'clean'.
 */
function computeSafetyPosture(blocked, nearMisses, total) {
  // Assigned from weakest to strongest signal so the last match wins.
  let posture = 'clean';
  if (nearMisses > 0) posture = 'cautious';
  if (blocked > 0) posture = 'critical';
  if (total === 0) posture = 'unknown';
  return posture;
}
294
-
/**
 * Turn a key → count Map into a top-ten list, highest count first.
 *
 * @param {Map<*, number>} map - Counts keyed by name or id.
 * @param {string} keyName - Property name that receives each Map key.
 * @param {string} valueName - Property name that receives each count.
 * @returns {object[]} At most ten `{ [keyName]: key, [valueName]: count }` objects.
 */
function mapToSorted(map, keyName, valueName) {
  const byCountDescending = [...map.entries()].sort((left, right) => right[1] - left[1]);
  return byCountDescending
    .slice(0, 10)
    .map(([key, count]) => ({ [keyName]: key, [valueName]: count }));
}
301
-
/**
 * Stringify, trim and cap a value for storage in a trace.
 *
 * @param {*} value - Value to shorten.
 * @param {number} maxLength - Maximum length of the returned string.
 * @returns {?string} The trimmed text cut to `maxLength`, or null when the
 *   value is null/undefined or trims down to an empty string.
 */
function truncate(value, maxLength) {
  if (value == null) return null;

  const trimmed = String(value).trim();
  if (trimmed === '') return null;

  return trimmed.slice(0, maxLength);
}
307
-
308
- // ---------------------------------------------------------------------------
309
- // CLI
310
- // ---------------------------------------------------------------------------
311
-
/**
 * Tell whether this file is the script being run, as opposed to being
 * required by another module.
 *
 * @param {string[]} [argv] - Argument vector; defaults to process.argv.
 * @returns {boolean} True when argv[1], resolved to an absolute path, equals
 *   this module's __filename; false when argv[1] is missing or empty.
 */
function isCliInvocation(argv = process.argv) {
  const scriptArg = argv[1];
  if (!scriptArg) {
    return false;
  }
  return path.resolve(scriptArg) === __filename;
}
316
-
// Command-line entry point. Sub-commands:
//   summary (default) — print the text summary of the stored traces
//   json              — print the same summary as JSON
//   eval <command>    — evaluate the loaded specs against a test command
if (isCliInvocation()) {
  const subcommand = process.argv[2] || 'summary';

  switch (subcommand) {
    case 'summary':
      console.log(formatTraceSummary(summarizeSessionTraces(loadTraces())));
      break;

    case 'json':
      console.log(JSON.stringify(summarizeSessionTraces(loadTraces()), null, 2));
      break;

    case 'eval': {
      // The test command is supplied as both the command and the action.
      const testCommand = process.argv[3] || '';
      const { allowed, counts, blocked, nearMisses } = traceEvaluation(loadSpecDir(), {
        command: testCommand,
        action: testCommand,
      });
      console.log(JSON.stringify({
        allowed,
        counts,
        blocked: blocked.map(summarizeResult),
        nearMisses: nearMisses.map(summarizeResult),
      }, null, 2));
      break;
    }

    default:
      console.error(`Unknown command: ${subcommand}. Use: summary, json, eval`);
      process.exit(1);
  }
}
344
-
345
- module.exports = {
346
- NEAR_MISS_THRESHOLD,
347
- computeNearMiss,
348
- extractLiteralTokens,
349
- formatTraceSummary,
350
- loadTraces,
351
- recordTrace,
352
- summarizeSessionTraces,
353
- traceEvaluation,
354
- };