thumbgate 1.16.12 → 1.16.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.well-known/mcp/server-card.json +1 -1
  4. package/README.md +3 -1
  5. package/adapters/claude/.mcp.json +2 -2
  6. package/adapters/mcp/server-stdio.js +26 -1
  7. package/adapters/opencode/opencode.json +1 -1
  8. package/bin/cli.js +420 -1
  9. package/config/gate-templates.json +372 -0
  10. package/config/mcp-allowlists.json +25 -0
  11. package/config/model-candidates.json +59 -2
  12. package/config/model-tiers.json +4 -1
  13. package/package.json +79 -22
  14. package/public/compare.html +6 -0
  15. package/public/index.html +144 -11
  16. package/public/numbers.html +11 -11
  17. package/public/pro.html +22 -24
  18. package/scripts/agent-design-governance.js +211 -0
  19. package/scripts/agent-reasoning-traces.js +683 -0
  20. package/scripts/agent-reward-model.js +438 -0
  21. package/scripts/agent-stack-survival-audit.js +231 -0
  22. package/scripts/ai-engineering-stack-guardrails.js +256 -0
  23. package/scripts/billing.js +16 -4
  24. package/scripts/chatgpt-ads-readiness-pack.js +195 -0
  25. package/scripts/cli-schema.js +277 -0
  26. package/scripts/code-graph-guardrails.js +176 -0
  27. package/scripts/deepseek-v4-runtime-guardrails.js +253 -0
  28. package/scripts/gemini-embedding-policy.js +198 -0
  29. package/scripts/inference-cache-policy.js +39 -0
  30. package/scripts/judge-reward-function.js +396 -0
  31. package/scripts/llm-behavior-monitor.js +251 -0
  32. package/scripts/long-running-agent-context-guardrails.js +176 -0
  33. package/scripts/multimodal-retrieval-plan.js +31 -11
  34. package/scripts/oss-pr-opportunity-scout.js +240 -0
  35. package/scripts/proactive-agent-eval-guardrails.js +230 -0
  36. package/scripts/profile-router.js +5 -4
  37. package/scripts/prompting-operating-system.js +273 -0
  38. package/scripts/proxy-pointer-rag-guardrails.js +189 -0
  39. package/scripts/rag-precision-guardrails.js +202 -0
  40. package/scripts/rate-limiter.js +1 -1
  41. package/scripts/reasoning-efficiency-guardrails.js +176 -0
  42. package/scripts/reward-hacking-guardrails.js +251 -0
  43. package/scripts/seo-gsd.js +1201 -11
  44. package/scripts/single-use-credential-gate.js +182 -0
  45. package/scripts/structured-prompt-driven.js +226 -0
  46. package/scripts/telemetry-analytics.js +31 -6
  47. package/scripts/tool-registry.js +92 -0
  48. package/scripts/upstream-contribution-engine.js +379 -0
  49. package/scripts/vector-store.js +119 -4
  50. package/src/api/server.js +333 -100
  51. package/scripts/agents-sdk-sandbox-plan.js +0 -57
  52. package/scripts/ai-org-governance.js +0 -98
  53. package/scripts/artifact-agent-plan.js +0 -81
  54. package/scripts/enterprise-agent-rollout.js +0 -34
  55. package/scripts/experience-replay-governance.js +0 -69
  56. package/scripts/inference-economics.js +0 -53
  57. package/scripts/knowledge-layer-plan.js +0 -108
  58. package/scripts/memory-store-governance.js +0 -60
  59. package/scripts/post-training-governance.js +0 -34
  60. package/scripts/production-agent-readiness.js +0 -40
  61. package/scripts/scaling-law-claims.js +0 -60
  62. package/scripts/student-consistent-training.js +0 -73
@@ -0,0 +1,182 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * Single-Use Credential Gate
6
+ *
7
+ * Converts the Link CLI pattern into local ThumbGate policy: risky agent
8
+ * actions should request narrow, one-time credentials with synchronous
9
+ * approval instead of reusing long-lived secrets.
10
+ */
11
+
12
+ const crypto = require('node:crypto');
13
+ const path = require('node:path');
14
+
15
+ const DEFAULT_TTL_SECONDS = 300;
16
+ const RISK_PATTERNS = [
17
+ { tag: 'purchase', pattern: /\b(buy|buys|buying|purchase|purchases|checkout|payment|gumroad|stripe|card)\b/i },
18
+ { tag: 'credential', pattern: /\b(token|secret|credential|api[_-]?key|oauth|login)\b/i },
19
+ { tag: 'deploy', pattern: /\b(deploy|production|railway|release)\b/i },
20
+ { tag: 'external-write', pattern: /\b(post|reply|send|email|upload|publish|create order)\b/i },
21
+ ];
22
+
23
/**
 * Build the approval plan for a proposed agent action.
 *
 * The action text is matched against RISK_PATTERNS; any match (or an explicit
 * `action.requiresCredential`) makes the plan require synchronous approval.
 *
 * @param {object} [action] - Proposed action (command, intent, description, url, tags, scope, ttlSeconds, ...).
 * @param {object} [options] - Fallback settings; `options.ttlSeconds` is used when the action has none.
 * @returns {object} Plan with required, riskTags, scope, ttlSeconds, singleUse,
 *   approvalMode, approvalPrompt and deniedReasons.
 */
function planSingleUseCredentialRequest(action = {}, options = {}) {
  const actionText = buildActionText(action);

  const riskTags = [];
  for (const { tag, pattern } of RISK_PATTERNS) {
    if (pattern.test(actionText)) riskTags.push(tag);
  }

  const needsApproval = riskTags.length > 0 || Boolean(action.requiresCredential);
  const scope = normalizeScope(action.scope || inferScope(actionText));

  // The requested lifetime is always forced into the 30s..900s window.
  const requestedTtl = action.ttlSeconds || options.ttlSeconds || DEFAULT_TTL_SECONDS;
  const ttlSeconds = clamp(Number(requestedTtl), 30, 900);

  let approvalMode = 'not-required';
  let approvalPrompt = 'No credential approval required.';
  if (needsApproval) {
    approvalMode = 'synchronous';
    approvalPrompt = `Approve one-time credential for ${scope.resource} (${scope.operation})? Expires in ${ttlSeconds}s and cannot be reused.`;
  }

  return {
    required: needsApproval,
    riskTags,
    scope,
    ttlSeconds,
    singleUse: true,
    approvalMode,
    approvalPrompt,
    deniedReasons: buildDeniedReasons(action, scope),
  };
}
43
+
44
/**
 * Create a grant record for a credential request and its approval decision.
 *
 * @param {object} [request] - Credential request (scope, ttlSeconds, singleUse).
 * @param {object} [approval] - Approval decision (approved, approvedBy, evidence).
 * @returns {object} Grant with grantId, approved, singleUse, scope, issuedAt,
 *   expiresAt, approvedBy, approvalEvidence and usedAt (always null when minted).
 */
function mintCredentialGrant(request = {}, approval = {}) {
  // Sample the clock once so grantId, issuedAt and expiresAt describe the same instant.
  const issuedAtMs = Date.now();

  // A non-numeric, infinite or out-of-range TTL would produce an Invalid Date,
  // and toISOString() throws RangeError on it. Fall back to the default lifetime.
  const requestedTtlSeconds = Number(request.ttlSeconds || DEFAULT_TTL_SECONDS);
  let expiresAt = new Date(issuedAtMs + (requestedTtlSeconds * 1000));
  if (Number.isNaN(expiresAt.getTime())) {
    expiresAt = new Date(issuedAtMs + (DEFAULT_TTL_SECONDS * 1000));
  }

  return {
    grantId: `cred_${issuedAtMs}_${crypto.randomBytes(4).toString('hex')}`,
    approved: Boolean(approval.approved),
    // Grants are single-use unless the request explicitly opts out with `false`.
    singleUse: request.singleUse !== false,
    scope: normalizeScope(request.scope),
    issuedAt: new Date(issuedAtMs).toISOString(),
    expiresAt: expiresAt.toISOString(),
    approvedBy: approval.approvedBy || null,
    approvalEvidence: approval.evidence || null,
    usedAt: null,
  };
}
58
+
59
/**
 * Decide whether a grant may be used for an action at a given time.
 *
 * @param {object} [grant] - Grant record (approved, singleUse, usedAt, expiresAt, scope, grantId).
 * @param {object} [action] - Action being attempted; its scope is taken from
 *   `action.scope` or inferred from the action text.
 * @param {Date} [now] - Evaluation time.
 * @returns {{allowed: boolean, reasons: string[], grantId: (string|null),
 *   requiredScope: object, grantedScope: object}} Decision with every failed check listed.
 */
function evaluateCredentialUse(grant = {}, action = {}, now = new Date()) {
  const reasons = [];
  if (!grant.approved) reasons.push('credential_not_approved');
  if (!grant.singleUse) reasons.push('credential_not_single_use');
  if (grant.usedAt) reasons.push('credential_already_used');

  if (grant.expiresAt) {
    const expiresAtMs = new Date(grant.expiresAt).getTime();
    // Written as a negated `>=` on purpose: an unparseable expiry yields NaN,
    // every comparison with NaN is false, and a plain `<` check would treat a
    // corrupt expiry as still valid. This form fails closed instead.
    const stillValid = expiresAtMs >= now.getTime();
    if (!stillValid) reasons.push('credential_expired');
  }

  const actionScope = normalizeScope(action.scope || inferScope(buildActionText(action)));
  const grantScope = normalizeScope(grant.scope);
  if (!scopeAllows(grantScope, actionScope)) reasons.push('credential_scope_mismatch');

  return {
    allowed: reasons.length === 0,
    reasons,
    grantId: grant.grantId || null,
    requiredScope: actionScope,
    grantedScope: grantScope,
  };
}
78
+
79
/**
 * Return a copy of the grant stamped with its time of use.
 *
 * @param {object} [grant] - Grant to mark; it is not modified.
 * @param {Date} [now] - Time of use.
 * @returns {object} Shallow copy of the grant with `usedAt` set to an ISO timestamp.
 */
function markCredentialUsed(grant = {}, now = new Date()) {
  const stamped = { ...grant };
  stamped.usedAt = now.toISOString();
  return stamped;
}
85
+
86
/**
 * Flatten the descriptive fields of an action into one space-separated string
 * for pattern matching.
 *
 * @param {object} [action] - Action with optional command, intent, description, url and tags.
 * @returns {string} Non-empty fields and tags joined by single spaces.
 */
function buildActionText(action = {}) {
  const { tags } = action;
  // A bare string must stay one tag. Spreading it would split it into single
  // characters ("deploy" -> "d e p l o y"), which no risk pattern can match.
  const tagList = typeof tags === 'string' ? [tags] : (tags || []);
  return [
    action.command,
    action.intent,
    action.description,
    action.url,
    ...tagList,
  ].filter(Boolean).join(' ');
}
95
+
96
/**
 * Guess the credential scope an action needs from keywords in its text.
 *
 * Rules are checked in order and the first match wins; text with no match
 * falls back to a local read scope.
 *
 * @param {string} [text] - Flattened action text.
 * @returns {{resource: string, operation: string}} Inferred scope.
 */
function inferScope(text = '') {
  const keywordRules = [
    [/\b(stripe|checkout|payment|card)\b/i, 'payments', 'write'],
    [/\b(gumroad|buy|buys|buying|purchase|purchases)\b/i, 'purchase', 'create'],
    [/\b(deploy|railway|production)\b/i, 'deployment', 'write'],
    [/\b(post|reply|email|send|publish)\b/i, 'external-message', 'send'],
  ];

  for (const [pattern, resource, operation] of keywordRules) {
    if (pattern.test(text)) {
      return { resource, operation };
    }
  }
  return { resource: 'local', operation: 'read' };
}
103
+
104
/**
 * Normalize a scope given as a "resource:operation" string or as an object.
 *
 * String form: a missing or empty resource becomes "local", and a missing or
 * empty operation becomes "use". Object form: missing fields become "local"
 * and "read". Null and undefined are treated like an empty object.
 *
 * @param {(string|object|null)} [scope] - Scope to normalize.
 * @returns {{resource: string, operation: string}} Normalized scope.
 */
function normalizeScope(scope = {}) {
  if (typeof scope === 'string') {
    // Trim both parts so "payments: write" and "payments:" behave predictably.
    const [resource = '', operation = ''] = scope.split(':').map((part) => part.trim());
    return { resource: resource || 'local', operation: operation || 'use' };
  }
  // The default parameter only covers `undefined`; guard `null` (for example
  // from parsed JSON) so the property reads below cannot throw.
  const fields = scope ?? {};
  return {
    resource: String(fields.resource || 'local'),
    operation: String(fields.operation || 'read'),
  };
}
114
+
115
/**
 * Check whether a granted scope covers a required scope.
 *
 * A granted resource of "*" covers everything. Otherwise the resources must
 * be equal and the granted operation must be "*" or equal to the required one.
 *
 * @param {{resource: string, operation: string}} granted - Scope on the grant.
 * @param {{resource: string, operation: string}} required - Scope the action needs.
 * @returns {boolean} True when the grant covers the action.
 */
function scopeAllows(granted, required) {
  const { resource, operation } = granted;
  if (resource === '*') return true;

  const resourceMatches = resource === required.resource;
  const operationMatches = operation === '*' || operation === required.operation;
  return resourceMatches && operationMatches;
}
120
+
121
/**
 * List the policy violations that make a credential request unacceptable.
 *
 * @param {object} action - Requested action; `persistent === true` is rejected.
 * @param {{resource: string, operation: string}} scope - Normalized scope; wildcards are rejected.
 * @returns {string[]} Reason codes, empty when the request is acceptable.
 */
function buildDeniedReasons(action, scope) {
  const checks = [
    [action.persistent === true, 'persistent_credentials_not_allowed'],
    [scope.resource === '*' || scope.operation === '*', 'credential_scope_too_broad'],
  ];
  return checks
    .filter(([violated]) => violated)
    .map(([, reason]) => reason);
}
127
+
128
/**
 * Constrain a number to the range [min, max].
 *
 * @param {number} value - Candidate value.
 * @param {number} min - Lower bound, also returned when `value` is not a finite number.
 * @param {number} max - Upper bound.
 * @returns {number} The bounded value.
 */
function clamp(value, min, max) {
  if (Number.isFinite(value)) {
    const atLeastMin = Math.max(min, value);
    return Math.min(max, atLeastMin);
  }
  return min;
}
132
+
133
/**
 * Render a credential plan as a short Markdown report.
 *
 * @param {object} [plan] - Plan with required, approvalMode, scope, ttlSeconds,
 *   deniedReasons and approvalPrompt.
 * @returns {string} Newline-joined report ending with a trailing newline.
 */
function formatCredentialPlan(plan = {}) {
  const requiredLabel = plan.required ? 'yes' : 'no';
  const scopeLabel = `${plan.scope?.resource}:${plan.scope?.operation}`;
  const deniedLabel = (plan.deniedReasons || []).join(', ') || 'none';

  const lines = [];
  lines.push('# Single-Use Credential Plan');
  lines.push('');
  lines.push(`Required: ${requiredLabel}`);
  lines.push(`Approval mode: ${plan.approvalMode}`);
  lines.push(`Scope: ${scopeLabel}`);
  lines.push(`TTL seconds: ${plan.ttlSeconds}`);
  lines.push(`Denied reasons: ${deniedLabel}`);
  lines.push('');
  lines.push(plan.approvalPrompt || '');
  lines.push('');
  return lines.join('\n');
}
147
+
148
/**
 * Parse the credential-gate CLI arguments.
 *
 * The first argument is the command (default "plan"). The remaining arguments
 * are `--flag=value` pairs; `--intent=` and `--action=` both set `intent`.
 *
 * @param {string[]} [argv] - Arguments after the script path.
 * @returns {object} Parsed arguments: command, intent, and description/scope when given.
 */
function parseArgs(argv = process.argv.slice(2)) {
  const flagTargets = [
    ['--intent=', 'intent'],
    ['--action=', 'intent'],
    ['--description=', 'description'],
    ['--scope=', 'scope'],
  ];

  const [command, ...flags] = argv;
  const args = { command: command || 'plan', intent: '' };

  flags.forEach((flag) => {
    for (const [prefix, key] of flagTargets) {
      if (flag.startsWith(prefix)) {
        args[key] = flag.slice(prefix.length);
      }
    }
  });
  return args;
}
158
+
159
/**
 * Report whether this file is the script Node was started with.
 *
 * @param {string[]} [argv] - Process argument vector; index 1 is the script path.
 * @returns {boolean} True when the resolved script path equals this file's path.
 */
function isCliInvocation(argv = process.argv) {
  const scriptPath = argv[1];
  if (!scriptPath) return false;
  return path.resolve(scriptPath) === __filename;
}
162
+
163
// CLI entry point: runs only when this file is executed directly.
// `plan` prints the Markdown report, `json` prints the raw plan, and any other
// command is reported on stderr with exit code 1.
if (isCliInvocation()) {
  const args = parseArgs();
  const plan = planSingleUseCredentialRequest(args);
  switch (args.command) {
    case 'json':
      console.log(JSON.stringify(plan, null, 2));
      break;
    case 'plan':
      console.log(formatCredentialPlan(plan));
      break;
    default:
      console.error(`Unknown command: ${args.command}. Use: plan, json`);
      process.exit(1);
  }
}
175
+
176
+ module.exports = {
177
+ evaluateCredentialUse,
178
+ formatCredentialPlan,
179
+ markCredentialUsed,
180
+ mintCredentialGrant,
181
+ planSingleUseCredentialRequest,
182
+ };
@@ -0,0 +1,226 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * Structured Prompt-Driven Development (SPDD) Gate
6
+ *
7
+ * Makes code-generation prompts governable artifacts by requiring a compact
8
+ * REASONS canvas before risky implementation work proceeds.
9
+ */
10
+
11
+ const path = require('node:path');
12
+
13
+ const FIELD_DEFINITIONS = [
14
+ ['requirements', 'Problem, business value, scope, and definition of done.'],
15
+ ['entities', 'Domain nouns, relationships, and data contracts.'],
16
+ ['approach', 'Strategy for satisfying the requirements.'],
17
+ ['structure', 'Files, modules, dependencies, and integration boundaries.'],
18
+ ['operations', 'Concrete, testable implementation steps.'],
19
+ ['norms', 'Reusable engineering standards and team conventions.'],
20
+ ['safeguards', 'Non-negotiable constraints, risks, and verification gates.'],
21
+ ];
22
+
23
+ const FIELD_KEYS = FIELD_DEFINITIONS.map(([key]) => key);
24
+
25
/**
 * Build a REASONS canvas document from a request string or a structured input.
 *
 * Every canvas field is normalized to a list. The request seeds empty
 * requirements, acceptance criteria are appended to requirements, files to
 * structure, and tests/verification to safeguards. Norms and safeguards get a
 * default entry when they would otherwise be empty.
 *
 * @param {(string|object)} [input] - Request text, or an object with canvas
 *   fields plus optional request/story/task, acceptanceCriteria,
 *   files/changedFiles, tests/verification, title and artifactPath.
 * @returns {{title: string, canvas: object, source: {request: string, artifactPath: string}}}
 */
function buildReasonsCanvas(input = {}) {
  const fields = typeof input === 'string' ? { request: input } : input;
  const requestText = String(fields.request || fields.story || fields.task || '').trim();

  const canvas = Object.fromEntries(
    FIELD_KEYS.map((key) => [key, normalizeList(fields[key])]),
  );
  const { requirements, structure, safeguards, norms } = canvas;

  if (requirements.length === 0 && requestText) {
    requirements.push(requestText);
  }
  if (fields.acceptanceCriteria) {
    requirements.push(...normalizeList(fields.acceptanceCriteria));
  }

  const fileHints = fields.files || fields.changedFiles;
  if (fileHints) {
    structure.push(...normalizeList(fileHints));
  }

  const verificationHints = fields.tests || fields.verification;
  if (verificationHints) {
    for (const item of normalizeList(verificationHints)) {
      safeguards.push(`Verification: ${item}`);
    }
  }

  if (norms.length === 0) {
    norms.push('Keep prompt, code, and tests synchronized in version control.');
  }
  if (safeguards.length === 0) {
    safeguards.push('Do not claim completion without passing verification evidence.');
  }

  const title = fields.title || inferTitle(requestText);
  const artifactPath = fields.artifactPath || 'docs/prompts/<feature>.reasons.md';
  return {
    title,
    canvas,
    source: { request: requestText, artifactPath },
  };
}
62
+
63
/**
 * Evaluate a REASONS canvas and decide whether code generation may proceed.
 *
 * Gates raised:
 *  - `missing-<field>` for every empty field ("block" for requirements and
 *    safeguards, "warn" otherwise);
 *  - `operations-not-testable` (block) when operations exist but none contains
 *    a concrete action verb;
 *  - `safeguards-without-verification` (block) when safeguards name no
 *    verification or risk-control term;
 *  - `code-prompt-drift` (warn) when any changed file is absent from structure.
 *
 * The score starts at 100 and loses 30 per block and 10 per warning, floored at 0.
 *
 * @param {object} [document] - Canvas document, or a bare canvas object.
 * @param {object} [options] - `options.changedFiles` lists files touched by the change.
 * @returns {{allowed: boolean, score: number, gates: object[], missing: string[],
 *   recommendation: string}} Evaluation result.
 */
function evaluateReasonsCanvas(document = {}, options = {}) {
  const canvas = document.canvas || document;
  const gates = [];
  const missing = FIELD_KEYS.filter((key) => normalizeList(canvas[key]).length === 0);

  for (const key of missing) {
    gates.push({
      id: `missing-${key}`,
      severity: key === 'requirements' || key === 'safeguards' ? 'block' : 'warn',
      reason: `${labelFor(key)} is empty; the agent lacks a governed ${key} boundary.`,
    });
  }

  const operations = normalizeList(canvas.operations);
  if (operations.length > 0 && !operations.some(isTestableOperation)) {
    gates.push({
      id: 'operations-not-testable',
      severity: 'block',
      reason: 'At least one operation must be concrete and testable before code generation.',
    });
  }

  const safeguards = normalizeList(canvas.safeguards).join('\n');
  if (!/\b(tests?|verify|verification|evidence|gate|security|privacy|rollback|performance)\b/i.test(safeguards)) {
    gates.push({
      id: 'safeguards-without-verification',
      severity: 'block',
      reason: 'Safeguards must name verification, evidence, or non-negotiable risk controls.',
    });
  }

  const changedFiles = normalizeList(options.changedFiles);
  const structure = normalizeList(canvas.structure).join('\n');
  // A file counts as represented when its full path or its basename appears
  // anywhere in the structure text.
  const isRepresented = (file) => structure.includes(file) || structure.includes(path.basename(file));
  // Every changed file must be represented. Testing with `some` would hide
  // drift as soon as a single changed file happened to be listed.
  if (changedFiles.length > 0 && !changedFiles.every(isRepresented)) {
    gates.push({
      id: 'code-prompt-drift',
      severity: 'warn',
      reason: 'Changed files are not represented in the prompt structure; sync the canvas before review.',
    });
  }

  const hardBlocks = gates.filter((gate) => gate.severity === 'block');
  const warnings = gates.filter((gate) => gate.severity === 'warn');

  let recommendation = 'Structured prompt is ready for code generation and review.';
  if (hardBlocks.length) {
    recommendation = 'Fix the structured prompt before generating or merging code.';
  } else if (warnings.length) {
    recommendation = 'Proceed only after syncing prompt drift and documenting review evidence.';
  }

  return {
    allowed: hardBlocks.length === 0,
    score: Math.max(0, 100 - (hardBlocks.length * 30) - (warnings.length * 10)),
    gates,
    missing,
    recommendation,
  };
}
118
+
119
/**
 * Derive the prompt-synchronization checklist for a change.
 *
 * @param {object} [document] - Canvas document.
 * @param {object} [changes] - Change description: changedFiles,
 *   verification/tests and optional artifactPath.
 * @returns {{promptFirst: boolean, artifactPath: string, requiredUpdates: string[],
 *   reviewChecklist: string[]}} Gate ids and follow-up actions, plus a fixed review checklist.
 */
function buildPromptSyncPlan(document = {}, changes = {}) {
  const evaluation = evaluateReasonsCanvas(document, changes);
  const touchedFiles = normalizeList(changes.changedFiles);
  const evidence = normalizeList(changes.verification || changes.tests);

  const requiredUpdates = (evaluation.gates || []).map((gate) => gate.id);
  if (touchedFiles.length) {
    requiredUpdates.push('sync-structure-with-changed-files');
  }
  // Exactly one evidence action is always present: attach what exists, or add it.
  requiredUpdates.push(evidence.length ? 'attach-verification-evidence' : 'add-verification-evidence');

  const artifactPath = document.source?.artifactPath
    || changes.artifactPath
    || 'docs/prompts/<feature>.reasons.md';

  return {
    promptFirst: evaluation.allowed,
    artifactPath,
    requiredUpdates,
    reviewChecklist: [
      'Review intent and scope before reviewing code diff.',
      'Confirm operations map to focused tests.',
      'Update the canvas when implementation reality diverges.',
      'Store prompt artifact beside the feature or PR evidence.',
    ],
  };
}
139
+
140
/**
 * Render a canvas document and its evaluation as Markdown.
 *
 * @param {object} [document] - Canvas document, or a bare canvas object.
 * @param {object} [evaluation] - Evaluation result; computed from the document when omitted.
 * @returns {string} Markdown with one section per canvas field, the gate list
 *   and the recommendation.
 */
function formatReasonsCanvas(document = {}, evaluation = evaluateReasonsCanvas(document)) {
  const canvas = document.canvas || document;

  const lines = [
    `# ${document.title || 'Structured Prompt Canvas'}`,
    '',
    `Artifact: ${document.source?.artifactPath || 'docs/prompts/<feature>.reasons.md'}`,
    `Readiness: ${evaluation.allowed ? 'ready' : 'blocked'} (${evaluation.score}/100)`,
    '',
  ];

  for (const [key, description] of FIELD_DEFINITIONS) {
    lines.push(`## ${labelFor(key)}`, '', `_${description}_`, '');
    lines.push(...renderList(normalizeList(canvas[key])));
    lines.push('');
  }

  lines.push('## Gates', '');
  if (evaluation.gates.length) {
    for (const gate of evaluation.gates) {
      lines.push(`- ${gate.severity}: ${gate.id} — ${gate.reason}`);
    }
  } else {
    lines.push('- pass: canvas-ready — Structured prompt is complete enough to govern generation.');
  }

  lines.push('', `Recommendation: ${evaluation.recommendation}`, '');
  return lines.join('\n');
}
164
+
165
/**
 * Coerce a value into a list of trimmed, non-empty strings.
 *
 * Arrays are converted item by item, skipping null and undefined entries.
 * Any other truthy value is stringified, split on newlines and semicolons,
 * and stripped of a leading "-" or "*" bullet marker.
 *
 * @param {*} value - Array, string or other value; falsy values give an empty list.
 * @returns {string[]} Cleaned list.
 */
function normalizeList(value) {
  if (!value) return [];
  if (Array.isArray(value)) {
    return value
      // Skip null/undefined so they do not become the strings "null"/"undefined".
      .filter((item) => item != null)
      .map((item) => String(item).trim())
      .filter(Boolean);
  }
  return String(value)
    .split(/\n|;/)
    // Trim before stripping the bullet, otherwise an indented bullet
    // ("  - item") never matches the anchored pattern and keeps its marker.
    .map((item) => item.trim().replace(/^[-*]\s*/, ''))
    .filter(Boolean);
}
173
+
174
/**
 * Turn list items into Markdown bullet lines.
 *
 * @param {string[]} items - Items to render.
 * @returns {string[]} One "- item" line per entry, or a single "- <missing>" line when empty.
 */
function renderList(items) {
  if (!items.length) {
    return ['- <missing>'];
  }
  return items.map((entry) => `- ${entry}`);
}
177
+
178
/**
 * Turn a field key into a heading label by upper-casing its first character.
 *
 * @param {string} key - Canvas field key.
 * @returns {string} Capitalized label.
 */
function labelFor(key) {
  const initial = key.slice(0, 1);
  const remainder = key.slice(1);
  return `${initial.toUpperCase()}${remainder}`;
}
181
+
182
/**
 * Check whether an operation contains a concrete action verb as a whole word.
 *
 * @param {string} operation - Operation text from the canvas.
 * @returns {boolean} True when one of the recognized verbs is present (case-insensitive).
 */
function isTestableOperation(operation) {
  const actionVerbs = /\b(add|update|remove|implement|verify|test|run|assert|block|allow|return|emit)\b/i;
  return actionVerbs.test(operation);
}
185
+
186
/**
 * Derive a document title from the request text.
 *
 * @param {string} request - Request text; may be empty.
 * @returns {string} A default title when empty, the request itself when it has
 *   at most 70 characters, otherwise its first 67 characters followed by "...".
 */
function inferTitle(request) {
  if (!request) {
    return 'Structured Prompt Canvas';
  }
  if (request.length <= 70) {
    return request;
  }
  return `${request.slice(0, 67)}...`;
}
190
+
191
/**
 * Parse the structured-prompt CLI arguments.
 *
 * The first argument is the command (default "canvas"). `--request=` sets the
 * request text; `--file=`, `--test=`, `--operation=` and `--safeguard=` may be
 * repeated and accumulate into lists.
 *
 * @param {string[]} [argv] - Arguments after the script path.
 * @returns {object} Parsed arguments: command, request, and any lists that were supplied.
 */
function parseArgs(argv = process.argv.slice(2)) {
  const repeatableFlags = [
    ['--file=', 'files'],
    ['--test=', 'tests'],
    ['--operation=', 'operations'],
    ['--safeguard=', 'safeguards'],
  ];
  const requestPrefix = '--request=';

  const args = { command: argv[0] || 'canvas', request: '' };
  for (const arg of argv.slice(1)) {
    if (arg.startsWith(requestPrefix)) {
      args.request = arg.slice(requestPrefix.length);
    }
    for (const [prefix, key] of repeatableFlags) {
      if (arg.startsWith(prefix)) {
        args[key] = [...(args[key] || []), arg.slice(prefix.length)];
      }
    }
  }
  return args;
}
202
+
203
/**
 * Report whether this file is the script Node was started with.
 *
 * @param {string[]} [argv] - Process argument vector; index 1 is the script path.
 * @returns {boolean} True when the resolved script path equals this file's path.
 */
function isCliInvocation(argv = process.argv) {
  const [, entryScript] = argv;
  return entryScript ? path.resolve(entryScript) === __filename : false;
}
206
+
207
// CLI entry point: runs only when this file is executed directly.
// `canvas` prints the Markdown canvas, `json` prints document, evaluation and
// sync plan, and any other command is reported on stderr with exit code 1.
if (isCliInvocation()) {
  const args = parseArgs();
  const document = buildReasonsCanvas(args);
  const evaluation = evaluateReasonsCanvas(document, { changedFiles: args.files });
  switch (args.command) {
    case 'json': {
      const syncPlan = buildPromptSyncPlan(document, { changedFiles: args.files, tests: args.tests });
      console.log(JSON.stringify({ document, evaluation, syncPlan }, null, 2));
      break;
    }
    case 'canvas':
      console.log(formatReasonsCanvas(document, evaluation));
      break;
    default:
      console.error(`Unknown command: ${args.command}. Use: canvas, json`);
      process.exit(1);
  }
}
220
+
221
+ module.exports = {
222
+ buildPromptSyncPlan,
223
+ buildReasonsCanvas,
224
+ evaluateReasonsCanvas,
225
+ formatReasonsCanvas,
226
+ };
@@ -344,9 +344,31 @@ function appendTelemetryEvent(feedbackDir, payload = {}, headers = {}) {
344
344
  return entry;
345
345
  }
346
346
 
347
- function loadTelemetryEventsFromPath(filePath) {
348
- if (!fs.existsSync(filePath)) return [];
349
- const raw = fs.readFileSync(filePath, 'utf-8').trim();
347
+ const DEFAULT_BOUNDED_TELEMETRY_TAIL_BYTES = 8 * 1024 * 1024;
348
+
349
/**
 * Read a telemetry log as UTF-8 text, optionally limited to its tail.
 *
 * With a positive `options.maxBytes` and a larger file, only the last
 * `maxBytes` bytes are read and the leading partial line is dropped, so the
 * result starts on a line boundary. Otherwise the whole file is returned.
 *
 * @param {string} filePath - Path of the telemetry file.
 * @param {object} [options] - `options.maxBytes` caps how many trailing bytes are read.
 * @returns {string} File text, or an empty string when the file does not exist.
 */
function readTelemetryText(filePath, options = {}) {
  if (!fs.existsSync(filePath)) return '';

  // Floor the limit so a fractional value cannot reach Buffer.alloc/readSync.
  const requestedBytes = Number(options.maxBytes || 0);
  const maxBytes = Number.isFinite(requestedBytes) ? Math.floor(requestedBytes) : 0;

  if (maxBytes > 0) {
    const stats = fs.statSync(filePath);
    if (stats.size > maxBytes) {
      const fd = fs.openSync(filePath, 'r');
      try {
        const buffer = Buffer.alloc(maxBytes);
        const startOffset = stats.size - maxBytes;
        // readSync may return fewer bytes than requested (short read, or the
        // file shrank after statSync), so keep reading until full or at EOF.
        let filled = 0;
        while (filled < maxBytes) {
          const bytesRead = fs.readSync(fd, buffer, filled, maxBytes - filled, startOffset + filled);
          if (bytesRead === 0) break;
          filled += bytesRead;
        }
        // Decode only the bytes that were read, never the zero-filled remainder.
        const text = buffer.toString('utf-8', 0, filled);
        // The tail usually starts mid-line; drop everything up to the first newline.
        const firstNewline = text.indexOf('\n');
        return firstNewline >= 0 ? text.slice(firstNewline + 1) : text;
      } finally {
        fs.closeSync(fd);
      }
    }
  }
  return fs.readFileSync(filePath, 'utf-8');
}
369
+
370
+ function loadTelemetryEventsFromPath(filePath, options = {}) {
371
+ const raw = readTelemetryText(filePath, options).trim();
350
372
  if (!raw) return [];
351
373
  return raw
352
374
  .split('\n')
@@ -365,13 +387,13 @@ function loadTelemetryEventsFromPath(filePath) {
365
387
  .filter(Boolean);
366
388
  }
367
389
 
368
- function loadTelemetryEvents(feedbackDir) {
390
+ function loadTelemetryEvents(feedbackDir, options = {}) {
369
391
  const diagnostics = getTelemetrySourceDiagnostics(feedbackDir);
370
392
  const merged = [];
371
393
  const seen = new Set();
372
394
 
373
395
  for (const filePath of diagnostics.activePaths) {
374
- const rows = loadTelemetryEventsFromPath(filePath);
396
+ const rows = loadTelemetryEventsFromPath(filePath, options);
375
397
  for (const row of rows) {
376
398
  const key = JSON.stringify(row);
377
399
  if (seen.has(key)) continue;
@@ -406,8 +428,11 @@ function summarizeRecentEvents(events) {
406
428
 
407
429
  function getTelemetrySummary(feedbackDir, options = {}) {
408
430
  const analyticsWindow = resolveAnalyticsWindow(options);
431
+ const telemetryLoadOptions = analyticsWindow.bounded
432
+ ? { maxBytes: Number(options.telemetryTailBytes || DEFAULT_BOUNDED_TELEMETRY_TAIL_BYTES) }
433
+ : {};
409
434
  const events = filterEntriesForWindow(
410
- loadTelemetryEvents(feedbackDir),
435
+ loadTelemetryEvents(feedbackDir, telemetryLoadOptions),
411
436
  analyticsWindow,
412
437
  (entry) => entry && (entry.receivedAt || entry.timestamp)
413
438
  );
@@ -192,6 +192,98 @@ const TOOLS = [
192
192
  },
193
193
  },
194
194
  }),
195
+ readOnlyTool({
196
+ name: 'plan_agent_design_governance',
197
+ description: 'Evaluate an agent workflow before adding tools, autonomy, or subagents. Recommends single-agent vs manager/decentralized patterns, baseline evals, instruction fixes, and tool safeguards.',
198
+ inputSchema: {
199
+ type: 'object',
200
+ properties: {
201
+ workflow: { type: 'string', description: 'Workflow name or short description.' },
202
+ tools: { type: 'array', items: { type: 'string' }, description: 'Tool names available to the agent.' },
203
+ toolCount: { type: 'number', description: 'Total tools when names are not listed.' },
204
+ similarToolCount: { type: 'number', description: 'Number of similar or overlapping tools.' },
205
+ conditionalBranches: { type: 'number', description: 'Rough count of if/then instruction branches.' },
206
+ handoffCount: { type: 'number', description: 'Existing or proposed handoff count.' },
207
+ highRiskTools: { type: 'array', items: { type: 'string' }, description: 'Tools that affect production, money, data, secrets, or outbound actions.' },
208
+ writeTools: { type: 'array', items: { type: 'string' }, description: 'Write-capable tools.' },
209
+ hasBaselineEvals: { type: 'boolean', description: 'Whether baseline agent evals exist.' },
210
+ hasDocs: { type: 'boolean', description: 'Instructions draw on existing workflow docs.' },
211
+ hasExamples: { type: 'boolean', description: 'Instructions include concrete examples.' },
212
+ hasEdgeCases: { type: 'boolean', description: 'Instructions include edge cases and failure paths.' },
213
+ hasToolApprovals: { type: 'boolean', description: 'Risky tool calls require approval.' },
214
+ hasExitCondition: { type: 'boolean', description: 'Instructions define when the run is complete.' },
215
+ reversibleActions: { type: 'boolean', description: 'Risky actions are reversible or have rollback procedures.' },
216
+ },
217
+ },
218
+ }),
219
+ readOnlyTool({
220
+ name: 'plan_proactive_agent_eval_guardrails',
221
+ description: 'Map proactive-assistant eval gaps to PARE-style state-machine, active-user-simulation, goal-inference, intervention-timing, and multi-app orchestration gates.',
222
+ inputSchema: {
223
+ type: 'object',
224
+ properties: {
225
+ workflow: { type: 'string', description: 'Proactive assistant workflow name.' },
226
+ apps: { type: 'array', items: { type: 'string' }, description: 'Apps involved in the proactive workflow.' },
227
+ states: { type: 'array', items: { type: 'string' }, description: 'Modeled app states.' },
228
+ stateCount: { type: 'number', description: 'Number of modeled states.' },
229
+ actionCount: { type: 'number', description: 'Number of state-dependent actions.' },
230
+ taskCount: { type: 'number', description: 'Number of benchmark tasks or scenarios.' },
231
+ hasStateMachine: { type: 'boolean', description: 'Whether apps are modeled as finite state machines.' },
232
+ hasActiveUserSimulation: { type: 'boolean', description: 'Whether active user simulation exists.' },
233
+ hasGoalInferenceEvals: { type: 'boolean', description: 'Whether goal inference is graded.' },
234
+ hasInterventionTimingEvals: { type: 'boolean', description: 'Whether intervention timing is graded.' },
235
+ hasMultiAppEvals: { type: 'boolean', description: 'Whether multi-app orchestration is graded.' },
236
+ flatToolApiOnly: { type: 'boolean', description: 'Current eval only covers flat tool calls.' },
237
+ proactiveWrites: { type: 'boolean', description: 'Proactive agent can write or mutate state.' },
238
+ userVisibleActions: { type: 'boolean', description: 'Interventions can notify, schedule, send, or affect users.' },
239
+ },
240
+ },
241
+ }),
242
+ readOnlyTool({
243
+ name: 'plan_reward_hacking_guardrails',
244
+ description: 'Detect reward-hacking patterns such as unsupported completion claims, sycophancy, verbosity-as-proof, benchmark overfitting, evaluator manipulation, and proxy-only metrics.',
245
+ inputSchema: {
246
+ type: 'object',
247
+ properties: {
248
+ workflow: { type: 'string', description: 'Agent workflow or release lane being evaluated.' },
249
+ text: { type: 'string', description: 'Candidate response, claim, summary, or verifier output to inspect.' },
250
+ evidence: { type: 'array', items: { type: 'string' }, description: 'Evidence artifacts attached to the claim.' },
251
+ metrics: { type: 'array', items: { type: 'string' }, description: 'Proxy metrics or reward scores used by the workflow.' },
252
+ hasHoldout: { type: 'boolean', description: 'Whether holdout, regression, or real-workflow evidence exists.' },
253
+ hasHumanObjective: { type: 'boolean', description: 'Whether proxy metrics are mapped to a user objective.' },
254
+ hasVerifierTrace: { type: 'boolean', description: 'Whether verifier trace, run log, or proof artifact exists.' },
255
+ optimizedForScore: { type: 'boolean', description: 'Whether an eval, benchmark, or reward score is being optimized.' },
256
+ multimodal: { type: 'boolean', description: 'Whether claims depend on screenshots, PDFs, charts, images, or video.' },
257
+ },
258
+ },
259
+ }),
260
+ readOnlyTool({
261
+ name: 'plan_oss_pr_opportunity_scout',
262
+ description: 'Rank upstream GitHub repositories ThumbGate depends on for proof-backed issue, bounty, and PR opportunities without spammy drive-by contributions.',
263
+ inputSchema: {
264
+ type: 'object',
265
+ properties: {
266
+ packagePath: { type: 'string', description: 'Path to package.json used to discover dependencies.' },
267
+ dependencies: { type: 'array', items: { type: 'string' }, description: 'Dependency names to scout instead of package.json.' },
268
+ maxRepos: { type: 'number', description: 'Maximum mapped repositories to include.' },
269
+ includeBounties: { type: 'boolean', description: 'Include bug-bounty and security search queries.' },
270
+ },
271
+ },
272
+ }),
273
+ readOnlyTool({
274
+ name: 'plan_chatgpt_ads_readiness',
275
+ description: 'Prepare ThumbGate intent clusters, ad copy, proof links, UTM measurement, and launch gates for ChatGPT Ads Manager tests.',
276
+ inputSchema: {
277
+ type: 'object',
278
+ properties: {
279
+ offer: { type: 'string', description: 'Offer to advertise, such as Pro or Workflow Hardening Sprint.' },
280
+ audience: { type: 'string', description: 'Audience segment to target.' },
281
+ budget: { type: 'number', description: 'Initial test budget.' },
282
+ keywords: { type: 'array', items: { type: 'string' }, description: 'High-intent conversational queries.' },
283
+ proofLinks: { type: 'array', items: { type: 'string' }, description: 'Proof URLs required by ad claims.' },
284
+ },
285
+ },
286
+ }),
195
287
  destructiveTool({
196
288
  name: 'import_document',
197
289
  description: 'Import a local policy or runbook document into ThumbGate, normalize it for search, and propose provenance-backed gate candidates.',