thumbgate 1.16.12 → 1.16.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.well-known/mcp/server-card.json +1 -1
  4. package/README.md +3 -1
  5. package/adapters/claude/.mcp.json +2 -2
  6. package/adapters/mcp/server-stdio.js +26 -1
  7. package/adapters/opencode/opencode.json +1 -1
  8. package/bin/cli.js +420 -1
  9. package/config/gate-templates.json +372 -0
  10. package/config/mcp-allowlists.json +25 -0
  11. package/config/model-candidates.json +59 -2
  12. package/config/model-tiers.json +4 -1
  13. package/package.json +79 -22
  14. package/public/compare.html +6 -0
  15. package/public/index.html +144 -11
  16. package/public/numbers.html +11 -11
  17. package/public/pro.html +22 -24
  18. package/scripts/agent-design-governance.js +211 -0
  19. package/scripts/agent-reasoning-traces.js +683 -0
  20. package/scripts/agent-reward-model.js +438 -0
  21. package/scripts/agent-stack-survival-audit.js +231 -0
  22. package/scripts/ai-engineering-stack-guardrails.js +256 -0
  23. package/scripts/billing.js +16 -4
  24. package/scripts/chatgpt-ads-readiness-pack.js +195 -0
  25. package/scripts/cli-schema.js +277 -0
  26. package/scripts/code-graph-guardrails.js +176 -0
  27. package/scripts/deepseek-v4-runtime-guardrails.js +253 -0
  28. package/scripts/gemini-embedding-policy.js +198 -0
  29. package/scripts/inference-cache-policy.js +39 -0
  30. package/scripts/judge-reward-function.js +396 -0
  31. package/scripts/llm-behavior-monitor.js +251 -0
  32. package/scripts/long-running-agent-context-guardrails.js +176 -0
  33. package/scripts/multimodal-retrieval-plan.js +31 -11
  34. package/scripts/oss-pr-opportunity-scout.js +240 -0
  35. package/scripts/proactive-agent-eval-guardrails.js +230 -0
  36. package/scripts/profile-router.js +5 -4
  37. package/scripts/prompting-operating-system.js +273 -0
  38. package/scripts/proxy-pointer-rag-guardrails.js +189 -0
  39. package/scripts/rag-precision-guardrails.js +202 -0
  40. package/scripts/rate-limiter.js +1 -1
  41. package/scripts/reasoning-efficiency-guardrails.js +176 -0
  42. package/scripts/reward-hacking-guardrails.js +251 -0
  43. package/scripts/seo-gsd.js +1201 -11
  44. package/scripts/single-use-credential-gate.js +182 -0
  45. package/scripts/structured-prompt-driven.js +226 -0
  46. package/scripts/telemetry-analytics.js +31 -6
  47. package/scripts/tool-registry.js +92 -0
  48. package/scripts/upstream-contribution-engine.js +379 -0
  49. package/scripts/vector-store.js +119 -4
  50. package/src/api/server.js +333 -100
  51. package/scripts/agents-sdk-sandbox-plan.js +0 -57
  52. package/scripts/ai-org-governance.js +0 -98
  53. package/scripts/artifact-agent-plan.js +0 -81
  54. package/scripts/enterprise-agent-rollout.js +0 -34
  55. package/scripts/experience-replay-governance.js +0 -69
  56. package/scripts/inference-economics.js +0 -53
  57. package/scripts/knowledge-layer-plan.js +0 -108
  58. package/scripts/memory-store-governance.js +0 -60
  59. package/scripts/post-training-governance.js +0 -34
  60. package/scripts/production-agent-readiness.js +0 -40
  61. package/scripts/scaling-law-claims.js +0 -60
  62. package/scripts/student-consistent-training.js +0 -73
@@ -0,0 +1,438 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * Agent Reward Model — deterministic RL-style scoring for ThumbGate episodes.
6
+ *
7
+ * This turns session episodes into state/action/outcome/reward records so the
8
+ * loop can prioritize gates, create preference pairs, and spend deeper
9
+ * verification only when the next action is actually risky.
10
+ */
11
+
12
+ const path = require('node:path');
13
+ const { loadEpisodes } = require('./session-episode-store');
14
+
15
/**
 * Tags that mark an action as high risk. Episodes carrying any of these are
 * penalised in the reward and routed to deeper verification budgets.
 */
const HIGH_RISK_TAGS = new Set(
  [
    'billing', 'checkout', 'data-loss', 'deploy',
    'deploy-prod', 'destructive', 'force-push-main', 'payments',
    'production', 'public-post', 'secrets', 'stripe',
  ],
);
29
+
30
/**
 * Keyword heuristics: when a pattern matches free text describing an action,
 * the paired tag is attached to that action. Order is significant because
 * inferred tags are appended in this order.
 */
const ACTION_KEYWORDS = [
  [/\b(deploy|railway|production|prod)\b/i, 'deploy-prod'],
  [/\b(delete|remove|rm -rf|drop|truncate|destructive)\b/i, 'destructive'],
  [/\b(secret|token|api[_ -]?key|credential|password)\b/i, 'secrets'],
  [/\b(stripe|checkout|payment|price|billing|subscription)\b/i, 'payments'],
  [/\b(post|reply|comment|tweet|linkedin|bluesky|threads|reddit)\b/i, 'public-post'],
  [/\b(force push|force-push|main branch)\b/i, 'force-push-main'],
].map(([pattern, tag]) => ({ pattern, tag }));
38
+
39
// Rounding factor used by round(): 1000 keeps three decimal places.
const ROUND_TO = 1000;
40
+
41
/**
 * Normalise a list of tag-like values: stringify, trim, lower-case, drop
 * empties, and de-duplicate while keeping first-seen order.
 *
 * @param {Array<*>} [values] - Raw tag values; a falsy argument is treated as empty.
 * @returns {string[]} Unique normalised tags.
 */
function normalizeTags(values) {
  const seen = new Set();
  (values || []).forEach((raw) => {
    const cleaned = String(raw || '').trim().toLowerCase();
    if (cleaned) seen.add(cleaned);
  });
  return [...seen];
}
46
+
47
/**
 * Collect the action tags for an episode: its declared tags and categories
 * (normalised), plus any tags inferred by matching ACTION_KEYWORDS against the
 * recommendation, error fingerprints, and signal text.
 *
 * @param {object} [episode] - Session episode record.
 * @returns {string[]} Unique tags, declared first, then inferred in keyword order.
 */
function deriveActionTags(episode = {}) {
  const declared = normalizeTags([...(episode.tags || []), ...(episode.categories || [])]);
  const signalText = (episode.signals || [])
    .map((signal) => `${signal.signal || ''} ${signal.severity || ''}`);
  const haystack = [
    episode.recommendation,
    ...(episode.errorFingerprints || []),
    ...signalText,
  ].filter(Boolean).join(' ');

  const inferred = ACTION_KEYWORDS
    .filter(({ pattern }) => pattern.test(haystack))
    .map(({ tag }) => tag);

  return [...new Set([...declared, ...inferred])];
}
60
+
61
/**
 * Score one episode as a bounded reward.
 *
 * The reward is the sum of per-signal components (health, grade, feedback,
 * recurring errors, high-risk exposure, duration, prevented repeats),
 * multiplied by `options.scale` and clamped to [-3, 3].
 *
 * @param {object} [episode] - Session episode record.
 * @param {object} [options]
 * @param {number} [options.scale=1] - Multiplier applied before clamping.
 * @returns {{total: number, label: string, components: object, actionTags: string[], evidence: object}}
 */
function computeEpisodeReward(episode = {}, options = {}) {
  const actionTags = deriveActionTags(episode);

  // Raw inputs, kept as evidence so the score can be audited later.
  const evidence = {
    score: clamp(Number(episode.score ?? 50), 0, 100),
    grade: String(episode.grade || 'unknown').toLowerCase(),
    negativeCount: Math.max(0, Number(episode.negativeCount || 0)),
    positiveCount: Math.max(0, Number(episode.positiveCount || 0)),
    errorCount: Array.isArray(episode.errorFingerprints) ? episode.errorFingerprints.length : 0,
    highRiskCount: actionTags.filter((tag) => HIGH_RISK_TAGS.has(tag)).length,
    durationMs: episode.durationMs ?? null,
  };

  const components = {
    health: round((evidence.score - 50) / 50),
    grade: gradeReward(evidence.grade),
    positiveFeedback: round(evidence.positiveCount * 0.15),
    negativeFeedback: round(evidence.negativeCount * -0.45),
    recurringErrors: round(evidence.errorCount * -0.25),
    highRiskExposure: round(evidence.highRiskCount * -0.2),
    duration: round(computeDurationPenalty(episode.durationMs)),
    preventedRepeat: round(Number(episode.preventedRepeatCount || 0) * 0.25),
  };

  // Sum in declaration order so floating-point results stay stable.
  let rawTotal = 0;
  for (const part of Object.values(components)) {
    rawTotal += part;
  }

  const scale = Number(options.scale || 1);
  const total = round(clamp(rawTotal * scale, -3, 3));

  return {
    total,
    label: rewardLabel(total),
    components,
    actionTags,
    evidence,
  };
}
103
+
104
/**
 * Convert an episode into a state/action/outcome/reward record.
 *
 * @param {object} [episode] - Session episode record.
 * @param {object} [options] - Forwarded to computeEpisodeReward.
 * @returns {{id: *, state: object, action: object, outcome: object, reward: object, evidence: object}}
 */
function episodeToRlTuple(episode = {}, options = {}) {
  const reward = computeEpisodeReward(episode, options);

  const state = {
    hourOfDay: episode.hourOfDay ?? null,
    dayOfWeek: episode.dayOfWeek ?? null,
    categories: normalizeTags(episode.categories || []),
    tags: normalizeTags(episode.tags || []),
    priorGrade: episode.priorGrade || null,
  };

  const action = {
    tags: reward.actionTags,
    recommendation: episode.recommendation || null,
    signals: Array.isArray(episode.signals) ? episode.signals : [],
  };

  const outcome = {
    score: episode.score ?? null,
    grade: episode.grade || null,
    negativeCount: episode.negativeCount || 0,
    positiveCount: episode.positiveCount || 0,
    errorFingerprints: episode.errorFingerprints || [],
  };

  return {
    id: episode.sessionId || episode.id || null,
    state,
    action,
    outcome,
    reward,
    evidence: reward.evidence,
  };
}
132
+
133
/**
 * Build a chosen/rejected preference pair from two episodes (or prebuilt
 * tuples). The higher-reward side is chosen; ties favour `a`.
 *
 * @param {object} a - Episode or RL tuple.
 * @param {object} b - Episode or RL tuple.
 * @param {object} [options] - Forwarded to episodeToRlTuple for raw episodes.
 * @returns {object|null} The pair, or null when the rewards do not differ.
 */
function buildPreferencePairFromEpisodes(a, b, options = {}) {
  const toTuple = (candidate) => (isRlTuple(candidate) ? candidate : episodeToRlTuple(candidate, options));
  const first = toTuple(a);
  const second = toTuple(b);

  const firstWins = first.reward.total >= second.reward.total;
  const chosen = firstWins ? first : second;
  const rejected = firstWins ? second : first;

  const delta = round(chosen.reward.total - rejected.reward.total);
  if (delta <= 0) return null;

  const sharedCategories = new Set([
    ...chosen.state.categories,
    ...rejected.state.categories,
  ]);

  return {
    prompt: inferPreferencePrompt(chosen, rejected),
    chosen: describeEpisodePolicy(chosen),
    rejected: describeEpisodePolicy(rejected),
    metadata: {
      chosenEpisodeId: chosen.id,
      rejectedEpisodeId: rejected.id,
      chosenReward: chosen.reward.total,
      rejectedReward: rejected.reward.total,
      rewardDelta: delta,
      chosenLabel: chosen.reward.label,
      rejectedLabel: rejected.reward.label,
      categories: Array.from(sharedCategories).slice(0, 12),
    },
  };
}
160
+
161
/**
 * Check whether a value already looks like an RL tuple, i.e. it has truthy
 * `state`, `action`, `outcome`, and `reward` fields.
 *
 * @param {*} value
 * @returns {boolean}
 */
function isRlTuple(value) {
  if (value == null) return false;
  return ['state', 'action', 'outcome', 'reward'].every((field) => Boolean(value[field]));
}
164
+
165
/**
 * Pair the lowest-reward episodes with the highest-reward ones to produce
 * preference pairs (best with worst, second best with second worst, ...).
 *
 * @param {object[]} [episodes]
 * @param {object} [options]
 * @param {number} [options.maxPairs=10] - Upper bound on pairs produced.
 * @returns {object[]} Preference pairs; pairs with no reward difference are skipped.
 */
function buildPreferencePairs(episodes = [], options = {}) {
  const ranked = episodes.map((episode) => episodeToRlTuple(episode, options));
  ranked.sort((left, right) => left.reward.total - right.reward.total);
  if (ranked.length < 2) return [];

  const maxPairs = Math.max(1, Number(options.maxPairs || 10));
  const lowCount = Math.min(maxPairs, Math.floor(ranked.length / 2));
  const lows = ranked.slice(0, lowCount);
  // Take the same number from the top end, best first.
  const highs = ranked
    .slice(Math.max(lows.length, ranked.length - lows.length))
    .reverse();

  const pairCount = Math.min(lows.length, highs.length);
  const pairs = [];
  for (const [index, high] of highs.slice(0, pairCount).entries()) {
    const pair = buildPreferencePairFromEpisodes(high, lows[index], options);
    if (pair) pairs.push(pair);
  }
  return pairs;
}
182
+
183
/**
 * Group episodes by error fingerprint, action tag, and category, then rank the
 * groups as candidates for new gates. Groups with lower average reward, higher
 * failure rate, more occurrences, or any high-risk episode rank higher.
 *
 * @param {object[]} [episodes]
 * @param {object} [options]
 * @param {number} [options.minOccurrences=2] - Minimum episodes a group needs to be reported.
 * @returns {object[]} Ranked candidates, highest priority first.
 */
function rankGateCandidatesByReward(episodes = [], options = {}) {
  const minOccurrences = Math.max(1, Number(options.minOccurrences || 2));
  const buckets = new Map();

  for (const episode of episodes) {
    const tuple = episodeToRlTuple(episode, options);
    const isNegative = tuple.reward.total < 0;
    const isHighRisk = tuple.reward.actionTags.some((tag) => HIGH_RISK_TAGS.has(tag));

    // A Set ensures an episode counts at most once per bucket key.
    const keys = new Set([
      ...(episode.errorFingerprints || []).map((fp) => `error:${fp}`),
      ...tuple.action.tags.map((tag) => `tag:${tag}`),
      ...(episode.categories || []).map((category) => `category:${String(category).toLowerCase()}`),
    ]);

    for (const key of keys) {
      let bucket = buckets.get(key);
      if (!bucket) {
        bucket = {
          key,
          occurrences: 0,
          totalReward: 0,
          negativeEpisodes: 0,
          highRiskEpisodes: 0,
          examples: [],
        };
        buckets.set(key, bucket);
      }
      bucket.occurrences += 1;
      bucket.totalReward += tuple.reward.total;
      if (isNegative) bucket.negativeEpisodes += 1;
      if (isHighRisk) bucket.highRiskEpisodes += 1;
      if (bucket.examples.length < 3) bucket.examples.push(tuple.id);
    }
  }

  const candidates = [];
  for (const bucket of buckets.values()) {
    if (!(bucket.occurrences >= minOccurrences)) continue;

    const averageReward = round(bucket.totalReward / bucket.occurrences);
    const failureRate = bucket.negativeEpisodes / bucket.occurrences;
    const riskBoost = bucket.highRiskEpisodes > 0 ? 0.5 : 0;
    const priorityScore = round((Math.max(0, -averageReward) * 2) + (failureRate * 2) + Math.log2(bucket.occurrences + 1) + riskBoost);
    const gateId = bucket.key
      .replaceAll(/[^a-z0-9]+/gi, '-')
      .replaceAll(/^-|-$/g, '')
      .toLowerCase()
      .slice(0, 80);

    candidates.push({
      ...bucket,
      averageReward,
      failureRate: round(failureRate),
      priorityScore,
      gateId,
      recommendation: buildGateRecommendation(bucket.key, bucket.occurrences, averageReward),
    });
  }

  return candidates.sort(
    (left, right) => right.priorityScore - left.priorityScore || right.occurrences - left.occurrences,
  );
}
233
+
234
/**
 * Map a lower-cased session grade to its fixed reward contribution.
 *
 * @param {string} grade - 'healthy', 'degraded', 'critical', or anything else.
 * @returns {number} Reward component; 0 for unrecognised grades.
 */
function gradeReward(grade) {
  switch (grade) {
    case 'healthy':
      return 0.35;
    case 'degraded':
      return -0.35;
    case 'critical':
      return -0.8;
    default:
      return 0;
  }
}
240
+
241
/**
 * Decide how much verification effort a pending action deserves.
 *
 * Tags are taken from `action.tags` and inferred from the command, intent,
 * tool, and description text via ACTION_KEYWORDS. Budgets escalate from
 * 'fast' to 'standard' to 'deep' to 'xhigh' depending on the high-risk tags
 * found.
 *
 * @param {object} [action] - `{ command, intent, tool, description, tags }`.
 * @returns {{budget: string, maxVerifierSteps: number, requiresHumanApproval: boolean, requiredChecks: string[], riskTags: string[]}}
 */
function allocateTestTimeCompute(action = {}) {
  const declaredTags = action.tags || [];
  const text = [
    action.command,
    action.intent,
    action.tool,
    action.description,
    ...declaredTags,
  ].filter(Boolean).join(' ');

  const inferredTags = ACTION_KEYWORDS
    .filter(({ pattern }) => pattern.test(text))
    .map(({ tag }) => tag);

  const highRiskTags = normalizeTags([...declaredTags, ...inferredTags])
    .filter((tag) => HIGH_RISK_TAGS.has(tag));
  const requiresHumanApproval = highRiskTags.includes('public-post');

  // Tags that always warrant the most expensive verification tier.
  const criticalTags = new Set(['secrets', 'payments', 'deploy-prod', 'data-loss', 'force-push-main']);
  if (highRiskTags.some((tag) => criticalTags.has(tag))) {
    return {
      budget: 'xhigh',
      maxVerifierSteps: 8,
      requiresHumanApproval,
      requiredChecks: [
        'confirm exact target surface',
        'run focused tests',
        'verify rollback path',
        'check secrets and billing impact',
        'capture evidence before claiming done',
      ],
      riskTags: highRiskTags,
    };
  }

  if (highRiskTags.length > 0) {
    return {
      budget: 'deep',
      maxVerifierSteps: 5,
      requiresHumanApproval,
      requiredChecks: [
        'verify target and scope',
        'run focused validation',
        'capture evidence before claiming done',
      ],
      riskTags: highRiskTags,
    };
  }

  const looksReadOnly = /\b(test|lint|docs|read|inspect|status)\b/i.test(text);
  return looksReadOnly
    ? {
      budget: 'fast',
      maxVerifierSteps: 2,
      requiresHumanApproval: false,
      requiredChecks: ['run the relevant focused check'],
      riskTags: [],
    }
    : {
      budget: 'standard',
      maxVerifierSteps: 3,
      requiresHumanApproval: false,
      requiredChecks: ['inspect diff', 'run focused validation'],
      riskTags: [],
    };
}
306
+
307
/**
 * Produce the full reward report for a set of episodes: average reward,
 * distribution, worst episodes, preference pairs, gate candidates, and a
 * static description of the compute policy tiers.
 *
 * @param {object[]} [episodes]
 * @param {object} [options]
 * @param {number} [options.maxWorst=5] - How many worst episodes to include.
 * @param {number} [options.maxGateCandidates=10] - How many gate candidates to include.
 * @returns {object} Report object; `generatedAt` is the current ISO timestamp.
 */
function buildRewardReport(episodes = [], options = {}) {
  const tuples = episodes.map((episode) => episodeToRlTuple(episode, options));

  let rewardSum = 0;
  const rewardDistribution = { positive: 0, neutral: 0, negative: 0 };
  for (const { reward } of tuples) {
    rewardSum += reward.total;
    if (reward.total > 0) rewardDistribution.positive += 1;
    else if (reward.total === 0) rewardDistribution.neutral += 1;
    else if (reward.total < 0) rewardDistribution.negative += 1;
  }
  const averageReward = tuples.length ? round(rewardSum / tuples.length) : 0;

  const worstEpisodes = tuples
    .filter((tuple) => tuple.reward.total < 0)
    .sort((left, right) => left.reward.total - right.reward.total)
    .slice(0, Number(options.maxWorst || 5));

  const gateLimit = Number(options.maxGateCandidates || 10);

  return {
    generatedAt: new Date().toISOString(),
    episodesAnalyzed: episodes.length,
    averageReward,
    rewardDistribution,
    worstEpisodes,
    preferencePairs: buildPreferencePairs(episodes, options),
    gateCandidates: rankGateCandidatesByReward(episodes, options).slice(0, gateLimit),
    computePolicy: {
      fast: 'read-only, tests, lint, docs',
      standard: 'ordinary implementation with focused verification',
      deep: 'destructive, public, or production-adjacent work',
      xhigh: 'payments, secrets, deploy-prod, data-loss, force-push-main',
    },
  };
}
336
+
337
/**
 * Build the prompt text for a preference pair from the union of both tuples'
 * state categories.
 *
 * @param {object} chosen - Preferred RL tuple.
 * @param {object} rejected - Dispreferred RL tuple.
 * @returns {string} Prompt naming the task domain ('agent workflow' when no categories exist).
 */
function inferPreferencePrompt(chosen, rejected) {
  const merged = new Set([...chosen.state.categories, ...rejected.state.categories]);
  const named = [...merged].filter(Boolean);
  const domain = named.length > 0 ? named.join(', ') : 'agent workflow';
  return `Task domain: ${domain}. Which policy should the agent follow to maximize verified outcomes and avoid repeat mistakes?`;
}
345
+
346
/**
 * Render an RL tuple as a one-paragraph policy description for use as the
 * chosen/rejected text in a preference pair.
 *
 * @param {object} tuple - RL tuple with `action`, `outcome`, and `reward`.
 * @returns {string} Space-joined sentences covering reward, tags, outcome, errors, and policy.
 */
function describeEpisodePolicy(tuple) {
  const { action, outcome, reward } = tuple;

  const tagSummary = action.tags.length > 0 ? action.tags.join(', ') : 'no explicit action tags';
  // Only the first three fingerprints are quoted to keep the text short.
  const errorSummary = outcome.errorFingerprints.length > 0
    ? `Errors: ${outcome.errorFingerprints.slice(0, 3).join('; ')}.`
    : 'No recurring errors recorded.';
  const policy = action.recommendation || 'Use evidence-first execution and verify before claiming completion.';

  const sentences = [];
  sentences.push(`Reward ${reward.total} (${reward.label}).`);
  sentences.push(`Action tags: ${tagSummary}.`);
  sentences.push(`Outcome: ${outcome.grade || 'unknown'} with score ${outcome.score ?? 'n/a'}.`);
  sentences.push(errorSummary);
  sentences.push(`Policy: ${policy}`);
  return sentences.join(' ');
}
360
+
361
/**
 * Write the human-readable recommendation for a gate candidate bucket.
 *
 * @param {string} key - Bucket key, prefixed with `error:`, `tag:`, or `category:`.
 * @param {number} occurrences - Episodes in the bucket.
 * @param {number} averageReward - Mean reward across those episodes.
 * @returns {string} Recommendation sentence; unprefixed keys get the category wording.
 */
function buildGateRecommendation(key, occurrences, averageReward) {
  const label = key.replace(/^(error|tag|category):/, '');
  const stats = `${occurrences} episodes average reward ${averageReward}.`;

  if (key.startsWith('error:')) {
    // Fingerprints can be long; quote at most 100 characters.
    return `Promote recurring error "${label.slice(0, 100)}" into a pre-action prevention rule; ${stats}`;
  }
  return key.startsWith('tag:')
    ? `Add a risk-aware verifier for "${label}" actions; ${stats}`
    : `Break "${label}" work into smaller gated steps; ${stats}`;
}
371
+
372
/**
 * Penalise long sessions: nothing for the first hour, then 0.1 per extra
 * hour, capped at 0.5.
 *
 * @param {*} durationMs - Session duration in milliseconds (coerced with Number).
 * @returns {number} 0 or a negative penalty no lower than -0.5.
 */
function computeDurationPenalty(durationMs) {
  const elapsedHours = Number(durationMs) / (60 * 60 * 1000);
  // Non-numeric, non-positive, and sub-hour durations carry no penalty.
  if (!Number.isFinite(elapsedHours) || elapsedHours <= 1) return 0;
  return -Math.min(0.5, (elapsedHours - 1) * 0.1);
}
379
+
380
/**
 * Bucket a reward total into a coarse label.
 *
 * @param {number} total - Reward total.
 * @returns {string} One of 'strong_positive', 'positive', 'neutral', 'negative', 'strong_negative'.
 */
function rewardLabel(total) {
  if (total === 0) return 'neutral';
  if (total > 0) return total >= 0.75 ? 'strong_positive' : 'positive';
  return total > -0.75 ? 'negative' : 'strong_negative';
}
387
+
388
/**
 * Constrain a number to the inclusive range [min, max].
 *
 * Values that are not numbers, and NaN, fall back to `min`. Infinite values
 * clamp to the nearest bound: previously `+Infinity` was sent to `min`, which
 * turned an unbounded positive reward into the worst possible one.
 *
 * @param {*} value - Candidate value.
 * @param {number} min - Lower bound, also the fallback for unusable input.
 * @param {number} max - Upper bound.
 * @returns {number} The clamped value.
 */
function clamp(value, min, max) {
  if (typeof value !== 'number' || Number.isNaN(value)) return min;
  return Math.min(max, Math.max(min, value));
}
392
+
393
/**
 * Round to the precision set by ROUND_TO; non-finite input becomes 0.
 *
 * @param {number} value
 * @returns {number}
 */
function round(value) {
  return Number.isFinite(value) ? Math.round(value * ROUND_TO) / ROUND_TO : 0;
}
397
+
398
/**
 * Parse CLI arguments of the form `<command> --flag --key=value`.
 *
 * Each flag is split on its FIRST `=` only, so values that themselves contain
 * `=` (for example `--filter=a=b`) are kept intact rather than truncated.
 * Arguments that do not start with `--`, and flags with an empty name, are
 * ignored.
 *
 * @param {string[]} argv - Full argument vector (node, script, command, ...flags).
 * @returns {object} `{ command, ...flags }`; bare flags map to `true`, others to their string value.
 */
function parseArgs(argv) {
  const args = { command: argv[2] || 'report' };
  for (const arg of argv.slice(3)) {
    if (!arg.startsWith('--')) continue;
    const flag = arg.slice(2);
    const separatorIndex = flag.indexOf('=');
    const key = separatorIndex === -1 ? flag : flag.slice(0, separatorIndex);
    if (!key) continue;
    args[key] = separatorIndex === -1 ? true : flag.slice(separatorIndex + 1);
  }
  return args;
}
407
+
408
/**
 * Report whether this file is the script being run directly, by comparing
 * the resolved `argv[1]` with this module's filename.
 *
 * @param {string[]} [argv=process.argv]
 * @returns {boolean}
 */
function isCliInvocation(argv = process.argv) {
  const invokedPath = argv[1];
  if (!invokedPath) return false;
  return path.resolve(invokedPath) === __filename;
}
412
+
413
// CLI entry point: `report` (default), `pairs`, or `gates`.
// Flags parsed by parseArgs (e.g. --scale=2, --maxPairs=5, --minOccurrences=3)
// are forwarded as options; previously they were parsed and then discarded.
if (isCliInvocation()) {
  const { command, ...options } = parseArgs(process.argv);
  const handlers = new Map([
    ['report', buildRewardReport],
    ['pairs', buildPreferencePairs],
    ['gates', rankGateCandidatesByReward],
  ]);

  const handler = handlers.get(command);
  if (!handler) {
    // Reject unknown commands before loading any episodes.
    console.error(`Unknown command: ${command}. Use: report, pairs, gates`);
    process.exit(1);
  }

  console.log(JSON.stringify(handler(loadEpisodes(), options), null, 2));
}
427
+
428
+ module.exports = {
429
+ HIGH_RISK_TAGS,
430
+ allocateTestTimeCompute,
431
+ buildPreferencePairFromEpisodes,
432
+ buildPreferencePairs,
433
+ buildRewardReport,
434
+ computeEpisodeReward,
435
+ deriveActionTags,
436
+ episodeToRlTuple,
437
+ rankGateCandidatesByReward,
438
+ };
@@ -0,0 +1,231 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * Agent Stack Survival Audit
6
+ *
7
+ * High-ROI response to the "AI scaffolding layer is collapsing" thesis:
8
+ * keep ThumbGate thin, context-rich, model-agnostic, sandboxed, and easy to
9
+ * throw away or swap as frontier model/tool patterns change.
10
+ */
11
+
12
+ const fs = require('node:fs');
13
+ const path = require('node:path');
14
+
15
// Package root: one directory above this script's folder.
const REPO_ROOT = path.resolve(__dirname, '..');
16
+
17
/**
 * Package names treated as heavy orchestration frameworks. Any of these
 * appearing in package.json lowers the scaffolding-thinness score.
 */
const HEAVY_SCAFFOLDING_DEPS = [
  'langchain', '@langchain/core', '@langchain/community',
  'llamaindex', 'llama-index',
  'crew-ai', 'autogen', 'semantic-kernel',
];
27
+
28
// Repo-relative files whose presence counts toward the context-moat score.
const CONTEXT_MOAT_FILES = [
  'scripts/document-intake.js',
  'scripts/contextfs.js',
  'scripts/context-engine.js',
  'scripts/lesson-retrieval.js',
  'scripts/memalign.js',
  'config/mcp-allowlists.json',
];

// Repo-relative files whose presence counts toward the sandbox-readiness score.
const SANDBOX_FILES = [
  'scripts/cloudflare-dynamic-sandbox.js',
  'scripts/docker-sandbox-planner.js',
  'config/gates/computer-use.json',
  'config/gates/code-edit.json',
];
43
+
44
/**
 * Audit a repository for how well it would survive churn in the agent stack.
 *
 * Reads package.json, the adapters directory, config/model-candidates.json,
 * and public/guides under the chosen root, then scores five categories
 * (context moat, modularity, sandbox readiness, scaffolding thinness, AI
 * search context) and averages them into an overall score.
 *
 * @param {object} [options]
 * @param {string} [options.root] - Repository root to inspect; defaults to REPO_ROOT.
 * @returns {{generatedAt: string, overallScore: number, verdict: string, categories: object, highRoiActions: object[]}}
 */
function buildStackSurvivalAudit(options = {}) {
  const root = path.resolve(options.root || REPO_ROOT);
  const fromRoot = (...segments) => path.join(root, ...segments);

  const manifest = readJson(fromRoot('package.json'));
  const dependencyNames = Object.keys({
    ...manifest.dependencies,
    ...manifest.devDependencies,
    ...manifest.optionalDependencies,
  });
  const adapterDirs = listDirs(fromRoot('adapters'));
  const modelCandidates = readJson(fromRoot('config', 'model-candidates.json'));
  const guideFiles = listFiles(fromRoot('public', 'guides'));

  const isPresent = (file) => exists(root, file);
  const heavyDeps = dependencyNames.filter((dep) => HEAVY_SCAFFOLDING_DEPS.includes(dep));
  const contextFilesPresent = CONTEXT_MOAT_FILES.filter(isPresent);
  const sandboxFilesPresent = SANDBOX_FILES.filter(isPresent);
  const contextGuides = guideFiles.filter((file) => /context|guardrail|pre-action|agent|workflow/i.test(file));
  const candidateCount = Object.keys(modelCandidates.candidates || modelCandidates || {}).length;
  const hasHeavyDeps = heavyDeps.length !== 0;

  const categories = {
    contextMoat: scoreCategory({
      score: contextFilesPresent.length / CONTEXT_MOAT_FILES.length,
      evidence: contextFilesPresent,
      recommendation: 'Prioritize parsers, context packs, lesson retrieval, and evidence surfaces over custom workflow scaffolding.',
    }),
    modularity: scoreCategory({
      score: Math.min(1, (adapterDirs.length / 6) * 0.55 + (candidateCount / 4) * 0.45),
      evidence: [
        `${adapterDirs.length} adapter directories`,
        `${candidateCount} model candidates`,
      ],
      recommendation: 'Keep providers swappable through adapters, MCP, and model routing rather than hard-coded orchestration.',
    }),
    sandboxReadiness: scoreCategory({
      score: sandboxFilesPresent.length / SANDBOX_FILES.length,
      evidence: sandboxFilesPresent,
      recommendation: 'Preserve agent-plus-sandbox controls for code edits, computer use, and risky workflow execution.',
    }),
    scaffoldingThinness: scoreCategory({
      score: hasHeavyDeps ? Math.max(0, 1 - (heavyDeps.length * 0.2)) : 1,
      evidence: hasHeavyDeps ? heavyDeps : ['no heavy orchestration framework dependencies detected'],
      recommendation: hasHeavyDeps
        ? 'Reduce framework lock-in; keep orchestration disposable and context/gates durable.'
        : 'Good: no obvious heavy orchestration dependency lock-in in package.json.',
    }),
    aiSearchContext: scoreCategory({
      score: Math.min(1, contextGuides.length / 8),
      evidence: contextGuides.slice(0, 12),
      recommendation: 'Continue publishing context-rich guides that let AI search explain why ThumbGate survives stack churn.',
    }),
  };

  const scored = Object.values(categories);
  let scoreSum = 0;
  for (const category of scored) {
    scoreSum += category.score;
  }
  const overallScore = round(scoreSum / scored.length);

  return {
    generatedAt: new Date().toISOString(),
    overallScore,
    verdict: survivalVerdict(overallScore),
    categories,
    highRoiActions: buildHighRoiActions(categories),
  };
}
105
+
106
/**
 * Turn category scores into a prioritised action list. Categories scoring
 * 0.85 or higher need no action; below 0.6 is 'high' priority, otherwise
 * 'medium'. When nothing needs work, a single maintenance action is returned.
 *
 * @param {object} [categories] - Map of category name to `{ score, recommendation }`.
 * @returns {Array<{area: string, priority: string, action: string}>}
 */
function buildHighRoiActions(categories = {}) {
  const actions = Object.entries(categories)
    .filter(([, category]) => !(category.score >= 0.85))
    .map(([area, category]) => ({
      area,
      priority: category.score < 0.6 ? 'high' : 'medium',
      action: category.recommendation,
    }));

  if (actions.length > 0) return actions;

  return [{
    area: 'stack',
    priority: 'maintenance',
    action: 'Keep ThumbGate thin: invest new effort in context extraction, evidence, gates, and adapters.',
  }];
}
125
+
126
/**
 * Render an audit object as a Markdown report.
 *
 * @param {object} [audit] - Result of buildStackSurvivalAudit.
 * @returns {string} Markdown text with header, category list, actions, and a positioning note.
 */
function formatStackSurvivalReport(audit = {}) {
  const header = [
    '# Agent Stack Survival Audit',
    '',
    `Generated: ${audit.generatedAt}`,
    `Verdict: ${audit.verdict}`,
    `Overall score: ${audit.overallScore}`,
    '',
    '## Categories',
    '',
  ];
  const categoryLines = Object.entries(audit.categories || {})
    .map(([name, category]) => `- ${name}: ${category.score} (${category.status})`);
  const actionLines = (audit.highRoiActions || [])
    .map((action) => `- ${action.priority}: ${action.action}`);

  const lines = [
    ...header,
    ...categoryLines,
    '',
    '## High-ROI Actions',
    '',
    ...actionLines,
    '',
    'Positioning: ThumbGate should sell durable context, evidence, and pre-action gates, not brittle orchestration scaffolding.',
    '',
  ];
  return `${lines.join('\n')}\n`;
}
148
+
149
/**
 * Package a raw category score with its rounded value, status label,
 * supporting evidence, and recommendation.
 *
 * @param {{score: number, evidence: Array, recommendation: string}} input
 * @returns {{score: number, status: string, evidence: Array, recommendation: string}}
 */
function scoreCategory({ score, evidence, recommendation }) {
  const roundedScore = round(score);
  const status = categoryStatus(roundedScore);
  return { score: roundedScore, status, evidence, recommendation };
}
158
+
159
/**
 * Read and parse a JSON file on a best-effort basis.
 *
 * @param {string} filePath - File to read.
 * @returns {*} Parsed content, or an empty object when the file cannot be read or parsed.
 */
function readJson(filePath) {
  let parsed = {};
  try {
    const raw = fs.readFileSync(filePath, 'utf8');
    parsed = JSON.parse(raw);
  } catch {
    // Missing or malformed files are treated as empty.
    parsed = {};
  }
  return parsed;
}
166
+
167
/**
 * Check whether a path relative to `root` exists on disk.
 *
 * @param {string} root - Base directory.
 * @param {string} relativePath - Path relative to the base.
 * @returns {boolean}
 */
function exists(root, relativePath) {
  const target = path.join(root, relativePath);
  return fs.existsSync(target);
}
170
+
171
/**
 * List the names of the immediate subdirectories of a directory, sorted
 * with localeCompare.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {string[]} Subdirectory names, or an empty array when the directory cannot be read.
 */
function listDirs(dirPath) {
  let entries;
  try {
    entries = fs.readdirSync(dirPath, { withFileTypes: true });
  } catch {
    return [];
  }

  const names = [];
  for (const entry of entries) {
    if (entry.isDirectory()) names.push(entry.name);
  }
  return names.sort((left, right) => left.localeCompare(right));
}
181
+
182
/**
 * List the names of the regular files directly inside a directory, sorted
 * with localeCompare.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {string[]} File names, or an empty array when the directory cannot be read.
 */
function listFiles(dirPath) {
  let entries;
  try {
    entries = fs.readdirSync(dirPath, { withFileTypes: true });
  } catch {
    return [];
  }

  const names = [];
  for (const entry of entries) {
    if (entry.isFile()) names.push(entry.name);
  }
  return names.sort((left, right) => left.localeCompare(right));
}
192
+
193
/**
 * Round to three decimal places; non-finite input becomes 0.
 *
 * @param {number} value
 * @returns {number}
 */
function round(value) {
  return Number.isFinite(value) ? Math.round(value * 1000) / 1000 : 0;
}
197
+
198
/**
 * Translate the overall audit score into a verdict.
 *
 * @param {number} score - Overall score.
 * @returns {string} 'survives' at 0.85 or above, 'watch' at 0.65 or above, otherwise 'fragile'.
 */
function survivalVerdict(score) {
  const thresholds = [
    [0.85, 'survives'],
    [0.65, 'watch'],
  ];
  const match = thresholds.find(([minimum]) => score >= minimum);
  return match ? match[1] : 'fragile';
}
203
+
204
/**
 * Translate a category score into a status label.
 *
 * @param {number} score - Category score.
 * @returns {string} 'strong' at 0.85 or above, 'watch' at 0.65 or above, otherwise 'weak'.
 */
function categoryStatus(score) {
  const thresholds = [
    [0.85, 'strong'],
    [0.65, 'watch'],
  ];
  const match = thresholds.find(([minimum]) => score >= minimum);
  return match ? match[1] : 'weak';
}
209
+
210
/**
 * Report whether this file is the script being run directly, by comparing
 * the resolved `argv[1]` with this module's filename.
 *
 * @param {string[]} [argv=process.argv]
 * @returns {boolean}
 */
function isCliInvocation(argv = process.argv) {
  const invokedPath = argv[1];
  if (!invokedPath) return false;
  return path.resolve(invokedPath) === __filename;
}
213
+
214
// CLI entry point: `report` (default) prints Markdown, `json` prints the raw audit.
if (isCliInvocation()) {
  const command = process.argv[2] || 'report';
  const audit = buildStackSurvivalAudit();
  switch (command) {
    case 'json':
      console.log(JSON.stringify(audit, null, 2));
      break;
    case 'report':
      console.log(formatStackSurvivalReport(audit));
      break;
    default:
      console.error(`Unknown command: ${command}. Use: report, json`);
      process.exit(1);
  }
}
226
+
227
+ module.exports = {
228
+ HEAVY_SCAFFOLDING_DEPS,
229
+ buildStackSurvivalAudit,
230
+ formatStackSurvivalReport,
231
+ };