thumbgate 1.4.3 → 1.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.well-known/llms.txt +12 -8
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +18 -8
  6. package/adapters/README.md +1 -1
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/codex/config.toml +2 -2
  9. package/adapters/mcp/server-stdio.js +1 -1
  10. package/adapters/opencode/opencode.json +1 -1
  11. package/config/github-about.json +2 -2
  12. package/package.json +158 -10
  13. package/scripts/billing.js +5 -2
  14. package/scripts/statusline.sh +1 -0
  15. package/src/api/server.js +113 -16
  16. package/src/index.js +3 -0
  17. package/.claude-plugin/bundle/icon.png +0 -0
  18. package/.claude-plugin/bundle/icon.svg +0 -18
  19. package/.claude-plugin/bundle/server/index.js +0 -24
  20. package/adapters/chatgpt/INSTALL.md +0 -158
  21. package/adapters/perplexity/.mcp.json +0 -36
  22. package/adapters/perplexity/config.toml +0 -16
  23. package/adapters/perplexity/opencode.json +0 -29
  24. package/bin/memory.sh +0 -64
  25. package/bin/obsidian-sync.sh +0 -20
  26. package/plugins/amp-skill/INSTALL.md +0 -52
  27. package/plugins/amp-skill/SKILL.md +0 -64
  28. package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +0 -22
  29. package/plugins/claude-codex-bridge/.mcp.json +0 -14
  30. package/plugins/claude-codex-bridge/INSTALL.md +0 -43
  31. package/plugins/claude-codex-bridge/README.md +0 -46
  32. package/plugins/claude-codex-bridge/scripts/codex-bridge.js +0 -286
  33. package/plugins/claude-codex-bridge/skills/adversarial-review/SKILL.md +0 -24
  34. package/plugins/claude-codex-bridge/skills/result/SKILL.md +0 -22
  35. package/plugins/claude-codex-bridge/skills/review/SKILL.md +0 -28
  36. package/plugins/claude-codex-bridge/skills/second-pass/SKILL.md +0 -27
  37. package/plugins/claude-codex-bridge/skills/setup/SKILL.md +0 -21
  38. package/plugins/claude-codex-bridge/skills/status/SKILL.md +0 -19
  39. package/plugins/claude-skill/INSTALL.md +0 -55
  40. package/plugins/claude-skill/SKILL.md +0 -46
  41. package/plugins/codex-profile/.codex-plugin/plugin.json +0 -43
  42. package/plugins/codex-profile/.mcp.json +0 -14
  43. package/plugins/codex-profile/AGENTS.md +0 -20
  44. package/plugins/codex-profile/INSTALL.md +0 -89
  45. package/plugins/codex-profile/README.md +0 -61
  46. package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +0 -23
  47. package/plugins/cursor-marketplace/CHANGELOG.md +0 -30
  48. package/plugins/cursor-marketplace/LICENSE +0 -21
  49. package/plugins/cursor-marketplace/README.md +0 -124
  50. package/plugins/cursor-marketplace/agents/reliability-reviewer.md +0 -31
  51. package/plugins/cursor-marketplace/assets/logo-400x400.png +0 -0
  52. package/plugins/cursor-marketplace/commands/capture-feedback.md +0 -33
  53. package/plugins/cursor-marketplace/commands/check-gates.md +0 -25
  54. package/plugins/cursor-marketplace/commands/show-lessons.md +0 -27
  55. package/plugins/cursor-marketplace/hooks/hooks.json +0 -10
  56. package/plugins/cursor-marketplace/mcp.json +0 -14
  57. package/plugins/cursor-marketplace/rules/feedback-capture.mdc +0 -34
  58. package/plugins/cursor-marketplace/rules/pre-action-gates.mdc +0 -30
  59. package/plugins/cursor-marketplace/rules/session-continuity.mdc +0 -28
  60. package/plugins/cursor-marketplace/scripts/gate-check.sh +0 -21
  61. package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +0 -48
  62. package/plugins/cursor-marketplace/skills/prevention-rules/SKILL.md +0 -31
  63. package/plugins/cursor-marketplace/skills/recall-context/SKILL.md +0 -30
  64. package/plugins/cursor-marketplace/skills/search-lessons/SKILL.md +0 -33
  65. package/plugins/gemini-extension/INSTALL.md +0 -92
  66. package/plugins/gemini-extension/gemini_prompt.txt +0 -14
  67. package/plugins/gemini-extension/tool_contract.json +0 -45
  68. package/plugins/opencode-profile/INSTALL.md +0 -57
  69. package/public/assets/instagram-card.png +0 -0
  70. package/public/assets/tiktok-agent-memory.mp4 +0 -0
  71. package/public/blog.html +0 -474
  72. package/public/compare/mem0.html +0 -189
  73. package/public/compare/speclock.html +0 -180
  74. package/public/compare.html +0 -310
  75. package/public/dashboard.html +0 -1100
  76. package/public/guide.html +0 -317
  77. package/public/guides/claude-code-prevent-repeated-mistakes.html +0 -161
  78. package/public/guides/codex-cli-guardrails.html +0 -158
  79. package/public/guides/cursor-prevent-repeated-mistakes.html +0 -161
  80. package/public/guides/pre-action-gates.html +0 -162
  81. package/public/guides/stop-repeated-ai-agent-mistakes.html +0 -159
  82. package/public/index.html +0 -1225
  83. package/public/js/buyer-intent.js +0 -252
  84. package/public/learn/agent-harness-pattern.html +0 -180
  85. package/public/learn/ai-agent-persistent-memory.html +0 -203
  86. package/public/learn/learn.css +0 -45
  87. package/public/learn/mcp-pre-action-gates-explained.html +0 -172
  88. package/public/learn/stop-ai-agent-force-push.html +0 -134
  89. package/public/learn/vibe-coding-safety-net.html +0 -142
  90. package/public/learn.html +0 -274
  91. package/public/lessons.html +0 -967
  92. package/public/llm-context.md +0 -156
  93. package/public/pro.html +0 -1087
  94. package/public/vercel.json +0 -8
  95. package/scripts/a2ui-engine.js +0 -73
  96. package/scripts/adk-consolidator.js +0 -274
  97. package/scripts/agent-security-hardening.js +0 -225
  98. package/scripts/ai-search-visibility.js +0 -116
  99. package/scripts/autonomous-sales-agent.js +0 -39
  100. package/scripts/autoresearch-runner.js +0 -216
  101. package/scripts/background-agent-governance.js +0 -229
  102. package/scripts/behavioral-extraction.js +0 -93
  103. package/scripts/budget-enforcer.js +0 -173
  104. package/scripts/budget-guard.js +0 -173
  105. package/scripts/build-claude-mcpb.js +0 -255
  106. package/scripts/build-codex-plugin.js +0 -152
  107. package/scripts/capture-railway-diagnostics.sh +0 -97
  108. package/scripts/changeset-check.js +0 -372
  109. package/scripts/check-congruence.js +0 -443
  110. package/scripts/computer-use-firewall.js +0 -280
  111. package/scripts/content-engine/linkedin-content-generator.js +0 -154
  112. package/scripts/content-engine/output/linkedin-memento-validation.md +0 -17
  113. package/scripts/content-engine/output/linkedin-posts-2026-04-09.md +0 -175
  114. package/scripts/content-engine/reddit-thread-finder.js +0 -154
  115. package/scripts/context-engine.js +0 -710
  116. package/scripts/daily-digest.js +0 -11
  117. package/scripts/data-governance.js +0 -173
  118. package/scripts/deploy-gcp.sh +0 -44
  119. package/scripts/deploy-policy.js +0 -249
  120. package/scripts/disagreement-mining.js +0 -315
  121. package/scripts/dpo-optimizer.js +0 -206
  122. package/scripts/ensure-repo-bootstrap.js +0 -130
  123. package/scripts/ephemeral-agent-store.js +0 -212
  124. package/scripts/eval-harness.js +0 -56
  125. package/scripts/export-kto-pairs.js +0 -309
  126. package/scripts/export-training.js +0 -446
  127. package/scripts/feedback-fallback.js +0 -111
  128. package/scripts/feedback-inbox-read.js +0 -162
  129. package/scripts/feedback-root-consolidator.js +0 -233
  130. package/scripts/feedback-to-memory.js +0 -185
  131. package/scripts/gate-satisfy.js +0 -42
  132. package/scripts/generate-paperbanana-diagrams.sh +0 -99
  133. package/scripts/generate-pretool-hook.sh +0 -40
  134. package/scripts/github-about.js +0 -430
  135. package/scripts/github-outreach.js +0 -65
  136. package/scripts/gtm-revenue-loop.js +0 -535
  137. package/scripts/hallucination-detector.js +0 -226
  138. package/scripts/hf-papers.js +0 -317
  139. package/scripts/hook-auto-capture.sh +0 -100
  140. package/scripts/hook-stop-pr-thread-check.sh +0 -68
  141. package/scripts/hook-stop-self-score.sh +0 -51
  142. package/scripts/hook-stop-verify-deploy.sh +0 -31
  143. package/scripts/hook-verify-before-done.sh +0 -20
  144. package/scripts/managed-dpo-export.js +0 -91
  145. package/scripts/markdown-escape.js +0 -12
  146. package/scripts/marketing-experiment.js +0 -657
  147. package/scripts/memalign-recall.js +0 -111
  148. package/scripts/memory-migration.js +0 -296
  149. package/scripts/meta-policy.js +0 -190
  150. package/scripts/metered-billing.js +0 -16
  151. package/scripts/model-tier-router.js +0 -310
  152. package/scripts/money-watcher.js +0 -218
  153. package/scripts/multi-hop-recall.js +0 -240
  154. package/scripts/per-step-scoring.js +0 -163
  155. package/scripts/perplexity-command-center.js +0 -644
  156. package/scripts/perplexity-marketing.js +0 -454
  157. package/scripts/pii-scanner.js +0 -153
  158. package/scripts/plan-gate.js +0 -154
  159. package/scripts/post-everywhere.js +0 -341
  160. package/scripts/post-to-x-retry.sh +0 -22
  161. package/scripts/post-to-x.js +0 -369
  162. package/scripts/pr-manager.js +0 -421
  163. package/scripts/principle-extractor.js +0 -162
  164. package/scripts/pro-features.js +0 -41
  165. package/scripts/prompt-dlp.js +0 -222
  166. package/scripts/prove-adapters.js +0 -860
  167. package/scripts/prove-attribution.js +0 -361
  168. package/scripts/prove-automation.js +0 -651
  169. package/scripts/prove-autoresearch.js +0 -304
  170. package/scripts/prove-claim-verification.js +0 -277
  171. package/scripts/prove-cloudflare-sandbox.js +0 -161
  172. package/scripts/prove-data-pipeline.js +0 -408
  173. package/scripts/prove-data-quality.js +0 -227
  174. package/scripts/prove-evolution.js +0 -352
  175. package/scripts/prove-harnesses.js +0 -287
  176. package/scripts/prove-intelligence.js +0 -257
  177. package/scripts/prove-lancedb.js +0 -425
  178. package/scripts/prove-local-intelligence.js +0 -340
  179. package/scripts/prove-loop-closure.js +0 -263
  180. package/scripts/prove-packaged-runtime.js +0 -327
  181. package/scripts/prove-predictive-insights.js +0 -355
  182. package/scripts/prove-runtime.js +0 -363
  183. package/scripts/prove-seo-gsd.js +0 -234
  184. package/scripts/prove-settings.js +0 -279
  185. package/scripts/prove-subway-upgrades.js +0 -277
  186. package/scripts/prove-tessl.js +0 -229
  187. package/scripts/prove-training-export.js +0 -325
  188. package/scripts/prove-workflow-contract.js +0 -112
  189. package/scripts/prove-xmemory.js +0 -332
  190. package/scripts/publish-decision.js +0 -159
  191. package/scripts/ralph-loop.js +0 -376
  192. package/scripts/ralph-mode-ci.js +0 -434
  193. package/scripts/reddit-dm-outreach.js +0 -192
  194. package/scripts/reddit-monitor-cron.sh +0 -26
  195. package/scripts/reminder-engine.js +0 -132
  196. package/scripts/revenue-status.js +0 -472
  197. package/scripts/rotate-stripe-webhook-secret.js +0 -314
  198. package/scripts/schedule-manager.js +0 -249
  199. package/scripts/self-healing-check.js +0 -193
  200. package/scripts/session-analyzer.js +0 -533
  201. package/scripts/shieldcortex-memory-firewall-runner.mjs +0 -53
  202. package/scripts/skill-exporter.js +0 -260
  203. package/scripts/skill-materializer.js +0 -134
  204. package/scripts/skill-packs.js +0 -136
  205. package/scripts/skill-proposer.js +0 -99
  206. package/scripts/skill-quality-tracker.js +0 -282
  207. package/scripts/slow-loop.js +0 -72
  208. package/scripts/social-analytics/db/marketing-db.js +0 -179
  209. package/scripts/social-analytics/db/schema.sql +0 -55
  210. package/scripts/social-analytics/digest.js +0 -256
  211. package/scripts/social-analytics/engagement-audit.js +0 -185
  212. package/scripts/social-analytics/generate-instagram-card.js +0 -123
  213. package/scripts/social-analytics/generate-slides.js +0 -268
  214. package/scripts/social-analytics/instagram-thumbgate-post.js +0 -111
  215. package/scripts/social-analytics/install-growth-automation.js +0 -114
  216. package/scripts/social-analytics/load-env.js +0 -77
  217. package/scripts/social-analytics/mcp-server.js +0 -289
  218. package/scripts/social-analytics/normalizer.js +0 -580
  219. package/scripts/social-analytics/notify.js +0 -162
  220. package/scripts/social-analytics/poll-all.js +0 -107
  221. package/scripts/social-analytics/pollers/github.js +0 -195
  222. package/scripts/social-analytics/pollers/instagram.js +0 -253
  223. package/scripts/social-analytics/pollers/linkedin.js +0 -340
  224. package/scripts/social-analytics/pollers/plausible.js +0 -245
  225. package/scripts/social-analytics/pollers/reddit.js +0 -306
  226. package/scripts/social-analytics/pollers/threads.js +0 -233
  227. package/scripts/social-analytics/pollers/tiktok.js +0 -203
  228. package/scripts/social-analytics/pollers/x.js +0 -227
  229. package/scripts/social-analytics/pollers/youtube.js +0 -304
  230. package/scripts/social-analytics/pollers/zernio.js +0 -183
  231. package/scripts/social-analytics/post-video.js +0 -316
  232. package/scripts/social-analytics/publish-instagram-thumbgate.js +0 -104
  233. package/scripts/social-analytics/publish-thumbgate-launch.js +0 -322
  234. package/scripts/social-analytics/publishers/devto.js +0 -122
  235. package/scripts/social-analytics/publishers/instagram.js +0 -317
  236. package/scripts/social-analytics/publishers/linkedin.js +0 -294
  237. package/scripts/social-analytics/publishers/reddit.js +0 -385
  238. package/scripts/social-analytics/publishers/threads.js +0 -275
  239. package/scripts/social-analytics/publishers/tiktok.js +0 -217
  240. package/scripts/social-analytics/publishers/x.js +0 -259
  241. package/scripts/social-analytics/publishers/youtube.js +0 -223
  242. package/scripts/social-analytics/publishers/zernio.js +0 -568
  243. package/scripts/social-analytics/reconcile-thumbgate-campaign.js +0 -165
  244. package/scripts/social-analytics/run-digest.js +0 -34
  245. package/scripts/social-analytics/schedule-thumbgate-campaign.js +0 -275
  246. package/scripts/social-analytics/store.js +0 -455
  247. package/scripts/social-analytics/sync-launch-assets.js +0 -185
  248. package/scripts/social-analytics/utm.js +0 -143
  249. package/scripts/social-pipeline.js +0 -2626
  250. package/scripts/social-post-hourly.js +0 -228
  251. package/scripts/social-quality-gate.js +0 -134
  252. package/scripts/social-reply-monitor.js +0 -592
  253. package/scripts/status-dashboard.js +0 -155
  254. package/scripts/stripe-live-status.js +0 -115
  255. package/scripts/subagent-profiles.js +0 -79
  256. package/scripts/sync-branch-protection.js +0 -340
  257. package/scripts/sync-gh-secrets-from-env.sh +0 -70
  258. package/scripts/sync-github-about.js +0 -55
  259. package/scripts/sync-version.js +0 -479
  260. package/scripts/synthetic-dpo.js +0 -234
  261. package/scripts/tessl-export.js +0 -369
  262. package/scripts/test-coverage.js +0 -128
  263. package/scripts/thumbgate-bench.js +0 -494
  264. package/scripts/thumbgate_session_start.sh +0 -32
  265. package/scripts/train_from_feedback.py +0 -929
  266. package/scripts/validate-feedback.js +0 -581
  267. package/scripts/verify-obsidian-setup.sh +0 -269
  268. package/scripts/verify-run.js +0 -269
  269. package/scripts/weekly-auto-post.js +0 -124
  270. package/scripts/x-autonomous-marketing.js +0 -139
@@ -1,651 +0,0 @@
1
- #!/usr/bin/env node
2
- const fs = require('fs');
3
- const path = require('path');
4
- const os = require('os');
5
- const {
6
- captureFeedback,
7
- analyzeFeedback,
8
- buildPreventionRules,
9
- getFeedbackPaths,
10
- readJSONL,
11
- waitForBackgroundSideEffects,
12
- } = require('./feedback-loop');
13
- const { exportDpoFromMemories } = require('./export-dpo-pairs');
14
- const { planIntent } = require('./intent-router');
15
- const { startHandoff, completeHandoff } = require('./delegation-runtime');
16
- const { startServer } = require('../src/api/server');
17
- const { handleRequest } = require('../adapters/mcp/server-stdio');
18
- const { collectHealthReport } = require('./self-healing-check');
19
- const { runSelfHeal } = require('./self-heal');
20
- const { getContextFsRoot, NAMESPACES } = require('./contextfs');
21
- const { traceForProofCheck, aggregateTraces } = require('./code-reasoning');
22
- const { runVerificationLoop } = require('./verification-loop');
23
- const { run: runGateCheck } = require('./gates-engine');
24
- const { evaluatePromptGuard } = require('./prompt-guard');
25
- const { ensureDir } = require('./fs-utils');
26
-
27
- const ROOT = path.join(__dirname, '..');
28
- const DEFAULT_PROOF_DIR = path.join(ROOT, 'proof', 'automation');
29
-
30
-
31
- function check(condition, message) {
32
- if (!condition) throw new Error(message);
33
- }
34
-
35
- async function fetchWithRetry(url, options, { retries = 5, delayMs = 100 } = {}) {
36
- let lastError = null;
37
-
38
- for (let attempt = 0; attempt <= retries; attempt += 1) {
39
- try {
40
- return await fetch(url, options);
41
- } catch (err) {
42
- lastError = err;
43
- if (attempt === retries) {
44
- throw err;
45
- }
46
- await new Promise((resolve) => setTimeout(resolve, delayMs * (attempt + 1)));
47
- }
48
- }
49
-
50
- throw lastError;
51
- }
52
-
53
- async function runAutomationProof(options = {}) {
54
- const proofDir = options.proofDir || process.env.THUMBGATE_AUTOMATION_PROOF_DIR || DEFAULT_PROOF_DIR;
55
- const writeArtifacts = options.writeArtifacts !== false;
56
- const proofPort = options.port ?? 0;
57
-
58
- if (writeArtifacts) ensureDir(proofDir);
59
-
60
- const tmpFeedbackDir = fs.mkdtempSync(path.join(os.tmpdir(), 'thumbgate-automation-proof-'));
61
- const previousCodegraphStub = process.env.THUMBGATE_CODEGRAPH_STUB_RESPONSE;
62
- process.env.THUMBGATE_FEEDBACK_DIR = tmpFeedbackDir;
63
- process.env.THUMBGATE_API_KEY = 'automation-proof-key';
64
- process.env.THUMBGATE_MCP_PROFILE = 'default';
65
- process.env.THUMBGATE_CODEGRAPH_STUB_RESPONSE = JSON.stringify({
66
- source: 'stub',
67
- symbols: ['planIntent'],
68
- callers: ['src/api/server.js -> planIntent', 'adapters/mcp/server-stdio.js -> planIntent'],
69
- callees: ['rankActions', 'decomposeActions'],
70
- deadCode: ['legacyIntentPlanner'],
71
- });
72
-
73
- const report = {
74
- generatedAt: new Date().toISOString(),
75
- checks: [],
76
- summary: { passed: 0, failed: 0 },
77
- };
78
-
79
- function addResult(name, passed, details) {
80
- report.checks.push({ name, passed, details });
81
- if (passed) report.summary.passed += 1;
82
- else report.summary.failed += 1;
83
- }
84
-
85
- const { server, port } = await startServer({ port: proofPort });
86
- const baseUrl = `http://127.0.0.1:${port}`;
87
- let currentCheck = 'bootstrap';
88
- try {
89
- // 1) Positive with valid rubric -> accepted
90
- {
91
- const result = captureFeedback({
92
- signal: 'up',
93
- context: 'Implemented with tests and evidence',
94
- whatWorked: 'Used proof harness and verification logs',
95
- tags: ['verification', 'automation'],
96
- rubricScores: [
97
- { criterion: 'correctness', score: 4, evidence: 'all tests pass', judge: 'judge-a' },
98
- { criterion: 'verification_evidence', score: 4, evidence: 'proof attached', judge: 'judge-a' },
99
- { criterion: 'safety', score: 4, evidence: 'path checks enabled', judge: 'judge-a' },
100
- ],
101
- guardrails: {
102
- testsPassed: true,
103
- pathSafety: true,
104
- budgetCompliant: true,
105
- },
106
- });
107
- check(result.accepted === true, 'expected rubric-valid positive feedback to be accepted');
108
- check(Boolean(result.memoryRecord && result.memoryRecord.rubricSummary), 'accepted learning should include rubricSummary');
109
- addResult('feedback.capture.rubric_pass', true, {
110
- accepted: result.accepted,
111
- weightedScore: result.memoryRecord.rubricSummary.weightedScore,
112
- });
113
- }
114
-
115
- // 2) Positive with failed guardrail/disagreement -> blocked
116
- {
117
- const result = captureFeedback({
118
- signal: 'up',
119
- context: 'Claimed done without logs',
120
- whatWorked: 'Reviewer approved despite missing logs',
121
- tags: ['verification', 'automation'],
122
- rubricScores: [
123
- { criterion: 'verification_evidence', score: 5, judge: 'judge-a' },
124
- { criterion: 'verification_evidence', score: 2, judge: 'judge-b', evidence: 'logs missing' },
125
- ],
126
- guardrails: {
127
- testsPassed: false,
128
- pathSafety: true,
129
- budgetCompliant: true,
130
- },
131
- });
132
- check(result.accepted === false, 'expected rubric-gated positive feedback to be rejected');
133
- check(/Rubric gate prevented promotion/i.test(String(result.reason)), 'expected rubric gate reason');
134
- addResult('feedback.capture.rubric_block', true, { accepted: result.accepted, reason: result.reason });
135
- }
136
-
137
- // 3) Negative with rubric failures -> accepted mistake memory with rubric tags
138
- {
139
- const result = captureFeedback({
140
- signal: 'down',
141
- context: 'Skipped verification before completion claim',
142
- whatWentWrong: 'No test evidence',
143
- whatToChange: 'Always include test output',
144
- tags: ['verification', 'automation'],
145
- rubricScores: [
146
- { criterion: 'verification_evidence', score: 1, evidence: 'no logs', judge: 'judge-a' },
147
- { criterion: 'correctness', score: 2, evidence: 'regression detected', judge: 'judge-a' },
148
- ],
149
- guardrails: {
150
- testsPassed: false,
151
- pathSafety: true,
152
- budgetCompliant: true,
153
- },
154
- });
155
- check(result.accepted === true, 'expected negative feedback to be accepted as mistake memory');
156
- check(result.memoryRecord.tags.includes('rubric-verification_evidence'), 'expected rubric failure tags');
157
- addResult('feedback.capture.negative_with_rubric', true, {
158
- accepted: result.accepted,
159
- tags: result.memoryRecord.tags,
160
- });
161
- }
162
-
163
- // 4) analytics tracks rubric blocks/failures
164
- {
165
- const { FEEDBACK_LOG_PATH } = getFeedbackPaths();
166
- const stats = analyzeFeedback(FEEDBACK_LOG_PATH);
167
- check(stats.rubric.samples >= 3, 'expected rubric samples to be tracked');
168
- check(stats.rubric.blockedPromotions >= 1, 'expected blocked rubric promotions to be tracked');
169
- check(stats.diagnostics.totalDiagnosed >= 2, 'expected diagnostic counts for failed/suspect feedback');
170
- addResult('analytics.rubric_tracking', true, stats.rubric);
171
- }
172
-
173
- // 5) failed verification emits structured diagnosis and critical step
174
- {
175
- currentCheck = 'verification.failure_diagnostics';
176
- const { MEMORY_LOG_PATH } = getFeedbackPaths();
177
- fs.appendFileSync(MEMORY_LOG_PATH, `${JSON.stringify({
178
- id: 'mem_verification_failure',
179
- category: 'error',
180
- title: 'MISTAKE: agent claimed done without running tests',
181
- content: 'How to avoid: Run npm test before claiming completion',
182
- })}\n`);
183
- const verification = runVerificationLoop({
184
- context: 'Agent claimed done without running tests or verification',
185
- tags: ['verification', 'testing'],
186
- maxRetries: 0,
187
- modelPath: path.join(tmpFeedbackDir, 'verification-model.json'),
188
- });
189
- check(verification.accepted === false, 'expected failed verification for unverified completion claim');
190
- check(Boolean(verification.finalVerification && verification.finalVerification.diagnosis), 'failed verification should include diagnosis');
191
- check(verification.finalVerification.diagnosis.rootCauseCategory === 'tool_output_misread', 'verification diagnosis should classify output misread');
192
- addResult('verification.failure_diagnostics', true, {
193
- rootCauseCategory: verification.finalVerification.diagnosis.rootCauseCategory,
194
- criticalFailureStep: verification.finalVerification.diagnosis.criticalFailureStep,
195
- });
196
- }
197
-
198
- // 6) prevention rules include rubric dimensions and root causes
199
- {
200
- const markdown = buildPreventionRules(1);
201
- check(markdown.includes('Rubric Failure Dimensions'), 'expected rubric section in prevention rules');
202
- check(markdown.includes('verification_evidence'), 'expected criterion in prevention rules');
203
- check(markdown.includes('Root Cause Categories'), 'expected diagnosis section in prevention rules');
204
- addResult('prevention_rules.rubric_dimensions', true, { hasRubricSection: true });
205
- }
206
-
207
- // 7) DPO export includes rubric delta metadata
208
- {
209
- const { MEMORY_LOG_PATH } = getFeedbackPaths();
210
- const memories = readJSONL(MEMORY_LOG_PATH);
211
- const result = exportDpoFromMemories(memories);
212
- check(result.pairs.length >= 1, 'expected at least one DPO pair');
213
- const first = result.pairs[0];
214
- check(Boolean(first.metadata && first.metadata.rubric), 'expected rubric metadata in DPO pair');
215
- addResult('dpo_export.rubric_metadata', true, first.metadata.rubric);
216
- }
217
-
218
- // 8) API rubric gate returns 422
219
- {
220
- currentCheck = 'api.rubric_gate';
221
- const res = await fetchWithRetry(`${baseUrl}/v1/feedback/capture`, {
222
- method: 'POST',
223
- headers: {
224
- Authorization: 'Bearer automation-proof-key',
225
- 'Content-Type': 'application/json',
226
- },
227
- body: JSON.stringify({
228
- signal: 'up',
229
- context: 'unsafe api approval attempt',
230
- whatWorked: 'claimed success',
231
- tags: ['verification', 'automation'],
232
- rubricScores: [
233
- { criterion: 'verification_evidence', score: 5, judge: 'judge-a' },
234
- { criterion: 'verification_evidence', score: 2, judge: 'judge-b', evidence: 'missing logs' },
235
- ],
236
- guardrails: { testsPassed: false, pathSafety: true, budgetCompliant: true },
237
- }),
238
- });
239
- check(res.status === 422, `expected 422 from API rubric gate, got ${res.status}`);
240
- const body = await res.json();
241
- check(body.accepted === false, 'API rubric-gated capture must be rejected');
242
- addResult('api.rubric_gate', true, { status: res.status });
243
- }
244
-
245
- // 9) MCP rubric gate returns accepted=false
246
- {
247
- currentCheck = 'mcp.rubric_gate';
248
- const call = await handleRequest({
249
- jsonrpc: '2.0',
250
- id: 91,
251
- method: 'tools/call',
252
- params: {
253
- name: 'capture_feedback',
254
- arguments: {
255
- signal: 'up',
256
- context: 'unsafe mcp approval attempt',
257
- whatWorked: 'claimed success',
258
- rubricScores: [
259
- { criterion: 'verification_evidence', score: 5, judge: 'judge-a' },
260
- { criterion: 'verification_evidence', score: 2, judge: 'judge-b', evidence: 'missing logs' },
261
- ],
262
- guardrails: { testsPassed: false, pathSafety: true, budgetCompliant: true },
263
- },
264
- },
265
- });
266
- const payload = JSON.parse(call.content[0].text);
267
- check(payload.accepted === false, 'MCP rubric-gated capture must be rejected');
268
- addResult('mcp.rubric_gate', true, { accepted: payload.accepted });
269
- }
270
-
271
- // 10) PreToolUse blocks reads of secret-bearing files
272
- {
273
- currentCheck = 'secret_guard.read_block';
274
- const secretPath = path.join(tmpFeedbackDir, '.env');
275
- const stripeKey = ['sk', '_live_', '1234567890abcdefghijklmnopqrstuvwxyz'].join('');
276
- fs.writeFileSync(secretPath, `STRIPE_SECRET_KEY=${stripeKey}\n`);
277
- const gateOutput = JSON.parse(runGateCheck({
278
- tool_name: 'Read',
279
- tool_input: { file_path: secretPath },
280
- cwd: tmpFeedbackDir,
281
- }));
282
- check(gateOutput.hookSpecificOutput.permissionDecision === 'deny', 'expected secret file read to be blocked');
283
- addResult('secret_guard.read_block', true, {
284
- decision: gateOutput.hookSpecificOutput.permissionDecision,
285
- reason: gateOutput.hookSpecificOutput.permissionDecisionReason,
286
- });
287
- }
288
-
289
- // 11) UserPromptSubmit blocks prompts with inline secrets
290
- {
291
- currentCheck = 'secret_guard.prompt_block';
292
- const gitHubPat = ['gh', 'p_', 'abcdefghijklmnopqrstuvwxyz1234'].join('');
293
- const result = evaluatePromptGuard(`Ship this token to support: ${gitHubPat}`);
294
- check(result && result.continue === false, 'expected prompt guard to block secret-bearing prompt');
295
- addResult('secret_guard.prompt_block', true, {
296
- continue: result.continue,
297
- stopReason: result.stopReason,
298
- });
299
- }
300
-
301
- // 12) MCP failure diagnostics compile schema and approval constraints
302
- {
303
- currentCheck = 'mcp.failure_diagnostics';
304
- const call = await handleRequest({
305
- jsonrpc: '2.0',
306
- id: 92,
307
- method: 'tools/call',
308
- params: {
309
- name: 'diagnose_failure',
310
- arguments: {
311
- step: 'capture_feedback',
312
- context: 'Attempted to approve publish flow without required approval',
313
- toolName: 'capture_feedback',
314
- toolArgs: {},
315
- intentId: 'publish_dpo_training_data',
316
- mcpProfile: 'default',
317
- },
318
- },
319
- });
320
- const payload = JSON.parse(call.content[0].text);
321
- check(payload.rootCauseCategory === 'intent_plan_misalignment', 'diagnose_failure should classify approval mismatch');
322
- check(payload.compiledConstraints.summary.toolSchemaCount >= 1, 'diagnose_failure should include MCP schema constraints');
323
- addResult('mcp.failure_diagnostics', true, {
324
- rootCauseCategory: payload.rootCauseCategory,
325
- toolSchemaCount: payload.compiledConstraints.summary.toolSchemaCount,
326
- });
327
- }
328
-
329
- // 13) intent checkpoints still enforced
330
- {
331
- currentCheck = 'intent.checkpoint_enforcement';
332
- const planBlocked = planIntent({
333
- intentId: 'publish_dpo_training_data',
334
- mcpProfile: 'default',
335
- approved: false,
336
- });
337
- check(planBlocked.status === 'checkpoint_required', 'expected checkpoint_required for high-risk intent');
338
-
339
- const planApproved = planIntent({
340
- intentId: 'publish_dpo_training_data',
341
- mcpProfile: 'default',
342
- approved: true,
343
- });
344
- check(planApproved.status === 'ready', 'expected ready when approved');
345
- addResult('intent.checkpoint_enforcement', true, {
346
- blocked: planBlocked.status,
347
- approved: planApproved.status,
348
- });
349
- }
350
-
351
- // 14) partner-aware planning returns execution strategy
352
- {
353
- currentCheck = 'intent.partner_strategy';
354
- const partnerPlan = planIntent({
355
- intentId: 'incident_postmortem',
356
- mcpProfile: 'default',
357
- partnerProfile: 'strict-reviewer',
358
- });
359
- check(partnerPlan.partnerProfile === 'strict_reviewer', 'expected normalized strict_reviewer partner profile');
360
- check(Boolean(partnerPlan.partnerStrategy), 'expected partner strategy metadata');
361
- check(partnerPlan.partnerStrategy.verificationMode === 'evidence_first', 'expected evidence_first verification mode');
362
- check(partnerPlan.tokenBudget.contextPack > 6000, 'expected boosted contextPack budget for strict reviewer');
363
- check(Array.isArray(partnerPlan.actionScores), 'expected action scores for partner-aware plan');
364
- addResult('intent.partner_strategy', true, {
365
- partnerProfile: partnerPlan.partnerProfile,
366
- verificationMode: partnerPlan.partnerStrategy.verificationMode,
367
- contextPack: partnerPlan.tokenBudget.contextPack,
368
- });
369
- }
370
-
371
- // 15) coding workflows include structural impact evidence and dead-code checks
372
- {
373
- currentCheck = 'intent.delegation_decision';
374
- const plan = planIntent({
375
- intentId: 'improve_response_quality',
376
- context: 'Improve the response with evidence and prevention rules',
377
- mcpProfile: 'default',
378
- delegationMode: 'auto',
379
- });
380
- check(plan.executionMode === 'sequential_delegate', 'expected delegation decision for eligible multi-phase task');
381
- check(plan.delegateProfile === 'pr_workflow', 'expected pr_workflow delegate profile');
382
- check(Boolean(plan.handoffContract), 'expected handoff contract on delegated plan');
383
- addResult('intent.delegation_decision', true, {
384
- executionMode: plan.executionMode,
385
- delegateProfile: plan.delegateProfile,
386
- delegationScore: plan.delegationScore,
387
- });
388
- }
389
-
390
- // 16) sequential handoff contract is explicit and blocks duplicate starts
391
- {
392
- currentCheck = 'handoff.contract_shape';
393
- const plan = planIntent({
394
- intentId: 'improve_response_quality',
395
- context: 'Improve the response with evidence and prevention rules',
396
- mcpProfile: 'default',
397
- delegationMode: 'auto',
398
- });
399
- const started = startHandoff({
400
- plan,
401
- context: plan.context,
402
- mcpProfile: plan.mcpProfile,
403
- partnerProfile: plan.partnerProfile,
404
- });
405
- check(Boolean(started.handoffContract), 'expected handoff contract');
406
- check(Array.isArray(started.handoffContract.scopeIn), 'handoff contract should include scopeIn');
407
- check(Array.isArray(started.handoffContract.requiredEvidence), 'handoff contract should include requiredEvidence');
408
- check(Array.isArray(started.handoffContract.requiredChecks), 'handoff contract should include requiredChecks');
409
- addResult('handoff.contract_shape', true, {
410
- handoffId: started.handoffId,
411
- requiredEvidence: started.handoffContract.requiredEvidence,
412
- requiredChecks: started.handoffContract.requiredChecks,
413
- });
414
-
415
- currentCheck = 'handoff.sequential_guard';
416
- let guardErr = null;
417
- try {
418
- startHandoff({
419
- plan,
420
- context: plan.context,
421
- mcpProfile: plan.mcpProfile,
422
- partnerProfile: plan.partnerProfile,
423
- });
424
- } catch (err) {
425
- guardErr = err;
426
- }
427
- check(Boolean(guardErr), 'expected duplicate handoff start to fail');
428
- check(/unresolved handoff/i.test(guardErr.message), 'expected unresolved handoff guard');
429
- addResult('handoff.sequential_guard', true, {
430
- statusCode: guardErr.statusCode,
431
- message: guardErr.message,
432
- });
433
-
434
- currentCheck = 'handoff.failure_diagnostics';
435
- const completed = completeHandoff({
436
- handoffId: started.handoffId,
437
- outcome: 'accepted',
438
- attempts: 1,
439
- violationCount: 1,
440
- summary: 'Returned without test evidence.',
441
- resultContext: 'Agent claimed done without running tests or verification',
442
- });
443
- check(completed.verificationAccepted === false, 'expected handoff verification to fail');
444
- check(Boolean(completed.diagnosis), 'expected handoff completion diagnosis');
445
- addResult('handoff.failure_diagnostics', true, {
446
- verificationAccepted: completed.verificationAccepted,
447
- rootCauseCategory: completed.diagnosis.rootCauseCategory,
448
- });
449
- }
450
-
451
- // 17) coding workflows include structural impact evidence and dead-code checks
452
- {
453
- currentCheck = 'intent.codegraph_impact';
454
- const plan = planIntent({
455
- intentId: 'incident_postmortem',
456
- context: 'Refactor `planIntent` in scripts/intent-router.js',
457
- mcpProfile: 'default',
458
- repoPath: ROOT,
459
- });
460
- check(plan.codegraphImpact.enabled === true, 'expected codegraph impact to be enabled');
461
- check(plan.codegraphImpact.evidence.deadCodeCount >= 1, 'expected dead-code candidates in codegraph evidence');
462
- check(
463
- plan.partnerStrategy.recommendedChecks.some((item) => /dead code/i.test(item)),
464
- 'expected structural verification checks to be appended',
465
- );
466
- addResult('intent.codegraph_impact', true, {
467
- source: plan.codegraphImpact.source,
468
- impactScore: plan.codegraphImpact.evidence.impactScore,
469
- deadCodeCount: plan.codegraphImpact.evidence.deadCodeCount,
470
- });
471
- }
472
-
473
- // 18) context evaluate stores rubric evaluation
474
- {
475
- currentCheck = 'context.evaluate.construct';
476
- const construct = await fetchWithRetry(`${baseUrl}/v1/context/construct`, {
477
- method: 'POST',
478
- headers: {
479
- Authorization: 'Bearer automation-proof-key',
480
- 'Content-Type': 'application/json',
481
- },
482
- body: JSON.stringify({ query: 'verification automation', maxItems: 5, maxChars: 5000 }),
483
- });
484
- check(construct.status === 200, `context construct expected 200, got ${construct.status}`);
485
- const pack = await construct.json();
486
-
487
- currentCheck = 'context.evaluate.rubric';
488
- const evaluate = await fetchWithRetry(`${baseUrl}/v1/context/evaluate`, {
489
- method: 'POST',
490
- headers: {
491
- Authorization: 'Bearer automation-proof-key',
492
- 'Content-Type': 'application/json',
493
- },
494
- body: JSON.stringify({
495
- packId: pack.packId,
496
- outcome: 'useful',
497
- signal: 'positive',
498
- rubricScores: [
499
- { criterion: 'correctness', score: 4, evidence: 'tests pass', judge: 'judge-a' },
500
- { criterion: 'verification_evidence', score: 4, evidence: 'logs attached', judge: 'judge-a' },
501
- ],
502
- guardrails: { testsPassed: true, pathSafety: true, budgetCompliant: true },
503
- }),
504
- });
505
- check(evaluate.status === 200, `context evaluate expected 200, got ${evaluate.status}`);
506
- const evalBody = await evaluate.json();
507
- check(Boolean(evalBody.rubricEvaluation), 'expected rubricEvaluation on context evaluate result');
508
- addResult('context.evaluate.rubric', true, { rubricId: evalBody.rubricEvaluation.rubricId });
509
- }
510
-
511
- // 19) semantic cache hit on equivalent query
512
- {
513
- currentCheck = 'context.semantic_cache.hit.first';
514
- fs.rmSync(path.join(getContextFsRoot(), NAMESPACES.provenance, 'semantic-cache.jsonl'), { force: true });
515
- const first = await fetchWithRetry(`${baseUrl}/v1/context/construct`, {
516
- method: 'POST',
517
- headers: {
518
- Authorization: 'Bearer automation-proof-key',
519
- 'Content-Type': 'application/json',
520
- },
521
- body: JSON.stringify({ query: 'verification testing evidence', maxItems: 5, maxChars: 5000 }),
522
- });
523
- check(first.status === 200, `first context construct expected 200, got ${first.status}`);
524
- const firstPack = await first.json();
525
-
526
- currentCheck = 'context.semantic_cache.hit.second';
527
- const second = await fetchWithRetry(`${baseUrl}/v1/context/construct`, {
528
- method: 'POST',
529
- headers: {
530
- Authorization: 'Bearer automation-proof-key',
531
- 'Content-Type': 'application/json',
532
- },
533
- body: JSON.stringify({ query: 'testing verification evidence', maxItems: 5, maxChars: 5000 }),
534
- });
535
- check(second.status === 200, `second context construct expected 200, got ${second.status}`);
536
- const secondPack = await second.json();
537
- check(firstPack.cache && firstPack.cache.hit === false, 'first pack expected cache miss');
538
- check(secondPack.cache && secondPack.cache.hit === true, 'second pack expected cache hit');
539
- addResult('context.semantic_cache.hit', true, {
540
- firstHit: firstPack.cache.hit,
541
- secondHit: secondPack.cache.hit,
542
- similarity: secondPack.cache.similarity,
543
- });
544
- }
545
-
546
- // 20) self-healing helpers produce healthy reports in baseline state
547
- {
548
- const health = collectHealthReport({
549
- checks: [
550
- { name: 'noop', command: ['node', '-e', 'process.exit(0)'] },
551
- ],
552
- });
553
- check(health.overall_status === 'healthy', 'health report expected healthy for noop check');
554
- const unhealthy = collectHealthReport({
555
- checks: [
556
- { name: 'explode', command: ['node', '-e', 'process.exit(2)'] },
557
- ],
558
- });
559
- check(unhealthy.checks[0].diagnosis.rootCauseCategory === 'system_failure', 'unhealthy self-heal check should include system_failure diagnosis');
560
-
561
- const heal = runSelfHeal({ reason: 'automation-proof', cwd: ROOT });
562
- check(heal.healthy === true, 'self-heal expected healthy execution');
563
- check(Boolean(heal.reasoning), 'self-heal must include reasoning traces');
564
- check(heal.traces.length === heal.plan.length, 'self-heal traces count must match plan length');
565
- addResult('self_healing.helpers', true, {
566
- healthStatus: health.overall_status,
567
- changed: heal.changed,
568
- reasoning: heal.reasoning,
569
- });
570
- }
571
-
572
- // 21) code reasoning traces verify DPO pair quality
573
- {
574
- const { MEMORY_LOG_PATH } = getFeedbackPaths();
575
- const memories = readJSONL(MEMORY_LOG_PATH);
576
- const result = exportDpoFromMemories(memories);
577
- if (result.pairs.length >= 1) {
578
- const first = result.pairs[0];
579
- check(Boolean(first.metadata.reasoningTrace), 'DPO pair must include reasoningTrace metadata');
580
- check(typeof first.metadata.reasoningTrace.confidence === 'number', 'reasoningTrace must have confidence score');
581
- check(typeof first.metadata.reasoningTrace.traceId === 'string', 'reasoningTrace must have traceId');
582
- check(Boolean(result.reasoning), 'DPO export must include aggregate reasoning summary');
583
- addResult('code_reasoning.dpo_traces', true, {
584
- traceId: first.metadata.reasoningTrace.traceId,
585
- confidence: first.metadata.reasoningTrace.confidence,
586
- aggregateConfidence: result.reasoning.averageConfidence,
587
- });
588
- } else {
589
- addResult('code_reasoning.dpo_traces', true, { skipped: true, reason: 'no DPO pairs to trace' });
590
- }
591
- }
592
-
593
- // 22) code reasoning traces attached to proof checks
594
- {
595
- const proofTraces = report.checks.map((chk) => traceForProofCheck(chk));
596
- const aggregate = aggregateTraces(proofTraces);
597
- check(aggregate.totalTraces === report.checks.length, 'proof trace count must match check count');
598
- check(aggregate.refuted === 0, 'no proof check should have refuted steps');
599
- check(aggregate.averageConfidence > 0, 'proof traces must have positive confidence');
600
- report.reasoning = aggregate;
601
- report.proofTraces = proofTraces;
602
- addResult('code_reasoning.proof_gate', true, {
603
- totalTraces: aggregate.totalTraces,
604
- averageConfidence: aggregate.averageConfidence,
605
- allPassed: aggregate.allPassed,
606
- });
607
- }
608
- } catch (err) {
609
- addResult('fatal', false, {
610
- check: currentCheck,
611
- error: err.message,
612
- cause: err.cause && err.cause.message ? err.cause.message : null,
613
- });
614
- } finally {
615
- await new Promise((resolve) => server.close(resolve));
616
- await waitForBackgroundSideEffects();
617
- fs.rmSync(tmpFeedbackDir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 });
618
- if (previousCodegraphStub === undefined) delete process.env.THUMBGATE_CODEGRAPH_STUB_RESPONSE;
619
- else process.env.THUMBGATE_CODEGRAPH_STUB_RESPONSE = previousCodegraphStub;
620
- }
621
-
622
- if (writeArtifacts) {
623
- fs.writeFileSync(path.join(proofDir, 'report.json'), `${JSON.stringify(report, null, 2)}\n`);
624
- const mdLines = [
625
- '# Automation Proof',
626
- '',
627
- `Generated: ${report.generatedAt}`,
628
- '',
629
- `Passed: ${report.summary.passed}`,
630
- `Failed: ${report.summary.failed}`,
631
- '',
632
- '## Checks',
633
- ...report.checks.map((checkItem) => `- ${checkItem.passed ? 'PASS' : 'FAIL'} ${checkItem.name}`),
634
- '',
635
- ];
636
- fs.writeFileSync(path.join(proofDir, 'report.md'), `${mdLines.join('\n')}\n`);
637
- }
638
-
639
- if (report.summary.failed > 0) process.exitCode = 1;
640
- return report;
641
- }
642
-
643
- module.exports = {
644
- runAutomationProof,
645
- };
646
-
647
- if (require.main === module) {
648
- runAutomationProof().then((report) => {
649
- console.log(JSON.stringify(report.summary, null, 2));
650
- });
651
- }