thumbgate 1.4.2 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. package/.claude-plugin/README.md +45 -34
  2. package/.claude-plugin/marketplace.json +3 -3
  3. package/.claude-plugin/plugin.json +3 -3
  4. package/.well-known/llms.txt +1 -1
  5. package/.well-known/mcp/server-card.json +1 -1
  6. package/README.md +26 -2
  7. package/adapters/README.md +4 -1
  8. package/adapters/claude/.mcp.json +2 -2
  9. package/adapters/codex/config.toml +2 -2
  10. package/adapters/mcp/server-stdio.js +10 -4
  11. package/adapters/opencode/opencode.json +1 -1
  12. package/bin/cli.js +246 -90
  13. package/config/mcp-allowlists.json +11 -3
  14. package/package.json +184 -21
  15. package/scripts/audit-trail.js +25 -15
  16. package/scripts/auto-wire-hooks.js +127 -0
  17. package/scripts/cli-demo.js +102 -0
  18. package/scripts/cli-schema.js +285 -0
  19. package/scripts/cli-status.js +166 -0
  20. package/scripts/cross-encoder-reranker.js +235 -0
  21. package/scripts/explore-subcommands.js +277 -0
  22. package/scripts/explore.js +569 -0
  23. package/scripts/feedback-loop.js +20 -6
  24. package/scripts/lesson-inference.js +7 -1
  25. package/scripts/lesson-reranker.js +263 -0
  26. package/scripts/lesson-retrieval.js +34 -17
  27. package/scripts/lesson-search.js +69 -0
  28. package/scripts/perplexity-client.js +210 -0
  29. package/scripts/reflector-agent.js +2 -2
  30. package/scripts/statusline-local-stats.js +3 -1
  31. package/scripts/statusline.sh +12 -11
  32. package/src/api/server.js +178 -17
  33. package/src/index.js +3 -0
  34. package/.claude-plugin/bundle/icon.png +0 -0
  35. package/.claude-plugin/bundle/icon.svg +0 -18
  36. package/.claude-plugin/bundle/server/index.js +0 -24
  37. package/adapters/chatgpt/INSTALL.md +0 -138
  38. package/bin/memory.sh +0 -64
  39. package/bin/obsidian-sync.sh +0 -20
  40. package/plugins/amp-skill/INSTALL.md +0 -52
  41. package/plugins/amp-skill/SKILL.md +0 -64
  42. package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +0 -22
  43. package/plugins/claude-codex-bridge/.mcp.json +0 -14
  44. package/plugins/claude-codex-bridge/INSTALL.md +0 -43
  45. package/plugins/claude-codex-bridge/README.md +0 -46
  46. package/plugins/claude-codex-bridge/scripts/codex-bridge.js +0 -286
  47. package/plugins/claude-codex-bridge/skills/adversarial-review/SKILL.md +0 -24
  48. package/plugins/claude-codex-bridge/skills/result/SKILL.md +0 -22
  49. package/plugins/claude-codex-bridge/skills/review/SKILL.md +0 -28
  50. package/plugins/claude-codex-bridge/skills/second-pass/SKILL.md +0 -27
  51. package/plugins/claude-codex-bridge/skills/setup/SKILL.md +0 -21
  52. package/plugins/claude-codex-bridge/skills/status/SKILL.md +0 -19
  53. package/plugins/claude-skill/INSTALL.md +0 -55
  54. package/plugins/claude-skill/SKILL.md +0 -46
  55. package/plugins/codex-profile/.codex-plugin/plugin.json +0 -43
  56. package/plugins/codex-profile/.mcp.json +0 -14
  57. package/plugins/codex-profile/AGENTS.md +0 -20
  58. package/plugins/codex-profile/INSTALL.md +0 -89
  59. package/plugins/codex-profile/README.md +0 -61
  60. package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +0 -23
  61. package/plugins/cursor-marketplace/CHANGELOG.md +0 -30
  62. package/plugins/cursor-marketplace/LICENSE +0 -21
  63. package/plugins/cursor-marketplace/README.md +0 -124
  64. package/plugins/cursor-marketplace/agents/reliability-reviewer.md +0 -31
  65. package/plugins/cursor-marketplace/assets/logo-400x400.png +0 -0
  66. package/plugins/cursor-marketplace/commands/capture-feedback.md +0 -33
  67. package/plugins/cursor-marketplace/commands/check-gates.md +0 -25
  68. package/plugins/cursor-marketplace/commands/show-lessons.md +0 -27
  69. package/plugins/cursor-marketplace/hooks/hooks.json +0 -10
  70. package/plugins/cursor-marketplace/mcp.json +0 -14
  71. package/plugins/cursor-marketplace/rules/feedback-capture.mdc +0 -34
  72. package/plugins/cursor-marketplace/rules/pre-action-gates.mdc +0 -30
  73. package/plugins/cursor-marketplace/rules/session-continuity.mdc +0 -28
  74. package/plugins/cursor-marketplace/scripts/gate-check.sh +0 -21
  75. package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +0 -48
  76. package/plugins/cursor-marketplace/skills/prevention-rules/SKILL.md +0 -31
  77. package/plugins/cursor-marketplace/skills/recall-context/SKILL.md +0 -30
  78. package/plugins/cursor-marketplace/skills/search-lessons/SKILL.md +0 -33
  79. package/plugins/gemini-extension/INSTALL.md +0 -92
  80. package/plugins/gemini-extension/gemini_prompt.txt +0 -14
  81. package/plugins/gemini-extension/tool_contract.json +0 -45
  82. package/plugins/opencode-profile/INSTALL.md +0 -57
  83. package/public/assets/instagram-card.png +0 -0
  84. package/public/assets/tiktok-agent-memory.mp4 +0 -0
  85. package/public/blog.html +0 -474
  86. package/public/compare/mem0.html +0 -189
  87. package/public/compare/speclock.html +0 -180
  88. package/public/compare.html +0 -310
  89. package/public/dashboard.html +0 -1100
  90. package/public/guide.html +0 -317
  91. package/public/guides/claude-code-prevent-repeated-mistakes.html +0 -161
  92. package/public/guides/codex-cli-guardrails.html +0 -158
  93. package/public/guides/cursor-prevent-repeated-mistakes.html +0 -161
  94. package/public/guides/pre-action-gates.html +0 -162
  95. package/public/guides/stop-repeated-ai-agent-mistakes.html +0 -159
  96. package/public/index.html +0 -1128
  97. package/public/js/buyer-intent.js +0 -252
  98. package/public/learn/agent-harness-pattern.html +0 -180
  99. package/public/learn/ai-agent-persistent-memory.html +0 -203
  100. package/public/learn/learn.css +0 -45
  101. package/public/learn/mcp-pre-action-gates-explained.html +0 -172
  102. package/public/learn/stop-ai-agent-force-push.html +0 -134
  103. package/public/learn/vibe-coding-safety-net.html +0 -142
  104. package/public/learn.html +0 -274
  105. package/public/lessons.html +0 -967
  106. package/public/llm-context.md +0 -140
  107. package/public/pro.html +0 -1087
  108. package/public/vercel.json +0 -8
  109. package/scripts/a2ui-engine.js +0 -73
  110. package/scripts/adk-consolidator.js +0 -274
  111. package/scripts/agent-security-hardening.js +0 -225
  112. package/scripts/ai-search-visibility.js +0 -142
  113. package/scripts/autonomous-sales-agent.js +0 -39
  114. package/scripts/autoresearch-runner.js +0 -216
  115. package/scripts/background-agent-governance.js +0 -229
  116. package/scripts/behavioral-extraction.js +0 -93
  117. package/scripts/budget-enforcer.js +0 -173
  118. package/scripts/budget-guard.js +0 -173
  119. package/scripts/build-claude-mcpb.js +0 -255
  120. package/scripts/build-codex-plugin.js +0 -152
  121. package/scripts/capture-railway-diagnostics.sh +0 -97
  122. package/scripts/changeset-check.js +0 -372
  123. package/scripts/check-congruence.js +0 -443
  124. package/scripts/computer-use-firewall.js +0 -280
  125. package/scripts/content-engine/linkedin-content-generator.js +0 -154
  126. package/scripts/content-engine/output/linkedin-memento-validation.md +0 -17
  127. package/scripts/content-engine/output/linkedin-posts-2026-04-09.md +0 -175
  128. package/scripts/content-engine/reddit-thread-finder.js +0 -154
  129. package/scripts/context-engine.js +0 -710
  130. package/scripts/daily-digest.js +0 -11
  131. package/scripts/data-governance.js +0 -173
  132. package/scripts/deploy-gcp.sh +0 -44
  133. package/scripts/deploy-policy.js +0 -249
  134. package/scripts/disagreement-mining.js +0 -315
  135. package/scripts/dpo-optimizer.js +0 -206
  136. package/scripts/ensure-repo-bootstrap.js +0 -130
  137. package/scripts/ephemeral-agent-store.js +0 -212
  138. package/scripts/eval-harness.js +0 -56
  139. package/scripts/export-kto-pairs.js +0 -309
  140. package/scripts/export-training.js +0 -446
  141. package/scripts/feedback-fallback.js +0 -111
  142. package/scripts/feedback-inbox-read.js +0 -162
  143. package/scripts/feedback-root-consolidator.js +0 -233
  144. package/scripts/feedback-to-memory.js +0 -185
  145. package/scripts/gate-satisfy.js +0 -42
  146. package/scripts/generate-paperbanana-diagrams.sh +0 -99
  147. package/scripts/generate-pretool-hook.sh +0 -40
  148. package/scripts/github-about.js +0 -430
  149. package/scripts/github-outreach.js +0 -65
  150. package/scripts/gtm-revenue-loop.js +0 -535
  151. package/scripts/hallucination-detector.js +0 -226
  152. package/scripts/hf-papers.js +0 -317
  153. package/scripts/hook-auto-capture.sh +0 -100
  154. package/scripts/hook-stop-pr-thread-check.sh +0 -68
  155. package/scripts/hook-stop-self-score.sh +0 -51
  156. package/scripts/hook-stop-verify-deploy.sh +0 -31
  157. package/scripts/hook-verify-before-done.sh +0 -20
  158. package/scripts/managed-dpo-export.js +0 -91
  159. package/scripts/markdown-escape.js +0 -12
  160. package/scripts/marketing-experiment.js +0 -657
  161. package/scripts/memalign-recall.js +0 -111
  162. package/scripts/memory-migration.js +0 -296
  163. package/scripts/meta-policy.js +0 -190
  164. package/scripts/metered-billing.js +0 -16
  165. package/scripts/model-tier-router.js +0 -310
  166. package/scripts/money-watcher.js +0 -218
  167. package/scripts/multi-hop-recall.js +0 -240
  168. package/scripts/per-step-scoring.js +0 -163
  169. package/scripts/perplexity-marketing.js +0 -466
  170. package/scripts/pii-scanner.js +0 -153
  171. package/scripts/plan-gate.js +0 -154
  172. package/scripts/post-everywhere.js +0 -341
  173. package/scripts/post-to-x-retry.sh +0 -22
  174. package/scripts/post-to-x.js +0 -369
  175. package/scripts/pr-manager.js +0 -421
  176. package/scripts/principle-extractor.js +0 -162
  177. package/scripts/pro-features.js +0 -41
  178. package/scripts/prompt-dlp.js +0 -222
  179. package/scripts/prove-adapters.js +0 -860
  180. package/scripts/prove-attribution.js +0 -361
  181. package/scripts/prove-automation.js +0 -651
  182. package/scripts/prove-autoresearch.js +0 -304
  183. package/scripts/prove-claim-verification.js +0 -277
  184. package/scripts/prove-cloudflare-sandbox.js +0 -161
  185. package/scripts/prove-data-pipeline.js +0 -408
  186. package/scripts/prove-data-quality.js +0 -227
  187. package/scripts/prove-evolution.js +0 -352
  188. package/scripts/prove-harnesses.js +0 -287
  189. package/scripts/prove-intelligence.js +0 -257
  190. package/scripts/prove-lancedb.js +0 -425
  191. package/scripts/prove-local-intelligence.js +0 -340
  192. package/scripts/prove-loop-closure.js +0 -263
  193. package/scripts/prove-packaged-runtime.js +0 -326
  194. package/scripts/prove-predictive-insights.js +0 -355
  195. package/scripts/prove-runtime.js +0 -363
  196. package/scripts/prove-seo-gsd.js +0 -234
  197. package/scripts/prove-settings.js +0 -279
  198. package/scripts/prove-subway-upgrades.js +0 -277
  199. package/scripts/prove-tessl.js +0 -229
  200. package/scripts/prove-training-export.js +0 -325
  201. package/scripts/prove-workflow-contract.js +0 -112
  202. package/scripts/prove-xmemory.js +0 -332
  203. package/scripts/publish-decision.js +0 -159
  204. package/scripts/ralph-loop.js +0 -376
  205. package/scripts/ralph-mode-ci.js +0 -331
  206. package/scripts/reddit-dm-outreach.js +0 -192
  207. package/scripts/reddit-monitor-cron.sh +0 -26
  208. package/scripts/reminder-engine.js +0 -132
  209. package/scripts/revenue-status.js +0 -472
  210. package/scripts/rotate-stripe-webhook-secret.js +0 -314
  211. package/scripts/schedule-manager.js +0 -249
  212. package/scripts/self-healing-check.js +0 -193
  213. package/scripts/shieldcortex-memory-firewall-runner.mjs +0 -53
  214. package/scripts/skill-exporter.js +0 -260
  215. package/scripts/skill-materializer.js +0 -134
  216. package/scripts/skill-packs.js +0 -136
  217. package/scripts/skill-proposer.js +0 -99
  218. package/scripts/skill-quality-tracker.js +0 -282
  219. package/scripts/slow-loop.js +0 -72
  220. package/scripts/social-analytics/db/analytics.sqlite +0 -0
  221. package/scripts/social-analytics/db/schema.sql +0 -32
  222. package/scripts/social-analytics/digest.js +0 -256
  223. package/scripts/social-analytics/engagement-audit.js +0 -185
  224. package/scripts/social-analytics/generate-instagram-card.js +0 -97
  225. package/scripts/social-analytics/instagram-thumbgate-post.js +0 -111
  226. package/scripts/social-analytics/install-growth-automation.js +0 -114
  227. package/scripts/social-analytics/load-env.js +0 -77
  228. package/scripts/social-analytics/mcp-server.js +0 -289
  229. package/scripts/social-analytics/normalizer.js +0 -580
  230. package/scripts/social-analytics/notify.js +0 -162
  231. package/scripts/social-analytics/poll-all.js +0 -107
  232. package/scripts/social-analytics/pollers/github.js +0 -195
  233. package/scripts/social-analytics/pollers/instagram.js +0 -253
  234. package/scripts/social-analytics/pollers/linkedin.js +0 -340
  235. package/scripts/social-analytics/pollers/plausible.js +0 -245
  236. package/scripts/social-analytics/pollers/reddit.js +0 -306
  237. package/scripts/social-analytics/pollers/threads.js +0 -233
  238. package/scripts/social-analytics/pollers/tiktok.js +0 -203
  239. package/scripts/social-analytics/pollers/x.js +0 -227
  240. package/scripts/social-analytics/pollers/youtube.js +0 -304
  241. package/scripts/social-analytics/pollers/zernio.js +0 -183
  242. package/scripts/social-analytics/publish-instagram-thumbgate.js +0 -104
  243. package/scripts/social-analytics/publish-thumbgate-launch.js +0 -322
  244. package/scripts/social-analytics/publishers/devto.js +0 -122
  245. package/scripts/social-analytics/publishers/instagram.js +0 -317
  246. package/scripts/social-analytics/publishers/linkedin.js +0 -294
  247. package/scripts/social-analytics/publishers/reddit.js +0 -385
  248. package/scripts/social-analytics/publishers/threads.js +0 -275
  249. package/scripts/social-analytics/publishers/tiktok.js +0 -217
  250. package/scripts/social-analytics/publishers/x.js +0 -259
  251. package/scripts/social-analytics/publishers/youtube.js +0 -223
  252. package/scripts/social-analytics/publishers/zernio.js +0 -539
  253. package/scripts/social-analytics/reconcile-thumbgate-campaign.js +0 -165
  254. package/scripts/social-analytics/run-digest.js +0 -34
  255. package/scripts/social-analytics/schedule-thumbgate-campaign.js +0 -275
  256. package/scripts/social-analytics/store.js +0 -455
  257. package/scripts/social-analytics/sync-launch-assets.js +0 -185
  258. package/scripts/social-analytics/utm.js +0 -143
  259. package/scripts/social-pipeline.js +0 -2626
  260. package/scripts/social-post-hourly.js +0 -228
  261. package/scripts/social-quality-gate.js +0 -134
  262. package/scripts/social-reply-monitor.js +0 -592
  263. package/scripts/status-dashboard.js +0 -155
  264. package/scripts/stripe-live-status.js +0 -115
  265. package/scripts/subagent-profiles.js +0 -79
  266. package/scripts/sync-branch-protection.js +0 -340
  267. package/scripts/sync-gh-secrets-from-env.sh +0 -70
  268. package/scripts/sync-github-about.js +0 -55
  269. package/scripts/sync-version.js +0 -479
  270. package/scripts/synthetic-dpo.js +0 -234
  271. package/scripts/tessl-export.js +0 -369
  272. package/scripts/test-coverage.js +0 -128
  273. package/scripts/thumbgate_session_start.sh +0 -32
  274. package/scripts/train_from_feedback.py +0 -929
  275. package/scripts/validate-feedback.js +0 -581
  276. package/scripts/verify-obsidian-setup.sh +0 -269
  277. package/scripts/verify-run.js +0 -269
  278. package/scripts/weekly-auto-post.js +0 -124
  279. package/scripts/x-autonomous-marketing.js +0 -139
@@ -1,304 +0,0 @@
1
- 'use strict';
2
- /**
3
- * Phase 9: Autoresearch — Proof Gate
4
- *
5
- * Validates all AUTORESEARCH-01 through AUTORESEARCH-06 requirements offline.
6
- * Mirrors the pattern of prove-loop-closure.js.
7
- *
8
- * Usage:
9
- * node scripts/prove-autoresearch.js
10
- *
11
- * Produces:
12
- * proof/autoresearch-report.json
13
- * proof/autoresearch-report.md
14
- */
15
-
16
- const fs = require('fs');
17
- const os = require('os');
18
- const path = require('path');
19
-
20
- const ROOT = path.join(__dirname, '..');
21
-
22
- function resolveProofPaths() {
23
- const proofDir = process.env.THUMBGATE_PROOF_DIR || path.join(ROOT, 'proof');
24
- return {
25
- proofDir,
26
- reportJson: path.join(proofDir, 'autoresearch-report.json'),
27
- reportMd: path.join(proofDir, 'autoresearch-report.md'),
28
- };
29
- }
30
-
31
- async function run() {
32
- const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'thumbgate-autoresearch-proof-'));
33
- const results = { passed: 0, failed: 0, requirements: {} };
34
- const { proofDir, reportJson, reportMd } = resolveProofPaths();
35
-
36
- const checks = [
37
- {
38
- id: 'AUTORESEARCH-01',
39
- desc: 'experiment-tracker.js: createExperiment() returns valid experiment with id, status=pending',
40
- fn: () => {
41
- process.env.THUMBGATE_FEEDBACK_DIR = tmpDir;
42
- delete require.cache[require.resolve('./experiment-tracker')];
43
- delete require.cache[require.resolve('./feedback-loop')];
44
- const m = require('./experiment-tracker');
45
-
46
- if (typeof m.createExperiment !== 'function') throw new Error('createExperiment not exported');
47
- if (typeof m.recordResult !== 'function') throw new Error('recordResult not exported');
48
- if (typeof m.getProgress !== 'function') throw new Error('getProgress not exported');
49
- if (typeof m.getBestExperiment !== 'function') throw new Error('getBestExperiment not exported');
50
- if (typeof m.loadExperiments !== 'function') throw new Error('loadExperiments not exported');
51
-
52
- const exp = m.createExperiment({
53
- name: 'proof-test',
54
- hypothesis: 'Proof gate validates experiment lifecycle',
55
- mutationType: 'config',
56
- });
57
- if (!exp.id.startsWith('exp_')) throw new Error('Experiment id must start with exp_');
58
- if (exp.status !== 'pending') throw new Error('Experiment status must be pending');
59
- if (!exp.createdAt) throw new Error('Experiment must have createdAt');
60
- },
61
- },
62
- {
63
- id: 'AUTORESEARCH-02',
64
- desc: 'experiment-tracker.js: recordResult() keeps improved experiments, discards regressions',
65
- fn: () => {
66
- process.env.THUMBGATE_FEEDBACK_DIR = tmpDir;
67
- delete require.cache[require.resolve('./experiment-tracker')];
68
- delete require.cache[require.resolve('./feedback-loop')];
69
- const m = require('./experiment-tracker');
70
-
71
- // Kept: score improved
72
- const exp1 = m.createExperiment({ name: 'kept', hypothesis: 'improve' });
73
- const r1 = m.recordResult({ experimentId: exp1.id, score: 0.95, baseline: 0.90, testsPassed: true });
74
- if (!r1.kept) throw new Error('Should keep improved experiment');
75
- if (r1.status !== 'completed') throw new Error('Status must be completed');
76
-
77
- // Discarded: score regressed
78
- const exp2 = m.createExperiment({ name: 'discarded', hypothesis: 'regress' });
79
- const r2 = m.recordResult({ experimentId: exp2.id, score: 0.80, baseline: 0.90 });
80
- if (r2.kept) throw new Error('Should discard regressed experiment');
81
-
82
- // Discarded: tests failed
83
- const exp3 = m.createExperiment({ name: 'test-fail', hypothesis: 'fail' });
84
- const r3 = m.recordResult({ experimentId: exp3.id, score: 0.99, baseline: 0.50, testsPassed: false });
85
- if (r3.kept) throw new Error('Should discard experiment with failed tests');
86
- },
87
- },
88
- {
89
- id: 'AUTORESEARCH-03',
90
- desc: 'experiment-tracker.js: getProgress() returns valid progress with keepRate',
91
- fn: () => {
92
- process.env.THUMBGATE_FEEDBACK_DIR = tmpDir;
93
- delete require.cache[require.resolve('./experiment-tracker')];
94
- delete require.cache[require.resolve('./feedback-loop')];
95
- const m = require('./experiment-tracker');
96
-
97
- const p = m.getProgress();
98
- if (typeof p.totalExperiments !== 'number') throw new Error('totalExperiments must be a number');
99
- if (typeof p.completed !== 'number') throw new Error('completed must be a number');
100
- if (typeof p.kept !== 'number') throw new Error('kept must be a number');
101
- if (typeof p.keepRate !== 'string') throw new Error('keepRate must be a string');
102
- if (!p.lastUpdated) throw new Error('lastUpdated required');
103
-
104
- const paths = m.getExperimentPaths();
105
- if (!fs.existsSync(paths.progressPath)) throw new Error('Progress file must be persisted');
106
- },
107
- },
108
- {
109
- id: 'AUTORESEARCH-04',
110
- desc: 'autoresearch-runner.js: scoreSuite() correctly parses node:test output and bounds score in [0,1]',
111
- fn: () => {
112
- delete require.cache[require.resolve('./autoresearch-runner')];
113
- const m = require('./autoresearch-runner');
114
-
115
- if (typeof m.scoreSuite !== 'function') throw new Error('scoreSuite not exported');
116
- if (typeof m.runIteration !== 'function') throw new Error('runIteration not exported');
117
- if (typeof m.runLoop !== 'function') throw new Error('runLoop not exported');
118
- if (!Array.isArray(m.MUTATION_TARGETS)) throw new Error('MUTATION_TARGETS not exported');
119
-
120
- // Perfect run
121
- const perfect = m.scoreSuite({ testOutput: 'ℹ tests 50\nℹ pass 50\nℹ fail 0', approvalRate: 1.0 });
122
- if (perfect.score < 0.95) throw new Error(`Perfect score should be >= 0.95, got ${perfect.score}`);
123
- if (perfect.score > 1.0) throw new Error(`Score must not exceed 1.0`);
124
-
125
- // Partial failure
126
- const partial = m.scoreSuite({ testOutput: 'ℹ tests 10\nℹ pass 5\nℹ fail 5', approvalRate: 0.5 });
127
- if (partial.score <= 0 || partial.score >= 1) throw new Error(`Partial score must be in (0,1), got ${partial.score}`);
128
-
129
- // Empty
130
- const empty = m.scoreSuite({ testOutput: '' });
131
- if (typeof empty.score !== 'number') throw new Error('Empty output must still return numeric score');
132
- },
133
- },
134
- {
135
- id: 'AUTORESEARCH-05',
136
- desc: 'MUTATION_TARGETS resolve to evolution-state settings with valid ranges',
137
- fn: () => {
138
- delete require.cache[require.resolve('./autoresearch-runner')];
139
- delete require.cache[require.resolve('./evolution-state')];
140
- const m = require('./autoresearch-runner');
141
- const { DEFAULT_SETTINGS } = require('./evolution-state');
142
-
143
- for (const target of m.MUTATION_TARGETS) {
144
- const value = DEFAULT_SETTINGS[target.settingKey];
145
- if (!Number.isFinite(value)) throw new Error(`Default setting missing for ${target.settingKey}`);
146
- if (value < target.range[0] || value > target.range[1]) {
147
- throw new Error(`Current value ${value} for ${target.name} outside range [${target.range}]`);
148
- }
149
- }
150
- },
151
- },
152
- {
153
- id: 'AUTORESEARCH-06',
154
- desc: 'research-backed autoresearch ingests paper context, records citations, and preserves score execution flow',
155
- fn: async () => {
156
- process.env.THUMBGATE_FEEDBACK_DIR = tmpDir;
157
- delete require.cache[require.resolve('./contextfs')];
158
- delete require.cache[require.resolve('./hf-papers')];
159
- delete require.cache[require.resolve('./autoresearch-runner')];
160
- delete require.cache[require.resolve('./experiment-tracker')];
161
- delete require.cache[require.resolve('./feedback-loop')];
162
-
163
- const runner = require('./autoresearch-runner');
164
- const { constructTemplatedPack, NAMESPACES } = require('./contextfs');
165
-
166
- const result = await runner.runIteration({
167
- targetName: 'half_life_days',
168
- nextValue: 8,
169
- testCommand: `${JSON.stringify(process.execPath)} -e ${JSON.stringify([
170
- 'const { readEvolutionState } = require("./scripts/evolution-state");',
171
- 'const passed = readEvolutionState().settings.half_life_days === 8;',
172
- 'console.log("ℹ tests 1");',
173
- 'console.log("ℹ pass " + (passed ? 1 : 0));',
174
- 'console.log("ℹ fail " + (passed ? 0 : 1));',
175
- 'if (!passed) process.exit(1);',
176
- ].join(' '))}`,
177
- holdoutCommands: [`${JSON.stringify(process.execPath)} -e ${JSON.stringify([
178
- 'const { readEvolutionState } = require("./scripts/evolution-state");',
179
- 'const passed = readEvolutionState().settings.half_life_days === 8;',
180
- 'console.log("ℹ tests 1");',
181
- 'console.log("ℹ pass " + (passed ? 1 : 0));',
182
- 'console.log("ℹ fail " + (passed ? 0 : 1));',
183
- 'if (!passed) process.exit(1);',
184
- ].join(' '))}`],
185
- timeoutMs: 5000,
186
- researchQuery: 'rank fusion',
187
- searchPapersImpl: async () => [{
188
- paperId: '2603.01896',
189
- title: 'Agentic Rank Fusion for Research Systems',
190
- summary: 'Retrieval fusion for agent workflows.',
191
- authors: ['Ada Lovelace'],
192
- tags: ['retrieval'],
193
- url: 'https://arxiv.org/abs/2603.01896',
194
- source: 'huggingface-papers',
195
- }],
196
- });
197
-
198
- if (result.metrics.researchQuery !== 'rank fusion') {
199
- throw new Error('Research query metadata missing from autoresearch result');
200
- }
201
- if (!result.metrics.researchPackId) {
202
- throw new Error('Research pack id missing from autoresearch result');
203
- }
204
- if (!result.metrics.researchPaperIds.includes('2603.01896')) {
205
- throw new Error('Research paper id missing from autoresearch result');
206
- }
207
- if (result.metrics.baselineEvaluation.primary.results[0].details.pass !== 0) {
208
- throw new Error('Expected baseline to fail before the candidate mutation');
209
- }
210
- if (result.metrics.candidateEvaluation.primary.results[0].details.pass !== 1) {
211
- throw new Error('Research context changed score execution flow');
212
- }
213
-
214
- const pack = constructTemplatedPack({
215
- template: 'autoresearch-brief',
216
- query: 'rank fusion retrieval',
217
- });
218
- if (!pack.items.some((item) => item.namespace === NAMESPACES.research)) {
219
- throw new Error('Autoresearch pack did not include research namespace content');
220
- }
221
- },
222
- },
223
- ];
224
-
225
- console.log('Phase 9: Autoresearch — Proof Gate\n');
226
- console.log('Checking requirements:\n');
227
-
228
- for (const check of checks) {
229
- try {
230
- await check.fn();
231
- results.passed++;
232
- results.requirements[check.id] = { status: 'pass', desc: check.desc };
233
- console.log(` PASS ${check.id}: ${check.desc}`);
234
- } catch (err) {
235
- results.failed++;
236
- results.requirements[check.id] = {
237
- status: 'fail',
238
- desc: check.desc,
239
- error: err.message,
240
- };
241
- console.error(` FAIL ${check.id}: ${err.message}`);
242
- }
243
- }
244
-
245
- // Cleanup
246
- try {
247
- fs.rmSync(tmpDir, { recursive: true, force: true });
248
- } catch {}
249
- delete process.env.THUMBGATE_FEEDBACK_DIR;
250
-
251
- // Write proof artifacts
252
- fs.mkdirSync(proofDir, { recursive: true });
253
-
254
- const report = {
255
- phase: '09-autoresearch',
256
- generatedAt: new Date().toISOString(),
257
- passed: results.passed,
258
- failed: results.failed,
259
- total: checks.length,
260
- requirements: results.requirements,
261
- };
262
-
263
- fs.writeFileSync(reportJson, JSON.stringify(report, null, 2) + '\n');
264
-
265
- const md = [
266
- '# Phase 9: Autoresearch — Proof Report',
267
- '',
268
- `Generated: ${report.generatedAt}`,
269
- `Result: ${results.passed}/${checks.length} passed`,
270
- '',
271
- '## Requirements',
272
- '',
273
- ...Object.entries(results.requirements).map(([id, r]) => {
274
- const checkbox = r.status === 'pass' ? '[x]' : '[ ]';
275
- const errLine = r.error ? `\n - Error: \`${r.error}\`` : '';
276
- return `- ${checkbox} **${id}**: ${r.desc}${errLine}`;
277
- }),
278
- '',
279
- '## Evidence',
280
- '',
281
- '- `scripts/experiment-tracker.js` — Experiment lifecycle: create, record, progress, best',
282
- '- `scripts/autoresearch-runner.js` — Karpathy-inspired self-optimizing mutation loop',
283
- '- `scripts/hf-papers.js` — Hugging Face papers ingestion and research-brief builder',
284
- '- `tests/autoresearch.test.js` and `tests/hf-papers.test.js` — Node:test coverage for research-backed autoresearch',
285
- '- `scripts/prove-autoresearch.js` — This proof gate with 6 requirement checks',
286
- '',
287
- ].join('\n');
288
-
289
- fs.writeFileSync(reportMd, md);
290
-
291
- console.log(`\nPhase 9 proof: ${results.passed} passed, ${results.failed} failed`);
292
- console.log(`Report: ${reportJson}`);
293
-
294
- if (results.failed > 0) process.exit(1);
295
- }
296
-
297
- if (require.main === module) {
298
- run().catch((error) => {
299
- console.error(error.message);
300
- process.exit(1);
301
- });
302
- }
303
-
304
- module.exports = { run };
@@ -1,277 +0,0 @@
1
- 'use strict';
2
-
3
- const fs = require('fs');
4
- const os = require('os');
5
- const path = require('path');
6
-
7
- const ROOT = path.join(__dirname, '..');
8
- const GATES_ENGINE_PATH = require.resolve('./gates-engine');
9
- const MCP_SERVER_PATH = require.resolve('../adapters/mcp/server-stdio');
10
- const VERIFY_RUN_PATH = require.resolve('./verify-run');
11
-
12
- function resolveProofPaths() {
13
- const proofDir = process.env.THUMBGATE_CLAIM_VERIFICATION_PROOF_DIR
14
- || process.env.THUMBGATE_PROOF_DIR
15
- || path.join(ROOT, 'proof');
16
- return {
17
- proofDir,
18
- reportJson: path.join(proofDir, 'claim-verification-report.json'),
19
- reportMd: path.join(proofDir, 'claim-verification-report.md'),
20
- };
21
- }
22
-
23
- function resetModules() {
24
- [GATES_ENGINE_PATH, MCP_SERVER_PATH, VERIFY_RUN_PATH].forEach((modulePath) => {
25
- delete require.cache[modulePath];
26
- });
27
- }
28
-
29
- async function withIsolatedRuntime(fn) {
30
- const previousHome = process.env.HOME;
31
- const previousFeedbackDir = process.env.THUMBGATE_FEEDBACK_DIR;
32
- const previousNoRateLimit = process.env.THUMBGATE_NO_RATE_LIMIT;
33
- const homeDir = fs.mkdtempSync(path.join(os.tmpdir(), 'thumbgate-claim-home-'));
34
- const feedbackDir = fs.mkdtempSync(path.join(os.tmpdir(), 'thumbgate-claim-feedback-'));
35
-
36
- process.env.HOME = homeDir;
37
- process.env.THUMBGATE_FEEDBACK_DIR = feedbackDir;
38
- process.env.THUMBGATE_NO_RATE_LIMIT = '1';
39
- resetModules();
40
-
41
- try {
42
- const gatesEngine = require('./gates-engine');
43
- const mcpServer = require('../adapters/mcp/server-stdio');
44
- const verifyRun = require('./verify-run');
45
- return await fn({ gatesEngine, mcpServer, verifyRun, homeDir, feedbackDir });
46
- } finally {
47
- resetModules();
48
- if (previousHome === undefined) delete process.env.HOME;
49
- else process.env.HOME = previousHome;
50
- if (previousFeedbackDir === undefined) delete process.env.THUMBGATE_FEEDBACK_DIR;
51
- else process.env.THUMBGATE_FEEDBACK_DIR = previousFeedbackDir;
52
- if (previousNoRateLimit === undefined) delete process.env.THUMBGATE_NO_RATE_LIMIT;
53
- else process.env.THUMBGATE_NO_RATE_LIMIT = previousNoRateLimit;
54
- fs.rmSync(homeDir, { recursive: true, force: true });
55
- fs.rmSync(feedbackDir, { recursive: true, force: true });
56
- }
57
- }
58
-
59
- async function run() {
60
- const results = { passed: 0, failed: 0, requirements: {} };
61
- const { proofDir, reportJson, reportMd } = resolveProofPaths();
62
-
63
- const checks = [
64
- {
65
- id: 'CLAIM-01',
66
- desc: 'default claim gates ship with the expected evidence-backed assertions',
67
- fn: async () => {
68
- await withIsolatedRuntime(async ({ gatesEngine }) => {
69
- const config = gatesEngine.loadClaimGates();
70
- const patterns = config.claims.map((claim) => claim.pattern);
71
- if (!patterns.some((pattern) => pattern.includes('figma'))) {
72
- throw new Error('Missing Figma claim gate');
73
- }
74
- if (!patterns.some((pattern) => pattern.includes('tests? pass'))) {
75
- throw new Error('Missing tests-pass claim gate');
76
- }
77
- if (!patterns.some((pattern) => pattern.includes('ready to merge'))) {
78
- throw new Error('Missing PR readiness claim gate');
79
- }
80
- });
81
- },
82
- },
83
- {
84
- id: 'CLAIM-02',
85
- desc: 'tracked evidence verifies shipped default claims',
86
- fn: async () => {
87
- await withIsolatedRuntime(async ({ gatesEngine }) => {
88
- gatesEngine.trackAction('figma_verified', { tool: 'mcp__figma__get_design_context' });
89
- const result = gatesEngine.verifyClaimEvidence('colors match Figma design');
90
- if (!result.verified) {
91
- throw new Error(`Expected verified claim, got ${JSON.stringify(result)}`);
92
- }
93
- });
94
- },
95
- },
96
- {
97
- id: 'CLAIM-03',
98
- desc: 'missing evidence blocks claims with actionable missing actions',
99
- fn: async () => {
100
- await withIsolatedRuntime(async ({ gatesEngine }) => {
101
- const result = gatesEngine.verifyClaimEvidence('tests pass');
102
- if (result.verified) {
103
- throw new Error('Expected unverified tests-pass claim');
104
- }
105
- if (!result.checks[0] || !result.checks[0].missing.includes('tests_passed')) {
106
- throw new Error(`Expected missing tests_passed action, got ${JSON.stringify(result)}`);
107
- }
108
- });
109
- },
110
- },
111
- {
112
- id: 'CLAIM-04',
113
- desc: 'custom claim gates persist only to local runtime state',
114
- fn: async () => {
115
- await withIsolatedRuntime(async ({ gatesEngine }) => {
116
- const baseline = fs.readFileSync(gatesEngine.DEFAULT_CLAIM_GATES_PATH, 'utf8');
117
- gatesEngine.registerClaimGate('ready to demo', ['tests_passed'], 'Run tests before demo claims');
118
-
119
- if (!fs.existsSync(gatesEngine.CUSTOM_CLAIM_GATES_PATH)) {
120
- throw new Error('Expected runtime custom claim gate file');
121
- }
122
- if (fs.readFileSync(gatesEngine.DEFAULT_CLAIM_GATES_PATH, 'utf8') !== baseline) {
123
- throw new Error('Default claim gates file was mutated');
124
- }
125
- });
126
- },
127
- },
128
- {
129
- id: 'CLAIM-05',
130
- desc: 'MCP tools expose track_action, verify_claim, and register_claim_gate end to end',
131
- fn: async () => {
132
- await withIsolatedRuntime(async ({ mcpServer }) => {
133
- await mcpServer.handleRequest({
134
- jsonrpc: '2.0',
135
- id: 1,
136
- method: 'tools/call',
137
- params: {
138
- name: 'track_action',
139
- arguments: {
140
- actionId: 'tests_passed',
141
- metadata: { source: 'npm test' },
142
- },
143
- },
144
- });
145
-
146
- const verifyResponse = await mcpServer.handleRequest({
147
- jsonrpc: '2.0',
148
- id: 2,
149
- method: 'tools/call',
150
- params: {
151
- name: 'verify_claim',
152
- arguments: {
153
- claim: 'tests pass',
154
- },
155
- },
156
- });
157
- const verifyPayload = JSON.parse(verifyResponse.content[0].text);
158
- if (!verifyPayload.verified) {
159
- throw new Error(`Expected verified MCP claim, got ${verifyResponse.content[0].text}`);
160
- }
161
-
162
- const registerResponse = await mcpServer.handleRequest({
163
- jsonrpc: '2.0',
164
- id: 3,
165
- method: 'tools/call',
166
- params: {
167
- name: 'register_claim_gate',
168
- arguments: {
169
- claimPattern: 'ready to demo',
170
- requiredActions: ['tests_passed'],
171
- },
172
- },
173
- });
174
- const registerPayload = JSON.parse(registerResponse.content[0].text);
175
- if (registerPayload.pattern !== 'ready to demo') {
176
- throw new Error(`Unexpected custom gate response: ${registerResponse.content[0].text}`);
177
- }
178
- });
179
- },
180
- },
181
- {
182
- id: 'CLAIM-06',
183
- desc: 'verify:full includes the claim-verification proof lane and artifact',
184
- fn: async () => {
185
- await withIsolatedRuntime(async ({ verifyRun, feedbackDir }) => {
186
- const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'thumbgate-claim-proof-cwd-'));
187
- try {
188
- const plan = verifyRun.buildVerifyPlan('full');
189
- const commands = plan.map((step) => [step.command, ...(step.args || [])].join(' ')).join('\n');
190
- if (!commands.includes('prove:claim-verification')) {
191
- throw new Error('verify:full is missing prove:claim-verification');
192
- }
193
-
194
- const entry = verifyRun.recordVerifyWorkflowRun('full', cwd, feedbackDir);
195
- const hasArtifact = entry.proofArtifacts.some((artifact) => artifact.endsWith(path.join('proof', 'claim-verification-report.json')));
196
- if (!hasArtifact) {
197
- throw new Error('verify workflow run is missing claim verification proof artifact');
198
- }
199
- } finally {
200
- fs.rmSync(cwd, { recursive: true, force: true });
201
- }
202
- });
203
- },
204
- },
205
- ];
206
-
207
- console.log('Claim Verification Gates - Proof Gate\n');
208
- console.log('Checking requirements:\n');
209
-
210
- for (const check of checks) {
211
- try {
212
- await check.fn();
213
- results.passed += 1;
214
- results.requirements[check.id] = { status: 'pass', desc: check.desc };
215
- console.log(` PASS ${check.id}: ${check.desc}`);
216
- } catch (error) {
217
- results.failed += 1;
218
- results.requirements[check.id] = {
219
- status: 'fail',
220
- desc: check.desc,
221
- error: error.message,
222
- };
223
- console.error(` FAIL ${check.id}: ${error.message}`);
224
- }
225
- }
226
-
227
- fs.mkdirSync(proofDir, { recursive: true });
228
-
229
- const report = {
230
- phase: '13-claim-verification',
231
- generatedAt: new Date().toISOString(),
232
- passed: results.passed,
233
- failed: results.failed,
234
- total: checks.length,
235
- requirements: results.requirements,
236
- };
237
-
238
- fs.writeFileSync(reportJson, `${JSON.stringify(report, null, 2)}\n`);
239
-
240
- const markdown = [
241
- '# Claim Verification Proof Report',
242
- '',
243
- `Generated: ${report.generatedAt}`,
244
- `Result: ${results.passed}/${checks.length} passed`,
245
- '',
246
- '## Requirements',
247
- '',
248
- ...Object.entries(results.requirements).map(([id, requirement]) => {
249
- const checkbox = requirement.status === 'pass' ? '[x]' : '[ ]';
250
- const errorLine = requirement.error ? `\n - Error: \`${requirement.error}\`` : '';
251
- return `- ${checkbox} **${id}**: ${requirement.desc}${errorLine}`;
252
- }),
253
- '',
254
- `${results.passed} passed, ${results.failed} failed`,
255
- '',
256
- ].join('\n');
257
- fs.writeFileSync(reportMd, `${markdown}\n`);
258
-
259
- console.log(`\nResult: ${results.passed} passed, ${results.failed} failed`);
260
- console.log(`Report: ${reportJson}`);
261
-
262
- if (results.failed > 0) {
263
- process.exitCode = 1;
264
- }
265
- }
266
-
267
- if (require.main === module) {
268
- run().catch((error) => {
269
- console.error(error.message || String(error));
270
- process.exit(1);
271
- });
272
- }
273
-
274
- module.exports = {
275
- resolveProofPaths,
276
- run,
277
- };