thumbgate 1.4.2 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279)
  1. package/.claude-plugin/README.md +45 -34
  2. package/.claude-plugin/marketplace.json +3 -3
  3. package/.claude-plugin/plugin.json +3 -3
  4. package/.well-known/llms.txt +1 -1
  5. package/.well-known/mcp/server-card.json +1 -1
  6. package/README.md +26 -2
  7. package/adapters/README.md +4 -1
  8. package/adapters/claude/.mcp.json +2 -2
  9. package/adapters/codex/config.toml +2 -2
  10. package/adapters/mcp/server-stdio.js +10 -4
  11. package/adapters/opencode/opencode.json +1 -1
  12. package/bin/cli.js +246 -90
  13. package/config/mcp-allowlists.json +11 -3
  14. package/package.json +184 -21
  15. package/scripts/audit-trail.js +25 -15
  16. package/scripts/auto-wire-hooks.js +127 -0
  17. package/scripts/cli-demo.js +102 -0
  18. package/scripts/cli-schema.js +285 -0
  19. package/scripts/cli-status.js +166 -0
  20. package/scripts/cross-encoder-reranker.js +235 -0
  21. package/scripts/explore-subcommands.js +277 -0
  22. package/scripts/explore.js +569 -0
  23. package/scripts/feedback-loop.js +20 -6
  24. package/scripts/lesson-inference.js +7 -1
  25. package/scripts/lesson-reranker.js +263 -0
  26. package/scripts/lesson-retrieval.js +34 -17
  27. package/scripts/lesson-search.js +69 -0
  28. package/scripts/perplexity-client.js +210 -0
  29. package/scripts/reflector-agent.js +2 -2
  30. package/scripts/statusline-local-stats.js +3 -1
  31. package/scripts/statusline.sh +12 -11
  32. package/src/api/server.js +178 -17
  33. package/src/index.js +3 -0
  34. package/.claude-plugin/bundle/icon.png +0 -0
  35. package/.claude-plugin/bundle/icon.svg +0 -18
  36. package/.claude-plugin/bundle/server/index.js +0 -24
  37. package/adapters/chatgpt/INSTALL.md +0 -138
  38. package/bin/memory.sh +0 -64
  39. package/bin/obsidian-sync.sh +0 -20
  40. package/plugins/amp-skill/INSTALL.md +0 -52
  41. package/plugins/amp-skill/SKILL.md +0 -64
  42. package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +0 -22
  43. package/plugins/claude-codex-bridge/.mcp.json +0 -14
  44. package/plugins/claude-codex-bridge/INSTALL.md +0 -43
  45. package/plugins/claude-codex-bridge/README.md +0 -46
  46. package/plugins/claude-codex-bridge/scripts/codex-bridge.js +0 -286
  47. package/plugins/claude-codex-bridge/skills/adversarial-review/SKILL.md +0 -24
  48. package/plugins/claude-codex-bridge/skills/result/SKILL.md +0 -22
  49. package/plugins/claude-codex-bridge/skills/review/SKILL.md +0 -28
  50. package/plugins/claude-codex-bridge/skills/second-pass/SKILL.md +0 -27
  51. package/plugins/claude-codex-bridge/skills/setup/SKILL.md +0 -21
  52. package/plugins/claude-codex-bridge/skills/status/SKILL.md +0 -19
  53. package/plugins/claude-skill/INSTALL.md +0 -55
  54. package/plugins/claude-skill/SKILL.md +0 -46
  55. package/plugins/codex-profile/.codex-plugin/plugin.json +0 -43
  56. package/plugins/codex-profile/.mcp.json +0 -14
  57. package/plugins/codex-profile/AGENTS.md +0 -20
  58. package/plugins/codex-profile/INSTALL.md +0 -89
  59. package/plugins/codex-profile/README.md +0 -61
  60. package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +0 -23
  61. package/plugins/cursor-marketplace/CHANGELOG.md +0 -30
  62. package/plugins/cursor-marketplace/LICENSE +0 -21
  63. package/plugins/cursor-marketplace/README.md +0 -124
  64. package/plugins/cursor-marketplace/agents/reliability-reviewer.md +0 -31
  65. package/plugins/cursor-marketplace/assets/logo-400x400.png +0 -0
  66. package/plugins/cursor-marketplace/commands/capture-feedback.md +0 -33
  67. package/plugins/cursor-marketplace/commands/check-gates.md +0 -25
  68. package/plugins/cursor-marketplace/commands/show-lessons.md +0 -27
  69. package/plugins/cursor-marketplace/hooks/hooks.json +0 -10
  70. package/plugins/cursor-marketplace/mcp.json +0 -14
  71. package/plugins/cursor-marketplace/rules/feedback-capture.mdc +0 -34
  72. package/plugins/cursor-marketplace/rules/pre-action-gates.mdc +0 -30
  73. package/plugins/cursor-marketplace/rules/session-continuity.mdc +0 -28
  74. package/plugins/cursor-marketplace/scripts/gate-check.sh +0 -21
  75. package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +0 -48
  76. package/plugins/cursor-marketplace/skills/prevention-rules/SKILL.md +0 -31
  77. package/plugins/cursor-marketplace/skills/recall-context/SKILL.md +0 -30
  78. package/plugins/cursor-marketplace/skills/search-lessons/SKILL.md +0 -33
  79. package/plugins/gemini-extension/INSTALL.md +0 -92
  80. package/plugins/gemini-extension/gemini_prompt.txt +0 -14
  81. package/plugins/gemini-extension/tool_contract.json +0 -45
  82. package/plugins/opencode-profile/INSTALL.md +0 -57
  83. package/public/assets/instagram-card.png +0 -0
  84. package/public/assets/tiktok-agent-memory.mp4 +0 -0
  85. package/public/blog.html +0 -474
  86. package/public/compare/mem0.html +0 -189
  87. package/public/compare/speclock.html +0 -180
  88. package/public/compare.html +0 -310
  89. package/public/dashboard.html +0 -1100
  90. package/public/guide.html +0 -317
  91. package/public/guides/claude-code-prevent-repeated-mistakes.html +0 -161
  92. package/public/guides/codex-cli-guardrails.html +0 -158
  93. package/public/guides/cursor-prevent-repeated-mistakes.html +0 -161
  94. package/public/guides/pre-action-gates.html +0 -162
  95. package/public/guides/stop-repeated-ai-agent-mistakes.html +0 -159
  96. package/public/index.html +0 -1128
  97. package/public/js/buyer-intent.js +0 -252
  98. package/public/learn/agent-harness-pattern.html +0 -180
  99. package/public/learn/ai-agent-persistent-memory.html +0 -203
  100. package/public/learn/learn.css +0 -45
  101. package/public/learn/mcp-pre-action-gates-explained.html +0 -172
  102. package/public/learn/stop-ai-agent-force-push.html +0 -134
  103. package/public/learn/vibe-coding-safety-net.html +0 -142
  104. package/public/learn.html +0 -274
  105. package/public/lessons.html +0 -967
  106. package/public/llm-context.md +0 -140
  107. package/public/pro.html +0 -1087
  108. package/public/vercel.json +0 -8
  109. package/scripts/a2ui-engine.js +0 -73
  110. package/scripts/adk-consolidator.js +0 -274
  111. package/scripts/agent-security-hardening.js +0 -225
  112. package/scripts/ai-search-visibility.js +0 -142
  113. package/scripts/autonomous-sales-agent.js +0 -39
  114. package/scripts/autoresearch-runner.js +0 -216
  115. package/scripts/background-agent-governance.js +0 -229
  116. package/scripts/behavioral-extraction.js +0 -93
  117. package/scripts/budget-enforcer.js +0 -173
  118. package/scripts/budget-guard.js +0 -173
  119. package/scripts/build-claude-mcpb.js +0 -255
  120. package/scripts/build-codex-plugin.js +0 -152
  121. package/scripts/capture-railway-diagnostics.sh +0 -97
  122. package/scripts/changeset-check.js +0 -372
  123. package/scripts/check-congruence.js +0 -443
  124. package/scripts/computer-use-firewall.js +0 -280
  125. package/scripts/content-engine/linkedin-content-generator.js +0 -154
  126. package/scripts/content-engine/output/linkedin-memento-validation.md +0 -17
  127. package/scripts/content-engine/output/linkedin-posts-2026-04-09.md +0 -175
  128. package/scripts/content-engine/reddit-thread-finder.js +0 -154
  129. package/scripts/context-engine.js +0 -710
  130. package/scripts/daily-digest.js +0 -11
  131. package/scripts/data-governance.js +0 -173
  132. package/scripts/deploy-gcp.sh +0 -44
  133. package/scripts/deploy-policy.js +0 -249
  134. package/scripts/disagreement-mining.js +0 -315
  135. package/scripts/dpo-optimizer.js +0 -206
  136. package/scripts/ensure-repo-bootstrap.js +0 -130
  137. package/scripts/ephemeral-agent-store.js +0 -212
  138. package/scripts/eval-harness.js +0 -56
  139. package/scripts/export-kto-pairs.js +0 -309
  140. package/scripts/export-training.js +0 -446
  141. package/scripts/feedback-fallback.js +0 -111
  142. package/scripts/feedback-inbox-read.js +0 -162
  143. package/scripts/feedback-root-consolidator.js +0 -233
  144. package/scripts/feedback-to-memory.js +0 -185
  145. package/scripts/gate-satisfy.js +0 -42
  146. package/scripts/generate-paperbanana-diagrams.sh +0 -99
  147. package/scripts/generate-pretool-hook.sh +0 -40
  148. package/scripts/github-about.js +0 -430
  149. package/scripts/github-outreach.js +0 -65
  150. package/scripts/gtm-revenue-loop.js +0 -535
  151. package/scripts/hallucination-detector.js +0 -226
  152. package/scripts/hf-papers.js +0 -317
  153. package/scripts/hook-auto-capture.sh +0 -100
  154. package/scripts/hook-stop-pr-thread-check.sh +0 -68
  155. package/scripts/hook-stop-self-score.sh +0 -51
  156. package/scripts/hook-stop-verify-deploy.sh +0 -31
  157. package/scripts/hook-verify-before-done.sh +0 -20
  158. package/scripts/managed-dpo-export.js +0 -91
  159. package/scripts/markdown-escape.js +0 -12
  160. package/scripts/marketing-experiment.js +0 -657
  161. package/scripts/memalign-recall.js +0 -111
  162. package/scripts/memory-migration.js +0 -296
  163. package/scripts/meta-policy.js +0 -190
  164. package/scripts/metered-billing.js +0 -16
  165. package/scripts/model-tier-router.js +0 -310
  166. package/scripts/money-watcher.js +0 -218
  167. package/scripts/multi-hop-recall.js +0 -240
  168. package/scripts/per-step-scoring.js +0 -163
  169. package/scripts/perplexity-marketing.js +0 -466
  170. package/scripts/pii-scanner.js +0 -153
  171. package/scripts/plan-gate.js +0 -154
  172. package/scripts/post-everywhere.js +0 -341
  173. package/scripts/post-to-x-retry.sh +0 -22
  174. package/scripts/post-to-x.js +0 -369
  175. package/scripts/pr-manager.js +0 -421
  176. package/scripts/principle-extractor.js +0 -162
  177. package/scripts/pro-features.js +0 -41
  178. package/scripts/prompt-dlp.js +0 -222
  179. package/scripts/prove-adapters.js +0 -860
  180. package/scripts/prove-attribution.js +0 -361
  181. package/scripts/prove-automation.js +0 -651
  182. package/scripts/prove-autoresearch.js +0 -304
  183. package/scripts/prove-claim-verification.js +0 -277
  184. package/scripts/prove-cloudflare-sandbox.js +0 -161
  185. package/scripts/prove-data-pipeline.js +0 -408
  186. package/scripts/prove-data-quality.js +0 -227
  187. package/scripts/prove-evolution.js +0 -352
  188. package/scripts/prove-harnesses.js +0 -287
  189. package/scripts/prove-intelligence.js +0 -257
  190. package/scripts/prove-lancedb.js +0 -425
  191. package/scripts/prove-local-intelligence.js +0 -340
  192. package/scripts/prove-loop-closure.js +0 -263
  193. package/scripts/prove-packaged-runtime.js +0 -326
  194. package/scripts/prove-predictive-insights.js +0 -355
  195. package/scripts/prove-runtime.js +0 -363
  196. package/scripts/prove-seo-gsd.js +0 -234
  197. package/scripts/prove-settings.js +0 -279
  198. package/scripts/prove-subway-upgrades.js +0 -277
  199. package/scripts/prove-tessl.js +0 -229
  200. package/scripts/prove-training-export.js +0 -325
  201. package/scripts/prove-workflow-contract.js +0 -112
  202. package/scripts/prove-xmemory.js +0 -332
  203. package/scripts/publish-decision.js +0 -159
  204. package/scripts/ralph-loop.js +0 -376
  205. package/scripts/ralph-mode-ci.js +0 -331
  206. package/scripts/reddit-dm-outreach.js +0 -192
  207. package/scripts/reddit-monitor-cron.sh +0 -26
  208. package/scripts/reminder-engine.js +0 -132
  209. package/scripts/revenue-status.js +0 -472
  210. package/scripts/rotate-stripe-webhook-secret.js +0 -314
  211. package/scripts/schedule-manager.js +0 -249
  212. package/scripts/self-healing-check.js +0 -193
  213. package/scripts/shieldcortex-memory-firewall-runner.mjs +0 -53
  214. package/scripts/skill-exporter.js +0 -260
  215. package/scripts/skill-materializer.js +0 -134
  216. package/scripts/skill-packs.js +0 -136
  217. package/scripts/skill-proposer.js +0 -99
  218. package/scripts/skill-quality-tracker.js +0 -282
  219. package/scripts/slow-loop.js +0 -72
  220. package/scripts/social-analytics/db/analytics.sqlite +0 -0
  221. package/scripts/social-analytics/db/schema.sql +0 -32
  222. package/scripts/social-analytics/digest.js +0 -256
  223. package/scripts/social-analytics/engagement-audit.js +0 -185
  224. package/scripts/social-analytics/generate-instagram-card.js +0 -97
  225. package/scripts/social-analytics/instagram-thumbgate-post.js +0 -111
  226. package/scripts/social-analytics/install-growth-automation.js +0 -114
  227. package/scripts/social-analytics/load-env.js +0 -77
  228. package/scripts/social-analytics/mcp-server.js +0 -289
  229. package/scripts/social-analytics/normalizer.js +0 -580
  230. package/scripts/social-analytics/notify.js +0 -162
  231. package/scripts/social-analytics/poll-all.js +0 -107
  232. package/scripts/social-analytics/pollers/github.js +0 -195
  233. package/scripts/social-analytics/pollers/instagram.js +0 -253
  234. package/scripts/social-analytics/pollers/linkedin.js +0 -340
  235. package/scripts/social-analytics/pollers/plausible.js +0 -245
  236. package/scripts/social-analytics/pollers/reddit.js +0 -306
  237. package/scripts/social-analytics/pollers/threads.js +0 -233
  238. package/scripts/social-analytics/pollers/tiktok.js +0 -203
  239. package/scripts/social-analytics/pollers/x.js +0 -227
  240. package/scripts/social-analytics/pollers/youtube.js +0 -304
  241. package/scripts/social-analytics/pollers/zernio.js +0 -183
  242. package/scripts/social-analytics/publish-instagram-thumbgate.js +0 -104
  243. package/scripts/social-analytics/publish-thumbgate-launch.js +0 -322
  244. package/scripts/social-analytics/publishers/devto.js +0 -122
  245. package/scripts/social-analytics/publishers/instagram.js +0 -317
  246. package/scripts/social-analytics/publishers/linkedin.js +0 -294
  247. package/scripts/social-analytics/publishers/reddit.js +0 -385
  248. package/scripts/social-analytics/publishers/threads.js +0 -275
  249. package/scripts/social-analytics/publishers/tiktok.js +0 -217
  250. package/scripts/social-analytics/publishers/x.js +0 -259
  251. package/scripts/social-analytics/publishers/youtube.js +0 -223
  252. package/scripts/social-analytics/publishers/zernio.js +0 -539
  253. package/scripts/social-analytics/reconcile-thumbgate-campaign.js +0 -165
  254. package/scripts/social-analytics/run-digest.js +0 -34
  255. package/scripts/social-analytics/schedule-thumbgate-campaign.js +0 -275
  256. package/scripts/social-analytics/store.js +0 -455
  257. package/scripts/social-analytics/sync-launch-assets.js +0 -185
  258. package/scripts/social-analytics/utm.js +0 -143
  259. package/scripts/social-pipeline.js +0 -2626
  260. package/scripts/social-post-hourly.js +0 -228
  261. package/scripts/social-quality-gate.js +0 -134
  262. package/scripts/social-reply-monitor.js +0 -592
  263. package/scripts/status-dashboard.js +0 -155
  264. package/scripts/stripe-live-status.js +0 -115
  265. package/scripts/subagent-profiles.js +0 -79
  266. package/scripts/sync-branch-protection.js +0 -340
  267. package/scripts/sync-gh-secrets-from-env.sh +0 -70
  268. package/scripts/sync-github-about.js +0 -55
  269. package/scripts/sync-version.js +0 -479
  270. package/scripts/synthetic-dpo.js +0 -234
  271. package/scripts/tessl-export.js +0 -369
  272. package/scripts/test-coverage.js +0 -128
  273. package/scripts/thumbgate_session_start.sh +0 -32
  274. package/scripts/train_from_feedback.py +0 -929
  275. package/scripts/validate-feedback.js +0 -581
  276. package/scripts/verify-obsidian-setup.sh +0 -269
  277. package/scripts/verify-run.js +0 -269
  278. package/scripts/weekly-auto-post.js +0 -124
  279. package/scripts/x-autonomous-marketing.js +0 -139
@@ -1,651 +0,0 @@
1
- #!/usr/bin/env node
2
- const fs = require('fs');
3
- const path = require('path');
4
- const os = require('os');
5
- const {
6
- captureFeedback,
7
- analyzeFeedback,
8
- buildPreventionRules,
9
- getFeedbackPaths,
10
- readJSONL,
11
- waitForBackgroundSideEffects,
12
- } = require('./feedback-loop');
13
- const { exportDpoFromMemories } = require('./export-dpo-pairs');
14
- const { planIntent } = require('./intent-router');
15
- const { startHandoff, completeHandoff } = require('./delegation-runtime');
16
- const { startServer } = require('../src/api/server');
17
- const { handleRequest } = require('../adapters/mcp/server-stdio');
18
- const { collectHealthReport } = require('./self-healing-check');
19
- const { runSelfHeal } = require('./self-heal');
20
- const { getContextFsRoot, NAMESPACES } = require('./contextfs');
21
- const { traceForProofCheck, aggregateTraces } = require('./code-reasoning');
22
- const { runVerificationLoop } = require('./verification-loop');
23
- const { run: runGateCheck } = require('./gates-engine');
24
- const { evaluatePromptGuard } = require('./prompt-guard');
25
- const { ensureDir } = require('./fs-utils');
26
-
27
- const ROOT = path.join(__dirname, '..');
28
- const DEFAULT_PROOF_DIR = path.join(ROOT, 'proof', 'automation');
29
-
30
-
31
/**
 * Throws an Error carrying `message` unless `condition` is truthy.
 *
 * @param {*} condition - Any value; a truthy value means the expectation held.
 * @param {string} message - Text for the Error raised when it did not.
 * @returns {void}
 * @throws {Error} When `condition` is falsy.
 */
function check(condition, message) {
  if (condition) {
    return;
  }
  throw new Error(message);
}
34
-
35
/**
 * Calls the global `fetch`, retrying when the call rejects.
 *
 * Only a rejected `fetch` promise triggers a retry; whatever `fetch` resolves
 * with is returned to the caller untouched. The pause before retry number `k`
 * (1-based) is `delayMs * k` milliseconds.
 *
 * @param {*} url - Forwarded to `fetch` unchanged.
 * @param {object} [options] - Forwarded to `fetch` unchanged.
 * @param {object} [retryOptions] - Retry tuning.
 * @param {number} [retryOptions.retries=5] - Extra attempts allowed after the
 *   first one. Negative or non-numeric values are treated as 0 and fractional
 *   values are rounded down, so at least one request is always made.
 * @param {number} [retryOptions.delayMs=100] - Base back-off in milliseconds.
 * @returns {Promise<*>} The value the first successful `fetch` call resolves with.
 * @throws The error from the last failed attempt once retries are exhausted.
 */
async function fetchWithRetry(url, options, { retries = 5, delayMs = 100 } = {}) {
  // A negative or NaN `retries` previously skipped the loop entirely, so no
  // request was made and the function ended in `throw null`. Normalise the
  // budget so one attempt always happens and a real Error is always surfaced.
  const requested = Number(retries);
  const maxRetries = Number.isNaN(requested) ? 0 : Math.max(0, Math.floor(requested));

  for (let attempt = 0; ; attempt += 1) {
    try {
      return await fetch(url, options);
    } catch (err) {
      if (attempt >= maxRetries) {
        throw err;
      }
      await new Promise((resolve) => setTimeout(resolve, delayMs * (attempt + 1)));
    }
  }
}
52
-
53
- async function runAutomationProof(options = {}) {
54
- const proofDir = options.proofDir || process.env.THUMBGATE_AUTOMATION_PROOF_DIR || DEFAULT_PROOF_DIR;
55
- const writeArtifacts = options.writeArtifacts !== false;
56
- const proofPort = options.port ?? 0;
57
-
58
- if (writeArtifacts) ensureDir(proofDir);
59
-
60
- const tmpFeedbackDir = fs.mkdtempSync(path.join(os.tmpdir(), 'thumbgate-automation-proof-'));
61
- const previousCodegraphStub = process.env.THUMBGATE_CODEGRAPH_STUB_RESPONSE;
62
- process.env.THUMBGATE_FEEDBACK_DIR = tmpFeedbackDir;
63
- process.env.THUMBGATE_API_KEY = 'automation-proof-key';
64
- process.env.THUMBGATE_MCP_PROFILE = 'default';
65
- process.env.THUMBGATE_CODEGRAPH_STUB_RESPONSE = JSON.stringify({
66
- source: 'stub',
67
- symbols: ['planIntent'],
68
- callers: ['src/api/server.js -> planIntent', 'adapters/mcp/server-stdio.js -> planIntent'],
69
- callees: ['rankActions', 'decomposeActions'],
70
- deadCode: ['legacyIntentPlanner'],
71
- });
72
-
73
- const report = {
74
- generatedAt: new Date().toISOString(),
75
- checks: [],
76
- summary: { passed: 0, failed: 0 },
77
- };
78
-
79
- function addResult(name, passed, details) {
80
- report.checks.push({ name, passed, details });
81
- if (passed) report.summary.passed += 1;
82
- else report.summary.failed += 1;
83
- }
84
-
85
- const { server, port } = await startServer({ port: proofPort });
86
- const baseUrl = `http://127.0.0.1:${port}`;
87
- let currentCheck = 'bootstrap';
88
- try {
89
- // 1) Positive with valid rubric -> accepted
90
- {
91
- const result = captureFeedback({
92
- signal: 'up',
93
- context: 'Implemented with tests and evidence',
94
- whatWorked: 'Used proof harness and verification logs',
95
- tags: ['verification', 'automation'],
96
- rubricScores: [
97
- { criterion: 'correctness', score: 4, evidence: 'all tests pass', judge: 'judge-a' },
98
- { criterion: 'verification_evidence', score: 4, evidence: 'proof attached', judge: 'judge-a' },
99
- { criterion: 'safety', score: 4, evidence: 'path checks enabled', judge: 'judge-a' },
100
- ],
101
- guardrails: {
102
- testsPassed: true,
103
- pathSafety: true,
104
- budgetCompliant: true,
105
- },
106
- });
107
- check(result.accepted === true, 'expected rubric-valid positive feedback to be accepted');
108
- check(Boolean(result.memoryRecord && result.memoryRecord.rubricSummary), 'accepted learning should include rubricSummary');
109
- addResult('feedback.capture.rubric_pass', true, {
110
- accepted: result.accepted,
111
- weightedScore: result.memoryRecord.rubricSummary.weightedScore,
112
- });
113
- }
114
-
115
- // 2) Positive with failed guardrail/disagreement -> blocked
116
- {
117
- const result = captureFeedback({
118
- signal: 'up',
119
- context: 'Claimed done without logs',
120
- whatWorked: 'Reviewer approved despite missing logs',
121
- tags: ['verification', 'automation'],
122
- rubricScores: [
123
- { criterion: 'verification_evidence', score: 5, judge: 'judge-a' },
124
- { criterion: 'verification_evidence', score: 2, judge: 'judge-b', evidence: 'logs missing' },
125
- ],
126
- guardrails: {
127
- testsPassed: false,
128
- pathSafety: true,
129
- budgetCompliant: true,
130
- },
131
- });
132
- check(result.accepted === false, 'expected rubric-gated positive feedback to be rejected');
133
- check(/Rubric gate prevented promotion/i.test(String(result.reason)), 'expected rubric gate reason');
134
- addResult('feedback.capture.rubric_block', true, { accepted: result.accepted, reason: result.reason });
135
- }
136
-
137
- // 3) Negative with rubric failures -> accepted mistake memory with rubric tags
138
- {
139
- const result = captureFeedback({
140
- signal: 'down',
141
- context: 'Skipped verification before completion claim',
142
- whatWentWrong: 'No test evidence',
143
- whatToChange: 'Always include test output',
144
- tags: ['verification', 'automation'],
145
- rubricScores: [
146
- { criterion: 'verification_evidence', score: 1, evidence: 'no logs', judge: 'judge-a' },
147
- { criterion: 'correctness', score: 2, evidence: 'regression detected', judge: 'judge-a' },
148
- ],
149
- guardrails: {
150
- testsPassed: false,
151
- pathSafety: true,
152
- budgetCompliant: true,
153
- },
154
- });
155
- check(result.accepted === true, 'expected negative feedback to be accepted as mistake memory');
156
- check(result.memoryRecord.tags.includes('rubric-verification_evidence'), 'expected rubric failure tags');
157
- addResult('feedback.capture.negative_with_rubric', true, {
158
- accepted: result.accepted,
159
- tags: result.memoryRecord.tags,
160
- });
161
- }
162
-
163
- // 4) analytics tracks rubric blocks/failures
164
- {
165
- const { FEEDBACK_LOG_PATH } = getFeedbackPaths();
166
- const stats = analyzeFeedback(FEEDBACK_LOG_PATH);
167
- check(stats.rubric.samples >= 3, 'expected rubric samples to be tracked');
168
- check(stats.rubric.blockedPromotions >= 1, 'expected blocked rubric promotions to be tracked');
169
- check(stats.diagnostics.totalDiagnosed >= 2, 'expected diagnostic counts for failed/suspect feedback');
170
- addResult('analytics.rubric_tracking', true, stats.rubric);
171
- }
172
-
173
- // 5) failed verification emits structured diagnosis and critical step
174
- {
175
- currentCheck = 'verification.failure_diagnostics';
176
- const { MEMORY_LOG_PATH } = getFeedbackPaths();
177
- fs.appendFileSync(MEMORY_LOG_PATH, `${JSON.stringify({
178
- id: 'mem_verification_failure',
179
- category: 'error',
180
- title: 'MISTAKE: agent claimed done without running tests',
181
- content: 'How to avoid: Run npm test before claiming completion',
182
- })}\n`);
183
- const verification = runVerificationLoop({
184
- context: 'Agent claimed done without running tests or verification',
185
- tags: ['verification', 'testing'],
186
- maxRetries: 0,
187
- modelPath: path.join(tmpFeedbackDir, 'verification-model.json'),
188
- });
189
- check(verification.accepted === false, 'expected failed verification for unverified completion claim');
190
- check(Boolean(verification.finalVerification && verification.finalVerification.diagnosis), 'failed verification should include diagnosis');
191
- check(verification.finalVerification.diagnosis.rootCauseCategory === 'tool_output_misread', 'verification diagnosis should classify output misread');
192
- addResult('verification.failure_diagnostics', true, {
193
- rootCauseCategory: verification.finalVerification.diagnosis.rootCauseCategory,
194
- criticalFailureStep: verification.finalVerification.diagnosis.criticalFailureStep,
195
- });
196
- }
197
-
198
- // 6) prevention rules include rubric dimensions and root causes
199
- {
200
- const markdown = buildPreventionRules(1);
201
- check(markdown.includes('Rubric Failure Dimensions'), 'expected rubric section in prevention rules');
202
- check(markdown.includes('verification_evidence'), 'expected criterion in prevention rules');
203
- check(markdown.includes('Root Cause Categories'), 'expected diagnosis section in prevention rules');
204
- addResult('prevention_rules.rubric_dimensions', true, { hasRubricSection: true });
205
- }
206
-
207
- // 7) DPO export includes rubric delta metadata
208
- {
209
- const { MEMORY_LOG_PATH } = getFeedbackPaths();
210
- const memories = readJSONL(MEMORY_LOG_PATH);
211
- const result = exportDpoFromMemories(memories);
212
- check(result.pairs.length >= 1, 'expected at least one DPO pair');
213
- const first = result.pairs[0];
214
- check(Boolean(first.metadata && first.metadata.rubric), 'expected rubric metadata in DPO pair');
215
- addResult('dpo_export.rubric_metadata', true, first.metadata.rubric);
216
- }
217
-
218
- // 8) API rubric gate returns 422
219
- {
220
- currentCheck = 'api.rubric_gate';
221
- const res = await fetchWithRetry(`${baseUrl}/v1/feedback/capture`, {
222
- method: 'POST',
223
- headers: {
224
- Authorization: 'Bearer automation-proof-key',
225
- 'Content-Type': 'application/json',
226
- },
227
- body: JSON.stringify({
228
- signal: 'up',
229
- context: 'unsafe api approval attempt',
230
- whatWorked: 'claimed success',
231
- tags: ['verification', 'automation'],
232
- rubricScores: [
233
- { criterion: 'verification_evidence', score: 5, judge: 'judge-a' },
234
- { criterion: 'verification_evidence', score: 2, judge: 'judge-b', evidence: 'missing logs' },
235
- ],
236
- guardrails: { testsPassed: false, pathSafety: true, budgetCompliant: true },
237
- }),
238
- });
239
- check(res.status === 422, `expected 422 from API rubric gate, got ${res.status}`);
240
- const body = await res.json();
241
- check(body.accepted === false, 'API rubric-gated capture must be rejected');
242
- addResult('api.rubric_gate', true, { status: res.status });
243
- }
244
-
245
- // 9) MCP rubric gate returns accepted=false
246
- {
247
- currentCheck = 'mcp.rubric_gate';
248
- const call = await handleRequest({
249
- jsonrpc: '2.0',
250
- id: 91,
251
- method: 'tools/call',
252
- params: {
253
- name: 'capture_feedback',
254
- arguments: {
255
- signal: 'up',
256
- context: 'unsafe mcp approval attempt',
257
- whatWorked: 'claimed success',
258
- rubricScores: [
259
- { criterion: 'verification_evidence', score: 5, judge: 'judge-a' },
260
- { criterion: 'verification_evidence', score: 2, judge: 'judge-b', evidence: 'missing logs' },
261
- ],
262
- guardrails: { testsPassed: false, pathSafety: true, budgetCompliant: true },
263
- },
264
- },
265
- });
266
- const payload = JSON.parse(call.content[0].text);
267
- check(payload.accepted === false, 'MCP rubric-gated capture must be rejected');
268
- addResult('mcp.rubric_gate', true, { accepted: payload.accepted });
269
- }
270
-
271
- // 10) PreToolUse blocks reads of secret-bearing files
272
- {
273
- currentCheck = 'secret_guard.read_block';
274
- const secretPath = path.join(tmpFeedbackDir, '.env');
275
- const stripeKey = ['sk', '_live_', '1234567890abcdefghijklmnopqrstuvwxyz'].join('');
276
- fs.writeFileSync(secretPath, `STRIPE_SECRET_KEY=${stripeKey}\n`);
277
- const gateOutput = JSON.parse(runGateCheck({
278
- tool_name: 'Read',
279
- tool_input: { file_path: secretPath },
280
- cwd: tmpFeedbackDir,
281
- }));
282
- check(gateOutput.hookSpecificOutput.permissionDecision === 'deny', 'expected secret file read to be blocked');
283
- addResult('secret_guard.read_block', true, {
284
- decision: gateOutput.hookSpecificOutput.permissionDecision,
285
- reason: gateOutput.hookSpecificOutput.permissionDecisionReason,
286
- });
287
- }
288
-
289
- // 11) UserPromptSubmit blocks prompts with inline secrets
290
- {
291
- currentCheck = 'secret_guard.prompt_block';
292
- const gitHubPat = ['gh', 'p_', 'abcdefghijklmnopqrstuvwxyz1234'].join('');
293
- const result = evaluatePromptGuard(`Ship this token to support: ${gitHubPat}`);
294
- check(result && result.continue === false, 'expected prompt guard to block secret-bearing prompt');
295
- addResult('secret_guard.prompt_block', true, {
296
- continue: result.continue,
297
- stopReason: result.stopReason,
298
- });
299
- }
300
-
301
- // 12) MCP failure diagnostics compile schema and approval constraints
302
- {
303
- currentCheck = 'mcp.failure_diagnostics';
304
- const call = await handleRequest({
305
- jsonrpc: '2.0',
306
- id: 92,
307
- method: 'tools/call',
308
- params: {
309
- name: 'diagnose_failure',
310
- arguments: {
311
- step: 'capture_feedback',
312
- context: 'Attempted to approve publish flow without required approval',
313
- toolName: 'capture_feedback',
314
- toolArgs: {},
315
- intentId: 'publish_dpo_training_data',
316
- mcpProfile: 'default',
317
- },
318
- },
319
- });
320
- const payload = JSON.parse(call.content[0].text);
321
- check(payload.rootCauseCategory === 'intent_plan_misalignment', 'diagnose_failure should classify approval mismatch');
322
- check(payload.compiledConstraints.summary.toolSchemaCount >= 1, 'diagnose_failure should include MCP schema constraints');
323
- addResult('mcp.failure_diagnostics', true, {
324
- rootCauseCategory: payload.rootCauseCategory,
325
- toolSchemaCount: payload.compiledConstraints.summary.toolSchemaCount,
326
- });
327
- }
328
-
329
- // 13) intent checkpoints still enforced
330
- {
331
- currentCheck = 'intent.checkpoint_enforcement';
332
- const planBlocked = planIntent({
333
- intentId: 'publish_dpo_training_data',
334
- mcpProfile: 'default',
335
- approved: false,
336
- });
337
- check(planBlocked.status === 'checkpoint_required', 'expected checkpoint_required for high-risk intent');
338
-
339
- const planApproved = planIntent({
340
- intentId: 'publish_dpo_training_data',
341
- mcpProfile: 'default',
342
- approved: true,
343
- });
344
- check(planApproved.status === 'ready', 'expected ready when approved');
345
- addResult('intent.checkpoint_enforcement', true, {
346
- blocked: planBlocked.status,
347
- approved: planApproved.status,
348
- });
349
- }
350
-
351
- // 14) partner-aware planning returns execution strategy
352
- {
353
- currentCheck = 'intent.partner_strategy';
354
- const partnerPlan = planIntent({
355
- intentId: 'incident_postmortem',
356
- mcpProfile: 'default',
357
- partnerProfile: 'strict-reviewer',
358
- });
359
- check(partnerPlan.partnerProfile === 'strict_reviewer', 'expected normalized strict_reviewer partner profile');
360
- check(Boolean(partnerPlan.partnerStrategy), 'expected partner strategy metadata');
361
- check(partnerPlan.partnerStrategy.verificationMode === 'evidence_first', 'expected evidence_first verification mode');
362
- check(partnerPlan.tokenBudget.contextPack > 6000, 'expected boosted contextPack budget for strict reviewer');
363
- check(Array.isArray(partnerPlan.actionScores), 'expected action scores for partner-aware plan');
364
- addResult('intent.partner_strategy', true, {
365
- partnerProfile: partnerPlan.partnerProfile,
366
- verificationMode: partnerPlan.partnerStrategy.verificationMode,
367
- contextPack: partnerPlan.tokenBudget.contextPack,
368
- });
369
- }
370
-
371
- // 15) eligible multi-phase intents get a sequential delegation decision with a handoff contract
372
- {
373
- currentCheck = 'intent.delegation_decision';
374
- const plan = planIntent({
375
- intentId: 'improve_response_quality',
376
- context: 'Improve the response with evidence and prevention rules',
377
- mcpProfile: 'default',
378
- delegationMode: 'auto',
379
- });
380
- check(plan.executionMode === 'sequential_delegate', 'expected delegation decision for eligible multi-phase task');
381
- check(plan.delegateProfile === 'pr_workflow', 'expected pr_workflow delegate profile');
382
- check(Boolean(plan.handoffContract), 'expected handoff contract on delegated plan');
383
- addResult('intent.delegation_decision', true, {
384
- executionMode: plan.executionMode,
385
- delegateProfile: plan.delegateProfile,
386
- delegationScore: plan.delegationScore,
387
- });
388
- }
389
-
390
- // 16) sequential handoff contract is explicit and blocks duplicate starts
391
- {
392
- currentCheck = 'handoff.contract_shape';
393
- const plan = planIntent({
394
- intentId: 'improve_response_quality',
395
- context: 'Improve the response with evidence and prevention rules',
396
- mcpProfile: 'default',
397
- delegationMode: 'auto',
398
- });
399
- const started = startHandoff({
400
- plan,
401
- context: plan.context,
402
- mcpProfile: plan.mcpProfile,
403
- partnerProfile: plan.partnerProfile,
404
- });
405
- check(Boolean(started.handoffContract), 'expected handoff contract');
406
- check(Array.isArray(started.handoffContract.scopeIn), 'handoff contract should include scopeIn');
407
- check(Array.isArray(started.handoffContract.requiredEvidence), 'handoff contract should include requiredEvidence');
408
- check(Array.isArray(started.handoffContract.requiredChecks), 'handoff contract should include requiredChecks');
409
- addResult('handoff.contract_shape', true, {
410
- handoffId: started.handoffId,
411
- requiredEvidence: started.handoffContract.requiredEvidence,
412
- requiredChecks: started.handoffContract.requiredChecks,
413
- });
414
-
415
- currentCheck = 'handoff.sequential_guard';
416
- let guardErr = null;
417
- try {
418
- startHandoff({
419
- plan,
420
- context: plan.context,
421
- mcpProfile: plan.mcpProfile,
422
- partnerProfile: plan.partnerProfile,
423
- });
424
- } catch (err) {
425
- guardErr = err;
426
- }
427
- check(Boolean(guardErr), 'expected duplicate handoff start to fail');
428
- check(/unresolved handoff/i.test(guardErr.message), 'expected unresolved handoff guard');
429
- addResult('handoff.sequential_guard', true, {
430
- statusCode: guardErr.statusCode,
431
- message: guardErr.message,
432
- });
433
-
434
- currentCheck = 'handoff.failure_diagnostics';
435
- const completed = completeHandoff({
436
- handoffId: started.handoffId,
437
- outcome: 'accepted',
438
- attempts: 1,
439
- violationCount: 1,
440
- summary: 'Returned without test evidence.',
441
- resultContext: 'Agent claimed done without running tests or verification',
442
- });
443
- check(completed.verificationAccepted === false, 'expected handoff verification to fail');
444
- check(Boolean(completed.diagnosis), 'expected handoff completion diagnosis');
445
- addResult('handoff.failure_diagnostics', true, {
446
- verificationAccepted: completed.verificationAccepted,
447
- rootCauseCategory: completed.diagnosis.rootCauseCategory,
448
- });
449
- }
450
-
451
- // 17) coding workflows include structural impact evidence and dead-code checks
452
- {
453
- currentCheck = 'intent.codegraph_impact';
454
- const plan = planIntent({
455
- intentId: 'incident_postmortem',
456
- context: 'Refactor `planIntent` in scripts/intent-router.js',
457
- mcpProfile: 'default',
458
- repoPath: ROOT,
459
- });
460
- check(plan.codegraphImpact.enabled === true, 'expected codegraph impact to be enabled');
461
- check(plan.codegraphImpact.evidence.deadCodeCount >= 1, 'expected dead-code candidates in codegraph evidence');
462
- check(
463
- plan.partnerStrategy.recommendedChecks.some((item) => /dead code/i.test(item)),
464
- 'expected structural verification checks to be appended',
465
- );
466
- addResult('intent.codegraph_impact', true, {
467
- source: plan.codegraphImpact.source,
468
- impactScore: plan.codegraphImpact.evidence.impactScore,
469
- deadCodeCount: plan.codegraphImpact.evidence.deadCodeCount,
470
- });
471
- }
472
-
473
- // 18) context evaluate stores rubric evaluation
474
- {
475
- currentCheck = 'context.evaluate.construct';
476
- const construct = await fetchWithRetry(`${baseUrl}/v1/context/construct`, {
477
- method: 'POST',
478
- headers: {
479
- Authorization: 'Bearer automation-proof-key',
480
- 'Content-Type': 'application/json',
481
- },
482
- body: JSON.stringify({ query: 'verification automation', maxItems: 5, maxChars: 5000 }),
483
- });
484
- check(construct.status === 200, `context construct expected 200, got ${construct.status}`);
485
- const pack = await construct.json();
486
-
487
- currentCheck = 'context.evaluate.rubric';
488
- const evaluate = await fetchWithRetry(`${baseUrl}/v1/context/evaluate`, {
489
- method: 'POST',
490
- headers: {
491
- Authorization: 'Bearer automation-proof-key',
492
- 'Content-Type': 'application/json',
493
- },
494
- body: JSON.stringify({
495
- packId: pack.packId,
496
- outcome: 'useful',
497
- signal: 'positive',
498
- rubricScores: [
499
- { criterion: 'correctness', score: 4, evidence: 'tests pass', judge: 'judge-a' },
500
- { criterion: 'verification_evidence', score: 4, evidence: 'logs attached', judge: 'judge-a' },
501
- ],
502
- guardrails: { testsPassed: true, pathSafety: true, budgetCompliant: true },
503
- }),
504
- });
505
- check(evaluate.status === 200, `context evaluate expected 200, got ${evaluate.status}`);
506
- const evalBody = await evaluate.json();
507
- check(Boolean(evalBody.rubricEvaluation), 'expected rubricEvaluation on context evaluate result');
508
- addResult('context.evaluate.rubric', true, { rubricId: evalBody.rubricEvaluation.rubricId });
509
- }
510
-
511
- // 19) semantic cache hit on equivalent query
512
- {
513
- currentCheck = 'context.semantic_cache.hit.first';
514
- fs.rmSync(path.join(getContextFsRoot(), NAMESPACES.provenance, 'semantic-cache.jsonl'), { force: true });
515
- const first = await fetchWithRetry(`${baseUrl}/v1/context/construct`, {
516
- method: 'POST',
517
- headers: {
518
- Authorization: 'Bearer automation-proof-key',
519
- 'Content-Type': 'application/json',
520
- },
521
- body: JSON.stringify({ query: 'verification testing evidence', maxItems: 5, maxChars: 5000 }),
522
- });
523
- check(first.status === 200, `first context construct expected 200, got ${first.status}`);
524
- const firstPack = await first.json();
525
-
526
- currentCheck = 'context.semantic_cache.hit.second';
527
- const second = await fetchWithRetry(`${baseUrl}/v1/context/construct`, {
528
- method: 'POST',
529
- headers: {
530
- Authorization: 'Bearer automation-proof-key',
531
- 'Content-Type': 'application/json',
532
- },
533
- body: JSON.stringify({ query: 'testing verification evidence', maxItems: 5, maxChars: 5000 }),
534
- });
535
- check(second.status === 200, `second context construct expected 200, got ${second.status}`);
536
- const secondPack = await second.json();
537
- check(firstPack.cache && firstPack.cache.hit === false, 'first pack expected cache miss');
538
- check(secondPack.cache && secondPack.cache.hit === true, 'second pack expected cache hit');
539
- addResult('context.semantic_cache.hit', true, {
540
- firstHit: firstPack.cache.hit,
541
- secondHit: secondPack.cache.hit,
542
- similarity: secondPack.cache.similarity,
543
- });
544
- }
545
-
546
- // 20) self-healing helpers produce healthy reports in baseline state
547
- {
548
- const health = collectHealthReport({
549
- checks: [
550
- { name: 'noop', command: ['node', '-e', 'process.exit(0)'] },
551
- ],
552
- });
553
- check(health.overall_status === 'healthy', 'health report expected healthy for noop check');
554
- const unhealthy = collectHealthReport({
555
- checks: [
556
- { name: 'explode', command: ['node', '-e', 'process.exit(2)'] },
557
- ],
558
- });
559
- check(unhealthy.checks[0].diagnosis.rootCauseCategory === 'system_failure', 'unhealthy self-heal check should include system_failure diagnosis');
560
-
561
- const heal = runSelfHeal({ reason: 'automation-proof', cwd: ROOT });
562
- check(heal.healthy === true, 'self-heal expected healthy execution');
563
- check(Boolean(heal.reasoning), 'self-heal must include reasoning traces');
564
- check(heal.traces.length === heal.plan.length, 'self-heal traces count must match plan length');
565
- addResult('self_healing.helpers', true, {
566
- healthStatus: health.overall_status,
567
- changed: heal.changed,
568
- reasoning: heal.reasoning,
569
- });
570
- }
571
-
572
- // 21) code reasoning traces verify DPO pair quality
573
- {
574
- const { MEMORY_LOG_PATH } = getFeedbackPaths();
575
- const memories = readJSONL(MEMORY_LOG_PATH);
576
- const result = exportDpoFromMemories(memories);
577
- if (result.pairs.length >= 1) {
578
- const first = result.pairs[0];
579
- check(Boolean(first.metadata.reasoningTrace), 'DPO pair must include reasoningTrace metadata');
580
- check(typeof first.metadata.reasoningTrace.confidence === 'number', 'reasoningTrace must have confidence score');
581
- check(typeof first.metadata.reasoningTrace.traceId === 'string', 'reasoningTrace must have traceId');
582
- check(Boolean(result.reasoning), 'DPO export must include aggregate reasoning summary');
583
- addResult('code_reasoning.dpo_traces', true, {
584
- traceId: first.metadata.reasoningTrace.traceId,
585
- confidence: first.metadata.reasoningTrace.confidence,
586
- aggregateConfidence: result.reasoning.averageConfidence,
587
- });
588
- } else {
589
- addResult('code_reasoning.dpo_traces', true, { skipped: true, reason: 'no DPO pairs to trace' });
590
- }
591
- }
592
-
593
- // 22) code reasoning traces attached to proof checks
594
- {
595
- const proofTraces = report.checks.map((chk) => traceForProofCheck(chk));
596
- const aggregate = aggregateTraces(proofTraces);
597
- check(aggregate.totalTraces === report.checks.length, 'proof trace count must match check count');
598
- check(aggregate.refuted === 0, 'no proof check should have refuted steps');
599
- check(aggregate.averageConfidence > 0, 'proof traces must have positive confidence');
600
- report.reasoning = aggregate;
601
- report.proofTraces = proofTraces;
602
- addResult('code_reasoning.proof_gate', true, {
603
- totalTraces: aggregate.totalTraces,
604
- averageConfidence: aggregate.averageConfidence,
605
- allPassed: aggregate.allPassed,
606
- });
607
- }
608
- } catch (err) {
609
- addResult('fatal', false, {
610
- check: currentCheck,
611
- error: err.message,
612
- cause: err.cause && err.cause.message ? err.cause.message : null,
613
- });
614
- } finally {
615
- await new Promise((resolve) => server.close(resolve));
616
- await waitForBackgroundSideEffects();
617
- fs.rmSync(tmpFeedbackDir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 });
618
- if (previousCodegraphStub === undefined) delete process.env.THUMBGATE_CODEGRAPH_STUB_RESPONSE;
619
- else process.env.THUMBGATE_CODEGRAPH_STUB_RESPONSE = previousCodegraphStub;
620
- }
621
-
622
- if (writeArtifacts) {
623
- fs.writeFileSync(path.join(proofDir, 'report.json'), `${JSON.stringify(report, null, 2)}\n`);
624
- const mdLines = [
625
- '# Automation Proof',
626
- '',
627
- `Generated: ${report.generatedAt}`,
628
- '',
629
- `Passed: ${report.summary.passed}`,
630
- `Failed: ${report.summary.failed}`,
631
- '',
632
- '## Checks',
633
- ...report.checks.map((checkItem) => `- ${checkItem.passed ? 'PASS' : 'FAIL'} ${checkItem.name}`),
634
- '',
635
- ];
636
- fs.writeFileSync(path.join(proofDir, 'report.md'), `${mdLines.join('\n')}\n`);
637
- }
638
-
639
- if (report.summary.failed > 0) process.exitCode = 1;
640
- return report;
641
- }
642
-
643
- module.exports = {
644
- runAutomationProof,
645
- };
646
-
647
- if (require.main === module) {
648
- runAutomationProof().then((report) => {
649
- console.log(JSON.stringify(report.summary, null, 2));
650
- });
651
- }