thumbgate 1.4.3 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.well-known/mcp/server-card.json +1 -1
  4. package/adapters/README.md +1 -1
  5. package/adapters/claude/.mcp.json +2 -2
  6. package/adapters/codex/config.toml +2 -2
  7. package/adapters/mcp/server-stdio.js +1 -1
  8. package/adapters/opencode/opencode.json +1 -1
  9. package/package.json +157 -9
  10. package/scripts/statusline.sh +1 -0
  11. package/src/api/server.js +113 -16
  12. package/src/index.js +3 -0
  13. package/.claude-plugin/bundle/icon.png +0 -0
  14. package/.claude-plugin/bundle/icon.svg +0 -18
  15. package/.claude-plugin/bundle/server/index.js +0 -24
  16. package/adapters/chatgpt/INSTALL.md +0 -158
  17. package/adapters/perplexity/.mcp.json +0 -36
  18. package/adapters/perplexity/config.toml +0 -16
  19. package/adapters/perplexity/opencode.json +0 -29
  20. package/bin/memory.sh +0 -64
  21. package/bin/obsidian-sync.sh +0 -20
  22. package/plugins/amp-skill/INSTALL.md +0 -52
  23. package/plugins/amp-skill/SKILL.md +0 -64
  24. package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +0 -22
  25. package/plugins/claude-codex-bridge/.mcp.json +0 -14
  26. package/plugins/claude-codex-bridge/INSTALL.md +0 -43
  27. package/plugins/claude-codex-bridge/README.md +0 -46
  28. package/plugins/claude-codex-bridge/scripts/codex-bridge.js +0 -286
  29. package/plugins/claude-codex-bridge/skills/adversarial-review/SKILL.md +0 -24
  30. package/plugins/claude-codex-bridge/skills/result/SKILL.md +0 -22
  31. package/plugins/claude-codex-bridge/skills/review/SKILL.md +0 -28
  32. package/plugins/claude-codex-bridge/skills/second-pass/SKILL.md +0 -27
  33. package/plugins/claude-codex-bridge/skills/setup/SKILL.md +0 -21
  34. package/plugins/claude-codex-bridge/skills/status/SKILL.md +0 -19
  35. package/plugins/claude-skill/INSTALL.md +0 -55
  36. package/plugins/claude-skill/SKILL.md +0 -46
  37. package/plugins/codex-profile/.codex-plugin/plugin.json +0 -43
  38. package/plugins/codex-profile/.mcp.json +0 -14
  39. package/plugins/codex-profile/AGENTS.md +0 -20
  40. package/plugins/codex-profile/INSTALL.md +0 -89
  41. package/plugins/codex-profile/README.md +0 -61
  42. package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +0 -23
  43. package/plugins/cursor-marketplace/CHANGELOG.md +0 -30
  44. package/plugins/cursor-marketplace/LICENSE +0 -21
  45. package/plugins/cursor-marketplace/README.md +0 -124
  46. package/plugins/cursor-marketplace/agents/reliability-reviewer.md +0 -31
  47. package/plugins/cursor-marketplace/assets/logo-400x400.png +0 -0
  48. package/plugins/cursor-marketplace/commands/capture-feedback.md +0 -33
  49. package/plugins/cursor-marketplace/commands/check-gates.md +0 -25
  50. package/plugins/cursor-marketplace/commands/show-lessons.md +0 -27
  51. package/plugins/cursor-marketplace/hooks/hooks.json +0 -10
  52. package/plugins/cursor-marketplace/mcp.json +0 -14
  53. package/plugins/cursor-marketplace/rules/feedback-capture.mdc +0 -34
  54. package/plugins/cursor-marketplace/rules/pre-action-gates.mdc +0 -30
  55. package/plugins/cursor-marketplace/rules/session-continuity.mdc +0 -28
  56. package/plugins/cursor-marketplace/scripts/gate-check.sh +0 -21
  57. package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +0 -48
  58. package/plugins/cursor-marketplace/skills/prevention-rules/SKILL.md +0 -31
  59. package/plugins/cursor-marketplace/skills/recall-context/SKILL.md +0 -30
  60. package/plugins/cursor-marketplace/skills/search-lessons/SKILL.md +0 -33
  61. package/plugins/gemini-extension/INSTALL.md +0 -92
  62. package/plugins/gemini-extension/gemini_prompt.txt +0 -14
  63. package/plugins/gemini-extension/tool_contract.json +0 -45
  64. package/plugins/opencode-profile/INSTALL.md +0 -57
  65. package/public/assets/instagram-card.png +0 -0
  66. package/public/assets/tiktok-agent-memory.mp4 +0 -0
  67. package/public/blog.html +0 -474
  68. package/public/compare/mem0.html +0 -189
  69. package/public/compare/speclock.html +0 -180
  70. package/public/compare.html +0 -310
  71. package/public/dashboard.html +0 -1100
  72. package/public/guide.html +0 -317
  73. package/public/guides/claude-code-prevent-repeated-mistakes.html +0 -161
  74. package/public/guides/codex-cli-guardrails.html +0 -158
  75. package/public/guides/cursor-prevent-repeated-mistakes.html +0 -161
  76. package/public/guides/pre-action-gates.html +0 -162
  77. package/public/guides/stop-repeated-ai-agent-mistakes.html +0 -159
  78. package/public/index.html +0 -1225
  79. package/public/js/buyer-intent.js +0 -252
  80. package/public/learn/agent-harness-pattern.html +0 -180
  81. package/public/learn/ai-agent-persistent-memory.html +0 -203
  82. package/public/learn/learn.css +0 -45
  83. package/public/learn/mcp-pre-action-gates-explained.html +0 -172
  84. package/public/learn/stop-ai-agent-force-push.html +0 -134
  85. package/public/learn/vibe-coding-safety-net.html +0 -142
  86. package/public/learn.html +0 -274
  87. package/public/lessons.html +0 -967
  88. package/public/llm-context.md +0 -156
  89. package/public/pro.html +0 -1087
  90. package/public/vercel.json +0 -8
  91. package/scripts/a2ui-engine.js +0 -73
  92. package/scripts/adk-consolidator.js +0 -274
  93. package/scripts/agent-security-hardening.js +0 -225
  94. package/scripts/ai-search-visibility.js +0 -116
  95. package/scripts/autonomous-sales-agent.js +0 -39
  96. package/scripts/autoresearch-runner.js +0 -216
  97. package/scripts/background-agent-governance.js +0 -229
  98. package/scripts/behavioral-extraction.js +0 -93
  99. package/scripts/budget-enforcer.js +0 -173
  100. package/scripts/budget-guard.js +0 -173
  101. package/scripts/build-claude-mcpb.js +0 -255
  102. package/scripts/build-codex-plugin.js +0 -152
  103. package/scripts/capture-railway-diagnostics.sh +0 -97
  104. package/scripts/changeset-check.js +0 -372
  105. package/scripts/check-congruence.js +0 -443
  106. package/scripts/computer-use-firewall.js +0 -280
  107. package/scripts/content-engine/linkedin-content-generator.js +0 -154
  108. package/scripts/content-engine/output/linkedin-memento-validation.md +0 -17
  109. package/scripts/content-engine/output/linkedin-posts-2026-04-09.md +0 -175
  110. package/scripts/content-engine/reddit-thread-finder.js +0 -154
  111. package/scripts/context-engine.js +0 -710
  112. package/scripts/daily-digest.js +0 -11
  113. package/scripts/data-governance.js +0 -173
  114. package/scripts/deploy-gcp.sh +0 -44
  115. package/scripts/deploy-policy.js +0 -249
  116. package/scripts/disagreement-mining.js +0 -315
  117. package/scripts/dpo-optimizer.js +0 -206
  118. package/scripts/ensure-repo-bootstrap.js +0 -130
  119. package/scripts/ephemeral-agent-store.js +0 -212
  120. package/scripts/eval-harness.js +0 -56
  121. package/scripts/export-kto-pairs.js +0 -309
  122. package/scripts/export-training.js +0 -446
  123. package/scripts/feedback-fallback.js +0 -111
  124. package/scripts/feedback-inbox-read.js +0 -162
  125. package/scripts/feedback-root-consolidator.js +0 -233
  126. package/scripts/feedback-to-memory.js +0 -185
  127. package/scripts/gate-satisfy.js +0 -42
  128. package/scripts/generate-paperbanana-diagrams.sh +0 -99
  129. package/scripts/generate-pretool-hook.sh +0 -40
  130. package/scripts/github-about.js +0 -430
  131. package/scripts/github-outreach.js +0 -65
  132. package/scripts/gtm-revenue-loop.js +0 -535
  133. package/scripts/hallucination-detector.js +0 -226
  134. package/scripts/hf-papers.js +0 -317
  135. package/scripts/hook-auto-capture.sh +0 -100
  136. package/scripts/hook-stop-pr-thread-check.sh +0 -68
  137. package/scripts/hook-stop-self-score.sh +0 -51
  138. package/scripts/hook-stop-verify-deploy.sh +0 -31
  139. package/scripts/hook-verify-before-done.sh +0 -20
  140. package/scripts/managed-dpo-export.js +0 -91
  141. package/scripts/markdown-escape.js +0 -12
  142. package/scripts/marketing-experiment.js +0 -657
  143. package/scripts/memalign-recall.js +0 -111
  144. package/scripts/memory-migration.js +0 -296
  145. package/scripts/meta-policy.js +0 -190
  146. package/scripts/metered-billing.js +0 -16
  147. package/scripts/model-tier-router.js +0 -310
  148. package/scripts/money-watcher.js +0 -218
  149. package/scripts/multi-hop-recall.js +0 -240
  150. package/scripts/per-step-scoring.js +0 -163
  151. package/scripts/perplexity-command-center.js +0 -644
  152. package/scripts/perplexity-marketing.js +0 -454
  153. package/scripts/pii-scanner.js +0 -153
  154. package/scripts/plan-gate.js +0 -154
  155. package/scripts/post-everywhere.js +0 -341
  156. package/scripts/post-to-x-retry.sh +0 -22
  157. package/scripts/post-to-x.js +0 -369
  158. package/scripts/pr-manager.js +0 -421
  159. package/scripts/principle-extractor.js +0 -162
  160. package/scripts/pro-features.js +0 -41
  161. package/scripts/prompt-dlp.js +0 -222
  162. package/scripts/prove-adapters.js +0 -860
  163. package/scripts/prove-attribution.js +0 -361
  164. package/scripts/prove-automation.js +0 -651
  165. package/scripts/prove-autoresearch.js +0 -304
  166. package/scripts/prove-claim-verification.js +0 -277
  167. package/scripts/prove-cloudflare-sandbox.js +0 -161
  168. package/scripts/prove-data-pipeline.js +0 -408
  169. package/scripts/prove-data-quality.js +0 -227
  170. package/scripts/prove-evolution.js +0 -352
  171. package/scripts/prove-harnesses.js +0 -287
  172. package/scripts/prove-intelligence.js +0 -257
  173. package/scripts/prove-lancedb.js +0 -425
  174. package/scripts/prove-local-intelligence.js +0 -340
  175. package/scripts/prove-loop-closure.js +0 -263
  176. package/scripts/prove-packaged-runtime.js +0 -327
  177. package/scripts/prove-predictive-insights.js +0 -355
  178. package/scripts/prove-runtime.js +0 -363
  179. package/scripts/prove-seo-gsd.js +0 -234
  180. package/scripts/prove-settings.js +0 -279
  181. package/scripts/prove-subway-upgrades.js +0 -277
  182. package/scripts/prove-tessl.js +0 -229
  183. package/scripts/prove-training-export.js +0 -325
  184. package/scripts/prove-workflow-contract.js +0 -112
  185. package/scripts/prove-xmemory.js +0 -332
  186. package/scripts/publish-decision.js +0 -159
  187. package/scripts/ralph-loop.js +0 -376
  188. package/scripts/ralph-mode-ci.js +0 -434
  189. package/scripts/reddit-dm-outreach.js +0 -192
  190. package/scripts/reddit-monitor-cron.sh +0 -26
  191. package/scripts/reminder-engine.js +0 -132
  192. package/scripts/revenue-status.js +0 -472
  193. package/scripts/rotate-stripe-webhook-secret.js +0 -314
  194. package/scripts/schedule-manager.js +0 -249
  195. package/scripts/self-healing-check.js +0 -193
  196. package/scripts/session-analyzer.js +0 -533
  197. package/scripts/shieldcortex-memory-firewall-runner.mjs +0 -53
  198. package/scripts/skill-exporter.js +0 -260
  199. package/scripts/skill-materializer.js +0 -134
  200. package/scripts/skill-packs.js +0 -136
  201. package/scripts/skill-proposer.js +0 -99
  202. package/scripts/skill-quality-tracker.js +0 -282
  203. package/scripts/slow-loop.js +0 -72
  204. package/scripts/social-analytics/db/marketing-db.js +0 -179
  205. package/scripts/social-analytics/db/schema.sql +0 -55
  206. package/scripts/social-analytics/digest.js +0 -256
  207. package/scripts/social-analytics/engagement-audit.js +0 -185
  208. package/scripts/social-analytics/generate-instagram-card.js +0 -123
  209. package/scripts/social-analytics/generate-slides.js +0 -268
  210. package/scripts/social-analytics/instagram-thumbgate-post.js +0 -111
  211. package/scripts/social-analytics/install-growth-automation.js +0 -114
  212. package/scripts/social-analytics/load-env.js +0 -77
  213. package/scripts/social-analytics/mcp-server.js +0 -289
  214. package/scripts/social-analytics/normalizer.js +0 -580
  215. package/scripts/social-analytics/notify.js +0 -162
  216. package/scripts/social-analytics/poll-all.js +0 -107
  217. package/scripts/social-analytics/pollers/github.js +0 -195
  218. package/scripts/social-analytics/pollers/instagram.js +0 -253
  219. package/scripts/social-analytics/pollers/linkedin.js +0 -340
  220. package/scripts/social-analytics/pollers/plausible.js +0 -245
  221. package/scripts/social-analytics/pollers/reddit.js +0 -306
  222. package/scripts/social-analytics/pollers/threads.js +0 -233
  223. package/scripts/social-analytics/pollers/tiktok.js +0 -203
  224. package/scripts/social-analytics/pollers/x.js +0 -227
  225. package/scripts/social-analytics/pollers/youtube.js +0 -304
  226. package/scripts/social-analytics/pollers/zernio.js +0 -183
  227. package/scripts/social-analytics/post-video.js +0 -316
  228. package/scripts/social-analytics/publish-instagram-thumbgate.js +0 -104
  229. package/scripts/social-analytics/publish-thumbgate-launch.js +0 -322
  230. package/scripts/social-analytics/publishers/devto.js +0 -122
  231. package/scripts/social-analytics/publishers/instagram.js +0 -317
  232. package/scripts/social-analytics/publishers/linkedin.js +0 -294
  233. package/scripts/social-analytics/publishers/reddit.js +0 -385
  234. package/scripts/social-analytics/publishers/threads.js +0 -275
  235. package/scripts/social-analytics/publishers/tiktok.js +0 -217
  236. package/scripts/social-analytics/publishers/x.js +0 -259
  237. package/scripts/social-analytics/publishers/youtube.js +0 -223
  238. package/scripts/social-analytics/publishers/zernio.js +0 -568
  239. package/scripts/social-analytics/reconcile-thumbgate-campaign.js +0 -165
  240. package/scripts/social-analytics/run-digest.js +0 -34
  241. package/scripts/social-analytics/schedule-thumbgate-campaign.js +0 -275
  242. package/scripts/social-analytics/store.js +0 -455
  243. package/scripts/social-analytics/sync-launch-assets.js +0 -185
  244. package/scripts/social-analytics/utm.js +0 -143
  245. package/scripts/social-pipeline.js +0 -2626
  246. package/scripts/social-post-hourly.js +0 -228
  247. package/scripts/social-quality-gate.js +0 -134
  248. package/scripts/social-reply-monitor.js +0 -592
  249. package/scripts/status-dashboard.js +0 -155
  250. package/scripts/stripe-live-status.js +0 -115
  251. package/scripts/subagent-profiles.js +0 -79
  252. package/scripts/sync-branch-protection.js +0 -340
  253. package/scripts/sync-gh-secrets-from-env.sh +0 -70
  254. package/scripts/sync-github-about.js +0 -55
  255. package/scripts/sync-version.js +0 -479
  256. package/scripts/synthetic-dpo.js +0 -234
  257. package/scripts/tessl-export.js +0 -369
  258. package/scripts/test-coverage.js +0 -128
  259. package/scripts/thumbgate-bench.js +0 -494
  260. package/scripts/thumbgate_session_start.sh +0 -32
  261. package/scripts/train_from_feedback.py +0 -929
  262. package/scripts/validate-feedback.js +0 -581
  263. package/scripts/verify-obsidian-setup.sh +0 -269
  264. package/scripts/verify-run.js +0 -269
  265. package/scripts/weekly-auto-post.js +0 -124
  266. package/scripts/x-autonomous-marketing.js +0 -139
@@ -1,651 +0,0 @@
1
- #!/usr/bin/env node
2
- const fs = require('fs');
3
- const path = require('path');
4
- const os = require('os');
5
- const {
6
- captureFeedback,
7
- analyzeFeedback,
8
- buildPreventionRules,
9
- getFeedbackPaths,
10
- readJSONL,
11
- waitForBackgroundSideEffects,
12
- } = require('./feedback-loop');
13
- const { exportDpoFromMemories } = require('./export-dpo-pairs');
14
- const { planIntent } = require('./intent-router');
15
- const { startHandoff, completeHandoff } = require('./delegation-runtime');
16
- const { startServer } = require('../src/api/server');
17
- const { handleRequest } = require('../adapters/mcp/server-stdio');
18
- const { collectHealthReport } = require('./self-healing-check');
19
- const { runSelfHeal } = require('./self-heal');
20
- const { getContextFsRoot, NAMESPACES } = require('./contextfs');
21
- const { traceForProofCheck, aggregateTraces } = require('./code-reasoning');
22
- const { runVerificationLoop } = require('./verification-loop');
23
- const { run: runGateCheck } = require('./gates-engine');
24
- const { evaluatePromptGuard } = require('./prompt-guard');
25
- const { ensureDir } = require('./fs-utils');
26
-
27
- const ROOT = path.join(__dirname, '..');
28
- const DEFAULT_PROOF_DIR = path.join(ROOT, 'proof', 'automation');
29
-
30
-
31
- function check(condition, message) {
32
- if (!condition) throw new Error(message);
33
- }
34
-
35
- async function fetchWithRetry(url, options, { retries = 5, delayMs = 100 } = {}) {
36
- let lastError = null;
37
-
38
- for (let attempt = 0; attempt <= retries; attempt += 1) {
39
- try {
40
- return await fetch(url, options);
41
- } catch (err) {
42
- lastError = err;
43
- if (attempt === retries) {
44
- throw err;
45
- }
46
- await new Promise((resolve) => setTimeout(resolve, delayMs * (attempt + 1)));
47
- }
48
- }
49
-
50
- throw lastError;
51
- }
52
-
53
- async function runAutomationProof(options = {}) {
54
- const proofDir = options.proofDir || process.env.THUMBGATE_AUTOMATION_PROOF_DIR || DEFAULT_PROOF_DIR;
55
- const writeArtifacts = options.writeArtifacts !== false;
56
- const proofPort = options.port ?? 0;
57
-
58
- if (writeArtifacts) ensureDir(proofDir);
59
-
60
- const tmpFeedbackDir = fs.mkdtempSync(path.join(os.tmpdir(), 'thumbgate-automation-proof-'));
61
- const previousCodegraphStub = process.env.THUMBGATE_CODEGRAPH_STUB_RESPONSE;
62
- process.env.THUMBGATE_FEEDBACK_DIR = tmpFeedbackDir;
63
- process.env.THUMBGATE_API_KEY = 'automation-proof-key';
64
- process.env.THUMBGATE_MCP_PROFILE = 'default';
65
- process.env.THUMBGATE_CODEGRAPH_STUB_RESPONSE = JSON.stringify({
66
- source: 'stub',
67
- symbols: ['planIntent'],
68
- callers: ['src/api/server.js -> planIntent', 'adapters/mcp/server-stdio.js -> planIntent'],
69
- callees: ['rankActions', 'decomposeActions'],
70
- deadCode: ['legacyIntentPlanner'],
71
- });
72
-
73
- const report = {
74
- generatedAt: new Date().toISOString(),
75
- checks: [],
76
- summary: { passed: 0, failed: 0 },
77
- };
78
-
79
- function addResult(name, passed, details) {
80
- report.checks.push({ name, passed, details });
81
- if (passed) report.summary.passed += 1;
82
- else report.summary.failed += 1;
83
- }
84
-
85
- const { server, port } = await startServer({ port: proofPort });
86
- const baseUrl = `http://127.0.0.1:${port}`;
87
- let currentCheck = 'bootstrap';
88
- try {
89
- // 1) Positive with valid rubric -> accepted
90
- {
91
- const result = captureFeedback({
92
- signal: 'up',
93
- context: 'Implemented with tests and evidence',
94
- whatWorked: 'Used proof harness and verification logs',
95
- tags: ['verification', 'automation'],
96
- rubricScores: [
97
- { criterion: 'correctness', score: 4, evidence: 'all tests pass', judge: 'judge-a' },
98
- { criterion: 'verification_evidence', score: 4, evidence: 'proof attached', judge: 'judge-a' },
99
- { criterion: 'safety', score: 4, evidence: 'path checks enabled', judge: 'judge-a' },
100
- ],
101
- guardrails: {
102
- testsPassed: true,
103
- pathSafety: true,
104
- budgetCompliant: true,
105
- },
106
- });
107
- check(result.accepted === true, 'expected rubric-valid positive feedback to be accepted');
108
- check(Boolean(result.memoryRecord && result.memoryRecord.rubricSummary), 'accepted learning should include rubricSummary');
109
- addResult('feedback.capture.rubric_pass', true, {
110
- accepted: result.accepted,
111
- weightedScore: result.memoryRecord.rubricSummary.weightedScore,
112
- });
113
- }
114
-
115
- // 2) Positive with failed guardrail/disagreement -> blocked
116
- {
117
- const result = captureFeedback({
118
- signal: 'up',
119
- context: 'Claimed done without logs',
120
- whatWorked: 'Reviewer approved despite missing logs',
121
- tags: ['verification', 'automation'],
122
- rubricScores: [
123
- { criterion: 'verification_evidence', score: 5, judge: 'judge-a' },
124
- { criterion: 'verification_evidence', score: 2, judge: 'judge-b', evidence: 'logs missing' },
125
- ],
126
- guardrails: {
127
- testsPassed: false,
128
- pathSafety: true,
129
- budgetCompliant: true,
130
- },
131
- });
132
- check(result.accepted === false, 'expected rubric-gated positive feedback to be rejected');
133
- check(/Rubric gate prevented promotion/i.test(String(result.reason)), 'expected rubric gate reason');
134
- addResult('feedback.capture.rubric_block', true, { accepted: result.accepted, reason: result.reason });
135
- }
136
-
137
- // 3) Negative with rubric failures -> accepted mistake memory with rubric tags
138
- {
139
- const result = captureFeedback({
140
- signal: 'down',
141
- context: 'Skipped verification before completion claim',
142
- whatWentWrong: 'No test evidence',
143
- whatToChange: 'Always include test output',
144
- tags: ['verification', 'automation'],
145
- rubricScores: [
146
- { criterion: 'verification_evidence', score: 1, evidence: 'no logs', judge: 'judge-a' },
147
- { criterion: 'correctness', score: 2, evidence: 'regression detected', judge: 'judge-a' },
148
- ],
149
- guardrails: {
150
- testsPassed: false,
151
- pathSafety: true,
152
- budgetCompliant: true,
153
- },
154
- });
155
- check(result.accepted === true, 'expected negative feedback to be accepted as mistake memory');
156
- check(result.memoryRecord.tags.includes('rubric-verification_evidence'), 'expected rubric failure tags');
157
- addResult('feedback.capture.negative_with_rubric', true, {
158
- accepted: result.accepted,
159
- tags: result.memoryRecord.tags,
160
- });
161
- }
162
-
163
- // 4) analytics tracks rubric blocks/failures
164
- {
165
- const { FEEDBACK_LOG_PATH } = getFeedbackPaths();
166
- const stats = analyzeFeedback(FEEDBACK_LOG_PATH);
167
- check(stats.rubric.samples >= 3, 'expected rubric samples to be tracked');
168
- check(stats.rubric.blockedPromotions >= 1, 'expected blocked rubric promotions to be tracked');
169
- check(stats.diagnostics.totalDiagnosed >= 2, 'expected diagnostic counts for failed/suspect feedback');
170
- addResult('analytics.rubric_tracking', true, stats.rubric);
171
- }
172
-
173
- // 5) failed verification emits structured diagnosis and critical step
174
- {
175
- currentCheck = 'verification.failure_diagnostics';
176
- const { MEMORY_LOG_PATH } = getFeedbackPaths();
177
- fs.appendFileSync(MEMORY_LOG_PATH, `${JSON.stringify({
178
- id: 'mem_verification_failure',
179
- category: 'error',
180
- title: 'MISTAKE: agent claimed done without running tests',
181
- content: 'How to avoid: Run npm test before claiming completion',
182
- })}\n`);
183
- const verification = runVerificationLoop({
184
- context: 'Agent claimed done without running tests or verification',
185
- tags: ['verification', 'testing'],
186
- maxRetries: 0,
187
- modelPath: path.join(tmpFeedbackDir, 'verification-model.json'),
188
- });
189
- check(verification.accepted === false, 'expected failed verification for unverified completion claim');
190
- check(Boolean(verification.finalVerification && verification.finalVerification.diagnosis), 'failed verification should include diagnosis');
191
- check(verification.finalVerification.diagnosis.rootCauseCategory === 'tool_output_misread', 'verification diagnosis should classify output misread');
192
- addResult('verification.failure_diagnostics', true, {
193
- rootCauseCategory: verification.finalVerification.diagnosis.rootCauseCategory,
194
- criticalFailureStep: verification.finalVerification.diagnosis.criticalFailureStep,
195
- });
196
- }
197
-
198
- // 6) prevention rules include rubric dimensions and root causes
199
- {
200
- const markdown = buildPreventionRules(1);
201
- check(markdown.includes('Rubric Failure Dimensions'), 'expected rubric section in prevention rules');
202
- check(markdown.includes('verification_evidence'), 'expected criterion in prevention rules');
203
- check(markdown.includes('Root Cause Categories'), 'expected diagnosis section in prevention rules');
204
- addResult('prevention_rules.rubric_dimensions', true, { hasRubricSection: true });
205
- }
206
-
207
- // 7) DPO export includes rubric delta metadata
208
- {
209
- const { MEMORY_LOG_PATH } = getFeedbackPaths();
210
- const memories = readJSONL(MEMORY_LOG_PATH);
211
- const result = exportDpoFromMemories(memories);
212
- check(result.pairs.length >= 1, 'expected at least one DPO pair');
213
- const first = result.pairs[0];
214
- check(Boolean(first.metadata && first.metadata.rubric), 'expected rubric metadata in DPO pair');
215
- addResult('dpo_export.rubric_metadata', true, first.metadata.rubric);
216
- }
217
-
218
- // 8) API rubric gate returns 422
219
- {
220
- currentCheck = 'api.rubric_gate';
221
- const res = await fetchWithRetry(`${baseUrl}/v1/feedback/capture`, {
222
- method: 'POST',
223
- headers: {
224
- Authorization: 'Bearer automation-proof-key',
225
- 'Content-Type': 'application/json',
226
- },
227
- body: JSON.stringify({
228
- signal: 'up',
229
- context: 'unsafe api approval attempt',
230
- whatWorked: 'claimed success',
231
- tags: ['verification', 'automation'],
232
- rubricScores: [
233
- { criterion: 'verification_evidence', score: 5, judge: 'judge-a' },
234
- { criterion: 'verification_evidence', score: 2, judge: 'judge-b', evidence: 'missing logs' },
235
- ],
236
- guardrails: { testsPassed: false, pathSafety: true, budgetCompliant: true },
237
- }),
238
- });
239
- check(res.status === 422, `expected 422 from API rubric gate, got ${res.status}`);
240
- const body = await res.json();
241
- check(body.accepted === false, 'API rubric-gated capture must be rejected');
242
- addResult('api.rubric_gate', true, { status: res.status });
243
- }
244
-
245
- // 9) MCP rubric gate returns accepted=false
246
- {
247
- currentCheck = 'mcp.rubric_gate';
248
- const call = await handleRequest({
249
- jsonrpc: '2.0',
250
- id: 91,
251
- method: 'tools/call',
252
- params: {
253
- name: 'capture_feedback',
254
- arguments: {
255
- signal: 'up',
256
- context: 'unsafe mcp approval attempt',
257
- whatWorked: 'claimed success',
258
- rubricScores: [
259
- { criterion: 'verification_evidence', score: 5, judge: 'judge-a' },
260
- { criterion: 'verification_evidence', score: 2, judge: 'judge-b', evidence: 'missing logs' },
261
- ],
262
- guardrails: { testsPassed: false, pathSafety: true, budgetCompliant: true },
263
- },
264
- },
265
- });
266
- const payload = JSON.parse(call.content[0].text);
267
- check(payload.accepted === false, 'MCP rubric-gated capture must be rejected');
268
- addResult('mcp.rubric_gate', true, { accepted: payload.accepted });
269
- }
270
-
271
- // 10) PreToolUse blocks reads of secret-bearing files
272
- {
273
- currentCheck = 'secret_guard.read_block';
274
- const secretPath = path.join(tmpFeedbackDir, '.env');
275
- const stripeKey = ['sk', '_live_', '1234567890abcdefghijklmnopqrstuvwxyz'].join('');
276
- fs.writeFileSync(secretPath, `STRIPE_SECRET_KEY=${stripeKey}\n`);
277
- const gateOutput = JSON.parse(runGateCheck({
278
- tool_name: 'Read',
279
- tool_input: { file_path: secretPath },
280
- cwd: tmpFeedbackDir,
281
- }));
282
- check(gateOutput.hookSpecificOutput.permissionDecision === 'deny', 'expected secret file read to be blocked');
283
- addResult('secret_guard.read_block', true, {
284
- decision: gateOutput.hookSpecificOutput.permissionDecision,
285
- reason: gateOutput.hookSpecificOutput.permissionDecisionReason,
286
- });
287
- }
288
-
289
- // 11) UserPromptSubmit blocks prompts with inline secrets
290
- {
291
- currentCheck = 'secret_guard.prompt_block';
292
- const gitHubPat = ['gh', 'p_', 'abcdefghijklmnopqrstuvwxyz1234'].join('');
293
- const result = evaluatePromptGuard(`Ship this token to support: ${gitHubPat}`);
294
- check(result && result.continue === false, 'expected prompt guard to block secret-bearing prompt');
295
- addResult('secret_guard.prompt_block', true, {
296
- continue: result.continue,
297
- stopReason: result.stopReason,
298
- });
299
- }
300
-
301
- // 12) MCP failure diagnostics compile schema and approval constraints
302
- {
303
- currentCheck = 'mcp.failure_diagnostics';
304
- const call = await handleRequest({
305
- jsonrpc: '2.0',
306
- id: 92,
307
- method: 'tools/call',
308
- params: {
309
- name: 'diagnose_failure',
310
- arguments: {
311
- step: 'capture_feedback',
312
- context: 'Attempted to approve publish flow without required approval',
313
- toolName: 'capture_feedback',
314
- toolArgs: {},
315
- intentId: 'publish_dpo_training_data',
316
- mcpProfile: 'default',
317
- },
318
- },
319
- });
320
- const payload = JSON.parse(call.content[0].text);
321
- check(payload.rootCauseCategory === 'intent_plan_misalignment', 'diagnose_failure should classify approval mismatch');
322
- check(payload.compiledConstraints.summary.toolSchemaCount >= 1, 'diagnose_failure should include MCP schema constraints');
323
- addResult('mcp.failure_diagnostics', true, {
324
- rootCauseCategory: payload.rootCauseCategory,
325
- toolSchemaCount: payload.compiledConstraints.summary.toolSchemaCount,
326
- });
327
- }
328
-
329
- // 13) intent checkpoints still enforced
330
- {
331
- currentCheck = 'intent.checkpoint_enforcement';
332
- const planBlocked = planIntent({
333
- intentId: 'publish_dpo_training_data',
334
- mcpProfile: 'default',
335
- approved: false,
336
- });
337
- check(planBlocked.status === 'checkpoint_required', 'expected checkpoint_required for high-risk intent');
338
-
339
- const planApproved = planIntent({
340
- intentId: 'publish_dpo_training_data',
341
- mcpProfile: 'default',
342
- approved: true,
343
- });
344
- check(planApproved.status === 'ready', 'expected ready when approved');
345
- addResult('intent.checkpoint_enforcement', true, {
346
- blocked: planBlocked.status,
347
- approved: planApproved.status,
348
- });
349
- }
350
-
351
- // 14) partner-aware planning returns execution strategy
352
- {
353
- currentCheck = 'intent.partner_strategy';
354
- const partnerPlan = planIntent({
355
- intentId: 'incident_postmortem',
356
- mcpProfile: 'default',
357
- partnerProfile: 'strict-reviewer',
358
- });
359
- check(partnerPlan.partnerProfile === 'strict_reviewer', 'expected normalized strict_reviewer partner profile');
360
- check(Boolean(partnerPlan.partnerStrategy), 'expected partner strategy metadata');
361
- check(partnerPlan.partnerStrategy.verificationMode === 'evidence_first', 'expected evidence_first verification mode');
362
- check(partnerPlan.tokenBudget.contextPack > 6000, 'expected boosted contextPack budget for strict reviewer');
363
- check(Array.isArray(partnerPlan.actionScores), 'expected action scores for partner-aware plan');
364
- addResult('intent.partner_strategy', true, {
365
- partnerProfile: partnerPlan.partnerProfile,
366
- verificationMode: partnerPlan.partnerStrategy.verificationMode,
367
- contextPack: partnerPlan.tokenBudget.contextPack,
368
- });
369
- }
370
-
371
- // 15) coding workflows include structural impact evidence and dead-code checks
372
- {
373
- currentCheck = 'intent.delegation_decision';
374
- const plan = planIntent({
375
- intentId: 'improve_response_quality',
376
- context: 'Improve the response with evidence and prevention rules',
377
- mcpProfile: 'default',
378
- delegationMode: 'auto',
379
- });
380
- check(plan.executionMode === 'sequential_delegate', 'expected delegation decision for eligible multi-phase task');
381
- check(plan.delegateProfile === 'pr_workflow', 'expected pr_workflow delegate profile');
382
- check(Boolean(plan.handoffContract), 'expected handoff contract on delegated plan');
383
- addResult('intent.delegation_decision', true, {
384
- executionMode: plan.executionMode,
385
- delegateProfile: plan.delegateProfile,
386
- delegationScore: plan.delegationScore,
387
- });
388
- }
389
-
390
- // 16) sequential handoff contract is explicit and blocks duplicate starts
391
- {
392
- currentCheck = 'handoff.contract_shape';
393
- const plan = planIntent({
394
- intentId: 'improve_response_quality',
395
- context: 'Improve the response with evidence and prevention rules',
396
- mcpProfile: 'default',
397
- delegationMode: 'auto',
398
- });
399
- const started = startHandoff({
400
- plan,
401
- context: plan.context,
402
- mcpProfile: plan.mcpProfile,
403
- partnerProfile: plan.partnerProfile,
404
- });
405
- check(Boolean(started.handoffContract), 'expected handoff contract');
406
- check(Array.isArray(started.handoffContract.scopeIn), 'handoff contract should include scopeIn');
407
- check(Array.isArray(started.handoffContract.requiredEvidence), 'handoff contract should include requiredEvidence');
408
- check(Array.isArray(started.handoffContract.requiredChecks), 'handoff contract should include requiredChecks');
409
- addResult('handoff.contract_shape', true, {
410
- handoffId: started.handoffId,
411
- requiredEvidence: started.handoffContract.requiredEvidence,
412
- requiredChecks: started.handoffContract.requiredChecks,
413
- });
414
-
415
- currentCheck = 'handoff.sequential_guard';
416
- let guardErr = null;
417
- try {
418
- startHandoff({
419
- plan,
420
- context: plan.context,
421
- mcpProfile: plan.mcpProfile,
422
- partnerProfile: plan.partnerProfile,
423
- });
424
- } catch (err) {
425
- guardErr = err;
426
- }
427
- check(Boolean(guardErr), 'expected duplicate handoff start to fail');
428
- check(/unresolved handoff/i.test(guardErr.message), 'expected unresolved handoff guard');
429
- addResult('handoff.sequential_guard', true, {
430
- statusCode: guardErr.statusCode,
431
- message: guardErr.message,
432
- });
433
-
434
- currentCheck = 'handoff.failure_diagnostics';
435
- const completed = completeHandoff({
436
- handoffId: started.handoffId,
437
- outcome: 'accepted',
438
- attempts: 1,
439
- violationCount: 1,
440
- summary: 'Returned without test evidence.',
441
- resultContext: 'Agent claimed done without running tests or verification',
442
- });
443
- check(completed.verificationAccepted === false, 'expected handoff verification to fail');
444
- check(Boolean(completed.diagnosis), 'expected handoff completion diagnosis');
445
- addResult('handoff.failure_diagnostics', true, {
446
- verificationAccepted: completed.verificationAccepted,
447
- rootCauseCategory: completed.diagnosis.rootCauseCategory,
448
- });
449
- }
450
-
451
- // 17) coding workflows include structural impact evidence and dead-code checks
452
- {
453
- currentCheck = 'intent.codegraph_impact';
454
- const plan = planIntent({
455
- intentId: 'incident_postmortem',
456
- context: 'Refactor `planIntent` in scripts/intent-router.js',
457
- mcpProfile: 'default',
458
- repoPath: ROOT,
459
- });
460
- check(plan.codegraphImpact.enabled === true, 'expected codegraph impact to be enabled');
461
- check(plan.codegraphImpact.evidence.deadCodeCount >= 1, 'expected dead-code candidates in codegraph evidence');
462
- check(
463
- plan.partnerStrategy.recommendedChecks.some((item) => /dead code/i.test(item)),
464
- 'expected structural verification checks to be appended',
465
- );
466
- addResult('intent.codegraph_impact', true, {
467
- source: plan.codegraphImpact.source,
468
- impactScore: plan.codegraphImpact.evidence.impactScore,
469
- deadCodeCount: plan.codegraphImpact.evidence.deadCodeCount,
470
- });
471
- }
472
-
473
- // 18) context evaluate stores rubric evaluation
474
- {
475
- currentCheck = 'context.evaluate.construct';
476
- const construct = await fetchWithRetry(`${baseUrl}/v1/context/construct`, {
477
- method: 'POST',
478
- headers: {
479
- Authorization: 'Bearer automation-proof-key',
480
- 'Content-Type': 'application/json',
481
- },
482
- body: JSON.stringify({ query: 'verification automation', maxItems: 5, maxChars: 5000 }),
483
- });
484
- check(construct.status === 200, `context construct expected 200, got ${construct.status}`);
485
- const pack = await construct.json();
486
-
487
- currentCheck = 'context.evaluate.rubric';
488
- const evaluate = await fetchWithRetry(`${baseUrl}/v1/context/evaluate`, {
489
- method: 'POST',
490
- headers: {
491
- Authorization: 'Bearer automation-proof-key',
492
- 'Content-Type': 'application/json',
493
- },
494
- body: JSON.stringify({
495
- packId: pack.packId,
496
- outcome: 'useful',
497
- signal: 'positive',
498
- rubricScores: [
499
- { criterion: 'correctness', score: 4, evidence: 'tests pass', judge: 'judge-a' },
500
- { criterion: 'verification_evidence', score: 4, evidence: 'logs attached', judge: 'judge-a' },
501
- ],
502
- guardrails: { testsPassed: true, pathSafety: true, budgetCompliant: true },
503
- }),
504
- });
505
- check(evaluate.status === 200, `context evaluate expected 200, got ${evaluate.status}`);
506
- const evalBody = await evaluate.json();
507
- check(Boolean(evalBody.rubricEvaluation), 'expected rubricEvaluation on context evaluate result');
508
- addResult('context.evaluate.rubric', true, { rubricId: evalBody.rubricEvaluation.rubricId });
509
- }
510
-
511
- // 19) semantic cache hit on equivalent query
512
- {
513
- currentCheck = 'context.semantic_cache.hit.first';
514
- fs.rmSync(path.join(getContextFsRoot(), NAMESPACES.provenance, 'semantic-cache.jsonl'), { force: true });
515
- const first = await fetchWithRetry(`${baseUrl}/v1/context/construct`, {
516
- method: 'POST',
517
- headers: {
518
- Authorization: 'Bearer automation-proof-key',
519
- 'Content-Type': 'application/json',
520
- },
521
- body: JSON.stringify({ query: 'verification testing evidence', maxItems: 5, maxChars: 5000 }),
522
- });
523
- check(first.status === 200, `first context construct expected 200, got ${first.status}`);
524
- const firstPack = await first.json();
525
-
526
- currentCheck = 'context.semantic_cache.hit.second';
527
- const second = await fetchWithRetry(`${baseUrl}/v1/context/construct`, {
528
- method: 'POST',
529
- headers: {
530
- Authorization: 'Bearer automation-proof-key',
531
- 'Content-Type': 'application/json',
532
- },
533
- body: JSON.stringify({ query: 'testing verification evidence', maxItems: 5, maxChars: 5000 }),
534
- });
535
- check(second.status === 200, `second context construct expected 200, got ${second.status}`);
536
- const secondPack = await second.json();
537
- check(firstPack.cache && firstPack.cache.hit === false, 'first pack expected cache miss');
538
- check(secondPack.cache && secondPack.cache.hit === true, 'second pack expected cache hit');
539
- addResult('context.semantic_cache.hit', true, {
540
- firstHit: firstPack.cache.hit,
541
- secondHit: secondPack.cache.hit,
542
- similarity: secondPack.cache.similarity,
543
- });
544
- }
545
-
546
- // 20) self-healing helpers produce healthy reports in baseline state
547
- {
548
- const health = collectHealthReport({
549
- checks: [
550
- { name: 'noop', command: ['node', '-e', 'process.exit(0)'] },
551
- ],
552
- });
553
- check(health.overall_status === 'healthy', 'health report expected healthy for noop check');
554
- const unhealthy = collectHealthReport({
555
- checks: [
556
- { name: 'explode', command: ['node', '-e', 'process.exit(2)'] },
557
- ],
558
- });
559
- check(unhealthy.checks[0].diagnosis.rootCauseCategory === 'system_failure', 'unhealthy self-heal check should include system_failure diagnosis');
560
-
561
- const heal = runSelfHeal({ reason: 'automation-proof', cwd: ROOT });
562
- check(heal.healthy === true, 'self-heal expected healthy execution');
563
- check(Boolean(heal.reasoning), 'self-heal must include reasoning traces');
564
- check(heal.traces.length === heal.plan.length, 'self-heal traces count must match plan length');
565
- addResult('self_healing.helpers', true, {
566
- healthStatus: health.overall_status,
567
- changed: heal.changed,
568
- reasoning: heal.reasoning,
569
- });
570
- }
571
-
572
- // 21) code reasoning traces verify DPO pair quality
573
- {
574
- const { MEMORY_LOG_PATH } = getFeedbackPaths();
575
- const memories = readJSONL(MEMORY_LOG_PATH);
576
- const result = exportDpoFromMemories(memories);
577
- if (result.pairs.length >= 1) {
578
- const first = result.pairs[0];
579
- check(Boolean(first.metadata.reasoningTrace), 'DPO pair must include reasoningTrace metadata');
580
- check(typeof first.metadata.reasoningTrace.confidence === 'number', 'reasoningTrace must have confidence score');
581
- check(typeof first.metadata.reasoningTrace.traceId === 'string', 'reasoningTrace must have traceId');
582
- check(Boolean(result.reasoning), 'DPO export must include aggregate reasoning summary');
583
- addResult('code_reasoning.dpo_traces', true, {
584
- traceId: first.metadata.reasoningTrace.traceId,
585
- confidence: first.metadata.reasoningTrace.confidence,
586
- aggregateConfidence: result.reasoning.averageConfidence,
587
- });
588
- } else {
589
- addResult('code_reasoning.dpo_traces', true, { skipped: true, reason: 'no DPO pairs to trace' });
590
- }
591
- }
592
-
593
- // 22) code reasoning traces attached to proof checks
594
- {
595
- const proofTraces = report.checks.map((chk) => traceForProofCheck(chk));
596
- const aggregate = aggregateTraces(proofTraces);
597
- check(aggregate.totalTraces === report.checks.length, 'proof trace count must match check count');
598
- check(aggregate.refuted === 0, 'no proof check should have refuted steps');
599
- check(aggregate.averageConfidence > 0, 'proof traces must have positive confidence');
600
- report.reasoning = aggregate;
601
- report.proofTraces = proofTraces;
602
- addResult('code_reasoning.proof_gate', true, {
603
- totalTraces: aggregate.totalTraces,
604
- averageConfidence: aggregate.averageConfidence,
605
- allPassed: aggregate.allPassed,
606
- });
607
- }
608
- } catch (err) {
609
- addResult('fatal', false, {
610
- check: currentCheck,
611
- error: err.message,
612
- cause: err.cause && err.cause.message ? err.cause.message : null,
613
- });
614
- } finally {
615
- await new Promise((resolve) => server.close(resolve));
616
- await waitForBackgroundSideEffects();
617
- fs.rmSync(tmpFeedbackDir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 });
618
- if (previousCodegraphStub === undefined) delete process.env.THUMBGATE_CODEGRAPH_STUB_RESPONSE;
619
- else process.env.THUMBGATE_CODEGRAPH_STUB_RESPONSE = previousCodegraphStub;
620
- }
621
-
622
- if (writeArtifacts) {
623
- fs.writeFileSync(path.join(proofDir, 'report.json'), `${JSON.stringify(report, null, 2)}\n`);
624
- const mdLines = [
625
- '# Automation Proof',
626
- '',
627
- `Generated: ${report.generatedAt}`,
628
- '',
629
- `Passed: ${report.summary.passed}`,
630
- `Failed: ${report.summary.failed}`,
631
- '',
632
- '## Checks',
633
- ...report.checks.map((checkItem) => `- ${checkItem.passed ? 'PASS' : 'FAIL'} ${checkItem.name}`),
634
- '',
635
- ];
636
- fs.writeFileSync(path.join(proofDir, 'report.md'), `${mdLines.join('\n')}\n`);
637
- }
638
-
639
- if (report.summary.failed > 0) process.exitCode = 1;
640
- return report;
641
- }
642
-
643
- module.exports = {
644
- runAutomationProof,
645
- };
646
-
647
- if (require.main === module) {
648
- runAutomationProof().then((report) => {
649
- console.log(JSON.stringify(report.summary, null, 2));
650
- });
651
- }