thumbgate 1.4.3 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/adapters/README.md +1 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/codex/config.toml +2 -2
- package/adapters/mcp/server-stdio.js +1 -1
- package/adapters/opencode/opencode.json +1 -1
- package/package.json +157 -9
- package/scripts/statusline.sh +1 -0
- package/src/api/server.js +113 -16
- package/src/index.js +3 -0
- package/.claude-plugin/bundle/icon.png +0 -0
- package/.claude-plugin/bundle/icon.svg +0 -18
- package/.claude-plugin/bundle/server/index.js +0 -24
- package/adapters/chatgpt/INSTALL.md +0 -158
- package/adapters/perplexity/.mcp.json +0 -36
- package/adapters/perplexity/config.toml +0 -16
- package/adapters/perplexity/opencode.json +0 -29
- package/bin/memory.sh +0 -64
- package/bin/obsidian-sync.sh +0 -20
- package/plugins/amp-skill/INSTALL.md +0 -52
- package/plugins/amp-skill/SKILL.md +0 -64
- package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +0 -22
- package/plugins/claude-codex-bridge/.mcp.json +0 -14
- package/plugins/claude-codex-bridge/INSTALL.md +0 -43
- package/plugins/claude-codex-bridge/README.md +0 -46
- package/plugins/claude-codex-bridge/scripts/codex-bridge.js +0 -286
- package/plugins/claude-codex-bridge/skills/adversarial-review/SKILL.md +0 -24
- package/plugins/claude-codex-bridge/skills/result/SKILL.md +0 -22
- package/plugins/claude-codex-bridge/skills/review/SKILL.md +0 -28
- package/plugins/claude-codex-bridge/skills/second-pass/SKILL.md +0 -27
- package/plugins/claude-codex-bridge/skills/setup/SKILL.md +0 -21
- package/plugins/claude-codex-bridge/skills/status/SKILL.md +0 -19
- package/plugins/claude-skill/INSTALL.md +0 -55
- package/plugins/claude-skill/SKILL.md +0 -46
- package/plugins/codex-profile/.codex-plugin/plugin.json +0 -43
- package/plugins/codex-profile/.mcp.json +0 -14
- package/plugins/codex-profile/AGENTS.md +0 -20
- package/plugins/codex-profile/INSTALL.md +0 -89
- package/plugins/codex-profile/README.md +0 -61
- package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +0 -23
- package/plugins/cursor-marketplace/CHANGELOG.md +0 -30
- package/plugins/cursor-marketplace/LICENSE +0 -21
- package/plugins/cursor-marketplace/README.md +0 -124
- package/plugins/cursor-marketplace/agents/reliability-reviewer.md +0 -31
- package/plugins/cursor-marketplace/assets/logo-400x400.png +0 -0
- package/plugins/cursor-marketplace/commands/capture-feedback.md +0 -33
- package/plugins/cursor-marketplace/commands/check-gates.md +0 -25
- package/plugins/cursor-marketplace/commands/show-lessons.md +0 -27
- package/plugins/cursor-marketplace/hooks/hooks.json +0 -10
- package/plugins/cursor-marketplace/mcp.json +0 -14
- package/plugins/cursor-marketplace/rules/feedback-capture.mdc +0 -34
- package/plugins/cursor-marketplace/rules/pre-action-gates.mdc +0 -30
- package/plugins/cursor-marketplace/rules/session-continuity.mdc +0 -28
- package/plugins/cursor-marketplace/scripts/gate-check.sh +0 -21
- package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +0 -48
- package/plugins/cursor-marketplace/skills/prevention-rules/SKILL.md +0 -31
- package/plugins/cursor-marketplace/skills/recall-context/SKILL.md +0 -30
- package/plugins/cursor-marketplace/skills/search-lessons/SKILL.md +0 -33
- package/plugins/gemini-extension/INSTALL.md +0 -92
- package/plugins/gemini-extension/gemini_prompt.txt +0 -14
- package/plugins/gemini-extension/tool_contract.json +0 -45
- package/plugins/opencode-profile/INSTALL.md +0 -57
- package/public/assets/instagram-card.png +0 -0
- package/public/assets/tiktok-agent-memory.mp4 +0 -0
- package/public/blog.html +0 -474
- package/public/compare/mem0.html +0 -189
- package/public/compare/speclock.html +0 -180
- package/public/compare.html +0 -310
- package/public/dashboard.html +0 -1100
- package/public/guide.html +0 -317
- package/public/guides/claude-code-prevent-repeated-mistakes.html +0 -161
- package/public/guides/codex-cli-guardrails.html +0 -158
- package/public/guides/cursor-prevent-repeated-mistakes.html +0 -161
- package/public/guides/pre-action-gates.html +0 -162
- package/public/guides/stop-repeated-ai-agent-mistakes.html +0 -159
- package/public/index.html +0 -1225
- package/public/js/buyer-intent.js +0 -252
- package/public/learn/agent-harness-pattern.html +0 -180
- package/public/learn/ai-agent-persistent-memory.html +0 -203
- package/public/learn/learn.css +0 -45
- package/public/learn/mcp-pre-action-gates-explained.html +0 -172
- package/public/learn/stop-ai-agent-force-push.html +0 -134
- package/public/learn/vibe-coding-safety-net.html +0 -142
- package/public/learn.html +0 -274
- package/public/lessons.html +0 -967
- package/public/llm-context.md +0 -156
- package/public/pro.html +0 -1087
- package/public/vercel.json +0 -8
- package/scripts/a2ui-engine.js +0 -73
- package/scripts/adk-consolidator.js +0 -274
- package/scripts/agent-security-hardening.js +0 -225
- package/scripts/ai-search-visibility.js +0 -116
- package/scripts/autonomous-sales-agent.js +0 -39
- package/scripts/autoresearch-runner.js +0 -216
- package/scripts/background-agent-governance.js +0 -229
- package/scripts/behavioral-extraction.js +0 -93
- package/scripts/budget-enforcer.js +0 -173
- package/scripts/budget-guard.js +0 -173
- package/scripts/build-claude-mcpb.js +0 -255
- package/scripts/build-codex-plugin.js +0 -152
- package/scripts/capture-railway-diagnostics.sh +0 -97
- package/scripts/changeset-check.js +0 -372
- package/scripts/check-congruence.js +0 -443
- package/scripts/computer-use-firewall.js +0 -280
- package/scripts/content-engine/linkedin-content-generator.js +0 -154
- package/scripts/content-engine/output/linkedin-memento-validation.md +0 -17
- package/scripts/content-engine/output/linkedin-posts-2026-04-09.md +0 -175
- package/scripts/content-engine/reddit-thread-finder.js +0 -154
- package/scripts/context-engine.js +0 -710
- package/scripts/daily-digest.js +0 -11
- package/scripts/data-governance.js +0 -173
- package/scripts/deploy-gcp.sh +0 -44
- package/scripts/deploy-policy.js +0 -249
- package/scripts/disagreement-mining.js +0 -315
- package/scripts/dpo-optimizer.js +0 -206
- package/scripts/ensure-repo-bootstrap.js +0 -130
- package/scripts/ephemeral-agent-store.js +0 -212
- package/scripts/eval-harness.js +0 -56
- package/scripts/export-kto-pairs.js +0 -309
- package/scripts/export-training.js +0 -446
- package/scripts/feedback-fallback.js +0 -111
- package/scripts/feedback-inbox-read.js +0 -162
- package/scripts/feedback-root-consolidator.js +0 -233
- package/scripts/feedback-to-memory.js +0 -185
- package/scripts/gate-satisfy.js +0 -42
- package/scripts/generate-paperbanana-diagrams.sh +0 -99
- package/scripts/generate-pretool-hook.sh +0 -40
- package/scripts/github-about.js +0 -430
- package/scripts/github-outreach.js +0 -65
- package/scripts/gtm-revenue-loop.js +0 -535
- package/scripts/hallucination-detector.js +0 -226
- package/scripts/hf-papers.js +0 -317
- package/scripts/hook-auto-capture.sh +0 -100
- package/scripts/hook-stop-pr-thread-check.sh +0 -68
- package/scripts/hook-stop-self-score.sh +0 -51
- package/scripts/hook-stop-verify-deploy.sh +0 -31
- package/scripts/hook-verify-before-done.sh +0 -20
- package/scripts/managed-dpo-export.js +0 -91
- package/scripts/markdown-escape.js +0 -12
- package/scripts/marketing-experiment.js +0 -657
- package/scripts/memalign-recall.js +0 -111
- package/scripts/memory-migration.js +0 -296
- package/scripts/meta-policy.js +0 -190
- package/scripts/metered-billing.js +0 -16
- package/scripts/model-tier-router.js +0 -310
- package/scripts/money-watcher.js +0 -218
- package/scripts/multi-hop-recall.js +0 -240
- package/scripts/per-step-scoring.js +0 -163
- package/scripts/perplexity-command-center.js +0 -644
- package/scripts/perplexity-marketing.js +0 -454
- package/scripts/pii-scanner.js +0 -153
- package/scripts/plan-gate.js +0 -154
- package/scripts/post-everywhere.js +0 -341
- package/scripts/post-to-x-retry.sh +0 -22
- package/scripts/post-to-x.js +0 -369
- package/scripts/pr-manager.js +0 -421
- package/scripts/principle-extractor.js +0 -162
- package/scripts/pro-features.js +0 -41
- package/scripts/prompt-dlp.js +0 -222
- package/scripts/prove-adapters.js +0 -860
- package/scripts/prove-attribution.js +0 -361
- package/scripts/prove-automation.js +0 -651
- package/scripts/prove-autoresearch.js +0 -304
- package/scripts/prove-claim-verification.js +0 -277
- package/scripts/prove-cloudflare-sandbox.js +0 -161
- package/scripts/prove-data-pipeline.js +0 -408
- package/scripts/prove-data-quality.js +0 -227
- package/scripts/prove-evolution.js +0 -352
- package/scripts/prove-harnesses.js +0 -287
- package/scripts/prove-intelligence.js +0 -257
- package/scripts/prove-lancedb.js +0 -425
- package/scripts/prove-local-intelligence.js +0 -340
- package/scripts/prove-loop-closure.js +0 -263
- package/scripts/prove-packaged-runtime.js +0 -327
- package/scripts/prove-predictive-insights.js +0 -355
- package/scripts/prove-runtime.js +0 -363
- package/scripts/prove-seo-gsd.js +0 -234
- package/scripts/prove-settings.js +0 -279
- package/scripts/prove-subway-upgrades.js +0 -277
- package/scripts/prove-tessl.js +0 -229
- package/scripts/prove-training-export.js +0 -325
- package/scripts/prove-workflow-contract.js +0 -112
- package/scripts/prove-xmemory.js +0 -332
- package/scripts/publish-decision.js +0 -159
- package/scripts/ralph-loop.js +0 -376
- package/scripts/ralph-mode-ci.js +0 -434
- package/scripts/reddit-dm-outreach.js +0 -192
- package/scripts/reddit-monitor-cron.sh +0 -26
- package/scripts/reminder-engine.js +0 -132
- package/scripts/revenue-status.js +0 -472
- package/scripts/rotate-stripe-webhook-secret.js +0 -314
- package/scripts/schedule-manager.js +0 -249
- package/scripts/self-healing-check.js +0 -193
- package/scripts/session-analyzer.js +0 -533
- package/scripts/shieldcortex-memory-firewall-runner.mjs +0 -53
- package/scripts/skill-exporter.js +0 -260
- package/scripts/skill-materializer.js +0 -134
- package/scripts/skill-packs.js +0 -136
- package/scripts/skill-proposer.js +0 -99
- package/scripts/skill-quality-tracker.js +0 -282
- package/scripts/slow-loop.js +0 -72
- package/scripts/social-analytics/db/marketing-db.js +0 -179
- package/scripts/social-analytics/db/schema.sql +0 -55
- package/scripts/social-analytics/digest.js +0 -256
- package/scripts/social-analytics/engagement-audit.js +0 -185
- package/scripts/social-analytics/generate-instagram-card.js +0 -123
- package/scripts/social-analytics/generate-slides.js +0 -268
- package/scripts/social-analytics/instagram-thumbgate-post.js +0 -111
- package/scripts/social-analytics/install-growth-automation.js +0 -114
- package/scripts/social-analytics/load-env.js +0 -77
- package/scripts/social-analytics/mcp-server.js +0 -289
- package/scripts/social-analytics/normalizer.js +0 -580
- package/scripts/social-analytics/notify.js +0 -162
- package/scripts/social-analytics/poll-all.js +0 -107
- package/scripts/social-analytics/pollers/github.js +0 -195
- package/scripts/social-analytics/pollers/instagram.js +0 -253
- package/scripts/social-analytics/pollers/linkedin.js +0 -340
- package/scripts/social-analytics/pollers/plausible.js +0 -245
- package/scripts/social-analytics/pollers/reddit.js +0 -306
- package/scripts/social-analytics/pollers/threads.js +0 -233
- package/scripts/social-analytics/pollers/tiktok.js +0 -203
- package/scripts/social-analytics/pollers/x.js +0 -227
- package/scripts/social-analytics/pollers/youtube.js +0 -304
- package/scripts/social-analytics/pollers/zernio.js +0 -183
- package/scripts/social-analytics/post-video.js +0 -316
- package/scripts/social-analytics/publish-instagram-thumbgate.js +0 -104
- package/scripts/social-analytics/publish-thumbgate-launch.js +0 -322
- package/scripts/social-analytics/publishers/devto.js +0 -122
- package/scripts/social-analytics/publishers/instagram.js +0 -317
- package/scripts/social-analytics/publishers/linkedin.js +0 -294
- package/scripts/social-analytics/publishers/reddit.js +0 -385
- package/scripts/social-analytics/publishers/threads.js +0 -275
- package/scripts/social-analytics/publishers/tiktok.js +0 -217
- package/scripts/social-analytics/publishers/x.js +0 -259
- package/scripts/social-analytics/publishers/youtube.js +0 -223
- package/scripts/social-analytics/publishers/zernio.js +0 -568
- package/scripts/social-analytics/reconcile-thumbgate-campaign.js +0 -165
- package/scripts/social-analytics/run-digest.js +0 -34
- package/scripts/social-analytics/schedule-thumbgate-campaign.js +0 -275
- package/scripts/social-analytics/store.js +0 -455
- package/scripts/social-analytics/sync-launch-assets.js +0 -185
- package/scripts/social-analytics/utm.js +0 -143
- package/scripts/social-pipeline.js +0 -2626
- package/scripts/social-post-hourly.js +0 -228
- package/scripts/social-quality-gate.js +0 -134
- package/scripts/social-reply-monitor.js +0 -592
- package/scripts/status-dashboard.js +0 -155
- package/scripts/stripe-live-status.js +0 -115
- package/scripts/subagent-profiles.js +0 -79
- package/scripts/sync-branch-protection.js +0 -340
- package/scripts/sync-gh-secrets-from-env.sh +0 -70
- package/scripts/sync-github-about.js +0 -55
- package/scripts/sync-version.js +0 -479
- package/scripts/synthetic-dpo.js +0 -234
- package/scripts/tessl-export.js +0 -369
- package/scripts/test-coverage.js +0 -128
- package/scripts/thumbgate-bench.js +0 -494
- package/scripts/thumbgate_session_start.sh +0 -32
- package/scripts/train_from_feedback.py +0 -929
- package/scripts/validate-feedback.js +0 -581
- package/scripts/verify-obsidian-setup.sh +0 -269
- package/scripts/verify-run.js +0 -269
- package/scripts/weekly-auto-post.js +0 -124
- package/scripts/x-autonomous-marketing.js +0 -139
|
@@ -1,287 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
'use strict';
|
|
3
|
-
|
|
4
|
-
const fs = require('fs');
|
|
5
|
-
const os = require('os');
|
|
6
|
-
const path = require('path');
|
|
7
|
-
|
|
8
|
-
const ROOT = path.join(__dirname, '..');
|
|
9
|
-
const HARNESS_PATH = require.resolve('./natural-language-harness');
|
|
10
|
-
const RUNNER_PATH = require.resolve('./async-job-runner');
|
|
11
|
-
const VERIFICATION_PATH = require.resolve('./verification-loop');
|
|
12
|
-
const VERIFY_RUN_PATH = require.resolve('./verify-run');
|
|
13
|
-
const SERVER_STDIO_PATH = require.resolve('../adapters/mcp/server-stdio');
|
|
14
|
-
|
|
15
|
-
/**
 * Resolve where the harness proof artifacts are written.
 *
 * The directory is taken from THUMBGATE_HARNESSES_PROOF_DIR, then
 * THUMBGATE_PROOF_DIR, then `<ROOT>/proof`. An unset or empty variable falls
 * through to the next candidate.
 *
 * @returns {{proofDir: string, reportJson: string, reportMd: string}}
 *   The proof directory plus the JSON and Markdown report paths inside it.
 */
function resolveProofPaths() {
  const {
    THUMBGATE_HARNESSES_PROOF_DIR: harnessOverride,
    THUMBGATE_PROOF_DIR: sharedOverride,
  } = process.env;

  let proofDir = harnessOverride;
  if (!proofDir) {
    proofDir = sharedOverride;
  }
  if (!proofDir) {
    proofDir = path.join(ROOT, 'proof');
  }

  const inProofDir = (fileName) => path.join(proofDir, fileName);
  return {
    proofDir,
    reportJson: inProofDir('harnesses-report.json'),
    reportMd: inProofDir('harnesses-report.md'),
  };
}
|
|
23
|
-
|
|
24
|
-
/**
 * Evict the harness, job-runner, verification-loop, verify-run and MCP stdio
 * server modules from the require cache so the next `require` of any of them
 * re-evaluates the module (or picks up a stub installed afterwards).
 */
function resetModules() {
  const cachedPaths = [
    HARNESS_PATH,
    RUNNER_PATH,
    VERIFICATION_PATH,
    VERIFY_RUN_PATH,
    SERVER_STDIO_PATH,
  ];
  for (const cachedPath of cachedPaths) {
    delete require.cache[cachedPath];
  }
}
|
|
35
|
-
|
|
36
|
-
function stubModule(modulePath, exports) {
|
|
37
|
-
require.cache[modulePath] = {
|
|
38
|
-
id: modulePath,
|
|
39
|
-
filename: modulePath,
|
|
40
|
-
loaded: true,
|
|
41
|
-
exports,
|
|
42
|
-
};
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
/**
 * Build the canned "accepted on the first attempt" verification result that
 * stands in for the real verification loop during the proof run.
 *
 * @returns {object} A fresh result object on every call.
 */
function makeAcceptedVerification() {
  const finalVerification = { score: 1, violations: [] };
  const partnerStrategy = {
    profile: 'strict_reviewer',
    verificationMode: 'evidence_first',
  };
  const partnerReward = { reward: 1 };

  return {
    accepted: true,
    attempts: 1,
    finalVerification,
    partnerStrategy,
    partnerReward,
  };
}
|
|
62
|
-
|
|
63
|
-
/**
 * Run `callback` against freshly loaded harness modules in an isolated
 * feedback directory.
 *
 * THUMBGATE_FEEDBACK_DIR points at a new temp directory for the duration of
 * the call, the module cache is reset, and the verification loop is replaced
 * by a stub that always accepts. On exit the cache is reset again, the temp
 * directory is removed, and THUMBGATE_FEEDBACK_DIR is put back to whatever it
 * was before the call (or unset if it was not set).
 *
 * @param {(context: {harnesses: object, runner: object, verifyRun: object, server: object}) => *} callback
 *   Receives the freshly required modules; may return a promise.
 * @returns {Promise<*>} Whatever `callback` resolves to.
 */
async function withHarnessRuntime(callback) {
  // Remember the caller's setting so the finally block restores it instead of
  // unconditionally deleting the variable.
  const previousFeedbackDir = process.env.THUMBGATE_FEEDBACK_DIR;
  const feedbackDir = fs.mkdtempSync(path.join(os.tmpdir(), 'thumbgate-harness-proof-'));
  process.env.THUMBGATE_FEEDBACK_DIR = feedbackDir;
  try {
    resetModules();
    stubModule(VERIFICATION_PATH, {
      runVerificationLoop: () => makeAcceptedVerification(),
    });
    return await callback({
      harnesses: require('./natural-language-harness'),
      runner: require('./async-job-runner'),
      verifyRun: require('./verify-run'),
      server: require('../adapters/mcp/server-stdio'),
    });
  } finally {
    resetModules();
    if (previousFeedbackDir === undefined) {
      delete process.env.THUMBGATE_FEEDBACK_DIR;
    } else {
      process.env.THUMBGATE_FEEDBACK_DIR = previousFeedbackDir;
    }
    fs.rmSync(feedbackDir, { recursive: true, force: true });
  }
}
|
|
83
|
-
|
|
84
|
-
/**
 * Persist the proof results as pretty-printed JSON and as a Markdown
 * checklist.
 *
 * Only the directory of `reportJson` is created; `reportMd` is expected to
 * live in an existing directory (in practice the same one).
 *
 * @param {{phase: *, timestamp: *, passed: number, failed: number, requirements: Object<string, {id: string, desc: string, passed: boolean}>}} results
 * @param {string} reportJson - Destination path of the JSON report.
 * @param {string} reportMd - Destination path of the Markdown report.
 */
function writeReports(results, reportJson, reportMd) {
  fs.mkdirSync(path.dirname(reportJson), { recursive: true });
  fs.writeFileSync(reportJson, JSON.stringify(results, null, 2));

  const header = [
    '# Natural-Language Harness Proof',
    '',
    `- Phase: ${results.phase}`,
    `- Timestamp: ${results.timestamp}`,
    `- ${results.passed} passed, ${results.failed} failed`,
    '',
    '## Requirements',
    '',
  ];
  const checklist = Object.values(results.requirements).map((requirement) => {
    const mark = requirement.passed ? 'x' : ' ';
    return `- [${mark}] **${requirement.id}** ${requirement.desc}`;
  });

  const markdown = header.concat(checklist).join('\n');
  fs.writeFileSync(reportMd, `${markdown}\n`);
}
|
|
105
|
-
|
|
106
|
-
/**
 * Execute every HARNESS-0x proof check in order, record each outcome, write
 * the JSON and Markdown reports, and print a pass/fail summary.
 *
 * A failing check never aborts the run: its error message is stored on the
 * requirement entry, and `process.exitCode` is set to 1 after all checks ran.
 * Every check does its assertions inside `withHarnessRuntime`, so the modules
 * are only used while the isolated runtime is still in place.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const results = {
    phase: '23-natural-language-harnesses',
    timestamp: new Date().toISOString(),
    passed: 0,
    failed: 0,
    requirements: {},
  };
  const { reportJson, reportMd } = resolveProofPaths();

  // Trivial command used wherever a harness needs a verification step.
  const verificationCommand = 'node -e "process.stdout.write(\'verify ok\')"';

  const checks = [
    {
      id: 'HARNESS-01',
      desc: 'natural-language harness specs load and validate with required sections',
      fn: async () => {
        // Assert inside the runtime callback: the harness module must not be
        // used after the runtime has been torn down.
        await withHarnessRuntime(({ harnesses }) => {
          const listed = harnesses.listHarnesses();
          if (listed.length < 3) {
            throw new Error(`Expected at least 3 harnesses, found ${listed.length}`);
          }
          if (!listed.some((entry) => entry.id === 'repo-full-verification')) {
            throw new Error('repo-full-verification harness is missing');
          }
        });
      },
    },
    {
      id: 'HARNESS-02',
      desc: 'rendered harness plans substitute inputs into natural-language steps',
      fn: async () => {
        await withHarnessRuntime(({ harnesses }) => {
          const plan = harnesses.renderHarnessPlan('creator-partnership-review', {
            creatorHandle: 'agentbuilder',
          });
          if (!plan.steps.some((step) => step.includes('agentbuilder'))) {
            throw new Error('Expected rendered steps to include the input override');
          }
          if (!plan.successEvidence.some((line) => line.includes('agentbuilder'))) {
            throw new Error('Expected rendered evidence to include the input override');
          }
        });
      },
    },
    {
      id: 'HARNESS-03',
      desc: 'harness plans compile into executable async-job-runner stages',
      fn: async () => {
        await withHarnessRuntime(({ harnesses }) => {
          const job = harnesses.buildHarnessJob('repo-full-verification', {
            verificationCommand,
          }, { jobId: 'proof-harness-job' });
          const commandStage = job.stages.find((stage) => stage.command);
          if (!commandStage || !commandStage.command.includes('verify ok')) {
            throw new Error('Expected a command stage with the rendered verification command');
          }
          if (!job.tags.includes('natural-language-harness')) {
            throw new Error('Expected natural-language-harness tag on compiled job');
          }
        });
      },
    },
    {
      id: 'HARNESS-04',
      desc: 'runHarness executes a harness through the runtime with checkpoints and verification',
      fn: async () => {
        await withHarnessRuntime(({ harnesses, runner }) => {
          const result = harnesses.runHarness('repo-full-verification', {
            verificationCommand,
          }, {
            jobId: 'proof-run-harness',
          });
          const state = runner.readJobState('proof-run-harness');
          if (result.status !== 'completed') {
            throw new Error(`Expected completed harness result, got ${result.status}`);
          }
          if (!state || state.stageHistory.length < 3) {
            throw new Error('Expected persisted stage history for executed harness');
          }
          if (!String(state.currentContext || '').includes('Success evidence required:')) {
            throw new Error('Expected final context to include success evidence summary');
          }
        });
      },
    },
    {
      id: 'HARNESS-05',
      desc: 'MCP surfaces expose list_harnesses and run_harness operations',
      fn: () => {
        return withHarnessRuntime(async ({ server }) => {
          const listed = await server.handleRequest({
            jsonrpc: '2.0',
            id: 1,
            method: 'tools/call',
            params: {
              name: 'list_harnesses',
              arguments: { tag: 'verification' },
            },
          });
          const catalog = JSON.parse(listed.content[0].text);
          if (!catalog.harnesses.some((entry) => entry.id === 'repo-full-verification')) {
            throw new Error('Expected repo-full-verification in MCP harness catalog');
          }

          const executed = await server.handleRequest({
            jsonrpc: '2.0',
            id: 2,
            method: 'tools/call',
            params: {
              name: 'run_harness',
              arguments: {
                harness: 'repo-full-verification',
                jobId: 'mcp-proof-harness',
                inputs: {
                  verificationCommand,
                },
              },
            },
          });
          const payload = JSON.parse(executed.content[0].text);
          if (payload.status !== 'completed') {
            throw new Error(`Expected run_harness MCP result to complete, got ${payload.status}`);
          }
        });
      },
    },
    {
      id: 'HARNESS-06',
      desc: 'full verification includes the harness proof lane and artifact',
      fn: async () => {
        await withHarnessRuntime(({ verifyRun }) => {
          const plan = verifyRun.buildVerifyPlan('full');
          // Separate scratch dir for the recorded workflow run; removed below.
          const feedbackDir = fs.mkdtempSync(path.join(os.tmpdir(), 'thumbgate-harness-run-'));
          try {
            const workflowRun = verifyRun.recordVerifyWorkflowRun('full', ROOT, feedbackDir);
            if (!plan.some((step) => Array.isArray(step.args) && step.args.includes('prove:harnesses'))) {
              throw new Error('verify:full is missing prove:harnesses');
            }
            if (!workflowRun.proofArtifacts.some((artifact) => artifact.endsWith(path.join('proof', 'harnesses-report.json')))) {
              throw new Error('Workflow run is missing harness proof artifact');
            }
          } finally {
            fs.rmSync(feedbackDir, { recursive: true, force: true });
          }
        });
      },
    },
  ];

  // Checks run sequentially: each one swaps process-wide state (env var and
  // require cache) through withHarnessRuntime.
  for (const check of checks) {
    try {
      await check.fn();
      results.passed += 1;
      results.requirements[check.id] = { id: check.id, desc: check.desc, passed: true };
    } catch (error) {
      results.failed += 1;
      results.requirements[check.id] = {
        id: check.id,
        desc: check.desc,
        passed: false,
        // Tolerate non-Error throws so the failure is still recorded.
        error: error && error.message ? error.message : String(error),
      };
    }
  }

  writeReports(results, reportJson, reportMd);

  console.log(`${results.passed} passed, ${results.failed} failed`);
  if (results.failed > 0) {
    process.exitCode = 1;
  }
}
|
|
276
|
-
|
|
277
|
-
// When executed directly (rather than required), run the proof and exit with
// status 1 if it rejects unexpectedly.
if (require.main === module) {
  const reportFatal = (error) => {
    console.error(error.stack || error.message);
    process.exit(1);
  };
  run().catch(reportFatal);
}

module.exports = { resolveProofPaths, run };
|
|
@@ -1,257 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* prove-intelligence.js
|
|
4
|
-
*
|
|
5
|
-
* Smoke-test gate for Phase 9: Intelligence
|
|
6
|
-
* Verifies context-engine and skill-quality-tracker work end-to-end.
|
|
7
|
-
* Writes machine-readable JSON + human-readable markdown to proof/.
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
'use strict';
|
|
11
|
-
|
|
12
|
-
const fs = require('fs');
|
|
13
|
-
const os = require('os');
|
|
14
|
-
const path = require('path');
|
|
15
|
-
const { execSync } = require('child_process');
|
|
16
|
-
const { ensureDir } = require('./fs-utils');
|
|
17
|
-
|
|
18
|
-
const ROOT = path.join(__dirname, '..');
|
|
19
|
-
/**
 * Directory that receives the proof reports.
 *
 * @returns {string} THUMBGATE_PROOF_DIR when set and non-empty, otherwise
 *   `<ROOT>/proof`.
 */
function getProofDir() {
  const override = process.env.THUMBGATE_PROOF_DIR;
  if (override) {
    return override;
  }
  return path.join(ROOT, 'proof');
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
// ---------------------------------------------------------------------------
|
|
25
|
-
// Run test suite and parse results
|
|
26
|
-
// ---------------------------------------------------------------------------
|
|
27
|
-
/**
 * Run the intelligence test file with `node --test` from the repository root
 * and return the captured output.
 *
 * A failing command does not throw: the child's stdout (or stderr, or the
 * stringified error when both are empty) is returned instead.
 *
 * @returns {string} Captured test-runner output.
 */
function runTests() {
  const command = 'node --test tests/intelligence.test.js';
  const options = {
    cwd: ROOT,
    encoding: 'utf-8',
    stdio: ['pipe', 'pipe', 'pipe'],
  };

  let captured;
  try {
    captured = execSync(command, options);
  } catch (err) {
    captured = err.stdout || err.stderr || String(err);
  }
  return captured;
}
|
|
39
|
-
|
|
40
|
-
/**
 * Extract the pass/fail totals from `node --test` output.
 *
 * Both summary formats are recognised: the spec reporter's `ℹ pass N` /
 * `ℹ fail N` lines and the TAP reporter's `# pass N` / `# fail N` lines.
 * TAP matters because the output is captured through a pipe, and some Node
 * versions default to TAP when stdout is not a TTY.
 *
 * @param {string} output - Raw test-runner output.
 * @returns {{passed: number, failed: number}} Parsed totals; a count that
 *   cannot be found is reported as 0.
 */
function parseTestOutput(output) {
  const text = output == null ? '' : String(output);
  const readCount = (pattern) => {
    const match = text.match(pattern);
    return match ? Number.parseInt(match[1], 10) : 0;
  };
  return {
    passed: readCount(/(?:ℹ|#) pass (\d+)/),
    failed: readCount(/(?:ℹ|#) fail (\d+)/),
  };
}
|
|
47
|
-
|
|
48
|
-
// ---------------------------------------------------------------------------
|
|
49
|
-
// Smoke test: context-engine
|
|
50
|
-
// ---------------------------------------------------------------------------
|
|
51
|
-
/**
 * Smoke-test the context engine against a throwaway directory.
 *
 * Two small Markdown docs are indexed, a query is routed against that index
 * and must return a `ci-cd` result, and a prompt is registered and read back.
 * Nothing is thrown: any failure is reported in the returned object.
 *
 * @returns {{passed: true, docsIndexed: number, routingWorked: boolean, promptRegistry: boolean}|{passed: false, error: string}}
 */
function smokeContextEngine() {
  const scratchDir = fs.mkdtempSync(path.join(os.tmpdir(), 'prove-ce-'));
  try {
    // Drop any cached copy so the engine is evaluated fresh for this run.
    const enginePath = require.resolve('./context-engine.js');
    delete require.cache[enginePath];
    const engine = require('./context-engine.js');

    // Seed a docs directory with two documents and index it.
    const docsDir = path.join(scratchDir, 'docs');
    fs.mkdirSync(docsDir, { recursive: true });
    const seedDocs = [
      ['CI_GUIDE.md', '# CI Guide\nBuild pipeline guide.'],
      ['MCP_SERVER.md', '# MCP Server\nClaude MCP agent setup.'],
    ];
    for (const [fileName, body] of seedDocs) {
      fs.writeFileSync(path.join(docsDir, fileName), body);
    }

    const indexPath = path.join(scratchDir, 'idx.json');
    const index = engine.buildKnowledgeIndex(docsDir, indexPath);
    if (!(index.bundles && index.metadata)) {
      throw new Error('buildKnowledgeIndex missing bundles/metadata');
    }
    if (index.metadata.docCount !== 2) {
      throw new Error(`Expected 2 docs, got ${index.metadata.docCount}`);
    }

    // Route a query whose wording overlaps the "CI Guide" document.
    const routed = engine.routeQuery('guide for pipeline', indexPath, 3);
    if (!routed.results || routed.results.length === 0) {
      throw new Error('routeQuery returned no results');
    }
    const cats = routed.results.map((hit) => hit.category);
    if (!cats.includes('ci-cd')) {
      throw new Error(`ci-cd not in results: ${JSON.stringify(cats)}`);
    }

    // Round-trip a prompt through the registry.
    const registryPath = path.join(scratchDir, 'reg.json');
    const promptMeta = { models: ['claude-opus-4-6'], category: 'test' };
    engine.registerPrompt('test-prompt', 'Hello {{name}}', promptMeta, registryPath);
    const fetched = engine.getPrompt('test-prompt', 'claude-opus-4-6', registryPath);
    if (!fetched || !fetched.compatible) {
      throw new Error('registerPrompt/getPrompt failed');
    }

    return { passed: true, docsIndexed: 2, routingWorked: true, promptRegistry: true };
  } catch (err) {
    return { passed: false, error: err.message };
  } finally {
    fs.rmSync(scratchDir, { recursive: true, force: true });
  }
}
|
|
90
|
-
|
|
91
|
-
// ---------------------------------------------------------------------------
|
|
92
|
-
// Smoke test: skill-quality-tracker
|
|
93
|
-
// ---------------------------------------------------------------------------
|
|
94
|
-
/**
 * Smoke-test the skill quality tracker.
 *
 * Fixture metrics and feedback files are written to a throwaway directory and
 * exposed through METRICS_PATH / FEEDBACK_PATH. The tracker's scoring helpers
 * are then exercised on an in-memory breakdown (INTL-03: a consistently
 * successful skill must outrank a mixed one). Nothing is thrown: any failure
 * is reported in the returned object.
 *
 * METRICS_PATH and FEEDBACK_PATH are restored to their previous values on
 * exit, or unset if they were not set before.
 *
 * @returns {{passed: true, correlationWindowMs: *, consistentSuccessRate: number, mixedSuccessRate: number, intl03Satisfied: boolean, topPerformer: string}|{passed: false, error: string}}
 */
function smokeSkillTracker() {
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'prove-sqt-'));
  // Snapshot the caller's environment so cleanup restores it rather than
  // deleting variables that were set before this function ran.
  const savedEnv = {
    METRICS_PATH: process.env.METRICS_PATH,
    FEEDBACK_PATH: process.env.FEEDBACK_PATH,
  };
  try {
    delete require.cache[require.resolve('./skill-quality-tracker.js')];
    const sqt = require('./skill-quality-tracker.js');

    const now = Date.now();

    // Write metrics
    const metricsPath = path.join(tmpDir, 'metrics.jsonl');
    const metrics = [
      { tool_name: 'Read', timestamp: new Date(now).toISOString() },
      { tool_name: 'Write', timestamp: new Date(now + 1000).toISOString() },
      { tool_name: 'Read', timestamp: new Date(now + 2000).toISOString() },
    ];
    fs.writeFileSync(metricsPath, metrics.map((m) => JSON.stringify(m)).join('\n'));

    // Write feedback a few seconds after the metrics above
    const feedbackPath = path.join(tmpDir, 'feedback.jsonl');
    const feedback = [
      { timestamp: new Date(now + 5000).toISOString(), feedback: 'up' },
      { timestamp: new Date(now + 6000).toISOString(), signal: 'negative' },
    ];
    fs.writeFileSync(feedbackPath, feedback.map((f) => JSON.stringify(f)).join('\n'));

    // Point the tracker's env-based paths at the fixtures.
    process.env.METRICS_PATH = metricsPath;
    process.env.FEEDBACK_PATH = feedbackPath;

    // The helpers below are called directly with an in-memory breakdown, so
    // the module does not need to be re-required after the env change.
    const breakdown = {
      ConsistentSkill: { uses: 20, correlatedPositive: 18, correlatedNegative: 2 },
      MixedSkill: { uses: 20, correlatedPositive: 10, correlatedNegative: 10 },
    };
    // The checks below rely on computeSuccessRates adding `successRate` to each entry.
    sqt.computeSuccessRates(breakdown);

    if (!(breakdown.ConsistentSkill.successRate > breakdown.MixedSkill.successRate)) {
      throw new Error('INTL-03: ConsistentSkill should score higher than MixedSkill');
    }

    const top = sqt.topPerformers(breakdown, 10, 5);
    if (top.length === 0) throw new Error('topPerformers returned empty array');
    if (top[0].tool !== 'ConsistentSkill') throw new Error('Expected ConsistentSkill as top performer');

    const recs = sqt.generateRecommendations(top, [], breakdown);
    if (!Array.isArray(recs) || recs.length === 0) throw new Error('generateRecommendations returned empty');

    return {
      passed: true,
      correlationWindowMs: sqt.CORRELATION_WINDOW_MS,
      consistentSuccessRate: breakdown.ConsistentSkill.successRate,
      mixedSuccessRate: breakdown.MixedSkill.successRate,
      intl03Satisfied: breakdown.ConsistentSkill.successRate > breakdown.MixedSkill.successRate,
      topPerformer: top[0].tool,
    };
  } catch (err) {
    return { passed: false, error: err.message };
  } finally {
    for (const [key, value] of Object.entries(savedEnv)) {
      if (value === undefined) {
        // Assigning undefined would store the string "undefined".
        delete process.env[key];
      } else {
        process.env[key] = value;
      }
    }
    fs.rmSync(tmpDir, { recursive: true, force: true });
  }
}
|
|
157
|
-
|
|
158
|
-
// ---------------------------------------------------------------------------
|
|
159
|
-
// Main
|
|
160
|
-
// ---------------------------------------------------------------------------
|
|
161
|
-
/**
 * Runs the Phase 9 "Intelligence" proof gate end to end.
 *
 * Steps:
 *   1. Run the unit-test suite and parse its pass/fail counts.
 *   2. Run the context-engine and skill-quality-tracker smoke tests.
 *   3. Write `intelligence-report.json` and `intelligence-report.md` into the
 *      proof directory.
 *   4. Print a summary and terminate the process with exit code 0 when every
 *      check passed, 1 otherwise.
 *
 * The function stays `async` even though it awaits nothing, so that an
 * exception thrown by any step surfaces as a rejected promise for the
 * caller's `.catch()` handler.
 *
 * @returns {Promise<void>} Does not resolve in practice: the process exits first.
 */
async function main() {
  console.log('Running Phase 9: Intelligence proof gate...\n');

  const testOutput = runTests();
  const { passed: testsPassed, failed: testsFailed } = parseTestOutput(testOutput);
  const ceSmoke = smokeContextEngine();
  const sqtSmoke = smokeSkillTracker();

  // NOTE(review): a run in which zero tests were parsed still counts as a pass
  // here, because only the failure count is checked — confirm that is intended.
  const allPassed = testsFailed === 0 && ceSmoke.passed && sqtSmoke.passed;

  const report = {
    phase: 9,
    name: 'Intelligence',
    requirements: ['INTL-01', 'INTL-02', 'INTL-03'],
    generatedAt: new Date().toISOString(),
    testResults: {
      passed: testsPassed,
      failed: testsFailed,
      suiteFile: 'tests/intelligence.test.js',
    },
    smokeTests: {
      contextEngine: ceSmoke,
      skillQualityTracker: sqtSmoke,
    },
    overallPassed: allPassed,
  };

  const proofDir = getProofDir();
  ensureDir(proofDir);
  const jsonPath = path.join(proofDir, 'intelligence-report.json');
  const mdPath = path.join(proofDir, 'intelligence-report.md');

  fs.writeFileSync(jsonPath, JSON.stringify(report, null, 2));

  const status = allPassed ? 'PASSED' : 'FAILED';

  // Each smoke section lists its detail bullets on success, or the captured
  // error message on failure.
  const ceDetails = ceSmoke.passed
    ? [
        `- Docs indexed: ${ceSmoke.docsIndexed}`,
        `- Routing worked: ${ceSmoke.routingWorked}`,
        `- Prompt registry: ${ceSmoke.promptRegistry}`,
      ].join('\n')
    : `- Error: ${ceSmoke.error}`;

  const sqtDetails = sqtSmoke.passed
    ? [
        `- Correlation window: ${sqtSmoke.correlationWindowMs}ms`,
        `- Consistent skill success rate: ${sqtSmoke.consistentSuccessRate}`,
        `- Mixed skill success rate: ${sqtSmoke.mixedSuccessRate}`,
        `- INTL-03 satisfied (consistent > mixed): ${sqtSmoke.intl03Satisfied}`,
        `- Top performer: ${sqtSmoke.topPerformer}`,
      ].join('\n')
    : `- Error: ${sqtSmoke.error}`;

  // The INTL-03 row reports the counts measured in this run rather than a
  // fixed figure, so the description can never contradict the status column.
  const md = `# Phase 9: Intelligence — Proof Report

**Status:** ${status}
**Generated:** ${report.generatedAt}
**Requirements:** ${report.requirements.join(', ')}

## Test Results

| Suite | Passed | Failed |
|-------|--------|--------|
| intelligence.test.js | ${testsPassed} | ${testsFailed} |

## Smoke Tests

### Context Engine (INTL-01)

- Passed: ${ceSmoke.passed}
${ceDetails}

### Skill Quality Tracker (INTL-02, INTL-03)

- Passed: ${sqtSmoke.passed}
${sqtDetails}

## Requirements Coverage

| Requirement | Description | Status |
|-------------|-------------|--------|
| INTL-01 | Context engine routes queries to pre-computed bundles | ${ceSmoke.passed ? 'PASS' : 'FAIL'} |
| INTL-02 | Skill tracker correlates tool calls to feedback by timestamp proximity | ${sqtSmoke.passed ? 'PASS' : 'FAIL'} |
| INTL-03 | Both modules have unit tests (${testsPassed} passed, ${testsFailed} failed) | ${testsFailed === 0 ? 'PASS' : 'FAIL'} |

## Files Created

- \`scripts/context-engine.js\` — Knowledge bundle builder, context router, quality scorer, prompt registry
- \`scripts/skill-quality-tracker.js\` — Tool call metric correlation to feedback by timestamp proximity
- \`tests/intelligence.test.js\` — ${testsPassed} unit tests covering routing logic, correlation, edge cases
- \`scripts/prove-intelligence.js\` — This proof gate script
`;

  fs.writeFileSync(mdPath, md);

  console.log(`Status: ${status}`);
  console.log(`Tests: ${testsPassed} passed, ${testsFailed} failed`);
  console.log(`Context Engine smoke: ${ceSmoke.passed ? 'PASS' : 'FAIL'}`);
  console.log(`Skill Tracker smoke: ${sqtSmoke.passed ? 'PASS' : 'FAIL'}`);
  console.log(`\nReport written to: ${mdPath}`);
  console.log(`JSON report: ${jsonPath}`);

  // The exit code is the gate's verdict for whatever invoked the script.
  process.exit(allPassed ? 0 : 1);
}
|
|
253
|
-
|
|
254
|
-
// Script entry point: start the proof gate. Any rejection from main() is
// reported on stderr and turned into a failing exit code.
main().catch((failure) => {
  console.error('prove-intelligence failed:', failure.message);
  process.exit(1);
});
|