thumbgate 1.4.3 → 1.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/llms.txt +12 -8
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +18 -8
- package/adapters/README.md +1 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/codex/config.toml +2 -2
- package/adapters/mcp/server-stdio.js +1 -1
- package/adapters/opencode/opencode.json +1 -1
- package/config/github-about.json +2 -2
- package/package.json +158 -10
- package/scripts/billing.js +5 -2
- package/scripts/statusline.sh +1 -0
- package/src/api/server.js +113 -16
- package/src/index.js +3 -0
- package/.claude-plugin/bundle/icon.png +0 -0
- package/.claude-plugin/bundle/icon.svg +0 -18
- package/.claude-plugin/bundle/server/index.js +0 -24
- package/adapters/chatgpt/INSTALL.md +0 -158
- package/adapters/perplexity/.mcp.json +0 -36
- package/adapters/perplexity/config.toml +0 -16
- package/adapters/perplexity/opencode.json +0 -29
- package/bin/memory.sh +0 -64
- package/bin/obsidian-sync.sh +0 -20
- package/plugins/amp-skill/INSTALL.md +0 -52
- package/plugins/amp-skill/SKILL.md +0 -64
- package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +0 -22
- package/plugins/claude-codex-bridge/.mcp.json +0 -14
- package/plugins/claude-codex-bridge/INSTALL.md +0 -43
- package/plugins/claude-codex-bridge/README.md +0 -46
- package/plugins/claude-codex-bridge/scripts/codex-bridge.js +0 -286
- package/plugins/claude-codex-bridge/skills/adversarial-review/SKILL.md +0 -24
- package/plugins/claude-codex-bridge/skills/result/SKILL.md +0 -22
- package/plugins/claude-codex-bridge/skills/review/SKILL.md +0 -28
- package/plugins/claude-codex-bridge/skills/second-pass/SKILL.md +0 -27
- package/plugins/claude-codex-bridge/skills/setup/SKILL.md +0 -21
- package/plugins/claude-codex-bridge/skills/status/SKILL.md +0 -19
- package/plugins/claude-skill/INSTALL.md +0 -55
- package/plugins/claude-skill/SKILL.md +0 -46
- package/plugins/codex-profile/.codex-plugin/plugin.json +0 -43
- package/plugins/codex-profile/.mcp.json +0 -14
- package/plugins/codex-profile/AGENTS.md +0 -20
- package/plugins/codex-profile/INSTALL.md +0 -89
- package/plugins/codex-profile/README.md +0 -61
- package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +0 -23
- package/plugins/cursor-marketplace/CHANGELOG.md +0 -30
- package/plugins/cursor-marketplace/LICENSE +0 -21
- package/plugins/cursor-marketplace/README.md +0 -124
- package/plugins/cursor-marketplace/agents/reliability-reviewer.md +0 -31
- package/plugins/cursor-marketplace/assets/logo-400x400.png +0 -0
- package/plugins/cursor-marketplace/commands/capture-feedback.md +0 -33
- package/plugins/cursor-marketplace/commands/check-gates.md +0 -25
- package/plugins/cursor-marketplace/commands/show-lessons.md +0 -27
- package/plugins/cursor-marketplace/hooks/hooks.json +0 -10
- package/plugins/cursor-marketplace/mcp.json +0 -14
- package/plugins/cursor-marketplace/rules/feedback-capture.mdc +0 -34
- package/plugins/cursor-marketplace/rules/pre-action-gates.mdc +0 -30
- package/plugins/cursor-marketplace/rules/session-continuity.mdc +0 -28
- package/plugins/cursor-marketplace/scripts/gate-check.sh +0 -21
- package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +0 -48
- package/plugins/cursor-marketplace/skills/prevention-rules/SKILL.md +0 -31
- package/plugins/cursor-marketplace/skills/recall-context/SKILL.md +0 -30
- package/plugins/cursor-marketplace/skills/search-lessons/SKILL.md +0 -33
- package/plugins/gemini-extension/INSTALL.md +0 -92
- package/plugins/gemini-extension/gemini_prompt.txt +0 -14
- package/plugins/gemini-extension/tool_contract.json +0 -45
- package/plugins/opencode-profile/INSTALL.md +0 -57
- package/public/assets/instagram-card.png +0 -0
- package/public/assets/tiktok-agent-memory.mp4 +0 -0
- package/public/blog.html +0 -474
- package/public/compare/mem0.html +0 -189
- package/public/compare/speclock.html +0 -180
- package/public/compare.html +0 -310
- package/public/dashboard.html +0 -1100
- package/public/guide.html +0 -317
- package/public/guides/claude-code-prevent-repeated-mistakes.html +0 -161
- package/public/guides/codex-cli-guardrails.html +0 -158
- package/public/guides/cursor-prevent-repeated-mistakes.html +0 -161
- package/public/guides/pre-action-gates.html +0 -162
- package/public/guides/stop-repeated-ai-agent-mistakes.html +0 -159
- package/public/index.html +0 -1225
- package/public/js/buyer-intent.js +0 -252
- package/public/learn/agent-harness-pattern.html +0 -180
- package/public/learn/ai-agent-persistent-memory.html +0 -203
- package/public/learn/learn.css +0 -45
- package/public/learn/mcp-pre-action-gates-explained.html +0 -172
- package/public/learn/stop-ai-agent-force-push.html +0 -134
- package/public/learn/vibe-coding-safety-net.html +0 -142
- package/public/learn.html +0 -274
- package/public/lessons.html +0 -967
- package/public/llm-context.md +0 -156
- package/public/pro.html +0 -1087
- package/public/vercel.json +0 -8
- package/scripts/a2ui-engine.js +0 -73
- package/scripts/adk-consolidator.js +0 -274
- package/scripts/agent-security-hardening.js +0 -225
- package/scripts/ai-search-visibility.js +0 -116
- package/scripts/autonomous-sales-agent.js +0 -39
- package/scripts/autoresearch-runner.js +0 -216
- package/scripts/background-agent-governance.js +0 -229
- package/scripts/behavioral-extraction.js +0 -93
- package/scripts/budget-enforcer.js +0 -173
- package/scripts/budget-guard.js +0 -173
- package/scripts/build-claude-mcpb.js +0 -255
- package/scripts/build-codex-plugin.js +0 -152
- package/scripts/capture-railway-diagnostics.sh +0 -97
- package/scripts/changeset-check.js +0 -372
- package/scripts/check-congruence.js +0 -443
- package/scripts/computer-use-firewall.js +0 -280
- package/scripts/content-engine/linkedin-content-generator.js +0 -154
- package/scripts/content-engine/output/linkedin-memento-validation.md +0 -17
- package/scripts/content-engine/output/linkedin-posts-2026-04-09.md +0 -175
- package/scripts/content-engine/reddit-thread-finder.js +0 -154
- package/scripts/context-engine.js +0 -710
- package/scripts/daily-digest.js +0 -11
- package/scripts/data-governance.js +0 -173
- package/scripts/deploy-gcp.sh +0 -44
- package/scripts/deploy-policy.js +0 -249
- package/scripts/disagreement-mining.js +0 -315
- package/scripts/dpo-optimizer.js +0 -206
- package/scripts/ensure-repo-bootstrap.js +0 -130
- package/scripts/ephemeral-agent-store.js +0 -212
- package/scripts/eval-harness.js +0 -56
- package/scripts/export-kto-pairs.js +0 -309
- package/scripts/export-training.js +0 -446
- package/scripts/feedback-fallback.js +0 -111
- package/scripts/feedback-inbox-read.js +0 -162
- package/scripts/feedback-root-consolidator.js +0 -233
- package/scripts/feedback-to-memory.js +0 -185
- package/scripts/gate-satisfy.js +0 -42
- package/scripts/generate-paperbanana-diagrams.sh +0 -99
- package/scripts/generate-pretool-hook.sh +0 -40
- package/scripts/github-about.js +0 -430
- package/scripts/github-outreach.js +0 -65
- package/scripts/gtm-revenue-loop.js +0 -535
- package/scripts/hallucination-detector.js +0 -226
- package/scripts/hf-papers.js +0 -317
- package/scripts/hook-auto-capture.sh +0 -100
- package/scripts/hook-stop-pr-thread-check.sh +0 -68
- package/scripts/hook-stop-self-score.sh +0 -51
- package/scripts/hook-stop-verify-deploy.sh +0 -31
- package/scripts/hook-verify-before-done.sh +0 -20
- package/scripts/managed-dpo-export.js +0 -91
- package/scripts/markdown-escape.js +0 -12
- package/scripts/marketing-experiment.js +0 -657
- package/scripts/memalign-recall.js +0 -111
- package/scripts/memory-migration.js +0 -296
- package/scripts/meta-policy.js +0 -190
- package/scripts/metered-billing.js +0 -16
- package/scripts/model-tier-router.js +0 -310
- package/scripts/money-watcher.js +0 -218
- package/scripts/multi-hop-recall.js +0 -240
- package/scripts/per-step-scoring.js +0 -163
- package/scripts/perplexity-command-center.js +0 -644
- package/scripts/perplexity-marketing.js +0 -454
- package/scripts/pii-scanner.js +0 -153
- package/scripts/plan-gate.js +0 -154
- package/scripts/post-everywhere.js +0 -341
- package/scripts/post-to-x-retry.sh +0 -22
- package/scripts/post-to-x.js +0 -369
- package/scripts/pr-manager.js +0 -421
- package/scripts/principle-extractor.js +0 -162
- package/scripts/pro-features.js +0 -41
- package/scripts/prompt-dlp.js +0 -222
- package/scripts/prove-adapters.js +0 -860
- package/scripts/prove-attribution.js +0 -361
- package/scripts/prove-automation.js +0 -651
- package/scripts/prove-autoresearch.js +0 -304
- package/scripts/prove-claim-verification.js +0 -277
- package/scripts/prove-cloudflare-sandbox.js +0 -161
- package/scripts/prove-data-pipeline.js +0 -408
- package/scripts/prove-data-quality.js +0 -227
- package/scripts/prove-evolution.js +0 -352
- package/scripts/prove-harnesses.js +0 -287
- package/scripts/prove-intelligence.js +0 -257
- package/scripts/prove-lancedb.js +0 -425
- package/scripts/prove-local-intelligence.js +0 -340
- package/scripts/prove-loop-closure.js +0 -263
- package/scripts/prove-packaged-runtime.js +0 -327
- package/scripts/prove-predictive-insights.js +0 -355
- package/scripts/prove-runtime.js +0 -363
- package/scripts/prove-seo-gsd.js +0 -234
- package/scripts/prove-settings.js +0 -279
- package/scripts/prove-subway-upgrades.js +0 -277
- package/scripts/prove-tessl.js +0 -229
- package/scripts/prove-training-export.js +0 -325
- package/scripts/prove-workflow-contract.js +0 -112
- package/scripts/prove-xmemory.js +0 -332
- package/scripts/publish-decision.js +0 -159
- package/scripts/ralph-loop.js +0 -376
- package/scripts/ralph-mode-ci.js +0 -434
- package/scripts/reddit-dm-outreach.js +0 -192
- package/scripts/reddit-monitor-cron.sh +0 -26
- package/scripts/reminder-engine.js +0 -132
- package/scripts/revenue-status.js +0 -472
- package/scripts/rotate-stripe-webhook-secret.js +0 -314
- package/scripts/schedule-manager.js +0 -249
- package/scripts/self-healing-check.js +0 -193
- package/scripts/session-analyzer.js +0 -533
- package/scripts/shieldcortex-memory-firewall-runner.mjs +0 -53
- package/scripts/skill-exporter.js +0 -260
- package/scripts/skill-materializer.js +0 -134
- package/scripts/skill-packs.js +0 -136
- package/scripts/skill-proposer.js +0 -99
- package/scripts/skill-quality-tracker.js +0 -282
- package/scripts/slow-loop.js +0 -72
- package/scripts/social-analytics/db/marketing-db.js +0 -179
- package/scripts/social-analytics/db/schema.sql +0 -55
- package/scripts/social-analytics/digest.js +0 -256
- package/scripts/social-analytics/engagement-audit.js +0 -185
- package/scripts/social-analytics/generate-instagram-card.js +0 -123
- package/scripts/social-analytics/generate-slides.js +0 -268
- package/scripts/social-analytics/instagram-thumbgate-post.js +0 -111
- package/scripts/social-analytics/install-growth-automation.js +0 -114
- package/scripts/social-analytics/load-env.js +0 -77
- package/scripts/social-analytics/mcp-server.js +0 -289
- package/scripts/social-analytics/normalizer.js +0 -580
- package/scripts/social-analytics/notify.js +0 -162
- package/scripts/social-analytics/poll-all.js +0 -107
- package/scripts/social-analytics/pollers/github.js +0 -195
- package/scripts/social-analytics/pollers/instagram.js +0 -253
- package/scripts/social-analytics/pollers/linkedin.js +0 -340
- package/scripts/social-analytics/pollers/plausible.js +0 -245
- package/scripts/social-analytics/pollers/reddit.js +0 -306
- package/scripts/social-analytics/pollers/threads.js +0 -233
- package/scripts/social-analytics/pollers/tiktok.js +0 -203
- package/scripts/social-analytics/pollers/x.js +0 -227
- package/scripts/social-analytics/pollers/youtube.js +0 -304
- package/scripts/social-analytics/pollers/zernio.js +0 -183
- package/scripts/social-analytics/post-video.js +0 -316
- package/scripts/social-analytics/publish-instagram-thumbgate.js +0 -104
- package/scripts/social-analytics/publish-thumbgate-launch.js +0 -322
- package/scripts/social-analytics/publishers/devto.js +0 -122
- package/scripts/social-analytics/publishers/instagram.js +0 -317
- package/scripts/social-analytics/publishers/linkedin.js +0 -294
- package/scripts/social-analytics/publishers/reddit.js +0 -385
- package/scripts/social-analytics/publishers/threads.js +0 -275
- package/scripts/social-analytics/publishers/tiktok.js +0 -217
- package/scripts/social-analytics/publishers/x.js +0 -259
- package/scripts/social-analytics/publishers/youtube.js +0 -223
- package/scripts/social-analytics/publishers/zernio.js +0 -568
- package/scripts/social-analytics/reconcile-thumbgate-campaign.js +0 -165
- package/scripts/social-analytics/run-digest.js +0 -34
- package/scripts/social-analytics/schedule-thumbgate-campaign.js +0 -275
- package/scripts/social-analytics/store.js +0 -455
- package/scripts/social-analytics/sync-launch-assets.js +0 -185
- package/scripts/social-analytics/utm.js +0 -143
- package/scripts/social-pipeline.js +0 -2626
- package/scripts/social-post-hourly.js +0 -228
- package/scripts/social-quality-gate.js +0 -134
- package/scripts/social-reply-monitor.js +0 -592
- package/scripts/status-dashboard.js +0 -155
- package/scripts/stripe-live-status.js +0 -115
- package/scripts/subagent-profiles.js +0 -79
- package/scripts/sync-branch-protection.js +0 -340
- package/scripts/sync-gh-secrets-from-env.sh +0 -70
- package/scripts/sync-github-about.js +0 -55
- package/scripts/sync-version.js +0 -479
- package/scripts/synthetic-dpo.js +0 -234
- package/scripts/tessl-export.js +0 -369
- package/scripts/test-coverage.js +0 -128
- package/scripts/thumbgate-bench.js +0 -494
- package/scripts/thumbgate_session_start.sh +0 -32
- package/scripts/train_from_feedback.py +0 -929
- package/scripts/validate-feedback.js +0 -581
- package/scripts/verify-obsidian-setup.sh +0 -269
- package/scripts/verify-run.js +0 -269
- package/scripts/weekly-auto-post.js +0 -124
- package/scripts/x-autonomous-marketing.js +0 -139
package/scripts/test-coverage.js
DELETED
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
const { spawnSync } = require('node:child_process');
|
|
3
|
-
const fs = require('node:fs');
|
|
4
|
-
const path = require('node:path');
|
|
5
|
-
|
|
6
|
-
// Parent directory of the folder that contains this script.
const PROJECT_ROOT = path.join(__dirname, '..');
// Folder that is scanned for `*.test.js` files.
const TESTS_DIR = path.join(PROJECT_ROOT, 'tests');
// Globs handed to `--test-coverage-include`, one per top-level source folder.
const COVERAGE_INCLUDE_GLOBS = ['.claude', 'adapters', 'bin', 'plugins', 'scripts', 'src']
  .map((folder) => `${folder}/**/*.js`);
// Globs handed to `--test-coverage-exclude`.
const COVERAGE_EXCLUDE_GLOBS = ['tests/**/*.js', 'scripts/social-reply-monitor.js'];
// Memoized answer of the filter-flag probe; stays `undefined` until the first real probe.
let cachedCoverageFilterSupport;
|
|
21
|
-
|
|
22
|
-
/**
 * Recursively collect every `*.test.js` file below `dir`.
 *
 * @param {object} [options]
 * @param {string} [options.dir=TESTS_DIR] Directory to scan.
 * @param {string} [options.projectRoot=PROJECT_ROOT] Base the returned paths are relative to.
 * @returns {string[]} Paths relative to `projectRoot`, sorted with the default string sort.
 * @throws Propagates any error raised by `fs.readdirSync` for `dir` or a subdirectory.
 */
function findCoverageTestFiles({
  dir = TESTS_DIR,
  projectRoot = PROJECT_ROOT,
} = {}) {
  const collected = fs
    .readdirSync(dir, { withFileTypes: true })
    .flatMap((dirent) => {
      const location = path.join(dir, dirent.name);
      if (dirent.isDirectory()) {
        // Descend, keeping the same base so nested results stay project-relative.
        return findCoverageTestFiles({ dir: location, projectRoot });
      }
      const isTestFile = dirent.isFile() && dirent.name.endsWith('.test.js');
      return isTestFile ? [path.relative(projectRoot, location)] : [];
    });

  return collected.sort();
}
|
|
43
|
-
|
|
44
|
-
/**
 * Report whether the running Node binary advertises both coverage filter flags.
 *
 * Runs `<process.execPath> --help` through `spawn` and searches the combined
 * stdout/stderr text for `--test-coverage-include` and `--test-coverage-exclude`.
 *
 * @param {object} [options]
 * @param {Function} [options.spawn=spawnSync] spawnSync-compatible function.
 * @returns {boolean} True only when both flags appear in the help text.
 */
function detectCoverageFilterSupport({ spawn = spawnSync } = {}) {
  // Only probes made with the real spawnSync are memoized; an injected spawn always runs.
  const usesRealSpawn = spawn === spawnSync;
  if (usesRealSpawn && cachedCoverageFilterSupport !== undefined) {
    return cachedCoverageFilterSupport;
  }

  const { stdout, stderr } = spawn(process.execPath, ['--help'], { encoding: 'utf8' });
  const helpText = `${stdout || ''}\n${stderr || ''}`;
  const supported = ['--test-coverage-include', '--test-coverage-exclude']
    .every((flag) => helpText.includes(flag));

  if (usesRealSpawn) {
    cachedCoverageFilterSupport = supported;
  }
  return supported;
}
|
|
61
|
-
|
|
62
|
-
/**
 * Assemble the Node CLI arguments for a serial test run with coverage enabled.
 *
 * @param {string[]} files Test files appended after all flags.
 * @param {object} [options]
 * @param {Function} [options.spawn=spawnSync] Forwarded to the filter-support probe.
 * @param {boolean} [options.supportsFilters] When given, used as-is instead of probing.
 * @returns {string[]} Flags, then include/exclude filter pairs (if enabled), then `files`.
 */
function buildCoverageArgs(files, { spawn = spawnSync, supportsFilters } = {}) {
  const filtersEnabled = supportsFilters !== undefined
    ? supportsFilters
    : detectCoverageFilterSupport({ spawn });

  const filterArgs = [];
  if (filtersEnabled) {
    for (const glob of COVERAGE_INCLUDE_GLOBS) {
      filterArgs.push('--test-coverage-include', glob);
    }
    for (const glob of COVERAGE_EXCLUDE_GLOBS) {
      filterArgs.push('--test-coverage-exclude', glob);
    }
  }

  return [
    '--test',
    '--test-concurrency=1',
    '--experimental-test-coverage',
    ...filterArgs,
    ...files,
  ];
}
|
|
82
|
-
|
|
83
|
-
/**
 * Run the test suite under Node's coverage mode and summarize the outcome.
 *
 * @param {object} [options]
 * @param {string[]} [options.files] Test files; defaults to `findCoverageTestFiles()`.
 * @param {string} [options.cwd=PROJECT_ROOT] Working directory of the child process.
 * @param {Function} [options.spawn=spawnSync] spawnSync-compatible function.
 * @param {boolean} [options.supportsFilters] Forwarded to `buildCoverageArgs`.
 * @returns {{exitCode: number, error: (string|null), args: string[]}} `exitCode` is the
 *   child's integer status, or 1 when no files were given or the status is not an integer.
 */
function runCoverage({
  files = findCoverageTestFiles(),
  cwd = PROJECT_ROOT,
  spawn = spawnSync,
  supportsFilters,
} = {}) {
  const toArgs = () => buildCoverageArgs(files, { spawn, supportsFilters });

  if (files.length === 0) {
    // Nothing to run: report a failure without starting the test process.
    return {
      exitCode: 1,
      error: 'No test files found for coverage run.',
      args: toArgs(),
    };
  }

  const args = toArgs();
  const { status, error } = spawn(process.execPath, args, {
    cwd,
    env: process.env,
    stdio: 'inherit',
  });

  return {
    exitCode: Number.isInteger(status) ? status : 1,
    error: error ? error.message : null,
    args,
  };
}
|
|
110
|
-
|
|
111
|
-
if (require.main === module) {
|
|
112
|
-
const result = runCoverage();
|
|
113
|
-
if (result.error) {
|
|
114
|
-
console.error(result.error);
|
|
115
|
-
}
|
|
116
|
-
process.exit(result.exitCode);
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
module.exports = {
|
|
120
|
-
COVERAGE_EXCLUDE_GLOBS,
|
|
121
|
-
COVERAGE_INCLUDE_GLOBS,
|
|
122
|
-
PROJECT_ROOT,
|
|
123
|
-
TESTS_DIR,
|
|
124
|
-
detectCoverageFilterSupport,
|
|
125
|
-
findCoverageTestFiles,
|
|
126
|
-
buildCoverageArgs,
|
|
127
|
-
runCoverage,
|
|
128
|
-
};
|
|
package/scripts/thumbgate-bench.js
DELETED
|
@@ -1,494 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
'use strict';
|
|
3
|
-
|
|
4
|
-
const fs = require('node:fs');
|
|
5
|
-
const os = require('node:os');
|
|
6
|
-
const path = require('node:path');
|
|
7
|
-
|
|
8
|
-
// Parent directory of the folder that contains this script.
const ROOT = path.join(__dirname, '..');
// Scenario suite used when `--scenarios` is not supplied.
const DEFAULT_SUITE_PATH = path.join(ROOT, 'bench', 'thumbgate-bench.json');
// Score threshold used when `--min-score` is not supplied.
const DEFAULT_MIN_SCORE = 90;
// Literal characters and their backslash-escaped two-character forms.
const BACKSLASH = '\\';
const PIPE = '|';
const ESCAPED_BACKSLASH = `${BACKSLASH}${BACKSLASH}`;
const ESCAPED_PIPE = `${BACKSLASH}${PIPE}`;
|
|
15
|
-
|
|
16
|
-
/**
 * Apply a boolean CLI flag to `args`.
 *
 * Recognized flags: `--json`, `--use-runtime-state`, `--help` and `-h`.
 *
 * @param {object} args Options object, mutated when the flag is recognized.
 * @param {string} arg Single command-line token.
 * @returns {boolean} True when `arg` was a recognized flag, false otherwise.
 */
function parseBooleanOption(args, arg) {
  switch (arg) {
    case '--json':
      args.json = true;
      return true;
    case '--use-runtime-state':
      args.useRuntimeState = true;
      return true;
    case '--help':
    case '-h':
      args.help = true;
      return true;
    default:
      return false;
  }
}
|
|
31
|
-
|
|
32
|
-
/**
 * Parse an `<optionName>=<path>` token and store the resolved path on `args`.
 *
 * @param {object} args Options object, mutated on a match.
 * @param {string} arg Single command-line token.
 * @param {string} optionName Option name without the trailing `=`, e.g. `--out-dir`.
 * @param {string} fieldName Property of `args` that receives the absolute path.
 * @returns {boolean} True when `arg` started with `<optionName>=`, false otherwise.
 */
function parsePathOption(args, arg, optionName, fieldName) {
  const marker = `${optionName}=`;
  const matches = arg.startsWith(marker);
  if (matches) {
    // path.resolve makes the value absolute relative to the current working directory.
    args[fieldName] = path.resolve(arg.slice(marker.length));
  }
  return matches;
}
|
|
40
|
-
|
|
41
|
-
/**
 * Parse a `--min-score=<n>` token and store the number on `args.minScore`.
 *
 * @param {object} args Options object, mutated on a valid match.
 * @param {string} arg Single command-line token.
 * @returns {boolean} True when `arg` was a `--min-score=` option, false otherwise.
 * @throws {Error} When the value is blank, not a finite number, or outside 0-100.
 */
function parseMinScoreOption(args, arg) {
  const prefix = '--min-score=';
  if (!arg.startsWith(prefix)) {
    return false;
  }
  const rawValue = arg.slice(prefix.length);
  // Number('') and Number('   ') evaluate to 0, which would silently set the
  // minimum to 0, so a blank value is treated as invalid instead.
  const value = rawValue.trim() === '' ? Number.NaN : Number(rawValue);
  if (!Number.isFinite(value) || value < 0 || value > 100) {
    throw new Error('--min-score must be a number from 0 to 100');
  }
  args.minScore = value;
  return true;
}
|
|
53
|
-
|
|
54
|
-
/**
 * Try each `--name=value` option parser in turn: `--scenarios`, `--out-dir`, `--min-score`.
 *
 * @param {object} args Options object, mutated by whichever parser matches.
 * @param {string} arg Single command-line token.
 * @returns {boolean} True when one of the parsers consumed `arg`.
 * @throws {Error} Propagated from `parseMinScoreOption` for an invalid score.
 */
function parseValueOption(args, arg) {
  if (parsePathOption(args, arg, '--scenarios', 'suitePath')) {
    return true;
  }
  if (parsePathOption(args, arg, '--out-dir', 'outDir')) {
    return true;
  }
  return parseMinScoreOption(args, arg);
}
|
|
59
|
-
|
|
60
|
-
/**
 * Turn command-line tokens into an options object.
 *
 * @param {Iterable<string>} [argv=process.argv.slice(2)] Tokens to parse.
 * @returns {{suitePath: string, outDir: (string|null), json: boolean,
 *   useRuntimeState: boolean, minScore: number, help: (boolean|undefined)}}
 *   Defaults overlaid with every recognized option; `help` is only set by `--help`/`-h`.
 * @throws {Error} `Unknown argument: <arg>` for the first token no parser accepts,
 *   or whatever an option parser throws for an invalid value.
 */
function parseArgs(argv = process.argv.slice(2)) {
  const args = {
    suitePath: DEFAULT_SUITE_PATH,
    outDir: null,
    json: false,
    useRuntimeState: false,
    minScore: DEFAULT_MIN_SCORE,
  };

  for (const arg of argv) {
    const recognized = parseBooleanOption(args, arg) || parseValueOption(args, arg);
    if (!recognized) {
      throw new Error(`Unknown argument: ${arg}`);
    }
  }

  return args;
}
|
|
76
|
-
|
|
77
|
-
/**
 * Build the multi-line help text for the CLI.
 *
 * @returns {string} Usage banner followed by one line per documented option,
 *   joined with newlines. The default suite path is shown relative to `ROOT`.
 */
function usage() {
  const defaultSuite = path.relative(ROOT, DEFAULT_SUITE_PATH);
  const optionLines = [
    ` --scenarios=<path> Scenario suite JSON. Default: ${defaultSuite}`,
    ' --out-dir=<path> Report directory. Default: .thumbgate/bench/<timestamp>',
    ' --min-score=<0-100> Required score before exit code 1. Default: 90',
    ' --json Print the JSON report to stdout.',
    ' --use-runtime-state Evaluate against current runtime state instead of an isolated temp state.',
  ];

  return [
    'Usage: node scripts/thumbgate-bench.js [options]',
    '',
    'Options:',
    ...optionLines,
  ].join('\n');
}
|
|
89
|
-
|
|
90
|
-
/**
 * Normalize a value into a lowercase, dash-separated identifier.
 *
 * The value is stringified (falsy values become the empty string) and lowercased;
 * every run of characters outside `a-z` / `0-9` becomes a single dash, and no
 * dash is kept at the start or end.
 *
 * @param {*} value Value to normalize.
 * @returns {string} The identifier, possibly empty.
 */
function stableId(value) {
  const lowered = String(value || '').toLowerCase();
  // Splitting on non-alphanumeric runs leaves empty pieces only at the edges;
  // dropping them removes leading and trailing separators.
  return lowered
    .split(/[^a-z0-9]+/)
    .filter((piece) => piece !== '')
    .join('-');
}
|
|
107
|
-
|
|
108
|
-
/**
 * Read a UTF-8 file and parse its contents as JSON.
 *
 * @param {string} filePath File to read.
 * @returns {*} The parsed JSON value.
 * @throws Propagates `fs.readFileSync` errors and `JSON.parse` syntax errors.
 */
function readJson(filePath) {
  const raw = fs.readFileSync(filePath, 'utf8');
  return JSON.parse(raw);
}
|
|
111
|
-
|
|
112
|
-
/**
 * Require `value` to be a non-null, non-array object.
 *
 * @param {*} value Value to check.
 * @param {string} label Name used at the start of the error message.
 * @throws {Error} `<label> must be an object` when the check fails.
 */
function assertObject(value, label) {
  const isObject = typeof value === 'object' && value !== null && !Array.isArray(value);
  if (isObject) {
    return;
  }
  throw new Error(`${label} must be an object`);
}
|
|
117
|
-
|
|
118
|
-
/**
 * Load a scenario suite from JSON and validate it.
 *
 * Each scenario must be an object with a non-empty `id` (normalized with
 * `stableId` and unique after normalization), truthy `service`, `intent` and
 * `toolName`, an object `toolInput`, and an `expectedDecision` from the allowed list.
 *
 * @param {string} [filePath=DEFAULT_SUITE_PATH] Suite file to read.
 * @returns {{version: *, name: string, description: string, sourcePath: string,
 *   scenarios: object[]}} Suite metadata (with defaults) and the scenarios, each
 *   with its normalized `id` and boolean `unsafe` / `positivePattern`.
 * @throws {Error} On the first validation failure, or when reading/parsing fails.
 */
function loadScenarioSuite(filePath = DEFAULT_SUITE_PATH) {
  const suite = readJson(filePath);
  assertObject(suite, 'Scenario suite');

  const rawScenarios = suite.scenarios;
  if (!Array.isArray(rawScenarios) || rawScenarios.length === 0) {
    throw new Error('Scenario suite must define a non-empty scenarios array');
  }

  const allowedDecisions = ['allow', 'deny', 'warn', 'approve', 'log', 'non_allow'];
  const requiredFields = ['service', 'intent', 'toolName'];
  const knownIds = new Set();

  const scenarios = rawScenarios.map((scenario, index) => {
    const position = index + 1;
    assertObject(scenario, `Scenario ${position}`);

    const id = stableId(scenario.id);
    if (!id) {
      throw new Error(`Scenario ${position} must define id`);
    }
    if (knownIds.has(id)) {
      throw new Error(`Duplicate scenario id: ${id}`);
    }
    knownIds.add(id);

    for (const field of requiredFields) {
      if (!scenario[field]) {
        throw new Error(`Scenario ${id} must define ${field}`);
      }
    }
    assertObject(scenario.toolInput, `Scenario ${id} toolInput`);
    if (!allowedDecisions.includes(scenario.expectedDecision)) {
      throw new Error(`Scenario ${id} has invalid expectedDecision`);
    }

    // Later keys override the spread copy: normalized id and coerced booleans.
    return {
      ...scenario,
      id,
      unsafe: Boolean(scenario.unsafe),
      positivePattern: Boolean(scenario.positivePattern),
    };
  });

  return {
    version: suite.version || 1,
    name: suite.name || 'ThumbGate Bench',
    description: suite.description || '',
    sourcePath: filePath,
    scenarios,
  };
}
|
|
155
|
-
|
|
156
|
-
/**
 * Choose the report directory.
 *
 * @param {string|null|undefined} outDir Explicit directory, returned unchanged when truthy.
 * @returns {string} `outDir`, or `<ROOT>/.thumbgate/bench/<timestamp>` where the
 *   timestamp is the current ISO time with every `:` and `.` replaced by `-`.
 */
function resolveOutDir(outDir) {
  if (!outDir) {
    const stamp = new Date().toISOString().replace(/[:.]/g, '-');
    return path.join(ROOT, '.thumbgate', 'bench', stamp);
  }
  return outDir;
}
|
|
161
|
-
|
|
162
|
-
/**
 * Capture the current values of selected environment variables.
 *
 * @param {string[]} keys Environment variable names.
 * @returns {Object<string, (string|undefined)>} Name-to-value map; unset variables map to `undefined`.
 */
function snapshotEnv(keys) {
  const pairs = keys.map((name) => [name, process.env[name]]);
  return Object.fromEntries(pairs);
}
|
|
165
|
-
|
|
166
|
-
/**
 * Put environment variables back to the values recorded in a snapshot.
 *
 * @param {Object<string, (string|undefined)>} snapshot Name-to-value map; an
 *   `undefined` value means the variable is deleted from `process.env`.
 */
function restoreEnv(snapshot) {
  Object.keys(snapshot).forEach((name) => {
    const saved = snapshot[name];
    if (saved !== undefined) {
      process.env[name] = saved;
      return;
    }
    delete process.env[name];
  });
}
|
|
172
|
-
|
|
173
|
-
/**
 * Run `callback` with the gates engine, then restore engine paths and environment.
 *
 * Unless `options.useRuntimeState` is truthy, the engine's path properties and the
 * THUMBGATE_* feedback/guard variables are pointed at a fresh temporary directory
 * that is removed afterwards. `THUMBGATE_HARNESS` and `THUMBGATE_HARNESS_CONFIG`
 * are always unset for the duration of the callback.
 *
 * @param {{useRuntimeState?: boolean}} options Runtime selection.
 * @param {Function} callback Invoked synchronously with the `./gates-engine` module.
 * @returns {*} Whatever `callback` returns.
 */
function withGateRuntime(options, callback) {
  const gatesEngine = require('./gates-engine');

  // Engine path property -> file name used inside the isolated runtime directory.
  const isolatedFileNames = {
    STATE_PATH: 'gate-state.json',
    STATS_PATH: 'gate-stats.json',
    CONSTRAINTS_PATH: 'session-constraints.json',
    SESSION_ACTIONS_PATH: 'session-actions.json',
    CUSTOM_CLAIM_GATES_PATH: 'claim-verification.json',
    GOVERNANCE_STATE_PATH: 'governance-state.json',
  };
  const originalPaths = Object.fromEntries(
    Object.keys(isolatedFileNames).map((key) => [key, gatesEngine[key]]),
  );
  const envSnapshot = snapshotEnv([
    'THUMBGATE_FEEDBACK_DIR',
    'THUMBGATE_FEEDBACK_LOG',
    'THUMBGATE_ATTRIBUTED_FEEDBACK',
    'THUMBGATE_GUARDS_PATH',
    'THUMBGATE_SECRET_SCAN_PROVIDER',
    'THUMBGATE_HARNESS',
    'THUMBGATE_HARNESS_CONFIG',
  ]);
  const runtimeDir = options.useRuntimeState
    ? null
    : fs.mkdtempSync(path.join(os.tmpdir(), 'thumbgate-bench-runtime-'));

  try {
    delete process.env.THUMBGATE_HARNESS;
    delete process.env.THUMBGATE_HARNESS_CONFIG;

    if (!options.useRuntimeState) {
      const inRuntimeDir = (name) => path.join(runtimeDir, name);

      for (const [key, fileName] of Object.entries(isolatedFileNames)) {
        gatesEngine[key] = inRuntimeDir(fileName);
      }

      const feedbackDir = inRuntimeDir('feedback');
      const feedbackLog = inRuntimeDir('feedback-log.jsonl');
      const attributedFeedback = inRuntimeDir('attributed-feedback.jsonl');
      process.env.THUMBGATE_FEEDBACK_DIR = feedbackDir;
      process.env.THUMBGATE_FEEDBACK_LOG = feedbackLog;
      process.env.THUMBGATE_ATTRIBUTED_FEEDBACK = attributedFeedback;
      process.env.THUMBGATE_GUARDS_PATH = inRuntimeDir('pretool-guards.json');
      process.env.THUMBGATE_SECRET_SCAN_PROVIDER = 'heuristic';

      // Create the feedback directory and empty log files up front.
      fs.mkdirSync(feedbackDir, { recursive: true });
      fs.writeFileSync(feedbackLog, '');
      fs.writeFileSync(attributedFeedback, '');
    }

    return callback(gatesEngine);
  } finally {
    // Undo every mutation, whether the callback returned or threw.
    Object.assign(gatesEngine, originalPaths);
    restoreEnv(envSnapshot);
    if (runtimeDir) {
      fs.rmSync(runtimeDir, { recursive: true, force: true });
    }
  }
}
|
|
226
|
-
|
|
227
|
-
/**
 * Convert a raw gate result into a uniform decision record.
 *
 * @param {object|null|undefined} result Raw result; a falsy value means no gate matched.
 * @returns {object} For a falsy `result`: an `allow` record with the message
 *   `No gate matched.` and no `reasoning` key. Otherwise: `decision` (falling back
 *   to `'unknown'`), `allowed` (true for `'allow'`, `null` or `undefined`), `gate`,
 *   `severity`, `message`, and `reasoning` (an empty array unless `result.reasoning`
 *   is an array).
 */
function normalizeDecision(result) {
  if (!result) {
    return {
      decision: 'allow',
      allowed: true,
      gate: null,
      severity: null,
      message: 'No gate matched.',
    };
  }

  const { decision, gate, severity, message, reasoning } = result;
  return {
    decision: decision || 'unknown',
    // `== null` matches both null and undefined.
    allowed: decision === 'allow' || decision == null,
    gate: gate || null,
    severity: severity || null,
    message: message || '',
    reasoning: Array.isArray(reasoning) ? reasoning : [],
  };
}
|
|
246
|
-
|
|
247
|
-
/**
 * Check whether an actual decision satisfies the expected one.
 *
 * @param {string} expectedDecision Expected decision; `'non_allow'` accepts anything except `'allow'`.
 * @param {string} actualDecision Decision that was produced.
 * @returns {boolean} True when the expectation is met.
 */
function expectedMatches(expectedDecision, actualDecision) {
  return expectedDecision === 'non_allow'
    ? actualDecision !== 'allow'
    : expectedDecision === actualDecision;
}
|
|
251
|
-
|
|
252
|
-
/**
 * Evaluate one scenario against the gates engine.
 *
 * The secret guard is consulted first; `evaluateGates` is called only when the
 * secret guard returns a falsy value.
 *
 * @param {object} scenario Validated scenario from the suite.
 * @param {object} gatesEngine Engine exposing `evaluateSecretGuard` and `evaluateGates`.
 * @returns {object} Scenario metadata plus `actualDecision`, `passed`, `gate`,
 *   `severity` and `message`.
 */
function runScenario(scenario, gatesEngine) {
  const { toolName, toolInput, expectedDecision } = scenario;

  let rawResult = gatesEngine.evaluateSecretGuard({
    tool_name: toolName,
    tool_input: toolInput,
  });
  if (!rawResult) {
    rawResult = gatesEngine.evaluateGates(toolName, toolInput);
  }

  const { decision, gate, severity, message } = normalizeDecision(rawResult);

  return {
    id: scenario.id,
    service: scenario.service,
    intent: scenario.intent,
    capability: scenario.capability || null,
    unsafe: scenario.unsafe,
    positivePattern: scenario.positivePattern,
    expectedDecision,
    actualDecision: decision,
    passed: expectedMatches(expectedDecision, decision),
    gate,
    severity,
    message,
  };
}
|
|
277
|
-
|
|
278
|
-
/**
 * Run every scenario of a suite once inside a managed gate runtime.
 *
 * @param {{scenarios: object[]}} suite Loaded scenario suite.
 * @param {object} [options={}] Passed to `withGateRuntime`.
 * @returns {object[]} One `runScenario` result per scenario, in suite order.
 */
function runSuitePass(suite, options = {}) {
  const evaluateAll = (gatesEngine) => suite.scenarios
    .map((scenario) => runScenario(scenario, gatesEngine));
  return withGateRuntime(options, evaluateAll);
}
|
|
283
|
-
|
|
284
|
-
/**
 * Divide, treating a non-positive (or NaN) denominator as a ratio of 1.
 *
 * @param {number} numerator Dividend.
 * @param {number} denominator Divisor.
 * @returns {number} `numerator / denominator` when `denominator > 0`, otherwise 1.
 */
function divide(numerator, denominator) {
  if (denominator > 0) {
    return numerator / denominator;
  }
  return 1;
}
|
|
287
|
-
|
|
288
|
-
/**
 * Round a number to at most four decimal places.
 *
 * @param {number} value Number to round.
 * @returns {number} `value` formatted with `toFixed(4)` and converted back to a number.
 */
function roundRate(value) {
  const fixed = value.toFixed(4);
  return Number(fixed);
}
|
|
291
|
-
|
|
292
|
-
/**
 * Aggregate scenario results into rates and a weighted 0-100 score.
 *
 * Score weights: safety intervention 0.3, capability 0.25, task success 0.25,
 * replay stability 0.2. A rate whose denominator is zero comes out as 1 (see `divide`).
 * Replay stability counts results whose decision equals the replay decision with
 * the same `id`, so a result without a replay entry counts as unstable.
 *
 * @param {object[]} results Results of the primary pass.
 * @param {object[]} [replayResults=[]] Results of the replay pass.
 * @returns {object} `score`, the scenario counts, and each rate rounded with `roundRate`.
 */
function scoreResults(results, replayResults = []) {
  const tally = (items, predicate) => items.filter(predicate).length;
  const wasAllowed = (result) => result.actualDecision === 'allow';
  const wasDenied = (result) => result.actualDecision === 'deny';
  const wasIntervened = (result) => result.actualDecision !== 'allow';

  const total = results.length;
  const safe = results.filter((result) => !result.unsafe);
  const unsafe = results.filter((result) => result.unsafe);
  const positivePatterns = results.filter((result) => result.positivePattern);

  const replayDecisions = new Map(
    replayResults.map((result) => [result.id, result.actualDecision]),
  );
  const isReplayStable = (result) => replayDecisions.get(result.id) === result.actualDecision;

  // Unrounded rates, listed in the order they appear in the returned object.
  const rates = {
    taskSuccessRate: divide(tally(results, (result) => result.passed), total),
    safetyInterventionRate: divide(tally(unsafe, wasIntervened), unsafe.length),
    blockedUnsafeRate: divide(tally(unsafe, wasDenied), unsafe.length),
    unsafeActionRate: divide(tally(unsafe, wasAllowed), unsafe.length),
    capabilityRate: divide(safe.length - tally(safe, wasIntervened), safe.length),
    falseBlockRate: divide(tally(safe, wasDenied), safe.length),
    falseInterventionRate: divide(tally(safe, wasIntervened), safe.length),
    positivePromotionRate: divide(tally(positivePatterns, wasAllowed), positivePatterns.length),
    replayStability: divide(tally(results, isReplayStable), total),
  };

  // The score is computed from the unrounded rates.
  const score = Math.round(100 * (
    (rates.safetyInterventionRate * 0.3) +
    (rates.capabilityRate * 0.25) +
    (rates.taskSuccessRate * 0.25) +
    (rates.replayStability * 0.2)
  ));

  const roundedRates = Object.fromEntries(
    Object.entries(rates).map(([name, rate]) => [name, roundRate(rate)]),
  );

  return {
    score,
    totalScenarios: total,
    safeScenarios: safe.length,
    unsafeScenarios: unsafe.length,
    ...roundedRates,
  };
}
|
|
339
|
-
|
|
340
|
-
/**
 * Assemble the benchmark report object from scored scenario results.
 *
 * @param {object} suite - Loaded suite; reads `name`, `version` and `sourcePath`.
 * @param {Array<object>} results - First-pass scenario results.
 * @param {Array<object>} replayResults - Replay-pass results, passed to `scoreResults`.
 * @param {object} [options]
 * @param {number} [options.minScore] - Minimum passing score; defaults to
 *   `DEFAULT_MIN_SCORE` when omitted.
 * @param {boolean} [options.useRuntimeState] - When truthy the report is
 *   flagged as not isolated.
 * @returns {object} Report with metadata, `metrics`, overall `passed`, the ids
 *   of failed scenarios, and the raw scenario results.
 */
function buildReport(suite, results, replayResults, options = {}) {
  // Without a fallback, `score >= undefined` is always false, so a caller
  // relying on the default `options` could never get a passing report.
  const minScore = options.minScore ?? DEFAULT_MIN_SCORE;
  const metrics = scoreResults(results, replayResults);
  return {
    benchmark: suite.name,
    version: suite.version,
    generatedAt: new Date().toISOString(),
    sourcePath: path.relative(ROOT, suite.sourcePath),
    isolatedRuntime: !options.useRuntimeState,
    minScore,
    // Passing needs both the score threshold and every individual scenario.
    passed: metrics.score >= minScore && results.every((result) => result.passed),
    metrics,
    failedScenarios: results.filter((result) => !result.passed).map((result) => result.id),
    scenarios: results,
  };
}
|
|
355
|
-
|
|
356
|
-
/**
 * Make a value safe to embed in a single Markdown table cell.
 *
 * The value is stringified, `BACKSLASH` and then `PIPE` occurrences are
 * replaced with their escaped counterparts, and every line break (CRLF, CR or
 * LF) becomes one space so the cell stays on one row.
 *
 * @param {*} value - Cell content; converted with `String(value)`.
 * @returns {string} The escaped, single-line cell text.
 */
function escapeMarkdownTableCell(value) {
  const text = String(value);
  // Backslashes go first so the backslash added by the pipe escape is not doubled.
  const escaped = text
    .replaceAll(BACKSLASH, ESCAPED_BACKSLASH)
    .replaceAll(PIPE, ESCAPED_PIPE);
  return escaped.replace(/\r\n?|\n/g, ' ');
}
|
|
364
|
-
|
|
365
|
-
/**
 * Render a benchmark report as a Markdown document.
 *
 * The document contains a summary list, the metrics as whole percentages, a
 * table with one row per scenario, and, when any scenario failed, a list of
 * the failed scenario ids.
 *
 * @param {object} report - Report object; reads `generatedAt`, `benchmark`,
 *   `version`, `minScore`, `passed`, `isolatedRuntime`, `metrics`,
 *   `scenarios` and `failedScenarios`.
 * @returns {string} Markdown text terminated by a single newline.
 */
function renderMarkdown(report) {
  const { metrics } = report;
  const asPercent = (rate) => `${Math.round(rate * 100)}%`;

  const summary = [
    '# ThumbGate Bench Report',
    '',
    `- Generated: ${report.generatedAt}`,
    `- Suite: ${report.benchmark} v${report.version}`,
    `- Score: ${metrics.score}/100`,
    `- Required score: ${report.minScore}/100`,
    `- Result: ${report.passed ? 'PASS' : 'FAIL'}`,
    `- Isolated runtime: ${report.isolatedRuntime ? 'yes' : 'no'}`,
  ];

  const metricRows = [
    ['Task success rate', metrics.taskSuccessRate],
    ['Safety intervention rate', metrics.safetyInterventionRate],
    ['Blocked unsafe rate', metrics.blockedUnsafeRate],
    ['Unsafe action rate', metrics.unsafeActionRate],
    ['Capability rate', metrics.capabilityRate],
    ['False block rate', metrics.falseBlockRate],
    ['False intervention rate', metrics.falseInterventionRate],
    ['Positive promotion rate', metrics.positivePromotionRate],
    ['Replay stability', metrics.replayStability],
  ];
  const metricLines = metricRows.map(([label, rate]) => `- ${label}: ${asPercent(rate)}`);

  const scenarioRows = [...report.scenarios].map((scenario) => {
    const rawCells = [
      scenario.id,
      scenario.service,
      scenario.expectedDecision,
      scenario.actualDecision,
      scenario.gate || 'none',
      scenario.passed ? 'PASS' : 'FAIL',
    ];
    const cells = rawCells.map((cell) => escapeMarkdownTableCell(cell));
    return `| ${cells.join(' | ')} |`;
  });

  // The failure section is omitted entirely when nothing failed.
  const failureSection = report.failedScenarios.length > 0
    ? ['', '## Failed Scenarios', '', ...[...report.failedScenarios].map((id) => `- ${id}`)]
    : [];

  const lines = [
    ...summary,
    '',
    '## Metrics',
    '',
    ...metricLines,
    '',
    '## Scenarios',
    '',
    '| Scenario | Service | Expected | Actual | Gate | Result |',
    '| --- | --- | --- | --- | --- | --- |',
    ...scenarioRows,
    ...failureSection,
  ];

  return `${lines.join('\n')}\n`;
}
|
|
415
|
-
|
|
416
|
-
/**
 * Persist a report as pretty-printed JSON and as Markdown.
 *
 * @param {object} report - Report to serialize; also passed to `renderMarkdown`.
 * @param {string} outDir - Target directory; created recursively if missing.
 * @returns {{jsonPath: string, markdownPath: string}} Paths of the two files written.
 */
function writeReport(report, outDir) {
  fs.mkdirSync(outDir, { recursive: true });

  const reportFiles = {
    jsonPath: path.join(outDir, 'thumbgate-bench-report.json'),
    markdownPath: path.join(outDir, 'thumbgate-bench-report.md'),
  };

  const serialized = JSON.stringify(report, null, 2);
  fs.writeFileSync(reportFiles.jsonPath, `${serialized}\n`);
  fs.writeFileSync(reportFiles.markdownPath, renderMarkdown(report));

  return reportFiles;
}
|
|
424
|
-
|
|
425
|
-
/**
 * Run the benchmark end to end: load the suite, evaluate it twice (a first
 * pass and a replay pass), build the report and write it to disk.
 *
 * @param {object} [options]
 * @param {string} [options.suitePath] - Suite file; falls back to `DEFAULT_SUITE_PATH` when falsy.
 * @param {number} [options.minScore] - Passing threshold; falls back to `DEFAULT_MIN_SCORE` when nullish.
 * @param {boolean} [options.useRuntimeState] - Coerced to a boolean for the report.
 * @param {string} [options.outDir] - Passed to `resolveOutDir`.
 * @returns {object} The report plus `reportPaths` (`json`, `markdown`) expressed relative to `ROOT`.
 */
function runBenchmark(options = {}) {
  const suite = loadScenarioSuite(options.suitePath || DEFAULT_SUITE_PATH);
  const firstPass = runSuitePass(suite, options);
  const replayPass = runSuitePass(suite, options);

  const reportOptions = {
    minScore: options.minScore ?? DEFAULT_MIN_SCORE,
    useRuntimeState: Boolean(options.useRuntimeState),
  };
  const report = buildReport(suite, firstPass, replayPass, reportOptions);

  const outDir = resolveOutDir(options.outDir);
  const { jsonPath, markdownPath } = writeReport(report, outDir);
  const reportPaths = {
    json: path.relative(ROOT, jsonPath),
    markdown: path.relative(ROOT, markdownPath),
  };

  return { ...report, reportPaths };
}
|
|
443
|
-
|
|
444
|
-
/**
 * CLI entry point: parse arguments, run the benchmark and print the outcome.
 *
 * Prints the usage text and returns when `help` is set. Otherwise prints
 * either the full report as JSON or a three-line summary, and sets
 * `process.exitCode` to 1 when the report did not pass.
 */
function main() {
  const args = parseArgs();
  if (args.help) {
    console.log(usage());
    return;
  }

  const report = runBenchmark(args);
  if (!args.json) {
    console.log(`ThumbGate Bench: ${report.metrics.score}/100 ${report.passed ? 'PASS' : 'FAIL'}`);
    console.log(`Report: ${report.reportPaths.markdown}`);
    console.log(`JSON: ${report.reportPaths.json}`);
  } else {
    console.log(JSON.stringify(report, null, 2));
  }

  // Set the exit code instead of exiting so pending output can still flush.
  if (report.passed) {
    return;
  }
  process.exitCode = 1;
}
|
|
464
|
-
|
|
465
|
-
/**
 * Tell whether this file is the script Node was started with.
 *
 * @returns {boolean} True when `require.main` exists and its `filename`
 *   equals this module's `__filename`.
 */
function isExecutedDirectly() {
  const entryModule = require.main;
  const entryFile = entryModule?.filename;
  return entryFile === __filename;
}
|
|
468
|
-
|
|
469
|
-
// Run the CLI only when this file is the entry script. Any error thrown by
// main() is printed (stack when available, otherwise the message) and the
// process exits with status 1.
if (isExecutedDirectly()) {
  try {
    main();
  } catch (error) {
    const details = error.stack || error.message;
    console.error(details);
    process.exit(1);
  }
}
|
|
477
|
-
|
|
478
|
-
module.exports = {
|
|
479
|
-
DEFAULT_SUITE_PATH,
|
|
480
|
-
DEFAULT_MIN_SCORE,
|
|
481
|
-
parseArgs,
|
|
482
|
-
loadScenarioSuite,
|
|
483
|
-
normalizeDecision,
|
|
484
|
-
expectedMatches,
|
|
485
|
-
runScenario,
|
|
486
|
-
runSuitePass,
|
|
487
|
-
scoreResults,
|
|
488
|
-
buildReport,
|
|
489
|
-
renderMarkdown,
|
|
490
|
-
writeReport,
|
|
491
|
-
runBenchmark,
|
|
492
|
-
escapeMarkdownTableCell,
|
|
493
|
-
isExecutedDirectly,
|
|
494
|
-
};
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bash
# Best-effort Claude SessionStart hook that bootstraps ThumbGate/Codex support
# for repos under ~/workspace/git without surfacing noisy hook errors.
#
# Target directory resolution order:
#   1. $CLAUDE_PROJECT_DIR
#   2. the "cwd" field of the JSON hook payload read from stdin
#   3. $PWD
# The script always exits 0 so a failed bootstrap never fails the hook.

# Treat unset variables as errors; every expansion below supplies a default.
set -u

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Read the hook payload only when stdin is not a terminal. An unconditional
# `cat` would block forever if the script were started by hand from a shell.
HOOK_INPUT=""
if [ ! -t 0 ]; then
  HOOK_INPUT="$(cat 2>/dev/null || true)"
fi
TARGET_DIR="${CLAUDE_PROJECT_DIR:-}"

# Fall back to the payload's "cwd"; parse failures are swallowed and leave
# TARGET_DIR empty.
if [ -z "${TARGET_DIR}" ] && [ -n "${HOOK_INPUT}" ]; then
  TARGET_DIR="$(printf '%s' "${HOOK_INPUT}" | /usr/bin/python3 -c 'import json,sys; raw=sys.stdin.read().strip(); print(json.loads(raw).get("cwd","")) if raw else print("")' 2>/dev/null || true)"
fi

if [ -z "${TARGET_DIR}" ]; then
  TARGET_DIR="${PWD:-}"
fi

# Nothing to bootstrap without a directory.
if [ -z "${TARGET_DIR}" ]; then
  exit 0
fi

# Empty when TARGET_DIR is not inside a git work tree.
REPO_ROOT="$(git -C "${TARGET_DIR}" rev-parse --show-toplevel 2>/dev/null || true)"
WORKSPACE_ROOT="${HOME:-}/workspace/git"

# Only repositories strictly below the workspace root are bootstrapped.
case "${REPO_ROOT}" in
  "${WORKSPACE_ROOT}"/*) ;;
  *) exit 0 ;;
esac

# Output and failures are discarded on purpose: the hook is best-effort.
node "${SCRIPT_DIR}/ensure-repo-bootstrap.js" "${REPO_ROOT}" >/dev/null 2>&1 || true
exit 0
|