thumbgate 1.14.1 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +6 -6
- package/.claude-plugin/plugin.json +3 -3
- package/.well-known/llms.txt +5 -5
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +60 -35
- package/adapters/chatgpt/openapi.yaml +118 -2
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +217 -84
- package/adapters/opencode/opencode.json +1 -1
- package/bench/prompt-eval-suite.json +5 -1
- package/bin/cli.js +211 -8
- package/config/enforcement.json +59 -7
- package/config/evals/agent-safety-eval.json +338 -22
- package/config/gates/default.json +33 -0
- package/config/gates/routine.json +43 -0
- package/config/github-about.json +3 -3
- package/config/mcp-allowlists.json +4 -0
- package/config/merge-quality-checks.json +2 -1
- package/config/model-candidates.json +131 -0
- package/openapi/openapi.yaml +118 -2
- package/package.json +70 -51
- package/public/blog.html +7 -7
- package/public/codex-plugin.html +13 -7
- package/public/compare.html +29 -23
- package/public/dashboard.html +105 -12
- package/public/guide.html +28 -28
- package/public/index.html +233 -97
- package/public/learn.html +87 -20
- package/public/lessons.html +26 -2
- package/public/numbers.html +271 -0
- package/public/pro.html +89 -19
- package/scripts/agent-audit-trace.js +55 -0
- package/scripts/agent-memory-lifecycle.js +96 -0
- package/scripts/agent-readiness-plan.js +118 -0
- package/scripts/agentic-data-pipeline.js +21 -1
- package/scripts/agents-sdk-sandbox-plan.js +57 -0
- package/scripts/ai-org-governance.js +98 -0
- package/scripts/ai-search-distribution.js +43 -0
- package/scripts/artifact-agent-plan.js +81 -0
- package/scripts/billing.js +27 -8
- package/scripts/cli-feedback.js +2 -1
- package/scripts/cli-schema.js +60 -5
- package/scripts/code-mode-mcp-plan.js +71 -0
- package/scripts/commercial-offer.js +1 -1
- package/scripts/context-engine.js +1 -2
- package/scripts/context-manager.js +4 -1
- package/scripts/contextfs.js +214 -32
- package/scripts/dashboard-render-spec.js +1 -1
- package/scripts/dashboard.js +275 -9
- package/scripts/decision-journal.js +13 -3
- package/scripts/document-workflow-governance.js +62 -0
- package/scripts/enterprise-agent-rollout.js +34 -0
- package/scripts/experience-replay-governance.js +69 -0
- package/scripts/export-hf-dataset.js +1 -1
- package/scripts/feedback-loop.js +141 -9
- package/scripts/feedback-to-rules.js +17 -23
- package/scripts/gates-engine.js +4 -6
- package/scripts/growth-campaigns.js +49 -0
- package/scripts/harness-selector.js +145 -1
- package/scripts/hybrid-supervisor-agent.js +64 -0
- package/scripts/inference-cache-policy.js +72 -0
- package/scripts/inference-economics.js +53 -0
- package/scripts/internal-agent-bootstrap.js +12 -2
- package/scripts/knowledge-layer-plan.js +108 -0
- package/scripts/lesson-canonical.js +181 -0
- package/scripts/lesson-db.js +71 -10
- package/scripts/lesson-inference.js +183 -44
- package/scripts/lesson-search.js +4 -1
- package/scripts/lesson-synthesis.js +23 -2
- package/scripts/llm-client.js +157 -26
- package/scripts/mailer/resend-mailer.js +112 -1
- package/scripts/mcp-transport-strategy.js +66 -0
- package/scripts/memory-store-governance.js +60 -0
- package/scripts/meta-agent-loop.js +7 -13
- package/scripts/model-access-eligibility.js +38 -0
- package/scripts/model-migration-readiness.js +55 -0
- package/scripts/native-messaging-audit.js +514 -0
- package/scripts/operational-integrity.js +96 -3
- package/scripts/otel-declarative-config.js +56 -0
- package/scripts/perplexity-client.js +1 -1
- package/scripts/post-training-governance.js +34 -0
- package/scripts/pr-manager.js +47 -7
- package/scripts/private-core-boundary.js +72 -0
- package/scripts/production-agent-readiness.js +40 -0
- package/scripts/profile-router.js +16 -1
- package/scripts/prompt-eval.js +564 -32
- package/scripts/prompt-programs.js +93 -0
- package/scripts/provider-action-normalizer.js +585 -0
- package/scripts/rule-validator.js +285 -0
- package/scripts/scaling-law-claims.js +60 -0
- package/scripts/security-scanner.js +1 -1
- package/scripts/self-distill-agent.js +7 -32
- package/scripts/seo-gsd.js +400 -43
- package/scripts/skill-rag-router.js +53 -0
- package/scripts/spec-gate.js +1 -1
- package/scripts/student-consistent-training.js +73 -0
- package/scripts/synthetic-data-provenance.js +98 -0
- package/scripts/task-context-result.js +81 -0
- package/scripts/telemetry-analytics.js +149 -0
- package/scripts/thompson-sampling.js +2 -2
- package/scripts/token-savings.js +7 -6
- package/scripts/token-tco.js +46 -0
- package/scripts/tool-registry.js +75 -3
- package/scripts/verification-loop.js +10 -1
- package/scripts/verifier-scoring.js +71 -0
- package/scripts/workflow-sentinel.js +284 -28
- package/scripts/workspace-agent-routines.js +118 -0
- package/skills/thumbgate/SKILL.md +1 -1
- package/src/api/server.js +434 -120
- package/.claude-plugin/README.md +0 -170
- package/adapters/README.md +0 -12
- package/scripts/analytics-report.js +0 -328
- package/scripts/autonomous-workflow.js +0 -377
- package/scripts/billing-setup.js +0 -109
- package/scripts/creator-campaigns.js +0 -239
- package/scripts/cross-encoder-reranker.js +0 -235
- package/scripts/daemon-manager.js +0 -108
- package/scripts/decision-trace.js +0 -354
- package/scripts/delegation-runtime.js +0 -896
- package/scripts/dispatch-brief.js +0 -159
- package/scripts/distribution-surfaces.js +0 -110
- package/scripts/feedback-history-distiller.js +0 -382
- package/scripts/funnel-analytics.js +0 -35
- package/scripts/history-distiller.js +0 -200
- package/scripts/hosted-job-launcher.js +0 -256
- package/scripts/intent-router.js +0 -392
- package/scripts/lesson-reranker.js +0 -263
- package/scripts/lesson-retrieval.js +0 -148
- package/scripts/managed-lesson-agent.js +0 -183
- package/scripts/operational-dashboard.js +0 -103
- package/scripts/operational-summary.js +0 -129
- package/scripts/operator-artifacts.js +0 -608
- package/scripts/optimize-context.js +0 -17
- package/scripts/org-dashboard.js +0 -206
- package/scripts/partner-orchestration.js +0 -146
- package/scripts/predictive-insights.js +0 -356
- package/scripts/pulse.js +0 -80
- package/scripts/reflector-agent.js +0 -221
- package/scripts/sales-pipeline.js +0 -681
- package/scripts/session-episode-store.js +0 -329
- package/scripts/session-health-sensor.js +0 -242
- package/scripts/session-report.js +0 -120
- package/scripts/swarm-coordinator.js +0 -81
- package/scripts/tool-kpi-tracker.js +0 -12
- package/scripts/webhook-delivery.js +0 -62
- package/scripts/workflow-sprint-intake.js +0 -475
- package/skills/agent-memory/SKILL.md +0 -97
- package/skills/solve-architecture-autonomy/SKILL.md +0 -17
- package/skills/solve-architecture-autonomy/tool.js +0 -33
- package/skills/thumbgate-feedback/SKILL.md +0 -49
package/bin/cli.js
CHANGED
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
* npx thumbgate import-doc # import a local policy/runbook document and propose gates
|
|
12
12
|
* npx thumbgate export-dpo # export DPO training pairs
|
|
13
13
|
* npx thumbgate export-databricks # export Databricks-ready analytics bundle
|
|
14
|
+
* npx thumbgate eval --from-feedback # turn feedback into reusable prompt evals
|
|
14
15
|
* npx thumbgate stats # feedback analytics + Revenue-at-Risk
|
|
15
16
|
* npx thumbgate cfo # local operational billing summary
|
|
16
17
|
* npx thumbgate pro # solo dashboard + exports side lane
|
|
@@ -528,8 +529,54 @@ function setupForge() {
|
|
|
528
529
|
return true;
|
|
529
530
|
}
|
|
530
531
|
|
|
532
|
+
function setupCline() {
|
|
533
|
+
let changed = false;
|
|
534
|
+
|
|
535
|
+
// 1. Copy .clinerules into project root (Cline auto-loads this at session start).
|
|
536
|
+
const rulesDest = path.join(CWD, '.clinerules');
|
|
537
|
+
const rulesSrc = path.join(PKG_ROOT, 'adapters', 'cline', '.clinerules');
|
|
538
|
+
if (!fs.existsSync(rulesDest) && fs.existsSync(rulesSrc)) {
|
|
539
|
+
fs.copyFileSync(rulesSrc, rulesDest);
|
|
540
|
+
console.log(' Cline: installed .clinerules with ThumbGate gating rules');
|
|
541
|
+
changed = true;
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
// 2. Merge MCP server block into Cline's VS Code globalStorage settings.
|
|
545
|
+
// Extension id: saoudrizwan.claude-dev
|
|
546
|
+
const platform = process.platform;
|
|
547
|
+
let clineSettingsPath = null;
|
|
548
|
+
if (platform === 'darwin') {
|
|
549
|
+
clineSettingsPath = path.join(HOME, 'Library', 'Application Support', 'Code', 'User', 'globalStorage', 'saoudrizwan.claude-dev', 'settings', 'cline_mcp_settings.json');
|
|
550
|
+
} else if (platform === 'linux') {
|
|
551
|
+
clineSettingsPath = path.join(HOME, '.config', 'Code', 'User', 'globalStorage', 'saoudrizwan.claude-dev', 'settings', 'cline_mcp_settings.json');
|
|
552
|
+
} else if (platform === 'win32' && process.env.APPDATA) {
|
|
553
|
+
clineSettingsPath = path.join(process.env.APPDATA, 'Code', 'User', 'globalStorage', 'saoudrizwan.claude-dev', 'settings', 'cline_mcp_settings.json');
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
if (clineSettingsPath) {
|
|
557
|
+
let settings = { mcpServers: {} };
|
|
558
|
+
if (fs.existsSync(clineSettingsPath)) {
|
|
559
|
+
try { settings = JSON.parse(fs.readFileSync(clineSettingsPath, 'utf8')); } catch (_) { settings = { mcpServers: {} }; }
|
|
560
|
+
}
|
|
561
|
+
settings.mcpServers = settings.mcpServers || {};
|
|
562
|
+
const canonicalEntry = canonicalMcpEntry('home');
|
|
563
|
+
if (!mcpEntriesMatch(settings.mcpServers[MCP_SERVER_NAME], canonicalEntry)) {
|
|
564
|
+
settings.mcpServers[MCP_SERVER_NAME] = canonicalEntry;
|
|
565
|
+
fs.mkdirSync(path.dirname(clineSettingsPath), { recursive: true });
|
|
566
|
+
fs.writeFileSync(clineSettingsPath, JSON.stringify(settings, null, 2) + '\n');
|
|
567
|
+
console.log(` Cline: registered thumbgate MCP server in ${clineSettingsPath}`);
|
|
568
|
+
changed = true;
|
|
569
|
+
}
|
|
570
|
+
} else {
|
|
571
|
+
console.log(' Cline: unsupported platform for auto-wiring MCP settings; see adapters/cline/INSTALL.md');
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
return changed;
|
|
575
|
+
}
|
|
576
|
+
|
|
531
577
|
function detectAgent(projectDir) {
|
|
532
578
|
if (fs.existsSync(path.join(projectDir, '.claude'))) return 'claude-code';
|
|
579
|
+
if (fs.existsSync(path.join(projectDir, '.clinerules'))) return 'cline';
|
|
533
580
|
if (fs.existsSync(path.join(projectDir, '.cursorrules'))) return 'cursor';
|
|
534
581
|
if (fs.existsSync(path.join(projectDir, '.cursor'))) return 'cursor';
|
|
535
582
|
if (fs.existsSync(path.join(projectDir, '.codex'))) return 'codex';
|
|
@@ -670,6 +717,12 @@ function init(cliArgs = parseArgs(process.argv.slice(3))) {
|
|
|
670
717
|
{ name: 'Amp', detect: [() => whichExists('amp'), () => fs.existsSync(path.join(HOME, '.amp'))], setup: setupAmp },
|
|
671
718
|
{ name: 'Cursor', detect: [() => fs.existsSync(path.join(HOME, '.cursor', 'mcp.json')), () => fs.existsSync(path.join(CWD, '.cursor'))], setup: setupCursor },
|
|
672
719
|
{ name: 'ForgeCode', detect: [() => whichExists('forge'), () => fs.existsSync(path.join(CWD, 'forge.yaml'))], setup: setupForge },
|
|
720
|
+
{ name: 'Cline', detect: [
|
|
721
|
+
() => fs.existsSync(path.join(CWD, '.clinerules')),
|
|
722
|
+
() => process.platform === 'darwin' && fs.existsSync(path.join(HOME, 'Library', 'Application Support', 'Code', 'User', 'globalStorage', 'saoudrizwan.claude-dev')),
|
|
723
|
+
() => process.platform === 'linux' && fs.existsSync(path.join(HOME, '.config', 'Code', 'User', 'globalStorage', 'saoudrizwan.claude-dev')),
|
|
724
|
+
() => process.platform === 'win32' && process.env.APPDATA && fs.existsSync(path.join(process.env.APPDATA, 'Code', 'User', 'globalStorage', 'saoudrizwan.claude-dev')),
|
|
725
|
+
], setup: setupCline },
|
|
673
726
|
];
|
|
674
727
|
|
|
675
728
|
for (const p of platforms) {
|
|
@@ -690,8 +743,11 @@ function init(cliArgs = parseArgs(process.argv.slice(3))) {
|
|
|
690
743
|
|
|
691
744
|
if (configured === 0) console.log(' All detected platforms already configured.');
|
|
692
745
|
|
|
693
|
-
//
|
|
694
|
-
|
|
746
|
+
// Cline uses .clinerules (no native hook surface). Run setupCline directly
|
|
747
|
+
// and skip wireHooks, which does not support cline.
|
|
748
|
+
if (args.agent === 'cline') {
|
|
749
|
+
setupCline();
|
|
750
|
+
} else if (args.agent || args['wire-hooks']) {
|
|
695
751
|
const { wireHooks } = require(path.join(PKG_ROOT, 'scripts', 'auto-wire-hooks'));
|
|
696
752
|
const hookResult = wireHooks({ agent: args.agent, dryRun: args['dry-run'] });
|
|
697
753
|
if (hookResult.error) {
|
|
@@ -999,8 +1055,8 @@ function pro() {
|
|
|
999
1055
|
console.log(' Launch dashboard: npx thumbgate pro');
|
|
1000
1056
|
console.log(' Activate + run : npx thumbgate pro --activate --key=YOUR_KEY');
|
|
1001
1057
|
console.log(' Install configs : npx thumbgate pro --upgrade');
|
|
1002
|
-
console.log('
|
|
1003
|
-
console.log('
|
|
1058
|
+
console.log(' Private core : ThumbGate-Core (private repo)');
|
|
1059
|
+
console.log(' Core repo : https://github.com/IgorGanapolsky/ThumbGate-Core\n');
|
|
1004
1060
|
}
|
|
1005
1061
|
|
|
1006
1062
|
function launchDashboard(key, eventType) {
|
|
@@ -1164,6 +1220,27 @@ function modelFit() {
|
|
|
1164
1220
|
console.log(JSON.stringify({ reportPath, report }, null, 2));
|
|
1165
1221
|
}
|
|
1166
1222
|
|
|
1223
|
+
function modelCandidatesCmd() {
|
|
1224
|
+
const args = parseArgs(process.argv.slice(3));
|
|
1225
|
+
const { writeModelCandidatesReport, renderModelCandidatesReport } = require(path.join(PKG_ROOT, 'scripts', 'model-candidates'));
|
|
1226
|
+
const maxCandidates = args.max ? Number(args.max) : undefined;
|
|
1227
|
+
const { reportPath, report } = writeModelCandidatesReport(undefined, {
|
|
1228
|
+
workload: args.workload,
|
|
1229
|
+
provider: args.provider,
|
|
1230
|
+
family: args.family,
|
|
1231
|
+
gateway: args.gateway,
|
|
1232
|
+
maxCandidates: Number.isFinite(maxCandidates) ? maxCandidates : undefined,
|
|
1233
|
+
});
|
|
1234
|
+
|
|
1235
|
+
if (args.json) {
|
|
1236
|
+
console.log(JSON.stringify({ reportPath, report }, null, 2));
|
|
1237
|
+
return;
|
|
1238
|
+
}
|
|
1239
|
+
|
|
1240
|
+
process.stdout.write(renderModelCandidatesReport(report));
|
|
1241
|
+
process.stdout.write(`\nReport path: ${reportPath}\n`);
|
|
1242
|
+
}
|
|
1243
|
+
|
|
1167
1244
|
function risk() {
|
|
1168
1245
|
const args = parseArgs(process.argv.slice(3));
|
|
1169
1246
|
const riskScorer = require(path.join(PKG_ROOT, 'scripts', 'risk-scorer'));
|
|
@@ -1475,6 +1552,52 @@ function gateStats() {
|
|
|
1475
1552
|
console.log('\n' + formatStats(stats) + '\n');
|
|
1476
1553
|
}
|
|
1477
1554
|
|
|
1555
|
+
function harnessAudit() {
|
|
1556
|
+
const args = parseArgs(process.argv.slice(3));
|
|
1557
|
+
const { buildHarnessOptimizationAudit } = require(path.join(PKG_ROOT, 'scripts', 'harness-selector'));
|
|
1558
|
+
const audit = buildHarnessOptimizationAudit({
|
|
1559
|
+
rootDir: CWD,
|
|
1560
|
+
docTokenBudget: args['doc-token-budget'],
|
|
1561
|
+
});
|
|
1562
|
+
|
|
1563
|
+
if (args.json) {
|
|
1564
|
+
console.log(JSON.stringify(audit, null, 2));
|
|
1565
|
+
return;
|
|
1566
|
+
}
|
|
1567
|
+
|
|
1568
|
+
console.log('\nThumbGate Harness Optimization Audit');
|
|
1569
|
+
console.log(`Status : ${audit.status}`);
|
|
1570
|
+
console.log(`Score : ${audit.score}/100`);
|
|
1571
|
+
console.log(`Docs : ~${audit.totals.globalDocEstimatedTokens} tokens across global agent docs`);
|
|
1572
|
+
console.log(`MCP : ${audit.totals.mcpToolCount} indexed tools; progressive discovery ${audit.signals.progressiveToolIndexPresent ? 'on' : 'missing'}`);
|
|
1573
|
+
console.log(`Gates : ${audit.totals.specializedHarnessCount} specialized harnesses`);
|
|
1574
|
+
console.log('\nRecommendations:');
|
|
1575
|
+
for (const recommendation of audit.recommendations) {
|
|
1576
|
+
console.log(` - ${recommendation}`);
|
|
1577
|
+
}
|
|
1578
|
+
console.log('');
|
|
1579
|
+
}
|
|
1580
|
+
|
|
1581
|
+
function nativeMessagingAudit() {
|
|
1582
|
+
const args = parseArgs(process.argv.slice(3));
|
|
1583
|
+
const {
|
|
1584
|
+
buildNativeMessagingAudit,
|
|
1585
|
+
formatNativeMessagingAudit,
|
|
1586
|
+
} = require(path.join(PKG_ROOT, 'scripts', 'native-messaging-audit'));
|
|
1587
|
+
const report = buildNativeMessagingAudit({
|
|
1588
|
+
homeDir: args['home-dir'],
|
|
1589
|
+
platform: args.platform,
|
|
1590
|
+
aiOnly: args['ai-only'] === true,
|
|
1591
|
+
});
|
|
1592
|
+
|
|
1593
|
+
if (args.json) {
|
|
1594
|
+
console.log(JSON.stringify(report, null, 2));
|
|
1595
|
+
return;
|
|
1596
|
+
}
|
|
1597
|
+
|
|
1598
|
+
process.stdout.write(formatNativeMessagingAudit(report));
|
|
1599
|
+
}
|
|
1600
|
+
|
|
1478
1601
|
function optimize() {
|
|
1479
1602
|
const { optimize: doOptimize } = require(path.join(PKG_ROOT, 'scripts', 'optimize-context'));
|
|
1480
1603
|
doOptimize();
|
|
@@ -1550,7 +1673,14 @@ function hookAutoCapture() {
|
|
|
1550
1673
|
const prompt = process.env.CLAUDE_USER_PROMPT || process.env.THUMBGATE_USER_PROMPT || readStdinText().trim();
|
|
1551
1674
|
const { evaluatePromptGuard } = require(path.join(PKG_ROOT, 'scripts', 'prompt-guard'));
|
|
1552
1675
|
const { processInlineFeedback, formatCliOutput } = require(path.join(PKG_ROOT, 'scripts', 'cli-feedback'));
|
|
1553
|
-
const {
|
|
1676
|
+
const { loadOptionalModule } = require(path.join(PKG_ROOT, 'scripts', 'private-core-boundary'));
|
|
1677
|
+
const { recordConversationEntry, readRecentConversationWindow } = loadOptionalModule(
|
|
1678
|
+
path.join(PKG_ROOT, 'scripts', 'feedback-history-distiller'),
|
|
1679
|
+
() => ({
|
|
1680
|
+
recordConversationEntry: () => ({ recorded: false, reason: 'thumbgate_core_required' }),
|
|
1681
|
+
readRecentConversationWindow: () => [],
|
|
1682
|
+
})
|
|
1683
|
+
);
|
|
1554
1684
|
|
|
1555
1685
|
recordConversationEntry({
|
|
1556
1686
|
author: 'user',
|
|
@@ -1708,6 +1838,60 @@ function gateStats() {
|
|
|
1708
1838
|
console.log('\n' + formatStats(stats) + '\n');
|
|
1709
1839
|
}
|
|
1710
1840
|
|
|
1841
|
+
function evalCmd() {
|
|
1842
|
+
syncActiveProjectContext();
|
|
1843
|
+
const args = parseArgs(process.argv.slice(3));
|
|
1844
|
+
const {
|
|
1845
|
+
formatProofReport,
|
|
1846
|
+
runFeedbackEvalSuite,
|
|
1847
|
+
runSuite,
|
|
1848
|
+
loadSuite,
|
|
1849
|
+
} = require(path.join(PKG_ROOT, 'scripts', 'prompt-eval'));
|
|
1850
|
+
const minScore = args['min-score'] === undefined ? 80 : Number(args['min-score']);
|
|
1851
|
+
const maxCases = args['max-cases'] === undefined ? 25 : Number(args['max-cases']);
|
|
1852
|
+
const suitePath = args.suite ? path.resolve(CWD, args.suite) : null;
|
|
1853
|
+
const fromFeedback = Boolean(args['from-feedback'] || !suitePath);
|
|
1854
|
+
const evalRun = fromFeedback
|
|
1855
|
+
? runFeedbackEvalSuite({
|
|
1856
|
+
feedbackDir: args['feedback-dir'] ? path.resolve(CWD, args['feedback-dir']) : undefined,
|
|
1857
|
+
feedbackLog: args['feedback-log'] ? path.resolve(CWD, args['feedback-log']) : undefined,
|
|
1858
|
+
maxCases,
|
|
1859
|
+
minScore,
|
|
1860
|
+
})
|
|
1861
|
+
: {
|
|
1862
|
+
suite: loadSuite(suitePath),
|
|
1863
|
+
report: runSuite(suitePath, { minScore }),
|
|
1864
|
+
};
|
|
1865
|
+
const { suite, report } = evalRun;
|
|
1866
|
+
|
|
1867
|
+
if (args['write-suite']) {
|
|
1868
|
+
const outputPath = path.resolve(CWD, args['write-suite']);
|
|
1869
|
+
fs.mkdirSync(path.dirname(outputPath), { recursive: true });
|
|
1870
|
+
fs.writeFileSync(outputPath, `${JSON.stringify(suite, null, 2)}\n`, 'utf8');
|
|
1871
|
+
}
|
|
1872
|
+
|
|
1873
|
+
if (args['write-report']) {
|
|
1874
|
+
const outputPath = path.resolve(CWD, args['write-report']);
|
|
1875
|
+
fs.mkdirSync(path.dirname(outputPath), { recursive: true });
|
|
1876
|
+
fs.writeFileSync(outputPath, `${formatProofReport(report, suite)}\n`, 'utf8');
|
|
1877
|
+
}
|
|
1878
|
+
|
|
1879
|
+
if (args.json) {
|
|
1880
|
+
console.log(JSON.stringify({ ...report, suiteDefinition: fromFeedback ? suite : undefined }, null, 2));
|
|
1881
|
+
process.exit(report.pass ? 0 : 1);
|
|
1882
|
+
}
|
|
1883
|
+
|
|
1884
|
+
console.log(`\nPrompt Evaluation: ${report.suite}`);
|
|
1885
|
+
console.log('─'.repeat(50));
|
|
1886
|
+
console.log(` Score : ${report.score}% (min ${report.minScore}%)`);
|
|
1887
|
+
console.log(` Cases : ${report.passed}/${report.total} passing`);
|
|
1888
|
+
console.log(` Failures : ${report.failed}`);
|
|
1889
|
+
console.log(` Errors : ${report.errors}`);
|
|
1890
|
+
console.log(` Source : ${fromFeedback ? 'feedback-derived' : suitePath}`);
|
|
1891
|
+
console.log(report.pass ? '\n✅ PASS\n' : '\n❌ FAIL\n');
|
|
1892
|
+
process.exit(report.pass ? 0 : 1);
|
|
1893
|
+
}
|
|
1894
|
+
|
|
1711
1895
|
function startApi() {
|
|
1712
1896
|
const serverPath = path.join(PKG_ROOT, 'src', 'api', 'server.js');
|
|
1713
1897
|
try {
|
|
@@ -1724,13 +1908,13 @@ function help() {
|
|
|
1724
1908
|
const GROUP_LABELS = {
|
|
1725
1909
|
capture: 'Feedback capture',
|
|
1726
1910
|
discovery: 'Discovery & inspection',
|
|
1727
|
-
gates: '
|
|
1911
|
+
gates: 'Checks & rules',
|
|
1728
1912
|
export: 'Export',
|
|
1729
1913
|
ops: 'Operations',
|
|
1730
1914
|
advanced: 'Advanced',
|
|
1731
1915
|
};
|
|
1732
1916
|
|
|
1733
|
-
console.log(`thumbgate v${v} — pre-action
|
|
1917
|
+
console.log(`thumbgate v${v} — pre-action checks for AI coding agents`);
|
|
1734
1918
|
console.log('');
|
|
1735
1919
|
|
|
1736
1920
|
for (const [groupKey, label] of Object.entries(GROUP_LABELS)) {
|
|
@@ -1760,8 +1944,10 @@ function help() {
|
|
|
1760
1944
|
console.log(' repair-github-marketplace Repair legacy GitHub Marketplace amount mappings');
|
|
1761
1945
|
console.log(' north-star Show proof-backed workflow-run progress toward the North Star');
|
|
1762
1946
|
console.log(' model-fit Detect local embedding profile and write evidence report');
|
|
1947
|
+
console.log(' model-candidates Rank managed model candidates and emit a benchmark plan');
|
|
1763
1948
|
console.log(' risk Train or query the boosted local risk scorer');
|
|
1764
|
-
console.log('
|
|
1949
|
+
console.log(' eval Turn feedback into reusable prompt/workflow eval proof');
|
|
1950
|
+
console.log(' optimize [PRO] Prune CLAUDE.md and migrate rules to Pre-Action Checks');
|
|
1765
1951
|
console.log(' prove [--target=X] Run proof harness (adapters|automation|...)');
|
|
1766
1952
|
console.log(' watch Watch .thumbgate/ for external signals');
|
|
1767
1953
|
console.log(' status Approval trend + failure domain dashboard');
|
|
@@ -1792,6 +1978,7 @@ function help() {
|
|
|
1792
1978
|
console.log(' npx thumbgate explore gates --json');
|
|
1793
1979
|
console.log(' npx thumbgate demo');
|
|
1794
1980
|
console.log(' npx thumbgate stats --json');
|
|
1981
|
+
console.log(' npx thumbgate eval --from-feedback --json');
|
|
1795
1982
|
console.log(' npx thumbgate lessons "force push" --json');
|
|
1796
1983
|
console.log(' npx thumbgate lessons --query="deploy" --remote');
|
|
1797
1984
|
console.log(' npx thumbgate gate-stats --json');
|
|
@@ -1933,6 +2120,10 @@ switch (COMMAND) {
|
|
|
1933
2120
|
case 'model-fit':
|
|
1934
2121
|
modelFit();
|
|
1935
2122
|
break;
|
|
2123
|
+
case 'model-candidates':
|
|
2124
|
+
case 'managed-models':
|
|
2125
|
+
modelCandidatesCmd();
|
|
2126
|
+
break;
|
|
1936
2127
|
case 'risk':
|
|
1937
2128
|
risk();
|
|
1938
2129
|
break;
|
|
@@ -1969,6 +2160,14 @@ switch (COMMAND) {
|
|
|
1969
2160
|
case 'rules':
|
|
1970
2161
|
rules();
|
|
1971
2162
|
break;
|
|
2163
|
+
case 'harness-audit':
|
|
2164
|
+
case 'harness':
|
|
2165
|
+
harnessAudit();
|
|
2166
|
+
break;
|
|
2167
|
+
case 'native-messaging-audit':
|
|
2168
|
+
case 'bridge-audit':
|
|
2169
|
+
nativeMessagingAudit();
|
|
2170
|
+
break;
|
|
1972
2171
|
case 'optimize':
|
|
1973
2172
|
optimize();
|
|
1974
2173
|
break;
|
|
@@ -2064,6 +2263,10 @@ switch (COMMAND) {
|
|
|
2064
2263
|
case 'gate-stats':
|
|
2065
2264
|
gateStats();
|
|
2066
2265
|
break;
|
|
2266
|
+
case 'eval':
|
|
2267
|
+
case 'prompt-eval':
|
|
2268
|
+
evalCmd();
|
|
2269
|
+
break;
|
|
2067
2270
|
case 'explore': {
|
|
2068
2271
|
const subCmd = process.argv[3];
|
|
2069
2272
|
const exploreArgs = parseArgs(process.argv.slice(3));
|
package/config/enforcement.json
CHANGED
|
@@ -1,20 +1,72 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "./enforcement.schema.json",
|
|
3
|
-
"description": "Loss matrix and enforcement knobs for the Bayes-optimal pre-tool-use gate. See scripts/bayes-optimal-gate.js for the decision math. Tags listed here mirror the canonical tags emitted by risk-scorer.buildPatternSummary. To disable tag-specific costs and fall back to a symmetric 1:1 decision, reduce any override to 1.0.",
|
|
3
|
+
"description": "Loss matrix and enforcement knobs for the Bayes-optimal pre-tool-use gate. See scripts/bayes-optimal-gate.js for the decision math. Tags listed here mirror the canonical tags emitted by risk-scorer.buildPatternSummary. Costs are relative: falseAllow[tag] is the regret of letting a harmful tool call through, falseBlock[tag] is the regret of blocking a safe one. resolveCost takes the max across matched tags, so a single high-cost tag dominates. To disable tag-specific costs and fall back to a symmetric 1:1 decision, reduce any override to 1.0.",
|
|
4
4
|
"lossMatrix": {
|
|
5
5
|
"falseAllow": {
|
|
6
6
|
"default": 1.0,
|
|
7
|
-
|
|
8
|
-
"destructive": 50.0,
|
|
7
|
+
|
|
9
8
|
"secrets": 1000.0,
|
|
10
|
-
"force-push-main": 200.0,
|
|
11
|
-
"data-loss": 500.0,
|
|
12
9
|
"credentials": 800.0,
|
|
10
|
+
"env-file-edit": 700.0,
|
|
11
|
+
"env-override": 700.0,
|
|
12
|
+
"deploy-env-secret-exposure": 900.0,
|
|
13
|
+
|
|
14
|
+
"self-protect": 1500.0,
|
|
15
|
+
"kill-gate": 1500.0,
|
|
16
|
+
"hooks-disable": 1200.0,
|
|
17
|
+
"config-tamper": 1200.0,
|
|
18
|
+
|
|
19
|
+
"data-loss": 500.0,
|
|
20
|
+
"db-drop-production": 600.0,
|
|
21
|
+
"db-truncate-production": 600.0,
|
|
22
|
+
"db-delete-nowhere": 500.0,
|
|
23
|
+
"db-unmigrated-sql": 400.0,
|
|
24
|
+
"db-runtime-sqlite": 350.0,
|
|
25
|
+
"db-lancedb-wipe": 400.0,
|
|
26
|
+
"mcp-sql-delete": 400.0,
|
|
27
|
+
"mcp-sql-bulk-update": 250.0,
|
|
28
|
+
|
|
29
|
+
"destructive": 50.0,
|
|
13
30
|
"rm-rf": 300.0,
|
|
14
|
-
"git-reset-hard": 100.0
|
|
31
|
+
"git-reset-hard": 100.0,
|
|
32
|
+
"force-push-main": 200.0,
|
|
33
|
+
"force-push": 150.0,
|
|
34
|
+
"protected-branch-push": 150.0,
|
|
35
|
+
"protected-file": 120.0,
|
|
36
|
+
"package-lock-reset": 75.0,
|
|
37
|
+
|
|
38
|
+
"deploy-prod": 100.0,
|
|
39
|
+
"deploy-unverified": 120.0,
|
|
40
|
+
"deploy-skip-ci": 150.0,
|
|
41
|
+
"deploy-publish-without-test": 180.0,
|
|
42
|
+
"deploy-version-drift": 90.0,
|
|
43
|
+
"production-change": 130.0,
|
|
44
|
+
"schema-migration": 150.0,
|
|
45
|
+
"permission-change": 140.0,
|
|
46
|
+
|
|
47
|
+
"supply-chain": 200.0,
|
|
48
|
+
"supply-chain-add": 200.0,
|
|
49
|
+
"unverified-skill": 160.0,
|
|
50
|
+
"blocked-npx": 180.0,
|
|
51
|
+
"network-egress": 250.0,
|
|
52
|
+
"unauthorized-egress": 250.0,
|
|
53
|
+
|
|
54
|
+
"pr-scope-violation": 80.0,
|
|
55
|
+
"admin-merge-bypass": 80.0,
|
|
56
|
+
"loop-abuse": 40.0,
|
|
57
|
+
"thread-unchecked-push": 40.0,
|
|
58
|
+
"generated-file-edit": 30.0,
|
|
59
|
+
"test-skip": 40.0,
|
|
60
|
+
"version-drift": 50.0,
|
|
61
|
+
"lockfile-manual": 60.0
|
|
15
62
|
},
|
|
16
63
|
"falseBlock": {
|
|
17
|
-
"default": 1.0
|
|
64
|
+
"default": 1.0,
|
|
65
|
+
|
|
66
|
+
"style-violation": 5.0,
|
|
67
|
+
"console-log-commit": 4.0,
|
|
68
|
+
"non-critical-warning": 5.0,
|
|
69
|
+
"large-file": 3.0
|
|
18
70
|
}
|
|
19
71
|
},
|
|
20
72
|
"bayesOptimalEnabled": true,
|