thumbgate 1.25.2 → 1.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +2 -2
- package/adapters/opencode/opencode.json +1 -1
- package/package.json +5 -2
- package/public/index.html +2 -2
- package/public/numbers.html +2 -2
- package/scripts/context-manager.js +10 -0
- package/scripts/gates-engine.js +40 -0
- package/scripts/install-shim.js +84 -0
- package/scripts/plan-gate.js +232 -0
- package/scripts/thompson-sampling.js +20 -5
- package/scripts/trajectory-scorer.js +63 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "thumbgate-marketplace",
|
|
3
|
-
"version": "1.25.2",
|
|
3
|
+
"version": "1.26.0",
|
|
4
4
|
"owner": {
|
|
5
5
|
"name": "Igor Ganapolsky",
|
|
6
6
|
"email": "ig5973700@gmail.com"
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"source": "npm",
|
|
15
15
|
"package": "thumbgate"
|
|
16
16
|
},
|
|
17
|
-
"version": "1.25.2",
|
|
17
|
+
"version": "1.26.0",
|
|
18
18
|
"author": {
|
|
19
19
|
"name": "Igor Ganapolsky",
|
|
20
20
|
"email": "ig5973700@gmail.com",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "thumbgate",
|
|
3
3
|
"description": "One 👎 becomes a hard rule the agent cannot bypass. Captures thumbs-down feedback, distills it into PreToolUse Pre-Action Checks, enforced across every future Claude Code session.",
|
|
4
|
-
"version": "1.25.2",
|
|
4
|
+
"version": "1.26.0",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Igor Ganapolsky",
|
|
7
7
|
"email": "ig5973700@gmail.com",
|
|
@@ -2,13 +2,13 @@
|
|
|
2
2
|
"mcpServers": {
|
|
3
3
|
"thumbgate": {
|
|
4
4
|
"command": "npx",
|
|
5
|
-
"args": ["--yes", "--package", "thumbgate@1.25.2", "thumbgate", "serve"]
|
|
5
|
+
"args": ["--yes", "--package", "thumbgate@1.26.0", "thumbgate", "serve"]
|
|
6
6
|
}
|
|
7
7
|
},
|
|
8
8
|
"hooks": {
|
|
9
9
|
"preToolUse": {
|
|
10
10
|
"command": "npx",
|
|
11
|
-
"args": ["--yes", "--package", "thumbgate@1.25.2", "thumbgate", "gate-check"]
|
|
11
|
+
"args": ["--yes", "--package", "thumbgate@1.26.0", "thumbgate", "gate-check"]
|
|
12
12
|
}
|
|
13
13
|
}
|
|
14
14
|
}
|
|
@@ -216,7 +216,7 @@ const {
|
|
|
216
216
|
finalizeSession: finalizeFeedbackSession,
|
|
217
217
|
} = require('../../scripts/feedback-session');
|
|
218
218
|
|
|
219
|
-
const SERVER_INFO = { name: 'thumbgate-mcp', version: '1.25.2' };
|
|
219
|
+
const SERVER_INFO = { name: 'thumbgate-mcp', version: '1.26.0' };
|
|
220
220
|
const COMMERCE_CATEGORIES = [
|
|
221
221
|
'product_recommendation',
|
|
222
222
|
'brand_compliance',
|
|
@@ -637,7 +637,7 @@ function buildEstimateUncertaintyResponse(args = {}) {
|
|
|
637
637
|
|
|
638
638
|
async function callTool(name, args = {}) {
|
|
639
639
|
assertToolAllowed(name, getActiveMcpProfile());
|
|
640
|
-
if (name !== 'workflow_sentinel') {
|
|
640
|
+
if (name !== 'workflow_sentinel' && process.env.THUMBGATE_DISABLE_MCP_FIREWALL !== '1') {
|
|
641
641
|
const firewallResult = (await evaluateGatesAsync(name, args)) || evaluateSecretGuard({ tool_name: name, tool_input: args });
|
|
642
642
|
if (firewallResult && firewallResult.decision === 'deny') {
|
|
643
643
|
const err = new Error(`Action blocked by Semantic Firewall: ${firewallResult.message}`);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "thumbgate",
|
|
3
|
-
"version": "1.25.2",
|
|
3
|
+
"version": "1.26.0",
|
|
4
4
|
"description": "ThumbGate self-improving agent governance: thumbs-up/down turns every mistake into a prevention rule and blocks repeat patterns. 36 pre-action checks, budget enforcement, and self-protection for Claude Code, Cursor, Codex, Gemini CLI, and Amp.",
|
|
5
5
|
"homepage": "https://thumbgate.ai",
|
|
6
6
|
"repository": {
|
|
@@ -101,6 +101,7 @@
|
|
|
101
101
|
"scripts/hybrid-feedback-context.js",
|
|
102
102
|
"scripts/hybrid-supervisor-agent.js",
|
|
103
103
|
"scripts/inference-cache-policy.js",
|
|
104
|
+
"scripts/install-shim.js",
|
|
104
105
|
"scripts/install-mcp.js",
|
|
105
106
|
"scripts/internal-agent-bootstrap.js",
|
|
106
107
|
"scripts/intervention-policy.js",
|
|
@@ -136,6 +137,7 @@
|
|
|
136
137
|
"scripts/oss-pr-opportunity-scout.js",
|
|
137
138
|
"scripts/otel-declarative-config.js",
|
|
138
139
|
"scripts/perplexity-client.js",
|
|
140
|
+
"scripts/plan-gate.js",
|
|
139
141
|
"scripts/plausible-server-events.js",
|
|
140
142
|
"scripts/pr-manager.js",
|
|
141
143
|
"scripts/private-core-boundary.js",
|
|
@@ -189,6 +191,7 @@
|
|
|
189
191
|
"scripts/thompson-sampling.js",
|
|
190
192
|
"scripts/thumbgate-bench.js",
|
|
191
193
|
"scripts/thumbgate-search.js",
|
|
194
|
+
"scripts/trajectory-scorer.js",
|
|
192
195
|
"scripts/token-savings.js",
|
|
193
196
|
"scripts/token-tco.js",
|
|
194
197
|
"scripts/tool-registry.js",
|
|
@@ -687,7 +690,7 @@
|
|
|
687
690
|
"test:dashboard-page-clickability": "playwright test tests/e2e/dashboard-page-clickability.spec.js",
|
|
688
691
|
"test:agent-manager-page-clickability": "playwright test tests/e2e/agent-manager-page-clickability.spec.js",
|
|
689
692
|
"test:pricing-page-clickability": "playwright test tests/e2e/pricing-page-clickability.spec.js",
|
|
690
|
-
"test:proof:truth": "node --test tests/knowledge-entropy.test.js tests/mcp-wiring-doctor.test.js tests/sequence-guard.test.js tests/slopsquat-guard.test.js tests/slopsquat-stress.test.js tests/truth-and-proof.test.js tests/wire-proof-gate.test.js tests/adaptive-reliability.test.js",
|
|
693
|
+
"test:proof:truth": "node --test tests/knowledge-entropy.test.js tests/mcp-wiring-doctor.test.js tests/sequence-guard.test.js tests/slopsquat-guard.test.js tests/slopsquat-stress.test.js tests/truth-and-proof.test.js tests/wire-proof-gate.test.js tests/adaptive-reliability.test.js tests/coderabbit-patterns.test.js",
|
|
691
694
|
"build:grok-plugin": "node scripts/build-grok-plugin.js",
|
|
692
695
|
"promote:launch": "node scripts/x-autonomous-marketing.js",
|
|
693
696
|
"feedback:ingest": "node scripts/ingest-manual-feedback.js",
|
package/public/index.html
CHANGED
|
@@ -20,7 +20,7 @@ __GOOGLE_SITE_VERIFICATION_META__
|
|
|
20
20
|
<meta property="og:image" content="https://thumbgate.ai/og.png">
|
|
21
21
|
<meta name="twitter:card" content="summary_large_image">
|
|
22
22
|
<meta name="twitter:image" content="https://thumbgate.ai/og.png">
|
|
23
|
-
<meta name="thumbgate-version" content="1.25.2">
|
|
23
|
+
<meta name="thumbgate-version" content="1.26.0">
|
|
24
24
|
<meta name="keywords" content="ThumbGate, thumbgate, AI agent orchestration, AI experience orchestration, agentic development cycle, AC/DC framework, Guide Generate Verify Solve, agent enforcement layer, save LLM tokens, reduce Claude API cost, reduce OpenAI cost, AI agent token savings, prevent LLM retries, prevent hallucination retries, stop AI token waste, pre-action checks, agent governance, Claude Code, Cursor, Codex, Gemini, Amp, Cline, OpenCode, workflow hardening, context engineering, AI authenticity, brand authenticity AI">
|
|
25
25
|
<link rel="canonical" href="__APP_ORIGIN__/">
|
|
26
26
|
<link rel="alternate" type="text/markdown" title="ThumbGate LLM context" href="__APP_ORIGIN__/llm-context.md">
|
|
@@ -1586,7 +1586,7 @@ __GA_BOOTSTRAP__
|
|
|
1586
1586
|
<a href="https://www.linkedin.com/in/igorganapolsky" target="_blank" rel="noopener">LinkedIn</a>
|
|
1587
1587
|
<a href="/blog">Blog</a>
|
|
1588
1588
|
</div>
|
|
1589
|
-
<span class="footer-copy">© 2026 ThumbGate · MIT License · npm v1.25.2</span>
|
|
1589
|
+
<span class="footer-copy">© 2026 ThumbGate · MIT License · npm v1.26.0</span>
|
|
1590
1590
|
</div>
|
|
1591
1591
|
</footer>
|
|
1592
1592
|
|
package/public/numbers.html
CHANGED
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
"alternateName": "thumbgate",
|
|
26
26
|
"applicationCategory": "DeveloperApplication",
|
|
27
27
|
"operatingSystem": "Cross-platform, Node.js >=18.18.0",
|
|
28
|
-
"softwareVersion": "1.25.2",
|
|
28
|
+
"softwareVersion": "1.26.0",
|
|
29
29
|
"url": "https://thumbgate.ai/numbers",
|
|
30
30
|
"dateModified": "2026-05-07",
|
|
31
31
|
"creator": {
|
|
@@ -202,7 +202,7 @@
|
|
|
202
202
|
<main class="container">
|
|
203
203
|
<h1>The Numbers</h1>
|
|
204
204
|
<p class="subtitle">Generated first-party operational snapshot from the ThumbGate runtime. This is not customer traction, install volume, revenue, or proof that a configured gate has fired.</p>
|
|
205
|
-
<div class="freshness">Updated: 2026-05-07 · Version 1.25.2</div>
|
|
205
|
+
<div class="freshness">Updated: 2026-05-07 · Version 1.26.0</div>
|
|
206
206
|
<div class="truth-note"><strong>Read this first:</strong> configured checks are inventory. Recorded blocks and warnings are usage evidence. This snapshot currently reports 0 recorded hard-block event(s) and 0 recorded warning event(s).</div>
|
|
207
207
|
|
|
208
208
|
<h2>Gate enforcement</h2>
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const path = require('path');
|
|
6
|
+
|
|
4
7
|
/**
|
|
5
8
|
* Context Manager — Unified Context-Augmented Generation (CAG) Orchestrator
|
|
6
9
|
*
|
|
@@ -248,6 +251,13 @@ function assembleUnifiedContext(params = {}) {
|
|
|
248
251
|
reliabilityDirective = 'CAUTION: Conflicting past patterns detected for this action. Prioritize absolute ground truth verification over rapid completion.';
|
|
249
252
|
}
|
|
250
253
|
|
|
254
|
+
// v1.26.0: CodeRabbit Planning Directive
|
|
255
|
+
const planPath = path.join(repoPath || process.cwd(), 'PLAN.md');
|
|
256
|
+
if (!fs.existsSync(planPath) && ['Bash', 'Write', 'Edit', 'Deploy'].includes(toolName)) {
|
|
257
|
+
const planReminder = 'ORCHESTRATION: High-risk action detected without a PLAN.md. Please document your intent, assumptions, and verification steps before proceeding.';
|
|
258
|
+
reliabilityDirective = reliabilityDirective ? `${reliabilityDirective}\n\n${planReminder}` : planReminder;
|
|
259
|
+
}
|
|
260
|
+
|
|
251
261
|
const result = {
|
|
252
262
|
tier,
|
|
253
263
|
agentType: agentType || 'default',
|
package/scripts/gates-engine.js
CHANGED
|
@@ -55,6 +55,8 @@ const {
|
|
|
55
55
|
const {
|
|
56
56
|
evaluateSecurityScan,
|
|
57
57
|
} = require('./security-scanner');
|
|
58
|
+
const { evaluatePlanGate } = require('./plan-gate');
|
|
59
|
+
const { getTrajectoryScore } = require('./trajectory-scorer');
|
|
58
60
|
const { evaluateSequenceState } = loadOptionalModule('./sequence-guard', () => ({
|
|
59
61
|
evaluateSequenceState: () => null,
|
|
60
62
|
}));
|
|
@@ -91,6 +93,10 @@ const REMOTE_SIDE_EFFECT_BASH_PATTERN = /\b(?:git\s+push\b|gh\s+pr\s+(?:create|m
|
|
|
91
93
|
const BOOSTED_RISK_BLOCK_SCORE = 0.8;
|
|
92
94
|
const BOOSTED_RISK_MIN_EXAMPLES = 3;
|
|
93
95
|
const PR_THREAD_RESOLUTION_ACTION = 'pr_thread_resolution_verified_after_commit';
|
|
96
|
+
|
|
97
|
+
/**
 * Tell whether runtime plan-gate enforcement is switched on.
 *
 * The gate is opt-in: it is enabled only when the THUMBGATE_PLAN_GATE
 * environment variable is exactly '1' or 'true'. Any other value, or an
 * unset variable, leaves it disabled.
 *
 * @returns {boolean} true when the plan gate should be evaluated.
 */
function isRuntimePlanGateEnabled() {
  const flag = process.env.THUMBGATE_PLAN_GATE;
  return ['1', 'true'].includes(flag);
}
|
|
94
100
|
const PR_THREAD_RESOLUTION_CLAIM_PATTERN = '(?:thread|review|comment).*?(?:resolved|verified|checked|addressed|fixed)|(?:resolved|verified|checked|addressed|fixed).*?(?:thread|review|comment)';
|
|
95
101
|
const PR_THREAD_RESOLUTION_REQUIRED_ACTIONS = ['pr_threads_checked', 'thread_resolution_verified'];
|
|
96
102
|
|
|
@@ -1512,6 +1518,23 @@ async function evaluateGatesAsync(toolName, toolInput, configPath) {
|
|
|
1512
1518
|
return boostedRiskGuard;
|
|
1513
1519
|
}
|
|
1514
1520
|
|
|
1521
|
+
// Tier 1b: Planning and Trajectory (v1.26.0 - CodeRabbit Pattern).
|
|
1522
|
+
// Keep runtime enforcement explicit so advisory planning checks do not mask
|
|
1523
|
+
// higher-priority deny/approve gates in established workflows.
|
|
1524
|
+
if (isRuntimePlanGateEnabled()) {
|
|
1525
|
+
const planGate = evaluatePlanGate(toolName, toolInput);
|
|
1526
|
+
if (planGate) {
|
|
1527
|
+
recordStat(planGate.gate, planGate.decision === 'deny' ? 'block' : 'warn');
|
|
1528
|
+
return planGate;
|
|
1529
|
+
}
|
|
1530
|
+
|
|
1531
|
+
const trajectory = getTrajectoryScore();
|
|
1532
|
+
if (trajectory.isDrifting) {
|
|
1533
|
+
recordStat('strategic-drift', 'block');
|
|
1534
|
+
return { decision: 'deny', gate: 'strategic-drift', message: trajectory.message, severity: 'high' };
|
|
1535
|
+
}
|
|
1536
|
+
}
|
|
1537
|
+
|
|
1515
1538
|
// Fast-path: feedback/recall tools skip metric gates entirely (avoids Stripe API calls)
|
|
1516
1539
|
const METRIC_SKIP_TOOLS = ['capture_feedback', 'feedback_stats', 'recall', 'feedback_summary', 'prevention_rules'];
|
|
1517
1540
|
const skipMetrics = METRIC_SKIP_TOOLS.includes(toolName);
|
|
@@ -1709,6 +1732,23 @@ function evaluateGates(toolName, toolInput, configPath) {
|
|
|
1709
1732
|
return boostedRiskGuard;
|
|
1710
1733
|
}
|
|
1711
1734
|
|
|
1735
|
+
// Tier 1b: Planning and Trajectory (v1.26.0 - CodeRabbit Pattern).
|
|
1736
|
+
// Keep runtime enforcement explicit so advisory planning checks do not mask
|
|
1737
|
+
// higher-priority deny/approve gates in established workflows.
|
|
1738
|
+
if (isRuntimePlanGateEnabled()) {
|
|
1739
|
+
const planGate = evaluatePlanGate(toolName, toolInput);
|
|
1740
|
+
if (planGate) {
|
|
1741
|
+
recordStat(planGate.gate, planGate.decision === 'deny' ? 'block' : 'warn');
|
|
1742
|
+
return planGate;
|
|
1743
|
+
}
|
|
1744
|
+
|
|
1745
|
+
const trajectory = getTrajectoryScore();
|
|
1746
|
+
if (trajectory.isDrifting) {
|
|
1747
|
+
recordStat('strategic-drift', 'block');
|
|
1748
|
+
return { decision: 'deny', gate: 'strategic-drift', message: trajectory.message, severity: 'high' };
|
|
1749
|
+
}
|
|
1750
|
+
}
|
|
1751
|
+
|
|
1712
1752
|
for (const gate of config.gates) {
|
|
1713
1753
|
const matchDetails = matchGate(gate, toolName, toolInput);
|
|
1714
1754
|
if (!matchDetails.matched) continue;
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* install-shim.js — Install a stable shim at ~/.thumbgate/bin/thumbgate-hook
|
|
5
|
+
*
|
|
6
|
+
* The shim is a tiny shell script that always resolves thumbgate@latest,
|
|
7
|
+
* so hook commands in settings.local.json never go stale. This is the
|
|
8
|
+
* Volta-style pattern: a version-agnostic indirection layer that survives
|
|
9
|
+
* across thumbgate upgrades.
|
|
10
|
+
*
|
|
11
|
+
* The shim checks for a cached runtime binary first (fast path), and falls
|
|
12
|
+
* back to `npx --yes thumbgate@latest` (slow path, self-installs).
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
const fs = require('fs');
|
|
16
|
+
const path = require('path');
|
|
17
|
+
const os = require('os');
|
|
18
|
+
|
|
19
|
+
const SHIM_DIR = path.join(os.homedir(), '.thumbgate', 'bin');
|
|
20
|
+
const SHIM_PATH = path.join(SHIM_DIR, 'thumbgate-hook');
|
|
21
|
+
const RUNTIME_BIN = path.join(os.homedir(), '.thumbgate', 'runtime', 'node_modules', '.bin', 'thumbgate');
|
|
22
|
+
|
|
23
|
+
/**
 * Build the text of the bash shim script.
 *
 * Behavior of the generated script:
 *  - Fast path: when the cached runtime binary exists and is executable, it
 *    first spawns a detached, silenced `npm install thumbgate@latest` into the
 *    runtime directory (this happens on every fast-path run), then `exec`s the
 *    cached binary so the shell process is replaced.
 *  - Slow path: otherwise it creates the runtime directory and `exec`s
 *    `npx --yes --package thumbgate@latest -- thumbgate "$@"`.
 *
 * The two paths are embedded as single-quoted shell words. Inside single
 * quotes bash performs no expansion at all, so a home directory containing
 * `$`, a backtick, a backslash or a double quote is passed through literally.
 * (JSON-style double quoting would still let bash expand `$` and backticks.)
 *
 * @returns {string} The complete shim script, ending with a newline.
 */
function shimContent() {
  // POSIX single-quote escaping: close the quote, emit an escaped quote, reopen.
  const shellQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;

  const quotedRuntimeBin = shellQuote(RUNTIME_BIN);
  const quotedRuntimeDir = shellQuote(path.join(os.homedir(), '.thumbgate', 'runtime'));

  return `#!/usr/bin/env bash
# ThumbGate hook shim — DO NOT EDIT
# Installed by: thumbgate init
# Purpose: version-agnostic hook entry point that always runs latest ThumbGate
# Pattern: Volta-style stable shim (see https://volta.sh)

set -euo pipefail

RUNTIME_BIN=${quotedRuntimeBin}
RUNTIME_DIR=${quotedRuntimeDir}

# Fast path: cached runtime binary exists and is executable
if [ -x "$RUNTIME_BIN" ]; then
  # Spawn background upgrade (detached, no stdout/stderr, won't block hook)
  ( nohup npm install --prefix "$RUNTIME_DIR" --no-save --omit=dev thumbgate@latest >/dev/null 2>&1 & ) 2>/dev/null || true
  exec "$RUNTIME_BIN" "$@"
fi

# Slow path: no cached binary — install + exec via npx
mkdir -p "$RUNTIME_DIR"
exec npx --yes --package thumbgate@latest -- thumbgate "$@"
`;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Write the hook shim to SHIM_PATH and make sure it is executable.
 *
 * Creates SHIM_DIR if needed and overwrites any existing shim with the
 * current output of shimContent().
 *
 * @returns {string} The absolute path of the installed shim.
 * @throws {Error} Propagates any filesystem error from mkdir/write/chmod.
 */
function installShim() {
  fs.mkdirSync(SHIM_DIR, { recursive: true });
  fs.writeFileSync(SHIM_PATH, shimContent(), { mode: 0o755 });
  // The `mode` option is only honored when the file is newly created and is
  // filtered through the process umask. Re-installing over an existing shim
  // that lost its exec bit would otherwise leave it non-executable.
  fs.chmodSync(SHIM_PATH, 0o755);
  return SHIM_PATH;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Check whether the shim is present and carries at least one execute bit.
 *
 * @returns {boolean} true when SHIM_PATH can be stat'ed and any of the
 *   user/group/other execute bits is set; false otherwise, including when
 *   the stat call fails for any reason.
 */
function shimInstalled() {
  let info;
  try {
    info = fs.statSync(SHIM_PATH);
  } catch {
    // Missing file, broken symlink or permission problem: not installed.
    return false;
  }
  const anyExecuteBit = info.mode & 0o111;
  return anyExecuteBit !== 0;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Accessor for the shim location.
 *
 * @returns {string} The value of the module-level SHIM_PATH constant.
 */
function shimPath() {
  const location = SHIM_PATH;
  return location;
}
|
|
76
|
+
|
|
77
|
+
module.exports = {
|
|
78
|
+
installShim,
|
|
79
|
+
shimInstalled,
|
|
80
|
+
shimPath,
|
|
81
|
+
shimContent,
|
|
82
|
+
SHIM_DIR,
|
|
83
|
+
SHIM_PATH,
|
|
84
|
+
};
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Plan Gate — implementing the CodeRabbit "Planning-First" pattern.
|
|
6
|
+
*
|
|
7
|
+
* 1. (Static) Validates structured 'PLAN.md' / 'PRD' content (used in loop-closure).
|
|
8
|
+
* 2. (Dynamic) Intercepts high-risk tool calls during agent execution.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
const fs = require('fs');
|
|
12
|
+
const path = require('path');
|
|
13
|
+
|
|
14
|
+
const RISK_TOOLS = ['Bash', 'Write', 'Edit', 'Deploy'];
|
|
15
|
+
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Gate validators (Legacy / Loop Closure)
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
/**
 * Count the data rows of the markdown table found under a given heading.
 *
 * The section runs from the first heading line containing `sectionHeading`
 * up to the next heading line (or the end of the content). Within it, every
 * line starting with `|` is a table line; alignment/separator lines such as
 * `|---|` or `| :--- |` are discarded and the first remaining line is treated
 * as the header.
 *
 * @param {string} content - Full markdown document.
 * @param {string} sectionHeading - Heading text, matched literally.
 * @returns {number} Number of data rows (0 when the section or table is absent).
 */
function countTableRows(content, sectionHeading) {
  // The heading is literal text, not a pattern: escape regex metacharacters so
  // headings such as "Risks (Open)" or "Q&A?" still match.
  const literalHeading = String(sectionHeading).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const sectionRegex = new RegExp(
    `#+\\s*${literalHeading}[^\\n]*\\n([\\s\\S]*?)(?=\\n#+\\s|$)`,
  );
  const match = content.match(sectionRegex);
  if (!match) return 0;

  const tableLines = match[1]
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line.startsWith('|'));

  // Drop separator rows; the optional colon covers left-aligned columns.
  const nonSeparatorRows = tableLines.filter((line) => !/^\|\s*:?-+/.test(line));

  // The first remaining row is the header, not data.
  return Math.max(0, nonSeparatorRows.length - 1);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Count `interface X` / `type X` declarations inside the fenced code blocks
 * of the "Contracts" section.
 *
 * Only text between triple-backtick fences is inspected; declarations that
 * appear in the section outside a fence are ignored.
 *
 * @param {string} content - Full markdown document.
 * @returns {number} Total declarations found (0 when the section is absent).
 */
function countContracts(content) {
  const sectionMatch = content.match(/#+\s*Contracts[^\n]*\n([\s\S]*?)(?=\n#+\s|$)/);
  if (sectionMatch === null) return 0;

  const fencedBlocks = sectionMatch[1].match(/```[\s\S]*?```/g) || [];
  return fencedBlocks.reduce((total, fenced) => {
    const declarations = fenced.match(/\b(interface|type)\s+\w+/g) || [];
    return total + declarations.length;
  }, 0);
}
|
|
53
|
+
|
|
54
|
+
/**
 * Count the unchecked `- [ ]` items in the "Validation Checklist" section.
 *
 * Only dash bullets with an empty (or whitespace-only) checkbox are counted;
 * checked boxes and other bullet markers are not.
 *
 * @param {string} content - Full markdown document.
 * @returns {number} Number of unchecked checklist lines (0 when no section).
 */
function countValidationScenarios(content) {
  const sectionMatch = content.match(
    /#+\s*Validation\s+Checklist[^\n]*\n([\s\S]*?)(?=\n#+\s|$)/,
  );
  if (!sectionMatch) return 0;

  let unchecked = 0;
  for (const row of sectionMatch[1].split('\n')) {
    if (/^\s*-\s*\[\s*\]/.test(row)) unchecked += 1;
  }
  return unchecked;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Read the status token of a plan.
 *
 * Looks for a heading containing "Status" and returns the first
 * whitespace-delimited token on the text that follows the heading line.
 *
 * @param {string} content - Full markdown document.
 * @returns {string|null} The token, or null when no Status heading matches.
 */
function getStatus(content) {
  const [, firstToken = null] = content.match(/#+\s*Status[^\n]*\n\s*(\S+)/) || [];
  if (firstToken === null) return null;
  return firstToken.trim();
}
|
|
68
|
+
|
|
69
|
+
/**
 * Run the four static plan gates against a plan document.
 *
 * Gates and thresholds:
 *  - Clarifying Questions: at least 3 rows in the
 *    "Clarifying Questions Resolved" table.
 *  - Contracts Defined: at least 1 interface/type declaration.
 *  - Validation Checklist: at least 2 unchecked scenarios.
 *  - Status: anything except the literal "COMPLETE".
 *
 * @param {string} content - Full markdown document.
 * @returns {{gates: Array<{name: string, pass: boolean, detail: string}>, allPass: boolean}}
 */
function validatePlan(content) {
  const status = getStatus(content);
  const scenarioCount = countValidationScenarios(content);
  const contractCount = countContracts(content);
  const questionCount = countTableRows(content, 'Clarifying Questions Resolved');

  const alreadyComplete = status === 'COMPLETE';
  const makeGate = (name, pass, detail) => ({ name, pass, detail });

  const gates = [
    makeGate(
      'Clarifying Questions',
      questionCount >= 3,
      `${questionCount} questions resolved`,
    ),
    makeGate(
      'Contracts Defined',
      contractCount >= 1,
      `${contractCount} interface${contractCount === 1 ? '' : 's'} found`,
    ),
    makeGate(
      'Validation Checklist',
      scenarioCount >= 2,
      `${scenarioCount} scenarios defined`,
    ),
    makeGate(
      'Status',
      !alreadyComplete,
      alreadyComplete
        ? 'COMPLETE (already finished — cannot re-approve)'
        : `${status || 'UNKNOWN'} (not COMPLETE)`,
    ),
  ];

  // Passing overall means no gate failed.
  const anyFailure = gates.some((gate) => !gate.pass);
  return { gates, allPass: !anyFailure };
}
|
|
104
|
+
|
|
105
|
+
/**
 * Render a validatePlan() result as a human-readable report.
 *
 * One line per gate (check mark or cross, name, detail), then a blank line,
 * then the overall verdict line.
 *
 * @param {{gates: Array<{name: string, pass: boolean, detail: string}>, allPass: boolean}} result
 * @returns {string} Newline-joined report text.
 */
function formatReport(result) {
  const verdict = result.allPass
    ? 'RESULT: PASS — all gates satisfied'
    : 'RESULT: BLOCKED — resolve issues above before spawning agents';

  const gateRows = [];
  for (const gate of result.gates) {
    const mark = gate.pass ? '✅' : '❌';
    gateRows.push(`${mark} ${gate.name}: ${gate.detail}`);
  }

  return [...gateRows, '', verdict].join('\n');
}
|
|
117
|
+
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
// Dynamic Gating (CodeRabbit Orchestration Pattern)
|
|
120
|
+
// ---------------------------------------------------------------------------
|
|
121
|
+
|
|
122
|
+
/**
 * Evaluate the planning state for a single tool call.
 *
 * Only tools listed in RISK_TOOLS are inspected. Checks run in order and the
 * first one that fires is returned:
 *  1. plan-gate-missing     — no PLAN.md in the project root.
 *  2. plan-gate-drift       — the basename of the action (the Bash command
 *     string, or the target file path for other tools) does not occur in
 *     PLAN.md, compared case-insensitively.
 *  3. plan-gate-assumptions — PLAN.md contains assumption-style lines.
 *
 * Every result produced here is advisory (`decision: 'warn'`).
 *
 * @param {string} toolName - Name of the tool being invoked.
 * @param {Object} [toolInput] - Tool arguments. `command` is read for Bash;
 *   `filePath` or `file_path` is read for every other risk tool.
 * @param {Object} [options]
 * @param {string} [options.projectRoot] - Directory holding PLAN.md;
 *   defaults to process.cwd().
 * @returns {{decision: string, gate: string, message: string, severity: string}|null}
 *   A warning descriptor, or null when nothing needs to be reported.
 */
function evaluatePlanGate(toolName, toolInput, options = {}) {
  if (!RISK_TOOLS.includes(toolName)) return null;

  const projectRoot = options.projectRoot || process.cwd();
  const planPath = path.join(projectRoot, 'PLAN.md');

  // Tier 1: Existence Check
  if (!fs.existsSync(planPath)) {
    return {
      decision: 'warn',
      gate: 'plan-gate-missing',
      message: '⚠️ THUMBGATE: High-risk tool call without a PLAN.md. Please create a plan documenting your intent and assumptions.',
      severity: 'high',
    };
  }

  // Tier 2: Alignment Check (Simple)
  const planContent = fs.readFileSync(planPath, 'utf8');

  // Tolerate a missing or non-object payload instead of throwing inside a gate.
  const input = toolInput && typeof toolInput === 'object' ? toolInput : {};
  // File tools may send the target as `filePath` or as snake_case `file_path`.
  const rawAction = toolName === 'Bash'
    ? input.command
    : (input.filePath || input.file_path);
  // path.basename throws on non-strings; treat those as "no action to check".
  const action = typeof rawAction === 'string' ? rawAction : '';

  if (action && !planContent.toLowerCase().includes(path.basename(action).toLowerCase())) {
    return {
      decision: 'warn',
      gate: 'plan-gate-drift',
      message: `⚠️ THUMBGATE: Strategic Drift detected. The action "${action}" is not mentioned in your PLAN.md.`,
      severity: 'medium',
    };
  }

  // Tier 3: Implicit Assumption Extraction
  const assumptions = extractAssumptions(planContent);
  if (assumptions.length > 0) {
    return {
      decision: 'warn',
      gate: 'plan-gate-assumptions',
      message: '🔍 THUMBGATE: Explicitly verify these implicit assumptions before proceeding:\n- ' + assumptions.join('\n- '),
      severity: 'medium',
    };
  }

  return null;
}
|
|
167
|
+
|
|
168
|
+
/**
 * Collect assumption statements from plan text.
 *
 * Each line is scanned for a whole-word keyword — assume/assumes/assumed/
 * assuming, assumption(s), implicit(ly), pre-requisite(s)/prerequisite(s),
 * or "depends on" — optionally followed by ':' or '-'. The rest of the line
 * after the keyword is the extracted statement. Lines where nothing follows
 * the keyword are skipped.
 *
 * Word boundaries keep inflected forms from leaking suffix fragments into the
 * result (e.g. "assumed X" yields "X", not "d X").
 *
 * @param {string} content - Plan text.
 * @returns {string[]} Up to five statements, in document order.
 */
function extractAssumptions(content) {
  const MAX_ASSUMPTIONS = 5;
  const keyword =
    /\b(?:assum(?:e[sd]?|ing|ptions?)|implicit(?:ly)?|pre-?requisites?|depends on)\b[:\-]?\s*(.*)/i;

  const assumptions = [];
  for (const line of content.split('\n')) {
    const match = keyword.exec(line);
    const statement = match ? match[1].trim() : '';
    if (!statement) continue;

    assumptions.push(statement);
    // Only the first few are reported, so stop scanning once the cap is hit.
    if (assumptions.length === MAX_ASSUMPTIONS) break;
  }
  return assumptions;
}
|
|
184
|
+
|
|
185
|
+
// ---------------------------------------------------------------------------
|
|
186
|
+
// Main
|
|
187
|
+
// ---------------------------------------------------------------------------
|
|
188
|
+
|
|
189
|
+
/**
 * CLI entry point: validate a plan file and exit with a status code.
 *
 * Usage: node scripts/plan-gate.js <plan-file.md> [--json]
 *
 * The first argument that is not `--json` is taken as the plan path. The
 * result is printed as pretty JSON when `--json` is present, otherwise as
 * the formatted text report. Exit code is 0 when every gate passes and 1
 * otherwise (also 1 for a missing argument or a missing file).
 */
function run() {
  const cliArgs = process.argv.slice(2);
  const wantsJson = cliArgs.includes('--json');
  const planFile = cliArgs.find((arg) => arg !== '--json');

  if (!planFile) {
    console.error('Usage: node scripts/plan-gate.js <plan-file.md> [--json]');
    process.exit(1);
  }

  const absolutePath = path.resolve(planFile);
  if (!fs.existsSync(absolutePath)) {
    console.error(`File not found: ${absolutePath}`);
    process.exit(1);
  }

  const result = validatePlan(fs.readFileSync(absolutePath, 'utf-8'));
  const rendered = wantsJson
    ? JSON.stringify(result, null, 2)
    : formatReport(result);
  console.log(rendered);

  process.exit(result.allPass ? 0 : 1);
}
|
|
216
|
+
|
|
217
|
+
// Export for testing
|
|
218
|
+
module.exports = {
|
|
219
|
+
validatePlan,
|
|
220
|
+
formatReport,
|
|
221
|
+
countTableRows,
|
|
222
|
+
countContracts,
|
|
223
|
+
countValidationScenarios,
|
|
224
|
+
getStatus,
|
|
225
|
+
evaluatePlanGate,
|
|
226
|
+
extractAssumptions,
|
|
227
|
+
};
|
|
228
|
+
|
|
229
|
+
// Run only when executed directly
|
|
230
|
+
if (require.main === module) {
|
|
231
|
+
run();
|
|
232
|
+
}
|
|
@@ -301,6 +301,24 @@ function getCalibration(model) {
|
|
|
301
301
|
// Posterior Sampling
|
|
302
302
|
// ---------------------------------------------------------------------------
|
|
303
303
|
|
|
304
|
+
/**
 * Apply Thompson temperature scaling to a Beta posterior.
 *
 * Both parameters are multiplied by 1/T, so their ratio is unchanged unless
 * the 0.01 floor applies: T below 1 enlarges alpha and beta, T above 1
 * shrinks them.
 *
 * Temperature handling: the value is coerced with Number(); anything that
 * coerces to a falsy number (0, NaN) falls back to 1.0, and the result is
 * clamped to a minimum of 0.01. Each scaled parameter is also floored at 0.01.
 *
 * @param {Object} params - Category posterior parameters (`alpha`, `beta`)
 * @param {number} temperature - Scaling factor (default 1.0)
 * @returns {{ alpha: number, beta: number }}
 */
function getTemperatureScaledPosteriorParams(params, temperature = 1.0) {
  const FLOOR = 0.01;
  const requested = Number(temperature) || 1.0;
  const clampedTemperature = Math.max(FLOOR, requested);
  const precisionScale = 1.0 / clampedTemperature;

  const scaled = (value) => Math.max(value * precisionScale, FLOOR);

  return {
    alpha: scaled(params.alpha),
    beta: scaled(params.beta),
  };
}
|
|
321
|
+
|
|
304
322
|
/**
|
|
305
323
|
* Draw one sample from the Beta posterior for each category.
|
|
306
324
|
* Supports temperature scaling to adjust exploitation vs exploration.
|
|
@@ -318,13 +336,9 @@ function getCalibration(model) {
|
|
|
318
336
|
*/
|
|
319
337
|
function samplePosteriors(model, temperature = 1.0) {
|
|
320
338
|
const samples = {};
|
|
321
|
-
const T = Math.max(0.01, Number(temperature) || 1.0);
|
|
322
|
-
const invT = 1.0 / T;
|
|
323
339
|
|
|
324
340
|
for (const [cat, params] of Object.entries(model.categories || {})) {
|
|
325
|
-
|
|
326
|
-
const alpha = Math.max(params.alpha * invT, 0.01);
|
|
327
|
-
const beta = Math.max(params.beta * invT, 0.01);
|
|
341
|
+
const { alpha, beta } = getTemperatureScaledPosteriorParams(params, temperature);
|
|
328
342
|
samples[cat] = betaSample(alpha, beta);
|
|
329
343
|
}
|
|
330
344
|
return samples;
|
|
@@ -457,6 +471,7 @@ module.exports = {
|
|
|
457
471
|
getReliability,
|
|
458
472
|
isCalibrated,
|
|
459
473
|
getCalibration,
|
|
474
|
+
getTemperatureScaledPosteriorParams,
|
|
460
475
|
samplePosteriors,
|
|
461
476
|
argmaxPosteriors,
|
|
462
477
|
pickBestCategory,
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Trajectory Scorer — Strategic Drift Detection.
|
|
6
|
+
*
|
|
7
|
+
* Measures the "Semantic Distance" between the original user intent
|
|
8
|
+
* (from primer.md) and the current set of changed files.
|
|
9
|
+
*
|
|
10
|
+
* If the agent modifies too many unrelated files, it triggers a safety block.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const fs = require('fs');
|
|
14
|
+
const path = require('path');
|
|
15
|
+
const { execSync } = require('child_process');
|
|
16
|
+
|
|
17
|
+
/**
 * Estimate how far the current change set has drifted from the stated intent.
 *
 * The intent is the lower-cased text of `primer.md` in the project root. A
 * changed file counts as drift when its basename does not occur anywhere in
 * that text. Drift is flagged only when more than 60% of the changed files
 * are unmentioned AND more than three files changed.
 *
 * When `primer.md` is missing, git cannot be queried, or nothing changed, a
 * neutral result is returned (`score: 0`, not drifting).
 *
 * @param {Object} [options]
 * @param {string} [options.projectRoot] - Repository root; defaults to
 *   process.cwd().
 * @param {string[]} [options.changedFiles] - Changed paths; when omitted they
 *   are read from `git diff --name-only HEAD`.
 * @returns {{score: number, isDrifting: boolean, drift: boolean,
 *   changedCount?: number, driftCount?: number, message?: (string|null)}}
 *   `drift` always mirrors `isDrifting`.
 */
function getTrajectoryScore(options = {}) {
  const neutral = () => ({ score: 0, isDrifting: false, drift: false });

  const projectRoot = options.projectRoot || process.cwd();
  const primerPath = path.join(projectRoot, 'primer.md');

  if (!fs.existsSync(primerPath)) return neutral();

  const intent = fs.readFileSync(primerPath, 'utf8').toLowerCase();

  // Get currently modified files (unstaged + staged)
  let changedFiles = options.changedFiles;
  if (!changedFiles) {
    try {
      const output = execSync('git diff --name-only HEAD', {
        cwd: projectRoot,
        encoding: 'utf8',
        // Discard git's stderr (e.g. "not a git repository") so it is not
        // forwarded to the parent process.
        stdio: ['ignore', 'pipe', 'ignore'],
      });
      changedFiles = output.split('\n').filter((file) => file.trim());
    } catch {
      // Best effort: without git data there is nothing to score.
      return neutral();
    }
  }

  if (changedFiles.length === 0) return neutral();

  // Calculate drift: How many changed files are NOT mentioned in the intent?
  const driftCount = changedFiles.filter(
    (file) => !intent.includes(path.basename(file).toLowerCase()),
  ).length;

  const driftRatio = driftCount / changedFiles.length;
  const isDrifting = driftRatio > 0.6 && changedFiles.length > 3;

  return {
    score: Number((1 - driftRatio).toFixed(2)),
    changedCount: changedFiles.length,
    driftCount,
    isDrifting,
    // Keep the result shape consistent with the neutral early returns.
    drift: isDrifting,
    message: isDrifting
      ? `🚫 THUMBGATE: Strategic Drift Detected. You have modified ${changedFiles.length} files, but ${driftCount} of them were not mentioned in the original intent. Please refocus or update the intent.`
      : null,
  };
}
|
|
60
|
+
|
|
61
|
+
module.exports = {
|
|
62
|
+
getTrajectoryScore
|
|
63
|
+
};
|