thumbgate 1.9.0 → 1.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "thumbgate-marketplace",
3
- "version": "1.9.0",
3
+ "version": "1.10.1",
4
4
  "owner": {
5
5
  "name": "Igor Ganapolsky",
6
6
  "email": "ig5973700@gmail.com"
@@ -13,7 +13,7 @@
13
13
  "source": "npm",
14
14
  "package": "thumbgate"
15
15
  },
16
- "version": "1.9.0",
16
+ "version": "1.10.1",
17
17
  "author": {
18
18
  "name": "Igor Ganapolsky"
19
19
  },
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "thumbgate",
3
3
  "description": "Type 👍 or 👎 on any agent action. ThumbGate captures it, distills a lesson, and blocks the pattern from repeating. One thumbs-down = the agent physically cannot make that mistake again. 33 pre-action gates, budget enforcement, self-protection, and NIST/SOC2 compliance tags.",
4
- "version": "1.9.0",
4
+ "version": "1.10.1",
5
5
  "author": {
6
6
  "name": "Igor Ganapolsky"
7
7
  },
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "thumbgate",
3
- "version": "1.9.0",
3
+ "version": "1.10.1",
4
4
  "description": "ThumbGate — 👍👎 feedback that teaches your AI agent. Thumbs down a mistake, it never happens again.",
5
5
  "homepage": "https://thumbgate-production.up.railway.app",
6
6
  "transport": "stdio",
@@ -3,7 +3,7 @@
3
3
  - `chatgpt/openapi.yaml`: import into GPT Actions.
4
4
  - `gemini/function-declarations.json`: Gemini function-calling definitions.
5
5
  - `mcp/server-stdio.js`: underlying local MCP stdio server implementation.
6
- - `claude/.mcp.json`: example Claude Code MCP config using `npx --yes --package thumbgate@1.9.0 thumbgate serve`.
6
+ - `claude/.mcp.json`: example Claude Code MCP config using `npx --yes --package thumbgate@1.10.1 thumbgate serve`.
7
7
  - `codex/config.toml`: example Codex MCP profile section using the same version-pinned portable launcher.
8
8
  - `amp/skills/thumbgate-feedback/SKILL.md`: Amp skill template.
9
9
  - `opencode/opencode.json`: portable OpenCode MCP profile using the same version-pinned portable launcher.
@@ -2,13 +2,13 @@
2
2
  "mcpServers": {
3
3
  "thumbgate": {
4
4
  "command": "npx",
5
- "args": ["--yes", "--package", "thumbgate@1.9.0", "thumbgate", "serve"]
5
+ "args": ["--yes", "--package", "thumbgate@1.10.1", "thumbgate", "serve"]
6
6
  }
7
7
  },
8
8
  "hooks": {
9
9
  "preToolUse": {
10
10
  "command": "npx",
11
- "args": ["--yes", "--package", "thumbgate@1.9.0", "thumbgate", "gate-check"]
11
+ "args": ["--yes", "--package", "thumbgate@1.10.1", "thumbgate", "gate-check"]
12
12
  }
13
13
  }
14
14
  }
@@ -152,7 +152,7 @@ const {
152
152
  finalizeSession: finalizeFeedbackSession,
153
153
  } = require('../../scripts/feedback-session');
154
154
 
155
- const SERVER_INFO = { name: 'thumbgate-mcp', version: '1.9.0' };
155
+ const SERVER_INFO = { name: 'thumbgate-mcp', version: '1.10.1' };
156
156
  const COMMERCE_CATEGORIES = [
157
157
  'product_recommendation',
158
158
  'brand_compliance',
@@ -7,7 +7,7 @@
7
7
  "npx",
8
8
  "--yes",
9
9
  "--package",
10
- "thumbgate@1.9.0",
10
+ "thumbgate@1.10.1",
11
11
  "thumbgate",
12
12
  "serve"
13
13
  ],
@@ -0,0 +1,22 @@
1
+ {
2
+ "$schema": "./enforcement.schema.json",
3
+ "description": "Loss matrix and enforcement knobs for the Bayes-optimal pre-tool-use gate. See scripts/bayes-optimal-gate.js for the decision math. Tags listed here mirror the canonical tags emitted by risk-scorer.buildPatternSummary. To disable tag-specific costs and fall back to a symmetric 1:1 decision, reduce any override to 1.0.",
4
+ "lossMatrix": {
5
+ "falseAllow": {
6
+ "default": 1.0,
7
+ "deploy-prod": 100.0,
8
+ "destructive": 50.0,
9
+ "secrets": 1000.0,
10
+ "force-push-main": 200.0,
11
+ "data-loss": 500.0,
12
+ "credentials": 800.0,
13
+ "rm-rf": 300.0,
14
+ "git-reset-hard": 100.0
15
+ },
16
+ "falseBlock": {
17
+ "default": 1.0
18
+ }
19
+ },
20
+ "bayesOptimalEnabled": true,
21
+ "bayesPosteriorFloor": 0.05
22
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "thumbgate",
3
- "version": "1.9.0",
3
+ "version": "1.10.1",
4
4
  "description": "Self-improving agent governance: type thumbs-up or thumbs-down on any AI agent action. ThumbGate turns every mistake into a prevention rule and blocks the pattern from repeating. One thumbs-down, never again. 33 pre-action gates, budget enforcement, and self-protection for Claude Code, Cursor, Codex, Gemini CLI, and Amp.",
5
5
  "homepage": "https://thumbgate-production.up.railway.app",
6
6
  "repository": {
@@ -54,6 +54,7 @@
54
54
  "scripts/audit-trail.js",
55
55
  "scripts/auto-promote-gates.js",
56
56
  "scripts/auto-wire-hooks.js",
57
+ "scripts/bayes-optimal-gate.js",
57
58
  "scripts/belief-update.js",
58
59
  "scripts/billing-setup.js",
59
60
  "scripts/billing.js",
@@ -254,7 +255,7 @@
254
255
  "trace:eval": "node scripts/decision-trace.js eval",
255
256
  "social:reply-monitor": "node scripts/social-reply-monitor.js",
256
257
  "social:reply-monitor:dry": "node scripts/social-reply-monitor.js --dry-run",
257
- "test": "npm run test:schema && npm run test:loop && npm run test:dpo && npm run test:kto && npm run test:api && npm run test:proof && npm run test:e2e && npm run test:rlaif && npm run test:attribution && npm run test:quality && npm run test:intelligence && npm run test:training-export && npm run test:deployment && npm run test:operational-integrity && npm run test:workflow && npm run test:billing && npm run test:cli && npm run test:watcher && npm run test:autoresearch && npm run test:ops && npm run test:session-analyzer && npm run test:tessl && npm run test:gates && npm run test:evoskill && npm run test:gates-hardening && npm run test:workers && npm run test:social-analytics && npm run test:memalign && npm run test:xmemory-lite && npm run test:filesystem-search && npm run test:zernio && npm run test:platform-limits && npm run test:post-video && npm run test:post-everywhere-instagram && npm run test:obsidian-export && npm run test:lesson-db && npm run test:lesson-rotation && npm run test:memory-dedup && npm run test:feedback-quality && npm run test:sync-version && npm run test:check-congruence && npm run test:tool-registry && npm run test:feedback-to-rules && npm run test:memory-firewall && npm run test:belief-update && npm run test:hosted-config && npm run test:operational-summary && npm run test:operator-key-auth && npm run test:cloudflare-sandbox && npm run test:mcp-config && npm run test:plan-gate && npm run test:pulse && npm run test:semantic-layer && npm run test:data-pipeline && npm run test:optimize-context && npm run test:principle-extractor && npm run test:analytics-window && npm run test:funnel-analytics && npm run test:experiment-tracker && npm run test:build-metadata && npm run test:context-engine && npm run test:hf-papers && npm run test:marketing-experiment && npm run test:seo-gsd && npm run test:verify-run && npm run test:export-dpo-pairs && npm run test:export-hf-dataset && npm run test:license && npm run test:bot-detector && npm run 
test:postinstall && npm run test:funnel-invariants && npm run test:cli-telemetry && npm run test:pro-parity && npm run test:model-tier-router && npm run test:computer-use-firewall && npm run test:skill-exporter && npm run test:statusline && npm run test:evolution && npm run test:org-dashboard && npm run test:multi-hop-recall && npm run test:synthetic-dpo && npm run test:thumbgate-skill && npm run test:learn-hub && npm run test:feedback-fallback && npm run test:metaclaw && npm run test:server-lock && npm run test:control-tower && npm run test:pii-scanner && npm run test:data-governance && npm run test:lesson-inference && npm run test:semantic-dedup && npm run test:fs-utils && npm run test:cli-schema && npm run test:explore && npm run test:lesson-reranker && npm run test:lesson-retrieval && npm run test:cross-encoder && npm run test:reflector-agent && npm run test:feedback-session && npm run test:feedback-history-distiller && npm run test:hallucination-detector && npm run test:history-distiller && npm run test:predictive-insights && npm run test:prove-predictive-insights && npm run test:statusbar-cli && npm run test:generate-instagram-card && npm run test:instagram-thumbgate-post && npm run test:publish-instagram-thumbgate && npm run test:lesson-synthesis && npm run test:background-governance && npm run test:memory-migration && npm run test:prompt-dlp && npm run test:ephemeral-store && npm run test:agent-security && npm run test:skill-progressive && npm run test:per-step-scoring && npm run test:weekly-auto-post && npm run test:social-post-hourly && npm run test:social-quality-gate && npm run test:a2ui-engine && npm run test:gate-satisfy && npm run test:money-watcher && npm run test:budget && npm run test:quick-start && npm run test:utm && npm run test:product-feedback && npm run test:feedback-root-consolidator && npm run test:engagement-audit && npm run test:install-growth-automation && npm run test:publish-thumbgate-launch && npm run 
test:reconcile-thumbgate-campaign && npm run test:reddit-publisher && npm run test:schedule-thumbgate-campaign && npm run test:social-reply-monitor && npm run test:sync-launch-assets && npm run test:ai-search-visibility && npm run test:perplexity && npm run test:security-scanner && npm run test:llm-client && npm run test:managed-lesson-agent && npm run test:self-distill && npm run test:meta-agent && npm run test:harness-selector && npm run test:thumbgate-bench && npm run test:seo-guides && npm run test:enforcement-loop && npm run test:cli-agent-experience && npm run test:bot-detection && npm run test:checkout-bot-guard && npm run test:session-health && npm run test:session-episodes && npm run test:spec-gate && npm run test:decision-trace && npm run test:dashboard-insights && npm run test:prompt-eval && npm run test:demo-voiceover && npm run test:gate-coherence && npm run test:gate-eval && npm run test:high-roi && npm run test:public-static-assets && npm run test:token-savings && npm run test:workflow-gate-checkpoint && npm run test:lesson-export-import && npm run test:landing-page-claims && npm run test:dashboard-deeplink-e2e && npm run test:public-package-parity && npm run test:token-savings-dashboard && npm run test:cursor-wiring && npm run test:pretooluse-injection && npm run test:recent-corrective-context && npm run test:durability-step && npm run test:mailer && npm run test:brand-assets && npm run test:enforcement-teeth && npm run test:swarm-coordinator && npm run test:session-report && npm run test:require-evidence-gate",
258
+ "test": "npm run test:schema && npm run test:loop && npm run test:dpo && npm run test:kto && npm run test:api && npm run test:proof && npm run test:e2e && npm run test:rlaif && npm run test:attribution && npm run test:quality && npm run test:intelligence && npm run test:training-export && npm run test:deployment && npm run test:operational-integrity && npm run test:workflow && npm run test:billing && npm run test:cli && npm run test:watcher && npm run test:autoresearch && npm run test:ops && npm run test:session-analyzer && npm run test:tessl && npm run test:gates && npm run test:evoskill && npm run test:gates-hardening && npm run test:workers && npm run test:social-analytics && npm run test:memalign && npm run test:xmemory-lite && npm run test:filesystem-search && npm run test:zernio && npm run test:platform-limits && npm run test:post-video && npm run test:post-everywhere-instagram && npm run test:obsidian-export && npm run test:lesson-db && npm run test:lesson-rotation && npm run test:memory-dedup && npm run test:feedback-quality && npm run test:sync-version && npm run test:check-congruence && npm run test:tool-registry && npm run test:feedback-to-rules && npm run test:memory-firewall && npm run test:belief-update && npm run test:hosted-config && npm run test:operational-summary && npm run test:operator-key-auth && npm run test:cloudflare-sandbox && npm run test:mcp-config && npm run test:plan-gate && npm run test:pulse && npm run test:semantic-layer && npm run test:data-pipeline && npm run test:optimize-context && npm run test:principle-extractor && npm run test:analytics-window && npm run test:funnel-analytics && npm run test:experiment-tracker && npm run test:build-metadata && npm run test:context-engine && npm run test:hf-papers && npm run test:marketing-experiment && npm run test:seo-gsd && npm run test:verify-run && npm run test:export-dpo-pairs && npm run test:export-hf-dataset && npm run test:license && npm run test:bot-detector && npm run 
test:postinstall && npm run test:funnel-invariants && npm run test:cli-telemetry && npm run test:pro-parity && npm run test:model-tier-router && npm run test:computer-use-firewall && npm run test:skill-exporter && npm run test:statusline && npm run test:evolution && npm run test:org-dashboard && npm run test:multi-hop-recall && npm run test:synthetic-dpo && npm run test:thumbgate-skill && npm run test:learn-hub && npm run test:feedback-fallback && npm run test:metaclaw && npm run test:server-lock && npm run test:control-tower && npm run test:pii-scanner && npm run test:data-governance && npm run test:lesson-inference && npm run test:semantic-dedup && npm run test:fs-utils && npm run test:cli-schema && npm run test:explore && npm run test:lesson-reranker && npm run test:lesson-retrieval && npm run test:cross-encoder && npm run test:reflector-agent && npm run test:feedback-session && npm run test:feedback-history-distiller && npm run test:hallucination-detector && npm run test:history-distiller && npm run test:predictive-insights && npm run test:prove-predictive-insights && npm run test:statusbar-cli && npm run test:generate-instagram-card && npm run test:instagram-thumbgate-post && npm run test:publish-instagram-thumbgate && npm run test:lesson-synthesis && npm run test:background-governance && npm run test:memory-migration && npm run test:prompt-dlp && npm run test:ephemeral-store && npm run test:agent-security && npm run test:skill-progressive && npm run test:per-step-scoring && npm run test:weekly-auto-post && npm run test:social-post-hourly && npm run test:social-quality-gate && npm run test:a2ui-engine && npm run test:gate-satisfy && npm run test:money-watcher && npm run test:budget && npm run test:quick-start && npm run test:utm && npm run test:product-feedback && npm run test:feedback-root-consolidator && npm run test:engagement-audit && npm run test:install-growth-automation && npm run test:publish-thumbgate-launch && npm run 
test:reconcile-thumbgate-campaign && npm run test:reddit-publisher && npm run test:schedule-thumbgate-campaign && npm run test:social-reply-monitor && npm run test:sync-launch-assets && npm run test:ai-search-visibility && npm run test:perplexity && npm run test:security-scanner && npm run test:llm-client && npm run test:managed-lesson-agent && npm run test:self-distill && npm run test:meta-agent && npm run test:harness-selector && npm run test:thumbgate-bench && npm run test:seo-guides && npm run test:enforcement-loop && npm run test:cli-agent-experience && npm run test:bot-detection && npm run test:checkout-bot-guard && npm run test:session-health && npm run test:session-episodes && npm run test:spec-gate && npm run test:decision-trace && npm run test:dashboard-insights && npm run test:prompt-eval && npm run test:demo-voiceover && npm run test:gate-coherence && npm run test:gate-eval && npm run test:high-roi && npm run test:public-static-assets && npm run test:token-savings && npm run test:workflow-gate-checkpoint && npm run test:lesson-export-import && npm run test:landing-page-claims && npm run test:dashboard-deeplink-e2e && npm run test:public-package-parity && npm run test:token-savings-dashboard && npm run test:cursor-wiring && npm run test:pretooluse-injection && npm run test:recent-corrective-context && npm run test:durability-step && npm run test:mailer && npm run test:brand-assets && npm run test:enforcement-teeth && npm run test:bayes-optimal-gate && npm run test:swarm-coordinator && npm run test:session-report && npm run test:require-evidence-gate",
258
259
  "test:swarm-coordinator": "node --test tests/swarm-coordinator.test.js",
259
260
  "test:session-report": "node --test tests/session-report.test.js",
260
261
  "test:require-evidence-gate": "node --test tests/require-evidence-gate.test.js",
@@ -504,9 +505,10 @@
504
505
  "test:cursor-wiring": "node --test tests/cursor-wiring.test.js",
505
506
  "test:pretooluse-injection": "node --test tests/pretooluse-lesson-injection.test.js",
506
507
  "test:recent-corrective-context": "node --test tests/recent-corrective-actions-context.test.js",
507
- "test:mailer": "node --test tests/mailer.test.js tests/billing-webhook-email.test.js",
508
+ "test:mailer": "node --test tests/mailer.test.js tests/mailer-dns.test.js tests/billing-webhook-email.test.js",
508
509
  "test:brand-assets": "node --test tests/brand-assets.test.js",
509
- "test:enforcement-teeth": "node --test tests/enforcement-teeth.test.js"
510
+ "test:enforcement-teeth": "node --test tests/enforcement-teeth.test.js",
511
+ "test:bayes-optimal-gate": "node --test tests/bayes-optimal-gate.test.js"
510
512
  },
511
513
  "keywords": [
512
514
  "mcp",
package/public/index.html CHANGED
@@ -974,7 +974,7 @@ __GA_BOOTSTRAP__
974
974
  <!-- HOW IT WORKS -->
975
975
  <section class="how-it-works" id="how-it-works">
976
976
  <div class="container">
977
- <div class="section-label">New in v1.9.0</div>
977
+ <div class="section-label">New in v1.10.1</div>
978
978
  <h2 class="section-title">Three steps to stop repeated AI failures</h2>
979
979
  <div class="steps">
980
980
  <div class="step">
@@ -1330,7 +1330,7 @@ __GA_BOOTSTRAP__
1330
1330
  <a href="https://www.linkedin.com/in/igorganapolsky" target="_blank" rel="noopener">LinkedIn</a>
1331
1331
  <a href="/blog">Blog</a>
1332
1332
  </div>
1333
- <span class="footer-copy">© 2026 Max Smith KDP LLC · MIT License · v1.9.0</span>
1333
+ <span class="footer-copy">© 2026 Max Smith KDP LLC · MIT License · v1.10.1</span>
1334
1334
  </div>
1335
1335
  </footer>
1336
1336
 
@@ -0,0 +1,273 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * scripts/bayes-optimal-gate.js
5
+ *
6
+ * Bayes-optimal decision layer for ThumbGate's pre-tool-use gate.
7
+ *
8
+ * Why this exists:
9
+ * The legacy gate blocks a tool call when any matched lesson tag has a
10
+ * heuristic risk score ≥ a global threshold. That is a "threshold on a
11
+ * heuristic" rule, not a Bayes-optimal decision. It cannot express two
12
+ * facts that matter in practice:
13
+ * 1. Different tags carry different empirical harm rates (a prior).
14
+ * 2. Mis-classification is asymmetric — letting a harmful `deploy-prod`
15
+ * call through is far more expensive than briefly blocking a safe
16
+ * lint fix. A single global threshold cannot reflect that.
17
+ *
18
+ * What this module provides:
19
+ * - `computeBayesPosterior(...)` — P(harmful | tags) combining the trained
20
+ * model's probability (if present), the base rate, and per-tag empirical
21
+ * risk rates via a clipped Bayes-factor update.
22
+ * - `bayesOptimalDecision(...)` — cost-weighted argmax over {block, allow}
23
+ * using a configurable loss matrix. Block iff the expected loss of
24
+ * allowing exceeds the expected loss of blocking.
25
+ * - `computeBayesErrorRate(rows)` — the irreducible error floor of the
26
+ * current feature set (tag signatures). Useful as a stopping rule when
27
+ * tuning the scorer.
28
+ *
29
+ * No external deps. Pure functions; the only IO is an optional
30
+ * `config/enforcement.json` read inside `loadLossMatrix()`.
31
+ */
32
+
33
+ const fs = require('node:fs');
34
+ const path = require('node:path');
35
+
36
+ // Baseline loss matrix. `default` applies when no tag-specific override
37
+ // matches. Higher = more expensive. The asymmetry below reflects the
38
+ // observed cost of real ThumbGate incidents: false-allow on a destructive
39
+ // or production-facing action costs hours of recovery and credibility;
40
+ // false-block costs the operator one explicit override flag. Keys are lowercase tag names; when several tags match, resolveCost uses the largest cost.
41
+ const DEFAULT_LOSS_MATRIX = {
42
+ falseAllow: { // cost of letting a harmful action through, per tag
43
+ default: 1.0,
44
+ 'deploy-prod': 100.0,
45
+ 'destructive': 50.0,
46
+ 'secrets': 1000.0,
47
+ 'force-push-main': 200.0,
48
+ 'data-loss': 500.0,
49
+ }, // NOTE(review): config/enforcement.json also lists `credentials`, `rm-rf` and `git-reset-hard`; those cost only `default` when the config cannot be read — confirm that is intended.
50
+ falseBlock: { // cost of blocking a safe action
51
+ default: 1.0,
52
+ },
53
+ };
54
+
55
+ const ENFORCEMENT_CONFIG_PATH = path.join(__dirname, '..', 'config', 'enforcement.json'); // <this script's parent directory>/config/enforcement.json
56
+
57
+ /**
58
+ * Load the loss matrix from `config/enforcement.json` if present, otherwise
59
+ * return the baked-in default. Any parse/IO failure falls back to defaults —
60
+ * the Bayes gate must never deadlock the hook on a config problem.
+ *
+ * @param {string} [configPath=ENFORCEMENT_CONFIG_PATH] - JSON file to read.
+ * @returns {{falseAllow: Object, falseBlock: Object}} `DEFAULT_LOSS_MATRIX`
+ *   itself (the same object, not a copy) when the file is missing, cannot be
+ *   parsed, or has no `lossMatrix` key; otherwise a new object in which each
+ *   side is the defaults overlaid with the entries from the file.
61
+ */
62
+ function loadLossMatrix(configPath = ENFORCEMENT_CONFIG_PATH) {
63
+ try {
64
+ if (!fs.existsSync(configPath)) return DEFAULT_LOSS_MATRIX;
65
+ const raw = JSON.parse(fs.readFileSync(configPath, 'utf8'));
66
+ if (!raw || typeof raw !== 'object' || !raw.lossMatrix) return DEFAULT_LOSS_MATRIX;
67
+ return {
68
+ falseAllow: { ...DEFAULT_LOSS_MATRIX.falseAllow, ...(raw.lossMatrix.falseAllow || {}) }, // defaults first, so file entries win on key collisions
69
+ falseBlock: { ...DEFAULT_LOSS_MATRIX.falseBlock, ...(raw.lossMatrix.falseBlock || {}) },
70
+ };
71
+ } catch { // unreadable file or invalid JSON: deliberately fall back instead of throwing
72
+ return DEFAULT_LOSS_MATRIX;
73
+ }
74
+ }
75
+
76
/**
 * Look up the maximum applicable cost for one side of the loss matrix.
 *
 * A single high-cost tag (e.g. `deploy-prod`) dominates — one dangerous tag
 * in a bundle of otherwise innocuous tags must still flip the decision, so
 * the result is the largest finite cost among `default` and every entry that
 * matches a tag.
 *
 * @param {Object<string, number>} matrixSide - `falseAllow` or `falseBlock`
 *   map of lowercase tag -> cost, with an optional `default` entry.
 * @param {Iterable<string>|string|null|undefined} tags - Tags on the action.
 *   A bare string counts as ONE tag (it is not iterated character by
 *   character); any other non-iterable value is treated as "no tags" instead
 *   of throwing, because this runs inside the pre-tool-use hook.
 * @returns {number} The resolved cost; 1 when `default` is missing or not a
 *   finite number.
 */
function resolveCost(matrixSide, tags) {
  const defaultCost = Number(matrixSide?.default ?? 1);
  let cost = Number.isFinite(defaultCost) ? defaultCost : 1;

  // Normalize the tag container before iterating.
  let tagList = [];
  if (typeof tags === 'string') {
    tagList = [tags];
  } else if (tags != null && typeof tags[Symbol.iterator] === 'function') {
    tagList = tags;
  }

  for (const tag of tagList) {
    const key = String(tag || '').trim().toLowerCase();
    if (!key) continue;
    const candidate = Number(matrixSide?.[key]);
    // Non-numeric or missing entries become NaN and are ignored.
    if (Number.isFinite(candidate) && candidate > cost) cost = candidate;
  }
  return cost;
}
92
+
93
+ /**
94
+ * Clip a number to [min, max]. Used to bound the Bayes factor so a single
95
+ * noisy tag (e.g. 1/1 harmful) cannot flip the decision on the basis of one
96
+ * observation. The clip window is conservative on purpose.
+ *
+ * NaN, undefined, null and every non-number value (numeric strings
+ * included) return `min`.
+ *
+ * @param {*} value - Candidate value.
+ * @param {number} min - Lower bound, also the fallback for unusable input.
+ * @param {number} max - Upper bound.
+ * @returns {number} `value` limited to [min, max], or `min` as described.
97
+ */
98
+ function clip(value, min, max) {
99
+ if (Number.isNaN(value) || value === undefined || value === null) return min;
100
+ // Infinite values saturate at the matching bound (+Infinity -> max,
101
+ // -Infinity -> min) rather than silently collapsing to `min`.
102
+ if (value === Infinity) return max;
103
+ if (value === -Infinity) return min;
104
+ if (typeof value !== 'number') return min;
105
+ return Math.min(Math.max(value, min), max);
106
+ }
107
+
108
+ /**
109
+ * Normalize a tag into the canonical lowercase key used by the model's
110
+ * pattern summary: stringify, trim surrounding whitespace, lowercase.
+ * Falsy tags (undefined, null, '', 0, false) yield an empty string. Truthy
+ * non-string tags are stringified rather than dropped, e.g. 42 -> '42'.
+ *
+ * @param {*} tag - Raw tag value.
+ * @returns {string} Normalized key; '' when the tag is falsy or blank.
111
+ */
112
+ function normalizeTag(tag) {
113
+ return String(tag || '').trim().toLowerCase();
114
+ }
115
+
116
+ /**
117
+ * Build a Map(tag -> riskRate) from the model's `highRiskTags` array.
118
+ * `riskRate` is empirical P(harmful | tag) computed from feedback sequences
119
+ * by `risk-scorer.buildPatternSummary`.
+ *
+ * Each bucket's tag is read from `key` (falling back to `tag`) and its rate
+ * from `riskRate` (falling back to `rate`). Buckets with no usable tag, or
+ * with a rate that is not a finite number in [0, 1], are skipped. When two
+ * buckets normalize to the same tag, the later one wins.
+ *
+ * @param {Array<Object>} highRiskTags - Buckets as described above.
+ * @returns {Map<string, number>} Normalized tag -> rate; empty when
+ *   `highRiskTags` is not an array.
120
+ */
121
+ function buildRiskRateMap(highRiskTags) {
122
+ const map = new Map();
123
+ if (!Array.isArray(highRiskTags)) return map;
124
+ for (const bucket of highRiskTags) {
125
+ const key = normalizeTag(bucket?.key || bucket?.tag);
126
+ if (!key) continue;
127
+ const rate = Number(bucket?.riskRate ?? bucket?.rate);
128
+ if (Number.isFinite(rate) && rate >= 0 && rate <= 1) { // a probability, nothing else
129
+ map.set(key, rate);
130
+ }
131
+ }
132
+ return map;
133
+ }
134
+
135
/**
 * Compute P(harmful | tags) as a Bayes-factor update over a starting
 * probability. If `modelProbability` is supplied (the trained scorer's
 * direct output), it seeds the update — richer feature evidence than the
 * raw base rate. Otherwise we fall back to the prior.
 *
 * For each observed tag with a known empirical risk rate, we multiply the
 * current odds by `riskRate / prior` (the Bayes factor), then convert odds
 * back to probability. The Bayes factor is clipped to [0.25, 4.0] to keep a
 * single sparsely-observed tag from dominating.
 *
 * Each distinct tag contributes once. Tags are compared after normalization,
 * so a repeated tag (or the same tag in different case or padding) cannot
 * compound its factor and sidestep the clip.
 *
 * @param {Object} [input]
 * @param {Iterable<string>|string} [input.tags] - Tags on the action. A bare
 *   string counts as one tag; other non-iterable values mean "no tags".
 * @param {Map<string, number>|Object<string, number>} [input.riskByTag] -
 *   Empirical risk rate per tag. Plain-object keys are normalized; a Map is
 *   used as given.
 * @param {number} [input.baseRate] - Prior P(harmful); clipped to
 *   [0.01, 0.99], and treated as 0 (hence 0.01) when missing or not numeric.
 * @param {number} [input.modelProbability] - Optional seed, used only when it
 *   is a finite number; clipped to [0.01, 0.99].
 * @returns {{pHarmful: number, pSafe: number, prior: number, seed: number,
 *   evidence: Array<{tag: string, rate: number, bayesFactor: number}>}}
 *   Probabilities rounded to three decimals, plus the per-tag factors that
 *   were applied.
 */
function computeBayesPosterior({ tags, riskByTag, baseRate, modelProbability } = {}) {
  const prior = clip(Number(baseRate) || 0, 0.01, 0.99);
  const seed = Number.isFinite(modelProbability) ? clip(modelProbability, 0.01, 0.99) : prior;

  const rateMap = riskByTag instanceof Map
    ? riskByTag
    : new Map(Object.entries(riskByTag || {}).map(([k, v]) => [normalizeTag(k), Number(v)]));

  // Normalize the tag container: one string is one tag, non-iterables are empty.
  let tagList = [];
  if (typeof tags === 'string') {
    tagList = [tags];
  } else if (tags != null && typeof tags[Symbol.iterator] === 'function') {
    tagList = tags;
  }

  let odds = seed / (1 - seed);
  const evidence = [];
  const seen = new Set();
  for (const tag of tagList) {
    const key = normalizeTag(tag);
    // Skip blanks and tags already counted.
    if (!key || seen.has(key)) continue;
    seen.add(key);
    const rate = rateMap.get(key);
    if (!Number.isFinite(rate)) continue;
    const bayesFactor = clip(rate / prior, 0.25, 4.0);
    odds *= bayesFactor;
    evidence.push({ tag: key, rate, bayesFactor: round3(bayesFactor) });
  }

  const pHarmful = odds / (1 + odds);
  return {
    pHarmful: round3(pHarmful),
    pSafe: round3(1 - pHarmful),
    prior: round3(prior),
    seed: round3(seed),
    evidence,
  };
}
175
+
176
+ /**
177
+ * Cost-weighted Bayes-optimal decision. Block iff
178
+ * E[loss | allow] = P(harmful) * cost(falseAllow)
179
+ * exceeds
180
+ * E[loss | block] = P(safe) * cost(falseBlock).
181
+ *
182
+ * This reduces to the usual Bayes classifier when both costs are equal.
+ * The comparison is strict, so a tie resolves to 'allow'. A missing or
+ * non-numeric `pHarmful` is clipped to 0, which also resolves to 'allow'.
+ *
+ * @param {{pHarmful: number, pSafe?: number}} posterior - Probabilities;
+ *   `pSafe` defaults to 1 - pHarmful. Both are clipped to [0, 1].
+ * @param {Iterable<string>} tags - Tags used to pick the per-side cost.
+ * @param {{falseAllow: Object, falseBlock: Object}} [lossMatrix] - Cost
+ *   table; defaults to DEFAULT_LOSS_MATRIX.
+ * @returns {{decision: 'block'|'allow', expectedLoss: {allow: number,
+ *   block: number}, costs: {falseAllow: number, falseBlock: number}}}
+ *   The decision uses unrounded losses; `expectedLoss` is rounded to three
+ *   decimals for reporting.
183
+ */
184
+ function bayesOptimalDecision(posterior, tags, lossMatrix = DEFAULT_LOSS_MATRIX) {
185
+ const pHarmful = clip(Number(posterior?.pHarmful), 0, 1);
186
+ const pSafe = clip(Number(posterior?.pSafe ?? 1 - pHarmful), 0, 1);
187
+ const cFalseAllow = resolveCost(lossMatrix?.falseAllow || {}, tags);
188
+ const cFalseBlock = resolveCost(lossMatrix?.falseBlock || {}, tags);
189
+ const lossAllow = pHarmful * cFalseAllow;
190
+ const lossBlock = pSafe * cFalseBlock;
191
+ return {
192
+ decision: lossAllow > lossBlock ? 'block' : 'allow',
193
+ expectedLoss: {
194
+ allow: round3(lossAllow),
195
+ block: round3(lossBlock),
196
+ },
197
+ costs: { falseAllow: cFalseAllow, falseBlock: cFalseBlock },
198
+ };
199
+ }
200
+
201
+ /**
202
+ * Bayes error rate: the irreducible error floor of a classifier built on
203
+ * the current feature set, estimated empirically from `rows`.
204
+ *
205
+ * For each tag signature s we have n_s rows of which k_s were harmful. The
206
+ * optimal per-signature prediction errs with probability min(k/n, 1-k/n).
207
+ * Weighting by P(s) = n_s / N and summing gives the Bayes error rate.
208
+ *
209
+ * Returns null when `rows` is empty or not an array.
+ *
+ * @param {Array<Object>} rows - Feedback rows; each is grouped with
+ *   `tagSignature(row)` and labelled with `isHarmful(row)`.
+ * @returns {number|null} Error rate rounded to three decimals, or null.
210
+ */
211
+ function computeBayesErrorRate(rows) {
212
+ if (!Array.isArray(rows) || rows.length === 0) return null;
213
+
214
+ const buckets = new Map(); // signature -> { total, harmful }
215
+ for (const row of rows) {
216
+ const sig = tagSignature(row);
217
+ if (!buckets.has(sig)) buckets.set(sig, { total: 0, harmful: 0 });
218
+ const bucket = buckets.get(sig);
219
+ bucket.total += 1;
220
+ if (isHarmful(row)) bucket.harmful += 1;
221
+ }
222
+
223
+ const total = rows.length;
224
+ let err = 0;
225
+ for (const { total: n, harmful: k } of buckets.values()) {
226
+ const p = n === 0 ? 0 : k / n; // share of harmful rows in this signature
227
+ err += (n / total) * Math.min(p, 1 - p); // minority share = unavoidable mistakes here
228
+ }
229
+ return round3(err);
230
+ }
231
+
232
/**
 * Canonical signature for a row's tag SET, used to bucket rows that carry
 * the same features.
 *
 * Tags come from `row.targetTags` when that is an array, otherwise from
 * `row.tags`. They are normalized, blanks are dropped, duplicates are
 * collapsed (so ['a', 'A', 'a'] and ['a'] land in the same bucket), and the
 * result is sorted and joined with '|'.
 *
 * @param {Object} row - Feedback row; may be null or undefined.
 * @returns {string} e.g. 'deploy-prod|secrets', or '__none__' when the row
 *   has no usable tags.
 */
function tagSignature(row) {
  let raw = [];
  if (Array.isArray(row?.targetTags)) {
    raw = row.targetTags;
  } else if (Array.isArray(row?.tags)) {
    raw = row.tags;
  }
  const unique = new Set(raw.map((tag) => normalizeTag(tag)).filter(Boolean));
  return [...unique].sort().join('|') || '__none__';
}
241
+
242
+ /**
243
+ * Mirror of `risk-scorer.deriveTargetRisk` so this module has no cycle back
244
+ * into risk-scorer. Kept intentionally narrow — if risk-scorer's definition
245
+ * broadens, revisit here too.
+ *
+ * Signals are checked in order and the first one present decides:
+ *   1. a numeric `targetRisk` — harmful iff it is > 0 (later fields ignored);
+ *   2. `accepted === false` — harmful;
+ *   3. `label` (or `signal` when `label` is empty) equal to 'negative',
+ *      case-insensitively.
+ *
+ * @param {Object} row - Feedback row.
+ * @returns {boolean} True when the row counts as harmful; false for
+ *   non-object input.
246
+ */
247
+ function isHarmful(row) {
248
+ if (!row || typeof row !== 'object') return false;
249
+ if (typeof row.targetRisk === 'number') return row.targetRisk > 0;
250
+ if (typeof row.accepted === 'boolean' && row.accepted === false) return true;
251
+ const label = String(row.label || row.signal || '').toLowerCase();
252
+ return label === 'negative';
253
+ }
254
+
255
+ function round3(n) { // Round to three decimal places for reporting.
256
+ if (!Number.isFinite(n)) return 0; // NaN, ±Infinity and non-numbers all report as 0
257
+ return Math.round(n * 1000) / 1000;
258
+ }
259
+
260
+ module.exports = {
261
+ DEFAULT_LOSS_MATRIX,
262
+ ENFORCEMENT_CONFIG_PATH,
263
+ loadLossMatrix,
264
+ resolveCost,
265
+ buildRiskRateMap,
266
+ computeBayesPosterior,
267
+ bayesOptimalDecision,
268
+ computeBayesErrorRate,
269
+ tagSignature,
270
+ isHarmful,
271
+ clip,
272
+ normalizeTag,
273
+ };
@@ -4,6 +4,8 @@
4
4
  const fs = require('fs');
5
5
  const path = require('path');
6
6
  const { getAutoGatesPath } = require('./auto-promote-gates');
7
+ const { computeBayesErrorRate } = require('./bayes-optimal-gate');
8
+ const { sequencePathFor } = require('./risk-scorer');
7
9
 
8
10
  const PROJECT_ROOT = path.join(__dirname, '..');
9
11
  const MANUAL_GATES_PATH = path.join(PROJECT_ROOT, 'config', 'gates', 'default.json');
@@ -55,6 +57,14 @@ function calculateStats() {
55
57
  const estimatedMinutesSaved = (totalBlocked + totalWarned) * 15;
56
58
  const estimatedHoursSaved = (estimatedMinutesSaved / 60).toFixed(1);
57
59
 
60
+ // Bayes error rate: irreducible error floor of the current scorer given its
61
+ // feature set (tag signatures). If this is near zero, the scorer is already
62
+ // close to optimal — threshold tuning won't help, and new features are the
63
+ // only lever. If this is high, the feature set can't discriminate the signal
64
+ // and we should add features (file path, recency, commit context) rather
65
+ // than tune thresholds. Null when no feedback sequences have been recorded.
66
+ const bayesErrorRate = tryComputeBayesErrorRate();
67
+
58
68
  return {
59
69
  totalGates: allGates.length,
60
70
  manualGates: manualGates.length,
@@ -66,10 +76,26 @@ function calculateStats() {
66
76
  topBlocked,
67
77
  lastPromotion,
68
78
  estimatedHoursSaved,
79
+ bayesErrorRate,
69
80
  gates: allGates,
70
81
  };
71
82
  }
72
83
 
84
+ function tryComputeBayesErrorRate() { // Best-effort Bayes error rate from the feedback-sequence file; returns null instead of throwing.
85
+ try {
86
+ const seqPath = sequencePathFor();
87
+ if (!fs.existsSync(seqPath)) return null; // nothing recorded yet
88
+ const rows = fs.readFileSync(seqPath, 'utf8') // read as one JSON document per line
89
+ .split('\n')
90
+ .filter(Boolean) // drop empty lines
91
+ .map((line) => { try { return JSON.parse(line); } catch { return null; } }) // an unparseable line becomes null
92
+ .filter(Boolean); // drop nulls and any other falsy parsed value
93
+ return computeBayesErrorRate(rows);
94
+ } catch { // any read or lookup failure yields null
95
+ return null;
96
+ }
97
+ }
98
+
73
99
  function formatLastPromotion(promo) {
74
100
  if (!promo) return 'none';
75
101
  const ts = promo.timestamp ? new Date(promo.timestamp) : null;
@@ -94,9 +120,18 @@ function formatStats(stats) {
94
120
  lines.push(` Top blocked gate: ${stats.topBlocked ? `${stats.topBlocked.id} (${stats.topBlocked.occurrences || 0} blocks)` : 'none'}`);
95
121
  lines.push(` Last promotion: ${formatLastPromotion(stats.lastPromotion)}`);
96
122
  lines.push(` Estimated time saved: ~${stats.estimatedHoursSaved} hours`);
123
+ lines.push(` Bayes error rate: ${formatBayesErrorRate(stats.bayesErrorRate)}`);
97
124
  return lines.join('\n');
98
125
  }
99
126
 
127
/**
 * Render the Bayes error rate as a percentage plus a one-line reading.
 *
 * Bands: below 2%, below 10%, and everything else.
 *
 * NOTE(review): the <2% message advises adding features, although a low
 * error floor suggests the tag features already separate outcomes well —
 * confirm the intended wording with the author before changing it.
 *
 * @param {number|null|undefined} rate - Error rate as a fraction (0.05 = 5%).
 * @returns {string} Human-readable line. null/undefined report that no
 *   feedback sequences exist; a value that is not a finite number reports
 *   n/a rather than printing "NaN%" with a misleading diagnosis.
 */
function formatBayesErrorRate(rate) {
  if (rate === null || rate === undefined) return 'n/a (no feedback sequences yet)';
  // Coerce like the arithmetic below would, then reject NaN/±Infinity.
  const value = Number(rate);
  if (!Number.isFinite(value)) return 'n/a (rate is not a finite number)';
  const pct = (value * 100).toFixed(1);
  if (value < 0.02) return `${pct}% — scorer is near-optimal; add features, don't tune thresholds`;
  if (value < 0.10) return `${pct}% — scorer has modest headroom`;
  return `${pct}% — high irreducible error; the feature set can't discriminate`;
}
134
+
100
135
  if (require.main === module) {
101
136
  try {
102
137
  const stats = calculateStats();
@@ -111,6 +146,8 @@ module.exports = {
111
146
  calculateStats,
112
147
  formatStats,
113
148
  formatLastPromotion,
149
+ formatBayesErrorRate,
114
150
  loadGatesFile,
151
+ tryComputeBayesErrorRate,
115
152
  MANUAL_GATES_PATH,
116
153
  };
@@ -36,7 +36,12 @@ const SENDER_DNS_CACHE_MS = 10 * 60 * 1000;
36
36
  const ANGLE_EMAIL_RE = /<([^<>@\s]{1,64}@[^<>@\s]{1,255})>/;
37
37
  const BARE_EMAIL_RE = /([^\s<>@]{1,64}@[^\s<>@]{1,255})/;
38
38
  const DKIM_PUBLIC_KEY_RE = /^p=/i;
39
- const AMAZON_SES_MX_RE = /feedback-smtp\..*amazonaws\.com\.?$/i;
39
+ // Resend fronts outbound mail with Amazon SES; the MX for send.<domain> points
40
+ // at feedback-smtp.<region>.amazonses.com. Earlier revisions of this regex
41
+ // mistakenly matched `amazonaws.com`, so the positive branch never fired in
42
+ // production. Matching `amazonses.com` (optionally with a trailing dot) is
43
+ // what Resend's DNS setup wizard actually publishes.
44
+ const AMAZON_SES_MX_RE = /feedback-smtp\..*amazonses\.com\.?$/i;
40
45
  const AMAZON_SES_SPF_RE = /include:amazonses\.com/i;
41
46
  const TRAILING_EMAIL_DOMAIN_PUNCTUATION = new Set(['>', ')', ',', '.', ';']);
42
47
  const senderDnsCache = new Map();
@@ -504,6 +509,11 @@ module.exports = {
504
509
  renderTrialWelcomeBodies,
505
510
  _resolveSenderAddress: resolveSenderAddress,
506
511
  _hasResendSenderDns: hasResendSenderDns,
512
+ _recordsHaveResendDns: recordsHaveResendDns,
513
+ _getCachedSenderDnsReadiness: getCachedSenderDnsReadiness,
514
+ _setCachedSenderDnsReadiness: setCachedSenderDnsReadiness,
515
+ _senderDnsCache: senderDnsCache,
516
+ _SENDER_DNS_CACHE_MS: SENDER_DNS_CACHE_MS,
507
517
  _constants: {
508
518
  PRODUCT_NAME,
509
519
  DASHBOARD_URL,
@@ -324,6 +324,48 @@ function samplePosteriors(model) {
324
324
  return samples;
325
325
  }
326
326
 
327
/**
 * Production/exploit-mode counterpart to `samplePosteriors`. Instead of
 * drawing a random sample from each Beta posterior (which deliberately
 * explores), return the posterior *mean* α/(α+β) for each category. Taking
 * the argmax over these means picks the category with the highest expected
 * reward when no exploration is wanted.
 *
 * Note that despite its name this function returns the full map of means;
 * it does not select a winner itself.
 *
 * When to use which:
 *   - `samplePosteriors` in training / learning mode — we want to try
 *     under-sampled arms.
 *   - `argmaxPosteriors` in production / hot-path mode — we want the
 *     best-known lesson right now. The caller can still choose to mix the
 *     two (e.g. ε-greedy) but that's out of scope here.
 *
 * @param {{categories?: Object<string, {alpha?: number, beta?: number}>}} model
 *   Model whose `categories` map holds Beta parameters. A missing model or
 *   missing `categories` yields an empty result.
 * @returns {Object<string, number>} Posterior mean per category, always a
 *   finite number in (0, 1).
 */
function argmaxPosteriors(model) {
  // Missing, non-numeric, non-finite and non-positive parameters all collapse
  // to the same small floor. Letting Infinity through would produce
  // Infinity / Infinity = NaN, and NaN never compares greater or smaller, so
  // it would corrupt any argmax taken over the result.
  const toShape = (raw) => {
    const value = Number(raw);
    return Number.isFinite(value) ? Math.max(value, 0.01) : 0.01;
  };

  const categories = (model && model.categories) || {};
  const means = {};
  for (const [cat, params] of Object.entries(categories)) {
    // `params` may be null for a malformed entry; treat it as "no evidence".
    const alpha = toShape(params && params.alpha);
    const beta = toShape(params && params.beta);
    means[cat] = alpha / (alpha + beta);
  }
  return means;
}
350
+
351
/**
 * Pick the single category with the highest posterior mean.
 *
 * Ties are broken by ordinal (UTF-16 code unit) order of the category name,
 * so the result is the same on every host regardless of its locale.
 *
 * @param {object} model - Model passed through to `argmaxPosteriors`.
 * @returns {string|null} Name of the best category, or `null` when the model
 *   has no categories.
 */
function pickBestCategory(model) {
  const means = argmaxPosteriors(model);

  // The default sort compares UTF-16 code units. `localeCompare` would order
  // names according to the host's default locale and ICU data, which makes
  // the tie-break differ between machines.
  const keys = Object.keys(means).sort();
  if (keys.length === 0) return null;

  let best = keys[0];
  for (const key of keys) {
    // Strict `>` keeps the earliest name in sort order when means are equal.
    if (means[key] > means[best]) best = key;
  }
  return best;
}
368
+
327
369
  // ---------------------------------------------------------------------------
328
370
  // Internal: Marsaglia-Tsang Gamma Sampling (2000)
329
371
  // ---------------------------------------------------------------------------
@@ -410,6 +452,8 @@ module.exports = {
410
452
  isCalibrated,
411
453
  getCalibration,
412
454
  samplePosteriors,
455
+ argmaxPosteriors,
456
+ pickBestCategory,
413
457
  HALF_LIFE_DAYS,
414
458
  DECAY_FLOOR,
415
459
  MIN_SAMPLES_THRESHOLD,