npm - thumbgate - Versions diffs - 1.16.12 → 1.16.19 - Mend

thumbgate 1.16.12 → 1.16.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/.well-known/mcp/server-card.json +1 -1
package/README.md +3 -1
package/adapters/claude/.mcp.json +2 -2
package/adapters/mcp/server-stdio.js +26 -1
package/adapters/opencode/opencode.json +1 -1
package/bin/cli.js +420 -1
package/config/gate-templates.json +372 -0
package/config/mcp-allowlists.json +25 -0
package/config/model-candidates.json +59 -2
package/config/model-tiers.json +4 -1
package/package.json +79 -22
package/public/compare.html +6 -0
package/public/index.html +144 -11
package/public/numbers.html +11 -11
package/public/pro.html +22 -24
package/scripts/agent-design-governance.js +211 -0
package/scripts/agent-reasoning-traces.js +683 -0
package/scripts/agent-reward-model.js +438 -0
package/scripts/agent-stack-survival-audit.js +231 -0
package/scripts/ai-engineering-stack-guardrails.js +256 -0
package/scripts/billing.js +16 -4
package/scripts/chatgpt-ads-readiness-pack.js +195 -0
package/scripts/cli-schema.js +277 -0
package/scripts/code-graph-guardrails.js +176 -0
package/scripts/deepseek-v4-runtime-guardrails.js +253 -0
package/scripts/gemini-embedding-policy.js +198 -0
package/scripts/inference-cache-policy.js +39 -0
package/scripts/judge-reward-function.js +396 -0
package/scripts/llm-behavior-monitor.js +251 -0
package/scripts/long-running-agent-context-guardrails.js +176 -0
package/scripts/multimodal-retrieval-plan.js +31 -11
package/scripts/oss-pr-opportunity-scout.js +240 -0
package/scripts/proactive-agent-eval-guardrails.js +230 -0
package/scripts/profile-router.js +5 -4
package/scripts/prompting-operating-system.js +273 -0
package/scripts/proxy-pointer-rag-guardrails.js +189 -0
package/scripts/rag-precision-guardrails.js +202 -0
package/scripts/rate-limiter.js +1 -1
package/scripts/reasoning-efficiency-guardrails.js +176 -0
package/scripts/reward-hacking-guardrails.js +251 -0
package/scripts/seo-gsd.js +1201 -11
package/scripts/single-use-credential-gate.js +182 -0
package/scripts/structured-prompt-driven.js +226 -0
package/scripts/telemetry-analytics.js +31 -6
package/scripts/tool-registry.js +92 -0
package/scripts/upstream-contribution-engine.js +379 -0
package/scripts/vector-store.js +119 -4
package/src/api/server.js +333 -100
package/scripts/agents-sdk-sandbox-plan.js +0 -57
package/scripts/ai-org-governance.js +0 -98
package/scripts/artifact-agent-plan.js +0 -81
package/scripts/enterprise-agent-rollout.js +0 -34
package/scripts/experience-replay-governance.js +0 -69
package/scripts/inference-economics.js +0 -53
package/scripts/knowledge-layer-plan.js +0 -108
package/scripts/memory-store-governance.js +0 -60
package/scripts/post-training-governance.js +0 -34
package/scripts/production-agent-readiness.js +0 -40
package/scripts/scaling-law-claims.js +0 -60
package/scripts/student-consistent-training.js +0 -73

package/public/numbers.html CHANGED Viewed

@@ -25,9 +25,9 @@
   "alternateName": "thumbgate",
   "applicationCategory": "DeveloperApplication",
   "operatingSystem": "Cross-platform, Node.js >=18.18.0",
-  "softwareVersion": "1.16.12",
+  "softwareVersion": "1.16.19",
   "url": "https://thumbgate-production.up.railway.app/numbers",
-  "dateModified": "2026-05-03",
+  "dateModified": "2026-05-04",
   "creator": {
     "@type": "Person",
     "name": "Igor Ganapolsky",
@@ -57,8 +57,8 @@
       "https://www.linkedin.com/in/igorganapolsky"
     ]
   },
-  "dateModified": "2026-05-03",
-  "datePublished": "2026-05-03",
+  "dateModified": "2026-05-04",
+  "datePublished": "2026-05-04",
   "keywords": [
     "AI agent gates",
     "LLM token savings",
@@ -70,7 +70,7 @@
     {
       "@type": "PropertyValue",
       "name": "active_gates",
-      "value": 37
+      "value": 36
     },
     {
       "@type": "PropertyValue",
@@ -101,7 +101,7 @@
     {
       "@type": "PropertyValue",
       "name": "bayes_error_rate",
-      "value": 0
+      "value": null
     }
   ]
 }
@@ -190,14 +190,14 @@
 <main class="container">
   <h1>The Numbers</h1>
   <p class="subtitle">Generated first-party operational data from the ThumbGate runtime. No surveys or projections — this page is a release-time snapshot produced by the same local scripts that power the CLI and dashboard.</p>
-  <div class="freshness">Updated: 2026-05-03 · Version 1.16.12</div>
+  <div class="freshness">Updated: 2026-05-04 · Version 1.16.19</div>
   <h2>Gate enforcement</h2>
   <div class="stats-grid">
     <div class="stat-card">
       <div class="stat-label">Active gates</div>
-      <div class="stat-value">37</div>
-      <div class="stat-sub">36 manual · 1 auto-promoted</div>
+      <div class="stat-value">36</div>
+      <div class="stat-sub">36 manual · 0 auto-promoted</div>
       <a class="stat-source" href="https://github.com/IgorGanapolsky/ThumbGate/blob/main/scripts/gate-stats.js">source: gate-stats.js</a>
     </div>
     <div class="stat-card">
@@ -242,7 +242,7 @@
     </div>
     <div class="stat-card">
       <div class="stat-label">Scorer Bayes error</div>
-      <div class="stat-value">0.0%</div>
+      <div class="stat-value">n/a (no feedback sequences recorded yet)</div>
       <div class="stat-sub">irreducible error given current feature set</div>
       <a class="stat-source" href="https://github.com/IgorGanapolsky/ThumbGate/blob/main/scripts/bayes-optimal-gate.js">source: bayes-optimal-gate.js</a>
     </div>
@@ -264,7 +264,7 @@
   <div class="cta">
     <a href="https://www.npmjs.com/package/thumbgate">Install ThumbGate — npx thumbgate init</a>
     <div class="footer-note">Prefer the raw feed? See <a href="https://github.com/IgorGanapolsky/ThumbGate">GitHub</a> or run <code>npm run gate:stats</code> locally.</div>
-    <div class="footer-note">Generated at 2026-05-03T22:31:46.242Z UTC.</div>
+    <div class="footer-note">Generated at 2026-05-04T21:47:27.878Z UTC.</div>
   </div>
 </main>
 </body>

package/public/pro.html CHANGED Viewed

@@ -804,11 +804,11 @@ __GA_BOOTSTRAP__
 <section class="hero">
   <div class="container hero-grid">
     <div class="panel hero-copy">
-      <div class="eyebrow">Agent governance for engineering teams</div>
-      <h1>One correction protects every agent on your team.</h1>
+      <div class="eyebrow">Paid lane for individual operators</div>
+      <h1>Buy the operator loop that proves your AI agent stopped repeating the mistake.</h1>
       <p style="font-size:13px;opacity:0.8;margin-bottom:0.5rem;">Updated: <time datetime="2026-04-20">2026-04-20</time> · by <a href="https://github.com/IgorGanapolsky" style="color:inherit;">Igor Ganapolsky</a></p>
-      <p>ThumbGate prevents unsafe AI agent actions before they hit shared repos, CI pipelines, and production. When one developer flags a bad pattern, every agent on the team is permanently blocked from repeating it.</p>
-      <p>Open-source core for individuals. Team plan for shared enforcement, CI checks, approval policies, and audit trails across your engineering org.</p>
+      <p>ThumbGate Pro is the fastest paid path for one operator who already hit a repeated AI-agent failure and now needs proof: what was blocked, why it was blocked, and what changed before the next risky run.</p>
+      <p>Start with the local-first Pro dashboard and DPO export. Move to Team only when one correction needs to protect multiple developers, agents, or shared repos.</p>
       <div class="hero-proof">
         <div class="proof-pill">Personal local dashboard</div>
         <div class="proof-pill">DPO export from real corrections</div>
@@ -960,32 +960,30 @@ __GA_BOOTSTRAP__
   <div class="container pricing-shell">
     <div class="pricing-card">
       <div class="section-label" style="text-align:left;margin-bottom:8px;">Pricing</div>
-      <h3>ThumbGate Team — Agent Governance</h3>
-      <div class="price">$49<span>/seat/mo</span></div>
-      <div class="annual">Billed monthly or annually · Starts with a 30-min pilot call</div>
-      <p class="pricing-note">Shared enforcement memory, CI checks, approval policies, sandbox routing, and a full audit trail of every blocked agent action across your team.</p>
+      <h3>ThumbGate Pro</h3>
+      <div class="price">$19<span>/mo</span></div>
+      <div class="annual">$149/year available · 7-day trial · Card required, no charge today</div>
+      <p class="pricing-note">For the individual operator who wants a personal local dashboard, DPO export, review-ready evidence, and founder help on the first risky workflow.</p>
       <ul>
-        <li><strong>Shared enforcement</strong> — one developer's correction blocks that pattern for every agent on the team.</li>
-        <li><strong>CI check integration</strong> — block unsafe merges, enforce test requirements, prevent PRs with unresolved threads.</li>
-        <li><strong>Approval policies</strong> — require human sign-off for high-risk actions (production deploys, schema migrations, destructive SQL).</li>
-        <li><strong>Audit trail</strong> — every blocked action logged with timestamp, agent, context, and the rule that fired.</li>
-        <li><strong>Sandbox routing</strong> — route risky agent runs into isolated execution environments.</li>
-        <li><strong>Org dashboard</strong> — active agents, check hit rates, risk scores, and proof-backed team metrics.</li>
+        <li><strong>Personal local dashboard</strong> — inspect blocked actions, active checks, and lesson evidence without a cloud account.</li>
+        <li><strong>DPO export</strong> — turn real thumbs-down corrections into training pairs you can review or reuse.</li>
+        <li><strong>Review-ready proof</strong> — bring evidence links and blocked-action history to the next risky workflow review.</li>
+        <li><strong>Founder support</strong> — get help hardening the first force-push, deploy, migration, or CI failure that keeps repeating.</li>
       </ul>
       <div class="pricing-actions">
-        <a class="btn-primary" href="/#workflow-sprint-intake">Book a Team Pilot Call</a>
-        <a class="btn-secondary btn-demo" href="/dashboard?utm_source=website&utm_medium=pro_page_pricing&utm_campaign=team">Open dashboard demo</a>
+        <a class="btn-primary btn-pro-checkout" href="/checkout/pro?utm_source=website&utm_medium=pro_page_pricing&utm_campaign=pro_pack&cta_id=pricing_pro&cta_placement=pricing&plan_id=pro&landing_path=%2Fpro">Start 7-Day Free Trial</a>
+        <a class="btn-secondary btn-pro-checkout" href="/checkout/pro?utm_source=website&utm_medium=pro_page_pricing&utm_campaign=pro_pack&cta_id=pricing_pro_annual&cta_placement=pricing&plan_id=pro&billing_cycle=annual&landing_path=%2Fpro">Choose annual</a>
       </div>
-      <div class="pricing-meta">Previously $99/seat. Now $49/seat/mo with a 3-seat minimum. Starts with one workflow, one repo, one repeat failure. We measure before/after and expand only when the results are real.</div>
+      <div class="pricing-meta">Best for one operator with one repeated failure to prove. Stay on Free if you only need the local install; buy Pro when the dashboard, export, and proof trail save you time.</div>
     </div>
     <div class="pricing-sidebar">
       <div class="team-card">
         <div class="section-label" style="text-align:left;margin-bottom:8px;">When Team is better</div>
-        <h3>Solo developer? Start free.</h3>
-        <p>The open-source core gives you local enforcement, PreToolUse hooks, and MCP integrations at no cost. Upgrade to Team when your org needs shared enforcement and audit.</p>
+        <h3>Need shared enforcement?</h3>
+        <p>Choose Team when one correction must protect multiple developers or agents across shared repositories, CI, approval policies, and audit trails. Team is $49/seat/mo with a 3-seat minimum after qualification.</p>
         <div class="hero-actions" style="margin-top:18px;">
-          <a class="btn-secondary" href="/guide?utm_source=website&utm_medium=pro_page&utm_campaign=free_install">Install free locally</a>
+          <a class="btn-secondary" href="/#workflow-sprint-intake">Book a Team Pilot Call</a>
         </div>
       </div>
       <div class="team-card">
@@ -1025,11 +1023,11 @@ __GA_BOOTSTRAP__
 <section class="final-cta">
   <div class="container">
     <div class="final-shell">
-      <h2>Your team's AI agents are one bad action away from breaking production.</h2>
-      <p>ThumbGate prevents force-pushes, secret commits, unsafe publishes, and destructive SQL before they execute. One correction protects every developer on your team — permanently.</p>
+      <h2>Stop losing time to the same AI-agent failure.</h2>
+      <p>Start Pro, harden one repeated mistake, and keep the proof trail: blocked action, lesson, prevention rule, and export path.</p>
       <div class="hero-actions" style="justify-content:center;">
-        <a class="btn-primary" href="/#workflow-sprint-intake">Book a Team Pilot Call</a>
-        <a class="btn-secondary btn-demo" href="/dashboard?utm_source=website&utm_medium=pro_page_final&utm_campaign=team">Open dashboard demo</a>
+        <a class="btn-primary btn-pro-checkout" href="/checkout/pro?utm_source=website&utm_medium=pro_page_final&utm_campaign=pro_pack&cta_id=final_go_pro&cta_placement=final&plan_id=pro&landing_path=%2Fpro">Start 7-Day Free Trial</a>
+        <a class="btn-secondary btn-demo" href="/dashboard?utm_source=website&utm_medium=pro_page_final&utm_campaign=pro_pack">Open dashboard demo</a>
       </div>
     </div>
   </div>

package/scripts/agent-design-governance.js ADDED Viewed

@@ -0,0 +1,211 @@
+#!/usr/bin/env node
+'use strict';
// Flags a tool name as high risk when it contains one of the listed words as a
// stand-alone token: the word must sit at the very start/end of the name or be
// fenced by a character that is not a letter or digit. Matching ignores case.
const HIGH_RISK_KEYWORDS = new RegExp(
  '(^|[^a-z0-9])(delete|deploy|drop|finance|invoice|payment|production|publish|refund|secret|send|stripe|write)([^a-z0-9]|$)',
  'i'
);
/**
 * Coerce a loosely typed option value (CLI string, JSON number, ...) to a
 * finite number.
 *
 * `Number()` maps `null`, `''` and whitespace-only strings to `0`, which would
 * silently override the caller's fallback. Those inputs are treated as "not
 * provided" here, in line with parseBoolean falling back for undefined, null
 * and ''.
 *
 * @param {*} value - Raw option value.
 * @param {number} [fallback=0] - Returned when `value` is missing or does not
 *   convert to a finite number.
 * @returns {number} The parsed finite number, or `fallback`.
 */
function parseNumber(value, fallback = 0) {
  if (value === undefined || value === null) return fallback;
  // Blank strings would otherwise coerce to 0 instead of the fallback.
  if (typeof value === 'string' && value.trim() === '') return fallback;
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : fallback;
}
/**
 * Interpret a loosely typed flag value as a boolean.
 *
 * @param {*} value - Raw option value.
 * @param {boolean} [fallback=false] - Returned when `value` is undefined,
 *   null, or the empty string.
 * @returns {boolean} `value` itself when it already is a boolean; otherwise
 *   true only when its trimmed string form is 1, true, yes or on (any case).
 */
function parseBoolean(value, fallback = false) {
  const isMissing = value === undefined || value === null || value === '';
  if (isMissing) {
    return fallback;
  }
  if (typeof value === 'boolean') {
    return value;
  }
  const token = String(value).trim();
  return /^(1|true|yes|on)$/i.test(token);
}
/**
 * Turn a list-like option into an array of trimmed, non-empty strings.
 *
 * Arrays are used entry by entry; any other value is stringified (falsy
 * values become the empty string) and split on commas.
 *
 * @param {*} value - An array of entries or a comma-separated string.
 * @returns {string[]} Trimmed entries with empty ones removed.
 */
function splitList(value) {
  const entries = Array.isArray(value)
    // Drop nullish entries first: String(null) / String(undefined) would
    // otherwise survive as the literal names "null" / "undefined".
    ? value.filter((item) => item !== undefined && item !== null)
    : String(value || '').split(',');
  return entries.map((item) => String(item).trim()).filter(Boolean);
}
/**
 * Normalize raw options into the canonical shape consumed by the scoring
 * functions. Every setting is read from its kebab-case key first and then
 * from its camelCase key.
 *
 * High-risk tools are the explicitly listed ones plus every entry of `tools`
 * whose name matches HIGH_RISK_KEYWORDS. The keyword pattern only recognises a
 * keyword fenced by non-alphanumeric characters, so camelCase names such as
 * `deleteUser` or `sendEmail` would slip through; each name is therefore also
 * tested with its camelCase humps split by underscores.
 *
 * @param {Object} [raw={}] - Raw options.
 * @returns {Object} Normalized options: workflow, numeric counts,
 *   autonomyLevel, tools, de-duplicated highRiskTools, writeTools and the
 *   boolean readiness flags.
 */
function normalizeOptions(raw = {}) {
  const tools = splitList(raw.tools || raw.toolNames);
  // deleteUser -> delete_User, HTTPDelete -> HTTP_Delete
  const splitHumps = (name) => name
    .replace(/([a-z0-9])([A-Z])/g, '$1_$2')
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1_$2');
  // Test the untouched name as well so everything matched before still matches.
  const looksHighRisk = (tool) => HIGH_RISK_KEYWORDS.test(tool)
    || HIGH_RISK_KEYWORDS.test(splitHumps(tool));
  const highRiskTools = splitList(raw['high-risk-tools'] || raw.highRiskTools)
    .concat(tools.filter(looksHighRisk));
  return {
    workflow: String(raw.workflow || raw.name || 'agent workflow').trim() || 'agent workflow',
    toolCount: parseNumber(raw['tool-count'] || raw.toolCount || tools.length, tools.length),
    similarToolCount: parseNumber(raw['similar-tool-count'] || raw.similarToolCount, 0),
    conditionalBranches: parseNumber(raw['conditional-branches'] || raw.conditionalBranches, 0),
    handoffCount: parseNumber(raw['handoff-count'] || raw.handoffCount, 0),
    autonomyLevel: String(raw['autonomy-level'] || raw.autonomyLevel || 'assisted').trim().toLowerCase(),
    tools,
    highRiskTools: [...new Set(highRiskTools)],
    writeTools: splitList(raw['write-tools'] || raw.writeTools),
    hasBaselineEvals: parseBoolean(raw['baseline-evals'] || raw.hasBaselineEvals, false),
    hasDocs: parseBoolean(raw.docs || raw.hasDocs, false),
    hasExamples: parseBoolean(raw.examples || raw.hasExamples, false),
    hasEdgeCases: parseBoolean(raw['edge-cases'] || raw.hasEdgeCases, false),
    hasToolApprovals: parseBoolean(raw['tool-approvals'] || raw.hasToolApprovals, false),
    hasExitCondition: parseBoolean(raw['exit-condition'] || raw.hasExitCondition, false),
    reversibleActions: parseBoolean(raw['reversible-actions'] || raw.reversibleActions, false),
  };
}
/**
 * Score the risk of the workflow's tool set.
 *
 * Points: 35 when any high-risk tool is present, 20 when any write tool is
 * present, 20 when risky or write tools exist but actions are not marked
 * reversible, and 25 when they exist without tool approvals.
 *
 * @param {Object} options - Normalized options; reads highRiskTools,
 *   writeTools, reversibleActions and hasToolApprovals.
 * @returns {{risk: string, score: number, reasons: string[]}} `risk` is
 *   'high' from 70 points, 'medium' from 35, otherwise 'low'; `score` is
 *   capped at 100; `reasons` lists one sentence per signal that applied.
 */
function scoreToolRisk(options) {
  const highRiskCount = options.highRiskTools.length;
  const writeCount = options.writeTools.length;
  const hasRiskyTools = highRiskCount > 0 || writeCount > 0;

  const signals = [
    {
      applies: highRiskCount > 0,
      points: 35,
      reason: `${highRiskCount} high-risk tool(s) can affect production, money, data, secrets, or outbound actions`,
    },
    {
      applies: writeCount > 0,
      points: 20,
      reason: `${writeCount} write-capable tool(s) need approval and audit trails`,
    },
    {
      applies: !options.reversibleActions && hasRiskyTools,
      points: 20,
      reason: 'some actions are not marked reversible',
    },
    {
      applies: !options.hasToolApprovals && hasRiskyTools,
      points: 25,
      reason: 'tool approvals are missing for risky tools',
    },
  ].filter((signal) => signal.applies);

  const total = signals.reduce((sum, signal) => sum + signal.points, 0);

  let risk = 'low';
  if (total >= 70) {
    risk = 'high';
  } else if (total >= 35) {
    risk = 'medium';
  }

  return {
    risk,
    score: Math.min(100, total),
    reasons: signals.map((signal) => signal.reason),
  };
}
/**
 * Grade instruction quality against four equally weighted checks: docs,
 * examples, edge cases and exit condition.
 *
 * @param {Object} options - Normalized options; reads hasDocs, hasExamples,
 *   hasEdgeCases and hasExitCondition.
 * @returns {{score: number, checks: Object[], missing: string[]}} `score` is
 *   the rounded percentage of passed checks, `checks` carries id/passed/label
 *   for every check, `missing` holds the labels of the failed ones.
 */
function scoreInstructions(options) {
  const rubric = [
    ['docs', options.hasDocs, 'draws on existing workflow documentation'],
    ['examples', options.hasExamples, 'includes concrete successful examples'],
    ['edge_cases', options.hasEdgeCases, 'covers edge cases and failure paths'],
    ['exit_condition', options.hasExitCondition, 'defines when the run is complete'],
  ];
  const checks = rubric.map(([id, passed, label]) => ({ id, passed, label }));

  const missing = [];
  let passedCount = 0;
  for (const check of checks) {
    if (check.passed) {
      passedCount += 1;
    } else {
      missing.push(check.label);
    }
  }

  return {
    score: Math.round((passedCount / checks.length) * 100),
    checks,
    missing,
  };
}
/**
 * Recommend an orchestration pattern for the workflow.
 *
 * - 'manager' when there are 8+ conditional branches, 4+ similar tools, or
 *   10+ tools of which 2+ are similar.
 * - 'decentralized' when there are 2+ handoffs and tool risk is not 'high'.
 * - 'single_agent' otherwise; the reason text depends on whether the
 *   instruction score is below 75.
 *
 * @param {Object} options - Normalized options; reads conditionalBranches,
 *   similarToolCount, toolCount and handoffCount.
 * @param {{risk: string}} toolRisk - Only consulted for the decentralized case.
 * @param {{score: number}} instructionQuality - Only consulted for the
 *   single-agent case.
 * @returns {{architecture: string, reason: string, triggers: string[]}}
 */
function selectArchitecture(options, toolRisk, instructionQuality) {
  const { conditionalBranches, similarToolCount, toolCount, handoffCount } = options;
  const tooManyBranches = conditionalBranches >= 8;
  const overlappingTools = similarToolCount >= 4 || (toolCount >= 10 && similarToolCount >= 2);

  const triggers = [
    tooManyBranches && 'instruction_complexity',
    overlappingTools && 'tool_overload',
    handoffCount > 0 && 'existing_handoffs',
  ].filter(Boolean);

  if (tooManyBranches || overlappingTools) {
    return {
      architecture: 'manager',
      reason: 'split specialized responsibilities behind a manager agent because instructions or similar tools are becoming hard to route reliably',
      triggers,
    };
  }

  if (handoffCount >= 2 && toolRisk.risk !== 'high') {
    return {
      architecture: 'decentralized',
      reason: 'peer handoffs can work because the workflow already has explicit handoff points and no high-risk tool profile',
      triggers,
    };
  }

  let reason = 'a single agent with clearer tools and instructions should stay cheaper to evaluate and maintain';
  if (instructionQuality.score < 75) {
    reason = 'improve instructions and evals before adding orchestration complexity';
  }
  return { architecture: 'single_agent', reason, triggers };
}
/**
 * Collect the conditions that keep a plan from being 'ready'.
 *
 * @param {Object} options - Normalized options; reads hasBaselineEvals,
 *   hasToolApprovals and highRiskTools.
 * @param {{risk: string}} toolRisk - Result of the tool-risk scoring.
 * @param {{architecture: string, triggers: string[]}} architecture -
 *   Recommended orchestration pattern.
 * @returns {Array<{id: string, severity: string, message: string}>} Blockers
 *   in the order: baseline evals, tool approvals, multi-agent without trigger.
 */
function buildBlockers(options, toolRisk, architecture) {
  const blockers = [];
  if (!options.hasBaselineEvals) {
    blockers.push({
      id: 'baseline_evals_required',
      severity: 'high',
      message: 'Establish baseline evals before adding tools, splitting agents, or increasing autonomy.',
    });
  }
  // The aggregate bucket alone is not enough: high-risk tools with no
  // approvals but reversible actions score 60 ('medium'), which would let the
  // plan through without any approval gate. Require approvals whenever a
  // high-risk tool is present.
  const hasHighRiskTools = Array.isArray(options.highRiskTools) && options.highRiskTools.length > 0;
  if ((toolRisk.risk === 'high' || hasHighRiskTools) && !options.hasToolApprovals) {
    blockers.push({
      id: 'tool_approval_required',
      severity: 'critical',
      message: 'High-risk tools need approval gates before autonomous use.',
    });
  }
  if (architecture.architecture !== 'single_agent' && architecture.triggers.length === 0) {
    blockers.push({
      id: 'multi_agent_without_trigger',
      severity: 'medium',
      message: 'Do not split agents without instruction-complexity, tool-overload, or explicit handoff evidence.',
    });
  }
  return blockers;
}
/**
 * Build the full governance report for an agent workflow: normalize the raw
 * options, score tool risk and instruction quality, pick an architecture and
 * collect blockers.
 *
 * @param {Object} [rawOptions={}] - Raw options, passed to normalizeOptions.
 * @returns {Object} Report whose `status` is 'blocked' when any blocker is
 *   critical, 'needs_work' when other blockers exist, and 'ready' otherwise.
 */
function buildAgentDesignGovernancePlan(rawOptions = {}) {
  const options = normalizeOptions(rawOptions);
  const toolRisk = scoreToolRisk(options);
  const instructionQuality = scoreInstructions(options);
  const recommendation = selectArchitecture(options, toolRisk, instructionQuality);
  const blockers = buildBlockers(options, toolRisk, recommendation);

  let status = 'ready';
  if (blockers.some(({ severity }) => severity === 'critical')) {
    status = 'blocked';
  } else if (blockers.length) {
    status = 'needs_work';
  }

  const evals = {
    baselinePresent: options.hasBaselineEvals,
    requiredBefore: ['new high-risk tools', 'multi-agent split', 'higher autonomy', 'auto-PR or deploy'],
  };
  const nextActions = [
    'Keep the workflow single-agent unless evals show instruction complexity or tool overload.',
    'Write tool descriptions with clear names, parameters, side effects, and approval requirements.',
    'Add examples and edge cases to instructions before adding subagents.',
    'Add baseline evals that grade tool choice, exit condition, recovery behavior, and unsafe action refusal.',
    'Assign low, medium, or high risk to every tool based on write access, reversibility, permissions, and financial or production impact.',
  ];

  return {
    name: 'thumbgate-agent-design-governance',
    workflow: options.workflow,
    sourcePattern: 'OpenAI practical agent guide: model + tools + instructions, single-agent first, eval-driven multi-agent splits',
    status,
    recommendation,
    toolRisk,
    instructionQuality,
    evals,
    blockers,
    nextActions,
  };
}
/**
 * Render a governance report as plain text.
 *
 * The output starts with a blank line and a fixed summary header. The
 * "Blockers" and "Tool risk signals" sections appear only when they have
 * entries, and "Next actions" is always printed. The text ends with two
 * newline characters.
 *
 * @param {Object} report - Report with workflow, status, recommendation,
 *   toolRisk, instructionQuality, evals, blockers and nextActions.
 * @returns {string} The formatted multi-line report.
 */
function formatAgentDesignGovernancePlan(report) {
  const { recommendation, toolRisk, instructionQuality, evals, blockers, nextActions } = report;
  const bullet = (text) => `  - ${text}`;

  const header = [
    '',
    'ThumbGate Agent Design Governance',
    '-'.repeat(35),
    `Workflow : ${report.workflow}`,
    `Status   : ${report.status}`,
    `Pattern  : ${recommendation.architecture}`,
    `Reason   : ${recommendation.reason}`,
    `Tool risk: ${toolRisk.risk} (${toolRisk.score}/100)`,
    `Instruction score: ${instructionQuality.score}/100`,
    `Baseline evals: ${evals.baselinePresent ? 'present' : 'missing'}`,
  ];

  const blockerSection = blockers.length > 0
    ? ['', 'Blockers:', ...blockers.map((item) => bullet(`[${item.severity}] ${item.id}: ${item.message}`))]
    : [];

  const riskSection = toolRisk.reasons.length > 0
    ? ['', 'Tool risk signals:', ...toolRisk.reasons.map((reason) => bullet(reason))]
    : [];

  const actionSection = ['', 'Next actions:', ...nextActions.map((action) => bullet(action)), ''];

  const output = [...header, ...blockerSection, ...riskSection, ...actionSection];
  return `${output.join('\n')}\n`;
}
+module.exports = {
+  buildAgentDesignGovernancePlan,
+  formatAgentDesignGovernancePlan,
+  normalizeOptions,
+  scoreInstructions,
+  scoreToolRisk,
+  selectArchitecture,
+};