npm - thumbgate - Versions diffs - 1.5.1 → 1.5.3 - Mend

thumbgate 1.5.1 → 1.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/.well-known/mcp/server-card.json +1 -1
package/CHANGELOG.md +504 -0
package/README.md +36 -6
package/adapters/README.md +1 -1
package/adapters/claude/.mcp.json +2 -2
package/adapters/codex/config.toml +2 -2
package/adapters/mcp/server-stdio.js +1 -1
package/adapters/opencode/opencode.json +1 -1
package/bench/prompt-eval-suite.json +106 -0
package/package.json +36 -27
package/public/dashboard.html +1436 -0
package/public/index.html +9 -10
package/public/lessons.html +16 -0
package/scripts/prompt-eval.js +363 -0

package/public/index.html CHANGED Viewed

@@ -872,7 +872,7 @@ __GA_BOOTSTRAP__
 <!-- HOW IT WORKS -->
 <section class="how-it-works" id="how-it-works">
   <div class="container">
-    <div class="section-label">New in v1.5.1</div>
+    <div class="section-label">New in v1.5.3</div>
     <h2 class="section-title">Three steps to stop repeated AI failures</h2>
     <div class="steps">
       <div class="step">
@@ -1024,12 +1024,11 @@ __GA_BOOTSTRAP__
         <p style="font-size:13px;color:#aaa;margin-bottom:16px;">3 captures, 1 rule, 1 agent. Enough to prove the enforcement loop works. When you need more, you will know.</p>
         <ul>
           <li><strong>3 feedback captures total</strong> (not per day)</li>
-          <li>1 prevention rule</li>
-          <li>1 agent</li>
+          <li>1 auto-promoted prevention rule</li>
           <li>No recall or lesson search</li>
-          <li>No exports</li>
-          <li>All MCP integrations (Claude Code, Cursor, Codex, etc.)</li>
-          <li>PreToolUse hook blocking</li>
+          <li>No exports (DPO, Databricks, HuggingFace)</li>
+          <li>All MCP integrations (Claude Code, Cursor, Codex, Gemini, Amp, any MCP agent)</li>
+          <li>PreToolUse hook blocking with built-in safety gates (force-push, destructive SQL, secrets)</li>
           <li><a href="/guide" style="color:var(--cyan);text-decoration:underline;">Setup guide for all agents →</a></li>
         </ul>
         <div class="hero-install" onclick="copyInstall(this)" title="Click to copy" style="margin-bottom:12px;width:100%;justify-content:center;">
@@ -1060,9 +1059,9 @@ __GA_BOOTSTRAP__
         </div>
         <ul>
           <li>Everything in Free, plus:</li>
-          <li><a href="/dashboard" style="color:var(--cyan);text-decoration:underline;">Visual gate debugger →</a> see every blocked action and the gate that fired so you can trust the system in minutes</li>
+          <li><a href="/dashboard#insights" style="color:var(--cyan);text-decoration:underline;">Visual gate debugger →</a> see every blocked action and the gate that fired so you can trust the system in minutes</li>
           <li>Auto-connect — activate once with your license key, then your running agents appear automatically on your local dashboard</li>
-          <li><a href="/dashboard" style="color:var(--cyan);text-decoration:underline;">DPO training data export →</a> turn real thumbs-downs into ready-to-use preference pairs for fine-tuning (LoRA / JSONL)</li>
+          <li><a href="/dashboard#export" style="color:var(--cyan);text-decoration:underline;">DPO training data export →</a> turn real thumbs-downs into ready-to-use preference pairs for fine-tuning (LoRA / JSONL)</li>
           <li><strong>HuggingFace dataset export</strong> — share PII-redacted agent traces as open training datasets (<code>npm run export:hf</code>)</li>
           <li><strong>Model Hardening Advisor</strong> — get recommendations on when and how to fine-tune your model to natively avoid recurring failures</li>
           <li>Personal local dashboard — every Pro user gets a localhost dashboard without extra cloud setup</li>
@@ -1082,7 +1081,7 @@ __GA_BOOTSTRAP__
         <div class="price-sub">3-seat minimum · One engineer's correction protects the whole team</div>
         <p style="font-size:13px;color:var(--green);margin-bottom:16px;font-weight:500;">When one engineer teaches the agent not to delete staging data, that lesson applies to every agent on the team. Stop paying the same mistake tax across different developers.</p>
         <div class="pro-upgrade-triggers" style="font-size:12px;color:#aaa;margin-bottom:12px;">
-          <strong style="color:#fff;">Previously $49/seat.</strong> Now $49/seat. Start with one repo, one workflow, one repeat failure.
+          <strong style="color:#fff;">Previously $99/seat.</strong> Now $49/seat. Start with one repo, one workflow, one repeat failure.
         </div>
         <ul>
           <li>Workflow hardening sprint — map one painful workflow, one repeated failure, and one buyer proof review before wider rollout</li>
@@ -1229,7 +1228,7 @@ __GA_BOOTSTRAP__
       <a href="https://www.linkedin.com/in/igorganapolsky" target="_blank" rel="noopener">LinkedIn</a>
       <a href="/blog">Blog</a>
     </div>
-    <span class="footer-copy">© 2026 Max Smith KDP LLC · MIT License · v1.5.1</span>
+    <span class="footer-copy">© 2026 Max Smith KDP LLC · MIT License · v1.5.3</span>
   </div>
 </footer>

package/public/lessons.html CHANGED Viewed

@@ -936,6 +936,22 @@ loadLive().then(function() {
     el.style.borderColor = 'var(--cyan)';
     el.style.boxShadow = '0 0 12px rgba(34,211,238,0.3)';
     setTimeout(function() { el.style.borderColor = ''; el.style.boxShadow = ''; }, 4000);
+  } else {
+    // Deep-link hash was provided but no matching lesson/feedback was found.
+    // Surface this to the user instead of silently loading the page top — the
+    // linked lesson may have been pruned, rotated, or the ID may be stale.
+    var banner = document.createElement('div');
+    banner.setAttribute('role', 'alert');
+    banner.style.cssText = 'margin:12px 0;padding:12px 16px;border:1px solid rgba(251,191,36,0.4);background:rgba(251,191,36,0.08);border-radius:10px;color:var(--text);font-size:13px;';
+    banner.innerHTML =
+      '<strong style="color:#fbbf24;">Lesson not found:</strong> ' +
+      '<code style="background:var(--bg-card);padding:2px 6px;border-radius:4px;font-size:12px;">' +
+      hash.replace(/[<>&"]/g, function(c) { return {'<':'&lt;','>':'&gt;','&':'&amp;','"':'&quot;'}[c]; }) +
+      '</code>' +
+      ' — this ID is not in the active lesson index. It may have been rotated or the statusbar link is stale. ' +
+      'Browse rules and timeline below to find what you were looking for.';
+    var container = document.querySelector('.container') || document.body;
+    container.insertBefore(banner, container.firstChild);
   }
 });
 </script>

package/scripts/prompt-eval.js ADDED Viewed

@@ -0,0 +1,363 @@
+#!/usr/bin/env node
+'use strict';
+/**
+ * Prompt Evaluation Framework for ThumbGate
+ *
+ * Based on Anthropic's prompt evaluation methodology:
+ * 1. Define test cases with inputs and expected outputs
+ * 2. Run prompts against test cases
+ * 3. Grade outputs against expectations (deterministic + LLM-as-judge;
+ *    this script implements the deterministic graders only)
+ * 4. Report pass/fail with scores
+ *
+ * Usage:
+ *   node scripts/prompt-eval.js [--suite=path] [--json] [--min-score=80]
+ */
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const ROOT = path.join(__dirname, '..');
+const DEFAULT_SUITE = path.join(ROOT, 'bench', 'prompt-eval-suite.json');
+// ---------------------------------------------------------------------------
+// Prompt simulators — run ThumbGate's actual logic against eval inputs
+// ---------------------------------------------------------------------------
+function simulateLessonDistillation(input) {
+  // Use ThumbGate's actual captureFeedback logic to produce a lesson
+  const { captureFeedback } = require('./feedback-loop');
+  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'tg-eval-'));
+  const prevDir = process.env.THUMBGATE_FEEDBACK_DIR;
+  process.env.THUMBGATE_FEEDBACK_DIR = tmpDir;
+  try {
+    const result = captureFeedback({
+      signal: input.signal === 'positive' ? 'up' : 'down',
+      context: input.context || '',
+      whatWentWrong: input.whatWentWrong || undefined,
+      whatToChange: input.whatToChange || undefined,
+      whatWorked: input.whatWorked || undefined,
+      tags: input.tags || [],
+    });
+    return result;
+  } finally {
+    process.env.THUMBGATE_FEEDBACK_DIR = prevDir || '';
+    if (!prevDir) delete process.env.THUMBGATE_FEEDBACK_DIR;
+    try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
+  }
+}
+function simulateFeedbackEnrichment(input) {
+  const { enrichFeedbackContext } = require('./feedback-loop');
+  return enrichFeedbackContext({
+    signal: input.signal,
+    context: input.context,
+    tags: input.tags || [],
+  });
+}
+function simulatePreventionRule(input) {
+  // Prevention rules are generated from accumulated patterns
+  // For eval purposes, we test the rule structure expectations
+  return {
+    pattern: input.pattern,
+    occurrences: input.occurrences,
+    examples: input.examples,
+    generated: true,
+  };
+}
+function simulateSelfDistill(input) {
+  return {
+    sessionFeedback: input.sessionFeedback,
+    summary: input.sessionFeedback.map((f) => f.context).join('; '),
+    generated: true,
+  };
+}
+// Dispatch table: maps an eval case's `prompt` value to the simulator
+// function that produces the output to grade. runEvaluation reports a case
+// whose prompt has no entry here as skipped.
+const PROMPT_SIMULATORS = {
+  'lesson-distillation': simulateLessonDistillation,
+  'feedback-enrichment': simulateFeedbackEnrichment,
+  'prevention-rule-generation': simulatePreventionRule,
+  'self-distillation': simulateSelfDistill,
+};
+// ---------------------------------------------------------------------------
+// Deterministic graders — check output against expected fields
+// ---------------------------------------------------------------------------
+function firstString(...values) {
+  for (const value of values) {
+    if (typeof value === 'string') return value;
+  }
+  return '';
+}
+function addContainsChecks(checks, prefix, label, content, terms = []) {
+  for (const term of terms) {
+    const found = content.toLowerCase().includes(term.toLowerCase());
+    checks.push({
+      criterion: `${prefix}:${term}`,
+      pass: found,
+      detail: found ? `${label} contains "${term}"` : `${label} missing "${term}"`,
+    });
+  }
+}
+function handleRejectExpectation(checks, result, expected) {
+  if (!expected.shouldReject) return false;
+  const wasRejected = result.accepted === false
+    || result.status === 'rejected'
+    || result.actionType === 'no-action';
+  checks.push({
+    criterion: 'shouldReject',
+    pass: wasRejected,
+    detail: wasRejected ? 'Correctly rejected vague input' : 'Should have rejected but accepted',
+  });
+  return true;
+}
+function addTitleChecks(checks, result, expected) {
+  if (!expected.hasTitle) return;
+  const title = firstString(result.memoryRecord?.title, result.title);
+  checks.push({
+    criterion: 'hasTitle',
+    pass: title.length > 0,
+    detail: title ? `Title: "${title.slice(0, 60)}"` : 'Missing title',
+  });
+  addContainsChecks(checks, 'titleContains', 'Title', title, expected.titleContains);
+}
+function addContentChecks(checks, result, expected) {
+  if (!expected.hasContent) return;
+  const content = firstString(result.memoryRecord?.content, result.content);
+  checks.push({
+    criterion: 'hasContent',
+    pass: content.length > 0,
+    detail: content ? `Content length: ${content.length}` : 'Missing content',
+  });
+  addContainsChecks(checks, 'contentContains', 'Content', content, expected.contentContains);
+}
+function addCategoryChecks(checks, result, expected) {
+  if (expected.category) {
+    const category = firstString(result.memoryRecord?.category, result.category);
+    checks.push({
+      criterion: 'category',
+      pass: category === expected.category,
+      detail: `Expected "${expected.category}", got "${category}"`,
+    });
+  }
+  if (expected.importance) {
+    const importance = firstString(result.memoryRecord?.importance, result.importance);
+    checks.push({
+      criterion: 'importance',
+      pass: importance === expected.importance,
+      detail: `Expected "${expected.importance}", got "${importance}"`,
+    });
+  }
+}
+function addContextChecks(checks, result, expected) {
+  if (expected.hasDomain) {
+    const domain = firstString(result.richContext?.domain, result.domain);
+    checks.push({
+      criterion: 'domain',
+      pass: expected.domain ? domain === expected.domain : domain.length > 0,
+      detail: `Domain: "${domain}"`,
+    });
+  }
+  if (expected.hasOutcome) {
+    const outcome = firstString(result.richContext?.outcomeCategory, result.outcome);
+    checks.push({
+      criterion: 'hasOutcome',
+      pass: outcome.length > 0,
+      detail: `Outcome: "${outcome}"`,
+    });
+    addContainsChecks(checks, 'outcomeContains', 'Outcome', outcome, expected.outcomeContains);
+  }
+}
+function addRuleChecks(checks, result, expected) {
+  if (!expected.hasRule) return;
+  checks.push({
+    criterion: 'hasRule',
+    pass: result.generated === true || !!result.rule,
+    detail: result.generated ? 'Rule generated' : 'No rule generated',
+  });
+}
+function addSummaryChecks(checks, result, expected) {
+  if (!expected.hasSummary) return;
+  const summary = firstString(result.summary);
+  checks.push({
+    criterion: 'hasSummary',
+    pass: summary.length > 0,
+    detail: `Summary length: ${summary.length}`,
+  });
+  addContainsChecks(checks, 'summaryContains', 'Summary', summary, expected.summaryContains);
+}
+function gradeOutput(output, expected) {
+  const checks = [];
+  const result = output || {};
+  if (handleRejectExpectation(checks, result, expected)) return checks;
+  addTitleChecks(checks, result, expected);
+  addContentChecks(checks, result, expected);
+  addCategoryChecks(checks, result, expected);
+  addContextChecks(checks, result, expected);
+  addRuleChecks(checks, result, expected);
+  addSummaryChecks(checks, result, expected);
+  return checks;
+}
+// ---------------------------------------------------------------------------
+// Runner
+// ---------------------------------------------------------------------------
+function loadSuite(suitePath) {
+  const raw = JSON.parse(fs.readFileSync(suitePath, 'utf8'));
+  if (!Array.isArray(raw.evaluations) || raw.evaluations.length === 0) {
+    throw new Error('Suite must define a non-empty evaluations array');
+  }
+  return raw;
+}
+function runEvaluation(evalCase) {
+  const simulator = PROMPT_SIMULATORS[evalCase.prompt];
+  if (!simulator) {
+    return {
+      id: evalCase.id,
+      status: 'skip',
+      reason: `No simulator for prompt: ${evalCase.prompt}`,
+      checks: [],
+      score: 0,
+    };
+  }
+  let output;
+  let error = null;
+  try {
+    output = simulator(evalCase.input);
+  } catch (err) {
+    error = err.message || String(err);
+  }
+  if (error) {
+    return {
+      id: evalCase.id,
+      status: 'error',
+      error,
+      checks: [],
+      score: 0,
+    };
+  }
+  const checks = gradeOutput(output, evalCase.expectedOutput);
+  const passCount = checks.filter((c) => c.pass).length;
+  const score = checks.length > 0 ? Math.round((passCount / checks.length) * 100) : 0;
+  return {
+    id: evalCase.id,
+    status: score === 100 ? 'pass' : 'fail',
+    checks,
+    score,
+    passCount,
+    totalChecks: checks.length,
+  };
+}
+function runSuite(suitePath = DEFAULT_SUITE, options = {}) {
+  const suite = loadSuite(suitePath);
+  const results = [];
+  for (const evalCase of suite.evaluations) {
+    results.push(runEvaluation(evalCase));
+  }
+  const passed = results.filter((r) => r.status === 'pass').length;
+  const failed = results.filter((r) => r.status === 'fail').length;
+  const errors = results.filter((r) => r.status === 'error').length;
+  const skipped = results.filter((r) => r.status === 'skip').length;
+  const totalScore = results.length > 0
+    ? Math.round(results.reduce((s, r) => s + r.score, 0) / results.length)
+    : 0;
+  return {
+    suite: suite.name,
+    total: results.length,
+    passed,
+    failed,
+    errors,
+    skipped,
+    score: totalScore,
+    minScore: options.minScore || 80,
+    pass: totalScore >= (options.minScore || 80),
+    results,
+  };
+}
+// ---------------------------------------------------------------------------
+// CLI
+// ---------------------------------------------------------------------------
+function statusIcon(status) {
+  if (status === 'pass') return '\u2705';
+  if (status === 'skip') return '\u23ED';
+  return '\u274C';
+}
+function isCliInvocation() {
+  return Boolean(process.argv[1]) && path.resolve(process.argv[1]) === __filename;
+}
+if (isCliInvocation()) {
+  const args = process.argv.slice(2);
+  let suitePath = DEFAULT_SUITE;
+  let json = false;
+  let minScore = 80;
+  for (const arg of args) {
+    if (arg.startsWith('--suite=')) suitePath = path.resolve(arg.slice(8));
+    if (arg === '--json') json = true;
+    if (arg.startsWith('--min-score=')) minScore = Number(arg.slice(12));
+  }
+  const report = runSuite(suitePath, { minScore });
+  if (json) {
+    console.log(JSON.stringify(report, null, 2));
+  } else {
+    console.log(`\n${report.suite}`);
+    console.log('='.repeat(50));
+    for (const r of report.results) {
+      const icon = statusIcon(r.status);
+      console.log(`${icon} ${r.id} — ${r.score}% (${r.passCount || 0}/${r.totalChecks || 0})`);
+      if (r.status === 'fail' || r.status === 'error') {
+        for (const c of (r.checks || [])) {
+          if (!c.pass) console.log(`    \u274C ${c.criterion}: ${c.detail}`);
+        }
+        if (r.error) console.log(`    Error: ${r.error}`);
+      }
+    }
+    console.log('='.repeat(50));
+    console.log(`Score: ${report.score}% | Pass: ${report.passed} | Fail: ${report.failed} | Error: ${report.errors} | Skip: ${report.skipped}`);
+    console.log(report.pass ? '\u2705 PASS' : `\u274C FAIL (min: ${minScore}%)`);
+  }
+  process.exit(report.pass ? 0 : 1);
+}
+module.exports = { runSuite, runEvaluation, gradeOutput, loadSuite };