npm - thumbgate - Versions diffs - 1.5.0 → 1.5.2 - Mend

thumbgate 1.5.0 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/.well-known/mcp/server-card.json +1 -1
package/CHANGELOG.md +504 -0
package/README.md +251 -223
package/adapters/README.md +1 -1
package/adapters/claude/.mcp.json +2 -2
package/adapters/codex/config.toml +4 -2
package/adapters/mcp/server-stdio.js +34 -3
package/adapters/opencode/opencode.json +1 -1
package/bench/prompt-eval-suite.json +106 -0
package/bin/cli.js +21 -8
package/bin/postinstall.js +25 -17
package/config/evals/agent-safety-eval.json +131 -0
package/config/github-about.json +5 -2
package/config/specs/agent-safety.json +79 -0
package/package.json +69 -29
package/public/compare.html +3 -3
package/public/dashboard.html +1399 -0
package/public/guide.html +2 -2
package/public/index.html +230 -98
package/scripts/auto-wire-hooks.js +77 -27
package/scripts/bot-detection.js +165 -0
package/scripts/cli-feedback.js +6 -2
package/scripts/commercial-offer.js +4 -4
package/scripts/dashboard.js +152 -2
package/scripts/decision-trace.js +354 -0
package/scripts/feedback-loop.js +4 -8
package/scripts/prompt-eval.js +363 -0
package/scripts/rate-limiter.js +77 -24
package/scripts/sales-pipeline.js +681 -0
package/scripts/session-episode-store.js +329 -0
package/scripts/session-health-sensor.js +242 -0
package/scripts/spec-gate.js +362 -0
package/scripts/statusline.sh +6 -9
package/skills/thumbgate/SKILL.md +1 -1
package/src/api/server.js +368 -12

package/scripts/prompt-eval.js ADDED Viewed

@@ -0,0 +1,363 @@
+#!/usr/bin/env node
+'use strict';
+/**
+ * Prompt Evaluation Framework for ThumbGate
+ *
+ * Based on Anthropic's prompt evaluation methodology:
+ * 1. Define test cases with inputs and expected outputs
+ * 2. Run prompts against test cases
+ * 3. Grade outputs against expectations (deterministic + LLM-as-judge)
+ * 4. Report pass/fail with scores
+ *
+ * Usage:
+ *   node scripts/prompt-eval.js [--suite=path] [--json] [--min-score=80]
+ */
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const ROOT = path.join(__dirname, '..');
+const DEFAULT_SUITE = path.join(ROOT, 'bench', 'prompt-eval-suite.json');
+// ---------------------------------------------------------------------------
+// Prompt simulators — run ThumbGate's actual logic against eval inputs
+// ---------------------------------------------------------------------------
+/**
+ * Runs one lesson-distillation eval input through `captureFeedback` from
+ * ./feedback-loop while THUMBGATE_FEEDBACK_DIR points at a throwaway temp
+ * directory (presumably so the eval never touches the real feedback store —
+ * confirm against feedback-loop).
+ *
+ * @param {object} input - Eval case input; reads signal, context,
+ *   whatWentWrong, whatToChange, whatWorked and tags.
+ * @returns {*} Whatever `captureFeedback` returns.
+ */
+function simulateLessonDistillation(input) {
+  // Use ThumbGate's actual captureFeedback logic to produce a lesson
+  const { captureFeedback } = require('./feedback-loop');
+  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'tg-eval-'));
+  const prevDir = process.env.THUMBGATE_FEEDBACK_DIR;
+  process.env.THUMBGATE_FEEDBACK_DIR = tmpDir;
+  try {
+    return captureFeedback({
+      // Any signal other than 'positive' is treated as a thumbs-down.
+      signal: input.signal === 'positive' ? 'up' : 'down',
+      context: input.context || '',
+      whatWentWrong: input.whatWentWrong || undefined,
+      whatToChange: input.whatToChange || undefined,
+      whatWorked: input.whatWorked || undefined,
+      tags: input.tags || [],
+    });
+  } finally {
+    // Restore the caller's environment exactly: a variable that was unset is
+    // removed again, while any previous value (including '') is put back.
+    // Assigning undefined would store the string 'undefined', hence delete.
+    if (prevDir === undefined) {
+      delete process.env.THUMBGATE_FEEDBACK_DIR;
+    } else {
+      process.env.THUMBGATE_FEEDBACK_DIR = prevDir;
+    }
+    // Best-effort cleanup: a leftover temp dir must not fail the eval.
+    try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
+  }
+}
+/**
+ * Passes an eval input's signal, context and tags to
+ * `enrichFeedbackContext` from ./feedback-loop and returns its result.
+ *
+ * @param {object} input - Eval case input; missing tags become [].
+ * @returns {*} Whatever `enrichFeedbackContext` returns.
+ */
+function simulateFeedbackEnrichment(input) {
+  const enrich = require('./feedback-loop').enrichFeedbackContext;
+  const payload = {
+    signal: input.signal,
+    context: input.context,
+    tags: input.tags || [],
+  };
+  return enrich(payload);
+}
+/**
+ * Stand-in for prevention-rule generation: echoes the pattern fields of the
+ * eval input and marks the result as generated, so only the expected rule
+ * structure is exercised.
+ *
+ * @param {object} input - Eval case input (pattern, occurrences, examples).
+ * @returns {{pattern: *, occurrences: *, examples: *, generated: boolean}}
+ */
+function simulatePreventionRule(input) {
+  const rule = {};
+  rule.pattern = input.pattern;
+  rule.occurrences = input.occurrences;
+  rule.examples = input.examples;
+  rule.generated = true;
+  return rule;
+}
+/**
+ * Stand-in for self-distillation: returns the session feedback unchanged
+ * together with a summary made of every entry's context joined by '; '.
+ *
+ * @param {{sessionFeedback: Array<{context: *}>}} input - Eval case input.
+ * @returns {{sessionFeedback: Array, summary: string, generated: boolean}}
+ */
+function simulateSelfDistill(input) {
+  const feedbackItems = input.sessionFeedback;
+  const contexts = feedbackItems.map((item) => item.context);
+  return {
+    sessionFeedback: feedbackItems,
+    summary: contexts.join('; '),
+    generated: true,
+  };
+}
+// Maps the `prompt` name of an eval case to the simulator that produces its
+// output; runEvaluation reports a case as skipped when no entry matches.
+const PROMPT_SIMULATORS = {
+  'lesson-distillation': simulateLessonDistillation,
+  'feedback-enrichment': simulateFeedbackEnrichment,
+  'prevention-rule-generation': simulatePreventionRule,
+  'self-distillation': simulateSelfDistill,
+};
+// ---------------------------------------------------------------------------
+// Deterministic graders — check output against expected fields
+// ---------------------------------------------------------------------------
+/**
+ * Picks the first argument that is a string (an empty string counts).
+ *
+ * @param {...*} values - Candidates, checked in order.
+ * @returns {string} The first string argument, or '' when there is none.
+ */
+function firstString(...values) {
+  const match = values.find((candidate) => typeof candidate === 'string');
+  return match === undefined ? '' : match;
+}
+/**
+ * Appends one case-insensitive "contains" check per term to `checks`.
+ *
+ * @param {Array<object>} checks - Check list, mutated in place.
+ * @param {string} prefix - Criterion prefix; criteria read `prefix:term`.
+ * @param {string} label - Human-readable field name used in the detail text.
+ * @param {string} content - Text that is searched.
+ * @param {string[]} [terms=[]] - Terms that must occur in `content`.
+ * @returns {void}
+ */
+function addContainsChecks(checks, prefix, label, content, terms = []) {
+  for (const needle of terms) {
+    const pass = content.toLowerCase().includes(needle.toLowerCase());
+    const verb = pass ? 'contains' : 'missing';
+    checks.push({
+      criterion: `${prefix}:${needle}`,
+      pass,
+      detail: `${label} ${verb} "${needle}"`,
+    });
+  }
+}
+/**
+ * Grades a case that expects its input to be rejected. A result counts as
+ * rejected when `accepted` is false, `status` is 'rejected', or
+ * `actionType` is 'no-action'.
+ *
+ * @param {Array<object>} checks - Check list, mutated in place.
+ * @param {object} result - Simulator output.
+ * @param {object} expected - Expectations of the eval case.
+ * @returns {boolean} True when the case expected a rejection (a check was
+ *   added), false when this expectation does not apply.
+ */
+function handleRejectExpectation(checks, result, expected) {
+  if (expected.shouldReject) {
+    const rejectionSignals = [
+      result.accepted === false,
+      result.status === 'rejected',
+      result.actionType === 'no-action',
+    ];
+    const wasRejected = rejectionSignals.some(Boolean);
+    let detail = 'Should have rejected but accepted';
+    if (wasRejected) detail = 'Correctly rejected vague input';
+    checks.push({ criterion: 'shouldReject', pass: wasRejected, detail });
+    return true;
+  }
+  return false;
+}
+/**
+ * When the case expects a title, checks that one exists (taken from
+ * `memoryRecord.title`, else `title`) and that it contains every
+ * `expected.titleContains` term.
+ *
+ * @param {Array<object>} checks - Check list, mutated in place.
+ * @param {object} result - Simulator output.
+ * @param {object} expected - Expectations of the eval case.
+ * @returns {void}
+ */
+function addTitleChecks(checks, result, expected) {
+  if (expected.hasTitle) {
+    const title = firstString(result.memoryRecord?.title, result.title);
+    const present = title !== '';
+    let detail = 'Missing title';
+    // Only the first 60 characters are echoed to keep reports readable.
+    if (present) detail = `Title: "${title.slice(0, 60)}"`;
+    checks.push({ criterion: 'hasTitle', pass: present, detail });
+    addContainsChecks(checks, 'titleContains', 'Title', title, expected.titleContains);
+  }
+}
+/**
+ * When the case expects content, checks that some exists (taken from
+ * `memoryRecord.content`, else `content`) and that it contains every
+ * `expected.contentContains` term.
+ *
+ * @param {Array<object>} checks - Check list, mutated in place.
+ * @param {object} result - Simulator output.
+ * @param {object} expected - Expectations of the eval case.
+ * @returns {void}
+ */
+function addContentChecks(checks, result, expected) {
+  if (expected.hasContent) {
+    const content = firstString(result.memoryRecord?.content, result.content);
+    const present = content !== '';
+    let detail = 'Missing content';
+    if (present) detail = `Content length: ${content.length}`;
+    checks.push({ criterion: 'hasContent', pass: present, detail });
+    addContainsChecks(checks, 'contentContains', 'Content', content, expected.contentContains);
+  }
+}
+/**
+ * Adds an exact-match check for `category` and for `importance`, each only
+ * when the case states an expected value. Actual values are read from
+ * `memoryRecord` first, then from the result itself.
+ *
+ * @param {Array<object>} checks - Check list, mutated in place.
+ * @param {object} result - Simulator output.
+ * @param {object} expected - Expectations of the eval case.
+ * @returns {void}
+ */
+function addCategoryChecks(checks, result, expected) {
+  // Both fields follow the same rule; the field name doubles as criterion.
+  for (const field of ['category', 'importance']) {
+    const wanted = expected[field];
+    if (!wanted) continue;
+    const actual = firstString(result.memoryRecord?.[field], result[field]);
+    checks.push({
+      criterion: field,
+      pass: actual === wanted,
+      detail: `Expected "${wanted}", got "${actual}"`,
+    });
+  }
+}
+/**
+ * Adds the enrichment-context checks: a domain check (exact match when
+ * `expected.domain` is given, otherwise merely non-empty) and an outcome
+ * check plus its `expected.outcomeContains` term checks.
+ *
+ * @param {Array<object>} checks - Check list, mutated in place.
+ * @param {object} result - Simulator output; `richContext` is preferred.
+ * @param {object} expected - Expectations of the eval case.
+ * @returns {void}
+ */
+function addContextChecks(checks, result, expected) {
+  if (expected.hasDomain) {
+    const domain = firstString(result.richContext?.domain, result.domain);
+    let domainOk;
+    if (expected.domain) {
+      domainOk = domain === expected.domain;
+    } else {
+      domainOk = domain !== '';
+    }
+    checks.push({ criterion: 'domain', pass: domainOk, detail: `Domain: "${domain}"` });
+  }
+
+  if (expected.hasOutcome) {
+    const outcome = firstString(result.richContext?.outcomeCategory, result.outcome);
+    checks.push({
+      criterion: 'hasOutcome',
+      pass: outcome !== '',
+      detail: `Outcome: "${outcome}"`,
+    });
+    addContainsChecks(checks, 'outcomeContains', 'Outcome', outcome, expected.outcomeContains);
+  }
+}
+/**
+ * When the case expects a rule, checks that the result either is flagged
+ * `generated === true` or carries a truthy `rule`.
+ *
+ * @param {Array<object>} checks - Check list, mutated in place.
+ * @param {object} result - Simulator output.
+ * @param {object} expected - Expectations of the eval case.
+ * @returns {void}
+ */
+function addRuleChecks(checks, result, expected) {
+  if (!expected.hasRule) return;
+  // Derive verdict and detail from the same predicate so the report can
+  // never say "No rule generated" on a passing check (or the reverse).
+  const hasRule = result.generated === true || Boolean(result.rule);
+  checks.push({
+    criterion: 'hasRule',
+    pass: hasRule,
+    detail: hasRule ? 'Rule generated' : 'No rule generated',
+  });
+}
+/**
+ * When the case expects a summary, checks that `result.summary` is a
+ * non-empty string and that it contains every `expected.summaryContains`
+ * term.
+ *
+ * @param {Array<object>} checks - Check list, mutated in place.
+ * @param {object} result - Simulator output.
+ * @param {object} expected - Expectations of the eval case.
+ * @returns {void}
+ */
+function addSummaryChecks(checks, result, expected) {
+  if (expected.hasSummary) {
+    const summary = firstString(result.summary);
+    const size = summary.length;
+    checks.push({
+      criterion: 'hasSummary',
+      pass: size > 0,
+      detail: `Summary length: ${size}`,
+    });
+    addContainsChecks(checks, 'summaryContains', 'Summary', summary, expected.summaryContains);
+  }
+}
+/**
+ * Grades a simulator output against the expectations of an eval case using
+ * the deterministic checkers above.
+ *
+ * @param {object} [output] - Simulator output; a falsy value is graded as {}.
+ * @param {object} [expected] - Expectations of the eval case; when missing,
+ *   no criteria apply and the returned list is empty.
+ * @returns {Array<{criterion: string, pass: boolean, detail: string}>}
+ */
+function gradeOutput(output, expected) {
+  const checks = [];
+  const result = output || {};
+  // A case without expectations must produce zero checks rather than throw
+  // a TypeError that would abort the whole suite run.
+  const criteria = expected || {};
+  // A reject expectation is exclusive: no other criteria are evaluated.
+  if (handleRejectExpectation(checks, result, criteria)) return checks;
+  addTitleChecks(checks, result, criteria);
+  addContentChecks(checks, result, criteria);
+  addCategoryChecks(checks, result, criteria);
+  addContextChecks(checks, result, criteria);
+  addRuleChecks(checks, result, criteria);
+  addSummaryChecks(checks, result, criteria);
+  return checks;
+}
+// ---------------------------------------------------------------------------
+// Runner
+// ---------------------------------------------------------------------------
+/**
+ * Reads and validates an eval suite file.
+ *
+ * @param {string} suitePath - Path of the JSON suite file.
+ * @returns {object} The parsed suite, unchanged.
+ * @throws {Error} When the file cannot be read or parsed, or when the
+ *   parsed value has no non-empty `evaluations` array.
+ */
+function loadSuite(suitePath) {
+  const raw = JSON.parse(fs.readFileSync(suitePath, 'utf8'));
+  // `null` is valid JSON; optional chaining routes it to the validation
+  // error below instead of an opaque TypeError.
+  const evaluations = raw?.evaluations;
+  if (!Array.isArray(evaluations) || evaluations.length === 0) {
+    throw new Error('Suite must define a non-empty evaluations array');
+  }
+  return raw;
+}
+/**
+ * Runs a single eval case: looks up its simulator, executes it, grades the
+ * output and converts the checks into a percentage score.
+ *
+ * @param {{id: *, prompt: string, input: *, expectedOutput: object}} evalCase
+ * @returns {object} One of:
+ *   - `{ id, status: 'skip', reason, checks: [], score: 0 }` when no
+ *     simulator is registered for `evalCase.prompt`;
+ *   - `{ id, status: 'error', error, checks: [], score: 0 }` when the
+ *     simulator throws;
+ *   - `{ id, status: 'pass'|'fail', checks, score, passCount, totalChecks }`
+ *     otherwise, where only a score of 100 counts as 'pass'.
+ */
+function runEvaluation(evalCase) {
+  // Own-property lookup only: a prompt such as "constructor" or "toString"
+  // must not resolve to a function inherited from Object.prototype.
+  const simulator = Object.prototype.hasOwnProperty.call(PROMPT_SIMULATORS, evalCase.prompt)
+    ? PROMPT_SIMULATORS[evalCase.prompt]
+    : undefined;
+  if (!simulator) {
+    return {
+      id: evalCase.id,
+      status: 'skip',
+      reason: `No simulator for prompt: ${evalCase.prompt}`,
+      checks: [],
+      score: 0,
+    };
+  }
+  let output;
+  try {
+    output = simulator(evalCase.input);
+  } catch (err) {
+    // `err` may be any thrown value, including null or undefined.
+    return {
+      id: evalCase.id,
+      status: 'error',
+      error: err?.message || String(err),
+      checks: [],
+      score: 0,
+    };
+  }
+  const checks = gradeOutput(output, evalCase.expectedOutput);
+  const passCount = checks.filter((c) => c.pass).length;
+  // A case that produced no checks scores 0 and is therefore reported as failed.
+  const score = checks.length > 0 ? Math.round((passCount / checks.length) * 100) : 0;
+  return {
+    id: evalCase.id,
+    status: score === 100 ? 'pass' : 'fail',
+    checks,
+    score,
+    passCount,
+    totalChecks: checks.length,
+  };
+}
+/**
+ * Loads a suite, runs every evaluation in it and aggregates the results.
+ *
+ * The suite score is the rounded mean of all case scores; skipped and
+ * errored cases contribute a score of 0 to that mean.
+ *
+ * @param {string} [suitePath=DEFAULT_SUITE] - Path of the JSON suite file.
+ * @param {{minScore?: number}} [options] - `minScore` is the threshold the
+ *   suite score must reach; defaults to 80. An explicit 0 is honoured.
+ * @returns {{suite: *, total: number, passed: number, failed: number,
+ *   errors: number, skipped: number, score: number, minScore: number,
+ *   pass: boolean, results: object[]}}
+ * @throws {Error} Propagates any error raised by `loadSuite`.
+ */
+function runSuite(suitePath = DEFAULT_SUITE, options = {}) {
+  const suite = loadSuite(suitePath);
+  const results = suite.evaluations.map((evalCase) => runEvaluation(evalCase));
+  const countByStatus = (status) => results.filter((r) => r.status === status).length;
+  const totalScore = results.length > 0
+    ? Math.round(results.reduce((s, r) => s + r.score, 0) / results.length)
+    : 0;
+  // `|| 80` alone would turn a deliberate threshold of 0 into 80; keep any
+  // real number and fall back only for missing or NaN values.
+  const requested = options.minScore;
+  const minScore = typeof requested === 'number' && !Number.isNaN(requested)
+    ? requested
+    : (requested || 80);
+  return {
+    suite: suite.name,
+    total: results.length,
+    passed: countByStatus('pass'),
+    failed: countByStatus('fail'),
+    errors: countByStatus('error'),
+    skipped: countByStatus('skip'),
+    score: totalScore,
+    minScore,
+    pass: totalScore >= minScore,
+    results,
+  };
+}
+// ---------------------------------------------------------------------------
+// CLI
+// ---------------------------------------------------------------------------
+/**
+ * Maps a result status to the icon printed in the text report.
+ *
+ * @param {string} status - 'pass', 'skip', or anything else.
+ * @returns {string} Check mark for 'pass', skip symbol for 'skip', cross
+ *   mark for every other status.
+ */
+function statusIcon(status) {
+  switch (status) {
+    case 'pass':
+      return '\u2705';
+    case 'skip':
+      return '\u23ED';
+    default:
+      return '\u274C';
+  }
+}
+/**
+ * Tells whether this file is the script Node was started with, as opposed
+ * to being loaded through require().
+ *
+ * @returns {boolean} True when process.argv[1] resolves to this file.
+ */
+function isCliInvocation() {
+  const entryScript = process.argv[1];
+  if (!entryScript) return false;
+  return path.resolve(entryScript) === __filename;
+}
+// CLI entry point: parse flags, run the suite, print a JSON or text report,
+// and exit with 0 when the suite reached its minimum score, else 1.
+if (isCliInvocation()) {
+  const args = process.argv.slice(2);
+  let suitePath = DEFAULT_SUITE;
+  let json = false;
+  let minScore = 80;
+  for (const arg of args) {
+    // slice(8) / slice(12) drop the '--suite=' / '--min-score=' prefixes.
+    if (arg.startsWith('--suite=')) suitePath = path.resolve(arg.slice(8));
+    if (arg === '--json') json = true;
+    if (arg.startsWith('--min-score=')) minScore = Number(arg.slice(12));
+  }
+  const report = runSuite(suitePath, { minScore });
+  if (json) {
+    console.log(JSON.stringify(report, null, 2));
+  } else {
+    console.log(`\n${report.suite}`);
+    console.log('='.repeat(50));
+    for (const r of report.results) {
+      const icon = statusIcon(r.status);
+      // Skipped and errored results carry no passCount/totalChecks.
+      console.log(`${icon} ${r.id} — ${r.score}% (${r.passCount || 0}/${r.totalChecks || 0})`);
+      if (r.status === 'fail' || r.status === 'error') {
+        for (const c of (r.checks || [])) {
+          if (!c.pass) console.log(`    \u274C ${c.criterion}: ${c.detail}`);
+        }
+        if (r.error) console.log(`    Error: ${r.error}`);
+      }
+    }
+    console.log('='.repeat(50));
+    console.log(`Score: ${report.score}% | Pass: ${report.passed} | Fail: ${report.failed} | Error: ${report.errors} | Skip: ${report.skipped}`);
+    // Print the threshold runSuite actually applied: the raw flag value can
+    // be NaN (e.g. --min-score=abc), in which case runSuite falls back.
+    console.log(report.pass ? '\u2705 PASS' : `\u274C FAIL (min: ${report.minScore}%)`);
+  }
+  process.exit(report.pass ? 0 : 1);
+}
+module.exports = { runSuite, runEvaluation, gradeOutput, loadSuite };

package/scripts/rate-limiter.js CHANGED Viewed

@@ -11,23 +11,37 @@ const {
 const USAGE_FILE = path.join(process.env.HOME || '/tmp', '.thumbgate', 'usage-limits.json');
+// ──────────────────────────────────────────────────────────
+// NEW: Lifetime caps on free tier — users hit the wall fast
+// and must upgrade to keep using core features.
+// ──────────────────────────────────────────────────────────
 const FREE_TIER_LIMITS = {
-  capture_feedback: { daily: 3, label: 'feedback captures' },
-  search_lessons: { daily: 5, label: 'lesson searches' },
-  search_thumbgate: { daily: 5, label: 'ThumbGate searches' },
-  commerce_recall: { daily: 5, label: 'commerce recalls' },
-  export_dpo: { daily: 0, label: 'DPO exports (Pro only)' },
-  export_databricks: { daily: 0, label: 'Databricks exports (Pro only)' },
+  capture_feedback:   { daily: Infinity, lifetime: 3,  label: 'feedback captures' },
+  prevention_rules:   { daily: Infinity, lifetime: 1,  label: 'prevention rules generated' },
+  recall:             { daily: 0,        lifetime: 0,  label: 'recall queries (Pro only)' },
+  search_lessons:     { daily: 0,        lifetime: 0,  label: 'lesson searches (Pro only)' },
+  search_thumbgate:   { daily: 0,        lifetime: 0,  label: 'ThumbGate searches (Pro only)' },
+  commerce_recall:    { daily: 0,        lifetime: 0,  label: 'commerce recalls (Pro only)' },
+  export_dpo:         { daily: 0,        lifetime: 0,  label: 'DPO exports (Pro only)' },
+  export_databricks:  { daily: 0,        lifetime: 0,  label: 'Databricks exports (Pro only)' },
+  construct_context_pack: { daily: Infinity, lifetime: 3, label: 'context packs' },
 };
-const FREE_TIER_MAX_GATES = 5;
+const FREE_TIER_MAX_GATES = 1; // Down from 5 — one auto-promoted gate, then paywall
-const UPGRADE_MESSAGE = `Pro: ${PRO_PRICE_LABEL} — dashboard and DPO export: ${PRO_MONTHLY_PAYMENT_LINK}\n  Team: ${TEAM_PRICE_LABEL} after workflow qualification.`;
+const UPGRADE_MESSAGE = `Pro: ${PRO_PRICE_LABEL} — unlimited captures, recall, prevention rules, and dashboard: ${PRO_MONTHLY_PAYMENT_LINK}\n  Team: ${TEAM_PRICE_LABEL} after workflow qualification.`;
+const PAYWALL_MESSAGES = {
+  capture_feedback: 'You\'ve used all 3 free feedback captures. Your agent is still making mistakes — upgrade to Pro to capture every one and build real prevention rules.',
+  prevention_rules: 'Free tier includes 1 prevention rule. Your agents need more protection — upgrade to Pro for unlimited rules.',
+  recall: 'Recall is a Pro feature. Your past feedback is stored locally — upgrade to search and reuse it.',
+  search_lessons: 'Lesson search is a Pro feature. Upgrade to find patterns in your agent\'s mistakes.',
+  default: 'This feature requires Pro. Start a 7-day free trial — no credit card required.',
+};
 function isProTier(authContext) {
   if (authContext && authContext.tier === 'pro') return true;
   if (process.env.THUMBGATE_API_KEY || process.env.THUMBGATE_PRO_MODE === '1' || process.env.THUMBGATE_NO_RATE_LIMIT === '1') return true;
-  // Also check license file for real customer Pro verification
   try {
     const { isProLicensed } = require('./license');
     if (isProLicensed()) return true;
@@ -62,38 +76,79 @@ function todayKey() {
 /**
  * Check and increment usage for a given action.
+ * Now enforces LIFETIME limits in addition to daily limits.
  * Returns { allowed: true } or { allowed: false, message: string }
  */
 function checkLimit(action, authContext) {
   if (isProTier(authContext)) return { allowed: true };
   const limitEntry = FREE_TIER_LIMITS[action];
-  if (limitEntry == null) return { allowed: true }; // no limit for this action
+  if (limitEntry == null) return { allowed: true };
   const dailyLimit = typeof limitEntry === 'object' ? limitEntry.daily : limitEntry;
+  const lifetimeLimit = typeof limitEntry === 'object' ? limitEntry.lifetime : Infinity;
   const usage = loadUsage();
   const today = todayKey();
-  // Reset if different day
+  // Reset daily counts if different day
   if (usage.date !== today) {
     usage.date = today;
     usage.counts = {};
   }
   usage.counts = usage.counts || {};
-  const current = usage.counts[action] || 0;
+  usage.lifetime = usage.lifetime || {};
+  const dailyCurrent = usage.counts[action] || 0;
+  const lifetimeCurrent = usage.lifetime[action] || 0;
+  // Check lifetime limit first (the hard wall)
+  if (lifetimeLimit !== Infinity && lifetimeCurrent >= lifetimeLimit) {
+    const paywallMsg = PAYWALL_MESSAGES[action] || PAYWALL_MESSAGES.default;
+    return {
+      allowed: false,
+      message: `${paywallMsg}\n\n${UPGRADE_MESSAGE}`,
+      used: lifetimeCurrent,
+      limit: lifetimeLimit,
+      limitType: 'lifetime',
+    };
+  }
-  if (current >= dailyLimit) {
-    return { allowed: false, message: `Free tier limit reached. Upgrade to Pro for unlimited: https://thumbgate-production.up.railway.app/pro\n${UPGRADE_MESSAGE}`, used: current, limit: dailyLimit };
+  // Check daily limit
+  if (dailyLimit !== Infinity && dailyCurrent >= dailyLimit) {
+    return {
+      allowed: false,
+      message: `Daily limit reached. ${UPGRADE_MESSAGE}`,
+      used: dailyCurrent,
+      limit: dailyLimit,
+      limitType: 'daily',
+    };
   }
-  // Increment
-  usage.counts[action] = current + 1;
+  // Increment both counters
+  usage.counts[action] = dailyCurrent + 1;
+  usage.lifetime[action] = lifetimeCurrent + 1;
   saveUsage(usage);
-  const used = current + 1;
-  return { allowed: true, used, limit: dailyLimit, remaining: dailyLimit - used };
+  const remaining = lifetimeLimit === Infinity
+    ? Infinity
+    : lifetimeLimit - (lifetimeCurrent + 1);
+  // Warn when approaching limit
+  const warningThreshold = lifetimeLimit <= 3 ? 1 : Math.ceil(lifetimeLimit * 0.2);
+  const isNearLimit = remaining <= warningThreshold && remaining > 0;
+  return {
+    allowed: true,
+    used: lifetimeCurrent + 1,
+    limit: lifetimeLimit,
+    remaining,
+    limitType: 'lifetime',
+    warning: isNearLimit
+      ? `${remaining} free ${limitEntry.label} remaining. Upgrade to Pro for unlimited.`
+      : undefined,
+  };
 }
 /**
@@ -103,14 +158,11 @@ function getUsage(action, authContext) {
   if (isProTier(authContext)) return { count: 0, limit: Infinity, remaining: Infinity };
   const limitEntry = FREE_TIER_LIMITS[action];
-  const dailyLimit = limitEntry == null ? Infinity : (typeof limitEntry === 'object' ? limitEntry.daily : limitEntry);
+  const lifetimeLimit = limitEntry == null ? Infinity : (typeof limitEntry === 'object' ? (limitEntry.lifetime ?? Infinity) : Infinity);
   const usage = loadUsage();
-  const today = todayKey();
-  if (usage.date !== today) return { count: 0, limit: dailyLimit, remaining: dailyLimit };
-  const count = (usage.counts || {})[action] || 0;
-  return { count, limit: dailyLimit, remaining: Math.max(0, dailyLimit - count) };
+  const lifetimeCount = (usage.lifetime || {})[action] || 0;
+  return { count: lifetimeCount, limit: lifetimeLimit, remaining: Math.max(0, lifetimeLimit - lifetimeCount) };
 }
 module.exports = {
@@ -123,5 +175,6 @@ module.exports = {
   FREE_TIER_LIMITS,
   FREE_TIER_MAX_GATES,
   UPGRADE_MESSAGE,
+  PAYWALL_MESSAGES,
   USAGE_FILE,
 };