npm - thumbgate - Versions diffs - 1.27.6 → 1.27.8 - Mend

thumbgate 1.27.6 → 1.27.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96)

package/.claude/commands/thumbgate-blocked.md +27 -0
package/.claude/commands/thumbgate-doctor.md +30 -0
package/.claude/commands/thumbgate-guard.md +36 -0
package/.claude/commands/thumbgate-protect.md +30 -0
package/.claude/commands/thumbgate-rules.md +30 -0
package/.claude-plugin/plugin.json +1 -1
package/.well-known/llms.txt +6 -2
package/.well-known/mcp/server-card.json +1 -1
package/README.md +49 -5
package/adapters/claude/.mcp.json +2 -2
package/adapters/letta/README.md +41 -0
package/adapters/letta/thumbgate-letta-adapter.js +133 -0
package/adapters/mcp/server-stdio.js +16 -1
package/adapters/opencode/opencode.json +1 -1
package/adapters/policy-engine/ethicore-guardian-client.js +68 -0
package/adapters/policy-engine/thumbgate-policy-engine-adapter.js +260 -0
package/bench/observability-eval-suite.json +26 -0
package/bin/cli.js +180 -2
package/bin/postinstall.js +1 -1
package/config/gate-templates.json +84 -0
package/config/gates/claim-verification.json +6 -0
package/config/gates/default.json +20 -0
package/config/github-about.json +1 -1
package/config/model-candidates.json +50 -0
package/package.json +66 -25
package/public/agent-manager.html +41 -1
package/public/agents-cost-savings.html +1 -1
package/public/ai-malpractice-prevention.html +2 -1
package/public/assets/brand/github-social-preview.png +0 -0
package/public/assets/brand/thumbgate-icon-512.png +0 -0
package/public/assets/brand/thumbgate-icon-pro-512.png +0 -0
package/public/assets/brand/thumbgate-icon-team-512.png +0 -0
package/public/assets/brand/thumbgate-logo-1200x360.png +0 -0
package/public/assets/brand/thumbgate-mark-inline.svg +15 -0
package/public/assets/brand/thumbgate-mark-pro.svg +23 -0
package/public/assets/brand/thumbgate-mark-team.svg +26 -0
package/public/assets/brand/thumbgate-mark.svg +15 -0
package/public/assets/brand/thumbgate-wordmark.svg +20 -0
package/public/assets/claude-thumbgate-statusbar.svg +8 -0
package/public/assets/codex-thumbgate-statusbar-test.svg +9 -0
package/public/assets/legal-intake-control-flow.svg +66 -0
package/public/blog.html +1 -1
package/public/brand/thumbgate-mark.svg +15 -0
package/public/brand/thumbgate-og.svg +16 -0
package/public/codex-enterprise.html +1 -1
package/public/codex-plugin.html +1 -1
package/public/compare.html +23 -3
package/public/dashboard.html +312 -30
package/public/federal.html +1 -1
package/public/guide.html +5 -4
package/public/index.html +167 -49
package/public/js/buyer-intent.js +672 -0
package/public/learn.html +74 -7
package/public/lessons.html +2 -1
package/public/numbers.html +3 -3
package/public/pricing.html +63 -15
package/public/pro.html +7 -7
package/scripts/activation-quickstart.js +187 -0
package/scripts/agent-memory-lifecycle.js +211 -0
package/scripts/async-eval-observability.js +236 -0
package/scripts/auto-promote-gates.js +75 -4
package/scripts/build-metadata.js +24 -3
package/scripts/cli-schema.js +22 -0
package/scripts/dashboard-chat.js +2 -1
package/scripts/dashboard.js +8 -0
package/scripts/export-databricks-bundle.js +5 -1
package/scripts/export-dpo-pairs.js +7 -2
package/scripts/feedback-aggregate.js +281 -0
package/scripts/feedback-loop.js +34 -0
package/scripts/filesystem-search.js +35 -10
package/scripts/gates-engine.js +198 -6
package/scripts/gemini-embedding-policy.js +2 -1
package/scripts/hook-stop-anti-claim.js +227 -0
package/scripts/hook-thumbgate-cache-updater.js +18 -2
package/scripts/lesson-inference.js +8 -3
package/scripts/lesson-search.js +17 -1
package/scripts/operational-integrity.js +39 -5
package/scripts/plausible-domain-config.js +4 -2
package/scripts/rate-limiter.js +12 -6
package/scripts/secret-redaction.js +166 -0
package/scripts/security-scanner.js +100 -0
package/scripts/self-distill-agent.js +3 -1
package/scripts/self-harness-optimizer.js +141 -0
package/scripts/seo-gsd.js +635 -0
package/scripts/statusline-cache-path.js +17 -2
package/scripts/statusline-cache-read.js +57 -0
package/scripts/statusline-local-stats.js +9 -1
package/scripts/statusline-meta.js +5 -2
package/scripts/statusline.sh +13 -1
package/scripts/sync-telemetry-from-prod.js +374 -0
package/scripts/telemetry-analytics.js +9 -0
package/scripts/thumbgate-search.js +85 -19
package/scripts/tool-contract-validator.js +76 -0
package/scripts/vector-store.js +44 -0
package/scripts/workspace-evolver.js +62 -2
package/src/api/server.js +715 -86

package/scripts/agent-memory-lifecycle.js CHANGED

@@ -2,6 +2,40 @@
 'use strict';
 const MEMORY_TYPES = new Set(['episodic', 'semantic', 'procedural', 'preference', 'working']);
+const MEMORY_SCOPES = new Set(['task', 'session', 'user', 'project', 'org']);
+const HIGH_RISK_TERMS = new Set([
+  'billing',
+  'checkout',
+  'compliance',
+  'credential',
+  'data-loss',
+  'deploy',
+  'deployment',
+  'git',
+  'payment',
+  'production',
+  'release',
+  'secret',
+  'security',
+  'stripe',
+  'verification',
+]);
+const KNOWN_ENTITY_PATTERNS = [
+  ['Claude Code', /\bclaude\s+code\b/i, 'agent'],
+  ['Codex', /\bcodex\b/i, 'agent'],
+  ['Cursor', /\bcursor\b/i, 'agent'],
+  ['Gemini CLI', /\bgemini\s+cli\b/i, 'agent'],
+  ['MCP', /\bmcp\b/i, 'protocol'],
+  ['Stripe', /\bstripe\b/i, 'service'],
+  ['GitHub', /\bgithub\b|\bgh\s+/i, 'service'],
+  ['Railway', /\brailway\b/i, 'service'],
+  ['Plausible', /\bplausible\b/i, 'service'],
+  ['PostHog', /\bposthog\b/i, 'service'],
+  ['SQLite', /\bsqlite\b|\bfts5\b/i, 'storage'],
+  ['LanceDB', /\blancedb\b/i, 'storage'],
+  ['Docker', /\bdocker\b/i, 'runtime'],
+  ['npm', /\bnpm\b|\bnpx\b/i, 'runtime'],
+];
 function normalizeText(value) {
   if (value === undefined || value === null) return '';
@@ -13,6 +47,178 @@ function normalizeMemoryType(value) {
   return MEMORY_TYPES.has(normalized) ? normalized : 'episodic';
 }
+function tokenize(value) {
+  return normalizeText(value)
+    .toLowerCase()
+    .split(/[^a-z0-9_.:/-]+/)
+    .filter(Boolean);
+}
+function uniqueByName(entities) {
+  const seen = new Set();
+  return entities.filter((entity) => {
+    const key = normalizeText(entity.name).toLowerCase();
+    if (!key || seen.has(key)) return false;
+    seen.add(key);
+    return true;
+  });
+}
+function collectMemoryText(memory = {}) {
+  return [
+    memory.title,
+    memory.content,
+    memory.context,
+    memory.whatWentWrong,
+    memory.whatToChange,
+    memory.whatWorked,
+    memory.domain,
+    memory.skill,
+    Array.isArray(memory.tags) ? memory.tags.join(' ') : memory.tags,
+  ].filter(Boolean).join(' ');
+}
+function extractMemoryEntities(memory = {}) {
+  const text = collectMemoryText(memory);
+  const entities = [];
+  for (const [name, pattern, type] of KNOWN_ENTITY_PATTERNS) {
+    if (pattern.test(text)) entities.push({ name, type });
+  }
+  const commandMatches = text.match(/`([^`]+)`/g) || [];
+  for (const match of commandMatches) {
+    const command = match.slice(1, -1).trim();
+    if (/^(git|npm|npx|node|gh|curl|docker|python|pytest|stripe)\b/i.test(command)) {
+      entities.push({ name: command, type: 'command' });
+    } else if (/[./-]/.test(command)) {
+      entities.push({ name: command, type: 'path' });
+    }
+  }
+  const pathMatches = text.match(/\b(?:[a-z0-9_-]+\/)+[a-z0-9_.-]+\b/gi) || [];
+  for (const filePath of pathMatches.slice(0, 8)) {
+    entities.push({ name: filePath, type: 'path' });
+  }
+  return uniqueByName(entities).slice(0, 16);
+}
+function inferMemoryScope(memory = {}) {
+  const explicit = normalizeText(memory.scope || memory.memoryScope).toLowerCase();
+  if (MEMORY_SCOPES.has(explicit)) return explicit;
+  const text = collectMemoryText(memory).toLowerCase();
+  const tags = new Set(Array.isArray(memory.tags) ? memory.tags.map((tag) => normalizeText(tag).toLowerCase()) : []);
+  if (tags.has('preference') || /\b(prefer|style|tone|my preference|user preference)\b/.test(text)) return 'user';
+  if (tags.has('org') || tags.has('team') || /\b(enterprise|seat|team|shared|org|compliance|policy|approval)\b/.test(text)) return 'org';
+  if (tags.has('repo') || tags.has('project') || tags.has('release') || tags.has('deployment')
+    || /\b(repo|repository|branch|ci|pull request|github|deploy|production|release|publish)\b/.test(text)) return 'project';
+  if (tags.has('session') || /\b(this session|current session|today|right now)\b/.test(text)) return 'session';
+  return 'task';
+}
+function scoreMemoryDecay(memory = {}, options = {}) {
+  const nowMs = options.now ? new Date(options.now).getTime() : Date.now();
+  const timestampMs = memory.timestamp ? new Date(memory.timestamp).getTime() : NaN;
+  const ageDays = Number.isFinite(timestampMs)
+    ? Math.max(0, (nowMs - timestampMs) / (1000 * 60 * 60 * 24))
+    : null;
+  const textTokens = new Set(tokenize(collectMemoryText(memory)));
+  const tags = Array.isArray(memory.tags) ? memory.tags.map((tag) => normalizeText(tag).toLowerCase()) : [];
+  const highRisk = tags.some((tag) => HIGH_RISK_TERMS.has(tag))
+    || [...textTokens].some((token) => HIGH_RISK_TERMS.has(token))
+    || ['critical', 'high'].includes(normalizeText(memory.importance).toLowerCase());
+  if (highRisk) {
+    return {
+      state: 'sticky',
+      ageDays,
+      score: 1,
+      reason: 'high-risk memories stay retrievable until explicitly retired',
+    };
+  }
+  if (ageDays === null) {
+    return {
+      state: 'review',
+      ageDays,
+      score: 0.6,
+      reason: 'memory has no timestamp, so it needs review before durable promotion',
+    };
+  }
+  if (ageDays > 180) {
+    return {
+      state: 'archive_candidate',
+      ageDays,
+      score: 0.2,
+      reason: 'old low-risk memory should be consolidated or archived',
+    };
+  }
+  if (ageDays > 60) {
+    return {
+      state: 'review',
+      ageDays,
+      score: 0.55,
+      reason: 'older low-risk memory should be refreshed before it dominates recall',
+    };
+  }
+  return {
+    state: 'active',
+    ageDays,
+    score: 0.85,
+    reason: 'recent memory remains eligible for recall',
+  };
+}
+function scoreHybridMemoryMatch(query, memory = {}, options = {}) {
+  const queryTokens = new Set(tokenize(query));
+  const memoryTokens = new Set(tokenize(collectMemoryText(memory)));
+  const queryText = normalizeText(query).toLowerCase();
+  const memoryText = collectMemoryText(memory).toLowerCase();
+  const memoryEntities = extractMemoryEntities(memory);
+  const queryEntityNames = extractMemoryEntities({ content: query }).map((entity) => entity.name.toLowerCase());
+  let lexicalMatches = 0;
+  for (const token of queryTokens) {
+    if (memoryTokens.has(token)) lexicalMatches++;
+  }
+  const lexicalScore = queryTokens.size > 0 ? lexicalMatches / queryTokens.size : 0;
+  const phraseScore = queryText && memoryText.includes(queryText) ? 0.35 : 0;
+  const entityMatches = memoryEntities.filter((entity) => queryEntityNames.includes(entity.name.toLowerCase()));
+  const entityScore = queryEntityNames.length > 0 ? entityMatches.length / queryEntityNames.length : 0;
+  const decay = scoreMemoryDecay(memory, options);
+  const lifecycleScore = decay.state === 'archive_candidate' ? -0.15 : decay.state === 'sticky' ? 0.12 : 0;
+  const score = lexicalScore + phraseScore + (entityScore * 0.45) + lifecycleScore;
+  return {
+    score: Number(Math.max(0, score).toFixed(4)),
+    lexicalScore: Number(lexicalScore.toFixed(4)),
+    entityScore: Number(entityScore.toFixed(4)),
+    matchedEntities: entityMatches,
+    decayState: decay.state,
+  };
+}
+function buildMemoryLifecycleView(memory = {}, options = {}) {
+  const scope = inferMemoryScope(memory);
+  const entities = extractMemoryEntities(memory);
+  const decay = scoreMemoryDecay(memory, options);
+  const retrieval = scoreHybridMemoryMatch(options.query || '', memory, options);
+  return {
+    scope,
+    entities,
+    decay,
+    retrievalHints: {
+      hybridScore: retrieval.score,
+      lexicalScore: retrieval.lexicalScore,
+      entityScore: retrieval.entityScore,
+      matchedEntities: retrieval.matchedEntities,
+    },
+  };
+}
 function buildMemoryLifecyclePolicy(input = {}) {
   return {
     generatedAt: normalizeText(input.generatedAt) || new Date().toISOString(),
@@ -91,6 +297,11 @@ function evaluateMemoryPromotion(memory = {}, policy = buildMemoryLifecyclePolic
 module.exports = {
   buildMemoryLifecyclePolicy,
+  buildMemoryLifecycleView,
   evaluateMemoryPromotion,
+  extractMemoryEntities,
+  inferMemoryScope,
   normalizeMemoryType,
+  scoreHybridMemoryMatch,
+  scoreMemoryDecay,
 };

package/scripts/async-eval-observability.js ADDED

@@ -0,0 +1,236 @@
+#!/usr/bin/env node
+'use strict';
+const fs = require('node:fs');
+const path = require('node:path');
+const DEFAULT_THRESHOLDS = {
+  faithfulness: 0.72,
+  answerRelevance: 0.45,
+  contextPrecision: 0.5,
+};
+function tokenize(value) {
+  return String(value || '')
+    .toLowerCase()
+    .split(/[^a-z0-9]+/)
+    .filter((token) => token.length > 2);
+}
+function unique(values) {
+  return [...new Set(values.filter(Boolean))];
+}
+function overlapScore(left, right) {
+  const leftTokens = unique(tokenize(left));
+  const rightSet = new Set(tokenize(right));
+  if (leftTokens.length === 0) return 0;
+  const matches = leftTokens.filter((token) => rightSet.has(token)).length;
+  return matches / leftTokens.length;
+}
+function splitClaims(response) {
+  return String(response || '')
+    .split(/(?:[.!?]\s+|\n+)/)
+    .map((claim) => claim.trim())
+    .filter((claim) => claim.length > 0);
+}
+function normalizeContexts(contexts) {
+  if (Array.isArray(contexts)) return contexts.map(String).filter(Boolean);
+  if (contexts) return [String(contexts)];
+  return [];
+}
+function scoreFaithfulness(response, contexts) {
+  const claims = splitClaims(response);
+  const contextText = normalizeContexts(contexts).join('\n');
+  if (claims.length === 0) return { score: 0, supportedClaims: 0, totalClaims: 0 };
+  const supportedClaims = claims.filter((claim) => {
+    const normalized = claim.toLowerCase();
+    return contextText.toLowerCase().includes(normalized) || overlapScore(claim, contextText) >= 0.58;
+  }).length;
+  return {
+    score: Number((supportedClaims / claims.length).toFixed(4)),
+    supportedClaims,
+    totalClaims: claims.length,
+  };
+}
+function scoreAnswerRelevance(question, response) {
+  const score = overlapScore(question, response);
+  return {
+    score: Number(score.toFixed(4)),
+    matchedQuestionTerms: unique(tokenize(question).filter((token) => tokenize(response).includes(token))),
+  };
+}
+function scoreContextPrecision(question, contexts, reference = '') {
+  const normalizedContexts = normalizeContexts(contexts);
+  const target = [question, reference].filter(Boolean).join('\n');
+  if (normalizedContexts.length === 0) return { score: 0, relevantContexts: 0, totalContexts: 0 };
+  let precisionSum = 0;
+  let relevantContexts = 0;
+  normalizedContexts.forEach((context, index) => {
+    const relevant = overlapScore(target, context) >= 0.22 || overlapScore(context, target) >= 0.22;
+    if (relevant) relevantContexts += 1;
+    const precisionAtK = relevantContexts / (index + 1);
+    if (relevant) precisionSum += precisionAtK;
+  });
+  const score = relevantContexts === 0 ? 0 : precisionSum / relevantContexts;
+  return {
+    score: Number(score.toFixed(4)),
+    relevantContexts,
+    totalContexts: normalizedContexts.length,
+  };
+}
+function evaluateGeneration(testCase, options = {}) {
+  const thresholds = { ...DEFAULT_THRESHOLDS, ...(options.thresholds || {}) };
+  const contexts = normalizeContexts(testCase.retrievedContexts || testCase.contexts || testCase.retrieved_contexts);
+  const faithfulness = scoreFaithfulness(testCase.response || testCase.answer, contexts);
+  const answerRelevance = scoreAnswerRelevance(testCase.question || testCase.user_input, testCase.response || testCase.answer);
+  const contextPrecision = scoreContextPrecision(
+    testCase.question || testCase.user_input,
+    contexts,
+    testCase.reference || testCase.groundTruth || ''
+  );
+  const scores = {
+    faithfulness: faithfulness.score,
+    answerRelevance: answerRelevance.score,
+    contextPrecision: contextPrecision.score,
+  };
+  const passed = scores.faithfulness >= thresholds.faithfulness
+    && scores.answerRelevance >= thresholds.answerRelevance
+    && scores.contextPrecision >= thresholds.contextPrecision;
+  return {
+    id: String(testCase.id || testCase.traceId || 'case'),
+    traceId: String(testCase.traceId || testCase.id || ''),
+    passed,
+    scores,
+    thresholds,
+    details: {
+      faithfulness,
+      answerRelevance,
+      contextPrecision,
+    },
+  };
+}
+function buildRagasCompatibleRows(cases) {
+  return cases.map((testCase) => ({
+    user_input: testCase.question || testCase.user_input || '',
+    response: testCase.response || testCase.answer || '',
+    retrieved_contexts: normalizeContexts(testCase.retrievedContexts || testCase.contexts || testCase.retrieved_contexts),
+    reference: testCase.reference || testCase.groundTruth || '',
+  }));
+}
+function buildLangSmithCompatibleRuns(cases, results) {
+  return cases.map((testCase, index) => ({
+    id: testCase.traceId || testCase.id || `case-${index + 1}`,
+    name: 'thumbgate_async_rag_eval',
+    inputs: { question: testCase.question || testCase.user_input || '' },
+    outputs: { response: testCase.response || testCase.answer || '' },
+    metadata: {
+      evaluator: 'thumbgate-async-eval-observability',
+      caseId: testCase.id || null,
+    },
+    feedback: Object.entries(results[index].scores).map(([key, score]) => ({
+      key,
+      score,
+    })),
+  }));
+}
+function buildEvalReport(cases, options = {}) {
+  const normalizedCases = Array.isArray(cases) ? cases : [];
+  const results = normalizedCases.map((testCase) => evaluateGeneration(testCase, options));
+  const passed = results.filter((result) => result.passed).length;
+  const failed = results.length - passed;
+  const aggregate = {
+    faithfulness: average(results.map((result) => result.scores.faithfulness)),
+    answerRelevance: average(results.map((result) => result.scores.answerRelevance)),
+    contextPrecision: average(results.map((result) => result.scores.contextPrecision)),
+  };
+  return {
+    generatedAt: new Date().toISOString(),
+    mode: 'async-post-generation',
+    total: results.length,
+    passed,
+    failed,
+    passRate: results.length === 0 ? 0 : Number(((passed / results.length) * 100).toFixed(2)),
+    aggregate,
+    passedThreshold: failed === 0,
+    metrics: ['faithfulness', 'answerRelevance', 'contextPrecision'],
+    sinks: {
+      ci: true,
+      langsmithCompatible: true,
+      ragasCompatible: true,
+    },
+    results,
+    ragasDataset: buildRagasCompatibleRows(normalizedCases),
+    langsmithRuns: buildLangSmithCompatibleRuns(normalizedCases, results),
+  };
+}
+function average(values) {
+  const numeric = values.filter((value) => Number.isFinite(value));
+  if (numeric.length === 0) return 0;
+  return Number((numeric.reduce((sum, value) => sum + value, 0) / numeric.length).toFixed(4));
+}
+async function runAsyncEvaluation(cases, options = {}) {
+  const report = await new Promise((resolve) => {
+    setImmediate(() => resolve(buildEvalReport(cases, options)));
+  });
+  if (options.outputPath) {
+    fs.mkdirSync(path.dirname(options.outputPath), { recursive: true });
+    fs.writeFileSync(options.outputPath, `${JSON.stringify(report, null, 2)}\n`);
+  }
+  return report;
+}
+function loadCases(inputPath) {
+  const payload = JSON.parse(fs.readFileSync(inputPath, 'utf8'));
+  return Array.isArray(payload) ? payload : payload.cases || [];
+}
+async function main(argv = process.argv.slice(2)) {
+  const inputIndex = argv.indexOf('--input');
+  const outputIndex = argv.indexOf('--output');
+  const inputPath = inputIndex >= 0 ? argv[inputIndex + 1] : 'bench/observability-eval-suite.json';
+  const outputPath = outputIndex >= 0 ? argv[outputIndex + 1] : 'proof/async-eval-observability-report.json';
+  const report = await runAsyncEvaluation(loadCases(inputPath), { outputPath });
+  process.stdout.write(`${JSON.stringify({
+    outputPath,
+    total: report.total,
+    passed: report.passed,
+    failed: report.failed,
+    passRate: report.passRate,
+  }, null, 2)}\n`);
+  if (!report.passedThreshold) process.exitCode = 1;
+}
+module.exports = {
+  DEFAULT_THRESHOLDS,
+  buildEvalReport,
+  buildLangSmithCompatibleRuns,
+  buildRagasCompatibleRows,
+  evaluateGeneration,
+  runAsyncEvaluation,
+  scoreAnswerRelevance,
+  scoreContextPrecision,
+  scoreFaithfulness,
+};
+if (require.main === module) {
+  main().catch((err) => {
+    console.error(err.stack || err.message);
+    process.exitCode = 1;
+  });
+}

package/scripts/auto-promote-gates.js CHANGED

@@ -58,6 +58,47 @@ function readJSONL(filePath) {
   }).filter(Boolean);
 }
+// --- Self-Harness stage 3: regression-gated promotion -----------------------
+// Inspired by "Self-Harness: Harnesses That Improve Themselves" (arXiv 2606.09498).
+// Stages 1-2 (weakness mining -> rule extraction) already exist via lesson
+// inference + this promoter. Stage 3 — accept a harness change only after
+// regression-testing it does not degrade behavior — was missing: a noisy 3x
+// capture could hard-block an over-broad pattern with no check that it wouldn't
+// have wrongly blocked actions that were previously ALLOWED. This replays a
+// candidate BLOCK rule against the audit trail's prior `allow` decisions; if it
+// would have blocked safe actions, the caller quarantines it to `warn` instead.
+const REGRESSION_FALSE_BLOCK_LIMIT = 0; // any prior safe action it would block => quarantine
+function getAuditTrailPath() {
+  return path.join(path.dirname(getFeedbackLogPath()), 'audit-trail.jsonl');
+}
+// Returns { falseBlocks, allowSampleSize } or null when there is no history /
+// matcher available — in which case the caller promotes as usual (fail-open to
+// existing behavior, since regression gating is an enhancement, not a hard gate).
+function regressionCheck(gate, options = {}) {
+  const auditPath = options.auditTrailPath || getAuditTrailPath();
+  const entries = readJSONL(auditPath);
+  if (!entries.length) return null;
+  // Lazy-require to avoid the gates-engine <-> auto-promote-gates require cycle.
+  let matchesGate;
+  try { ({ matchesGate } = require('./gates-engine')); } catch { return null; }
+  if (typeof matchesGate !== 'function') return null;
+  const allowed = entries.filter((e) => e && e.decision === 'allow' && e.toolName);
+  if (!allowed.length) return null;
+  let falseBlocks = 0;
+  for (const e of allowed) {
+    try {
+      if (matchesGate(gate, e.toolName, e.toolInput || {})) falseBlocks += 1;
+    } catch { /* a bad pattern/entry never counts as a false block */ }
+  }
+  return { falseBlocks, allowSampleSize: allowed.length };
+}
+function safeRegressionCheck(gate, options) {
+  try { return regressionCheck(gate, options); } catch { return null; }
+}
 function loadAutoGates() {
   const autoGatesPath = getAutoGatesPath();
   if (!fs.existsSync(autoGatesPath)) {
@@ -358,9 +399,16 @@ function promote(feedbackLogPath, options) {
       const existing = data.gates[existingIdx];
       const newAction = group.count >= BLOCK_THRESHOLD ? 'block' : 'warn';
       if (existing.action !== newAction && newAction === 'block') {
-        // Upgrade from warn to block
-        data.gates[existingIdx] = { ...existing, action: 'block', severity: 'critical', occurrences: group.count, upgradedAt: new Date().toISOString() };
-        promotions.push({ type: 'upgrade', gateId, from: existing.action, to: 'block', occurrences: group.count });
+        // Self-Harness stage 3: regression-test before upgrading warn -> block.
+        const regression = opts.skipRegression ? null : safeRegressionCheck(buildGateRule(group, 'block'), opts);
+        if (regression && regression.falseBlocks > REGRESSION_FALSE_BLOCK_LIMIT) {
+          // Would block prior safe actions — hold at warn instead of upgrading.
+          promotions.push({ type: 'upgrade-quarantined', gateId, from: existing.action, occurrences: group.count, falseBlocks: regression.falseBlocks });
+        } else {
+          // Upgrade from warn to block
+          data.gates[existingIdx] = { ...existing, action: 'block', severity: 'critical', occurrences: group.count, upgradedAt: new Date().toISOString() };
+          promotions.push({ type: 'upgrade', gateId, from: existing.action, to: 'block', occurrences: group.count });
+        }
       }
       // Update occurrence count even if no action change
       data.gates[existingIdx].occurrences = group.count;
@@ -370,6 +418,20 @@ function promote(feedbackLogPath, options) {
     // New gate — respect explicit gateAction override (e.g. 'approve' for human-approval rules)
     const gate = buildGateRule(group, opts.gateAction);
+    // Self-Harness stage 3: before a feedback rule goes live as a hard block,
+    // regression-test it against prior allowed actions. If it would have blocked
+    // safe actions, quarantine it to `warn` instead of `block`.
+    let regression = null;
+    if (gate.action === 'block' && !opts.gateAction && !opts.skipRegression) {
+      regression = safeRegressionCheck(gate, opts);
+      if (regression && regression.falseBlocks > REGRESSION_FALSE_BLOCK_LIMIT) {
+        gate.action = 'warn';
+        gate.severity = 'medium';
+        gate.quarantined = true;
+        gate.regression = regression;
+      }
+    }
     // Enforce max limit — rotate oldest
     if (data.gates.length >= MAX_AUTO_GATES) {
       const removed = data.gates.shift();
@@ -377,7 +439,13 @@ function promote(feedbackLogPath, options) {
     }
     data.gates.push(gate);
-    promotions.push({ type: 'new', gateId: gate.id, action: gate.action, occurrences: group.count });
+    promotions.push({
+      type: gate.quarantined ? 'new-quarantined' : 'new',
+      gateId: gate.id,
+      action: gate.action,
+      occurrences: group.count,
+      ...(gate.quarantined ? { falseBlocks: regression.falseBlocks, allowSampleSize: regression.allowSampleSize } : {}),
+    });
   }
   // Log promotions
@@ -438,6 +506,9 @@ module.exports = {
   groupNegativeFeedback,
   patternToGateId,
   buildGateRule,
+  regressionCheck,
+  getAuditTrailPath,
+  REGRESSION_FALSE_BLOCK_LIMIT,
   extractPatternKey,
   normalizeCommandSignature,
   isNegative,

package/scripts/build-metadata.js CHANGED

@@ -5,6 +5,11 @@ const PROJECT_ROOT = path.resolve(__dirname, '..');
 const DEFAULT_BUILD_METADATA_PATH = path.join(PROJECT_ROOT, 'config', 'build-metadata.json');
 const BUILD_SHA_ENV_KEY = 'THUMBGATE_BUILD_SHA';
 const BUILD_GENERATED_AT_ENV_KEY = 'THUMBGATE_BUILD_GENERATED_AT';
+// Railway injects this automatically for GitHub-connected deployments: the git
+// SHA of the commit that triggered the deploy. It is the ground truth for what
+// code is actually live, and unlike THUMBGATE_BUILD_SHA it cannot drift (Railway
+// sets it per deploy). https://docs.railway.com/reference/variables
+const RAILWAY_GIT_COMMIT_SHA_ENV_KEY = 'RAILWAY_GIT_COMMIT_SHA';
 function normalizeNullableText(value) {
   if (typeof value !== 'string') {
@@ -28,6 +33,7 @@ function resolveBuildMetadata({ env = process.env, filePath } = {}) {
     normalizeNullableText(env.THUMBGATE_BUILD_METADATA_PATH) ||
     DEFAULT_BUILD_METADATA_PATH;
   const envBuildSha = normalizeNullableText(env[BUILD_SHA_ENV_KEY]);
+  const railwayGitSha = normalizeNullableText(env[RAILWAY_GIT_COMMIT_SHA_ENV_KEY]);
   const envGeneratedAt = normalizeNullableText(env[BUILD_GENERATED_AT_ENV_KEY]);
   let fileBuildSha = null;
@@ -48,9 +54,23 @@ function resolveBuildMetadata({ env = process.env, filePath } = {}) {
     };
   }
-  // No SHA in the file — fall back to env only if an explicit SHA is set.
-  // (Previously a bare GENERATED_AT with no SHA could short-circuit and return
-  // { buildSha: null }, losing both signals; now we require the SHA.)
+  // No SHA baked into the image. Prefer Railway's own per-deploy commit SHA over
+  // THUMBGATE_BUILD_SHA: the latter is set out-of-band by the deploy workflow and
+  // has drifted in prod (stuck reporting an old commit while newer code was live,
+  // because RAILWAY_SYNC_VARIABLES is off and `railway up` stamping is unreliable).
+  // RAILWAY_GIT_COMMIT_SHA is injected by Railway per deploy, so it always matches
+  // the code actually serving traffic on a GitHub-connected service.
+  if (railwayGitSha) {
+    return {
+      path: resolvedPath,
+      buildSha: railwayGitSha,
+      generatedAt: envGeneratedAt,
+    };
+  }
+  // Last resort: the workflow-managed env var. Only trust it when an explicit SHA
+  // is set. (Previously a bare GENERATED_AT with no SHA could short-circuit and
+  // return { buildSha: null }, losing both signals; now we require the SHA.)
   if (envBuildSha) {
     return {
       path: resolvedPath,
@@ -124,6 +144,7 @@ if (require.main === module) {
 module.exports = {
   BUILD_GENERATED_AT_ENV_KEY,
   BUILD_SHA_ENV_KEY,
+  RAILWAY_GIT_COMMIT_SHA_ENV_KEY,
   DEFAULT_BUILD_METADATA_PATH,
   resolveBuildMetadata,
   writeBuildMetadataFile,

package/scripts/cli-schema.js CHANGED

@@ -505,6 +505,12 @@ const CLI_COMMANDS = [
     group: 'gates',
     flags: [],
   },
+  {
+    name: 'hermes-gate',
+    description: 'Hermes Agent pre_tool_call hook: gate runtime tool calls (incl. skill_manage) before they run',
+    group: 'gates',
+    flags: [],
+  },
   {
     name: 'force-gate',
     description: 'Immediately create a blocking gate from a pattern string',
@@ -650,6 +656,22 @@ const CLI_COMMANDS = [
       { name: 'json',        type: 'boolean', description: 'Output results as JSON' },
     ],
   },
+  {
+    name: 'check-update',
+    aliases: ['upgrade-check'],
+    description: 'Check for newer versions of ThumbGate from npm or GitHub',
+    group: 'ops',
+    flags: [
+      { name: 'json', type: 'boolean', description: 'Output results as JSON' },
+    ],
+  },
+  {
+    name: 'self-update',
+    aliases: ['upgrade-cli'],
+    description: 'Automatically install the latest version of ThumbGate globally',
+    group: 'ops',
+    flags: [],
+  },
 ];
 /**

package/scripts/dashboard-chat.js CHANGED

@@ -317,7 +317,8 @@ async function answerDataQuestion(question, opts = {}) {
     if (isPerplexity) return await callPerplexityEndpoint({ apiKey, prompt, fetchImpl, sources });
     return await callGeminiEndpoint({ apiKey, model, prompt, fetchImpl, sources });
   } catch (err) {
-    return { ok: false, error: 'network', message: err?.message || String(err), sources };
+    const safeMessage = (err && err.message) ? String(err.message).split('\n')[0].slice(0, 100) : 'An unexpected error occurred.';
+    return { ok: false, error: 'network', message: safeMessage, sources };
   }
 }