npm - thumbgate - Versions diffs - 1.5.0 → 1.5.2 - Mend

thumbgate 1.5.0 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/.well-known/mcp/server-card.json +1 -1
package/CHANGELOG.md +504 -0
package/README.md +251 -223
package/adapters/README.md +1 -1
package/adapters/claude/.mcp.json +2 -2
package/adapters/codex/config.toml +4 -2
package/adapters/mcp/server-stdio.js +34 -3
package/adapters/opencode/opencode.json +1 -1
package/bench/prompt-eval-suite.json +106 -0
package/bin/cli.js +21 -8
package/bin/postinstall.js +25 -17
package/config/evals/agent-safety-eval.json +131 -0
package/config/github-about.json +5 -2
package/config/specs/agent-safety.json +79 -0
package/package.json +69 -29
package/public/compare.html +3 -3
package/public/dashboard.html +1399 -0
package/public/guide.html +2 -2
package/public/index.html +230 -98
package/scripts/auto-wire-hooks.js +77 -27
package/scripts/bot-detection.js +165 -0
package/scripts/cli-feedback.js +6 -2
package/scripts/commercial-offer.js +4 -4
package/scripts/dashboard.js +152 -2
package/scripts/decision-trace.js +354 -0
package/scripts/feedback-loop.js +4 -8
package/scripts/prompt-eval.js +363 -0
package/scripts/rate-limiter.js +77 -24
package/scripts/sales-pipeline.js +681 -0
package/scripts/session-episode-store.js +329 -0
package/scripts/session-health-sensor.js +242 -0
package/scripts/spec-gate.js +362 -0
package/scripts/statusline.sh +6 -9
package/skills/thumbgate/SKILL.md +1 -1
package/src/api/server.js +368 -12

package/scripts/session-episode-store.js ADDED Viewed

@@ -0,0 +1,329 @@
+#!/usr/bin/env node
+'use strict';
+/**
+ * Session Episode Store — episodic memory for agent sessions.
+ *
+ * Persists session health snapshots across conversations so the system
+ * learns cross-session degradation patterns:
+ *   - Which times of day produce degraded sessions
+ *   - Which task categories trigger repeat errors
+ *   - How long sessions last before degradation onset
+ *   - Whether feedback is actually reducing repeat mistakes over time
+ *
+ * This is the "episodic experience" layer described in the harnessed-agent
+ * framework (Memory = working context + semantic knowledge + episodic experience).
+ * The session-health-sensor provides the real-time signal; this module provides
+ * the longitudinal learning.
+ */
+const crypto = require('node:crypto');
+const path = require('node:path');
+const { readJsonl, appendJsonl } = require('./fs-utils');
+const { resolveFeedbackDir } = require('./feedback-paths');
+const {
+  computeSessionHealth,
+  loadRecentFeedback,
+} = require('./session-health-sensor');
+const EPISODE_FILE = 'session-episodes.jsonl';
+const PATTERN_WINDOW_EPISODES = 20;
+// ---------------------------------------------------------------------------
+// Episode Recording
+// ---------------------------------------------------------------------------
+function getEpisodePath({ feedbackDir } = {}) {
+  const dir = feedbackDir || resolveFeedbackDir();
+  return path.join(dir, EPISODE_FILE);
+}
+function buildEpisode({
+  sessionId = null,
+  health = null,
+  feedbackEntries = [],
+  tags = [],
+  durationMs = null,
+} = {}) {
+  const now = new Date();
+  const effectiveHealth = health || computeSessionHealth(feedbackEntries);
+  const negativeEntries = feedbackEntries.filter((e) => e.signal === 'negative');
+  const categories = extractCategories(feedbackEntries);
+  const errorFingerprints = extractErrorFingerprints(negativeEntries);
+  return {
+    sessionId: sessionId || `session_${Date.now()}_${crypto.randomBytes(4).toString('hex')}`,
+    recordedAt: now.toISOString(),
+    hourOfDay: now.getHours(),
+    dayOfWeek: now.getDay(),
+    score: effectiveHealth.score,
+    grade: effectiveHealth.grade,
+    signals: effectiveHealth.signals.map((s) => ({ signal: s.signal, severity: s.severity })),
+    recommendation: effectiveHealth.recommendation,
+    feedbackCount: feedbackEntries.length,
+    negativeCount: negativeEntries.length,
+    positiveCount: feedbackEntries.filter((e) => e.signal === 'positive').length,
+    categories,
+    errorFingerprints,
+    durationMs,
+    tags,
+  };
+}
+function recordEpisode(episode, options = {}) {
+  const episodePath = getEpisodePath(options);
+  appendJsonl(episodePath, episode);
+  return episode;
+}
+function captureAndRecordEpisode(options = {}) {
+  const feedbackEntries = loadRecentFeedback(options);
+  const episode = buildEpisode({
+    sessionId: options.sessionId,
+    feedbackEntries,
+    tags: options.tags || [],
+    durationMs: options.durationMs,
+  });
+  return recordEpisode(episode, options);
+}
+// ---------------------------------------------------------------------------
+// Episode Loading
+// ---------------------------------------------------------------------------
+function loadEpisodes(options = {}) {
+  return readJsonl(getEpisodePath(options));
+}
+function loadRecentEpisodes(count = PATTERN_WINDOW_EPISODES, options = {}) {
+  return readJsonl(getEpisodePath(options), { tail: true, maxLines: count });
+}
+// ---------------------------------------------------------------------------
+// Cross-Session Pattern Detection
+// ---------------------------------------------------------------------------
+function analyzeTimeOfDayPatterns(episodes) {
+  const byHour = new Map();
+  for (const ep of episodes) {
+    const hour = ep.hourOfDay;
+    if (hour === undefined || hour === null) continue;
+    const bucket = byHour.get(hour) || { total: 0, degraded: 0, critical: 0, totalScore: 0 };
+    bucket.total += 1;
+    bucket.totalScore += ep.score || 0;
+    if (ep.grade === 'degraded') bucket.degraded += 1;
+    if (ep.grade === 'critical') bucket.critical += 1;
+    byHour.set(hour, bucket);
+  }
+  const patterns = [];
+  for (const [hour, bucket] of byHour) {
+    if (bucket.total < 2) continue;
+    const failRate = (bucket.degraded + bucket.critical) / bucket.total;
+    const avgScore = Math.round(bucket.totalScore / bucket.total);
+    if (failRate > 0.5) {
+      patterns.push({
+        type: 'time_of_day_risk',
+        hour,
+        failRate: Math.round(failRate * 100),
+        avgScore,
+        sessions: bucket.total,
+        recommendation: `Sessions at ${formatHour(hour)} degrade ${Math.round(failRate * 100)}% of the time. Consider scheduling complex work at other hours.`,
+      });
+    }
+  }
+  return patterns.sort((a, b) => b.failRate - a.failRate);
+}
+function analyzeCategoryPatterns(episodes) {
+  const byCategory = new Map();
+  for (const ep of episodes) {
+    for (const cat of ep.categories || []) {
+      const bucket = byCategory.get(cat) || { total: 0, degraded: 0, totalScore: 0 };
+      bucket.total += 1;
+      bucket.totalScore += ep.score || 0;
+      if (ep.grade === 'degraded' || ep.grade === 'critical') bucket.degraded += 1;
+      byCategory.set(cat, bucket);
+    }
+  }
+  const patterns = [];
+  for (const [category, bucket] of byCategory) {
+    if (bucket.total < 2) continue;
+    const failRate = bucket.degraded / bucket.total;
+    const avgScore = Math.round(bucket.totalScore / bucket.total);
+    if (failRate > 0.4) {
+      patterns.push({
+        type: 'category_risk',
+        category,
+        failRate: Math.round(failRate * 100),
+        avgScore,
+        sessions: bucket.total,
+        recommendation: `"${category}" tasks degrade ${Math.round(failRate * 100)}% of sessions. Break these into smaller chunks or add prevention rules.`,
+      });
+    }
+  }
+  return patterns.sort((a, b) => b.failRate - a.failRate);
+}
+function analyzeRecurringErrors(episodes) {
+  const fingerprints = new Map();
+  for (const ep of episodes) {
+    for (const fp of ep.errorFingerprints || []) {
+      const count = (fingerprints.get(fp) || 0) + 1;
+      fingerprints.set(fp, count);
+    }
+  }
+  const patterns = [];
+  for (const [fingerprint, count] of fingerprints) {
+    if (count < 2) continue;
+    patterns.push({
+      type: 'recurring_error',
+      fingerprint,
+      occurrences: count,
+      recommendation: `Error "${fingerprint.slice(0, 80)}" has recurred across ${count} sessions. Promote to a prevention rule.`,
+    });
+  }
+  return patterns.sort((a, b) => b.occurrences - a.occurrences);
+}
+function analyzeFeedbackEffectiveness(episodes) {
+  if (episodes.length < 3) return null;
+  const recentHalf = episodes.slice(Math.floor(episodes.length / 2));
+  const olderHalf = episodes.slice(0, Math.floor(episodes.length / 2));
+  const avgRecent = average(recentHalf.map((e) => e.score || 0));
+  const avgOlder = average(olderHalf.map((e) => e.score || 0));
+  const recentRepeatRate = average(recentHalf.map((e) => (e.errorFingerprints || []).length));
+  const olderRepeatRate = average(olderHalf.map((e) => (e.errorFingerprints || []).length));
+  const scoreTrend = avgRecent - avgOlder;
+  const repeatTrend = recentRepeatRate - olderRepeatRate;
+  return {
+    type: 'feedback_effectiveness',
+    olderAvgScore: Math.round(avgOlder),
+    recentAvgScore: Math.round(avgRecent),
+    scoreTrend: Math.round(scoreTrend),
+    olderRepeatRate: round2(olderRepeatRate),
+    recentRepeatRate: round2(recentRepeatRate),
+    repeatTrend: round2(repeatTrend),
+    improving: scoreTrend > 0 && repeatTrend <= 0,
+    recommendation: scoreTrend > 0
+      ? `Session health is improving (${Math.round(avgOlder)} → ${Math.round(avgRecent)}). Feedback loop is working.`
+      : `Session health is declining (${Math.round(avgOlder)} → ${Math.round(avgRecent)}). Review prevention rules and consider a fresh context reset.`,
+  };
+}
+function analyzePatterns(episodes) {
+  const timePatterns = analyzeTimeOfDayPatterns(episodes);
+  const categoryPatterns = analyzeCategoryPatterns(episodes);
+  const recurringErrors = analyzeRecurringErrors(episodes);
+  const effectiveness = analyzeFeedbackEffectiveness(episodes);
+  return {
+    timeOfDay: timePatterns,
+    categories: categoryPatterns,
+    recurringErrors,
+    effectiveness,
+    episodesAnalyzed: episodes.length,
+    analyzedAt: new Date().toISOString(),
+  };
+}
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+function extractCategories(entries) {
+  const cats = new Set();
+  for (const entry of entries) {
+    if (Array.isArray(entry.tags)) {
+      for (const tag of entry.tags) cats.add(tag);
+    }
+    if (entry.richContext && entry.richContext.domain) {
+      cats.add(entry.richContext.domain);
+    }
+  }
+  return Array.from(cats).slice(0, 20);
+}
+function extractErrorFingerprints(negativeEntries) {
+  const fps = new Set();
+  for (const entry of negativeEntries) {
+    if (!entry.whatWentWrong) continue;
+    const fp = entry.whatWentWrong
+      .toLowerCase()
+      .replace(/\b(line|col|column)\s*\d+/g, '')
+      .replace(/\b\d+\b/g, 'N')
+      .replace(/\s+/g, ' ')
+      .trim()
+      .slice(0, 200);
+    if (fp) fps.add(fp);
+  }
+  return Array.from(fps).slice(0, 20);
+}
+function formatHour(hour) {
+  const h = hour % 12 || 12;
+  const ampm = hour < 12 ? 'AM' : 'PM';
+  return `${h}${ampm}`;
+}
+function average(nums) {
+  return nums.length > 0 ? nums.reduce((a, b) => a + b, 0) / nums.length : 0;
+}
+function round2(n) {
+  return Math.round(n * 100) / 100;
+}
+// ---------------------------------------------------------------------------
+// CLI
+// ---------------------------------------------------------------------------
+function isCliInvocation(argv = process.argv) {
+  const invokedPath = argv[1];
+  return invokedPath ? path.resolve(invokedPath) === __filename : false;
+}
+if (isCliInvocation()) {
+  const command = process.argv[2] || 'capture';
+  if (command === 'capture') {
+    const episode = captureAndRecordEpisode();
+    console.log(JSON.stringify(episode, null, 2));
+  } else if (command === 'patterns') {
+    const episodes = loadEpisodes();
+    const patterns = analyzePatterns(episodes);
+    console.log(JSON.stringify(patterns, null, 2));
+  } else if (command === 'history') {
+    const episodes = loadRecentEpisodes(20);
+    console.log(JSON.stringify(episodes, null, 2));
+  } else {
+    console.error(`Unknown command: ${command}. Use: capture, patterns, history`);
+    process.exit(1);
+  }
+}
+module.exports = {
+  EPISODE_FILE,
+  PATTERN_WINDOW_EPISODES,
+  analyzePatterns,
+  analyzeCategoryPatterns,
+  analyzeFeedbackEffectiveness,
+  analyzeRecurringErrors,
+  analyzeTimeOfDayPatterns,
+  buildEpisode,
+  captureAndRecordEpisode,
+  getEpisodePath,
+  loadEpisodes,
+  loadRecentEpisodes,
+  recordEpisode,
+};

package/scripts/session-health-sensor.js ADDED Viewed

@@ -0,0 +1,242 @@
+#!/usr/bin/env node
+'use strict';
+/**
+ * Session Health Sensor
+ *
+ * Detects real-time agent session degradation by analyzing feedback patterns,
+ * error recurrence, and context drift signals. Inspired by community research
+ * showing that "context rot" — not model quality — is the primary cause of
+ * perceived AI agent degradation on large projects.
+ *
+ * Signals tracked:
+ *   1. Repeat error rate — same error recurring within a session window
+ *   2. Negative feedback density — ratio of thumbs-down in recent window
+ *   3. Stagnation — consecutive negative signals without recovery
+ *   4. Context amnesia — feedback referencing "forgot", "again", "already told"
+ *
+ * Output: A session health score (0–100) and actionable degradation signals.
+ *
+ * Integration points:
+ *   - Thompson Sampling: feeds per-category reliability with session context
+ *   - Gates engine: health score can trigger "restart session" recommendation
+ *   - Self-heal: low health triggers diagnostic capture
+ */
+const path = require('node:path');
+const { readJsonl } = require('./fs-utils');
+const { resolveFeedbackDir } = require('./feedback-paths');
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+const SESSION_WINDOW_MS = 45 * 60 * 1000; // 45 minutes — aligned with community best practice
+const AMNESIA_PATTERNS = /\b(again|forgot|already told|repeated|same mistake|same error|keeps? (doing|making|breaking)|context (lost|drift|rot)|amnesia)\b/i;
+const STAGNATION_THRESHOLD = 4; // consecutive negatives without a positive
+const HEALTH_FLOOR = 0;
+const HEALTH_CEILING = 100;
+// ---------------------------------------------------------------------------
+// Data Loading
+// ---------------------------------------------------------------------------
+function loadRecentFeedback({ feedbackDir, windowMs = SESSION_WINDOW_MS, now = Date.now() } = {}) {
+  const dir = feedbackDir || resolveFeedbackDir();
+  const logPath = path.join(dir, 'feedback-log.jsonl');
+  const entries = readJsonl(logPath, { tail: true, maxLines: 200 });
+  const cutoff = now - windowMs;
+  return entries.filter((entry) => {
+    const ts = entry.timestamp ? new Date(entry.timestamp).getTime() : 0;
+    return Number.isFinite(ts) && ts >= cutoff;
+  });
+}
+// ---------------------------------------------------------------------------
+// Signal Detectors
+// ---------------------------------------------------------------------------
+function detectRepeatErrors(entries) {
+  const errorTexts = entries
+    .filter((e) => e.signal === 'negative' && e.whatWentWrong)
+    .map((e) => normalizeErrorText(e.whatWentWrong));
+  const seen = new Map();
+  let repeats = 0;
+  for (const text of errorTexts) {
+    const count = (seen.get(text) || 0) + 1;
+    seen.set(text, count);
+    if (count > 1) repeats += 1;
+  }
+  return {
+    signal: 'repeat_errors',
+    count: repeats,
+    total: errorTexts.length,
+    rate: errorTexts.length > 0 ? repeats / errorTexts.length : 0,
+    severity: repeats >= 3 ? 'critical' : repeats >= 1 ? 'warning' : 'healthy',
+  };
+}
+function detectNegativeDensity(entries) {
+  if (entries.length === 0) {
+    return { signal: 'negative_density', count: 0, total: 0, rate: 0, severity: 'healthy' };
+  }
+  const negatives = entries.filter((e) => e.signal === 'negative').length;
+  const rate = negatives / entries.length;
+  return {
+    signal: 'negative_density',
+    count: negatives,
+    total: entries.length,
+    rate,
+    severity: rate > 0.7 ? 'critical' : rate > 0.4 ? 'warning' : 'healthy',
+  };
+}
+function detectStagnation(entries) {
+  let maxConsecutiveNegatives = 0;
+  let current = 0;
+  for (const entry of entries) {
+    if (entry.signal === 'negative') {
+      current += 1;
+      maxConsecutiveNegatives = Math.max(maxConsecutiveNegatives, current);
+    } else {
+      current = 0;
+    }
+  }
+  return {
+    signal: 'stagnation',
+    consecutiveNegatives: maxConsecutiveNegatives,
+    threshold: STAGNATION_THRESHOLD,
+    severity: maxConsecutiveNegatives >= STAGNATION_THRESHOLD * 2 ? 'critical'
+      : maxConsecutiveNegatives >= STAGNATION_THRESHOLD ? 'warning'
+        : 'healthy',
+  };
+}
+function detectContextAmnesia(entries) {
+  const amnesiaEntries = entries.filter((e) => {
+    const text = [e.context, e.whatWentWrong, e.whatToChange].filter(Boolean).join(' ');
+    return AMNESIA_PATTERNS.test(text);
+  });
+  return {
+    signal: 'context_amnesia',
+    count: amnesiaEntries.length,
+    total: entries.length,
+    severity: amnesiaEntries.length >= 3 ? 'critical'
+      : amnesiaEntries.length >= 1 ? 'warning'
+        : 'healthy',
+  };
+}
+// ---------------------------------------------------------------------------
+// Health Score
+// ---------------------------------------------------------------------------
+const SEVERITY_WEIGHTS = { healthy: 0, warning: 15, critical: 30 };
+function computeSessionHealth(entries) {
+  const signals = [
+    detectRepeatErrors(entries),
+    detectNegativeDensity(entries),
+    detectStagnation(entries),
+    detectContextAmnesia(entries),
+  ];
+  let penalty = 0;
+  for (const signal of signals) {
+    penalty += SEVERITY_WEIGHTS[signal.severity] || 0;
+  }
+  // Extra penalty for high negative density rate
+  const density = signals.find((s) => s.signal === 'negative_density');
+  if (density && density.rate > 0) {
+    penalty += Math.round(density.rate * 20);
+  }
+  const score = Math.max(HEALTH_FLOOR, Math.min(HEALTH_CEILING, HEALTH_CEILING - penalty));
+  return {
+    score,
+    grade: score >= 80 ? 'healthy' : score >= 50 ? 'degraded' : 'critical',
+    signals,
+    recommendation: buildRecommendation(score, signals),
+    windowMs: SESSION_WINDOW_MS,
+    entriesAnalyzed: entries.length,
+    computedAt: new Date().toISOString(),
+  };
+}
+function buildRecommendation(score, signals) {
+  if (score >= 80) return null;
+  const critical = signals.filter((s) => s.severity === 'critical');
+  const parts = [];
+  if (critical.some((s) => s.signal === 'context_amnesia')) {
+    parts.push('Context drift detected. Start a fresh session with CLAUDE.md re-read.');
+  }
+  if (critical.some((s) => s.signal === 'repeat_errors')) {
+    parts.push('Same errors recurring. Capture feedback and promote to prevention rule.');
+  }
+  if (critical.some((s) => s.signal === 'stagnation')) {
+    parts.push('No recovery from failures. Break the task into smaller chunks or restart.');
+  }
+  if (score < 50 && parts.length === 0) {
+    parts.push('Session health is critically low. Consider starting a fresh conversation.');
+  }
+  return parts.length > 0 ? parts.join(' ') : 'Session showing mild degradation. Monitor closely.';
+}
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+function normalizeErrorText(text) {
+  if (!text) return '';
+  return text
+    .toLowerCase()
+    .replace(/\b(line|col|column)\s*\d+/g, '')
+    .replace(/\b\d+\b/g, 'N')
+    .replace(/\s+/g, ' ')
+    .trim()
+    .slice(0, 200);
+}
+// ---------------------------------------------------------------------------
+// CLI
+// ---------------------------------------------------------------------------
+function isCliInvocation(argv = process.argv) {
+  const invokedPath = argv[1];
+  return invokedPath ? path.resolve(invokedPath) === __filename : false;
+}
+if (isCliInvocation()) {
+  const entries = loadRecentFeedback();
+  const health = computeSessionHealth(entries);
+  console.log(JSON.stringify(health, null, 2));
+  if (health.grade === 'critical') process.exit(1);
+  if (health.grade === 'degraded') process.exit(2);
+}
+module.exports = {
+  AMNESIA_PATTERNS,
+  SESSION_WINDOW_MS,
+  STAGNATION_THRESHOLD,
+  computeSessionHealth,
+  detectContextAmnesia,
+  detectNegativeDensity,
+  detectRepeatErrors,
+  detectStagnation,
+  loadRecentFeedback,
+  normalizeErrorText,
+};