npm - thumbgate - Versions diffs - 1.2.0 → 1.3.0 - Mend

thumbgate 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54)

package/.claude-plugin/README.md +4 -4
package/.claude-plugin/marketplace.json +1 -1
package/.claude-plugin/plugin.json +1 -1
package/.well-known/mcp/server-card.json +1 -1
package/README.md +35 -14
package/adapters/README.md +1 -1
package/adapters/claude/.mcp.json +2 -2
package/adapters/codex/config.toml +2 -2
package/adapters/mcp/server-stdio.js +2 -2
package/adapters/opencode/opencode.json +1 -1
package/bin/cli.js +20 -11
package/config/github-about.json +1 -1
package/config/model-tiers.json +11 -0
package/package.json +8 -6
package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +1 -1
package/plugins/claude-codex-bridge/.mcp.json +1 -1
package/plugins/codex-profile/.codex-plugin/plugin.json +1 -1
package/plugins/codex-profile/.mcp.json +1 -1
package/plugins/codex-profile/INSTALL.md +1 -1
package/plugins/codex-profile/README.md +1 -1
package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +1 -1
package/plugins/cursor-marketplace/README.md +2 -2
package/plugins/cursor-marketplace/commands/capture-feedback.md +2 -2
package/plugins/cursor-marketplace/rules/feedback-capture.mdc +3 -3
package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +3 -2
package/plugins/opencode-profile/INSTALL.md +1 -1
package/public/compare.html +4 -4
package/public/guide.html +4 -4
package/public/index.html +51 -38
package/public/learn/ai-agent-persistent-memory.html +1 -0
package/public/lessons.html +325 -17
package/scripts/__pycache__/train_from_feedback.cpython-312.pyc +0 -0
package/scripts/audit-trail.js +6 -0
package/scripts/capture-railway-diagnostics.sh +97 -0
package/scripts/check-congruence.js +1 -1
package/scripts/claude-feedback-sync.js +320 -0
package/scripts/cli-telemetry.js +4 -1
package/scripts/contextfs.js +32 -23
package/scripts/dashboard.js +84 -0
package/scripts/feedback-loop.js +16 -0
package/scripts/intervention-policy.js +696 -0
package/scripts/local-model-profile.js +18 -2
package/scripts/model-tier-router.js +10 -1
package/scripts/operational-integrity.js +354 -31
package/scripts/prove-adapters.js +1 -0
package/scripts/prove-automation.js +2 -2
package/scripts/prove-packaged-runtime.js +260 -0
package/scripts/prove-runtime.js +13 -0
package/scripts/rate-limiter.js +3 -3
package/scripts/statusline-local-stats.js +2 -0
package/scripts/statusline.sh +166 -11
package/scripts/tool-registry.js +2 -2
package/scripts/workflow-sentinel.js +114 -4
package/skills/thumbgate/SKILL.md +1 -1

package/scripts/workflow-sentinel.js CHANGED

@@ -16,6 +16,7 @@ const {
 } = require('./operational-integrity');
 const { buildDockerSandboxPlan } = require('./docker-sandbox-planner');
 const { evaluatePretool } = require('./hybrid-feedback-context');
+const { getInterventionRecommendation } = require('./intervention-policy');
 const GOVERNANCE_STATE_PATH = path.join(process.env.HOME || '/tmp', '.thumbgate', 'governance-state.json');
 const DEFAULT_PROTECTED_FILE_GLOBS = [
@@ -387,6 +388,7 @@ function scoreRisk({
   affectedFiles,
   integrity,
   memoryGuard,
+  learnedPolicy,
   blastRadius,
   taskScopeViolation,
   protectedSurface,
@@ -472,6 +474,43 @@ function scoreRisk({
       { mode: memoryGuard.mode }
     );
   }
+  if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
+    const confidence = learnedPolicy.prediction.confidence || 0;
+    const label = learnedPolicy.prediction.label;
+    if (label === 'deny' && confidence >= 0.6) {
+      addDriver(
+        drivers,
+        'learned_policy_deny',
+        Math.min(0.26, 0.16 + (confidence * 0.12)),
+        'Learned intervention policy predicts a deny-worthy failure pattern.',
+        { confidence, label }
+      );
+    } else if (label === 'warn' && confidence >= 0.3) {
+      addDriver(
+        drivers,
+        'learned_policy_warn',
+        Math.min(0.18, 0.1 + (confidence * 0.08)),
+        'Learned intervention policy predicts elevated execution risk.',
+        { confidence, label }
+      );
+    } else if (label === 'verify' && confidence >= 0.3) {
+      addDriver(
+        drivers,
+        'learned_policy_verify',
+        Math.min(0.16, 0.08 + (confidence * 0.06)),
+        'Learned intervention policy predicts a verification gap before close-out.',
+        { confidence, label }
+      );
+    } else if (label === 'recall' && confidence >= 0.3) {
+      addDriver(
+        drivers,
+        'learned_policy_recall',
+        Math.min(0.14, 0.06 + (confidence * 0.05)),
+        'Learned intervention policy predicts prior lessons are needed before execution.',
+        { confidence, label }
+      );
+    }
+  }
   const score = Math.min(1, drivers.reduce((sum, driver) => sum + driver.weight, 0));
   return {
@@ -492,6 +531,7 @@ function scoreRisk({
 function buildEvidence({
   integrity,
   memoryGuard,
+  learnedPolicy,
   blastRadius,
   taskScopeViolation,
   protectedSurface,
@@ -500,6 +540,16 @@ function buildEvidence({
   if (memoryGuard && memoryGuard.mode && memoryGuard.mode !== 'allow') {
     evidence.push(`Memory guard predicted ${memoryGuard.mode}: ${memoryGuard.reason}`);
   }
+  if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
+    const topTokens = Array.isArray(learnedPolicy.topTokens)
+      ? learnedPolicy.topTokens.map((entry) => entry.token).slice(0, 3)
+      : [];
+    evidence.push(
+      `Learned policy predicted ${learnedPolicy.prediction.label} (${Math.round((learnedPolicy.prediction.confidence || 0) * 100)}% confidence)`
+      + (topTokens.length ? ` from ${topTokens.join(', ')}` : '')
+      + '.'
+    );
+  }
   if (taskScopeViolation) {
     evidence.push(
       taskScopeViolation.reasonCode === 'missing_task_scope'
@@ -575,6 +625,7 @@ function buildRemediations({
   protectedSurface,
   blastRadius,
   memoryGuard,
+  learnedPolicy,
   executionSurface,
 }) {
   const remediations = [];
@@ -611,6 +662,24 @@ function buildRemediations({
       'The system already has evidence that this action pattern failed before.'
     );
   }
+  if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
+    if (learnedPolicy.prediction.label === 'verify' && learnedPolicy.prediction.confidence >= 0.3) {
+      push(
+        'verify_before_closeout',
+        'Raise verification before claiming success',
+        'Run the relevant proof or test command and confirm the exact output before retrying or closing out.',
+        'The learned policy predicts this path tends to fail at verification time.'
+      );
+    }
+    if (learnedPolicy.prediction.label === 'recall' && learnedPolicy.prediction.confidence >= 0.3) {
+      push(
+        'retrieve_lessons',
+        'Inspect prior lessons',
+        'Call retrieve_lessons or search_lessons for this tool context before retrying.',
+        'The learned policy predicts this action needs prior lessons and corrective context.'
+      );
+    }
+  }
   if (blastRadius.fileCount >= 4 || blastRadius.surfaceCount >= 3) {
     push(
       'split_blast_radius',
@@ -636,6 +705,11 @@ function buildReasoning(report) {
     `Workflow sentinel risk ${report.band} (${report.riskScore}) for ${report.toolName}.`,
     `Blast radius: ${report.blastRadius.summary}.`,
   ];
+  if (report.learnedPolicy && report.learnedPolicy.enabled && report.learnedPolicy.prediction) {
+    lines.push(
+      `Learned policy predicted ${report.learnedPolicy.prediction.label} (${report.learnedPolicy.prediction.confidence}).`
+    );
+  }
   if (report.executionSurface?.shouldSandbox) {
     lines.push(`Execution surface: ${report.executionSurface.summary}`);
   }
@@ -658,15 +732,32 @@ function getSentinelActionType(toolName) {
   return '';
 }
-function chooseDecision({ riskScore, integrity, memoryGuard, blastRadius, command }) {
+function chooseDecision({ riskScore, integrity, memoryGuard, learnedPolicy, blastRadius, command }) {
   const hasOperationalBlockers = Boolean(integrity && Array.isArray(integrity.blockers) && integrity.blockers.length > 0);
   const destructiveBypass = /\bgit\s+push\b.*(?:--force|-f)\b/i.test(command) || /\bgh\s+pr\s+merge\b.*--admin\b/i.test(command);
+  const learnedPrediction = learnedPolicy && learnedPolicy.enabled ? learnedPolicy.prediction : null;
+  const learnedHardStop = Boolean(
+    learnedPrediction
+      && learnedPrediction.label === 'deny'
+      && learnedPrediction.confidence >= 0.7
+  );
+  const learnedWarning = Boolean(
+    learnedPrediction
+      && ['warn', 'verify', 'deny'].includes(learnedPrediction.label)
+      && learnedPrediction.confidence >= 0.3
+  );
+  const learnedRecall = Boolean(
+    learnedPrediction
+      && learnedPrediction.label === 'recall'
+      && learnedPrediction.confidence >= 0.3
+  );
   const lowBlastRadius = blastRadius.fileCount <= 1
     && blastRadius.surfaceCount <= 1
     && blastRadius.releaseSensitiveFiles.length === 0
     && blastRadius.unapprovedProtectedFiles === 0;
   const lowRiskHandoff = /\bgit\s+push\b|\bgh\s+pr\s+(?:create|merge)\b/i.test(command)
     && !destructiveBypass
+    && !learnedHardStop
     && lowBlastRadius
     && !hasOperationalBlockers
     && memoryGuard
@@ -686,10 +777,10 @@ function chooseDecision({ riskScore, integrity, memoryGuard, blastRadius, comman
   if (lowRiskHandoff) {
     return 'allow';
   }
-  if (destructiveBypass || repeatedHighBlast || (hasOperationalBlockers && riskScore >= 0.72) || riskScore >= 0.86) {
+  if (destructiveBypass || learnedHardStop || repeatedHighBlast || (hasOperationalBlockers && riskScore >= 0.72) || riskScore >= 0.86) {
     return 'deny';
   }
-  if (riskScore >= 0.45) {
+  if (riskScore >= 0.45 || (learnedWarning && riskScore >= 0.3) || (learnedRecall && riskScore >= 0.34)) {
     return 'warn';
   }
   return 'allow';
@@ -732,6 +823,20 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
     affectedFiles,
   }), options.feedbackOptions || {});
   const memoryGuard = normalizeMemoryGuardForSentinel(rawMemoryGuard, highRiskAction);
+  const learnedPolicy = getInterventionRecommendation({
+    toolName,
+    command: toolInput.command || '',
+    affectedFiles,
+    integrity,
+    memoryGuard,
+    riskBand: highRiskAction ? 'high' : 'low',
+    taskScopeViolation,
+    protectedSurface: protectedSurfaceForRisk,
+  }, {
+    feedbackDir: options.feedbackDir
+      || process.env.THUMBGATE_FEEDBACK_DIR
+      || (repoRoot ? path.join(repoRoot, '.thumbgate') : null),
+  });
   const blastRadius = buildBlastRadius({
     affectedFiles,
     integrity,
@@ -743,6 +848,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
     affectedFiles,
     integrity,
     memoryGuard,
+    learnedPolicy,
     blastRadius,
     taskScopeViolation,
     protectedSurface: protectedSurfaceForRisk,
@@ -763,6 +869,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
     riskScore: risk.score,
     integrity,
     memoryGuard,
+    learnedPolicy,
     blastRadius: {
       ...blastRadius,
       unapprovedProtectedFiles: protectedSurfaceForRisk.unapprovedProtectedFiles.length,
@@ -772,6 +879,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
   const evidence = buildEvidence({
     integrity,
     memoryGuard,
+    learnedPolicy,
     blastRadius,
     taskScopeViolation,
     protectedSurface: protectedSurfaceForRisk,
@@ -782,6 +890,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
     protectedSurface: protectedSurfaceForRisk,
     blastRadius,
     memoryGuard,
+    learnedPolicy,
     executionSurface,
   });
   const summary = decision === 'allow'
@@ -790,7 +899,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
       ? 'Predicted workflow risk is elevated before execution.'
       : 'Predicted workflow failure before execution.';
   const report = {
-    sentinelVersion: 'workflow-sentinel-v1',
+    sentinelVersion: 'workflow-sentinel-v2',
     toolName,
     decision,
     riskScore: risk.score,
@@ -802,6 +911,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
     remediations,
     executionSurface,
     memoryGuard,
+    learnedPolicy,
     taskScopeViolation,
     operationalIntegrity: {
       ok: integrity.ok,

package/skills/thumbgate/SKILL.md CHANGED

@@ -86,7 +86,7 @@ Bounded retrieval of relevant feedback history for the current task. The agent g
 | | Free | Pro | Team |
 |---|---|---|---|
-| Feedback capture | Unlimited | Unlimited | Unlimited |
+| Feedback capture | 3/day | Unlimited | Unlimited |
 | Lesson search | 5/day | Unlimited | Unlimited |
 | Active gates | 5 | Unlimited | Unlimited |
 | Dashboard | - | Yes | Yes |