thumbgate 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/.claude-plugin/README.md +4 -4
  2. package/.claude-plugin/marketplace.json +1 -1
  3. package/.claude-plugin/plugin.json +1 -1
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +35 -14
  6. package/adapters/README.md +1 -1
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/codex/config.toml +2 -2
  9. package/adapters/mcp/server-stdio.js +2 -2
  10. package/adapters/opencode/opencode.json +1 -1
  11. package/bin/cli.js +20 -11
  12. package/config/github-about.json +1 -1
  13. package/config/model-tiers.json +11 -0
  14. package/package.json +8 -6
  15. package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +1 -1
  16. package/plugins/claude-codex-bridge/.mcp.json +1 -1
  17. package/plugins/codex-profile/.codex-plugin/plugin.json +1 -1
  18. package/plugins/codex-profile/.mcp.json +1 -1
  19. package/plugins/codex-profile/INSTALL.md +1 -1
  20. package/plugins/codex-profile/README.md +1 -1
  21. package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +1 -1
  22. package/plugins/cursor-marketplace/README.md +2 -2
  23. package/plugins/cursor-marketplace/commands/capture-feedback.md +2 -2
  24. package/plugins/cursor-marketplace/rules/feedback-capture.mdc +3 -3
  25. package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +3 -2
  26. package/plugins/opencode-profile/INSTALL.md +1 -1
  27. package/public/compare.html +4 -4
  28. package/public/guide.html +4 -4
  29. package/public/index.html +51 -38
  30. package/public/learn/ai-agent-persistent-memory.html +1 -0
  31. package/public/lessons.html +325 -17
  32. package/scripts/__pycache__/train_from_feedback.cpython-312.pyc +0 -0
  33. package/scripts/audit-trail.js +6 -0
  34. package/scripts/capture-railway-diagnostics.sh +97 -0
  35. package/scripts/check-congruence.js +1 -1
  36. package/scripts/claude-feedback-sync.js +320 -0
  37. package/scripts/cli-telemetry.js +4 -1
  38. package/scripts/contextfs.js +32 -23
  39. package/scripts/dashboard.js +84 -0
  40. package/scripts/feedback-loop.js +16 -0
  41. package/scripts/intervention-policy.js +696 -0
  42. package/scripts/local-model-profile.js +18 -2
  43. package/scripts/model-tier-router.js +10 -1
  44. package/scripts/operational-integrity.js +354 -31
  45. package/scripts/prove-adapters.js +1 -0
  46. package/scripts/prove-automation.js +2 -2
  47. package/scripts/prove-packaged-runtime.js +260 -0
  48. package/scripts/prove-runtime.js +13 -0
  49. package/scripts/rate-limiter.js +3 -3
  50. package/scripts/statusline-local-stats.js +2 -0
  51. package/scripts/statusline.sh +166 -11
  52. package/scripts/tool-registry.js +2 -2
  53. package/scripts/workflow-sentinel.js +114 -4
  54. package/skills/thumbgate/SKILL.md +1 -1
@@ -16,6 +16,7 @@ const {
16
16
  } = require('./operational-integrity');
17
17
  const { buildDockerSandboxPlan } = require('./docker-sandbox-planner');
18
18
  const { evaluatePretool } = require('./hybrid-feedback-context');
19
+ const { getInterventionRecommendation } = require('./intervention-policy');
19
20
 
20
21
  const GOVERNANCE_STATE_PATH = path.join(process.env.HOME || '/tmp', '.thumbgate', 'governance-state.json');
21
22
  const DEFAULT_PROTECTED_FILE_GLOBS = [
@@ -387,6 +388,7 @@ function scoreRisk({
387
388
  affectedFiles,
388
389
  integrity,
389
390
  memoryGuard,
391
+ learnedPolicy,
390
392
  blastRadius,
391
393
  taskScopeViolation,
392
394
  protectedSurface,
@@ -472,6 +474,43 @@ function scoreRisk({
472
474
  { mode: memoryGuard.mode }
473
475
  );
474
476
  }
477
+ if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
478
+ const confidence = learnedPolicy.prediction.confidence || 0;
479
+ const label = learnedPolicy.prediction.label;
480
+ if (label === 'deny' && confidence >= 0.6) {
481
+ addDriver(
482
+ drivers,
483
+ 'learned_policy_deny',
484
+ Math.min(0.26, 0.16 + (confidence * 0.12)),
485
+ 'Learned intervention policy predicts a deny-worthy failure pattern.',
486
+ { confidence, label }
487
+ );
488
+ } else if (label === 'warn' && confidence >= 0.3) {
489
+ addDriver(
490
+ drivers,
491
+ 'learned_policy_warn',
492
+ Math.min(0.18, 0.1 + (confidence * 0.08)),
493
+ 'Learned intervention policy predicts elevated execution risk.',
494
+ { confidence, label }
495
+ );
496
+ } else if (label === 'verify' && confidence >= 0.3) {
497
+ addDriver(
498
+ drivers,
499
+ 'learned_policy_verify',
500
+ Math.min(0.16, 0.08 + (confidence * 0.06)),
501
+ 'Learned intervention policy predicts a verification gap before close-out.',
502
+ { confidence, label }
503
+ );
504
+ } else if (label === 'recall' && confidence >= 0.3) {
505
+ addDriver(
506
+ drivers,
507
+ 'learned_policy_recall',
508
+ Math.min(0.14, 0.06 + (confidence * 0.05)),
509
+ 'Learned intervention policy predicts prior lessons are needed before execution.',
510
+ { confidence, label }
511
+ );
512
+ }
513
+ }
475
514
 
476
515
  const score = Math.min(1, drivers.reduce((sum, driver) => sum + driver.weight, 0));
477
516
  return {
@@ -492,6 +531,7 @@ function scoreRisk({
492
531
  function buildEvidence({
493
532
  integrity,
494
533
  memoryGuard,
534
+ learnedPolicy,
495
535
  blastRadius,
496
536
  taskScopeViolation,
497
537
  protectedSurface,
@@ -500,6 +540,16 @@ function buildEvidence({
500
540
  if (memoryGuard && memoryGuard.mode && memoryGuard.mode !== 'allow') {
501
541
  evidence.push(`Memory guard predicted ${memoryGuard.mode}: ${memoryGuard.reason}`);
502
542
  }
543
+ if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
544
+ const topTokens = Array.isArray(learnedPolicy.topTokens)
545
+ ? learnedPolicy.topTokens.map((entry) => entry.token).slice(0, 3)
546
+ : [];
547
+ evidence.push(
548
+ `Learned policy predicted ${learnedPolicy.prediction.label} (${Math.round((learnedPolicy.prediction.confidence || 0) * 100)}% confidence)`
549
+ + (topTokens.length ? ` from ${topTokens.join(', ')}` : '')
550
+ + '.'
551
+ );
552
+ }
503
553
  if (taskScopeViolation) {
504
554
  evidence.push(
505
555
  taskScopeViolation.reasonCode === 'missing_task_scope'
@@ -575,6 +625,7 @@ function buildRemediations({
575
625
  protectedSurface,
576
626
  blastRadius,
577
627
  memoryGuard,
628
+ learnedPolicy,
578
629
  executionSurface,
579
630
  }) {
580
631
  const remediations = [];
@@ -611,6 +662,24 @@ function buildRemediations({
611
662
  'The system already has evidence that this action pattern failed before.'
612
663
  );
613
664
  }
665
+ if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
666
+ if (learnedPolicy.prediction.label === 'verify' && learnedPolicy.prediction.confidence >= 0.3) {
667
+ push(
668
+ 'verify_before_closeout',
669
+ 'Raise verification before claiming success',
670
+ 'Run the relevant proof or test command and confirm the exact output before retrying or closing out.',
671
+ 'The learned policy predicts this path tends to fail at verification time.'
672
+ );
673
+ }
674
+ if (learnedPolicy.prediction.label === 'recall' && learnedPolicy.prediction.confidence >= 0.3) {
675
+ push(
676
+ 'retrieve_lessons',
677
+ 'Inspect prior lessons',
678
+ 'Call retrieve_lessons or search_lessons for this tool context before retrying.',
679
+ 'The learned policy predicts this action needs prior lessons and corrective context.'
680
+ );
681
+ }
682
+ }
614
683
  if (blastRadius.fileCount >= 4 || blastRadius.surfaceCount >= 3) {
615
684
  push(
616
685
  'split_blast_radius',
@@ -636,6 +705,11 @@ function buildReasoning(report) {
636
705
  `Workflow sentinel risk ${report.band} (${report.riskScore}) for ${report.toolName}.`,
637
706
  `Blast radius: ${report.blastRadius.summary}.`,
638
707
  ];
708
+ if (report.learnedPolicy && report.learnedPolicy.enabled && report.learnedPolicy.prediction) {
709
+ lines.push(
710
+ `Learned policy predicted ${report.learnedPolicy.prediction.label} (${report.learnedPolicy.prediction.confidence}).`
711
+ );
712
+ }
639
713
  if (report.executionSurface?.shouldSandbox) {
640
714
  lines.push(`Execution surface: ${report.executionSurface.summary}`);
641
715
  }
@@ -658,15 +732,32 @@ function getSentinelActionType(toolName) {
658
732
  return '';
659
733
  }
660
734
 
661
- function chooseDecision({ riskScore, integrity, memoryGuard, blastRadius, command }) {
735
+ function chooseDecision({ riskScore, integrity, memoryGuard, learnedPolicy, blastRadius, command }) {
662
736
  const hasOperationalBlockers = Boolean(integrity && Array.isArray(integrity.blockers) && integrity.blockers.length > 0);
663
737
  const destructiveBypass = /\bgit\s+push\b.*(?:--force|-f)\b/i.test(command) || /\bgh\s+pr\s+merge\b.*--admin\b/i.test(command);
738
+ const learnedPrediction = learnedPolicy && learnedPolicy.enabled ? learnedPolicy.prediction : null;
739
+ const learnedHardStop = Boolean(
740
+ learnedPrediction
741
+ && learnedPrediction.label === 'deny'
742
+ && learnedPrediction.confidence >= 0.7
743
+ );
744
+ const learnedWarning = Boolean(
745
+ learnedPrediction
746
+ && ['warn', 'verify', 'deny'].includes(learnedPrediction.label)
747
+ && learnedPrediction.confidence >= 0.3
748
+ );
749
+ const learnedRecall = Boolean(
750
+ learnedPrediction
751
+ && learnedPrediction.label === 'recall'
752
+ && learnedPrediction.confidence >= 0.3
753
+ );
664
754
  const lowBlastRadius = blastRadius.fileCount <= 1
665
755
  && blastRadius.surfaceCount <= 1
666
756
  && blastRadius.releaseSensitiveFiles.length === 0
667
757
  && blastRadius.unapprovedProtectedFiles === 0;
668
758
  const lowRiskHandoff = /\bgit\s+push\b|\bgh\s+pr\s+(?:create|merge)\b/i.test(command)
669
759
  && !destructiveBypass
760
+ && !learnedHardStop
670
761
  && lowBlastRadius
671
762
  && !hasOperationalBlockers
672
763
  && memoryGuard
@@ -686,10 +777,10 @@ function chooseDecision({ riskScore, integrity, memoryGuard, blastRadius, comman
686
777
  if (lowRiskHandoff) {
687
778
  return 'allow';
688
779
  }
689
- if (destructiveBypass || repeatedHighBlast || (hasOperationalBlockers && riskScore >= 0.72) || riskScore >= 0.86) {
780
+ if (destructiveBypass || learnedHardStop || repeatedHighBlast || (hasOperationalBlockers && riskScore >= 0.72) || riskScore >= 0.86) {
690
781
  return 'deny';
691
782
  }
692
- if (riskScore >= 0.45) {
783
+ if (riskScore >= 0.45 || (learnedWarning && riskScore >= 0.3) || (learnedRecall && riskScore >= 0.34)) {
693
784
  return 'warn';
694
785
  }
695
786
  return 'allow';
@@ -732,6 +823,20 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
732
823
  affectedFiles,
733
824
  }), options.feedbackOptions || {});
734
825
  const memoryGuard = normalizeMemoryGuardForSentinel(rawMemoryGuard, highRiskAction);
826
+ const learnedPolicy = getInterventionRecommendation({
827
+ toolName,
828
+ command: toolInput.command || '',
829
+ affectedFiles,
830
+ integrity,
831
+ memoryGuard,
832
+ riskBand: highRiskAction ? 'high' : 'low',
833
+ taskScopeViolation,
834
+ protectedSurface: protectedSurfaceForRisk,
835
+ }, {
836
+ feedbackDir: options.feedbackDir
837
+ || process.env.THUMBGATE_FEEDBACK_DIR
838
+ || (repoRoot ? path.join(repoRoot, '.thumbgate') : null),
839
+ });
735
840
  const blastRadius = buildBlastRadius({
736
841
  affectedFiles,
737
842
  integrity,
@@ -743,6 +848,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
743
848
  affectedFiles,
744
849
  integrity,
745
850
  memoryGuard,
851
+ learnedPolicy,
746
852
  blastRadius,
747
853
  taskScopeViolation,
748
854
  protectedSurface: protectedSurfaceForRisk,
@@ -763,6 +869,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
763
869
  riskScore: risk.score,
764
870
  integrity,
765
871
  memoryGuard,
872
+ learnedPolicy,
766
873
  blastRadius: {
767
874
  ...blastRadius,
768
875
  unapprovedProtectedFiles: protectedSurfaceForRisk.unapprovedProtectedFiles.length,
@@ -772,6 +879,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
772
879
  const evidence = buildEvidence({
773
880
  integrity,
774
881
  memoryGuard,
882
+ learnedPolicy,
775
883
  blastRadius,
776
884
  taskScopeViolation,
777
885
  protectedSurface: protectedSurfaceForRisk,
@@ -782,6 +890,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
782
890
  protectedSurface: protectedSurfaceForRisk,
783
891
  blastRadius,
784
892
  memoryGuard,
893
+ learnedPolicy,
785
894
  executionSurface,
786
895
  });
787
896
  const summary = decision === 'allow'
@@ -790,7 +899,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
790
899
  ? 'Predicted workflow risk is elevated before execution.'
791
900
  : 'Predicted workflow failure before execution.';
792
901
  const report = {
793
- sentinelVersion: 'workflow-sentinel-v1',
902
+ sentinelVersion: 'workflow-sentinel-v2',
794
903
  toolName,
795
904
  decision,
796
905
  riskScore: risk.score,
@@ -802,6 +911,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
802
911
  remediations,
803
912
  executionSurface,
804
913
  memoryGuard,
914
+ learnedPolicy,
805
915
  taskScopeViolation,
806
916
  operationalIntegrity: {
807
917
  ok: integrity.ok,
@@ -86,7 +86,7 @@ Bounded retrieval of relevant feedback history for the current task. The agent g
86
86
 
87
87
  | | Free | Pro | Team |
88
88
  |---|---|---|---|
89
- | Feedback capture | Unlimited | Unlimited | Unlimited |
89
+ | Feedback capture | 3/day | Unlimited | Unlimited |
90
90
  | Lesson search | 5/day | Unlimited | Unlimited |
91
91
  | Active gates | 5 | Unlimited | Unlimited |
92
92
  | Dashboard | - | Yes | Yes |