thumbgate 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/README.md +4 -4
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +35 -14
- package/adapters/README.md +1 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/codex/config.toml +2 -2
- package/adapters/mcp/server-stdio.js +2 -2
- package/adapters/opencode/opencode.json +1 -1
- package/bin/cli.js +20 -11
- package/config/github-about.json +1 -1
- package/config/model-tiers.json +11 -0
- package/package.json +8 -6
- package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +1 -1
- package/plugins/claude-codex-bridge/.mcp.json +1 -1
- package/plugins/codex-profile/.codex-plugin/plugin.json +1 -1
- package/plugins/codex-profile/.mcp.json +1 -1
- package/plugins/codex-profile/INSTALL.md +1 -1
- package/plugins/codex-profile/README.md +1 -1
- package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +1 -1
- package/plugins/cursor-marketplace/README.md +2 -2
- package/plugins/cursor-marketplace/commands/capture-feedback.md +2 -2
- package/plugins/cursor-marketplace/rules/feedback-capture.mdc +3 -3
- package/plugins/cursor-marketplace/skills/capture-feedback/SKILL.md +3 -2
- package/plugins/opencode-profile/INSTALL.md +1 -1
- package/public/compare.html +4 -4
- package/public/guide.html +4 -4
- package/public/index.html +51 -38
- package/public/learn/ai-agent-persistent-memory.html +1 -0
- package/public/lessons.html +325 -17
- package/scripts/__pycache__/train_from_feedback.cpython-312.pyc +0 -0
- package/scripts/audit-trail.js +6 -0
- package/scripts/capture-railway-diagnostics.sh +97 -0
- package/scripts/check-congruence.js +1 -1
- package/scripts/claude-feedback-sync.js +320 -0
- package/scripts/cli-telemetry.js +4 -1
- package/scripts/contextfs.js +32 -23
- package/scripts/dashboard.js +84 -0
- package/scripts/feedback-loop.js +16 -0
- package/scripts/intervention-policy.js +696 -0
- package/scripts/local-model-profile.js +18 -2
- package/scripts/model-tier-router.js +10 -1
- package/scripts/operational-integrity.js +354 -31
- package/scripts/prove-adapters.js +1 -0
- package/scripts/prove-automation.js +2 -2
- package/scripts/prove-packaged-runtime.js +260 -0
- package/scripts/prove-runtime.js +13 -0
- package/scripts/rate-limiter.js +3 -3
- package/scripts/statusline-local-stats.js +2 -0
- package/scripts/statusline.sh +166 -11
- package/scripts/tool-registry.js +2 -2
- package/scripts/workflow-sentinel.js +114 -4
- package/skills/thumbgate/SKILL.md +1 -1
|
@@ -16,6 +16,7 @@ const {
|
|
|
16
16
|
} = require('./operational-integrity');
|
|
17
17
|
const { buildDockerSandboxPlan } = require('./docker-sandbox-planner');
|
|
18
18
|
const { evaluatePretool } = require('./hybrid-feedback-context');
|
|
19
|
+
const { getInterventionRecommendation } = require('./intervention-policy');
|
|
19
20
|
|
|
20
21
|
const GOVERNANCE_STATE_PATH = path.join(process.env.HOME || '/tmp', '.thumbgate', 'governance-state.json');
|
|
21
22
|
const DEFAULT_PROTECTED_FILE_GLOBS = [
|
|
@@ -387,6 +388,7 @@ function scoreRisk({
|
|
|
387
388
|
affectedFiles,
|
|
388
389
|
integrity,
|
|
389
390
|
memoryGuard,
|
|
391
|
+
learnedPolicy,
|
|
390
392
|
blastRadius,
|
|
391
393
|
taskScopeViolation,
|
|
392
394
|
protectedSurface,
|
|
@@ -472,6 +474,43 @@ function scoreRisk({
|
|
|
472
474
|
{ mode: memoryGuard.mode }
|
|
473
475
|
);
|
|
474
476
|
}
|
|
477
|
+
if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
|
|
478
|
+
const confidence = learnedPolicy.prediction.confidence || 0;
|
|
479
|
+
const label = learnedPolicy.prediction.label;
|
|
480
|
+
if (label === 'deny' && confidence >= 0.6) {
|
|
481
|
+
addDriver(
|
|
482
|
+
drivers,
|
|
483
|
+
'learned_policy_deny',
|
|
484
|
+
Math.min(0.26, 0.16 + (confidence * 0.12)),
|
|
485
|
+
'Learned intervention policy predicts a deny-worthy failure pattern.',
|
|
486
|
+
{ confidence, label }
|
|
487
|
+
);
|
|
488
|
+
} else if (label === 'warn' && confidence >= 0.3) {
|
|
489
|
+
addDriver(
|
|
490
|
+
drivers,
|
|
491
|
+
'learned_policy_warn',
|
|
492
|
+
Math.min(0.18, 0.1 + (confidence * 0.08)),
|
|
493
|
+
'Learned intervention policy predicts elevated execution risk.',
|
|
494
|
+
{ confidence, label }
|
|
495
|
+
);
|
|
496
|
+
} else if (label === 'verify' && confidence >= 0.3) {
|
|
497
|
+
addDriver(
|
|
498
|
+
drivers,
|
|
499
|
+
'learned_policy_verify',
|
|
500
|
+
Math.min(0.16, 0.08 + (confidence * 0.06)),
|
|
501
|
+
'Learned intervention policy predicts a verification gap before close-out.',
|
|
502
|
+
{ confidence, label }
|
|
503
|
+
);
|
|
504
|
+
} else if (label === 'recall' && confidence >= 0.3) {
|
|
505
|
+
addDriver(
|
|
506
|
+
drivers,
|
|
507
|
+
'learned_policy_recall',
|
|
508
|
+
Math.min(0.14, 0.06 + (confidence * 0.05)),
|
|
509
|
+
'Learned intervention policy predicts prior lessons are needed before execution.',
|
|
510
|
+
{ confidence, label }
|
|
511
|
+
);
|
|
512
|
+
}
|
|
513
|
+
}
|
|
475
514
|
|
|
476
515
|
const score = Math.min(1, drivers.reduce((sum, driver) => sum + driver.weight, 0));
|
|
477
516
|
return {
|
|
@@ -492,6 +531,7 @@ function scoreRisk({
|
|
|
492
531
|
function buildEvidence({
|
|
493
532
|
integrity,
|
|
494
533
|
memoryGuard,
|
|
534
|
+
learnedPolicy,
|
|
495
535
|
blastRadius,
|
|
496
536
|
taskScopeViolation,
|
|
497
537
|
protectedSurface,
|
|
@@ -500,6 +540,16 @@ function buildEvidence({
|
|
|
500
540
|
if (memoryGuard && memoryGuard.mode && memoryGuard.mode !== 'allow') {
|
|
501
541
|
evidence.push(`Memory guard predicted ${memoryGuard.mode}: ${memoryGuard.reason}`);
|
|
502
542
|
}
|
|
543
|
+
if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
|
|
544
|
+
const topTokens = Array.isArray(learnedPolicy.topTokens)
|
|
545
|
+
? learnedPolicy.topTokens.map((entry) => entry.token).slice(0, 3)
|
|
546
|
+
: [];
|
|
547
|
+
evidence.push(
|
|
548
|
+
`Learned policy predicted ${learnedPolicy.prediction.label} (${Math.round((learnedPolicy.prediction.confidence || 0) * 100)}% confidence)`
|
|
549
|
+
+ (topTokens.length ? ` from ${topTokens.join(', ')}` : '')
|
|
550
|
+
+ '.'
|
|
551
|
+
);
|
|
552
|
+
}
|
|
503
553
|
if (taskScopeViolation) {
|
|
504
554
|
evidence.push(
|
|
505
555
|
taskScopeViolation.reasonCode === 'missing_task_scope'
|
|
@@ -575,6 +625,7 @@ function buildRemediations({
|
|
|
575
625
|
protectedSurface,
|
|
576
626
|
blastRadius,
|
|
577
627
|
memoryGuard,
|
|
628
|
+
learnedPolicy,
|
|
578
629
|
executionSurface,
|
|
579
630
|
}) {
|
|
580
631
|
const remediations = [];
|
|
@@ -611,6 +662,24 @@ function buildRemediations({
|
|
|
611
662
|
'The system already has evidence that this action pattern failed before.'
|
|
612
663
|
);
|
|
613
664
|
}
|
|
665
|
+
if (learnedPolicy && learnedPolicy.enabled && learnedPolicy.prediction) {
|
|
666
|
+
if (learnedPolicy.prediction.label === 'verify' && learnedPolicy.prediction.confidence >= 0.3) {
|
|
667
|
+
push(
|
|
668
|
+
'verify_before_closeout',
|
|
669
|
+
'Raise verification before claiming success',
|
|
670
|
+
'Run the relevant proof or test command and confirm the exact output before retrying or closing out.',
|
|
671
|
+
'The learned policy predicts this path tends to fail at verification time.'
|
|
672
|
+
);
|
|
673
|
+
}
|
|
674
|
+
if (learnedPolicy.prediction.label === 'recall' && learnedPolicy.prediction.confidence >= 0.3) {
|
|
675
|
+
push(
|
|
676
|
+
'retrieve_lessons',
|
|
677
|
+
'Inspect prior lessons',
|
|
678
|
+
'Call retrieve_lessons or search_lessons for this tool context before retrying.',
|
|
679
|
+
'The learned policy predicts this action needs prior lessons and corrective context.'
|
|
680
|
+
);
|
|
681
|
+
}
|
|
682
|
+
}
|
|
614
683
|
if (blastRadius.fileCount >= 4 || blastRadius.surfaceCount >= 3) {
|
|
615
684
|
push(
|
|
616
685
|
'split_blast_radius',
|
|
@@ -636,6 +705,11 @@ function buildReasoning(report) {
|
|
|
636
705
|
`Workflow sentinel risk ${report.band} (${report.riskScore}) for ${report.toolName}.`,
|
|
637
706
|
`Blast radius: ${report.blastRadius.summary}.`,
|
|
638
707
|
];
|
|
708
|
+
if (report.learnedPolicy && report.learnedPolicy.enabled && report.learnedPolicy.prediction) {
|
|
709
|
+
lines.push(
|
|
710
|
+
`Learned policy predicted ${report.learnedPolicy.prediction.label} (${report.learnedPolicy.prediction.confidence}).`
|
|
711
|
+
);
|
|
712
|
+
}
|
|
639
713
|
if (report.executionSurface?.shouldSandbox) {
|
|
640
714
|
lines.push(`Execution surface: ${report.executionSurface.summary}`);
|
|
641
715
|
}
|
|
@@ -658,15 +732,32 @@ function getSentinelActionType(toolName) {
|
|
|
658
732
|
return '';
|
|
659
733
|
}
|
|
660
734
|
|
|
661
|
-
function chooseDecision({ riskScore, integrity, memoryGuard, blastRadius, command }) {
|
|
735
|
+
function chooseDecision({ riskScore, integrity, memoryGuard, learnedPolicy, blastRadius, command }) {
|
|
662
736
|
const hasOperationalBlockers = Boolean(integrity && Array.isArray(integrity.blockers) && integrity.blockers.length > 0);
|
|
663
737
|
const destructiveBypass = /\bgit\s+push\b.*(?:--force|-f)\b/i.test(command) || /\bgh\s+pr\s+merge\b.*--admin\b/i.test(command);
|
|
738
|
+
const learnedPrediction = learnedPolicy && learnedPolicy.enabled ? learnedPolicy.prediction : null;
|
|
739
|
+
const learnedHardStop = Boolean(
|
|
740
|
+
learnedPrediction
|
|
741
|
+
&& learnedPrediction.label === 'deny'
|
|
742
|
+
&& learnedPrediction.confidence >= 0.7
|
|
743
|
+
);
|
|
744
|
+
const learnedWarning = Boolean(
|
|
745
|
+
learnedPrediction
|
|
746
|
+
&& ['warn', 'verify', 'deny'].includes(learnedPrediction.label)
|
|
747
|
+
&& learnedPrediction.confidence >= 0.3
|
|
748
|
+
);
|
|
749
|
+
const learnedRecall = Boolean(
|
|
750
|
+
learnedPrediction
|
|
751
|
+
&& learnedPrediction.label === 'recall'
|
|
752
|
+
&& learnedPrediction.confidence >= 0.3
|
|
753
|
+
);
|
|
664
754
|
const lowBlastRadius = blastRadius.fileCount <= 1
|
|
665
755
|
&& blastRadius.surfaceCount <= 1
|
|
666
756
|
&& blastRadius.releaseSensitiveFiles.length === 0
|
|
667
757
|
&& blastRadius.unapprovedProtectedFiles === 0;
|
|
668
758
|
const lowRiskHandoff = /\bgit\s+push\b|\bgh\s+pr\s+(?:create|merge)\b/i.test(command)
|
|
669
759
|
&& !destructiveBypass
|
|
760
|
+
&& !learnedHardStop
|
|
670
761
|
&& lowBlastRadius
|
|
671
762
|
&& !hasOperationalBlockers
|
|
672
763
|
&& memoryGuard
|
|
@@ -686,10 +777,10 @@ function chooseDecision({ riskScore, integrity, memoryGuard, blastRadius, comman
|
|
|
686
777
|
if (lowRiskHandoff) {
|
|
687
778
|
return 'allow';
|
|
688
779
|
}
|
|
689
|
-
if (destructiveBypass || repeatedHighBlast || (hasOperationalBlockers && riskScore >= 0.72) || riskScore >= 0.86) {
|
|
780
|
+
if (destructiveBypass || learnedHardStop || repeatedHighBlast || (hasOperationalBlockers && riskScore >= 0.72) || riskScore >= 0.86) {
|
|
690
781
|
return 'deny';
|
|
691
782
|
}
|
|
692
|
-
if (riskScore >= 0.45) {
|
|
783
|
+
if (riskScore >= 0.45 || (learnedWarning && riskScore >= 0.3) || (learnedRecall && riskScore >= 0.34)) {
|
|
693
784
|
return 'warn';
|
|
694
785
|
}
|
|
695
786
|
return 'allow';
|
|
@@ -732,6 +823,20 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
732
823
|
affectedFiles,
|
|
733
824
|
}), options.feedbackOptions || {});
|
|
734
825
|
const memoryGuard = normalizeMemoryGuardForSentinel(rawMemoryGuard, highRiskAction);
|
|
826
|
+
const learnedPolicy = getInterventionRecommendation({
|
|
827
|
+
toolName,
|
|
828
|
+
command: toolInput.command || '',
|
|
829
|
+
affectedFiles,
|
|
830
|
+
integrity,
|
|
831
|
+
memoryGuard,
|
|
832
|
+
riskBand: highRiskAction ? 'high' : 'low',
|
|
833
|
+
taskScopeViolation,
|
|
834
|
+
protectedSurface: protectedSurfaceForRisk,
|
|
835
|
+
}, {
|
|
836
|
+
feedbackDir: options.feedbackDir
|
|
837
|
+
|| process.env.THUMBGATE_FEEDBACK_DIR
|
|
838
|
+
|| (repoRoot ? path.join(repoRoot, '.thumbgate') : null),
|
|
839
|
+
});
|
|
735
840
|
const blastRadius = buildBlastRadius({
|
|
736
841
|
affectedFiles,
|
|
737
842
|
integrity,
|
|
@@ -743,6 +848,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
743
848
|
affectedFiles,
|
|
744
849
|
integrity,
|
|
745
850
|
memoryGuard,
|
|
851
|
+
learnedPolicy,
|
|
746
852
|
blastRadius,
|
|
747
853
|
taskScopeViolation,
|
|
748
854
|
protectedSurface: protectedSurfaceForRisk,
|
|
@@ -763,6 +869,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
763
869
|
riskScore: risk.score,
|
|
764
870
|
integrity,
|
|
765
871
|
memoryGuard,
|
|
872
|
+
learnedPolicy,
|
|
766
873
|
blastRadius: {
|
|
767
874
|
...blastRadius,
|
|
768
875
|
unapprovedProtectedFiles: protectedSurfaceForRisk.unapprovedProtectedFiles.length,
|
|
@@ -772,6 +879,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
772
879
|
const evidence = buildEvidence({
|
|
773
880
|
integrity,
|
|
774
881
|
memoryGuard,
|
|
882
|
+
learnedPolicy,
|
|
775
883
|
blastRadius,
|
|
776
884
|
taskScopeViolation,
|
|
777
885
|
protectedSurface: protectedSurfaceForRisk,
|
|
@@ -782,6 +890,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
782
890
|
protectedSurface: protectedSurfaceForRisk,
|
|
783
891
|
blastRadius,
|
|
784
892
|
memoryGuard,
|
|
893
|
+
learnedPolicy,
|
|
785
894
|
executionSurface,
|
|
786
895
|
});
|
|
787
896
|
const summary = decision === 'allow'
|
|
@@ -790,7 +899,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
790
899
|
? 'Predicted workflow risk is elevated before execution.'
|
|
791
900
|
: 'Predicted workflow failure before execution.';
|
|
792
901
|
const report = {
|
|
793
|
-
sentinelVersion: 'workflow-sentinel-
|
|
902
|
+
sentinelVersion: 'workflow-sentinel-v2',
|
|
794
903
|
toolName,
|
|
795
904
|
decision,
|
|
796
905
|
riskScore: risk.score,
|
|
@@ -802,6 +911,7 @@ function evaluateWorkflowSentinel(toolName, toolInput = {}, options = {}) {
|
|
|
802
911
|
remediations,
|
|
803
912
|
executionSurface,
|
|
804
913
|
memoryGuard,
|
|
914
|
+
learnedPolicy,
|
|
805
915
|
taskScopeViolation,
|
|
806
916
|
operationalIntegrity: {
|
|
807
917
|
ok: integrity.ok,
|
|
@@ -86,7 +86,7 @@ Bounded retrieval of relevant feedback history for the current task. The agent g
|
|
|
86
86
|
|
|
87
87
|
| | Free | Pro | Team |
|
|
88
88
|
|---|---|---|---|
|
|
89
|
-
| Feedback capture |
|
|
89
|
+
| Feedback capture | 3/day | Unlimited | Unlimited |
|
|
90
90
|
| Lesson search | 5/day | Unlimited | Unlimited |
|
|
91
91
|
| Active gates | 5 | Unlimited | Unlimited |
|
|
92
92
|
| Dashboard | - | Yes | Yes |
|