thumbgate 1.5.8 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/CHANGELOG.md +198 -0
- package/README.md +7 -6
- package/adapters/README.md +1 -1
- package/adapters/chatgpt/openapi.yaml +25 -0
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/codex/config.toml +4 -4
- package/adapters/mcp/server-stdio.js +41 -2
- package/adapters/opencode/opencode.json +1 -1
- package/bin/cli.js +100 -10
- package/openapi/openapi.yaml +25 -0
- package/package.json +13 -3
- package/public/codex-plugin.html +277 -0
- package/public/dashboard.html +141 -13
- package/public/index.html +92 -34
- package/public/learn.html +13 -2
- package/public/lessons.html +5 -2
- package/public/pro.html +8 -1
- package/scripts/auto-wire-hooks.js +10 -5
- package/scripts/billing.js +503 -8
- package/scripts/contextfs.js +1 -1
- package/scripts/dashboard.js +236 -0
- package/scripts/feedback-loop.js +22 -0
- package/scripts/gates-engine.js +461 -7
- package/scripts/hook-runtime.js +42 -0
- package/scripts/llm-client.js +25 -10
- package/scripts/mailer/index.js +13 -0
- package/scripts/mailer/resend-mailer.js +350 -0
- package/scripts/mcp-config.js +13 -0
- package/scripts/published-cli.js +8 -0
- package/scripts/seo-gsd.js +118 -4
- package/scripts/statusline.sh +8 -0
- package/scripts/vector-store.js +21 -7
- package/src/api/server.js +112 -7
package/scripts/gates-engine.js
CHANGED
|
@@ -83,6 +83,11 @@ const DEFAULT_PROTECTED_FILE_GLOBS = [
|
|
|
83
83
|
];
|
|
84
84
|
// Tool names tested with `.has(toolName)` to decide whether a call is an edit-style operation.
const EDIT_LIKE_TOOLS = new Set(['Edit', 'Write', 'MultiEdit']);
// Case-insensitive, word-bounded match for: git add/commit/push, gh pr create/merge,
// npm/yarn/pnpm publish, and `rm -rf`.
// NOTE(review): the consumer of this pattern is not visible in this excerpt.
const HIGH_RISK_BASH_PATTERN = /\b(?:git\s+(?:add|commit|push)|gh\s+pr\s+(?:create|merge)|npm\s+publish|yarn\s+publish|pnpm\s+publish|rm\s+-rf)\b/i;
// Risk score / failure ratio at or above which boosted-risk history is treated as high risk.
const BOOSTED_RISK_BLOCK_SCORE = 0.8;
// Minimum number of recorded examples before the count-based boosted-risk rules apply.
const BOOSTED_RISK_MIN_EXAMPLES = 3;
// Action id tracked when a git commit is evaluated in a PR-branch context; while it is
// present and unsatisfied, evaluatePendingPrThreadResolutionGate denies non-evidence tool calls.
const PR_THREAD_RESOLUTION_ACTION = 'pr_thread_resolution_verified_after_commit';
// Regex source (kept as a string) handed to registerClaimGate; it pairs a thread/review/comment
// word with a resolved/verified/checked/addressed/fixed word in either order.
// NOTE(review): presumably compiled to a RegExp by registerClaimGate — confirm flags there.
const PR_THREAD_RESOLUTION_CLAIM_PATTERN = '(?:thread|review|comment).*?(?:resolved|verified|checked|addressed|fixed)|(?:resolved|verified|checked|addressed|fixed).*?(?:thread|review|comment)';
// Action ids handed to registerClaimGate; isThreadResolutionSatisfied accepts any one of them.
const PR_THREAD_RESOLUTION_REQUIRED_ACTIONS = ['pr_threads_checked', 'thread_resolution_verified'];
|
|
86
91
|
|
|
87
92
|
// ---------------------------------------------------------------------------
|
|
88
93
|
// Config loading
|
|
@@ -609,6 +614,218 @@ function isHighRiskAction(toolName, toolInput = {}, affectedFiles = []) {
|
|
|
609
614
|
return false;
|
|
610
615
|
}
|
|
611
616
|
|
|
617
|
+
/**
 * Reduce an arbitrary value to a lowercase, space-separated token string.
 *
 * Falsy input becomes ''. Every run of characters outside [a-z0-9] (after
 * lowercasing) collapses to a single space, and the result is trimmed.
 *
 * @param {*} value - Tag or action text to normalize.
 * @returns {string} Normalized text (possibly empty).
 */
function normalizeRiskToken(value) {
  const text = value ? String(value) : '';
  const lowered = text.toLowerCase();
  const spaced = lowered.replace(/[^a-z0-9]+/g, ' ');
  return spaced.trim();
}
|
|
623
|
+
|
|
624
|
+
/**
 * Naive English singularizer for a single token.
 *
 * Tokens of three characters or fewer are returned unchanged (after trim).
 * Longer tokens ending in "ies" become "...y"; otherwise one trailing "s"
 * is dropped.
 *
 * @param {*} token - Token to singularize; falsy values become ''.
 * @returns {string} The singularized, trimmed token.
 */
function singularizeRiskToken(token) {
  const word = String(token || '').trim();
  if (word.length <= 3) return word;
  if (word.endsWith('ies')) return `${word.slice(0, -3)}y`;
  return word.endsWith('s') ? word.slice(0, -1) : word;
}
|
|
630
|
+
|
|
631
|
+
/**
 * Expand a token into the set of strings that should count as a match for it:
 * the token itself, its singular form, and any configured synonyms (each also
 * added in singular form).
 *
 * The synonym table is consulted with an own-property check. A plain
 * `synonyms[candidate]` lookup would resolve inherited keys such as
 * "constructor" to a function, and iterating that throws a TypeError.
 *
 * @param {string} token - A single (ideally already normalized) token.
 * @returns {string[]} Unique, non-empty variants in insertion order.
 */
function riskTokenVariants(token) {
  const normalized = singularizeRiskToken(token);
  const variants = new Set([token, normalized]);
  const synonyms = {
    comment: ['comment', 'comments', 'review', 'reviews', 'reply', 'replies', 'thread', 'threads'],
    thread: ['thread', 'threads', 'review', 'reviews', 'comment', 'comments'],
    bot: ['bot', 'bots', 'automation', 'automated', 'assistant', 'claude', 'codex'],
    pr: ['pr', 'pull', 'pullrequest', 'pullrequests'],
    file: ['file', 'files', 'path', 'paths'],
    test: ['test', 'tests', 'ci', 'coverage', 'verify', 'verification'],
  };
  for (const candidate of [token, normalized]) {
    // Own keys only: never follow Object.prototype (constructor, toString, ...).
    if (!Object.prototype.hasOwnProperty.call(synonyms, candidate)) continue;
    for (const item of synonyms[candidate]) {
      variants.add(item);
      variants.add(singularizeRiskToken(item));
    }
  }
  return [...variants].filter(Boolean);
}
|
|
650
|
+
|
|
651
|
+
/**
 * Coerce one raw risk-tag entry into a uniform record.
 *
 * - Falsy values and non-string primitives yield null.
 * - A string yields `{ tag }` only (no numeric fields).
 * - An object yields `{ tag, count, failures, riskRate }`, where the tag is the
 *   first truthy of tag/key/name/domain/label/id and each numeric field is the
 *   first non-nullish alias passed through Number() (NaN when none is present).
 *
 * @param {*} entry - Raw tag entry.
 * @returns {{tag: string, count?: number, failures?: number, riskRate?: number}|null}
 */
function normalizeRiskTagEntry(entry) {
  if (!entry) return null;

  switch (typeof entry) {
    case 'string':
      return { tag: entry };
    case 'object':
      break;
    default:
      return null;
  }

  const label = entry.tag || entry.key || entry.name || entry.domain || entry.label || entry.id;
  if (!label) return null;

  const count = Number(entry.count ?? entry.examples ?? entry.exampleCount ?? entry.total ?? entry.samples);
  const failures = Number(entry.failures ?? entry.failureCount);
  const riskRate = Number(entry.riskRate ?? entry.rate ?? entry.failureRate ?? entry.score ?? entry.riskScore);
  return { tag: String(label), count, failures, riskRate };
}
|
|
666
|
+
|
|
667
|
+
/**
 * Gather every risk-tag entry attached to a tool input.
 *
 * Reads `highRiskTags` and `riskTags` on the input itself, plus `highRiskTags`,
 * `tags` and `highRiskDomains` on `toolInput.boostedRisk`. Only array sources
 * are used; each element goes through normalizeRiskTagEntry and entries that
 * normalize to null are dropped.
 *
 * A null or non-object `toolInput` is treated as "no tags" rather than
 * throwing, because the default parameter only covers `undefined`.
 *
 * @param {object} [toolInput={}] - Tool input payload.
 * @returns {Array<object>} Normalized tag entries in source order.
 */
function collectBoostedRiskTags(toolInput = {}) {
  const input = toolInput && typeof toolInput === 'object' ? toolInput : {};
  const boostedRisk = input.boostedRisk && typeof input.boostedRisk === 'object'
    ? input.boostedRisk
    : {};
  const sources = [
    input.highRiskTags,
    input.riskTags,
    boostedRisk.highRiskTags,
    boostedRisk.tags,
    boostedRisk.highRiskDomains,
  ];
  // flatMap instead of push(...spread): no argument-count limit on large arrays.
  return sources
    .filter((source) => Array.isArray(source))
    .flatMap((source) => source.map((entry) => normalizeRiskTagEntry(entry)).filter(Boolean));
}
|
|
686
|
+
|
|
687
|
+
/**
 * Decide whether the boosted-risk metadata on a tool input counts as high risk.
 *
 * Returns true when any of the following holds:
 *  1. `boostedRisk.riskLevel|level|mode` contains the word high/critical/block/deny;
 *  2. the aggregate score (riskScore/score/riskRate/failureRate/baseRate) is
 *     >= BOOSTED_RISK_BLOCK_SCORE;
 *  3. the aggregate example count is >= BOOSTED_RISK_MIN_EXAMPLES and
 *     failures / examples >= BOOSTED_RISK_BLOCK_SCORE;
 *  4. any collected tag entry meets its own rate, count-only or failure-ratio rule.
 *
 * A null or non-object `toolInput` is treated as carrying no risk metadata
 * instead of throwing (the default parameter only covers `undefined`).
 *
 * @param {object} [toolInput={}] - Tool input payload.
 * @returns {boolean} True when the input is considered high risk.
 */
function isBoostedRiskHigh(toolInput = {}) {
  const input = toolInput && typeof toolInput === 'object' ? toolInput : {};
  const boostedRisk = input.boostedRisk && typeof input.boostedRisk === 'object'
    ? input.boostedRisk
    : {};

  const level = String(boostedRisk.riskLevel || boostedRisk.level || boostedRisk.mode || '').toLowerCase();
  if (/\b(?:high|critical|block|deny)\b/.test(level)) return true;

  const riskScore = Number(boostedRisk.riskScore ?? boostedRisk.score ?? boostedRisk.riskRate ?? boostedRisk.failureRate ?? boostedRisk.baseRate);
  if (Number.isFinite(riskScore) && riskScore >= BOOSTED_RISK_BLOCK_SCORE) return true;

  // Shared ratio rule; the count guard also guarantees a non-zero divisor.
  const failureRatioIsHigh = (count, failures) => (
    Number.isFinite(count) &&
    count >= BOOSTED_RISK_MIN_EXAMPLES &&
    Number.isFinite(failures) &&
    failures / count >= BOOSTED_RISK_BLOCK_SCORE
  );

  const exampleCount = Number(boostedRisk.exampleCount ?? boostedRisk.count ?? boostedRisk.samples ?? boostedRisk.total);
  const failureCount = Number(boostedRisk.failureCount ?? boostedRisk.failures);
  if (failureRatioIsHigh(exampleCount, failureCount)) return true;

  return collectBoostedRiskTags(input).some((entry) => {
    const hasRate = Number.isFinite(entry.riskRate);
    if (hasRate && entry.riskRate >= BOOSTED_RISK_BLOCK_SCORE) return true;
    // NOTE(review): with no riskRate, reaching the example threshold alone is
    // treated as high risk, even when `failures` is present and low — confirm
    // this is intended.
    if (!hasRate && Number.isFinite(entry.count) && entry.count >= BOOSTED_RISK_MIN_EXAMPLES) return true;
    return failureRatioIsHigh(entry.count, entry.failures);
  });
}
|
|
722
|
+
|
|
723
|
+
/**
 * Check whether a risk tag shares a token (or token variant) with an action.
 *
 * Both inputs are normalized and split on whitespace. The tag matches when
 * any variant of any tag token appears verbatim among the action's tokens.
 *
 * @param {*} tag - Risk tag label.
 * @param {*} actionContext - Text describing the tool call.
 * @returns {boolean} True when at least one tag token variant is an action token.
 */
function riskTagMatchesAction(tag, actionContext) {
  const tagText = normalizeRiskToken(tag);
  const actionText = normalizeRiskToken(actionContext);
  if (!tagText || !actionText) return false;

  const actionWords = new Set(actionText.split(/\s+/).filter(Boolean));
  for (const word of tagText.split(/\s+/)) {
    if (!word) continue;
    for (const variant of riskTokenVariants(word)) {
      if (actionWords.has(variant)) return true;
    }
  }
  return false;
}
|
|
731
|
+
|
|
732
|
+
/**
 * Default-deny guard driven by boosted-risk tags attached to the tool input.
 *
 * Returns a deny result when the input carries at least one risk tag, its
 * boosted-risk metadata is high (isBoostedRiskHigh), and some tag matches the
 * action context built from the tool call. Otherwise returns null.
 *
 * NOTE(review): the high-risk check runs on the input as a whole, while the
 * reported tag is simply the first one that matches the action — the matched
 * tag is not necessarily the entry that crossed the threshold.
 *
 * @param {string} toolName - Name of the tool being invoked.
 * @param {object} [toolInput={}] - Tool input payload.
 * @returns {{decision: string, gate: string, message: string, severity: string, reasoning: string[]}|null}
 */
function evaluateBoostedRiskTagGuard(toolName, toolInput = {}) {
  const riskTags = collectBoostedRiskTags(toolInput);
  if (riskTags.length === 0) return null;
  if (!isBoostedRiskHigh(toolInput)) return null;

  const actionContext = extractActionContext(toolName, toolInput);
  const hit = riskTags.find((candidate) => riskTagMatchesAction(candidate.tag, actionContext));
  if (!hit) return null;

  // Prefer the most specific description of the action for the audit trail.
  const matchedText = String(
    toolInput.command || toolInput.file_path || toolInput.path || actionContext,
  ).slice(0, 120);

  return {
    decision: 'deny',
    gate: 'boosted-risk-tag-default-deny',
    message: `Boosted-risk history matched this action (${hit.tag}). This pattern is denied by default until explicit evidence lowers the risk.`,
    severity: 'critical',
    reasoning: [
      `High-risk tag "${hit.tag}" matched "${matchedText}"`,
      `Risk threshold: score >= ${BOOSTED_RISK_BLOCK_SCORE} or at least ${BOOSTED_RISK_MIN_EXAMPLES} examples`,
      'Hook enforcement blocks this pre-tool call instead of relying on advisory recall',
    ],
  };
}
|
|
754
|
+
|
|
755
|
+
/**
 * True when the call is a Bash invocation whose command contains `git commit`
 * (case-insensitive, word-bounded, any whitespace between the two words).
 *
 * @param {string} toolName - Name of the tool being invoked.
 * @param {object} [toolInput={}] - Tool input; only `command` is read.
 * @returns {boolean}
 */
function isGitCommitCommand(toolName, toolInput = {}) {
  if (toolName !== 'Bash') return false;
  const command = String(toolInput.command || '');
  return /\bgit\s+commit\b/i.test(command);
}
|
|
758
|
+
|
|
759
|
+
/**
 * True when the trimmed branch name is exactly one of main, master, develop,
 * dev, trunk or release (case-insensitive). Prefixed names such as
 * "release/1.0" do not match.
 *
 * @param {*} branchName - Branch name; falsy values are treated as ''.
 * @returns {boolean}
 */
function isProtectedBranchName(branchName) {
  const candidate = String(branchName || '').trim();
  const protectedBranch = /^(?:main|master|develop|dev|trunk|release)$/i;
  return protectedBranch.test(candidate);
}
|
|
762
|
+
|
|
763
|
+
/**
 * Work out the current branch name for a tool call.
 *
 * An explicit value on the input wins (branchName, currentBranch, branch,
 * headRefName — first truthy, trimmed). Otherwise, when a repo root is given,
 * the first output line of `git rev-parse --abbrev-ref HEAD` run there is
 * used. Returns '' when neither source yields a name.
 *
 * @param {object} [toolInput={}] - Tool input payload.
 * @param {string|null} [repoRoot=null] - Directory to run git in.
 * @returns {string} Branch name or ''.
 */
function detectBranchName(toolInput = {}, repoRoot = null) {
  const explicit = toolInput.branchName || toolInput.currentBranch || toolInput.branch || toolInput.headRefName;
  if (explicit) {
    return String(explicit).trim();
  }
  if (repoRoot) {
    const outputLines = safeExecFileLines('git', ['rev-parse', '--abbrev-ref', 'HEAD'], repoRoot);
    return outputLines[0] || '';
  }
  return '';
}
|
|
769
|
+
|
|
770
|
+
/**
 * Decide whether a tool call happens in a pull-request context.
 *
 * True when the input carries any PR reference (prNumber, prUrl,
 * pullRequestNumber, pullRequestUrl), or when a branch name can be detected
 * and it is not a protected branch name.
 *
 * @param {object} [toolInput={}] - Tool input payload.
 * @param {string|null} [repoRoot=null] - Repo root forwarded to detectBranchName.
 * @returns {boolean}
 */
function hasPrBranchContext(toolInput = {}, repoRoot = null) {
  const prReference = toolInput.prNumber
    || toolInput.prUrl
    || toolInput.pullRequestNumber
    || toolInput.pullRequestUrl;
  if (prReference) return true;

  const branchName = detectBranchName(toolInput, repoRoot);
  if (!branchName) return false;
  return !isProtectedBranchName(branchName);
}
|
|
777
|
+
|
|
778
|
+
/**
 * When a `git commit` is evaluated in a PR-branch context, register the
 * review-thread claim gate and track the pending-resolution action.
 *
 * Does nothing (returns null) for non-commit calls or when no PR context is
 * detected. The tracked action records the branch name, repo root and a
 * SHA-256 hash of the command string rather than the command itself.
 *
 * @param {string} toolName - Name of the tool being invoked.
 * @param {object} [toolInput={}] - Tool input payload.
 * @returns {*} Whatever registerClaimGate returns, or null when not applicable.
 */
function registerPrThreadResolutionClaimGate(toolName, toolInput = {}) {
  if (!isGitCommitCommand(toolName, toolInput)) return null;

  const repoRoot = resolveRepoRoot(toolInput);
  if (!hasPrBranchContext(toolInput, repoRoot)) return null;

  const branchName = detectBranchName(toolInput, repoRoot);
  const gateMessage = 'A PR-branch commit requires verified review-thread resolution before more tool calls or readiness claims.';
  const claimGate = registerClaimGate(
    PR_THREAD_RESOLUTION_CLAIM_PATTERN,
    PR_THREAD_RESOLUTION_REQUIRED_ACTIONS,
    gateMessage,
  );

  const commandHash = crypto
    .createHash('sha256')
    .update(String(toolInput.command || ''))
    .digest('hex');
  trackAction(PR_THREAD_RESOLUTION_ACTION, {
    branchName: branchName || null,
    repoRoot: repoRoot || null,
    commandHash,
  });

  return claimGate;
}
|
|
796
|
+
|
|
797
|
+
/**
 * True when any one of the required thread-resolution action ids has been
 * recorded (hasAction) or is reported satisfied (isConditionSatisfied).
 *
 * @returns {boolean}
 */
function isThreadResolutionSatisfied() {
  for (const actionId of PR_THREAD_RESOLUTION_REQUIRED_ACTIONS) {
    if (hasAction(actionId)) return true;
    if (isConditionSatisfied(actionId)) return true;
  }
  return false;
}
|
|
802
|
+
|
|
803
|
+
/**
 * Decide whether a tool call is allowed through while thread-resolution
 * verification is pending.
 *
 * Allowed: git commits, the recall/search_lessons/verify_claim/satisfy_gate/
 * track_action tools, and Bash commands that contain a gate-satisfy,
 * satisfy_gate, track_action, `gh pr view|checks|status`, a `gh api` call
 * mentioning reviewThreads/reviews/comments/threads, or `git status|diff|show`.
 *
 * NOTE(review): the Bash pattern is unanchored, so a compound command that
 * merely contains e.g. `git status` also passes — confirm that is intended.
 *
 * @param {string} toolName - Name of the tool being invoked.
 * @param {object} [toolInput={}] - Tool input; only `command` is read.
 * @returns {boolean}
 */
function isThreadResolutionEvidenceAction(toolName, toolInput = {}) {
  if (isGitCommitCommand(toolName, toolInput)) return true;

  const evidenceTools = ['recall', 'search_lessons', 'verify_claim', 'satisfy_gate', 'track_action'];
  if (evidenceTools.includes(toolName)) return true;

  if (toolName === 'Bash') {
    const evidenceCommand = /\b(?:gate-satisfy|satisfy_gate|track_action|gh\s+pr\s+(?:view|checks|status)|gh\s+api\b.*(?:reviewThreads|reviews|comments|threads)|git\s+(?:status|diff|show))\b/i;
    return evidenceCommand.test(String(toolInput.command || ''));
  }
  return false;
}
|
|
810
|
+
|
|
811
|
+
/**
 * Deny tool calls while a PR-branch commit still awaits review-thread
 * verification.
 *
 * Returns null when no pending action is tracked, when the requirement is
 * already satisfied, or when the current call is itself an evidence-gathering
 * action. Otherwise returns a critical deny result.
 *
 * @param {string} toolName - Name of the tool being invoked.
 * @param {object} [toolInput={}] - Tool input payload.
 * @returns {{decision: string, gate: string, message: string, severity: string, reasoning: string[]}|null}
 */
function evaluatePendingPrThreadResolutionGate(toolName, toolInput = {}) {
  const verificationPending = hasAction(PR_THREAD_RESOLUTION_ACTION) && !isThreadResolutionSatisfied();
  if (!verificationPending) return null;
  if (isThreadResolutionEvidenceAction(toolName, toolInput)) return null;

  return {
    decision: 'deny',
    gate: 'pr-thread-resolution-verified-required',
    message: 'A git commit was made on a PR branch. Verify review threads are resolved before the next tool call.',
    severity: 'critical',
    reasoning: [
      `Tracked action ${PR_THREAD_RESOLUTION_ACTION} is pending`,
      'Satisfy pr_threads_checked or thread_resolution_verified with evidence before continuing',
    ],
  };
}
|
|
828
|
+
|
|
612
829
|
function isScopeEnforcedAction(toolName, toolInput = {}, affectedFiles = []) {
|
|
613
830
|
if (EDIT_LIKE_TOOLS.has(toolName) && affectedFiles.length > 0) return true;
|
|
614
831
|
if (toolName !== 'Bash') return false;
|
|
@@ -1116,6 +1333,38 @@ async function evaluateGatesAsync(toolName, toolInput, configPath) {
|
|
|
1116
1333
|
}
|
|
1117
1334
|
|
|
1118
1335
|
const constraints = loadConstraints();
|
|
1336
|
+
registerPrThreadResolutionClaimGate(toolName, toolInput);
|
|
1337
|
+
const pendingThreadResolutionGate = evaluatePendingPrThreadResolutionGate(toolName, toolInput);
|
|
1338
|
+
if (pendingThreadResolutionGate) {
|
|
1339
|
+
recordStat(pendingThreadResolutionGate.gate, 'block');
|
|
1340
|
+
const auditRecord = recordAuditEvent({
|
|
1341
|
+
toolName,
|
|
1342
|
+
toolInput,
|
|
1343
|
+
decision: 'deny',
|
|
1344
|
+
gateId: pendingThreadResolutionGate.gate,
|
|
1345
|
+
message: pendingThreadResolutionGate.message,
|
|
1346
|
+
severity: pendingThreadResolutionGate.severity,
|
|
1347
|
+
source: 'gates-engine',
|
|
1348
|
+
});
|
|
1349
|
+
auditToFeedback(auditRecord);
|
|
1350
|
+
return pendingThreadResolutionGate;
|
|
1351
|
+
}
|
|
1352
|
+
|
|
1353
|
+
const boostedRiskGuard = evaluateBoostedRiskTagGuard(toolName, toolInput);
|
|
1354
|
+
if (boostedRiskGuard) {
|
|
1355
|
+
recordStat(boostedRiskGuard.gate, 'block');
|
|
1356
|
+
const auditRecord = recordAuditEvent({
|
|
1357
|
+
toolName,
|
|
1358
|
+
toolInput,
|
|
1359
|
+
decision: 'deny',
|
|
1360
|
+
gateId: boostedRiskGuard.gate,
|
|
1361
|
+
message: boostedRiskGuard.message,
|
|
1362
|
+
severity: boostedRiskGuard.severity,
|
|
1363
|
+
source: 'gates-engine',
|
|
1364
|
+
});
|
|
1365
|
+
auditToFeedback(auditRecord);
|
|
1366
|
+
return boostedRiskGuard;
|
|
1367
|
+
}
|
|
1119
1368
|
|
|
1120
1369
|
// Fast-path: feedback/recall tools skip metric gates entirely (avoids Stripe API calls)
|
|
1121
1370
|
const METRIC_SKIP_TOOLS = ['capture_feedback', 'feedback_stats', 'recall', 'feedback_summary', 'prevention_rules'];
|
|
@@ -1254,6 +1503,38 @@ function evaluateGates(toolName, toolInput, configPath) {
|
|
|
1254
1503
|
}
|
|
1255
1504
|
|
|
1256
1505
|
const constraints = loadConstraints();
|
|
1506
|
+
registerPrThreadResolutionClaimGate(toolName, toolInput);
|
|
1507
|
+
const pendingThreadResolutionGate = evaluatePendingPrThreadResolutionGate(toolName, toolInput);
|
|
1508
|
+
if (pendingThreadResolutionGate) {
|
|
1509
|
+
recordStat(pendingThreadResolutionGate.gate, 'block');
|
|
1510
|
+
const auditRecord = recordAuditEvent({
|
|
1511
|
+
toolName,
|
|
1512
|
+
toolInput,
|
|
1513
|
+
decision: 'deny',
|
|
1514
|
+
gateId: pendingThreadResolutionGate.gate,
|
|
1515
|
+
message: pendingThreadResolutionGate.message,
|
|
1516
|
+
severity: pendingThreadResolutionGate.severity,
|
|
1517
|
+
source: 'gates-engine',
|
|
1518
|
+
});
|
|
1519
|
+
auditToFeedback(auditRecord);
|
|
1520
|
+
return pendingThreadResolutionGate;
|
|
1521
|
+
}
|
|
1522
|
+
|
|
1523
|
+
const boostedRiskGuard = evaluateBoostedRiskTagGuard(toolName, toolInput);
|
|
1524
|
+
if (boostedRiskGuard) {
|
|
1525
|
+
recordStat(boostedRiskGuard.gate, 'block');
|
|
1526
|
+
const auditRecord = recordAuditEvent({
|
|
1527
|
+
toolName,
|
|
1528
|
+
toolInput,
|
|
1529
|
+
decision: 'deny',
|
|
1530
|
+
gateId: boostedRiskGuard.gate,
|
|
1531
|
+
message: boostedRiskGuard.message,
|
|
1532
|
+
severity: boostedRiskGuard.severity,
|
|
1533
|
+
source: 'gates-engine',
|
|
1534
|
+
});
|
|
1535
|
+
auditToFeedback(auditRecord);
|
|
1536
|
+
return boostedRiskGuard;
|
|
1537
|
+
}
|
|
1257
1538
|
|
|
1258
1539
|
for (const gate of config.gates) {
|
|
1259
1540
|
const matchDetails = matchGate(gate, toolName, toolInput);
|
|
@@ -1456,14 +1737,20 @@ function evaluateSecretGuard(input = {}) {
|
|
|
1456
1737
|
// PreToolUse hook interface (stdin/stdout JSON)
|
|
1457
1738
|
// ---------------------------------------------------------------------------
|
|
1458
1739
|
|
|
1740
|
+
/**
 * Build the hook output fragment that carries a reminder string.
 *
 * The same text is published under three keys (additionalContext,
 * systemReminder, thumbgateSystemReminder), in that order.
 *
 * @param {*} context - Reminder text to publish.
 * @returns {{additionalContext: *, systemReminder: *, thumbgateSystemReminder: *}}
 */
function buildReminderOutput(context) {
  const reminderKeys = ['additionalContext', 'systemReminder', 'thumbgateSystemReminder'];
  return Object.fromEntries(reminderKeys.map((key) => [key, context]));
}
|
|
1747
|
+
|
|
1459
1748
|
function formatOutput(result, behavioralContext) {
|
|
1460
1749
|
if (!result) {
|
|
1461
1750
|
// No gate matched — inject behavioral context if available
|
|
1462
1751
|
if (behavioralContext) {
|
|
1463
1752
|
return JSON.stringify({
|
|
1464
|
-
hookSpecificOutput:
|
|
1465
|
-
additionalContext: behavioralContext,
|
|
1466
|
-
},
|
|
1753
|
+
hookSpecificOutput: buildReminderOutput(behavioralContext),
|
|
1467
1754
|
});
|
|
1468
1755
|
}
|
|
1469
1756
|
return JSON.stringify({});
|
|
@@ -1474,19 +1761,27 @@ function formatOutput(result, behavioralContext) {
|
|
|
1474
1761
|
: '';
|
|
1475
1762
|
|
|
1476
1763
|
if (result.decision === 'deny') {
|
|
1764
|
+
const reminder = behavioralContext ? buildReminderOutput(behavioralContext) : {};
|
|
1765
|
+
const reminderSuffix = behavioralContext ? `\n\nSystem reminder:\n${behavioralContext}` : '';
|
|
1477
1766
|
return JSON.stringify({
|
|
1478
1767
|
hookSpecificOutput: {
|
|
1768
|
+
...reminder,
|
|
1479
1769
|
permissionDecision: 'deny',
|
|
1480
|
-
permissionDecisionReason: `[GATE:${result.gate}] ${result.message}${reasoningSuffix}`,
|
|
1770
|
+
permissionDecisionReason: `[GATE:${result.gate}] ${result.message}${reasoningSuffix}${reminderSuffix}`,
|
|
1481
1771
|
},
|
|
1482
1772
|
});
|
|
1483
1773
|
}
|
|
1484
1774
|
|
|
1485
1775
|
if (result.decision === 'warn') {
|
|
1486
1776
|
const extra = behavioralContext ? `\n${behavioralContext}` : '';
|
|
1777
|
+
const context = `[GATE:${result.gate}] WARNING: ${result.message}${reasoningSuffix}${extra}`;
|
|
1487
1778
|
return JSON.stringify({
|
|
1488
1779
|
hookSpecificOutput: {
|
|
1489
|
-
additionalContext:
|
|
1780
|
+
additionalContext: context,
|
|
1781
|
+
...(behavioralContext ? {
|
|
1782
|
+
systemReminder: behavioralContext,
|
|
1783
|
+
thumbgateSystemReminder: behavioralContext,
|
|
1784
|
+
} : {}),
|
|
1490
1785
|
},
|
|
1491
1786
|
});
|
|
1492
1787
|
}
|
|
@@ -1518,6 +1813,146 @@ function buildBehavioralContext() {
|
|
|
1518
1813
|
}
|
|
1519
1814
|
}
|
|
1520
1815
|
|
|
1816
|
+
/**
 * Build "recent mistakes" context from memory-log.jsonl.
 *
 * Walks the log from the newest line backwards and collects up to `limit`
 * entries whose category is 'error' or 'learning' and whose timestamp falls
 * inside the age window. Intended to surface a freshly captured mistake on the
 * next tool call, without waiting for the recurring-pattern threshold that
 * buildBehavioralContext requires.
 *
 * The header reports the window actually applied ("last 24h" by default),
 * rather than always claiming 24h when `maxAgeMs` is overridden.
 *
 * Best-effort: returns null when the feedback-paths module, the feedback
 * directory, or the log file is unavailable or unreadable.
 *
 * @param {Object} [options]
 * @param {number} [options.maxAgeMs=86400000] - Only include memories newer than this many ms (24h default)
 * @param {number} [options.limit=3] - Maximum number of entries to include
 * @returns {string|null} Formatted reminder text, or null when nothing qualifies
 */
function buildRecentCorrectiveActionsContext(options = {}) {
  const opts = options || {}; // tolerate an explicit null
  const maxAgeMs = typeof opts.maxAgeMs === 'number' ? opts.maxAgeMs : 24 * 60 * 60 * 1000;
  const limit = typeof opts.limit === 'number' ? opts.limit : 3;

  let resolveFeedbackDir;
  try {
    ({ resolveFeedbackDir } = require('./feedback-paths'));
  } catch {
    return null;
  }

  let feedbackDir;
  try {
    feedbackDir = resolveFeedbackDir({});
  } catch {
    return null;
  }

  const memoryLogPath = path.join(feedbackDir, 'memory-log.jsonl');
  if (!fs.existsSync(memoryLogPath)) return null;

  let raw;
  try {
    raw = fs.readFileSync(memoryLogPath, 'utf8');
  } catch {
    return null;
  }

  const lines = raw.split('\n').filter(Boolean);
  if (lines.length === 0) return null;

  const cutoff = Date.now() - maxAgeMs;
  const recent = [];

  // Walk from the tail backwards so we get the newest entries first
  for (let i = lines.length - 1; i >= 0 && recent.length < limit; i--) {
    try {
      const entry = JSON.parse(lines[i]);
      if (entry.category !== 'error' && entry.category !== 'learning') continue;
      const ts = entry.timestamp ? Date.parse(entry.timestamp) : NaN;
      if (!Number.isFinite(ts) || ts < cutoff) continue;
      recent.push(entry);
    } catch {
      // skip malformed line
    }
  }

  if (recent.length === 0) return null;

  const formatted = recent.map((m) => {
    const title = String(m.title || '').replace(/^MISTAKE:\s*/, '').slice(0, 140);
    const content = String(m.content || '');
    const avoidMatch = content.match(/How to avoid:\s*([^\n]+)/i);
    const advice = avoidMatch ? avoidMatch[1].trim().slice(0, 220) : null;
    return advice ? ` • ${title}\n → ${advice}` : ` • ${title}`;
  });

  // Describe the window that was really applied; whole hours render as "Nh",
  // anything else as minutes, and a non-finite window as "any age".
  let windowLabel = 'any age';
  if (Number.isFinite(maxAgeMs)) {
    const hours = maxAgeMs / (60 * 60 * 1000);
    windowLabel = Number.isInteger(hours) && hours > 0
      ? `last ${hours}h`
      : `last ${Math.max(1, Math.round(maxAgeMs / 60000))}m`;
  }

  return `[ThumbGate] Recent mistakes (${windowLabel}) — do NOT repeat:\n${formatted.join('\n')}`;
}
|
|
1890
|
+
|
|
1891
|
+
/**
 * Build per-action lesson context for a tool call.
 *
 * Retrieves up to three lessons relevant to the call, keeps those whose
 * `signal` is 'negative', and formats them as a bulleted reminder (title plus
 * optional avoidance advice) for injection into hook output before the action
 * runs.
 *
 * Best-effort: returns null when the tool name is missing, the
 * lesson-retrieval module cannot be loaded, no action context can be derived,
 * retrieval throws, or no negative lesson is returned.
 *
 * @param {string} toolName - Name of the tool being invoked.
 * @param {object} toolInput - Tool input payload.
 * @returns {string|null}
 */
function buildRelevantLessonContext(toolName, toolInput) {
  if (!toolName) return null;

  let retrieveRelevantLessons;
  try {
    ({ retrieveRelevantLessons } = require('./lesson-retrieval'));
  } catch {
    return null;
  }

  // Searchable description of this specific call
  const actionContext = extractActionContext(toolName, toolInput);
  if (!actionContext) return null;

  try {
    // Retrieval applies its own relevance cut-off (per the original note:
    // relevanceScore > 0.1), so every negative lesson returned is surfaced.
    const lessons = retrieveRelevantLessons(toolName, actionContext, { maxResults: 3 });
    const bullets = lessons
      .filter((lesson) => lesson.signal === 'negative')
      .map((lesson) => {
        const title = (lesson.title || '').replace(/^MISTAKE:\s*/, '').slice(0, 140);
        const advice = extractAvoidanceAdvice(lesson.content);
        if (!advice) return ` • ${title}`;
        return ` • ${title}\n → ${advice}`;
      });
    if (bullets.length === 0) return null;

    return `[ThumbGate] Past mistakes relevant to this action — read before proceeding:\n${bullets.join('\n')}`;
  } catch {
    return null;
  }
}
|
|
1932
|
+
|
|
1933
|
+
/**
 * Build a single searchable string describing a tool call.
 *
 * Starts with the tool name and appends, in order, the truthy command,
 * file_path, description, prompt and pattern fields (each stringified and
 * truncated to its limit; file_path is not truncated). Empty parts are
 * dropped and the rest joined with single spaces. When `toolInput` is falsy
 * the tool name is returned as-is.
 *
 * @param {string} toolName - Name of the tool being invoked.
 * @param {object} toolInput - Tool input payload.
 * @returns {string} Space-joined action description.
 */
function extractActionContext(toolName, toolInput) {
  if (!toolInput) return toolName;

  const fieldLimits = [
    ['command', 400],
    ['file_path', Infinity],
    ['description', 200],
    ['prompt', 400],
    ['pattern', 200],
  ];

  const parts = [toolName];
  for (const [field, maxLength] of fieldLimits) {
    const value = toolInput[field];
    if (value) parts.push(String(value).slice(0, maxLength));
  }
  return parts.filter(Boolean).join(' ');
}
|
|
1943
|
+
|
|
1944
|
+
/**
 * Pull the "How to avoid:" advice line out of lesson content.
 *
 * Content is coerced to a string first, so a non-string value (number,
 * object, ...) yields null instead of throwing. A throw here would be caught
 * by buildRelevantLessonContext and would discard every lesson for the call.
 *
 * @param {*} content - Lesson content, normally a string.
 * @returns {string|null} The advice text (trimmed, max 220 chars) or null when
 *   the content is empty or has no "How to avoid:" section.
 */
function extractAvoidanceAdvice(content) {
  if (!content) return null;
  // Capture the rest of the line after the (case-insensitive) marker.
  const match = String(content).match(/How to avoid:\s*([^\n]+)/i);
  return match ? match[1].trim().slice(0, 220) : null;
}
|
|
1951
|
+
|
|
1952
|
+
/**
 * Join the non-empty string arguments with a blank line between them.
 *
 * Non-string and empty-string arguments are ignored.
 *
 * @param {...*} ctxs - Candidate context fragments.
 * @returns {string|null} The merged text, or null when nothing usable was given.
 */
function mergeContextStrings(...ctxs) {
  const usable = [];
  for (const ctx of ctxs) {
    if (typeof ctx === 'string' && ctx !== '') usable.push(ctx);
  }
  return usable.length > 0 ? usable.join('\n\n') : null;
}
|
|
1955
|
+
|
|
1521
1956
|
async function runAsync(input) {
|
|
1522
1957
|
const secretGuard = evaluateSecretGuard(input);
|
|
1523
1958
|
if (secretGuard) {
|
|
@@ -1545,7 +1980,10 @@ async function runAsync(input) {
|
|
|
1545
1980
|
}
|
|
1546
1981
|
|
|
1547
1982
|
const behavioralContext = buildBehavioralContext();
|
|
1548
|
-
|
|
1983
|
+
const lessonContext = buildRelevantLessonContext(toolName, toolInput);
|
|
1984
|
+
const recentContext = buildRecentCorrectiveActionsContext();
|
|
1985
|
+
const combinedContext = mergeContextStrings(lessonContext, recentContext, behavioralContext);
|
|
1986
|
+
return formatOutput(result, combinedContext);
|
|
1549
1987
|
}
|
|
1550
1988
|
|
|
1551
1989
|
function run(input) {
|
|
@@ -1575,7 +2013,10 @@ function run(input) {
|
|
|
1575
2013
|
}
|
|
1576
2014
|
|
|
1577
2015
|
const behavioralContext = buildBehavioralContext();
|
|
1578
|
-
|
|
2016
|
+
const lessonContext = buildRelevantLessonContext(toolName, toolInput);
|
|
2017
|
+
const recentContext = buildRecentCorrectiveActionsContext();
|
|
2018
|
+
const combinedContext = mergeContextStrings(lessonContext, recentContext, behavioralContext);
|
|
2019
|
+
return formatOutput(result, combinedContext);
|
|
1579
2020
|
}
|
|
1580
2021
|
|
|
1581
2022
|
// ---------------------------------------------------------------------------
|
|
@@ -1796,7 +2237,20 @@ module.exports = {
|
|
|
1796
2237
|
PROTECTED_APPROVAL_TTL_MS,
|
|
1797
2238
|
DEFAULT_PROTECTED_FILE_GLOBS,
|
|
1798
2239
|
buildBehavioralContext,
|
|
2240
|
+
buildRecentCorrectiveActionsContext,
|
|
2241
|
+
buildRelevantLessonContext,
|
|
2242
|
+
extractActionContext,
|
|
2243
|
+
extractAvoidanceAdvice,
|
|
2244
|
+
mergeContextStrings,
|
|
2245
|
+
buildReminderOutput,
|
|
1799
2246
|
isHighRiskAction,
|
|
2247
|
+
collectBoostedRiskTags,
|
|
2248
|
+
isBoostedRiskHigh,
|
|
2249
|
+
riskTagMatchesAction,
|
|
2250
|
+
evaluateBoostedRiskTagGuard,
|
|
2251
|
+
registerPrThreadResolutionClaimGate,
|
|
2252
|
+
evaluatePendingPrThreadResolutionGate,
|
|
2253
|
+
PR_THREAD_RESOLUTION_ACTION,
|
|
1800
2254
|
};
|
|
1801
2255
|
|
|
1802
2256
|
// ---------------------------------------------------------------------------
|
package/scripts/hook-runtime.js
CHANGED
|
@@ -44,10 +44,25 @@ function resolveCliBaseCommand() {
|
|
|
44
44
|
return publishedCliShellCommand(version);
|
|
45
45
|
}
|
|
46
46
|
|
|
47
|
+
/**
 * Resolve the base shell command used for Codex hook commands.
 *
 * Uses the local `bin/cli.js` (run through node) only when published hook
 * commands are not available for the current package version AND the package
 * root is a source checkout. In every other case the published CLI is used,
 * pinned to 'latest' with `preferInstalled: false`.
 *
 * @returns {string} Shell command prefix.
 */
function resolveCodexCliBaseCommand() {
  const version = packageVersion();
  const useLocalCheckout = !publishedHookCommandsAvailable(version) && isSourceCheckout(PKG_ROOT);
  if (useLocalCheckout) {
    const cliPath = path.join(PKG_ROOT, 'bin', 'cli.js');
    return `node ${shellQuote(cliPath)}`;
  }
  return publishedCliShellCommand('latest', [], { preferInstalled: false });
}
|
|
57
|
+
|
|
47
58
|
function buildPortableHookCommand(subcommand) {
|
|
48
59
|
return `${resolveCliBaseCommand()} ${subcommand}`;
|
|
49
60
|
}
|
|
50
61
|
|
|
62
|
+
/**
 * Compose a Codex hook command: the Codex CLI base command, a space, then the
 * given subcommand.
 *
 * @param {string} subcommand - CLI subcommand to append.
 * @returns {string} Full shell command.
 */
function buildCodexPortableHookCommand(subcommand) {
  const baseCommand = resolveCodexCliBaseCommand();
  return `${baseCommand} ${subcommand}`;
}
|
|
65
|
+
|
|
51
66
|
function preToolHookCommand() {
|
|
52
67
|
return buildPortableHookCommand('gate-check');
|
|
53
68
|
}
|
|
@@ -68,12 +83,39 @@ function statuslineCommand() {
|
|
|
68
83
|
return buildPortableHookCommand('statusline-render');
|
|
69
84
|
}
|
|
70
85
|
|
|
86
|
+
/**
 * Codex variant of the pre-tool hook command.
 *
 * @returns {string} The Codex CLI base command followed by `gate-check`.
 */
function codexPreToolHookCommand() {
  const subcommand = 'gate-check';
  return buildCodexPortableHookCommand(subcommand);
}
|
|
89
|
+
|
|
90
|
+
/**
 * Codex variant of the user-prompt hook command.
 *
 * @returns {string} The Codex CLI base command followed by `hook-auto-capture`.
 */
function codexUserPromptHookCommand() {
  const subcommand = 'hook-auto-capture';
  return buildCodexPortableHookCommand(subcommand);
}
|
|
93
|
+
|
|
94
|
+
/**
 * Codex variant of the session-start hook command.
 *
 * @returns {string} The Codex CLI base command followed by `session-start`.
 */
function codexSessionStartHookCommand() {
  const subcommand = 'session-start';
  return buildCodexPortableHookCommand(subcommand);
}
|
|
97
|
+
|
|
98
|
+
/**
 * Codex variant of the cache-update hook command.
 *
 * @returns {string} The Codex CLI base command followed by `cache-update`.
 */
function codexCacheUpdateHookCommand() {
  const subcommand = 'cache-update';
  return buildCodexPortableHookCommand(subcommand);
}
|
|
101
|
+
|
|
102
|
+
/**
 * Codex variant of the statusline command.
 *
 * @returns {string} The Codex CLI base command followed by `statusline-render`.
 */
function codexStatuslineCommand() {
  const subcommand = 'statusline-render';
  return buildCodexPortableHookCommand(subcommand);
}
|
|
105
|
+
|
|
71
106
|
module.exports = {
|
|
72
107
|
buildPortableHookCommand,
|
|
108
|
+
buildCodexPortableHookCommand,
|
|
73
109
|
cacheUpdateHookCommand,
|
|
110
|
+
codexCacheUpdateHookCommand,
|
|
111
|
+
codexPreToolHookCommand,
|
|
112
|
+
codexSessionStartHookCommand,
|
|
113
|
+
codexStatuslineCommand,
|
|
114
|
+
codexUserPromptHookCommand,
|
|
74
115
|
packageVersion,
|
|
75
116
|
publishedHookCommandsAvailable,
|
|
76
117
|
preToolHookCommand,
|
|
118
|
+
resolveCodexCliBaseCommand,
|
|
77
119
|
resolveCliBaseCommand,
|
|
78
120
|
sessionStartHookCommand,
|
|
79
121
|
statuslineCommand,
|
package/scripts/llm-client.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
|
+
const { runStep } = require('./durability/step');
|
|
5
|
+
|
|
4
6
|
const MODELS = {
|
|
5
7
|
FAST: 'claude-haiku-4-5-20251001',
|
|
6
8
|
SMART: 'claude-sonnet-4-6',
|
|
@@ -33,25 +35,38 @@ function stripCodeFences(text) {
|
|
|
33
35
|
return fenced ? fenced[1].trim() : text.trim();
|
|
34
36
|
}
|
|
35
37
|
|
|
38
|
+
/**
 * Send one user prompt to Claude and return the text reply with any
 * surrounding code fence stripped.
 *
 * The request runs inside runStep('llm.callClaude') with `retries: 2`, and
 * retry messages go to console.warn. Per the original note, the step's default
 * error classification reads the SDK error's `.status`, retrying 429/5xx and
 * bailing on other 4xx — NOTE(review): that classifier lives in
 * ./durability/step and is not visible here.
 *
 * Contract: never throws. Returns null when no client is configured or when
 * the call ultimately fails.
 *
 * @param {object} [params]
 * @param {string} [params.systemPrompt] - Optional system prompt.
 * @param {string} [params.userPrompt] - User message content.
 * @param {string} [params.model] - Model id; falls back to DEFAULT_MODEL.
 * @param {number} [params.maxTokens] - Token cap; falls back to DEFAULT_MAX_TOKENS.
 * @returns {Promise<string|null>}
 */
async function callClaude({ systemPrompt, userPrompt, model, maxTokens } = {}) {
  const client = getClient();
  if (!client) return null;

  // One attempt: issue the request and concatenate the text blocks of the reply.
  const requestText = async () => {
    const response = await client.messages.create({
      model: model || DEFAULT_MODEL,
      max_tokens: maxTokens || DEFAULT_MAX_TOKENS,
      system: systemPrompt || undefined,
      messages: [{ role: 'user', content: userPrompt }],
    });

    return response.content
      .filter((block) => block.type === 'text')
      .map((block) => block.text)
      .join('');
  };

  const stepOptions = {
    retries: 2,
    logger: (msg) => console.warn(msg),
  };

  try {
    const text = await runStep('llm.callClaude', stepOptions, requestText);
    return stripCodeFences(text);
  } catch {
    // Callers rely on `null` (not an exception) on failure; retry attempts
    // were already reported through the step logger.
    return null;
  }
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* scripts/mailer/index.js — public entry point for the mailer module.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const { sendEmail, sendTrialWelcomeEmail, renderTrialWelcomeBodies } = require('./resend-mailer');
|
|
8
|
+
|
|
9
|
+
module.exports = {
|
|
10
|
+
sendEmail,
|
|
11
|
+
sendTrialWelcomeEmail,
|
|
12
|
+
renderTrialWelcomeBodies,
|
|
13
|
+
};
|