thumbgate 1.16.13 → 1.16.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +3 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +26 -1
- package/adapters/opencode/opencode.json +1 -1
- package/bin/cli.js +420 -1
- package/config/gate-templates.json +372 -0
- package/config/mcp-allowlists.json +25 -0
- package/config/model-candidates.json +59 -2
- package/config/model-tiers.json +4 -1
- package/package.json +79 -22
- package/public/compare.html +6 -0
- package/public/index.html +144 -11
- package/public/numbers.html +8 -8
- package/public/pro.html +22 -24
- package/scripts/agent-design-governance.js +211 -0
- package/scripts/agent-reasoning-traces.js +683 -0
- package/scripts/agent-reward-model.js +438 -0
- package/scripts/agent-stack-survival-audit.js +231 -0
- package/scripts/ai-engineering-stack-guardrails.js +256 -0
- package/scripts/billing.js +16 -4
- package/scripts/chatgpt-ads-readiness-pack.js +195 -0
- package/scripts/cli-schema.js +277 -0
- package/scripts/code-graph-guardrails.js +176 -0
- package/scripts/deepseek-v4-runtime-guardrails.js +253 -0
- package/scripts/gemini-embedding-policy.js +198 -0
- package/scripts/inference-cache-policy.js +39 -0
- package/scripts/judge-reward-function.js +396 -0
- package/scripts/llm-behavior-monitor.js +251 -0
- package/scripts/long-running-agent-context-guardrails.js +176 -0
- package/scripts/multimodal-retrieval-plan.js +31 -11
- package/scripts/oss-pr-opportunity-scout.js +240 -0
- package/scripts/proactive-agent-eval-guardrails.js +230 -0
- package/scripts/profile-router.js +5 -4
- package/scripts/prompting-operating-system.js +273 -0
- package/scripts/proxy-pointer-rag-guardrails.js +189 -0
- package/scripts/rag-precision-guardrails.js +202 -0
- package/scripts/rate-limiter.js +1 -1
- package/scripts/reasoning-efficiency-guardrails.js +176 -0
- package/scripts/reward-hacking-guardrails.js +251 -0
- package/scripts/seo-gsd.js +1201 -11
- package/scripts/single-use-credential-gate.js +182 -0
- package/scripts/structured-prompt-driven.js +226 -0
- package/scripts/telemetry-analytics.js +31 -6
- package/scripts/tool-registry.js +92 -0
- package/scripts/upstream-contribution-engine.js +379 -0
- package/scripts/vector-store.js +119 -4
- package/src/api/server.js +333 -100
- package/scripts/agents-sdk-sandbox-plan.js +0 -57
- package/scripts/ai-org-governance.js +0 -98
- package/scripts/artifact-agent-plan.js +0 -81
- package/scripts/enterprise-agent-rollout.js +0 -34
- package/scripts/experience-replay-governance.js +0 -69
- package/scripts/inference-economics.js +0 -53
- package/scripts/knowledge-layer-plan.js +0 -108
- package/scripts/memory-store-governance.js +0 -60
- package/scripts/post-training-governance.js +0 -34
- package/scripts/production-agent-readiness.js +0 -40
- package/scripts/scaling-law-claims.js +0 -60
- package/scripts/student-consistent-training.js +0 -73
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
const path = require('path');
|
|
5
|
+
const { listGateTemplates } = require('./gate-templates');
|
|
6
|
+
|
|
7
|
+
const DOCUMENT_RAG_CATEGORY = 'Document RAG Safety';
|
|
8
|
+
|
|
9
|
+
/**
 * Normalize an option value into a list of trimmed, non-empty strings.
 *
 * Arrays are stringified element by element; any other value is stringified
 * and split on commas. Blank entries are dropped.
 *
 * @param {*} value - Raw option value (string, array, boolean, or missing).
 * @returns {string[]} Trimmed, non-empty entries; empty when no list was supplied.
 */
function splitCsv(value) {
  // A boolean carries no list payload. `true` was already treated as "no value";
  // `false` must be treated the same way or it leaks through as the entry 'false'.
  if (value === undefined || value === null || typeof value === 'boolean') return [];
  const parts = Array.isArray(value) ? value.map(String) : String(value).split(',');
  return parts.map((item) => item.trim()).filter(Boolean);
}
|
|
17
|
+
|
|
18
|
+
/**
 * Drop falsy entries and duplicates, keeping first-seen order.
 *
 * @param {Array} values - Candidate values.
 * @returns {Array} Distinct truthy values in their original order.
 */
function unique(values) {
  const seen = new Set();
  for (const entry of values) {
    if (entry) seen.add(entry);
  }
  return [...seen];
}
|
|
21
|
+
|
|
22
|
+
/**
 * Coerce an option value to a boolean.
 *
 * Booleans pass through, missing values are false, and anything else is true
 * only when its trimmed string form is 1/true/yes/on (case-insensitive).
 *
 * @param {*} value - Raw option value.
 * @returns {boolean} The interpreted flag.
 */
function normalizeBoolean(value) {
  if (typeof value === 'boolean') return value;
  if (value === undefined || value === null) return false;
  const text = String(value).trim();
  return /^(1|true|yes|on)$/i.test(text);
}
|
|
27
|
+
|
|
28
|
+
/**
 * Turn raw CLI-style options into the normalized shape used by this module.
 *
 * List options accept comma-separated strings or arrays and merge their aliases
 * (`sections`/`section-ids`, `image-pointers`/`images`/`figures`,
 * `documents`/`document-ids`) with duplicates removed.
 *
 * @param {Object} [options={}] - Raw options keyed by their dashed flag names.
 * @returns {{ragTool: string, treePath: (string|null), sectionIds: string[],
 *   imagePointers: string[], documentIds: string[], candidateImages: (number|null),
 *   crossDocumentPolicy: string, visionFilter: boolean, visualClaims: boolean}}
 */
function normalizeOptions(options = {}) {
  const sectionIds = unique([
    ...splitCsv(options.sections),
    ...splitCsv(options['section-ids']),
  ]);
  const imagePointers = unique([
    ...splitCsv(options['image-pointers']),
    ...splitCsv(options.images),
    ...splitCsv(options.figures),
  ]);
  const documentIds = unique([
    ...splitCsv(options.documents),
    ...splitCsv(options['document-ids']),
  ]);

  // Number() maps '', null and false to 0 and true to 1, so a missing or valueless
  // flag used to be reported as a real image count. Only parse an actual payload.
  const rawCandidates = options['candidate-images'];
  const hasCandidateValue = rawCandidates !== undefined
    && rawCandidates !== null
    && typeof rawCandidates !== 'boolean'
    && String(rawCandidates).trim() !== '';
  const parsedCandidates = hasCandidateValue ? Number(rawCandidates) : Number.NaN;
  const candidateImages = Number.isFinite(parsedCandidates) ? parsedCandidates : null;

  // A boolean would otherwise become the path 'true', and a whitespace-only string
  // would normalize to '.'; neither is a usable tree path.
  const rawTreePath = options['tree-path'];
  const treePathText = typeof rawTreePath === 'boolean' || !rawTreePath
    ? ''
    : String(rawTreePath).trim();

  return {
    ragTool: String(options['rag-tool'] || options.tool || 'proxy-pointer-rag').trim() || 'proxy-pointer-rag',
    treePath: treePathText ? path.normalize(treePathText) : null,
    sectionIds,
    imagePointers,
    documentIds,
    candidateImages,
    crossDocumentPolicy: String(options['cross-doc-policy'] || options['cross-document-policy'] || '').trim().toLowerCase(),
    visionFilter: normalizeBoolean(options['vision-filter']),
    visualClaims: normalizeBoolean(options['visual-claims']),
  };
}
|
|
58
|
+
|
|
59
|
+
/**
 * Decide whether a Document RAG Safety template applies to the given options.
 *
 * @param {{id: string}} template - Gate template; only `id` is inspected.
 * @param {Object} options - Normalized options (see normalizeOptions).
 * @returns {boolean} Whether the template is recommended; false for unknown ids.
 */
function gateApplicability(template, options) {
  switch (template.id) {
    case 'require-section-tree-before-multimodal-answer':
      // Any structural hint (a tree file or explicit section ids) is enough.
      return Boolean(options.treePath || options.sectionIds.length > 0);
    case 'require-image-pointer-grounding':
      return options.imagePointers.length > 0;
    case 'block-cross-document-image-leakage':
      return options.documentIds.length > 1 || options.crossDocumentPolicy === 'strict';
    case 'checkpoint-vision-filter-for-visual-claims':
      return options.visualClaims || options.visionFilter || (options.candidateImages !== null && options.candidateImages > 0);
    default:
      return false;
  }
}
|
|
74
|
+
|
|
75
|
+
/**
 * Describe which document/RAG risk signals are present in the options.
 *
 * Signals are emitted in a fixed order: section tree, image pointers,
 * cross-document leakage, visual claims.
 *
 * @param {Object} options - Normalized options (see normalizeOptions).
 * @returns {Array<{id: string, label: string, values: Array, risk: string}>}
 */
function buildSignalSummary(options) {
  const hasCandidateImages = () => options.candidateImages !== null && options.candidateImages > 0;

  // Each detector pairs its trigger with a lazily built description, so a
  // description is only computed for signals that actually fire.
  const detectors = [
    {
      applies: () => options.treePath || options.sectionIds.length > 0,
      describe: () => ({
        id: 'section_tree',
        label: 'Hierarchical section tree',
        values: unique([options.treePath, ...options.sectionIds]),
        risk: 'visual answers should be grounded in document structure, not sliding-window chunks',
      }),
    },
    {
      applies: () => options.imagePointers.length > 0,
      describe: () => ({
        id: 'image_pointers',
        label: 'Image pointers',
        values: options.imagePointers,
        risk: 'every selected visual needs source document, parent section, and path metadata',
      }),
    },
    {
      applies: () => options.documentIds.length > 1 || options.crossDocumentPolicy === 'strict',
      describe: () => ({
        id: 'cross_document_leakage',
        label: 'Cross-document leakage risk',
        values: options.documentIds.length > 0 ? options.documentIds : ['strict cross-document policy'],
        risk: 'a plausible image from the wrong document can invalidate the answer',
      }),
    },
    {
      applies: () => options.visualClaims || options.visionFilter || hasCandidateImages(),
      describe: () => ({
        id: 'visual_claims',
        label: 'Visual claim checkpoint',
        values: unique([
          options.visualClaims ? 'visual claims enabled' : null,
          options.visionFilter ? 'vision filter enabled' : null,
          options.candidateImages !== null ? `${options.candidateImages} candidate images` : null,
        ]),
        risk: 'answers that describe image content may need a vision-model sanity check',
      }),
    },
  ];

  const signals = [];
  for (const detector of detectors) {
    if (detector.applies()) signals.push(detector.describe());
  }
  return signals;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Build the proxy-pointer RAG guardrails report.
 *
 * Loads the gate templates, keeps those in the Document RAG Safety category,
 * flags each with `recommended`, and bundles them with the detected signals.
 *
 * @param {Object} [rawOptions={}] - Raw options, normalized via normalizeOptions.
 * @param {string} [templatesPath] - Passed through to listGateTemplates.
 * @returns {Object} Report with name, status, ragTool, treePath, summary,
 *   signals, templates, nextActions and exampleCommand.
 */
function buildProxyPointerRagGuardrailsPlan(rawOptions = {}, templatesPath) {
  const options = normalizeOptions(rawOptions);
  const categoryTemplates = listGateTemplates(templatesPath)
    .filter((template) => template.category === DOCUMENT_RAG_CATEGORY);
  const signals = buildSignalSummary(options);

  const annotated = [];
  let recommendedCount = 0;
  for (const template of categoryTemplates) {
    const recommended = gateApplicability(template, options);
    if (recommended) recommendedCount += 1;
    annotated.push({ ...template, recommended });
  }

  const summary = {
    signalCount: signals.length,
    templateCount: categoryTemplates.length,
    recommendedTemplateCount: recommendedCount,
    candidateImages: options.candidateImages,
  };
  const nextActions = [
    'Preserve document hierarchy, section IDs, and image file paths during ingestion.',
    'Pass section-tree and image-pointer metadata into the agent before it answers with visuals.',
    'Enable the recommended Document RAG Safety templates as pre-action gates.',
    'Use a vision filter only for high-impact answers that make claims about visual content.',
  ];

  return {
    name: 'thumbgate-proxy-pointer-rag-guardrails',
    // 'actionable' means at least one template applies to the supplied options.
    status: recommendedCount > 0 ? 'actionable' : 'ready',
    ragTool: options.ragTool,
    treePath: options.treePath,
    summary,
    signals,
    templates: annotated,
    nextActions,
    exampleCommand: 'npx thumbgate proxy-pointer-rag-guardrails --tree-path=.rag/tree.json --image-pointers=paper-1/figures/fig2.png --documents=paper-1 --visual-claims --json',
  };
}
|
|
149
|
+
|
|
150
|
+
/**
 * Render a proxy-pointer RAG guardrails report as plain text.
 *
 * @param {Object} report - Report produced by buildProxyPointerRagGuardrailsPlan.
 * @returns {string} Multi-line text ending in a blank line.
 */
function formatProxyPointerRagGuardrailsPlan(report) {
  const { summary } = report;

  const header = [
    '',
    'ThumbGate Proxy-Pointer RAG Guardrails',
    '-'.repeat(42),
    `Status : ${report.status}`,
    `RAG tool : ${report.ragTool}`,
  ];
  if (report.treePath) header.push(`Tree path: ${report.treePath}`);
  header.push(`Signals : ${summary.signalCount}`);
  header.push(`Templates: ${summary.recommendedTemplateCount}/${summary.templateCount} recommended`);

  const signalSection = [];
  if (report.signals.length > 0) {
    signalSection.push('', 'Detected document/RAG risk signals:');
    for (const signal of report.signals) {
      signalSection.push(` - ${signal.label}: ${signal.values.join(', ')}`, ` Risk: ${signal.risk}`);
    }
  }

  const templateSection = ['', 'Recommended templates:'];
  for (const entry of report.templates) {
    if (entry.recommended) {
      templateSection.push(` - ${entry.id} [${entry.defaultAction}]`, ` ${entry.roi}`);
    }
  }
  // The hint is keyed off the summary count rather than the loop above.
  if (summary.recommendedTemplateCount === 0) {
    templateSection.push(' - No document/RAG signals were passed. Start with --tree-path, --image-pointers, --documents, or --visual-claims.');
  }

  const footer = [
    '',
    'Next actions:',
    ...Array.from(report.nextActions, (action) => ` - ${action}`),
    '',
    `Example: ${report.exampleCommand}`,
    '',
  ];

  const text = [...header, ...signalSection, ...templateSection, ...footer].join('\n');
  return `${text}\n`;
}
|
|
184
|
+
|
|
185
|
+
module.exports = {
|
|
186
|
+
buildProxyPointerRagGuardrailsPlan,
|
|
187
|
+
formatProxyPointerRagGuardrailsPlan,
|
|
188
|
+
normalizeOptions,
|
|
189
|
+
};
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
const { listGateTemplates } = require('./gate-templates');
|
|
5
|
+
|
|
6
|
+
const DOCUMENT_RAG_CATEGORY = 'Document RAG Safety';
|
|
7
|
+
const PRECISION_TEMPLATE_IDS = new Set([
|
|
8
|
+
'require-rag-baseline-before-precision-tuning',
|
|
9
|
+
'require-two-stage-rag-verifier-for-structural-near-misses',
|
|
10
|
+
'checkpoint-rag-latency-precision-tradeoff',
|
|
11
|
+
]);
|
|
12
|
+
|
|
13
|
+
/**
 * Coerce an option value to a boolean.
 *
 * `true`/`false` pass through, missing values are false, and any other value is
 * true only when its trimmed string form is 1/true/yes/on (case-insensitive).
 *
 * @param {*} value - Raw option value.
 * @returns {boolean} The interpreted flag.
 */
function normalizeBoolean(value) {
  switch (value) {
    case true:
      return true;
    case false:
    case undefined:
    case null:
      return false;
    default:
      return /^(1|true|yes|on)$/i.test(String(value).trim());
  }
}
|
|
18
|
+
|
|
19
|
+
/**
 * Parse an option value as a finite number.
 *
 * @param {*} value - Raw option value.
 * @returns {(number|null)} The parsed number, or null when the value is
 *   undefined, null, an empty string, or not a finite number.
 */
function toNumber(value) {
  const isBlank = value === undefined || value === null || value === '';
  if (isBlank) return null;
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) return null;
  return parsed;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Turn raw CLI-style options into the normalized shape used by this module.
 *
 * Numeric fields become a finite number or null; flag fields become booleans.
 * Several fields accept an alias key that is used when the primary key is absent.
 *
 * @param {Object} [options={}] - Raw options keyed by their dashed flag names.
 * @returns {Object} Normalized options: ragTool, baselineRecall, newRecall,
 *   baselinePrecision, newPrecision, topK, thresholdChanged, embeddingFineTune,
 *   structuralNearMisses, verifier, latencyMs, latencyBudgetMs, agenticPipeline.
 */
function normalizeOptions(options = {}) {
  // Same as `primary || fallback`, except that a numeric 0 on the primary key is
  // a real measurement (e.g. a zero latency budget) and must not be discarded.
  const metric = (primary, fallback) => toNumber(primary === 0 ? primary : (primary || fallback));

  return {
    ragTool: String(options['rag-tool'] || options.tool || 'agentic-rag').trim() || 'agentic-rag',
    baselineRecall: metric(options['baseline-recall'], options.recall),
    newRecall: toNumber(options['new-recall']),
    baselinePrecision: metric(options['baseline-precision'], options.precision),
    newPrecision: toNumber(options['new-precision']),
    topK: metric(options['top-k'], options.k),
    thresholdChanged: normalizeBoolean(options['threshold-change'] || options['threshold-changed']),
    embeddingFineTune: normalizeBoolean(options['embedding-finetune'] || options['embedding-fine-tune'] || options['fine-tune']),
    structuralNearMisses: normalizeBoolean(options['structural-near-misses'] || options['near-misses']),
    verifier: normalizeBoolean(options.verifier || options.reranker || options['second-stage']),
    latencyMs: metric(options['latency-ms'], options.latency),
    latencyBudgetMs: metric(options['latency-budget-ms'], options['latency-budget']),
    agenticPipeline: normalizeBoolean(options.agentic || options['agentic-pipeline']),
  };
}
|
|
42
|
+
|
|
43
|
+
/**
 * Relative recall drop from baseline to new, as a percentage rounded to 2 decimals.
 *
 * @param {{baselineRecall: (number|null), newRecall: (number|null)}} options
 * @returns {(number|null)} Percent drop (negative when recall improved), or null
 *   when either value is null or the baseline is not positive.
 */
function recallDropPercent(options) {
  const { baselineRecall, newRecall } = options;
  if (baselineRecall === null || newRecall === null) return null;
  // A non-positive baseline cannot serve as a denominator.
  if (baselineRecall <= 0) return null;
  const percent = ((baselineRecall - newRecall) / baselineRecall) * 100;
  return Number(percent.toFixed(2));
}
|
|
47
|
+
|
|
48
|
+
/**
 * Decide whether a precision-guardrail template applies to the given options.
 *
 * @param {{id: string}} template - Gate template; only `id` is inspected.
 * @param {Object} options - Normalized options (see normalizeOptions).
 * @returns {boolean} Whether the template is recommended; false for unknown ids.
 */
function templateApplicability(template, options) {
  // Evaluated lazily so the recall drop is only computed when earlier checks fail.
  const recallRegressed = () => {
    const drop = recallDropPercent(options);
    return drop !== null && drop > 5;
  };

  switch (template.id) {
    case 'require-rag-baseline-before-precision-tuning':
      return options.thresholdChanged
        || options.embeddingFineTune
        || options.baselineRecall === null
        || options.newRecall === null
        || recallRegressed();
    case 'require-two-stage-rag-verifier-for-structural-near-misses':
      return options.structuralNearMisses || (options.agenticPipeline && !options.verifier);
    case 'checkpoint-rag-latency-precision-tradeoff':
      return options.verifier
        || (options.latencyMs !== null && options.latencyBudgetMs !== null && options.latencyMs > options.latencyBudgetMs);
    default:
      return false;
  }
}
|
|
65
|
+
|
|
66
|
+
/**
 * Collect the retrieval risk signals that apply to the options.
 *
 * @param {Object} options - Normalized options (see normalizeOptions).
 * @returns {Array<Object>} Signals in the order precision tuning, cascade, latency.
 */
function buildSignals(options) {
  const recallDrop = recallDropPercent(options);
  const candidates = [
    precisionTuningSignal(options, recallDrop),
    ragCascadeSignal(options),
    verifierLatencySignal(options),
  ];

  // Each builder returns null when its signal does not apply.
  const detected = [];
  for (const candidate of candidates) {
    if (candidate) detected.push(candidate);
  }
  return detected;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Describe a precision-tuning change, if one is indicated.
 *
 * @param {{thresholdChanged: *, embeddingFineTune: *}} options - Normalized options.
 * @param {(number|null)} drop - Recall drop percentage, or null when unknown.
 * @returns {(Object|null)} Signal record, or null when neither flag is set and
 *   no recall drop is available.
 */
function precisionTuningSignal(options, drop) {
  const hasDrop = drop !== null;
  if (!options.thresholdChanged && !options.embeddingFineTune && !hasDrop) return null;

  const values = [];
  if (options.thresholdChanged) values.push('threshold changed');
  if (options.embeddingFineTune) values.push('embedding fine-tune');
  if (hasDrop) values.push(`recall drop ${drop}%`);

  return {
    id: 'precision_tuning',
    label: 'Precision tuning change',
    values,
    risk: 'precision wins can hide broad retrieval recall regressions',
  };
}
|
|
88
|
+
|
|
89
|
+
/**
 * Describe the agentic RAG cascade risk, if indicated.
 *
 * @param {{structuralNearMisses: *, agenticPipeline: *}} options - Normalized options.
 * @returns {(Object|null)} Signal record, or null when neither flag is set.
 */
function ragCascadeSignal(options) {
  if (!options.structuralNearMisses && !options.agenticPipeline) return null;

  const values = [];
  if (options.agenticPipeline) values.push('agentic pipeline');
  if (options.structuralNearMisses) values.push('structural near misses');

  return {
    id: 'agentic_rag_cascade',
    label: 'Agentic RAG cascade risk',
    values,
    risk: 'wrong retrieval can trigger downstream tool calls, not just wrong answers',
  };
}
|
|
101
|
+
|
|
102
|
+
/**
 * Describe the verifier latency tradeoff, if indicated.
 *
 * @param {{verifier: *, latencyMs: (number|null), latencyBudgetMs: (number|null)}} options
 * @returns {(Object|null)} Signal record, or null when no verifier is enabled
 *   and neither latency figure is present.
 */
function verifierLatencySignal(options) {
  const hasObserved = options.latencyMs !== null;
  const hasBudget = options.latencyBudgetMs !== null;
  if (!options.verifier && !hasObserved && !hasBudget) return null;

  const values = [];
  if (options.verifier) values.push('verifier enabled');
  if (hasObserved) values.push(`${options.latencyMs}ms observed`);
  if (hasBudget) values.push(`${options.latencyBudgetMs}ms budget`);

  return {
    id: 'verifier_latency',
    label: 'Verifier latency tradeoff',
    values,
    risk: 'second-stage verification needs a known latency budget',
  };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Build the RAG precision guardrails report.
 *
 * Loads the gate templates, keeps the Document RAG Safety templates whose ids
 * are in PRECISION_TEMPLATE_IDS, flags each with `recommended`, and bundles
 * them with the metrics and detected signals.
 *
 * @param {Object} [rawOptions={}] - Raw options, normalized via normalizeOptions.
 * @param {string} [templatesPath] - Passed through to listGateTemplates.
 * @returns {Object} Report with name, status, ragTool, metrics, summary,
 *   signals, templates, nextActions and exampleCommand.
 */
function buildRagPrecisionGuardrailsPlan(rawOptions = {}, templatesPath) {
  const options = normalizeOptions(rawOptions);

  const isPrecisionTemplate = (template) => (
    template.category === DOCUMENT_RAG_CATEGORY && PRECISION_TEMPLATE_IDS.has(template.id)
  );
  const templates = [];
  let recommendedCount = 0;
  for (const template of listGateTemplates(templatesPath)) {
    if (!isPrecisionTemplate(template)) continue;
    const recommended = templateApplicability(template, options);
    if (recommended) recommendedCount += 1;
    templates.push({ ...template, recommended });
  }

  const signals = buildSignals(options);

  const metrics = {
    topK: options.topK,
    baselineRecall: options.baselineRecall,
    newRecall: options.newRecall,
    recallDropPercent: recallDropPercent(options),
    baselinePrecision: options.baselinePrecision,
    newPrecision: options.newPrecision,
    latencyMs: options.latencyMs,
    latencyBudgetMs: options.latencyBudgetMs,
  };
  const summary = {
    signalCount: signals.length,
    templateCount: templates.length,
    recommendedTemplateCount: recommendedCount,
  };

  return {
    name: 'thumbgate-rag-precision-guardrails',
    // 'actionable' means at least one template applies to the supplied options.
    status: recommendedCount > 0 ? 'actionable' : 'ready',
    ragTool: options.ragTool,
    metrics,
    summary,
    signals,
    templates,
    nextActions: [
      'Save baseline recall@k, precision@k, answer-with-evidence, and latency before tuning retrieval.',
      'Block embedding or threshold changes when recall drops without an approved rollback plan.',
      'Use a second-stage verifier or reranker for structural near misses such as negation and role reversal.',
      'Attach verifier latency budgets before routing the retrieval output into autonomous agent actions.',
    ],
    exampleCommand: 'npx thumbgate rag-precision-guardrails --baseline-recall=0.86 --new-recall=0.72 --threshold-change --agentic --structural-near-misses --json',
  };
}
|
|
157
|
+
|
|
158
|
+
/**
 * Render a RAG precision guardrails report as plain text.
 *
 * @param {Object} report - Report produced by buildRagPrecisionGuardrailsPlan.
 * @returns {string} Multi-line text ending in a blank line.
 */
function formatRagPrecisionGuardrailsPlan(report) {
  const { summary, metrics } = report;

  const header = [
    '',
    'ThumbGate RAG Precision Guardrails',
    '-'.repeat(39),
    `Status : ${report.status}`,
    `RAG tool : ${report.ragTool}`,
    `Signals : ${summary.signalCount}`,
    `Templates: ${summary.recommendedTemplateCount}/${summary.templateCount} recommended`,
  ];
  if (metrics.recallDropPercent !== null) {
    header.push(`Recall drop: ${metrics.recallDropPercent}%`);
  }

  const signalSection = [];
  if (report.signals.length > 0) {
    signalSection.push('', 'Detected retrieval risk signals:');
    for (const signal of report.signals) {
      signalSection.push(` - ${signal.label}: ${signal.values.join(', ')}`, ` Risk: ${signal.risk}`);
    }
  }

  const templateSection = ['', 'Recommended templates:'];
  const picked = report.templates.filter((template) => template.recommended);
  if (picked.length > 0) {
    for (const template of picked) {
      templateSection.push(` - ${template.id} [${template.defaultAction}]`, ` ${template.roi}`);
    }
  } else {
    templateSection.push(' - No precision-risk signals were passed. Start with recall metrics, threshold changes, or verifier flags.');
  }

  const footer = [
    '',
    'Next actions:',
    ...Array.from(report.nextActions, (action) => ` - ${action}`),
    '',
    `Example: ${report.exampleCommand}`,
    '',
  ];

  const text = [...header, ...signalSection, ...templateSection, ...footer].join('\n');
  return `${text}\n`;
}
|
|
196
|
+
|
|
197
|
+
module.exports = {
|
|
198
|
+
buildRagPrecisionGuardrailsPlan,
|
|
199
|
+
formatRagPrecisionGuardrailsPlan,
|
|
200
|
+
normalizeOptions,
|
|
201
|
+
recallDropPercent,
|
|
202
|
+
};
|
package/scripts/rate-limiter.js
CHANGED
|
@@ -36,7 +36,7 @@ const PAYWALL_MESSAGES = {
|
|
|
36
36
|
prevention_rules: 'Free tier includes 1 prevention rule. Your agents need more protection — upgrade to Pro for unlimited rules.',
|
|
37
37
|
recall: 'Recall is a Pro feature. Your past feedback is stored locally — upgrade to search and reuse it.',
|
|
38
38
|
search_lessons: 'Lesson search is a Pro feature. Upgrade to find patterns in your agent\'s mistakes.',
|
|
39
|
-
default: 'This feature requires Pro. Start a 7-day
|
|
39
|
+
default: 'This feature requires Pro. Start a 7-day trial — card required; no charge today.',
|
|
40
40
|
};
|
|
41
41
|
|
|
42
42
|
function isProTier(authContext) {
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
const { listGateTemplates } = require('./gate-templates');
|
|
5
|
+
|
|
6
|
+
const CATEGORY = 'Reasoning Efficiency Safety';
|
|
7
|
+
|
|
8
|
+
/**
 * Coerce an option value to a boolean.
 *
 * `true`/`false` pass through, missing values are false, and any other value is
 * true only when its trimmed string form is 1/true/yes/on (case-insensitive).
 *
 * @param {*} value - Raw option value.
 * @returns {boolean} The interpreted flag.
 */
function normalizeBoolean(value) {
  const isAbsent = value === undefined || value === null;
  if (isAbsent || value === false) return false;
  return value === true || /^(1|true|yes|on)$/i.test(String(value).trim());
}
|
|
13
|
+
|
|
14
|
+
/**
 * Parse an option value as a finite number.
 *
 * @param {*} value - Raw option value.
 * @returns {(number|null)} The parsed number, or null when the value is
 *   undefined, null, an empty string, or not a finite number.
 */
function toNumber(value) {
  if (value === undefined) return null;
  if (value === null) return null;
  if (value === '') return null;
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Turn raw CLI-style options into the normalized shape used by this module.
 *
 * Numeric fields become a finite number or null; flag fields become booleans.
 * The accuracy fields also accept `baseline-pass1` / `compressed-pass1` as
 * aliases when the primary key is absent.
 *
 * @param {Object} [options={}] - Raw options keyed by their dashed flag names.
 * @returns {Object} Normalized options: workload, baselineTokens, compressedTokens,
 *   baselineAccuracy, compressedAccuracy, verifier, lowConfidenceSteps,
 *   highConfidenceFailures, truncationFailures.
 */
function normalizeOptions(options = {}) {
  // Same as `primary || fallback`, except that a numeric 0 on the primary key is
  // kept: an accuracy of 0 is a total collapse, not a missing measurement.
  const metric = (primary, fallback) => toNumber(primary === 0 ? primary : (primary || fallback));

  return {
    workload: String(options.workload || options.name || 'reasoning').trim() || 'reasoning',
    baselineTokens: toNumber(options['baseline-tokens']),
    compressedTokens: toNumber(options['compressed-tokens']),
    baselineAccuracy: metric(options['baseline-accuracy'], options['baseline-pass1']),
    compressedAccuracy: metric(options['compressed-accuracy'], options['compressed-pass1']),
    verifier: normalizeBoolean(options.verifier || options['verification-outcomes']),
    lowConfidenceSteps: toNumber(options['low-confidence-steps']),
    highConfidenceFailures: toNumber(options['high-confidence-failures']),
    truncationFailures: normalizeBoolean(options['truncation-failures']),
  };
}
|
|
33
|
+
|
|
34
|
+
/**
 * Relative token reduction from baseline to compressed, as a percentage
 * rounded to 2 decimals.
 *
 * @param {{baselineTokens: (number|null), compressedTokens: (number|null)}} options
 * @returns {(number|null)} Percent reduction (negative when tokens grew), or null
 *   when either value is null or the baseline is not positive.
 */
function tokenReductionPercent(options) {
  const { baselineTokens, compressedTokens } = options;
  if (baselineTokens === null || compressedTokens === null) return null;
  // A non-positive baseline cannot serve as a denominator.
  if (baselineTokens <= 0) return null;
  const percent = ((baselineTokens - compressedTokens) / baselineTokens) * 100;
  return Number(percent.toFixed(2));
}
|
|
38
|
+
|
|
39
|
+
/**
 * Accuracy change from baseline to compressed, rounded to 4 decimals.
 *
 * @param {{baselineAccuracy: (number|null), compressedAccuracy: (number|null)}} options
 * @returns {(number|null)} compressed minus baseline, or null when either is null.
 */
function accuracyDelta(options) {
  const { baselineAccuracy, compressedAccuracy } = options;
  const isMissing = baselineAccuracy === null || compressedAccuracy === null;
  if (isMissing) return null;
  const difference = compressedAccuracy - baselineAccuracy;
  return Number(difference.toFixed(4));
}
|
|
43
|
+
|
|
44
|
+
/**
 * Decide whether a reasoning-efficiency template applies to the given options.
 *
 * @param {{id: string}} template - Gate template; only `id` is inspected.
 * @param {Object} options - Normalized options (see normalizeOptions).
 * @returns {boolean} Whether the template is recommended; false for unknown ids.
 */
function templateApplicability(template, options) {
  switch (template.id) {
    case 'require-verifier-before-reasoning-compression': {
      if (!options.verifier) return true;
      // With a verifier present, the gate still applies when accuracy is
      // unknown or has dropped by more than 0.01.
      const delta = accuracyDelta(options);
      return delta === null || delta < -0.01;
    }
    case 'checkpoint-low-confidence-reasoning-steps':
      return options.lowConfidenceSteps !== null && options.lowConfidenceSteps > 0;
    case 'checkpoint-high-confidence-failed-rollout':
      return (options.highConfidenceFailures !== null && options.highConfidenceFailures > 0) || options.truncationFailures;
    default:
      return false;
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Collect the reasoning-efficiency signals that apply to the options.
 *
 * @param {Object} options - Normalized options (see normalizeOptions).
 * @returns {Array<{id: string, label: string, values: string[], risk: string}>}
 *   Signals in the order compression, low-confidence steps, failed rollouts.
 */
function buildSignals(options) {
  const reduction = tokenReductionPercent(options);
  const accuracyChange = accuracyDelta(options);
  const hasLowConfidenceSteps = options.lowConfidenceSteps !== null && options.lowConfidenceSteps > 0;
  const hasConfidentFailures = options.highConfidenceFailures !== null && options.highConfidenceFailures > 0;
  const found = [];

  // A missing verifier raises the compression signal on its own, even without metrics.
  if (reduction !== null || accuracyChange !== null || !options.verifier) {
    const values = [];
    if (reduction !== null) values.push(`${reduction}% token reduction`);
    if (accuracyChange !== null) values.push(`${accuracyChange} accuracy delta`);
    values.push(options.verifier ? 'verifier present' : 'missing verifier');
    found.push({
      id: 'reasoning_compression',
      label: 'Reasoning compression rollout',
      values,
      risk: 'shorter traces can reduce cost while destabilizing accuracy',
    });
  }

  if (hasLowConfidenceSteps) {
    found.push({
      id: 'low_confidence_steps',
      label: 'Low-confidence accepted steps',
      values: [`${options.lowConfidenceSteps} step(s)`],
      risk: 'successful rollouts can still contain brittle intermediate reasoning',
    });
  }

  if (hasConfidentFailures || options.truncationFailures) {
    const values = [];
    if (options.highConfidenceFailures !== null) values.push(`${options.highConfidenceFailures} failure(s)`);
    if (options.truncationFailures) values.push('truncation failure');
    found.push({
      id: 'failed_confident_rollouts',
      label: 'High-confidence failed rollout',
      values,
      risk: 'failed rollouts may reflect verifier noise or truncation rather than bad reasoning',
    });
  }

  return found;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Build the reasoning-efficiency guardrails report.
 *
 * Loads the gate templates, keeps those in the Reasoning Efficiency Safety
 * category, flags each with `recommended`, and bundles them with the metrics
 * and detected signals.
 *
 * @param {Object} [rawOptions={}] - Raw options, normalized via normalizeOptions.
 * @param {string} [templatesPath] - Passed through to listGateTemplates.
 * @returns {Object} Report with name, status, workload, metrics, summary,
 *   signals, templates, nextActions and exampleCommand.
 */
function buildReasoningEfficiencyGuardrailsPlan(rawOptions = {}, templatesPath) {
  const options = normalizeOptions(rawOptions);

  const templates = [];
  let recommendedCount = 0;
  for (const template of listGateTemplates(templatesPath)) {
    if (template.category !== CATEGORY) continue;
    const recommended = templateApplicability(template, options);
    if (recommended) recommendedCount += 1;
    templates.push({ ...template, recommended });
  }

  const signals = buildSignals(options);

  const metrics = {
    baselineTokens: options.baselineTokens,
    compressedTokens: options.compressedTokens,
    tokenReductionPercent: tokenReductionPercent(options),
    baselineAccuracy: options.baselineAccuracy,
    compressedAccuracy: options.compressedAccuracy,
    accuracyDelta: accuracyDelta(options),
  };
  const summary = {
    signalCount: signals.length,
    templateCount: templates.length,
    recommendedTemplateCount: recommendedCount,
  };

  return {
    name: 'thumbgate-reasoning-efficiency-guardrails',
    // 'actionable' means at least one template applies to the supplied options.
    status: recommendedCount > 0 ? 'actionable' : 'ready',
    workload: options.workload,
    metrics,
    summary,
    signals,
    templates,
    nextActions: [
      'Keep a verifier and pass@1 baseline before compressing reasoning traces.',
      'Inspect low-confidence steps even when the final rollout is correct.',
      'Inspect high-confidence failed rollouts for truncation or verifier noise before penalizing the trace.',
      'Route cheaper compressed reasoning only after accuracy and efficiency both clear the gate.',
    ],
    exampleCommand: 'npx thumbgate reasoning-efficiency-guardrails --baseline-tokens=1200 --compressed-tokens=980 --baseline-accuracy=0.84 --compressed-accuracy=0.85 --verifier --json',
  };
}
|
|
134
|
+
|
|
135
|
+
/**
 * Render a reasoning-efficiency guardrails report as plain text.
 *
 * @param {Object} report - Report produced by buildReasoningEfficiencyGuardrailsPlan.
 * @returns {string} Multi-line text ending in a blank line.
 */
function formatReasoningEfficiencyGuardrailsPlan(report) {
  const { summary, metrics } = report;

  const header = [
    '',
    'ThumbGate Reasoning Efficiency Guardrails',
    '-'.repeat(43),
    `Status : ${report.status}`,
    `Workload: ${report.workload}`,
    `Signals : ${summary.signalCount}`,
    `Templates: ${summary.recommendedTemplateCount}/${summary.templateCount} recommended`,
  ];
  if (metrics.tokenReductionPercent !== null) {
    header.push(`Token reduction: ${metrics.tokenReductionPercent}%`);
  }
  if (metrics.accuracyDelta !== null) {
    header.push(`Accuracy delta : ${metrics.accuracyDelta}`);
  }

  const signalSection = [];
  if (report.signals.length > 0) {
    signalSection.push('', 'Detected reasoning-efficiency signals:');
    for (const signal of report.signals) {
      signalSection.push(` - ${signal.label}: ${signal.values.join(', ')}`, ` Risk: ${signal.risk}`);
    }
  }

  const templateSection = ['', 'Recommended templates:'];
  const picked = report.templates.filter((template) => template.recommended);
  if (picked.length === 0) {
    templateSection.push(' - No reasoning-efficiency risks were passed.');
  }
  for (const template of picked) {
    templateSection.push(` - ${template.id} [${template.defaultAction}]`, ` ${template.roi}`);
  }

  const footer = [
    '',
    'Next actions:',
    ...Array.from(report.nextActions, (action) => ` - ${action}`),
    '',
    `Example: ${report.exampleCommand}`,
    '',
  ];

  const text = [...header, ...signalSection, ...templateSection, ...footer].join('\n');
  return `${text}\n`;
}
|
|
169
|
+
|
|
170
|
+
module.exports = {
|
|
171
|
+
buildReasoningEfficiencyGuardrailsPlan,
|
|
172
|
+
formatReasoningEfficiencyGuardrailsPlan,
|
|
173
|
+
normalizeOptions,
|
|
174
|
+
tokenReductionPercent,
|
|
175
|
+
accuracyDelta,
|
|
176
|
+
};
|