thumbgate 1.16.13 → 1.16.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +3 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +26 -1
- package/adapters/opencode/opencode.json +1 -1
- package/bin/cli.js +420 -1
- package/bin/postinstall.js +2 -2
- package/config/gate-templates.json +372 -0
- package/config/mcp-allowlists.json +25 -0
- package/config/model-candidates.json +59 -2
- package/config/model-tiers.json +4 -1
- package/package.json +79 -22
- package/public/compare.html +6 -0
- package/public/index.html +153 -20
- package/public/numbers.html +6 -6
- package/public/pro.html +25 -27
- package/scripts/agent-design-governance.js +211 -0
- package/scripts/agent-reasoning-traces.js +683 -0
- package/scripts/agent-reward-model.js +438 -0
- package/scripts/agent-stack-survival-audit.js +231 -0
- package/scripts/ai-engineering-stack-guardrails.js +256 -0
- package/scripts/billing.js +33 -5
- package/scripts/chatgpt-ads-readiness-pack.js +195 -0
- package/scripts/cli-schema.js +277 -0
- package/scripts/code-graph-guardrails.js +176 -0
- package/scripts/commercial-offer.js +1 -1
- package/scripts/deepseek-v4-runtime-guardrails.js +253 -0
- package/scripts/gemini-embedding-policy.js +198 -0
- package/scripts/inference-cache-policy.js +39 -0
- package/scripts/judge-reward-function.js +396 -0
- package/scripts/llm-behavior-monitor.js +251 -0
- package/scripts/long-running-agent-context-guardrails.js +176 -0
- package/scripts/multimodal-retrieval-plan.js +31 -11
- package/scripts/oss-pr-opportunity-scout.js +240 -0
- package/scripts/proactive-agent-eval-guardrails.js +230 -0
- package/scripts/profile-router.js +5 -4
- package/scripts/prompting-operating-system.js +273 -0
- package/scripts/proxy-pointer-rag-guardrails.js +189 -0
- package/scripts/rag-precision-guardrails.js +202 -0
- package/scripts/rate-limiter.js +1 -1
- package/scripts/reasoning-efficiency-guardrails.js +176 -0
- package/scripts/reward-hacking-guardrails.js +251 -0
- package/scripts/seo-gsd.js +1201 -11
- package/scripts/single-use-credential-gate.js +182 -0
- package/scripts/structured-prompt-driven.js +226 -0
- package/scripts/telemetry-analytics.js +108 -6
- package/scripts/tool-registry.js +92 -0
- package/scripts/upstream-contribution-engine.js +379 -0
- package/scripts/vector-store.js +119 -4
- package/src/api/server.js +455 -143
- package/scripts/agents-sdk-sandbox-plan.js +0 -57
- package/scripts/ai-org-governance.js +0 -98
- package/scripts/artifact-agent-plan.js +0 -81
- package/scripts/enterprise-agent-rollout.js +0 -34
- package/scripts/experience-replay-governance.js +0 -69
- package/scripts/inference-economics.js +0 -53
- package/scripts/knowledge-layer-plan.js +0 -108
- package/scripts/memory-store-governance.js +0 -60
- package/scripts/post-training-governance.js +0 -34
- package/scripts/production-agent-readiness.js +0 -40
- package/scripts/scaling-law-claims.js +0 -60
- package/scripts/student-consistent-training.js +0 -73
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
// Flags tool names that contain a high-risk keyword as a standalone token.
// The keyword must be bounded by the start/end of the string or by a character
// that is not a letter or digit, so `stripe_refund` matches while `deployment`
// does not. Because the pattern is case-insensitive, an uppercase letter also
// counts as a letter here: camelCase names such as `sendEmail` are not matched.
const HIGH_RISK_KEYWORDS = /(^|[^a-z0-9])(delete|deploy|drop|finance|invoice|payment|production|publish|refund|secret|send|stripe|write)([^a-z0-9]|$)/i;
|
|
5
|
+
|
|
6
|
+
/**
 * Coerce a loosely-typed value (CLI flag, JSON field) into a finite number.
 *
 * `Number()` maps `null`, `''`, and whitespace-only strings to `0`, which
 * would silently mask a missing value. Those inputs are treated as absent and
 * yield the fallback instead.
 *
 * @param {*} value - Raw value to convert.
 * @param {number} [fallback=0] - Returned when `value` is absent or is not a finite number.
 * @returns {number} The parsed finite number, or `fallback`.
 */
function parseNumber(value, fallback = 0) {
  if (value === undefined || value === null) return fallback;
  if (typeof value === 'string' && value.trim() === '') return fallback;
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : fallback;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Interpret a loosely-typed flag value as a boolean.
 *
 * @param {*} value - Raw value; real booleans are returned unchanged.
 * @param {boolean} [fallback=false] - Returned only when `value` is
 *   `undefined`, `null`, or the empty string.
 * @returns {boolean} `true` when the trimmed string form is one of
 *   `1`, `true`, `yes`, `on` (case-insensitive); otherwise `false`.
 */
function parseBoolean(value, fallback = false) {
  const isMissing = value === undefined || value === null || value === '';
  if (isMissing) {
    return fallback;
  }
  if (typeof value === 'boolean') {
    return value;
  }
  const text = String(value).trim();
  return /^(1|true|yes|on)$/i.test(text);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Normalize a list-like option into an array of trimmed, non-empty strings.
 *
 * Arrays are used element by element; any other value is stringified and
 * split on commas (falsy values become an empty list).
 *
 * @param {*} value - An array of items or a comma-separated string.
 * @returns {string[]} Trimmed entries with empty ones removed.
 */
function splitList(value) {
  const items = Array.isArray(value)
    // Drop null/undefined entries up front: stringifying them would produce
    // the bogus names 'null' and 'undefined'.
    ? value.filter((item) => item !== null && item !== undefined)
    : String(value || '').split(',');
  return items.map((item) => String(item).trim()).filter(Boolean);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Normalize raw workflow options into the shape used by the scoring functions.
 *
 * Each setting is read from its kebab-case key first and falls back to the
 * camelCase key when the kebab-case value is falsy. Tools whose names match
 * HIGH_RISK_KEYWORDS are added to the explicitly declared high-risk tools,
 * and the combined list is de-duplicated.
 *
 * @param {object|null} [raw={}] - Raw options; `null` is treated like an empty object.
 * @returns {object} Normalized options: `workflow`, numeric counts,
 *   `autonomyLevel`, the `tools` / `highRiskTools` / `writeTools` lists, and
 *   the boolean readiness flags.
 */
function normalizeOptions(raw = {}) {
  // A default parameter only covers `undefined`; an explicit `null` would
  // otherwise throw on the first property read below.
  const source = raw === null ? {} : raw;
  const tools = splitList(source.tools || source.toolNames);
  const declaredHighRisk = splitList(source['high-risk-tools'] || source.highRiskTools);
  const inferredHighRisk = tools.filter((tool) => HIGH_RISK_KEYWORDS.test(tool));
  return {
    workflow: String(source.workflow || source.name || 'agent workflow').trim() || 'agent workflow',
    // When no explicit count is given, the number of listed tools is used.
    toolCount: parseNumber(source['tool-count'] || source.toolCount || tools.length, tools.length),
    similarToolCount: parseNumber(source['similar-tool-count'] || source.similarToolCount, 0),
    conditionalBranches: parseNumber(source['conditional-branches'] || source.conditionalBranches, 0),
    handoffCount: parseNumber(source['handoff-count'] || source.handoffCount, 0),
    autonomyLevel: String(source['autonomy-level'] || source.autonomyLevel || 'assisted').trim().toLowerCase(),
    tools,
    highRiskTools: [...new Set(declaredHighRisk.concat(inferredHighRisk))],
    writeTools: splitList(source['write-tools'] || source.writeTools),
    hasBaselineEvals: parseBoolean(source['baseline-evals'] || source.hasBaselineEvals, false),
    hasDocs: parseBoolean(source.docs || source.hasDocs, false),
    hasExamples: parseBoolean(source.examples || source.hasExamples, false),
    hasEdgeCases: parseBoolean(source['edge-cases'] || source.hasEdgeCases, false),
    hasToolApprovals: parseBoolean(source['tool-approvals'] || source.hasToolApprovals, false),
    hasExitCondition: parseBoolean(source['exit-condition'] || source.hasExitCondition, false),
    reversibleActions: parseBoolean(source['reversible-actions'] || source.reversibleActions, false),
  };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Score the risk of a workflow's tool set.
 *
 * Points are added for each signal that applies: high-risk tools (35),
 * write-capable tools (20), and, only when either kind of tool is present,
 * actions not marked reversible (20) and missing tool approvals (25).
 *
 * @param {object} options - Normalized options; reads `highRiskTools`,
 *   `writeTools`, `reversibleActions`, and `hasToolApprovals`.
 * @returns {{risk: string, score: number, reasons: string[]}} `risk` is
 *   `'high'` at 70 points or more, `'medium'` at 35 or more, else `'low'`;
 *   `score` is capped at 100; `reasons` lists the signals that applied.
 */
function scoreToolRisk(options) {
  const highRiskCount = options.highRiskTools.length;
  const writeCount = options.writeTools.length;
  const hasRiskyTools = highRiskCount > 0 || writeCount > 0;

  const signals = [
    {
      applies: highRiskCount > 0,
      points: 35,
      reason: `${highRiskCount} high-risk tool(s) can affect production, money, data, secrets, or outbound actions`,
    },
    {
      applies: writeCount > 0,
      points: 20,
      reason: `${writeCount} write-capable tool(s) need approval and audit trails`,
    },
    {
      applies: !options.reversibleActions && hasRiskyTools,
      points: 20,
      reason: 'some actions are not marked reversible',
    },
    {
      applies: !options.hasToolApprovals && hasRiskyTools,
      points: 25,
      reason: 'tool approvals are missing for risky tools',
    },
  ];

  const active = signals.filter((signal) => signal.applies);
  const total = active.reduce((sum, signal) => sum + signal.points, 0);

  let risk = 'low';
  if (total >= 70) {
    risk = 'high';
  } else if (total >= 35) {
    risk = 'medium';
  }

  return {
    risk,
    score: Math.min(100, total),
    reasons: active.map((signal) => signal.reason),
  };
}
|
|
69
|
+
|
|
70
|
+
/**
 * Score instruction quality from four equally weighted checks: documentation,
 * examples, edge cases, and an exit condition.
 *
 * @param {object} options - Normalized options; reads `hasDocs`,
 *   `hasExamples`, `hasEdgeCases`, and `hasExitCondition`.
 * @returns {{score: number, checks: object[], missing: string[]}} `score` is
 *   the rounded percentage of passed checks, `checks` holds every check with
 *   its `id`, `passed`, and `label`, and `missing` lists the labels of the
 *   checks that did not pass.
 */
function scoreInstructions(options) {
  // [check id, option flag that satisfies it, human-readable label]
  const specs = [
    ['docs', 'hasDocs', 'draws on existing workflow documentation'],
    ['examples', 'hasExamples', 'includes concrete successful examples'],
    ['edge_cases', 'hasEdgeCases', 'covers edge cases and failure paths'],
    ['exit_condition', 'hasExitCondition', 'defines when the run is complete'],
  ];
  const checks = specs.map(([id, flag, label]) => ({ id, passed: options[flag], label }));

  const missing = [];
  let passedCount = 0;
  for (const check of checks) {
    if (check.passed) {
      passedCount += 1;
    } else {
      missing.push(check.label);
    }
  }

  return {
    score: Math.round((passedCount / checks.length) * 100),
    checks,
    missing,
  };
}
|
|
84
|
+
|
|
85
|
+
/**
 * Recommend an agent architecture for the workflow.
 *
 * Instruction complexity (8 or more conditional branches) or tool overload
 * (4 or more similar tools, or 10 or more tools with at least 2 similar)
 * selects `'manager'`. Otherwise two or more handoffs without a high tool
 * risk select `'decentralized'`. Everything else stays `'single_agent'`.
 *
 * @param {object} options - Normalized options; reads `conditionalBranches`,
 *   `similarToolCount`, `toolCount`, and `handoffCount`.
 * @param {{risk: string}} toolRisk - Tool risk result; only `risk` is read.
 * @param {{score: number}} instructionQuality - Instruction score; only used
 *   to pick the reason text for the single-agent recommendation.
 * @returns {{architecture: string, reason: string, triggers: string[]}}
 */
function selectArchitecture(options, toolRisk, instructionQuality) {
  const { conditionalBranches, similarToolCount, toolCount, handoffCount } = options;
  const tooComplex = conditionalBranches >= 8;
  const overloaded = similarToolCount >= 4 || (toolCount >= 10 && similarToolCount >= 2);

  const triggers = [];
  if (tooComplex) {
    triggers.push('instruction_complexity');
  }
  if (overloaded) {
    triggers.push('tool_overload');
  }
  if (handoffCount > 0) {
    triggers.push('existing_handoffs');
  }

  let architecture;
  let reason;
  if (overloaded || tooComplex) {
    architecture = 'manager';
    reason = 'split specialized responsibilities behind a manager agent because instructions or similar tools are becoming hard to route reliably';
  } else if (handoffCount >= 2 && toolRisk.risk !== 'high') {
    architecture = 'decentralized';
    reason = 'peer handoffs can work because the workflow already has explicit handoff points and no high-risk tool profile';
  } else {
    architecture = 'single_agent';
    if (instructionQuality.score < 75) {
      reason = 'improve instructions and evals before adding orchestration complexity';
    } else {
      reason = 'a single agent with clearer tools and instructions should stay cheaper to evaluate and maintain';
    }
  }

  return { architecture, reason, triggers };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Collect the blockers that apply to the proposed design.
 *
 * @param {object} options - Normalized options; reads `hasBaselineEvals` and
 *   `hasToolApprovals`.
 * @param {{risk: string}} toolRisk - Tool risk result; only `risk` is read.
 * @param {{architecture: string, triggers: string[]}} architecture -
 *   Architecture recommendation.
 * @returns {Array<{id: string, severity: string, message: string}>} Blockers
 *   in a fixed order: missing baseline evals, missing approvals under high
 *   tool risk, then a multi-agent recommendation with no triggers.
 */
function buildBlockers(options, toolRisk, architecture) {
  const candidates = [
    {
      applies: !options.hasBaselineEvals,
      blocker: {
        id: 'baseline_evals_required',
        severity: 'high',
        message: 'Establish baseline evals before adding tools, splitting agents, or increasing autonomy.',
      },
    },
    {
      applies: toolRisk.risk === 'high' && !options.hasToolApprovals,
      blocker: {
        id: 'tool_approval_required',
        severity: 'critical',
        message: 'High-risk tools need approval gates before autonomous use.',
      },
    },
    {
      applies: architecture.architecture !== 'single_agent' && architecture.triggers.length === 0,
      blocker: {
        id: 'multi_agent_without_trigger',
        severity: 'medium',
        message: 'Do not split agents without instruction-complexity, tool-overload, or explicit handoff evidence.',
      },
    },
  ];

  return candidates
    .filter((candidate) => candidate.applies)
    .map((candidate) => candidate.blocker);
}
|
|
141
|
+
|
|
142
|
+
/**
 * Build the full agent design governance report for a workflow.
 *
 * Normalizes the raw options, scores tool risk and instruction quality,
 * selects an architecture, and collects blockers.
 *
 * @param {object} [rawOptions={}] - Raw options, in kebab-case or camelCase.
 * @returns {object} Report with `name`, `workflow`, `sourcePattern`, `status`
 *   (`'blocked'` when any blocker is critical, `'needs_work'` when any
 *   blocker exists, else `'ready'`), `recommendation`, `toolRisk`,
 *   `instructionQuality`, `evals`, `blockers`, and `nextActions`.
 */
function buildAgentDesignGovernancePlan(rawOptions = {}) {
  const options = normalizeOptions(rawOptions);
  const toolRisk = scoreToolRisk(options);
  const instructionQuality = scoreInstructions(options);
  const recommendation = selectArchitecture(options, toolRisk, instructionQuality);
  const blockers = buildBlockers(options, toolRisk, recommendation);

  let status = 'ready';
  if (blockers.some((blocker) => blocker.severity === 'critical')) {
    status = 'blocked';
  } else if (blockers.length) {
    status = 'needs_work';
  }

  const evals = {
    baselinePresent: options.hasBaselineEvals,
    requiredBefore: ['new high-risk tools', 'multi-agent split', 'higher autonomy', 'auto-PR or deploy'],
  };

  const nextActions = [
    'Keep the workflow single-agent unless evals show instruction complexity or tool overload.',
    'Write tool descriptions with clear names, parameters, side effects, and approval requirements.',
    'Add examples and edge cases to instructions before adding subagents.',
    'Add baseline evals that grade tool choice, exit condition, recovery behavior, and unsafe action refusal.',
    'Assign low, medium, or high risk to every tool based on write access, reversibility, permissions, and financial or production impact.',
  ];

  return {
    name: 'thumbgate-agent-design-governance',
    workflow: options.workflow,
    sourcePattern: 'OpenAI practical agent guide: model + tools + instructions, single-agent first, eval-driven multi-agent splits',
    status,
    recommendation,
    toolRisk,
    instructionQuality,
    evals,
    blockers,
    nextActions,
  };
}
|
|
171
|
+
|
|
172
|
+
/**
 * Render a governance report as plain text for terminal output.
 *
 * The "Blockers" and "Tool risk signals" sections are emitted only when the
 * report has entries for them; "Next actions" is always emitted.
 *
 * @param {object} report - Report with `workflow`, `status`,
 *   `recommendation`, `toolRisk`, `instructionQuality`, `evals`, `blockers`,
 *   and `nextActions`.
 * @returns {string} Multi-line text that starts with a blank line and ends
 *   with a blank line followed by a newline.
 */
function formatAgentDesignGovernancePlan(report) {
  const bullet = (text) => ` - ${text}`;

  const summary = [
    '',
    'ThumbGate Agent Design Governance',
    '-'.repeat(35),
    `Workflow : ${report.workflow}`,
    `Status : ${report.status}`,
    `Pattern : ${report.recommendation.architecture}`,
    `Reason : ${report.recommendation.reason}`,
    `Tool risk: ${report.toolRisk.risk} (${report.toolRisk.score}/100)`,
    `Instruction score: ${report.instructionQuality.score}/100`,
    `Baseline evals: ${report.evals.baselinePresent ? 'present' : 'missing'}`,
  ];

  const blockerSection = report.blockers.length > 0
    ? ['', 'Blockers:', ...report.blockers.map((blocker) => bullet(`[${blocker.severity}] ${blocker.id}: ${blocker.message}`))]
    : [];

  const riskSection = report.toolRisk.reasons.length > 0
    ? ['', 'Tool risk signals:', ...report.toolRisk.reasons.map((reason) => bullet(reason))]
    : [];

  const actionSection = ['', 'Next actions:', ...report.nextActions.map((action) => bullet(action))];

  const output = [...summary, ...blockerSection, ...riskSection, ...actionSection, ''];
  return `${output.join('\n')}\n`;
}
|
|
203
|
+
|
|
204
|
+
module.exports = {
|
|
205
|
+
buildAgentDesignGovernancePlan,
|
|
206
|
+
formatAgentDesignGovernancePlan,
|
|
207
|
+
normalizeOptions,
|
|
208
|
+
scoreInstructions,
|
|
209
|
+
scoreToolRisk,
|
|
210
|
+
selectArchitecture,
|
|
211
|
+
};
|