thumbgate 1.16.12 → 1.16.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +3 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +26 -1
- package/adapters/opencode/opencode.json +1 -1
- package/bin/cli.js +420 -1
- package/config/gate-templates.json +372 -0
- package/config/mcp-allowlists.json +25 -0
- package/config/model-candidates.json +59 -2
- package/config/model-tiers.json +4 -1
- package/package.json +79 -22
- package/public/compare.html +6 -0
- package/public/index.html +144 -11
- package/public/numbers.html +11 -11
- package/public/pro.html +22 -24
- package/scripts/agent-design-governance.js +211 -0
- package/scripts/agent-reasoning-traces.js +683 -0
- package/scripts/agent-reward-model.js +438 -0
- package/scripts/agent-stack-survival-audit.js +231 -0
- package/scripts/ai-engineering-stack-guardrails.js +256 -0
- package/scripts/billing.js +16 -4
- package/scripts/chatgpt-ads-readiness-pack.js +195 -0
- package/scripts/cli-schema.js +277 -0
- package/scripts/code-graph-guardrails.js +176 -0
- package/scripts/deepseek-v4-runtime-guardrails.js +253 -0
- package/scripts/gemini-embedding-policy.js +198 -0
- package/scripts/inference-cache-policy.js +39 -0
- package/scripts/judge-reward-function.js +396 -0
- package/scripts/llm-behavior-monitor.js +251 -0
- package/scripts/long-running-agent-context-guardrails.js +176 -0
- package/scripts/multimodal-retrieval-plan.js +31 -11
- package/scripts/oss-pr-opportunity-scout.js +240 -0
- package/scripts/proactive-agent-eval-guardrails.js +230 -0
- package/scripts/profile-router.js +5 -4
- package/scripts/prompting-operating-system.js +273 -0
- package/scripts/proxy-pointer-rag-guardrails.js +189 -0
- package/scripts/rag-precision-guardrails.js +202 -0
- package/scripts/rate-limiter.js +1 -1
- package/scripts/reasoning-efficiency-guardrails.js +176 -0
- package/scripts/reward-hacking-guardrails.js +251 -0
- package/scripts/seo-gsd.js +1201 -11
- package/scripts/single-use-credential-gate.js +182 -0
- package/scripts/structured-prompt-driven.js +226 -0
- package/scripts/telemetry-analytics.js +31 -6
- package/scripts/tool-registry.js +92 -0
- package/scripts/upstream-contribution-engine.js +379 -0
- package/scripts/vector-store.js +119 -4
- package/src/api/server.js +333 -100
- package/scripts/agents-sdk-sandbox-plan.js +0 -57
- package/scripts/ai-org-governance.js +0 -98
- package/scripts/artifact-agent-plan.js +0 -81
- package/scripts/enterprise-agent-rollout.js +0 -34
- package/scripts/experience-replay-governance.js +0 -69
- package/scripts/inference-economics.js +0 -53
- package/scripts/knowledge-layer-plan.js +0 -108
- package/scripts/memory-store-governance.js +0 -60
- package/scripts/post-training-governance.js +0 -34
- package/scripts/production-agent-readiness.js +0 -40
- package/scripts/scaling-law-claims.js +0 -60
- package/scripts/student-consistent-training.js +0 -73
package/public/numbers.html
CHANGED
|
@@ -25,9 +25,9 @@
|
|
|
25
25
|
"alternateName": "thumbgate",
|
|
26
26
|
"applicationCategory": "DeveloperApplication",
|
|
27
27
|
"operatingSystem": "Cross-platform, Node.js >=18.18.0",
|
|
28
|
-
"softwareVersion": "1.16.
|
|
28
|
+
"softwareVersion": "1.16.19",
|
|
29
29
|
"url": "https://thumbgate-production.up.railway.app/numbers",
|
|
30
|
-
"dateModified": "2026-05-
|
|
30
|
+
"dateModified": "2026-05-04",
|
|
31
31
|
"creator": {
|
|
32
32
|
"@type": "Person",
|
|
33
33
|
"name": "Igor Ganapolsky",
|
|
@@ -57,8 +57,8 @@
|
|
|
57
57
|
"https://www.linkedin.com/in/igorganapolsky"
|
|
58
58
|
]
|
|
59
59
|
},
|
|
60
|
-
"dateModified": "2026-05-
|
|
61
|
-
"datePublished": "2026-05-
|
|
60
|
+
"dateModified": "2026-05-04",
|
|
61
|
+
"datePublished": "2026-05-04",
|
|
62
62
|
"keywords": [
|
|
63
63
|
"AI agent gates",
|
|
64
64
|
"LLM token savings",
|
|
@@ -70,7 +70,7 @@
|
|
|
70
70
|
{
|
|
71
71
|
"@type": "PropertyValue",
|
|
72
72
|
"name": "active_gates",
|
|
73
|
-
"value":
|
|
73
|
+
"value": 36
|
|
74
74
|
},
|
|
75
75
|
{
|
|
76
76
|
"@type": "PropertyValue",
|
|
@@ -101,7 +101,7 @@
|
|
|
101
101
|
{
|
|
102
102
|
"@type": "PropertyValue",
|
|
103
103
|
"name": "bayes_error_rate",
|
|
104
|
-
"value":
|
|
104
|
+
"value": null
|
|
105
105
|
}
|
|
106
106
|
]
|
|
107
107
|
}
|
|
@@ -190,14 +190,14 @@
|
|
|
190
190
|
<main class="container">
|
|
191
191
|
<h1>The Numbers</h1>
|
|
192
192
|
<p class="subtitle">Generated first-party operational data from the ThumbGate runtime. No surveys or projections — this page is a release-time snapshot produced by the same local scripts that power the CLI and dashboard.</p>
|
|
193
|
-
<div class="freshness">Updated: 2026-05-
|
|
193
|
+
<div class="freshness">Updated: 2026-05-04 · Version 1.16.19</div>
|
|
194
194
|
|
|
195
195
|
<h2>Gate enforcement</h2>
|
|
196
196
|
<div class="stats-grid">
|
|
197
197
|
<div class="stat-card">
|
|
198
198
|
<div class="stat-label">Active gates</div>
|
|
199
|
-
<div class="stat-value">
|
|
200
|
-
<div class="stat-sub">36 manual ·
|
|
199
|
+
<div class="stat-value">36</div>
|
|
200
|
+
<div class="stat-sub">36 manual · 0 auto-promoted</div>
|
|
201
201
|
<a class="stat-source" href="https://github.com/IgorGanapolsky/ThumbGate/blob/main/scripts/gate-stats.js">source: gate-stats.js</a>
|
|
202
202
|
</div>
|
|
203
203
|
<div class="stat-card">
|
|
@@ -242,7 +242,7 @@
|
|
|
242
242
|
</div>
|
|
243
243
|
<div class="stat-card">
|
|
244
244
|
<div class="stat-label">Scorer Bayes error</div>
|
|
245
|
-
<div class="stat-value">
|
|
245
|
+
<div class="stat-value">n/a (no feedback sequences recorded yet)</div>
|
|
246
246
|
<div class="stat-sub">irreducible error given current feature set</div>
|
|
247
247
|
<a class="stat-source" href="https://github.com/IgorGanapolsky/ThumbGate/blob/main/scripts/bayes-optimal-gate.js">source: bayes-optimal-gate.js</a>
|
|
248
248
|
</div>
|
|
@@ -264,7 +264,7 @@
|
|
|
264
264
|
<div class="cta">
|
|
265
265
|
<a href="https://www.npmjs.com/package/thumbgate">Install ThumbGate — npx thumbgate init</a>
|
|
266
266
|
<div class="footer-note">Prefer the raw feed? See <a href="https://github.com/IgorGanapolsky/ThumbGate">GitHub</a> or run <code>npm run gate:stats</code> locally.</div>
|
|
267
|
-
<div class="footer-note">Generated at 2026-05-
|
|
267
|
+
<div class="footer-note">Generated at 2026-05-04T21:47:27.878Z UTC.</div>
|
|
268
268
|
</div>
|
|
269
269
|
</main>
|
|
270
270
|
</body>
|
package/public/pro.html
CHANGED
|
@@ -804,11 +804,11 @@ __GA_BOOTSTRAP__
|
|
|
804
804
|
<section class="hero">
|
|
805
805
|
<div class="container hero-grid">
|
|
806
806
|
<div class="panel hero-copy">
|
|
807
|
-
<div class="eyebrow">
|
|
808
|
-
<h1>
|
|
807
|
+
<div class="eyebrow">Paid lane for individual operators</div>
|
|
808
|
+
<h1>Buy the operator loop that proves your AI agent stopped repeating the mistake.</h1>
|
|
809
809
|
<p style="font-size:13px;opacity:0.8;margin-bottom:0.5rem;">Updated: <time datetime="2026-04-20">2026-04-20</time> · by <a href="https://github.com/IgorGanapolsky" style="color:inherit;">Igor Ganapolsky</a></p>
|
|
810
|
-
<p>ThumbGate
|
|
811
|
-
<p>
|
|
810
|
+
<p>ThumbGate Pro is the fastest paid path for one operator who already hit a repeated AI-agent failure and now needs proof: what was blocked, why it was blocked, and what changed before the next risky run.</p>
|
|
811
|
+
<p>Start with the local-first Pro dashboard and DPO export. Move to Team only when one correction needs to protect multiple developers, agents, or shared repos.</p>
|
|
812
812
|
<div class="hero-proof">
|
|
813
813
|
<div class="proof-pill">Personal local dashboard</div>
|
|
814
814
|
<div class="proof-pill">DPO export from real corrections</div>
|
|
@@ -960,32 +960,30 @@ __GA_BOOTSTRAP__
|
|
|
960
960
|
<div class="container pricing-shell">
|
|
961
961
|
<div class="pricing-card">
|
|
962
962
|
<div class="section-label" style="text-align:left;margin-bottom:8px;">Pricing</div>
|
|
963
|
-
<h3>ThumbGate
|
|
964
|
-
<div class="price">$
|
|
965
|
-
<div class="annual"
|
|
966
|
-
<p class="pricing-note">
|
|
963
|
+
<h3>ThumbGate Pro</h3>
|
|
964
|
+
<div class="price">$19<span>/mo</span></div>
|
|
965
|
+
<div class="annual">$149/year available · 7-day trial · Card required, no charge today</div>
|
|
966
|
+
<p class="pricing-note">For the individual operator who wants a personal local dashboard, DPO export, review-ready evidence, and founder help on the first risky workflow.</p>
|
|
967
967
|
<ul>
|
|
968
|
-
<li><strong>
|
|
969
|
-
<li><strong>
|
|
970
|
-
<li><strong>
|
|
971
|
-
<li><strong>
|
|
972
|
-
<li><strong>Sandbox routing</strong> — route risky agent runs into isolated execution environments.</li>
|
|
973
|
-
<li><strong>Org dashboard</strong> — active agents, check hit rates, risk scores, and proof-backed team metrics.</li>
|
|
968
|
+
<li><strong>Personal local dashboard</strong> — inspect blocked actions, active checks, and lesson evidence without a cloud account.</li>
|
|
969
|
+
<li><strong>DPO export</strong> — turn real thumbs-down corrections into training pairs you can review or reuse.</li>
|
|
970
|
+
<li><strong>Review-ready proof</strong> — bring evidence links and blocked-action history to the next risky workflow review.</li>
|
|
971
|
+
<li><strong>Founder support</strong> — get help hardening the first force-push, deploy, migration, or CI failure that keeps repeating.</li>
|
|
974
972
|
</ul>
|
|
975
973
|
<div class="pricing-actions">
|
|
976
|
-
<a class="btn-primary" href="
|
|
977
|
-
<a class="btn-secondary btn-
|
|
974
|
+
<a class="btn-primary btn-pro-checkout" href="/checkout/pro?utm_source=website&utm_medium=pro_page_pricing&utm_campaign=pro_pack&cta_id=pricing_pro&cta_placement=pricing&plan_id=pro&landing_path=%2Fpro">Start 7-Day Free Trial</a>
|
|
975
|
+
<a class="btn-secondary btn-pro-checkout" href="/checkout/pro?utm_source=website&utm_medium=pro_page_pricing&utm_campaign=pro_pack&cta_id=pricing_pro_annual&cta_placement=pricing&plan_id=pro&billing_cycle=annual&landing_path=%2Fpro">Choose annual</a>
|
|
978
976
|
</div>
|
|
979
|
-
<div class="pricing-meta">
|
|
977
|
+
<div class="pricing-meta">Best for one operator with one repeated failure to prove. Stay on Free if you only need the local install; buy Pro when the dashboard, export, and proof trail save you time.</div>
|
|
980
978
|
</div>
|
|
981
979
|
|
|
982
980
|
<div class="pricing-sidebar">
|
|
983
981
|
<div class="team-card">
|
|
984
982
|
<div class="section-label" style="text-align:left;margin-bottom:8px;">When Team is better</div>
|
|
985
|
-
<h3>
|
|
986
|
-
<p>
|
|
983
|
+
<h3>Need shared enforcement?</h3>
|
|
984
|
+
<p>Choose Team when one correction must protect multiple developers or agents across shared repositories, CI, approval policies, and audit trails. Team is $49/seat/mo with a 3-seat minimum after qualification.</p>
|
|
987
985
|
<div class="hero-actions" style="margin-top:18px;">
|
|
988
|
-
<a class="btn-secondary" href="
|
|
986
|
+
<a class="btn-secondary" href="/#workflow-sprint-intake">Book a Team Pilot Call</a>
|
|
989
987
|
</div>
|
|
990
988
|
</div>
|
|
991
989
|
<div class="team-card">
|
|
@@ -1025,11 +1023,11 @@ __GA_BOOTSTRAP__
|
|
|
1025
1023
|
<section class="final-cta">
|
|
1026
1024
|
<div class="container">
|
|
1027
1025
|
<div class="final-shell">
|
|
1028
|
-
<h2>
|
|
1029
|
-
<p>
|
|
1026
|
+
<h2>Stop losing time to the same AI-agent failure.</h2>
|
|
1027
|
+
<p>Start Pro, harden one repeated mistake, and keep the proof trail: blocked action, lesson, prevention rule, and export path.</p>
|
|
1030
1028
|
<div class="hero-actions" style="justify-content:center;">
|
|
1031
|
-
<a class="btn-primary" href="
|
|
1032
|
-
<a class="btn-secondary btn-demo" href="/dashboard?utm_source=website&utm_medium=pro_page_final&utm_campaign=
|
|
1029
|
+
<a class="btn-primary btn-pro-checkout" href="/checkout/pro?utm_source=website&utm_medium=pro_page_final&utm_campaign=pro_pack&cta_id=final_go_pro&cta_placement=final&plan_id=pro&landing_path=%2Fpro">Start 7-Day Free Trial</a>
|
|
1030
|
+
<a class="btn-secondary btn-demo" href="/dashboard?utm_source=website&utm_medium=pro_page_final&utm_campaign=pro_pack">Open dashboard demo</a>
|
|
1033
1031
|
</div>
|
|
1034
1032
|
</div>
|
|
1035
1033
|
</div>
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
// Case-insensitive matcher for tool names that contain a high-risk keyword as a
// standalone token: the keyword must be preceded by the start of the string or a
// character other than a letter/digit, and followed by such a character or the
// end of the string. `delete_user` and `stripe.refund` match; `rewrite` does not.
// Because of the `i` flag, an uppercase letter also counts as a letter, so a
// camelCase name such as `deleteUser` does NOT match this pattern on its own.
const HIGH_RISK_KEYWORDS = /(^|[^a-z0-9])(delete|deploy|drop|finance|invoice|payment|production|publish|refund|secret|send|stripe|write)([^a-z0-9]|$)/i;
|
|
5
|
+
|
|
6
|
+
/**
 * Coerce a loosely typed option value into a finite number.
 *
 * Missing input (`undefined`, `null`, or a blank string) yields `fallback`
 * rather than being silently coerced to 0 by `Number()`.
 *
 * @param {*} value - Raw value, e.g. a CLI string or a number.
 * @param {number} [fallback=0] - Returned when `value` is missing or not finite.
 * @returns {number} The parsed finite number, or `fallback`.
 */
function parseNumber(value, fallback = 0) {
  if (value === undefined || value === null) return fallback;
  // Number('') and Number('   ') are 0, which would mask a missing value.
  if (typeof value === 'string' && value.trim() === '') return fallback;
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : fallback;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Interpret a loosely typed flag value as a boolean.
 *
 * Real booleans pass through unchanged. Missing input (`undefined`, `null`,
 * or a string that is empty after trimming) yields `fallback`. Anything else
 * is stringified, trimmed, and treated as true only when it equals `1`,
 * `true`, `yes`, or `on` (case-insensitive).
 *
 * @param {*} value - Raw flag value.
 * @param {boolean} [fallback=false] - Returned when `value` is missing.
 * @returns {boolean} The interpreted flag.
 */
function parseBoolean(value, fallback = false) {
  if (value === undefined || value === null) return fallback;
  if (typeof value === 'boolean') return value;
  const text = String(value).trim();
  // A whitespace-only value carries no information; treat it like ''.
  if (text === '') return fallback;
  return /^(1|true|yes|on)$/i.test(text);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Normalize a list-like option into an array of trimmed, non-empty strings.
 *
 * Arrays are used element by element; any other value is stringified (falsy
 * values become the empty string) and split on commas. `null`/`undefined`
 * array entries are dropped instead of turning into the literal strings
 * "null"/"undefined".
 *
 * @param {*} value - An array, a comma-separated string, or a missing value.
 * @returns {string[]} Trimmed entries with empty ones removed.
 */
function splitList(value) {
  const items = Array.isArray(value) ? value : String(value || '').split(',');
  return items
    .filter((item) => item !== undefined && item !== null)
    .map((item) => String(item).trim())
    .filter(Boolean);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Convert raw CLI/API options into the canonical option object used by the
 * scoring functions in this module.
 *
 * Each setting may be supplied under a kebab-case key (e.g. `tool-count`) or a
 * camelCase key (e.g. `toolCount`); the kebab-case key wins when it is truthy.
 * High-risk tools are the explicitly listed ones plus every entry of `tools`
 * whose name matches `HIGH_RISK_KEYWORDS`, de-duplicated. Tool names are also
 * tested after splitting camelCase/PascalCase word boundaries, so `deleteUser`
 * or `sendEmail` is flagged just like `delete_user` or `send_email`.
 *
 * @param {object|null} [raw={}] - Raw options; `null` is treated as empty.
 * @returns {object} Normalized options (counts as numbers, flags as booleans,
 *   lists as string arrays).
 */
function normalizeOptions(raw = {}) {
  const source = raw === null ? {} : raw;
  const tools = splitList(source.tools || source.toolNames);

  // The keyword regex is case-insensitive, so it cannot see camelCase word
  // boundaries by itself; test a copy of the name with those boundaries spaced.
  const looksHighRisk = (tool) => {
    const spaced = tool
      .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
      .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
    return HIGH_RISK_KEYWORDS.test(tool) || HIGH_RISK_KEYWORDS.test(spaced);
  };

  const highRiskTools = [
    ...splitList(source['high-risk-tools'] || source.highRiskTools),
    ...tools.filter(looksHighRisk),
  ];

  return {
    workflow: String(source.workflow || source.name || 'agent workflow').trim() || 'agent workflow',
    toolCount: parseNumber(source['tool-count'] || source.toolCount || tools.length, tools.length),
    similarToolCount: parseNumber(source['similar-tool-count'] || source.similarToolCount, 0),
    conditionalBranches: parseNumber(source['conditional-branches'] || source.conditionalBranches, 0),
    handoffCount: parseNumber(source['handoff-count'] || source.handoffCount, 0),
    autonomyLevel: String(source['autonomy-level'] || source.autonomyLevel || 'assisted').trim().toLowerCase(),
    tools,
    highRiskTools: [...new Set(highRiskTools)],
    writeTools: splitList(source['write-tools'] || source.writeTools),
    hasBaselineEvals: parseBoolean(source['baseline-evals'] || source.hasBaselineEvals, false),
    hasDocs: parseBoolean(source.docs || source.hasDocs, false),
    hasExamples: parseBoolean(source.examples || source.hasExamples, false),
    hasEdgeCases: parseBoolean(source['edge-cases'] || source.hasEdgeCases, false),
    hasToolApprovals: parseBoolean(source['tool-approvals'] || source.hasToolApprovals, false),
    hasExitCondition: parseBoolean(source['exit-condition'] || source.hasExitCondition, false),
    reversibleActions: parseBoolean(source['reversible-actions'] || source.reversibleActions, false),
  };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Score the risk of a workflow's tool set.
 *
 * Points: +35 when any high-risk tool is present, +20 when any write-capable
 * tool is present, and, only when at least one of those is present, +20 if
 * actions are not marked reversible and +25 if tool approvals are missing.
 * A total of 70 or more is `high`, 35 or more is `medium`, otherwise `low`.
 *
 * Missing list/flag properties are treated as empty/false so the function can
 * be called with partial option objects.
 *
 * @param {object} [options={}] - Normalized options (`highRiskTools`,
 *   `writeTools`, `reversibleActions`, `hasToolApprovals`).
 * @returns {{risk: string, score: number, reasons: string[]}} Risk band, score
 *   capped at 100, and the human-readable signals that contributed.
 */
function scoreToolRisk(options = {}) {
  const {
    highRiskTools = [],
    writeTools = [],
    reversibleActions = false,
    hasToolApprovals = false,
  } = options;

  const reasons = [];
  let score = 0;
  const flag = (points, reason) => {
    score += points;
    reasons.push(reason);
  };

  if (highRiskTools.length > 0) {
    flag(35, `${highRiskTools.length} high-risk tool(s) can affect production, money, data, secrets, or outbound actions`);
  }
  if (writeTools.length > 0) {
    flag(20, `${writeTools.length} write-capable tool(s) need approval and audit trails`);
  }

  // Reversibility and approvals only matter once something risky is in play.
  const hasRiskyTools = highRiskTools.length > 0 || writeTools.length > 0;
  if (hasRiskyTools && !reversibleActions) {
    flag(20, 'some actions are not marked reversible');
  }
  if (hasRiskyTools && !hasToolApprovals) {
    flag(25, 'tool approvals are missing for risky tools');
  }

  let risk = 'low';
  if (score >= 70) {
    risk = 'high';
  } else if (score >= 35) {
    risk = 'medium';
  }
  return { risk, score: Math.min(100, score), reasons };
}
|
|
69
|
+
|
|
70
|
+
/**
 * Score instruction quality as the percentage of four checks that pass:
 * documentation, examples, edge cases, and an exit condition.
 *
 * Each check's `passed` value is coerced to a strict boolean so that a missing
 * flag is reported as `false` (and survives JSON serialization) rather than
 * `undefined`.
 *
 * @param {object} [options={}] - Normalized options (`hasDocs`, `hasExamples`,
 *   `hasEdgeCases`, `hasExitCondition`).
 * @returns {{score: number, checks: object[], missing: string[]}} Rounded
 *   0-100 score, the individual checks, and the labels of failed checks.
 */
function scoreInstructions(options = {}) {
  const checks = [
    ['docs', options.hasDocs, 'draws on existing workflow documentation'],
    ['examples', options.hasExamples, 'includes concrete successful examples'],
    ['edge_cases', options.hasEdgeCases, 'covers edge cases and failure paths'],
    ['exit_condition', options.hasExitCondition, 'defines when the run is complete'],
  ].map(([id, flag, label]) => ({ id, passed: Boolean(flag), label }));

  const missing = checks.filter((check) => !check.passed).map((check) => check.label);
  const passedCount = checks.length - missing.length;

  return {
    score: Math.round((passedCount / checks.length) * 100),
    checks,
    missing,
  };
}
|
|
84
|
+
|
|
85
|
+
/**
 * Recommend an agent architecture for the workflow.
 *
 * Triggers collected:
 *  - `instruction_complexity`: 8 or more conditional branches;
 *  - `tool_overload`: 4 or more similar tools, or 10+ tools with 2+ similar;
 *  - `existing_handoffs`: at least one handoff.
 *
 * Decision order: `manager` when instruction complexity or tool overload is
 * triggered; otherwise `decentralized` when there are 2+ handoffs and tool
 * risk is not `high`; otherwise `single_agent`, with a reason that depends on
 * whether the instruction score is below 75.
 *
 * @param {object} options - Normalized options (`conditionalBranches`,
 *   `similarToolCount`, `toolCount`, `handoffCount`).
 * @param {{risk: string}} toolRisk - Result of the tool-risk scoring.
 * @param {{score: number}} instructionQuality - Result of instruction scoring.
 * @returns {{architecture: string, reason: string, triggers: string[]}}
 */
function selectArchitecture(options, toolRisk, instructionQuality) {
  const { conditionalBranches, similarToolCount, toolCount, handoffCount } = options;

  const complexInstructions = conditionalBranches >= 8;
  const overloadedTools = similarToolCount >= 4 || (toolCount >= 10 && similarToolCount >= 2);

  const triggers = [];
  if (complexInstructions) triggers.push('instruction_complexity');
  if (overloadedTools) triggers.push('tool_overload');
  if (handoffCount > 0) triggers.push('existing_handoffs');

  if (complexInstructions || overloadedTools) {
    return {
      architecture: 'manager',
      reason: 'split specialized responsibilities behind a manager agent because instructions or similar tools are becoming hard to route reliably',
      triggers,
    };
  }

  if (handoffCount >= 2 && toolRisk.risk !== 'high') {
    return {
      architecture: 'decentralized',
      reason: 'peer handoffs can work because the workflow already has explicit handoff points and no high-risk tool profile',
      triggers,
    };
  }

  const reason = instructionQuality.score < 75
    ? 'improve instructions and evals before adding orchestration complexity'
    : 'a single agent with clearer tools and instructions should stay cheaper to evaluate and maintain';
  return { architecture: 'single_agent', reason, triggers };
}
|
|
115
|
+
|
|
116
|
+
/**
 * List the blockers that must be resolved before the workflow can proceed.
 *
 * Blockers, in order:
 *  - `baseline_evals_required` (high): no baseline evals are present;
 *  - `tool_approval_required` (critical): tool risk is `high` and tool
 *    approvals are missing;
 *  - `multi_agent_without_trigger` (medium): a non-single-agent architecture
 *    was recommended without any trigger.
 *
 * @param {object} options - Normalized options (`hasBaselineEvals`,
 *   `hasToolApprovals`).
 * @param {{risk: string}} toolRisk - Result of the tool-risk scoring.
 * @param {{architecture: string, triggers: string[]}} architecture -
 *   Recommended architecture.
 * @returns {{id: string, severity: string, message: string}[]} Blockers found.
 */
function buildBlockers(options, toolRisk, architecture) {
  const candidates = [
    {
      applies: !options.hasBaselineEvals,
      id: 'baseline_evals_required',
      severity: 'high',
      message: 'Establish baseline evals before adding tools, splitting agents, or increasing autonomy.',
    },
    {
      applies: toolRisk.risk === 'high' && !options.hasToolApprovals,
      id: 'tool_approval_required',
      severity: 'critical',
      message: 'High-risk tools need approval gates before autonomous use.',
    },
    {
      applies: architecture.architecture !== 'single_agent' && architecture.triggers.length === 0,
      id: 'multi_agent_without_trigger',
      severity: 'medium',
      message: 'Do not split agents without instruction-complexity, tool-overload, or explicit handoff evidence.',
    },
  ];

  return candidates
    .filter((candidate) => candidate.applies)
    .map(({ id, severity, message }) => ({ id, severity, message }));
}
|
|
141
|
+
|
|
142
|
+
/**
 * Build the full agent-design governance report for a workflow.
 *
 * Normalizes the raw options, scores tool risk and instruction quality,
 * selects an architecture, and collects blockers. The report status is
 * `blocked` when any blocker is `critical`, `needs_work` when any blocker
 * exists, and `ready` otherwise.
 *
 * @param {object} [rawOptions={}] - Raw CLI/API options.
 * @returns {object} Report with `name`, `workflow`, `sourcePattern`, `status`,
 *   `recommendation`, `toolRisk`, `instructionQuality`, `evals`, `blockers`,
 *   and `nextActions`.
 */
function buildAgentDesignGovernancePlan(rawOptions = {}) {
  const options = normalizeOptions(rawOptions);
  const toolRisk = scoreToolRisk(options);
  const instructionQuality = scoreInstructions(options);
  const recommendation = selectArchitecture(options, toolRisk, instructionQuality);
  const blockers = buildBlockers(options, toolRisk, recommendation);

  let status = 'ready';
  if (blockers.some((blocker) => blocker.severity === 'critical')) {
    status = 'blocked';
  } else if (blockers.length > 0) {
    status = 'needs_work';
  }

  return {
    name: 'thumbgate-agent-design-governance',
    workflow: options.workflow,
    sourcePattern: 'OpenAI practical agent guide: model + tools + instructions, single-agent first, eval-driven multi-agent splits',
    status,
    recommendation,
    toolRisk,
    instructionQuality,
    evals: {
      baselinePresent: options.hasBaselineEvals,
      requiredBefore: ['new high-risk tools', 'multi-agent split', 'higher autonomy', 'auto-PR or deploy'],
    },
    blockers,
    nextActions: [
      'Keep the workflow single-agent unless evals show instruction complexity or tool overload.',
      'Write tool descriptions with clear names, parameters, side effects, and approval requirements.',
      'Add examples and edge cases to instructions before adding subagents.',
      'Add baseline evals that grade tool choice, exit condition, recovery behavior, and unsafe action refusal.',
      'Assign low, medium, or high risk to every tool based on write access, reversibility, permissions, and financial or production impact.',
    ],
  };
}
|
|
171
|
+
|
|
172
|
+
/**
 * Render a governance report as plain text for terminal output.
 *
 * The output starts with a blank line, a title, and a 35-character rule,
 * followed by the summary fields. A "Blockers:" section and a "Tool risk
 * signals:" section are included only when the report has entries for them;
 * "Next actions:" is always included. The text ends with a blank line.
 *
 * @param {object} report - Report with `workflow`, `status`, `recommendation`,
 *   `toolRisk`, `instructionQuality`, `evals`, `blockers`, and `nextActions`.
 * @returns {string} The formatted multi-line report.
 */
function formatAgentDesignGovernancePlan(report) {
  const { recommendation, toolRisk, blockers } = report;

  const header = [
    '',
    'ThumbGate Agent Design Governance',
    '-'.repeat(35),
    `Workflow : ${report.workflow}`,
    `Status : ${report.status}`,
    `Pattern : ${recommendation.architecture}`,
    `Reason : ${recommendation.reason}`,
    `Tool risk: ${toolRisk.risk} (${toolRisk.score}/100)`,
    `Instruction score: ${report.instructionQuality.score}/100`,
    `Baseline evals: ${report.evals.baselinePresent ? 'present' : 'missing'}`,
  ];

  const blockerSection = blockers.length > 0
    ? ['', 'Blockers:', ...blockers.map((blocker) => ` - [${blocker.severity}] ${blocker.id}: ${blocker.message}`)]
    : [];

  const riskSection = toolRisk.reasons.length > 0
    ? ['', 'Tool risk signals:', ...toolRisk.reasons.map((reason) => ` - ${reason}`)]
    : [];

  const actionSection = ['', 'Next actions:', ...report.nextActions.map((action) => ` - ${action}`)];

  // The trailing '' plus the final '\n' leave one blank line after the report.
  const lines = [...header, ...blockerSection, ...riskSection, ...actionSection, ''];
  return `${lines.join('\n')}\n`;
}
|
|
203
|
+
|
|
204
|
+
module.exports = {
|
|
205
|
+
buildAgentDesignGovernancePlan,
|
|
206
|
+
formatAgentDesignGovernancePlan,
|
|
207
|
+
normalizeOptions,
|
|
208
|
+
scoreInstructions,
|
|
209
|
+
scoreToolRisk,
|
|
210
|
+
selectArchitecture,
|
|
211
|
+
};
|