thumbgate 1.16.12 → 1.16.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +3 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +26 -1
- package/adapters/opencode/opencode.json +1 -1
- package/bin/cli.js +420 -1
- package/config/gate-templates.json +372 -0
- package/config/mcp-allowlists.json +25 -0
- package/config/model-candidates.json +59 -2
- package/config/model-tiers.json +4 -1
- package/package.json +79 -22
- package/public/compare.html +6 -0
- package/public/index.html +144 -11
- package/public/numbers.html +11 -11
- package/public/pro.html +22 -24
- package/scripts/agent-design-governance.js +211 -0
- package/scripts/agent-reasoning-traces.js +683 -0
- package/scripts/agent-reward-model.js +438 -0
- package/scripts/agent-stack-survival-audit.js +231 -0
- package/scripts/ai-engineering-stack-guardrails.js +256 -0
- package/scripts/billing.js +16 -4
- package/scripts/chatgpt-ads-readiness-pack.js +195 -0
- package/scripts/cli-schema.js +277 -0
- package/scripts/code-graph-guardrails.js +176 -0
- package/scripts/deepseek-v4-runtime-guardrails.js +253 -0
- package/scripts/gemini-embedding-policy.js +198 -0
- package/scripts/inference-cache-policy.js +39 -0
- package/scripts/judge-reward-function.js +396 -0
- package/scripts/llm-behavior-monitor.js +251 -0
- package/scripts/long-running-agent-context-guardrails.js +176 -0
- package/scripts/multimodal-retrieval-plan.js +31 -11
- package/scripts/oss-pr-opportunity-scout.js +240 -0
- package/scripts/proactive-agent-eval-guardrails.js +230 -0
- package/scripts/profile-router.js +5 -4
- package/scripts/prompting-operating-system.js +273 -0
- package/scripts/proxy-pointer-rag-guardrails.js +189 -0
- package/scripts/rag-precision-guardrails.js +202 -0
- package/scripts/rate-limiter.js +1 -1
- package/scripts/reasoning-efficiency-guardrails.js +176 -0
- package/scripts/reward-hacking-guardrails.js +251 -0
- package/scripts/seo-gsd.js +1201 -11
- package/scripts/single-use-credential-gate.js +182 -0
- package/scripts/structured-prompt-driven.js +226 -0
- package/scripts/telemetry-analytics.js +31 -6
- package/scripts/tool-registry.js +92 -0
- package/scripts/upstream-contribution-engine.js +379 -0
- package/scripts/vector-store.js +119 -4
- package/src/api/server.js +333 -100
- package/scripts/agents-sdk-sandbox-plan.js +0 -57
- package/scripts/ai-org-governance.js +0 -98
- package/scripts/artifact-agent-plan.js +0 -81
- package/scripts/enterprise-agent-rollout.js +0 -34
- package/scripts/experience-replay-governance.js +0 -69
- package/scripts/inference-economics.js +0 -53
- package/scripts/knowledge-layer-plan.js +0 -108
- package/scripts/memory-store-governance.js +0 -60
- package/scripts/post-training-governance.js +0 -34
- package/scripts/production-agent-readiness.js +0 -40
- package/scripts/scaling-law-claims.js +0 -60
- package/scripts/student-consistent-training.js +0 -73
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Single-Use Credential Gate
|
|
6
|
+
*
|
|
7
|
+
* Converts the Link CLI pattern into local ThumbGate policy: risky agent
|
|
8
|
+
* actions should request narrow, one-time credentials with synchronous
|
|
9
|
+
* approval instead of reusing long-lived secrets.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const crypto = require('node:crypto');
|
|
13
|
+
const path = require('node:path');
|
|
14
|
+
|
|
15
|
+
// Lifetime (in seconds) applied when neither the action nor the options name one.
const DEFAULT_TTL_SECONDS = 300;

// Keyword detectors used to tag an action's text with risk categories.
// Each entry pairs a tag with a case-insensitive, whole-word pattern.
const RISK_PATTERNS = [
  ['purchase', /\b(buy|buys|buying|purchase|purchases|checkout|payment|gumroad|stripe|card)\b/i],
  ['credential', /\b(token|secret|credential|api[_-]?key|oauth|login)\b/i],
  ['deploy', /\b(deploy|production|railway|release)\b/i],
  ['external-write', /\b(post|reply|send|email|upload|publish|create order)\b/i],
].map(([tag, pattern]) => ({ tag, pattern }));
|
|
22
|
+
|
|
23
|
+
/**
 * Builds the credential approval plan for an agent action.
 *
 * The action's text (see buildActionText) is matched against RISK_PATTERNS;
 * a plan is "required" when any pattern matches or when
 * `action.requiresCredential` is truthy.
 *
 * @param {object} [action] - Action descriptor; reads `scope`, `ttlSeconds`,
 *   `requiresCredential` plus the fields consumed by buildActionText.
 * @param {object} [options] - Reads `ttlSeconds` as a fallback lifetime.
 * @returns {object} Plan with `required`, `riskTags`, `scope`, `ttlSeconds`
 *   (clamped to 30..900), `singleUse`, `approvalMode`, `approvalPrompt`
 *   and `deniedReasons`.
 */
function planSingleUseCredentialRequest(action = {}, options = {}) {
  const actionText = buildActionText(action);

  const riskTags = [];
  for (const { tag, pattern } of RISK_PATTERNS) {
    if (pattern.test(actionText)) riskTags.push(tag);
  }

  const required = riskTags.length > 0 || Boolean(action.requiresCredential);
  // An explicit scope on the action wins; otherwise it is inferred from the text.
  const scope = normalizeScope(action.scope || inferScope(actionText));
  const requestedTtl = action.ttlSeconds || options.ttlSeconds || DEFAULT_TTL_SECONDS;
  const ttlSeconds = clamp(Number(requestedTtl), 30, 900);

  let approvalMode = 'not-required';
  let approvalPrompt = 'No credential approval required.';
  if (required) {
    approvalMode = 'synchronous';
    approvalPrompt = `Approve one-time credential for ${scope.resource} (${scope.operation})? Expires in ${ttlSeconds}s and cannot be reused.`;
  }

  return {
    required,
    riskTags,
    scope,
    ttlSeconds,
    singleUse: true,
    approvalMode,
    approvalPrompt,
    deniedReasons: buildDeniedReasons(action, scope),
  };
}
|
|
43
|
+
|
|
44
|
+
/**
 * Mints a credential grant record from a planned request and an approval.
 *
 * The lifetime is clamped to the same 30..900 second window the planner
 * applies, so a hand-built request cannot obtain a long-lived grant and a
 * non-numeric `ttlSeconds` can no longer make `toISOString()` throw on an
 * invalid date. `grantId`, `issuedAt` and `expiresAt` are all derived from
 * one clock reading so they cannot disagree.
 *
 * @param {object} [request] - Reads `ttlSeconds`, `singleUse` and `scope`.
 * @param {object} [approval] - Reads `approved`, `approvedBy` and `evidence`.
 * @param {Date} [now] - Issue time; defaults to the current time.
 * @returns {object} Grant with `grantId`, `approved`, `singleUse`, `scope`,
 *   `issuedAt`, `expiresAt`, `approvedBy`, `approvalEvidence`, `usedAt`.
 */
function mintCredentialGrant(request = {}, approval = {}, now = new Date()) {
  // Default parameters only cover `undefined`; tolerate explicit nulls too.
  const grantRequest = request ?? {};
  const grantApproval = approval ?? {};

  const issuedAtMs = now.getTime();
  const ttlSeconds = clamp(Number(grantRequest.ttlSeconds || DEFAULT_TTL_SECONDS), 30, 900);

  return {
    grantId: `cred_${issuedAtMs}_${crypto.randomBytes(4).toString('hex')}`,
    approved: Boolean(grantApproval.approved),
    singleUse: grantRequest.singleUse !== false,
    // Map null to undefined so normalizeScope falls back to its default scope.
    scope: normalizeScope(grantRequest.scope ?? undefined),
    issuedAt: new Date(issuedAtMs).toISOString(),
    expiresAt: new Date(issuedAtMs + (ttlSeconds * 1000)).toISOString(),
    approvedBy: grantApproval.approvedBy || null,
    approvalEvidence: grantApproval.evidence || null,
    usedAt: null,
  };
}
|
|
58
|
+
|
|
59
|
+
/**
 * Decides whether a grant may be used for an action right now.
 *
 * Fails closed on expiry: a grant whose `expiresAt` is missing or cannot be
 * parsed as a date is reported as `credential_expired` instead of being
 * treated as valid forever.
 *
 * @param {object} [grant] - Reads `approved`, `singleUse`, `usedAt`,
 *   `expiresAt`, `scope` and `grantId`.
 * @param {object} [action] - Reads `scope`, or the text fields used to infer one.
 * @param {Date} [now] - Evaluation time; defaults to the current time.
 * @returns {{allowed: boolean, reasons: string[], grantId: (string|null),
 *   requiredScope: object, grantedScope: object}}
 */
function evaluateCredentialUse(grant = {}, action = {}, now = new Date()) {
  const reasons = [];
  if (!grant.approved) reasons.push('credential_not_approved');
  if (!grant.singleUse) reasons.push('credential_not_single_use');
  if (grant.usedAt) reasons.push('credential_already_used');

  // NaN marks "no usable expiry"; it is rejected together with past expiries.
  const expiresAtMs = grant.expiresAt ? new Date(grant.expiresAt).getTime() : Number.NaN;
  if (!Number.isFinite(expiresAtMs) || expiresAtMs < now.getTime()) {
    reasons.push('credential_expired');
  }

  const actionScope = normalizeScope(action.scope || inferScope(buildActionText(action)));
  // Map null to undefined so normalizeScope falls back to its default scope.
  const grantScope = normalizeScope(grant.scope ?? undefined);
  if (!scopeAllows(grantScope, actionScope)) reasons.push('credential_scope_mismatch');

  return {
    allowed: reasons.length === 0,
    reasons,
    grantId: grant.grantId || null,
    requiredScope: actionScope,
    grantedScope: grantScope,
  };
}
|
|
78
|
+
|
|
79
|
+
/**
 * Returns a copy of the grant stamped as consumed.
 *
 * @param {object} [grant] - Grant to copy; it is not mutated.
 * @param {Date} [now] - Time of use; defaults to the current time.
 * @returns {object} Shallow copy of `grant` with `usedAt` set to `now` in ISO form.
 */
function markCredentialUsed(grant = {}, now = new Date()) {
  const consumed = { ...grant };
  consumed.usedAt = now.toISOString();
  return consumed;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Flattens an action's descriptive fields into one space-separated string.
 *
 * @param {object} [action] - Reads `command`, `intent`, `description`, `url`
 *   and the optional `tags` list, in that order.
 * @returns {string} The truthy pieces joined by single spaces.
 */
function buildActionText(action = {}) {
  const candidates = [
    action.command,
    action.intent,
    action.description,
    action.url,
    ...(action.tags || []),
  ];
  const pieces = [];
  for (const candidate of candidates) {
    if (candidate) pieces.push(candidate);
  }
  return pieces.join(' ');
}
|
|
95
|
+
|
|
96
|
+
/**
 * Guesses a credential scope from free-form action text.
 *
 * Rules are tried in order and the first matching keyword group wins;
 * text that matches none of them maps to `local:read`.
 *
 * @param {string} [text] - Action text to inspect.
 * @returns {{resource: string, operation: string}} A freshly created scope.
 */
function inferScope(text = '') {
  const rules = [
    [/\b(stripe|checkout|payment|card)\b/i, 'payments', 'write'],
    [/\b(gumroad|buy|buys|buying|purchase|purchases)\b/i, 'purchase', 'create'],
    [/\b(deploy|railway|production)\b/i, 'deployment', 'write'],
    [/\b(post|reply|email|send|publish)\b/i, 'external-message', 'send'],
  ];
  for (const [pattern, resource, operation] of rules) {
    if (pattern.test(text)) return { resource, operation };
  }
  return { resource: 'local', operation: 'read' };
}
|
|
103
|
+
|
|
104
|
+
/**
 * Coerces a scope given as `"resource:operation"` text or as an object into
 * a `{ resource, operation }` pair of strings.
 *
 * String scopes default the operation to `'use'` (also when the part after
 * the colon is empty); object scopes default it to `'read'`. A missing
 * resource becomes `'local'`. `null` is treated like an absent scope instead
 * of throwing, since the default parameter only covers `undefined`.
 *
 * @param {string|object|null} [scope] - Scope to normalize.
 * @returns {{resource: string, operation: string}}
 */
function normalizeScope(scope = {}) {
  if (typeof scope === 'string') {
    const [resource, operation] = scope.split(':');
    return { resource: resource || 'local', operation: operation || 'use' };
  }
  const source = scope ?? {};
  return {
    resource: String(source.resource || 'local'),
    operation: String(source.operation || 'read'),
  };
}
|
|
114
|
+
|
|
115
|
+
/**
 * Tests whether a granted scope covers a required scope.
 *
 * A granted resource of `'*'` covers everything. Otherwise the resources
 * must be equal and the granted operation must be `'*'` or equal to the
 * required operation.
 *
 * @param {{resource: string, operation: string}} granted
 * @param {{resource: string, operation: string}} required
 * @returns {boolean}
 */
function scopeAllows(granted, required) {
  const coversEveryResource = granted.resource === '*';
  if (coversEveryResource) return true;

  const sameResource = granted.resource === required.resource;
  if (!sameResource) return false;

  const coversEveryOperation = granted.operation === '*';
  return coversEveryOperation || granted.operation === required.operation;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Lists policy violations in a credential request.
 *
 * @param {object} action - Reads `persistent`; only the literal `true` counts.
 * @param {{resource: string, operation: string}} scope - Normalized scope.
 * @returns {string[]} Reason codes; empty when nothing is denied.
 */
function buildDeniedReasons(action, scope) {
  const wantsPersistence = action.persistent === true;
  const usesWildcard = scope.resource === '*' || scope.operation === '*';
  return [
    ...(wantsPersistence ? ['persistent_credentials_not_allowed'] : []),
    ...(usesWildcard ? ['credential_scope_too_broad'] : []),
  ];
}
|
|
127
|
+
|
|
128
|
+
/**
 * Restricts a number to the inclusive range [min, max].
 *
 * @param {number} value - Candidate; NaN and ±Infinity fall back to `min`.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @returns {number}
 */
function clamp(value, min, max) {
  const usable = Number.isFinite(value);
  if (usable) {
    const raisedToMin = Math.max(min, value);
    return Math.min(max, raisedToMin);
  }
  return min;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Renders a credential plan as a short Markdown report.
 *
 * @param {object} [plan] - Reads `required`, `approvalMode`, `scope`,
 *   `ttlSeconds`, `deniedReasons` and `approvalPrompt`.
 * @returns {string} Newline-joined report that ends with a trailing newline.
 */
function formatCredentialPlan(plan = {}) {
  const requiredLabel = plan.required ? 'yes' : 'no';
  const deniedLabel = (plan.deniedReasons || []).join(', ') || 'none';

  const lines = [];
  lines.push('# Single-Use Credential Plan');
  lines.push('');
  lines.push(`Required: ${requiredLabel}`);
  lines.push(`Approval mode: ${plan.approvalMode}`);
  lines.push(`Scope: ${plan.scope?.resource}:${plan.scope?.operation}`);
  lines.push(`TTL seconds: ${plan.ttlSeconds}`);
  lines.push(`Denied reasons: ${deniedLabel}`);
  lines.push('');
  lines.push(plan.approvalPrompt || '');
  lines.push('');
  return lines.join('\n');
}
|
|
147
|
+
|
|
148
|
+
/**
 * Parses CLI arguments for the credential gate.
 *
 * The first argument is the command (default `plan`). When the command is
 * omitted and the first argument is already a `--flag`, it is parsed as a
 * flag instead of being swallowed as the command, so
 * `--intent="deploy to production"` on its own still produces a plan.
 *
 * Recognized flags: `--intent=`, `--action=` (alias that also sets `intent`),
 * `--description=`, `--scope=`.
 *
 * @param {string[]} [argv] - Arguments after the script path.
 * @returns {{command: string, intent: string, description?: string, scope?: string}}
 */
function parseArgs(argv = process.argv.slice(2)) {
  const [first] = argv;
  const firstIsFlag = typeof first === 'string' && first.startsWith('--');
  const args = { command: (!firstIsFlag && first) || 'plan', intent: '' };
  // Skip the command slot only when it really holds a command.
  const flags = firstIsFlag ? argv : argv.slice(1);
  for (const arg of flags) {
    if (arg.startsWith('--intent=')) args.intent = arg.slice('--intent='.length);
    if (arg.startsWith('--action=')) args.intent = arg.slice('--action='.length);
    if (arg.startsWith('--description=')) args.description = arg.slice('--description='.length);
    if (arg.startsWith('--scope=')) args.scope = arg.slice('--scope='.length);
  }
  return args;
}
|
|
158
|
+
|
|
159
|
+
/**
 * Reports whether this file is the script Node was started with.
 *
 * @param {string[]} [argv] - Process arguments; `argv[1]` is the script path.
 * @returns {boolean} True when `argv[1]` resolves to this file's path.
 */
function isCliInvocation(argv = process.argv) {
  const entryScript = argv[1];
  if (!entryScript) return false;
  return path.resolve(entryScript) === __filename;
}
|
|
162
|
+
|
|
163
|
+
// CLI entry point: `plan` prints the Markdown report, `json` prints the raw
// plan, and any other command reports an error and exits with status 1.
if (isCliInvocation()) {
  const cliArgs = parseArgs();
  const credentialPlan = planSingleUseCredentialRequest(cliArgs);
  switch (cliArgs.command) {
    case 'json':
      console.log(JSON.stringify(credentialPlan, null, 2));
      break;
    case 'plan':
      console.log(formatCredentialPlan(credentialPlan));
      break;
    default:
      console.error(`Unknown command: ${cliArgs.command}. Use: plan, json`);
      process.exit(1);
  }
}
|
|
175
|
+
|
|
176
|
+
module.exports = {
|
|
177
|
+
evaluateCredentialUse,
|
|
178
|
+
formatCredentialPlan,
|
|
179
|
+
markCredentialUsed,
|
|
180
|
+
mintCredentialGrant,
|
|
181
|
+
planSingleUseCredentialRequest,
|
|
182
|
+
};
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Structured Prompt-Driven Development (SPDD) Gate
|
|
6
|
+
*
|
|
7
|
+
* Makes code-generation prompts governable artifacts by requiring a compact
|
|
8
|
+
* REASONS canvas before risky implementation work proceeds.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
const path = require('node:path');
|
|
12
|
+
|
|
13
|
+
// The seven canvas fields, in display order, as [key, description] pairs.
const FIELD_DEFINITIONS = Object.entries({
  requirements: 'Problem, business value, scope, and definition of done.',
  entities: 'Domain nouns, relationships, and data contracts.',
  approach: 'Strategy for satisfying the requirements.',
  structure: 'Files, modules, dependencies, and integration boundaries.',
  operations: 'Concrete, testable implementation steps.',
  norms: 'Reusable engineering standards and team conventions.',
  safeguards: 'Non-negotiable constraints, risks, and verification gates.',
});

// Field keys only, in the same order as FIELD_DEFINITIONS.
const FIELD_KEYS = FIELD_DEFINITIONS.map((definition) => definition[0]);
|
|
24
|
+
|
|
25
|
+
/**
 * Builds a canvas document from a request string or a structured input.
 *
 * Every field in FIELD_KEYS is normalized to a list. The request seeds
 * `requirements` when that field is empty; `acceptanceCriteria`, `files` /
 * `changedFiles` and `tests` / `verification` are appended to
 * `requirements`, `structure` and `safeguards` respectively. Empty `norms`
 * and `safeguards` receive one default entry each.
 *
 * @param {string|object} [input] - Request text, or an object with canvas
 *   fields plus optional `request`/`story`/`task`, `title`, `artifactPath`.
 * @returns {{title: string, canvas: object, source: {request: string, artifactPath: string}}}
 */
function buildReasonsCanvas(input = {}) {
  const source = typeof input === 'string' ? { request: input } : input;
  const request = String(source.request || source.story || source.task || '').trim();
  const canvas = Object.fromEntries(
    FIELD_KEYS.map((key) => [key, normalizeList(source[key])])
  );

  if (request && canvas.requirements.length === 0) {
    canvas.requirements.push(request);
  }

  if (source.acceptanceCriteria) {
    for (const criterion of normalizeList(source.acceptanceCriteria)) {
      canvas.requirements.push(criterion);
    }
  }

  const fileInput = source.files || source.changedFiles;
  if (fileInput) {
    for (const file of normalizeList(fileInput)) {
      canvas.structure.push(file);
    }
  }

  const verificationInput = source.tests || source.verification;
  if (verificationInput) {
    for (const item of normalizeList(verificationInput)) {
      canvas.safeguards.push(`Verification: ${item}`);
    }
  }

  // Defaults are applied last so supplied entries always take precedence.
  if (canvas.norms.length === 0) {
    canvas.norms.push('Keep prompt, code, and tests synchronized in version control.');
  }
  if (canvas.safeguards.length === 0) {
    canvas.safeguards.push('Do not claim completion without passing verification evidence.');
  }

  const title = source.title || inferTitle(request);
  const artifactPath = source.artifactPath || 'docs/prompts/<feature>.reasons.md';
  return {
    title,
    canvas,
    source: { request, artifactPath },
  };
}
|
|
62
|
+
|
|
63
|
+
/**
 * Scores a canvas and lists the gates it trips.
 *
 * Gates raised:
 * - `missing-<field>` for each empty field (`block` for requirements and
 *   safeguards, `warn` for the rest);
 * - `operations-not-testable` (`block`) when operations exist but none
 *   satisfies isTestableOperation;
 * - `safeguards-without-verification` (`block`) when the safeguards text
 *   contains none of the verification/risk keywords;
 * - `code-prompt-drift` (`warn`) when changed files are given and none of
 *   them (by full path or base name) appears in the structure text.
 *
 * @param {object} [document] - A document with a `canvas`, or a bare canvas.
 * @param {object} [options] - Reads `changedFiles`.
 * @returns {{allowed: boolean, score: number, gates: object[], missing: string[], recommendation: string}}
 *   `score` is 100 minus 30 per block and 10 per warning, floored at 0.
 */
function evaluateReasonsCanvas(document = {}, options = {}) {
  const canvas = document.canvas || document;
  const blockingFields = ['requirements', 'safeguards'];

  const missing = FIELD_KEYS.filter((key) => normalizeList(canvas[key]).length === 0);
  const gates = missing.map((key) => ({
    id: `missing-${key}`,
    severity: blockingFields.includes(key) ? 'block' : 'warn',
    reason: `${labelFor(key)} is empty; the agent lacks a governed ${key} boundary.`,
  }));

  const operations = normalizeList(canvas.operations);
  const hasTestableOperation = operations.some(isTestableOperation);
  if (operations.length > 0 && !hasTestableOperation) {
    gates.push({
      id: 'operations-not-testable',
      severity: 'block',
      reason: 'At least one operation must be concrete and testable before code generation.',
    });
  }

  const safeguardText = normalizeList(canvas.safeguards).join('\n');
  const namesVerification = /\b(tests?|verify|verification|evidence|gate|security|privacy|rollback|performance)\b/i.test(safeguardText);
  if (!namesVerification) {
    gates.push({
      id: 'safeguards-without-verification',
      severity: 'block',
      reason: 'Safeguards must name verification, evidence, or non-negotiable risk controls.',
    });
  }

  const changedFiles = normalizeList(options.changedFiles);
  const structureText = normalizeList(canvas.structure).join('\n');
  const isRepresented = (file) => structureText.includes(file) || structureText.includes(path.basename(file));
  if (changedFiles.length > 0 && !changedFiles.some(isRepresented)) {
    gates.push({
      id: 'code-prompt-drift',
      severity: 'warn',
      reason: 'Changed files are not represented in the prompt structure; sync the canvas before review.',
    });
  }

  let blockCount = 0;
  let warnCount = 0;
  for (const gate of gates) {
    if (gate.severity === 'block') blockCount += 1;
    if (gate.severity === 'warn') warnCount += 1;
  }

  let recommendation = 'Structured prompt is ready for code generation and review.';
  if (blockCount) {
    recommendation = 'Fix the structured prompt before generating or merging code.';
  } else if (warnCount) {
    recommendation = 'Proceed only after syncing prompt drift and documenting review evidence.';
  }

  return {
    allowed: blockCount === 0,
    score: Math.max(0, 100 - (blockCount * 30) - (warnCount * 10)),
    gates,
    missing,
    recommendation,
  };
}
|
|
118
|
+
|
|
119
|
+
/**
 * Produces the follow-up plan for keeping a canvas in sync with a change.
 *
 * @param {object} [document] - Canvas document; `source.artifactPath` is
 *   preferred for the artifact location.
 * @param {object} [changes] - Reads `changedFiles`, `verification` / `tests`
 *   and `artifactPath`; also passed to evaluateReasonsCanvas as its options.
 * @returns {{promptFirst: boolean, artifactPath: string, requiredUpdates: string[], reviewChecklist: string[]}}
 *   `requiredUpdates` holds the tripped gate ids, then the sync and
 *   verification follow-ups.
 */
function buildPromptSyncPlan(document = {}, changes = {}) {
  const evaluation = evaluateReasonsCanvas(document, changes);
  const hasChangedFiles = normalizeList(changes.changedFiles).length > 0;
  const hasVerification = normalizeList(changes.verification || changes.tests).length > 0;

  const requiredUpdates = (evaluation.gates || []).map((gate) => gate.id);
  if (hasChangedFiles) {
    requiredUpdates.push('sync-structure-with-changed-files');
  }
  requiredUpdates.push(hasVerification ? 'attach-verification-evidence' : 'add-verification-evidence');

  const artifactPath = document.source?.artifactPath
    || changes.artifactPath
    || 'docs/prompts/<feature>.reasons.md';

  return {
    promptFirst: evaluation.allowed,
    artifactPath,
    requiredUpdates,
    reviewChecklist: [
      'Review intent and scope before reviewing code diff.',
      'Confirm operations map to focused tests.',
      'Update the canvas when implementation reality diverges.',
      'Store prompt artifact beside the feature or PR evidence.',
    ],
  };
}
|
|
139
|
+
|
|
140
|
+
/**
 * Renders a canvas document and its evaluation as Markdown.
 *
 * @param {object} [document] - A document with a `canvas`, or a bare canvas.
 * @param {object} [evaluation] - Result of evaluateReasonsCanvas; computed
 *   from `document` when omitted.
 * @returns {string} Markdown with one section per field, a Gates section and
 *   the recommendation, ending with a trailing newline.
 */
function formatReasonsCanvas(document = {}, evaluation = evaluateReasonsCanvas(document)) {
  const canvas = document.canvas || document;
  const readiness = evaluation.allowed ? 'ready' : 'blocked';

  const lines = [
    `# ${document.title || 'Structured Prompt Canvas'}`,
    '',
    `Artifact: ${document.source?.artifactPath || 'docs/prompts/<feature>.reasons.md'}`,
    `Readiness: ${readiness} (${evaluation.score}/100)`,
    '',
  ];

  for (const [key, description] of FIELD_DEFINITIONS) {
    lines.push(`## ${labelFor(key)}`, '', `_${description}_`, '');
    lines.push(...renderList(normalizeList(canvas[key])));
    lines.push('');
  }

  lines.push('## Gates', '');
  if (evaluation.gates.length) {
    for (const gate of evaluation.gates) {
      lines.push(`- ${gate.severity}: ${gate.id} — ${gate.reason}`);
    }
  } else {
    lines.push('- pass: canvas-ready — Structured prompt is complete enough to govern generation.');
  }

  lines.push('', `Recommendation: ${evaluation.recommendation}`, '');
  return lines.join('\n');
}
|
|
164
|
+
|
|
165
|
+
/**
 * Normalizes a value into a list of trimmed, non-empty strings.
 *
 * Arrays are converted item by item. `null` and `undefined` items are
 * dropped rather than stringified, so an entry such as `[null]` cannot make
 * an empty field look populated. Any other truthy value is stringified and
 * split on newlines or semicolons, with a leading `-` or `*` bullet removed
 * from each part.
 *
 * @param {*} value - Array, string or other value; falsy values yield `[]`.
 * @returns {string[]}
 */
function normalizeList(value) {
  if (!value) return [];
  if (Array.isArray(value)) {
    return value
      .filter((item) => item !== null && item !== undefined)
      .map((item) => String(item).trim())
      .filter(Boolean);
  }
  return String(value)
    .split(/\n|;/)
    .map((item) => item.replace(/^[-*]\s*/, '').trim())
    .filter(Boolean);
}
|
|
173
|
+
|
|
174
|
+
/**
 * Formats list items as Markdown bullets.
 *
 * @param {string[]} items - Items to render.
 * @returns {string[]} One `- item` line per entry, or a single
 *   `- <missing>` placeholder for an empty list.
 */
function renderList(items) {
  if (!items.length) return ['- <missing>'];
  const bullets = [];
  for (const item of items) {
    bullets.push(`- ${item}`);
  }
  return bullets;
}
|
|
177
|
+
|
|
178
|
+
/**
 * Turns a field key into a heading label by upper-casing its first character.
 *
 * @param {string} key - Field key such as `requirements`.
 * @returns {string}
 */
function labelFor(key) {
  const initial = key.charAt(0).toUpperCase();
  const remainder = key.slice(1);
  return `${initial}${remainder}`;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Checks whether an operation description contains one of the recognized
 * action verbs as a whole word (case-insensitive).
 *
 * @param {string} operation - Operation text.
 * @returns {boolean}
 */
function isTestableOperation(operation) {
  const actionVerb = /\b(add|update|remove|implement|verify|test|run|assert|block|allow|return|emit)\b/i;
  return actionVerb.test(operation);
}
|
|
185
|
+
|
|
186
|
+
/**
 * Derives a canvas title from the request text.
 *
 * @param {string} request - Request text; may be empty.
 * @returns {string} The default title for an empty request, the request
 *   itself when it is at most 70 characters, otherwise its first 67
 *   characters followed by `...`.
 */
function inferTitle(request) {
  if (!request) return 'Structured Prompt Canvas';
  if (request.length <= 70) return request;
  const head = request.slice(0, 67);
  return `${head}...`;
}
|
|
190
|
+
|
|
191
|
+
/**
 * Parses CLI arguments for the structured-prompt gate.
 *
 * The first argument is the command (default `canvas`). When the command is
 * omitted and the first argument is already a `--flag`, it is parsed as a
 * flag instead of being swallowed as the command, so `--request="..."` on
 * its own still renders a canvas.
 *
 * Recognized flags: `--request=` (last one wins) and the repeatable
 * `--file=`, `--test=`, `--operation=`, `--safeguard=`, which collect into
 * `files`, `tests`, `operations` and `safeguards`.
 *
 * @param {string[]} [argv] - Arguments after the script path.
 * @returns {object} `{ command, request }` plus any collected list flags.
 */
function parseArgs(argv = process.argv.slice(2)) {
  const [first] = argv;
  const firstIsFlag = typeof first === 'string' && first.startsWith('--');
  const args = { command: (!firstIsFlag && first) || 'canvas', request: '' };
  // Skip the command slot only when it really holds a command.
  const flags = firstIsFlag ? argv : argv.slice(1);
  for (const arg of flags) {
    if (arg.startsWith('--request=')) args.request = arg.slice('--request='.length);
    if (arg.startsWith('--file=')) args.files = [...(args.files || []), arg.slice('--file='.length)];
    if (arg.startsWith('--test=')) args.tests = [...(args.tests || []), arg.slice('--test='.length)];
    if (arg.startsWith('--operation=')) args.operations = [...(args.operations || []), arg.slice('--operation='.length)];
    if (arg.startsWith('--safeguard=')) args.safeguards = [...(args.safeguards || []), arg.slice('--safeguard='.length)];
  }
  return args;
}
|
|
202
|
+
|
|
203
|
+
/**
 * Reports whether this file is the script Node was started with.
 *
 * @param {string[]} [argv] - Process arguments; `argv[1]` is the script path.
 * @returns {boolean} True when `argv[1]` resolves to this file's path.
 */
function isCliInvocation(argv = process.argv) {
  const scriptArg = argv[1];
  return scriptArg ? path.resolve(scriptArg) === __filename : false;
}
|
|
206
|
+
|
|
207
|
+
// CLI entry point: `canvas` prints the Markdown canvas, `json` prints the
// document, evaluation and sync plan, and any other command reports an
// error and exits with status 1.
if (isCliInvocation()) {
  const cliArgs = parseArgs();
  const document = buildReasonsCanvas(cliArgs);
  const evaluation = evaluateReasonsCanvas(document, { changedFiles: cliArgs.files });
  switch (cliArgs.command) {
    case 'json': {
      const syncPlan = buildPromptSyncPlan(document, { changedFiles: cliArgs.files, tests: cliArgs.tests });
      console.log(JSON.stringify({ document, evaluation, syncPlan }, null, 2));
      break;
    }
    case 'canvas':
      console.log(formatReasonsCanvas(document, evaluation));
      break;
    default:
      console.error(`Unknown command: ${cliArgs.command}. Use: canvas, json`);
      process.exit(1);
  }
}
|
|
220
|
+
|
|
221
|
+
module.exports = {
|
|
222
|
+
buildPromptSyncPlan,
|
|
223
|
+
buildReasonsCanvas,
|
|
224
|
+
evaluateReasonsCanvas,
|
|
225
|
+
formatReasonsCanvas,
|
|
226
|
+
};
|
|
@@ -344,9 +344,31 @@ function appendTelemetryEvent(feedbackDir, payload = {}, headers = {}) {
|
|
|
344
344
|
return entry;
|
|
345
345
|
}
|
|
346
346
|
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
347
|
+
// Default tail size (8 MiB). getTelemetrySummary passes it as `maxBytes` when the
// analytics window is bounded and no `telemetryTailBytes` option is supplied.
const DEFAULT_BOUNDED_TELEMETRY_TAIL_BYTES = 8 * 1024 * 1024;
|
|
348
|
+
|
|
349
|
+
/**
 * Reads a telemetry file as UTF-8 text, optionally only its tail.
 *
 * With a positive `options.maxBytes` and a file larger than that, only the
 * last `maxBytes` bytes are returned, trimmed forward to the first line
 * boundary so the result never begins with a partial line. One extra byte
 * before the window is inspected: when it is a newline the window already
 * starts on a line boundary and is returned whole, so a complete first line
 * is not discarded.
 *
 * Only the bytes actually read are decoded, so a short read (for example
 * when the file shrinks between sizing and reading) cannot leave NUL padding
 * in the text. The size is taken from the open descriptor rather than from a
 * separate stat of the path.
 *
 * @param {string} filePath - Path of the telemetry file.
 * @param {object} [options] - Reads `maxBytes`; fractional values are
 *   floored, and values that are not finite or below 1 mean "read it all".
 * @returns {string} File text (or its tail); `''` when the file does not exist.
 */
function readTelemetryText(filePath, options = {}) {
  if (!fs.existsSync(filePath)) return '';

  const requestedBytes = Number(options.maxBytes || 0);
  const maxBytes = Number.isFinite(requestedBytes) && requestedBytes >= 1
    ? Math.floor(requestedBytes)
    : 0;

  if (maxBytes > 0) {
    const fd = fs.openSync(filePath, 'r');
    try {
      const { size } = fs.fstatSync(fd);
      if (size > maxBytes) {
        // One lead byte plus the window; size > maxBytes keeps the offset >= 0.
        const buffer = Buffer.alloc(maxBytes + 1);
        const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, size - buffer.length);
        const text = buffer.subarray(1, bytesRead).toString('utf-8');
        // A newline in the lead byte means the window starts on a line boundary.
        if (bytesRead > 0 && buffer[0] === 0x0a) return text;
        const firstNewline = text.indexOf('\n');
        return firstNewline >= 0 ? text.slice(firstNewline + 1) : text;
      }
    } finally {
      fs.closeSync(fd);
    }
  }

  return fs.readFileSync(filePath, 'utf-8');
}
|
|
369
|
+
|
|
370
|
+
function loadTelemetryEventsFromPath(filePath, options = {}) {
|
|
371
|
+
const raw = readTelemetryText(filePath, options).trim();
|
|
350
372
|
if (!raw) return [];
|
|
351
373
|
return raw
|
|
352
374
|
.split('\n')
|
|
@@ -365,13 +387,13 @@ function loadTelemetryEventsFromPath(filePath) {
|
|
|
365
387
|
.filter(Boolean);
|
|
366
388
|
}
|
|
367
389
|
|
|
368
|
-
function loadTelemetryEvents(feedbackDir) {
|
|
390
|
+
function loadTelemetryEvents(feedbackDir, options = {}) {
|
|
369
391
|
const diagnostics = getTelemetrySourceDiagnostics(feedbackDir);
|
|
370
392
|
const merged = [];
|
|
371
393
|
const seen = new Set();
|
|
372
394
|
|
|
373
395
|
for (const filePath of diagnostics.activePaths) {
|
|
374
|
-
const rows = loadTelemetryEventsFromPath(filePath);
|
|
396
|
+
const rows = loadTelemetryEventsFromPath(filePath, options);
|
|
375
397
|
for (const row of rows) {
|
|
376
398
|
const key = JSON.stringify(row);
|
|
377
399
|
if (seen.has(key)) continue;
|
|
@@ -406,8 +428,11 @@ function summarizeRecentEvents(events) {
|
|
|
406
428
|
|
|
407
429
|
function getTelemetrySummary(feedbackDir, options = {}) {
|
|
408
430
|
const analyticsWindow = resolveAnalyticsWindow(options);
|
|
431
|
+
const telemetryLoadOptions = analyticsWindow.bounded
|
|
432
|
+
? { maxBytes: Number(options.telemetryTailBytes || DEFAULT_BOUNDED_TELEMETRY_TAIL_BYTES) }
|
|
433
|
+
: {};
|
|
409
434
|
const events = filterEntriesForWindow(
|
|
410
|
-
loadTelemetryEvents(feedbackDir),
|
|
435
|
+
loadTelemetryEvents(feedbackDir, telemetryLoadOptions),
|
|
411
436
|
analyticsWindow,
|
|
412
437
|
(entry) => entry && (entry.receivedAt || entry.timestamp)
|
|
413
438
|
);
|
package/scripts/tool-registry.js
CHANGED
|
@@ -192,6 +192,98 @@ const TOOLS = [
|
|
|
192
192
|
},
|
|
193
193
|
},
|
|
194
194
|
}),
|
|
195
|
+
readOnlyTool({
|
|
196
|
+
name: 'plan_agent_design_governance',
|
|
197
|
+
description: 'Evaluate an agent workflow before adding tools, autonomy, or subagents. Recommends single-agent vs manager/decentralized patterns, baseline evals, instruction fixes, and tool safeguards.',
|
|
198
|
+
inputSchema: {
|
|
199
|
+
type: 'object',
|
|
200
|
+
properties: {
|
|
201
|
+
workflow: { type: 'string', description: 'Workflow name or short description.' },
|
|
202
|
+
tools: { type: 'array', items: { type: 'string' }, description: 'Tool names available to the agent.' },
|
|
203
|
+
toolCount: { type: 'number', description: 'Total tools when names are not listed.' },
|
|
204
|
+
similarToolCount: { type: 'number', description: 'Number of similar or overlapping tools.' },
|
|
205
|
+
conditionalBranches: { type: 'number', description: 'Rough count of if/then instruction branches.' },
|
|
206
|
+
handoffCount: { type: 'number', description: 'Existing or proposed handoff count.' },
|
|
207
|
+
highRiskTools: { type: 'array', items: { type: 'string' }, description: 'Tools that affect production, money, data, secrets, or outbound actions.' },
|
|
208
|
+
writeTools: { type: 'array', items: { type: 'string' }, description: 'Write-capable tools.' },
|
|
209
|
+
hasBaselineEvals: { type: 'boolean', description: 'Whether baseline agent evals exist.' },
|
|
210
|
+
hasDocs: { type: 'boolean', description: 'Instructions draw on existing workflow docs.' },
|
|
211
|
+
hasExamples: { type: 'boolean', description: 'Instructions include concrete examples.' },
|
|
212
|
+
hasEdgeCases: { type: 'boolean', description: 'Instructions include edge cases and failure paths.' },
|
|
213
|
+
hasToolApprovals: { type: 'boolean', description: 'Risky tool calls require approval.' },
|
|
214
|
+
hasExitCondition: { type: 'boolean', description: 'Instructions define when the run is complete.' },
|
|
215
|
+
reversibleActions: { type: 'boolean', description: 'Risky actions are reversible or have rollback procedures.' },
|
|
216
|
+
},
|
|
217
|
+
},
|
|
218
|
+
}),
|
|
219
|
+
readOnlyTool({
|
|
220
|
+
name: 'plan_proactive_agent_eval_guardrails',
|
|
221
|
+
description: 'Map proactive-assistant eval gaps to PARE-style state-machine, active-user-simulation, goal-inference, intervention-timing, and multi-app orchestration gates.',
|
|
222
|
+
inputSchema: {
|
|
223
|
+
type: 'object',
|
|
224
|
+
properties: {
|
|
225
|
+
workflow: { type: 'string', description: 'Proactive assistant workflow name.' },
|
|
226
|
+
apps: { type: 'array', items: { type: 'string' }, description: 'Apps involved in the proactive workflow.' },
|
|
227
|
+
states: { type: 'array', items: { type: 'string' }, description: 'Modeled app states.' },
|
|
228
|
+
stateCount: { type: 'number', description: 'Number of modeled states.' },
|
|
229
|
+
actionCount: { type: 'number', description: 'Number of state-dependent actions.' },
|
|
230
|
+
taskCount: { type: 'number', description: 'Number of benchmark tasks or scenarios.' },
|
|
231
|
+
hasStateMachine: { type: 'boolean', description: 'Whether apps are modeled as finite state machines.' },
|
|
232
|
+
hasActiveUserSimulation: { type: 'boolean', description: 'Whether active user simulation exists.' },
|
|
233
|
+
hasGoalInferenceEvals: { type: 'boolean', description: 'Whether goal inference is graded.' },
|
|
234
|
+
hasInterventionTimingEvals: { type: 'boolean', description: 'Whether intervention timing is graded.' },
|
|
235
|
+
hasMultiAppEvals: { type: 'boolean', description: 'Whether multi-app orchestration is graded.' },
|
|
236
|
+
flatToolApiOnly: { type: 'boolean', description: 'Current eval only covers flat tool calls.' },
|
|
237
|
+
proactiveWrites: { type: 'boolean', description: 'Proactive agent can write or mutate state.' },
|
|
238
|
+
userVisibleActions: { type: 'boolean', description: 'Interventions can notify, schedule, send, or affect users.' },
|
|
239
|
+
},
|
|
240
|
+
},
|
|
241
|
+
}),
|
|
242
|
+
readOnlyTool({
|
|
243
|
+
name: 'plan_reward_hacking_guardrails',
|
|
244
|
+
description: 'Detect reward-hacking patterns such as unsupported completion claims, sycophancy, verbosity-as-proof, benchmark overfitting, evaluator manipulation, and proxy-only metrics.',
|
|
245
|
+
inputSchema: {
|
|
246
|
+
type: 'object',
|
|
247
|
+
properties: {
|
|
248
|
+
workflow: { type: 'string', description: 'Agent workflow or release lane being evaluated.' },
|
|
249
|
+
text: { type: 'string', description: 'Candidate response, claim, summary, or verifier output to inspect.' },
|
|
250
|
+
evidence: { type: 'array', items: { type: 'string' }, description: 'Evidence artifacts attached to the claim.' },
|
|
251
|
+
metrics: { type: 'array', items: { type: 'string' }, description: 'Proxy metrics or reward scores used by the workflow.' },
|
|
252
|
+
hasHoldout: { type: 'boolean', description: 'Whether holdout, regression, or real-workflow evidence exists.' },
|
|
253
|
+
hasHumanObjective: { type: 'boolean', description: 'Whether proxy metrics are mapped to a user objective.' },
|
|
254
|
+
hasVerifierTrace: { type: 'boolean', description: 'Whether verifier trace, run log, or proof artifact exists.' },
|
|
255
|
+
optimizedForScore: { type: 'boolean', description: 'Whether an eval, benchmark, or reward score is being optimized.' },
|
|
256
|
+
multimodal: { type: 'boolean', description: 'Whether claims depend on screenshots, PDFs, charts, images, or video.' },
|
|
257
|
+
},
|
|
258
|
+
},
|
|
259
|
+
}),
|
|
260
|
+
readOnlyTool({
|
|
261
|
+
name: 'plan_oss_pr_opportunity_scout',
|
|
262
|
+
description: 'Rank upstream GitHub repositories ThumbGate depends on for proof-backed issue, bounty, and PR opportunities without spammy drive-by contributions.',
|
|
263
|
+
inputSchema: {
|
|
264
|
+
type: 'object',
|
|
265
|
+
properties: {
|
|
266
|
+
packagePath: { type: 'string', description: 'Path to package.json used to discover dependencies.' },
|
|
267
|
+
dependencies: { type: 'array', items: { type: 'string' }, description: 'Dependency names to scout instead of package.json.' },
|
|
268
|
+
maxRepos: { type: 'number', description: 'Maximum mapped repositories to include.' },
|
|
269
|
+
includeBounties: { type: 'boolean', description: 'Include bug-bounty and security search queries.' },
|
|
270
|
+
},
|
|
271
|
+
},
|
|
272
|
+
}),
|
|
273
|
+
readOnlyTool({
|
|
274
|
+
name: 'plan_chatgpt_ads_readiness',
|
|
275
|
+
description: 'Prepare ThumbGate intent clusters, ad copy, proof links, UTM measurement, and launch gates for ChatGPT Ads Manager tests.',
|
|
276
|
+
inputSchema: {
|
|
277
|
+
type: 'object',
|
|
278
|
+
properties: {
|
|
279
|
+
offer: { type: 'string', description: 'Offer to advertise, such as Pro or Workflow Hardening Sprint.' },
|
|
280
|
+
audience: { type: 'string', description: 'Audience segment to target.' },
|
|
281
|
+
budget: { type: 'number', description: 'Initial test budget.' },
|
|
282
|
+
keywords: { type: 'array', items: { type: 'string' }, description: 'High-intent conversational queries.' },
|
|
283
|
+
proofLinks: { type: 'array', items: { type: 'string' }, description: 'Proof URLs required by ad claims.' },
|
|
284
|
+
},
|
|
285
|
+
},
|
|
286
|
+
}),
|
|
195
287
|
destructiveTool({
|
|
196
288
|
name: 'import_document',
|
|
197
289
|
description: 'Import a local policy or runbook document into ThumbGate, normalize it for search, and propose provenance-backed gate candidates.',
|