tryassay 0.6.0 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/pricing-enforcer.d.ts +45 -0
- package/dist/api/pricing-enforcer.js +144 -0
- package/dist/api/pricing-enforcer.js.map +1 -0
- package/dist/api/server.d.ts +28 -0
- package/dist/api/server.js +265 -0
- package/dist/api/server.js.map +1 -0
- package/dist/api/team-session.d.ts +59 -0
- package/dist/api/team-session.js +240 -0
- package/dist/api/team-session.js.map +1 -0
- package/dist/cli.js +123 -2
- package/dist/cli.js.map +1 -1
- package/dist/commands/api.d.ts +4 -0
- package/dist/commands/api.js +50 -0
- package/dist/commands/api.js.map +1 -0
- package/dist/commands/runtime.d.ts +61 -0
- package/dist/commands/runtime.js +554 -0
- package/dist/commands/runtime.js.map +1 -1
- package/dist/runtime/agent-spawner.d.ts +56 -0
- package/dist/runtime/agent-spawner.js +217 -0
- package/dist/runtime/agent-spawner.js.map +1 -0
- package/dist/runtime/agents/coordinator-agent.d.ts +20 -0
- package/dist/runtime/agents/coordinator-agent.js +182 -0
- package/dist/runtime/agents/coordinator-agent.js.map +1 -0
- package/dist/runtime/agents/ops-agent.d.ts +11 -0
- package/dist/runtime/agents/ops-agent.js +113 -0
- package/dist/runtime/agents/ops-agent.js.map +1 -0
- package/dist/runtime/agents/research-agent.d.ts +11 -0
- package/dist/runtime/agents/research-agent.js +114 -0
- package/dist/runtime/agents/research-agent.js.map +1 -0
- package/dist/runtime/agents/test-agent.d.ts +11 -0
- package/dist/runtime/agents/test-agent.js +114 -0
- package/dist/runtime/agents/test-agent.js.map +1 -0
- package/dist/runtime/audit-log.js +2 -2
- package/dist/runtime/audit-log.js.map +1 -1
- package/dist/runtime/capability-registry.d.ts +62 -0
- package/dist/runtime/capability-registry.js +191 -0
- package/dist/runtime/capability-registry.js.map +1 -0
- package/dist/runtime/collusion-detector.d.ts +35 -0
- package/dist/runtime/collusion-detector.js +97 -0
- package/dist/runtime/collusion-detector.js.map +1 -0
- package/dist/runtime/control-server.js +8 -4
- package/dist/runtime/control-server.js.map +1 -1
- package/dist/runtime/domain-coverage-analyzer.d.ts +24 -0
- package/dist/runtime/domain-coverage-analyzer.js +178 -0
- package/dist/runtime/domain-coverage-analyzer.js.map +1 -0
- package/dist/runtime/executor.js +27 -12
- package/dist/runtime/executor.js.map +1 -1
- package/dist/runtime/human-escalation.d.ts +41 -0
- package/dist/runtime/human-escalation.js +122 -0
- package/dist/runtime/human-escalation.js.map +1 -0
- package/dist/runtime/kill-switch.d.ts +51 -0
- package/dist/runtime/kill-switch.js +185 -0
- package/dist/runtime/kill-switch.js.map +1 -0
- package/dist/runtime/layer2-guardian.d.ts +81 -0
- package/dist/runtime/layer2-guardian.js +263 -0
- package/dist/runtime/layer2-guardian.js.map +1 -0
- package/dist/runtime/multi-agent-loop.d.ts +37 -0
- package/dist/runtime/multi-agent-loop.js +411 -0
- package/dist/runtime/multi-agent-loop.js.map +1 -0
- package/dist/runtime/prompt-safety-analyzer.d.ts +17 -0
- package/dist/runtime/prompt-safety-analyzer.js +230 -0
- package/dist/runtime/prompt-safety-analyzer.js.map +1 -0
- package/dist/runtime/rollback-manager.d.ts +50 -0
- package/dist/runtime/rollback-manager.js +157 -0
- package/dist/runtime/rollback-manager.js.map +1 -0
- package/dist/runtime/rule-canary-deployer.d.ts +69 -0
- package/dist/runtime/rule-canary-deployer.js +289 -0
- package/dist/runtime/rule-canary-deployer.js.map +1 -0
- package/dist/runtime/rule-conflict-detector.d.ts +48 -0
- package/dist/runtime/rule-conflict-detector.js +214 -0
- package/dist/runtime/rule-conflict-detector.js.map +1 -0
- package/dist/runtime/rule-meta-verifier.d.ts +18 -0
- package/dist/runtime/rule-meta-verifier.js +275 -0
- package/dist/runtime/rule-meta-verifier.js.map +1 -0
- package/dist/runtime/rule-proposal-manager.d.ts +95 -0
- package/dist/runtime/rule-proposal-manager.js +190 -0
- package/dist/runtime/rule-proposal-manager.js.map +1 -0
- package/dist/runtime/safety-enforcer.d.ts +35 -0
- package/dist/runtime/safety-enforcer.js +165 -0
- package/dist/runtime/safety-enforcer.js.map +1 -0
- package/dist/runtime/safety-status.d.ts +48 -0
- package/dist/runtime/safety-status.js +119 -0
- package/dist/runtime/safety-status.js.map +1 -0
- package/dist/runtime/shared-memory.d.ts +47 -0
- package/dist/runtime/shared-memory.js +151 -0
- package/dist/runtime/shared-memory.js.map +1 -0
- package/dist/runtime/specialized-agent.d.ts +5 -0
- package/dist/runtime/specialized-agent.js +37 -0
- package/dist/runtime/specialized-agent.js.map +1 -1
- package/dist/runtime/stall-detector.d.ts +13 -0
- package/dist/runtime/stall-detector.js +121 -0
- package/dist/runtime/stall-detector.js.map +1 -0
- package/dist/runtime/tool-approval.d.ts +51 -0
- package/dist/runtime/tool-approval.js +148 -0
- package/dist/runtime/tool-approval.js.map +1 -0
- package/dist/runtime/tool-sandbox.d.ts +43 -0
- package/dist/runtime/tool-sandbox.js +394 -0
- package/dist/runtime/tool-sandbox.js.map +1 -0
- package/dist/runtime/tool-verifier.d.ts +18 -0
- package/dist/runtime/tool-verifier.js +323 -0
- package/dist/runtime/tool-verifier.js.map +1 -0
- package/dist/runtime/trust-manager.d.ts +33 -3
- package/dist/runtime/trust-manager.js +128 -26
- package/dist/runtime/trust-manager.js.map +1 -1
- package/dist/runtime/types.d.ts +652 -0
- package/dist/runtime/verification-intensity.d.ts +34 -0
- package/dist/runtime/verification-intensity.js +104 -0
- package/dist/runtime/verification-intensity.js.map +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
// ============================================================
|
|
2
|
+
// Assay Verified Agent Runtime — Tool Verifier
|
|
3
|
+
// Runs Assay verification pipeline on tool source code.
|
|
4
|
+
// Produces a ToolVerification report with code, security,
|
|
5
|
+
// and sandbox test results.
|
|
6
|
+
// ============================================================
|
|
7
|
+
import { readFile } from 'node:fs/promises';
|
|
8
|
+
import { join } from 'node:path';
|
|
9
|
+
import { createHash, randomUUID } from 'node:crypto';
|
|
10
|
+
import { extractClaims } from '../lib/claim-extractor.js';
|
|
11
|
+
import { verifyClaims } from '../lib/code-verifier.js';
|
|
12
|
+
import { indexCodebase } from '../lib/codebase-indexer.js';
|
|
13
|
+
import { ToolSandbox } from './tool-sandbox.js';
|
|
14
|
+
/**
 * Regex-based heuristics applied to a tool's source text by the security scan.
 *
 * Each entry carries:
 *   - name:           human-readable description, reported as the finding description
 *   - type:           finding type, used to bucket findings into risk categories
 *   - regex:          pattern tested against the source (never use the `g`/`y`
 *                     flags here: the patterns are shared and must stay stateless)
 *   - severity:       'low' | 'medium' | 'high' | 'critical'
 *   - category:       coarse grouping label
 *   - recommendation: remediation hint reported with the finding
 *
 * These are textual heuristics, not a parser: they can both miss real issues
 * and flag harmless code (e.g. `someRegex.exec(` matches the subprocess rule).
 */
const SECURITY_PATTERNS = [
    {
        name: 'Dynamic URL construction',
        type: 'ssrf',
        // fetch( whose first argument does not start with a plain quote.
        regex: /fetch\s*\(\s*[^'"]/,
        severity: 'high',
        category: 'ssrf',
        recommendation: 'Use allowlisted URLs only. Never construct URLs from user input.',
    },
    {
        name: 'eval() usage',
        type: 'code_injection',
        regex: /\beval\s*\(/,
        severity: 'critical',
        category: 'injection',
        // Must not recommend Function(): the next pattern flags it as well.
        recommendation: 'Remove eval(). Use JSON.parse() for data, or a static dispatch table for controlled execution.',
    },
    {
        name: 'new Function() usage',
        type: 'code_injection',
        regex: /new\s+Function\s*\(/,
        severity: 'high',
        category: 'injection',
        recommendation: 'Avoid dynamic code generation. Use static functions.',
    },
    {
        name: 'child_process spawn/exec',
        type: 'privilege_escalation',
        // Includes execFile/execFileSync, which also start subprocesses.
        regex: /\b(?:exec|execSync|execFile|execFileSync|spawn|spawnSync|fork)\s*\(/,
        severity: 'critical',
        category: 'privilege_escalation',
        recommendation: 'Tools must not spawn subprocesses. The sandbox handles execution.',
    },
    {
        name: 'File system write outside temp',
        type: 'fs_escape',
        // Whitespace is consumed inside the lookahead so that backtracking
        // cannot skip the check for `writeFile( '/tmp/...')`; the trailing
        // slash stops '/tmpfoo' from being treated as the temp directory.
        regex: /writeFile(?:Sync)?\s*\((?!\s*['"]\/tmp\/)/,
        severity: 'high',
        category: 'data_exfiltration',
        recommendation: 'Write only to /tmp or declared write paths.',
    },
    {
        name: 'Environment variable access',
        type: 'info_leak',
        // Covers process.env.NAME, process.env['NAME'] and bare process.env.
        regex: /process\.env\b/,
        severity: 'medium',
        category: 'data_exfiltration',
        recommendation: 'Tools should not read environment variables. Use declared configuration.',
    },
    {
        name: 'process.exit call',
        type: 'privilege_escalation',
        regex: /process\.exit\s*\(/,
        severity: 'high',
        category: 'privilege_escalation',
        recommendation: 'Tools must not call process.exit(). Throw an error instead.',
    },
    {
        name: 'SQL concatenation',
        type: 'sql_injection',
        // NOTE(review): this only matches when a SQL keyword appears AFTER the
        // `'... ' +` concatenation on the same line; `"SELECT ... " + id` with
        // nothing following is not matched. Confirm whether that is intended.
        regex: /['"`]\s*\+\s*.*(?:SELECT|INSERT|UPDATE|DELETE|FROM|WHERE)/i,
        severity: 'critical',
        category: 'injection',
        recommendation: 'Use parameterized queries. Never concatenate SQL.',
    },
    {
        name: 'require() dynamic import',
        type: 'code_injection',
        regex: /require\s*\(\s*[^'"]/,
        severity: 'high',
        category: 'injection',
        recommendation: 'Use static imports only. No dynamic require() calls.',
    },
    {
        name: 'Global object modification',
        type: 'privilege_escalation',
        // `=(?!=)` keeps comparisons (==, ===) from being reported as writes.
        regex: /\b(?:globalThis|global)\s*\.\s*\w+\s*=(?!=)/,
        severity: 'high',
        category: 'privilege_escalation',
        recommendation: 'Tools must not modify global objects.',
    },
];
|
|
96
|
+
// ── Tool Verifier ──────────────────────────────────────────
|
|
97
|
+
/**
 * Verifies a tool before it is admitted to the runtime.
 *
 * A verification combines three signals: claim-based code verification of the
 * tool source, a regex security scan, and the sandbox test suite. The signals
 * are folded into a single verdict ('approve', 'needs_review' or 'reject').
 */
export class ToolVerifier {
    /** Sandbox whose `runTestSuite(tool, toolSourceDir)` produces the sandbox test results. */
    sandbox;
    /**
     * @param {object} [sandbox] - Sandbox to run the test suite in. A new
     *   ToolSandbox is created when this is null or undefined.
     */
    constructor(sandbox) {
        this.sandbox = sandbox ?? new ToolSandbox();
    }
    /**
     * Run full verification on a tool: code analysis, security scan,
     * and sandbox tests. Returns a ToolVerification report.
     *
     * The tool is rejected up front (without running any check) when its entry
     * point cannot be read or when the SHA-256 of the source text differs from
     * `tool.source.source_hash`.
     *
     * @param {object} tool - Tool descriptor; reads `id`, `source.entry_point`
     *   and `source.source_hash`.
     * @param {string} toolSourceDir - Directory the entry point is resolved against.
     * @returns {Promise<object>} The verification report.
     */
    async verify(tool, toolSourceDir) {
        const verificationId = randomUUID();
        const entryPath = join(toolSourceDir, tool.source.entry_point);
        // Read source code
        let sourceCode;
        try {
            sourceCode = await readFile(entryPath, 'utf-8');
        }
        catch (err) {
            const detail = err instanceof Error ? err.message : String(err);
            return this.makeRejection(tool.id, verificationId, `Cannot read tool source: ${detail}`);
        }
        // Verify source hash (computed over the decoded UTF-8 text)
        const actualHash = createHash('sha256').update(sourceCode).digest('hex');
        if (actualHash !== tool.source.source_hash) {
            return this.makeRejection(tool.id, verificationId, `Source hash mismatch. Expected: ${tool.source.source_hash}, Actual: ${actualHash}`);
        }
        // Step 1: Run Assay forward pipeline on tool source
        const codeVerification = await this.verifyCode(sourceCode, toolSourceDir);
        // Step 2: Run security scan
        const securityResult = this.scanSecurity(sourceCode, entryPath);
        // Step 3: Run sandbox tests
        const sandboxTests = await this.sandbox.runTestSuite(tool, toolSourceDir);
        // Determine overall verdict
        const { verdict, blockers, confidence } = this.determineVerdict(codeVerification, securityResult, sandboxTests);
        return {
            tool_id: tool.id,
            verification_id: verificationId,
            timestamp: new Date().toISOString(),
            code_verification: codeVerification,
            security: securityResult,
            sandbox_tests: sandboxTests,
            verdict,
            confidence,
            reasoning: this.generateReasoning(codeVerification, securityResult, sandboxTests, verdict),
            blockers,
        };
    }
    // ── Private: Code Verification ────────────────────────────
    /**
     * Extract claims from the source and verify them against an index of the
     * tool directory.
     *
     * @param {string} sourceCode - Tool source text.
     * @param {string} toolSourceDir - Directory to index for verification.
     * @returns {Promise<object>} Counts of extracted/passed/failed claims and
     *   the failed claims whose inferred severity is 'critical' or 'high'.
     */
    async verifyCode(sourceCode, toolSourceDir) {
        const emptyResult = () => ({
            claims_extracted: 0,
            claims_passed: 0,
            claims_failed: 0,
            critical_failures: [],
        });
        try {
            const { claims: rawClaims } = await extractClaims(sourceCode);
            if (rawClaims.length === 0) {
                return emptyResult();
            }
            const index = await indexCodebase(toolSourceDir);
            const { verifications } = await verifyClaims(rawClaims, index);
            const failures = verifications.filter(v => v.verdict === 'FAIL');
            const passed = verifications.filter(v => v.verdict === 'PASS').length;
            const criticalFailures = failures
                .map(v => {
                    const label = v.claim || v.claimId;
                    return {
                        claim: label,
                        verdict: 'FAIL',
                        severity: this.inferClaimSeverity(label),
                        evidence: v.evidence?.map((e) => e.snippet ?? '').join('; ') ?? '',
                    };
                })
                .filter(f => f.severity === 'critical' || f.severity === 'high');
            return {
                claims_extracted: rawClaims.length,
                claims_passed: passed,
                claims_failed: failures.length,
                critical_failures: criticalFailures,
            };
        }
        catch {
            // Best effort: a failure anywhere in the claim pipeline yields an
            // empty result instead of aborting the whole verification.
            // NOTE(review): this means a pipeline outage is indistinguishable
            // from "no claims found" in the report — confirm that is intended.
            return emptyResult();
        }
    }
    // ── Private: Security Scan ────────────────────────────────
    /**
     * Apply every SECURITY_PATTERNS entry to the source and report at most one
     * finding per pattern, located at the line where its first match starts.
     *
     * @param {string} sourceCode - Tool source text.
     * @param {string} filePath - Path used as the prefix of finding locations.
     * @returns {object} Per-category maximum risk plus the list of findings.
     */
    scanSecurity(sourceCode, filePath) {
        const findings = [];
        for (const pattern of SECURITY_PATTERNS) {
            // search() ignores lastIndex, so shared regex objects stay stateless.
            const matchIndex = sourceCode.search(pattern.regex);
            if (matchIndex === -1) {
                continue;
            }
            // Derive the line from the match offset so that a match spanning a
            // line break is reported where it starts, not defaulted to line 1.
            const lineNum = sourceCode.slice(0, matchIndex).split('\n').length;
            findings.push({
                type: pattern.type,
                severity: pattern.severity,
                location: `${filePath}:${lineNum}`,
                description: pattern.name,
                recommendation: pattern.recommendation,
            });
        }
        const riskOf = (...types) => this.maxRisk(findings.filter(f => types.includes(f.type)));
        return {
            ssrf_risk: riskOf('ssrf'),
            injection_risk: riskOf('code_injection', 'sql_injection'),
            privilege_escalation_risk: riskOf('privilege_escalation'),
            data_exfiltration_risk: riskOf('fs_escape', 'info_leak'),
            findings,
        };
    }
    // ── Private: Verdict Determination ────────────────────────
    /**
     * Combine the three result sets into a verdict.
     *
     * Any blocker yields 'reject' (confidence 0.9). Otherwise a high-severity
     * finding, an errored malformed-input test, or more failed claims than 10%
     * of passed claims yields 'needs_review' (confidence 0.6). Otherwise the
     * tool is approved with a confidence capped at 0.95.
     *
     * @param {object} code - Result of verifyCode().
     * @param {object} security - Result of scanSecurity().
     * @param {object} sandbox - Sandbox test results (happy_path,
     *   malformed_input, constraint_compliance are read here).
     * @returns {{verdict: string, blockers: string[], confidence: number}}
     */
    determineVerdict(code, security, sandbox) {
        const blockers = [];
        // Critical security findings = immediate reject
        if (security.findings.some(f => f.severity === 'critical')) {
            blockers.push('Critical security finding(s) detected');
        }
        if (security.privilege_escalation_risk === 'critical' || security.privilege_escalation_risk === 'high') {
            blockers.push('High privilege escalation risk');
        }
        if (security.injection_risk === 'critical') {
            blockers.push('Critical injection risk');
        }
        // Critical code verification failures = reject
        if (code.critical_failures.length > 0) {
            blockers.push(`${code.critical_failures.length} critical code verification failure(s)`);
        }
        // Sandbox test failures
        if (sandbox.happy_path.status === 'fail' || sandbox.happy_path.status === 'error') {
            blockers.push('Happy path test failed');
        }
        if (sandbox.constraint_compliance.status === 'fail') {
            blockers.push('Constraint compliance test failed');
        }
        if (blockers.length > 0) {
            return { verdict: 'reject', blockers, confidence: 0.9 };
        }
        // High-severity findings (medium ones do not trigger review), an errored
        // malformed-input test, or too many failed claims = needs review
        const needsReview = security.findings.some(f => f.severity === 'high') ||
            sandbox.malformed_input.status === 'error' ||
            code.claims_failed > code.claims_passed * 0.1;
        if (needsReview) {
            return { verdict: 'needs_review', blockers: [], confidence: 0.6 };
        }
        // Calculate confidence from code verification pass rate; with no claims
        // extracted there is no evidence either way, hence the neutral 0.5.
        const passRate = code.claims_extracted > 0
            ? code.claims_passed / code.claims_extracted
            : 0.5;
        // timeout_behavior is not part of this score (three tests, not four).
        const sandboxScore = [sandbox.happy_path, sandbox.malformed_input, sandbox.constraint_compliance]
            .filter(t => t.status === 'pass').length / 3;
        return {
            verdict: 'approve',
            blockers: [],
            confidence: Math.min(passRate, sandboxScore, 0.95),
        };
    }
    // ── Private: Helpers ──────────────────────────────────────
    /**
     * Highest severity among the findings, or 'none' when there are none.
     * Severities outside the known scale are ignored.
     *
     * @param {Array<{severity: string}>} findings
     * @returns {string} One of 'none', 'low', 'medium', 'high', 'critical'.
     */
    maxRisk(findings) {
        if (findings.length === 0)
            return 'none';
        const order = ['none', 'low', 'medium', 'high', 'critical'];
        let max = 0;
        for (const f of findings) {
            const idx = order.indexOf(f.severity);
            if (idx > max)
                max = idx;
        }
        return order[max];
    }
    /**
     * Guess a severity from keywords in the claim text.
     *
     * A missing or non-string claim is treated as empty text (severity 'low')
     * rather than throwing: an exception here would be swallowed by
     * verifyCode() and silently discard the entire code-verification result.
     *
     * @param {string} [claimText] - Claim text or claim id.
     * @returns {string} 'critical', 'high', 'medium' or 'low'.
     */
    inferClaimSeverity(claimText) {
        const lower = String(claimText ?? '').toLowerCase();
        if (lower.includes('injection') || lower.includes('auth') || lower.includes('secret'))
            return 'critical';
        if (lower.includes('error') || lower.includes('crash') || lower.includes('data loss'))
            return 'high';
        if (lower.includes('performance') || lower.includes('edge case'))
            return 'medium';
        return 'low';
    }
    /**
     * Build the one-line human-readable summary stored in the report.
     *
     * @param {object} code - Result of verifyCode().
     * @param {object} security - Result of scanSecurity().
     * @param {object} sandbox - Sandbox test results (all four tests are counted).
     * @param {string} verdict - Verdict from determineVerdict().
     * @returns {string}
     */
    generateReasoning(code, security, sandbox, verdict) {
        const testsPassed = [sandbox.happy_path, sandbox.malformed_input, sandbox.timeout_behavior, sandbox.constraint_compliance]
            .filter(t => t.status === 'pass').length;
        return [
            `Code: ${code.claims_passed}/${code.claims_extracted} claims passed`,
            `Security: ${security.findings.length} finding(s)`,
            `Sandbox: ${testsPassed}/4 tests passed`,
            `Verdict: ${verdict}`,
        ].join('. ');
    }
    /**
     * Build a 'reject' report for a tool that could not be verified at all.
     * No check has run, so every section is empty and every sandbox test is
     * marked 'error' with the rejection reason.
     *
     * @param {string} toolId
     * @param {string} verificationId
     * @param {string} reason - Used as reasoning, sole blocker and test error.
     * @returns {object} The verification report.
     */
    makeRejection(toolId, verificationId, reason) {
        // A fresh object per test so the four entries are not aliases of each other.
        const notExecuted = () => ({
            status: 'error',
            input_summary: 'Not executed',
            output_summary: reason,
            duration_ms: 0,
            error: reason,
        });
        return {
            tool_id: toolId,
            verification_id: verificationId,
            timestamp: new Date().toISOString(),
            code_verification: { claims_extracted: 0, claims_passed: 0, claims_failed: 0, critical_failures: [] },
            security: {
                ssrf_risk: 'none',
                injection_risk: 'none',
                privilege_escalation_risk: 'none',
                data_exfiltration_risk: 'none',
                findings: [],
            },
            sandbox_tests: {
                happy_path: notExecuted(),
                malformed_input: notExecuted(),
                timeout_behavior: notExecuted(),
                constraint_compliance: notExecuted(),
            },
            verdict: 'reject',
            confidence: 1.0,
            reasoning: reason,
            blockers: [reason],
        };
    }
}
|
|
323
|
+
//# sourceMappingURL=tool-verifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tool-verifier.js","sourceRoot":"","sources":["../../src/runtime/tool-verifier.ts"],"names":[],"mappings":"AAAA,+DAA+D;AAC/D,+CAA+C;AAC/C,wDAAwD;AACxD,0DAA0D;AAC1D,4BAA4B;AAC5B,+DAA+D;AAE/D,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACrD,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAqBhD,MAAM,iBAAiB,GAAsB;IAC3C;QACE,IAAI,EAAE,0BAA0B;QAChC,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,oBAAoB;QAC3B,QAAQ,EAAE,MAAM;QAChB,QAAQ,EAAE,MAAM;QAChB,cAAc,EAAE,kEAAkE;KACnF;IACD;QACE,IAAI,EAAE,cAAc;QACpB,IAAI,EAAE,gBAAgB;QACtB,KAAK,EAAE,aAAa;QACpB,QAAQ,EAAE,UAAU;QACpB,QAAQ,EAAE,WAAW;QACrB,cAAc,EAAE,mFAAmF;KACpG;IACD;QACE,IAAI,EAAE,sBAAsB;QAC5B,IAAI,EAAE,gBAAgB;QACtB,KAAK,EAAE,qBAAqB;QAC5B,QAAQ,EAAE,MAAM;QAChB,QAAQ,EAAE,WAAW;QACrB,cAAc,EAAE,sDAAsD;KACvE;IACD;QACE,IAAI,EAAE,0BAA0B;QAChC,IAAI,EAAE,sBAAsB;QAC5B,KAAK,EAAE,6CAA6C;QACpD,QAAQ,EAAE,UAAU;QACpB,QAAQ,EAAE,sBAAsB;QAChC,cAAc,EAAE,mEAAmE;KACpF;IACD;QACE,IAAI,EAAE,gCAAgC;QACtC,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,yCAAyC;QAChD,QAAQ,EAAE,MAAM;QAChB,QAAQ,EAAE,mBAAmB;QAC7B,cAAc,EAAE,6CAA6C;KAC9D;IACD;QACE,IAAI,EAAE,6BAA6B;QACnC,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,gBAAgB;QACvB,QAAQ,EAAE,QAAQ;QAClB,QAAQ,EAAE,mBAAmB;QAC7B,cAAc,EAAE,0EAA0E;KAC3F;IACD;QACE,IAAI,EAAE,mBAAmB;QACzB,IAAI,EAAE,sBAAsB;QAC5B,KAAK,EAAE,oBAAoB;QAC3B,QAAQ,EAAE,MAAM;QAChB,QAAQ,EAAE,sBAAsB;QAChC,cAAc,EAAE,6DAA6D;KAC9E;IACD;QACE,IAAI,EAAE,mBAAmB;QACzB,IAAI,EAAE,eAAe;QACrB,KAAK,EAAE,4DAA4D;QACnE,QAAQ,EAAE,UAAU;QACpB,QAAQ,EAAE,WAAW;QACrB,cAAc,EAAE,mDAAmD;KACpE;IACD;QACE,IAAI,EAAE,0BAA0B;QAChC,IAAI,EAAE,gBAAgB;QACtB,KAAK,EAAE,sBAAsB;QAC7B,QAAQ,EAAE,MAAM;QAChB,QAAQ,EAAE,WAAW;QACrB,cAAc,EAAE,sDAAsD;KACvE;IACD;QACE,IAAI,EAAE,4BAA4B;QAClC,IAAI,EAAE,sBAAsB;QAC5B,KAAK,EAAE,sCAAsC;QAC7C,QAAQ,EAAE,MAAM;QAChB,QAAQ,EAAE,sBAAsB;QAChC,cAAc,EAAE,uCAAuC;KACxD;CACF,CAAC;AAEF,8DAA8D;AAE9
D,MAAM,OAAO,YAAY;IACf,OAAO,CAAc;IAE7B,YAAY,OAAqB;QAC/B,IAAI,CAAC,OAAO,GAAG,OAAO,IAAI,IAAI,WAAW,EAAE,CAAC;IAC9C,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,MAAM,CACV,IAAoB,EACpB,aAAqB;QAErB,MAAM,cAAc,GAAG,UAAU,EAAE,CAAC;QACpC,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;QAE/D,mBAAmB;QACnB,IAAI,UAAkB,CAAC;QACvB,IAAI,CAAC;YACH,UAAU,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAClD,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,EAAE,cAAc,EAAE,4BAA4B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACrI,CAAC;QAED,qBAAqB;QACrB,MAAM,UAAU,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACzE,IAAI,UAAU,KAAK,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;YAC3C,OAAO,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,EAAE,cAAc,EAAE,mCAAmC,IAAI,CAAC,MAAM,CAAC,WAAW,aAAa,UAAU,EAAE,CAAC,CAAC;QAC1I,CAAC;QAED,oDAAoD;QACpD,MAAM,gBAAgB,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE,aAAa,CAAC,CAAC;QAE1E,4BAA4B;QAC5B,MAAM,cAAc,GAAG,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;QAEhE,4BAA4B;QAC5B,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;QAE1E,4BAA4B;QAC5B,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC,gBAAgB,CAC7D,gBAAgB,EAChB,cAAc,EACd,YAAY,CACb,CAAC;QAEF,OAAO;YACL,OAAO,EAAE,IAAI,CAAC,EAAE;YAChB,eAAe,EAAE,cAAc;YAC/B,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,iBAAiB,EAAE,gBAAgB;YACnC,QAAQ,EAAE,cAAc;YACxB,aAAa,EAAE,YAAY;YAC3B,OAAO;YACP,UAAU;YACV,SAAS,EAAE,IAAI,CAAC,iBAAiB,CAAC,gBAAgB,EAAE,cAAc,EAAE,YAAY,EAAE,OAAO,CAAC;YAC1F,QAAQ;SACT,CAAC;IACJ,CAAC;IAED,6DAA6D;IAErD,KAAK,CAAC,UAAU,CACtB,UAAkB,EAClB,aAAqB;QAErB,IAAI,CAAC;YACH,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,aAAa,CAAC,UAAU,CAAC,CAAC;YAE9D,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC3B,OAAO;oBACL,gBAAgB,EAAE,CAAC;oBACnB,aAAa,EAAE,CAAC;oBAChB,aAAa,EAAE,CAAC;oBAChB,iBAAiB,EAAE,EAAE;iBACtB,CAAC;YACJ,CAAC;YAED,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,aAAa,CAAC,CAAC;YACjD,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,YAAY,CAAC,SAAS,EAAE,K
AAK,CAAC,CAAC;YAE/D,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;YACtE,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;YAEtE,MAAM,gBAAgB,GAA8B,aAAa;iBAC9D,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,MAAM,CAAC;iBACjC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACT,KAAK,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,OAAO;gBAC3B,OAAO,EAAE,MAAe;gBACxB,QAAQ,EAAE,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,OAAO,CAAC;gBACvD,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;aACnE,CAAC,CAAC;iBACF,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,UAAU,IAAI,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC;YAEnE,OAAO;gBACL,gBAAgB,EAAE,SAAS,CAAC,MAAM;gBAClC,aAAa,EAAE,MAAM;gBACrB,aAAa,EAAE,MAAM;gBACrB,iBAAiB,EAAE,gBAAgB;aACpC,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO;gBACL,gBAAgB,EAAE,CAAC;gBACnB,aAAa,EAAE,CAAC;gBAChB,aAAa,EAAE,CAAC;gBAChB,iBAAiB,EAAE,EAAE;aACtB,CAAC;QACJ,CAAC;IACH,CAAC;IAED,6DAA6D;IAErD,YAAY,CAClB,UAAkB,EAClB,QAAgB;QAEhB,MAAM,QAAQ,GAAsB,EAAE,CAAC;QAEvC,KAAK,MAAM,OAAO,IAAI,iBAAiB,EAAE,CAAC;YACxC,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;YAChD,IAAI,OAAO,EAAE,CAAC;gBACZ,mBAAmB;gBACnB,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACrC,IAAI,OAAO,GAAG,CAAC,CAAC;gBAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBACtC,IAAI,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;wBACjC,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC;wBAChB,MAAM;oBACR,CAAC;gBACH,CAAC;gBAED,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,OAAO,CAAC,IAAI;oBAClB,QAAQ,EAAE,OAAO,CAAC,QAAQ;oBAC1B,QAAQ,EAAE,GAAG,QAAQ,IAAI,OAAO,EAAE;oBAClC,WAAW,EAAE,OAAO,CAAC,IAAI;oBACzB,cAAc,EAAE,OAAO,CAAC,cAAc;iBACvC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;YAChE,cAAc,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC
,MAAM,CAAC,CAAC,CAAC,EAAE,CAC/C,CAAC,CAAC,IAAI,KAAK,gBAAgB,IAAI,CAAC,CAAC,IAAI,KAAK,eAAe,CAC1D,CAAC;YACF,yBAAyB,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAC1D,CAAC,CAAC,IAAI,KAAK,sBAAsB,CAClC,CAAC;YACF,sBAAsB,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CACvD,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,CACjD,CAAC;YACF,QAAQ;SACT,CAAC;IACJ,CAAC;IAED,6DAA6D;IAErD,gBAAgB,CACtB,IAA2C,EAC3C,QAAsC,EACtC,OAA0C;QAE1C,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,gDAAgD;QAChD,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,UAAU,CAAC,EAAE,CAAC;YAC3D,QAAQ,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;QACzD,CAAC;QACD,IAAI,QAAQ,CAAC,yBAAyB,KAAK,UAAU,IAAI,QAAQ,CAAC,yBAAyB,KAAK,MAAM,EAAE,CAAC;YACvG,QAAQ,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;QAClD,CAAC;QACD,IAAI,QAAQ,CAAC,cAAc,KAAK,UAAU,EAAE,CAAC;YAC3C,QAAQ,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;QAC3C,CAAC;QAED,+CAA+C;QAC/C,IAAI,IAAI,CAAC,iBAAiB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtC,QAAQ,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,iBAAiB,CAAC,MAAM,wCAAwC,CAAC,CAAC;QAC1F,CAAC;QAED,wBAAwB;QACxB,IAAI,OAAO,CAAC,UAAU,CAAC,MAAM,KAAK,MAAM,IAAI,OAAO,CAAC,UAAU,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;YAClF,QAAQ,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;QAC1C,CAAC;QACD,IAAI,OAAO,CAAC,qBAAqB,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YACpD,QAAQ,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;QACrD,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;QAC1D,CAAC;QAED,sCAAsC;QACtC,MAAM,WAAW,GACf,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC;YAClD,OAAO,CAAC,eAAe,CAAC,MAAM,KAAK,OAAO;YAC1C,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC,aAAa,GAAG,GAAG,CAAC;QAEhD,IAAI,WAAW,EAAE,CAAC;YAChB,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,EAAE,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;QACpE,CAAC;QAED,wDAAwD;QACxD,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,GAAG,CAAC;YACxC,CAAC,CAAC,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC,gBAAgB;YAC5C,CAAC,CAAC,GAAG,CAAC;QACR,MAAM,YAAY,GAAG,CAAC,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,eAAe,EAAE,OAAO,CAAC,qBAAqB,CAAC;aAC9F,MA
AM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;QAE/C,OAAO;YACL,OAAO,EAAE,SAAS;YAClB,QAAQ,EAAE,EAAE;YACZ,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,YAAY,EAAE,IAAI,CAAC;SACnD,CAAC;IACJ,CAAC;IAED,6DAA6D;IAErD,OAAO,CAAC,QAA2B;QACzC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,MAAM,CAAC;QACzC,MAAM,KAAK,GAAoB,CAAC,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAC7E,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;YACtC,IAAI,GAAG,GAAG,GAAG;gBAAE,GAAG,GAAG,GAAG,CAAC;QAC3B,CAAC;QACD,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC;IACpB,CAAC;IAEO,kBAAkB,CAAC,SAAiB;QAC1C,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC;QACtC,IAAI,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC;YAAE,OAAO,UAAU,CAAC;QACzG,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC;YAAE,OAAO,MAAM,CAAC;QACrG,IAAI,KAAK,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC;YAAE,OAAO,QAAQ,CAAC;QAClF,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,iBAAiB,CACvB,IAA2C,EAC3C,QAAsC,EACtC,OAA0C,EAC1C,OAAe;QAEf,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,gBAAgB,gBAAgB,CAAC,CAAC;QACjF,KAAK,CAAC,IAAI,CAAC,aAAa,QAAQ,CAAC,QAAQ,CAAC,MAAM,aAAa,CAAC,CAAC;QAC/D,MAAM,WAAW,GAAG,CAAC,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,eAAe,EAAE,OAAO,CAAC,gBAAgB,EAAE,OAAO,CAAC,qBAAqB,CAAC;aACvH,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;QAC3C,KAAK,CAAC,IAAI,CAAC,YAAY,WAAW,iBAAiB,CAAC,CAAC;QACrD,KAAK,CAAC,IAAI,CAAC,YAAY,OAAO,EAAE,CAAC,CAAC;QAClC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAEO,aAAa,CAAC,MAAc,EAAE,cAAsB,EAAE,MAAc;QAC1E,MAAM,SAAS,GAAmB;YAChC,MAAM,EAAE,OAAO;YACf,aAAa,EAAE,cAAc;YAC7B,cAAc,EAAE,MAAM;YACtB,WAAW,EAAE,CAAC;YACd,KAAK,EAAE,MAAM;SACd,CAAC;QACF,OAAO;YACL,OAAO,EAAE,MAAM;YACf,eAAe,EAAE,cAAc;YAC/B,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,iBAAiB,EAAE,EAAE,gBAAgB,EAAE
,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,iBAAiB,EAAE,EAAE,EAAE;YACrG,QAAQ,EAAE;gBACR,SAAS,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM;gBACzC,yBAAyB,EAAE,MAAM,EAAE,sBAAsB,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE;aAChF;YACD,aAAa,EAAE;gBACb,UAAU,EAAE,SAAS,EAAE,eAAe,EAAE,SAAS;gBACjD,gBAAgB,EAAE,SAAS,EAAE,qBAAqB,EAAE,SAAS;aAC9D;YACD,OAAO,EAAE,QAAQ;YACjB,UAAU,EAAE,GAAG;YACf,SAAS,EAAE,MAAM;YACjB,QAAQ,EAAE,CAAC,MAAM,CAAC;SACnB,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -1,12 +1,30 @@
|
|
|
1
|
-
import type { AgentIdentity, TrustLevel, BoundaryVerificationResult, SafetyPolicy } from './types.js';
|
|
1
|
+
import type { AgentIdentity, TrustLevel, BoundaryVerificationResult, SafetyPolicy, TrustWindow, TrustTransition } from './types.js';
|
|
2
|
+
/** Tunable limits that govern promotion and demotion between trust levels. */
interface TrustThresholds {
    /** Number of handoffs needed before an untrusted agent can become provisional. */
    readonly provisionalMinHandoffs: number;
    /** Lowest pass rate accepted for the untrusted -> provisional promotion. */
    readonly provisionalMinPassRate: number;
    /** Number of handoffs needed before a provisional agent can become trusted. */
    readonly trustedMinHandoffs: number;
    /** Lowest pass rate accepted for the provisional -> trusted promotion. */
    readonly trustedMinPassRate: number;
    /** Size of the rolling window over which the pass rate is calculated. */
    readonly windowSize: number;
    /** A trusted agent whose pass rate falls below this value is demoted to provisional. */
    readonly demotionPassRate: number;
}
|
|
2
16
|
export declare class TrustManager {
|
|
3
17
|
private agents;
|
|
4
18
|
private policy;
|
|
5
19
|
private trustDemotionOnOverride;
|
|
20
|
+
private thresholds;
|
|
6
21
|
private sessionOverrides;
|
|
7
22
|
private sessionDemotions;
|
|
23
|
+
private windows;
|
|
24
|
+
private transitions;
|
|
8
25
|
constructor(policy: SafetyPolicy, opts?: {
|
|
9
26
|
trustDemotionOnOverride?: boolean;
|
|
27
|
+
thresholds?: Partial<TrustThresholds>;
|
|
10
28
|
});
|
|
11
29
|
/** Register an agent with its initial identity. */
|
|
12
30
|
register(agent: AgentIdentity): void;
|
|
@@ -14,20 +32,32 @@ export declare class TrustManager {
|
|
|
14
32
|
getAgent(agentId: string): AgentIdentity | undefined;
|
|
15
33
|
/** Get all registered agents. */
|
|
16
34
|
getAllAgents(): AgentIdentity[];
|
|
35
|
+
/** Get the rolling trust window for an agent. */
|
|
36
|
+
getTrustWindow(agentId: string): TrustWindow | undefined;
|
|
37
|
+
/** Get all trust transitions that occurred during this session. */
|
|
38
|
+
getTransitions(): readonly TrustTransition[];
|
|
17
39
|
/**
|
|
18
40
|
* Update trust based on a boundary verification result.
|
|
19
|
-
*
|
|
41
|
+
* Uses rolling window for pass rate calculation.
|
|
42
|
+
* Returns the updated trust level, demotion status, and halt signal.
|
|
20
43
|
*/
|
|
21
44
|
updateTrust(agentId: string, result: BoundaryVerificationResult): {
|
|
22
45
|
newTrust: TrustLevel;
|
|
23
46
|
demoted: boolean;
|
|
47
|
+
promoted: boolean;
|
|
24
48
|
shouldHalt: boolean;
|
|
25
49
|
};
|
|
50
|
+
/**
|
|
51
|
+
* Force-demote an agent (e.g., collusion detected).
|
|
52
|
+
*/
|
|
53
|
+
forceDemote(agentId: string, reason: TrustTransition['reason']): TrustLevel;
|
|
26
54
|
/** Get session-level stats for an agent. */
|
|
27
55
|
getSessionStats(agentId: string): {
|
|
28
56
|
overrides: number;
|
|
29
57
|
demotions: number;
|
|
30
58
|
};
|
|
31
59
|
private demoteTrust;
|
|
32
|
-
private
|
|
60
|
+
private computeWindowPassRate;
|
|
61
|
+
private recordTransition;
|
|
33
62
|
}
|
|
63
|
+
export {};
|
|
@@ -1,23 +1,37 @@
|
|
|
1
1
|
// ============================================================
|
|
2
|
-
// Assay Verified Agent Runtime — Trust Manager
|
|
3
|
-
//
|
|
2
|
+
// Assay Verified Agent Runtime — Trust Manager (M3)
|
|
3
|
+
// Rolling-window trust calculation, promotion/demotion,
|
|
4
|
+
// trust transition logging, session-level safety enforcement.
|
|
4
5
|
// ============================================================
|
|
5
|
-
|
|
6
|
+
const DEFAULT_THRESHOLDS = {
|
|
7
|
+
provisionalMinHandoffs: 10,
|
|
8
|
+
provisionalMinPassRate: 0.9,
|
|
9
|
+
trustedMinHandoffs: 50,
|
|
10
|
+
trustedMinPassRate: 0.95,
|
|
11
|
+
windowSize: 20,
|
|
12
|
+
demotionPassRate: 0.8,
|
|
13
|
+
};
|
|
14
|
+
// ── Trust Manager ──────────────────────────────────────────
|
|
6
15
|
export class TrustManager {
|
|
7
16
|
agents = new Map();
|
|
8
17
|
policy;
|
|
9
18
|
trustDemotionOnOverride;
|
|
10
|
-
|
|
11
|
-
|
|
19
|
+
thresholds;
|
|
20
|
+
sessionOverrides = new Map();
|
|
21
|
+
sessionDemotions = new Map();
|
|
22
|
+
windows = new Map();
|
|
23
|
+
transitions = [];
|
|
12
24
|
constructor(policy, opts) {
|
|
13
25
|
this.policy = policy;
|
|
14
26
|
this.trustDemotionOnOverride = opts?.trustDemotionOnOverride ?? true;
|
|
27
|
+
this.thresholds = { ...DEFAULT_THRESHOLDS, ...opts?.thresholds };
|
|
15
28
|
}
|
|
16
29
|
/** Register an agent with its initial identity. */
|
|
17
30
|
register(agent) {
|
|
18
31
|
this.agents.set(agent.id, agent);
|
|
19
32
|
this.sessionOverrides.set(agent.id, 0);
|
|
20
33
|
this.sessionDemotions.set(agent.id, 0);
|
|
34
|
+
this.windows.set(agent.id, []);
|
|
21
35
|
}
|
|
22
36
|
/** Get an agent's current identity (with updated trust). */
|
|
23
37
|
getAgent(agentId) {
|
|
@@ -27,20 +41,52 @@ export class TrustManager {
|
|
|
27
41
|
getAllAgents() {
|
|
28
42
|
return Array.from(this.agents.values());
|
|
29
43
|
}
|
|
44
|
+
/** Get the rolling trust window for an agent. */
|
|
45
|
+
getTrustWindow(agentId) {
|
|
46
|
+
const agent = this.agents.get(agentId);
|
|
47
|
+
const entries = this.windows.get(agentId);
|
|
48
|
+
if (!agent || !entries)
|
|
49
|
+
return undefined;
|
|
50
|
+
const windowEntries = entries.slice(-this.thresholds.windowSize);
|
|
51
|
+
const passCount = windowEntries.filter(e => e.verdict === 'PASS').length;
|
|
52
|
+
const overrideCount = windowEntries.filter(e => e.hadFormalOverride).length;
|
|
53
|
+
return {
|
|
54
|
+
agentId,
|
|
55
|
+
windowSize: windowEntries.length,
|
|
56
|
+
results: windowEntries,
|
|
57
|
+
passRate: windowEntries.length > 0 ? passCount / windowEntries.length : 0,
|
|
58
|
+
formalOverrideCount: overrideCount,
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
/** Get all trust transitions that occurred during this session. */
|
|
62
|
+
getTransitions() {
|
|
63
|
+
return this.transitions;
|
|
64
|
+
}
|
|
30
65
|
/**
|
|
31
66
|
* Update trust based on a boundary verification result.
|
|
32
|
-
*
|
|
67
|
+
* Uses rolling window for pass rate calculation.
|
|
68
|
+
* Returns the updated trust level, demotion status, and halt signal.
|
|
33
69
|
*/
|
|
34
70
|
updateTrust(agentId, result) {
|
|
35
71
|
const agent = this.agents.get(agentId);
|
|
36
72
|
if (!agent)
|
|
37
|
-
return { newTrust: 'untrusted', demoted: false, shouldHalt: false };
|
|
73
|
+
return { newTrust: 'untrusted', demoted: false, promoted: false, shouldHalt: false };
|
|
74
|
+
// Add to rolling window
|
|
75
|
+
const windowEntry = {
|
|
76
|
+
boundaryId: result.boundaryId,
|
|
77
|
+
verdict: result.verdict,
|
|
78
|
+
hadFormalOverride: result.formalStats.formalOverrides > 0,
|
|
79
|
+
timestamp: new Date().toISOString(),
|
|
80
|
+
};
|
|
81
|
+
const window = this.windows.get(agentId) ?? [];
|
|
82
|
+
window.push(windowEntry);
|
|
83
|
+
this.windows.set(agentId, window);
|
|
38
84
|
const oldTrust = agent.trustLevel;
|
|
39
85
|
let newTrust = oldTrust;
|
|
40
86
|
let demoted = false;
|
|
41
|
-
|
|
87
|
+
let promoted = false;
|
|
88
|
+
// Check for formal overrides -> demotion
|
|
42
89
|
if (result.formalStats.formalOverrides > 0 && this.trustDemotionOnOverride) {
|
|
43
|
-
// A single formal override drops trust by one level
|
|
44
90
|
newTrust = this.demoteTrust(oldTrust);
|
|
45
91
|
demoted = newTrust !== oldTrust;
|
|
46
92
|
if (demoted) {
|
|
@@ -48,29 +94,54 @@ export class TrustManager {
|
|
|
48
94
|
this.sessionOverrides.set(agentId, overrides);
|
|
49
95
|
const demotions = (this.sessionDemotions.get(agentId) ?? 0) + 1;
|
|
50
96
|
this.sessionDemotions.set(agentId, demotions);
|
|
51
|
-
// Two formal overrides in a session → drop to untrusted
|
|
52
97
|
if (overrides >= 2) {
|
|
53
98
|
newTrust = 'untrusted';
|
|
54
99
|
}
|
|
100
|
+
this.recordTransition(agent, oldTrust, newTrust, 'demotion_formal_override');
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
// Check rolling window pass rate for demotion
|
|
104
|
+
if (!demoted) {
|
|
105
|
+
const recentWindow = window.slice(-this.thresholds.windowSize);
|
|
106
|
+
if (recentWindow.length >= this.thresholds.windowSize) {
|
|
107
|
+
const passRate = recentWindow.filter(e => e.verdict === 'PASS').length / recentWindow.length;
|
|
108
|
+
if (passRate < this.thresholds.demotionPassRate && oldTrust !== 'untrusted') {
|
|
109
|
+
newTrust = this.demoteTrust(oldTrust);
|
|
110
|
+
demoted = newTrust !== oldTrust;
|
|
111
|
+
if (demoted) {
|
|
112
|
+
const demotions = (this.sessionDemotions.get(agentId) ?? 0) + 1;
|
|
113
|
+
this.sessionDemotions.set(agentId, demotions);
|
|
114
|
+
this.recordTransition(agent, oldTrust, newTrust, 'demotion_low_pass_rate');
|
|
115
|
+
}
|
|
116
|
+
}
|
|
55
117
|
}
|
|
56
118
|
}
|
|
57
|
-
// Check for promotion
|
|
119
|
+
// Check for promotion (only if not demoted this round)
|
|
58
120
|
if (!demoted && result.verdict === 'PASS') {
|
|
59
|
-
const
|
|
60
|
-
const
|
|
61
|
-
const
|
|
62
|
-
|
|
63
|
-
|
|
121
|
+
const totalHandoffs = window.length;
|
|
122
|
+
const recentWindow = window.slice(-this.thresholds.windowSize);
|
|
123
|
+
const passRate = recentWindow.length > 0
|
|
124
|
+
? recentWindow.filter(e => e.verdict === 'PASS').length / recentWindow.length
|
|
125
|
+
: 0;
|
|
126
|
+
if (oldTrust === 'untrusted' &&
|
|
127
|
+
totalHandoffs >= this.thresholds.provisionalMinHandoffs &&
|
|
128
|
+
passRate >= this.thresholds.provisionalMinPassRate) {
|
|
64
129
|
newTrust = 'provisional';
|
|
130
|
+
promoted = true;
|
|
131
|
+
this.recordTransition(agent, oldTrust, newTrust, 'promotion');
|
|
65
132
|
}
|
|
66
|
-
else if (oldTrust === 'provisional' &&
|
|
133
|
+
else if (oldTrust === 'provisional' &&
|
|
134
|
+
totalHandoffs >= this.thresholds.trustedMinHandoffs &&
|
|
135
|
+
passRate >= this.thresholds.trustedMinPassRate) {
|
|
67
136
|
newTrust = 'trusted';
|
|
137
|
+
promoted = true;
|
|
138
|
+
this.recordTransition(agent, oldTrust, newTrust, 'promotion');
|
|
68
139
|
}
|
|
69
140
|
}
|
|
70
|
-
// Update
|
|
141
|
+
// Update agent identity
|
|
71
142
|
const updatedHistory = {
|
|
72
143
|
totalHandoffs: agent.verificationHistory.totalHandoffs + 1,
|
|
73
|
-
passRate: this.
|
|
144
|
+
passRate: this.computeWindowPassRate(agentId),
|
|
74
145
|
formalOverrides: agent.verificationHistory.formalOverrides + result.formalStats.formalOverrides,
|
|
75
146
|
};
|
|
76
147
|
const updatedAgent = {
|
|
@@ -82,7 +153,25 @@ export class TrustManager {
|
|
|
82
153
|
// Check if we should halt
|
|
83
154
|
const shouldHalt = (this.sessionOverrides.get(agentId) ?? 0) >= this.policy.escalationRules.maxFormalOverridesBeforeHalt ||
|
|
84
155
|
(this.sessionDemotions.get(agentId) ?? 0) >= this.policy.escalationRules.maxTrustDemotions;
|
|
85
|
-
return { newTrust, demoted, shouldHalt };
|
|
156
|
+
return { newTrust, demoted, promoted, shouldHalt };
|
|
157
|
+
}
|
|
158
|
+
/**
|
|
159
|
+
* Force-demote an agent (e.g., collusion detected).
|
|
160
|
+
*/
|
|
161
|
+
forceDemote(agentId, reason) {
|
|
162
|
+
const agent = this.agents.get(agentId);
|
|
163
|
+
if (!agent)
|
|
164
|
+
return 'untrusted';
|
|
165
|
+
const oldTrust = agent.trustLevel;
|
|
166
|
+
const newTrust = this.demoteTrust(oldTrust);
|
|
167
|
+
if (newTrust !== oldTrust) {
|
|
168
|
+
this.recordTransition(agent, oldTrust, newTrust, reason);
|
|
169
|
+
const demotions = (this.sessionDemotions.get(agentId) ?? 0) + 1;
|
|
170
|
+
this.sessionDemotions.set(agentId, demotions);
|
|
171
|
+
const updatedAgent = { ...agent, trustLevel: newTrust };
|
|
172
|
+
this.agents.set(agentId, updatedAgent);
|
|
173
|
+
}
|
|
174
|
+
return newTrust;
|
|
86
175
|
}
|
|
87
176
|
/** Get session-level stats for an agent. */
|
|
88
177
|
getSessionStats(agentId) {
|
|
@@ -97,14 +186,27 @@ export class TrustManager {
|
|
|
97
186
|
case 'trusted': return 'provisional';
|
|
98
187
|
case 'provisional': return 'untrusted';
|
|
99
188
|
case 'untrusted': return 'untrusted';
|
|
100
|
-
case 'formal': return 'formal';
|
|
189
|
+
case 'formal': return 'formal';
|
|
101
190
|
}
|
|
102
191
|
}
|
|
103
|
-
|
|
104
|
-
const
|
|
105
|
-
const
|
|
106
|
-
|
|
107
|
-
|
|
192
|
+
computeWindowPassRate(agentId) {
|
|
193
|
+
const window = this.windows.get(agentId) ?? [];
|
|
194
|
+
const recent = window.slice(-this.thresholds.windowSize);
|
|
195
|
+
if (recent.length === 0)
|
|
196
|
+
return 0;
|
|
197
|
+
return recent.filter(e => e.verdict === 'PASS').length / recent.length;
|
|
198
|
+
}
|
|
199
|
+
recordTransition(agent, from, to, reason) {
|
|
200
|
+
this.transitions.push({
|
|
201
|
+
agentId: agent.id,
|
|
202
|
+
agentName: agent.name,
|
|
203
|
+
previousLevel: from,
|
|
204
|
+
newLevel: to,
|
|
205
|
+
reason,
|
|
206
|
+
passRate: this.computeWindowPassRate(agent.id),
|
|
207
|
+
windowSize: (this.windows.get(agent.id) ?? []).length,
|
|
208
|
+
timestamp: new Date().toISOString(),
|
|
209
|
+
});
|
|
108
210
|
}
|
|
109
211
|
}
|
|
110
212
|
//# sourceMappingURL=trust-manager.js.map
|