@bryan-thompson/inspector-assessment-client 1.18.0 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/{OAuthCallback-B07fRaZ6.js → OAuthCallback-D_dKq_wM.js} +1 -1
- package/dist/assets/{OAuthDebugCallback-CJL48E2b.js → OAuthDebugCallback-UqARwe_4.js} +1 -1
- package/dist/assets/{index-CzoGuYPy.css → index-32-uLPhe.css} +3 -0
- package/dist/assets/{index-CmlaHDEu.js → index-B5_VY0TC.js} +571 -12
- package/dist/index.html +2 -2
- package/lib/lib/assessmentTypes.d.ts +51 -2
- package/lib/lib/assessmentTypes.d.ts.map +1 -1
- package/lib/lib/securityPatterns.d.ts +4 -2
- package/lib/lib/securityPatterns.d.ts.map +1 -1
- package/lib/lib/securityPatterns.js +194 -2
- package/lib/services/assessment/AssessmentOrchestrator.d.ts +1 -0
- package/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -1
- package/lib/services/assessment/AssessmentOrchestrator.js +7 -0
- package/lib/services/assessment/LanguageAwarePayloadGenerator.d.ts +41 -0
- package/lib/services/assessment/LanguageAwarePayloadGenerator.d.ts.map +1 -0
- package/lib/services/assessment/LanguageAwarePayloadGenerator.js +258 -0
- package/lib/services/assessment/PolicyComplianceGenerator.d.ts.map +1 -1
- package/lib/services/assessment/PolicyComplianceGenerator.js +15 -0
- package/lib/services/assessment/ToolClassifier.d.ts +1 -0
- package/lib/services/assessment/ToolClassifier.d.ts.map +1 -1
- package/lib/services/assessment/ToolClassifier.js +26 -0
- package/lib/services/assessment/modules/ResourceAssessor.d.ts +5 -0
- package/lib/services/assessment/modules/ResourceAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/ResourceAssessor.js +161 -4
- package/lib/services/assessment/modules/SecurityAssessor.d.ts +1 -0
- package/lib/services/assessment/modules/SecurityAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/SecurityAssessor.js +51 -4
- package/lib/services/assessment/modules/TemporalAssessor.d.ts +5 -0
- package/lib/services/assessment/modules/TemporalAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/TemporalAssessor.js +133 -15
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +5 -0
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/ToolAnnotationAssessor.js +256 -1
- package/package.json +1 -1
|
@@ -13,7 +13,9 @@ import { BaseAssessor } from "./BaseAssessor.js";
|
|
|
13
13
|
import { getAllAttackPatterns, getPayloadsForAttack, } from "../../../lib/securityPatterns.js";
|
|
14
14
|
import { ToolClassifier, ToolCategory } from "../ToolClassifier.js";
|
|
15
15
|
import { createConcurrencyLimit } from "../lib/concurrencyLimit.js";
|
|
16
|
+
import { LanguageAwarePayloadGenerator } from "../LanguageAwarePayloadGenerator.js";
|
|
16
17
|
export class SecurityAssessor extends BaseAssessor {
|
|
18
|
+
languageGenerator = new LanguageAwarePayloadGenerator();
|
|
17
19
|
async assess(context) {
|
|
18
20
|
// Select tools for testing first
|
|
19
21
|
const toolsToTest = this.selectToolsForTesting(context.tools);
|
|
@@ -1085,6 +1087,18 @@ export class SecurityAssessor extends BaseAssessor {
|
|
|
1085
1087
|
/result.*action\s+executed\s+successfully/i,
|
|
1086
1088
|
/successfully\s+(executed|completed|processed):/i,
|
|
1087
1089
|
/successfully\s+(executed|completed|processed)"/i,
|
|
1090
|
+
// "Action received:" - safe echo/acknowledgment pattern (DVMCP testbed)
|
|
1091
|
+
/action\s+received:/i,
|
|
1092
|
+
/input\s+received:/i,
|
|
1093
|
+
/request\s+received:/i,
|
|
1094
|
+
// Explicit safety indicators in JSON responses (context-aware to avoid matching unrelated fields)
|
|
1095
|
+
// Require safety-related context: message, result, status, stored, reflected, etc.
|
|
1096
|
+
/"safe"\s*:\s*true[^}]*("message"|"result"|"status"|"response")/i,
|
|
1097
|
+
/("message"|"result"|"status"|"response")[^}]*"safe"\s*:\s*true/i,
|
|
1098
|
+
/"vulnerable"\s*:\s*false[^}]*("safe"|"stored"|"reflected"|"status")/i,
|
|
1099
|
+
/("safe"|"stored"|"reflected"|"status")[^}]*"vulnerable"\s*:\s*false/i,
|
|
1100
|
+
/"status"\s*:\s*"acknowledged"[^}]*("message"|"result"|"safe")/i,
|
|
1101
|
+
/("message"|"result"|"safe")[^}]*"status"\s*:\s*"acknowledged"/i,
|
|
1088
1102
|
];
|
|
1089
1103
|
const reflectionPatterns = [
|
|
1090
1104
|
...statusPatterns,
|
|
@@ -1320,8 +1334,41 @@ export class SecurityAssessor extends BaseAssessor {
|
|
|
1320
1334
|
const params = {};
|
|
1321
1335
|
const targetParamTypes = payload.parameterTypes || [];
|
|
1322
1336
|
let payloadInjected = false;
|
|
1323
|
-
//
|
|
1324
|
-
|
|
1337
|
+
// NEW: Check for language-specific code execution parameters first
|
|
1338
|
+
// This enables detection of vulnerabilities in tools expecting Python/JS/SQL code
|
|
1339
|
+
for (const [key, prop] of Object.entries(schema.properties)) {
|
|
1340
|
+
const propSchema = prop;
|
|
1341
|
+
if (propSchema.type !== "string")
|
|
1342
|
+
continue;
|
|
1343
|
+
const detectedLanguage = this.languageGenerator.detectLanguage(key, tool.name, tool.description);
|
|
1344
|
+
// If we detect a specific language (not generic), use language-appropriate payloads
|
|
1345
|
+
if (detectedLanguage !== "generic" && !payloadInjected) {
|
|
1346
|
+
const languagePayloads = this.languageGenerator.getPayloadsForLanguage(detectedLanguage);
|
|
1347
|
+
if (languagePayloads.length > 0) {
|
|
1348
|
+
// Select a payload that targets similar behavior as the current attack pattern
|
|
1349
|
+
// (e.g., if testing command injection, use a command-executing payload)
|
|
1350
|
+
const payloadLower = payload.payload.toLowerCase();
|
|
1351
|
+
const isCommandTest = payloadLower.includes("whoami") ||
|
|
1352
|
+
payloadLower.includes("passwd") ||
|
|
1353
|
+
payloadLower.includes("id");
|
|
1354
|
+
// Find matching language payload based on test intent
|
|
1355
|
+
let selectedPayload = languagePayloads[0]; // Default to first
|
|
1356
|
+
if (isCommandTest) {
|
|
1357
|
+
// Prefer command execution payloads
|
|
1358
|
+
const cmdPayload = languagePayloads.find((lp) => lp.payload.includes("whoami") ||
|
|
1359
|
+
lp.payload.includes("subprocess") ||
|
|
1360
|
+
lp.payload.includes("execSync"));
|
|
1361
|
+
if (cmdPayload)
|
|
1362
|
+
selectedPayload = cmdPayload;
|
|
1363
|
+
}
|
|
1364
|
+
params[key] = selectedPayload.payload;
|
|
1365
|
+
payloadInjected = true;
|
|
1366
|
+
break;
|
|
1367
|
+
}
|
|
1368
|
+
}
|
|
1369
|
+
}
|
|
1370
|
+
// Fall back to parameterTypes matching if no language-specific payload was used
|
|
1371
|
+
if (!payloadInjected && targetParamTypes.length > 0) {
|
|
1325
1372
|
// Payload is parameter-specific (e.g., URLs only for "url" params)
|
|
1326
1373
|
for (const [key, prop] of Object.entries(schema.properties)) {
|
|
1327
1374
|
const propSchema = prop;
|
|
@@ -1335,8 +1382,8 @@ export class SecurityAssessor extends BaseAssessor {
|
|
|
1335
1382
|
}
|
|
1336
1383
|
}
|
|
1337
1384
|
}
|
|
1338
|
-
|
|
1339
|
-
|
|
1385
|
+
// Fall back to generic payload - inject into first string parameter (original behavior)
|
|
1386
|
+
if (!payloadInjected) {
|
|
1340
1387
|
for (const [key, prop] of Object.entries(schema.properties)) {
|
|
1341
1388
|
const propSchema = prop;
|
|
1342
1389
|
if (propSchema.type === "string" && !payloadInjected) {
|
|
@@ -28,6 +28,11 @@ export declare class TemporalAssessor extends BaseAssessor {
|
|
|
28
28
|
constructor(config: AssessmentConfiguration);
|
|
29
29
|
assess(context: AssessmentContext): Promise<TemporalAssessment>;
|
|
30
30
|
private assessTool;
|
|
31
|
+
/**
|
|
32
|
+
* Detect mutations in tool definition across invocation snapshots.
|
|
33
|
+
* DVMCP Challenge 4: Tool descriptions that mutate after N calls.
|
|
34
|
+
*/
|
|
35
|
+
private detectDefinitionMutation;
|
|
31
36
|
private analyzeResponses;
|
|
32
37
|
/**
|
|
33
38
|
* Generate a safe/neutral payload for a tool based on its input schema.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TemporalAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/TemporalAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EACL,uBAAuB,EAEvB,kBAAkB,EAEnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"TemporalAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/TemporalAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EACL,uBAAuB,EAEvB,kBAAkB,EAEnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AA+B9C,qBAAa,gBAAiB,SAAQ,YAAY;IAChD,OAAO,CAAC,kBAAkB,CAAS;IAGnC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAoBnC;IAGF,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAU;IAEjD;;;;;;;;;;OAUG;IACH,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CASrC;gBAEU,MAAM,EAAE,uBAAuB;IAKrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;YAqEvD,UAAU;IAuHxB;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAkChC,OAAO,CAAC,gBAAgB;IAmFxB;;;OAGG;IACH,OAAO,CAAC,mBAAmB;IAsC3B;;;;OAIG;IACH,OAAO,CAAC,iBAAiB;IAoDzB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAKzB;;;;OAIG;IACH,OAAO,CAAC,cAAc;IAYtB;;;;;;OAMG;IACH,OAAO,CAAC,cAAc;IAuBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IAiCzB,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,mBAAmB;IA+C3B,OAAO,CAAC,uBAAuB;CA+DhC"}
|
|
@@ -63,7 +63,15 @@ export class TemporalAssessor extends BaseAssessor {
|
|
|
63
63
|
async assess(context) {
|
|
64
64
|
const results = [];
|
|
65
65
|
let rugPullsDetected = 0;
|
|
66
|
-
|
|
66
|
+
let definitionMutationsDetected = 0;
|
|
67
|
+
// Check if definition tracking is available
|
|
68
|
+
const canTrackDefinitions = typeof context.listTools === "function";
|
|
69
|
+
if (canTrackDefinitions) {
|
|
70
|
+
this.log(`Starting temporal assessment with ${this.invocationsPerTool} invocations per tool (definition tracking enabled)`);
|
|
71
|
+
}
|
|
72
|
+
else {
|
|
73
|
+
this.log(`Starting temporal assessment with ${this.invocationsPerTool} invocations per tool (definition tracking unavailable)`);
|
|
74
|
+
}
|
|
67
75
|
for (const tool of context.tools) {
|
|
68
76
|
// Skip if tool selection is configured and this tool isn't selected
|
|
69
77
|
if (this.config.selectedToolsForTesting !== undefined &&
|
|
@@ -76,33 +84,63 @@ export class TemporalAssessor extends BaseAssessor {
|
|
|
76
84
|
rugPullsDetected++;
|
|
77
85
|
this.log(`RUG PULL DETECTED: ${tool.name} changed behavior at invocation ${result.firstDeviationAt}`);
|
|
78
86
|
}
|
|
87
|
+
if (result.definitionMutated) {
|
|
88
|
+
definitionMutationsDetected++;
|
|
89
|
+
this.log(`DEFINITION MUTATION DETECTED: ${tool.name} changed description at invocation ${result.definitionMutationAt}`);
|
|
90
|
+
}
|
|
79
91
|
// Respect delay between tests
|
|
80
92
|
if (this.config.delayBetweenTests) {
|
|
81
93
|
await this.sleep(this.config.delayBetweenTests);
|
|
82
94
|
}
|
|
83
95
|
}
|
|
84
|
-
|
|
96
|
+
// Status fails if either response or definition mutations detected
|
|
97
|
+
const totalVulnerabilities = rugPullsDetected + definitionMutationsDetected;
|
|
98
|
+
const status = this.determineTemporalStatus(totalVulnerabilities, results);
|
|
85
99
|
return {
|
|
86
100
|
toolsTested: results.length,
|
|
87
101
|
invocationsPerTool: this.invocationsPerTool,
|
|
88
102
|
rugPullsDetected,
|
|
103
|
+
definitionMutationsDetected,
|
|
89
104
|
details: results,
|
|
90
105
|
status,
|
|
91
|
-
explanation: this.generateExplanation(rugPullsDetected, results),
|
|
106
|
+
explanation: this.generateExplanation(rugPullsDetected, definitionMutationsDetected, results),
|
|
92
107
|
recommendations: this.generateRecommendations(results),
|
|
93
108
|
};
|
|
94
109
|
}
|
|
95
110
|
async assessTool(context, tool) {
|
|
96
111
|
const responses = [];
|
|
112
|
+
const definitionSnapshots = [];
|
|
97
113
|
const payload = this.generateSafePayload(tool);
|
|
98
114
|
// Reduce invocations for potentially destructive tools
|
|
99
115
|
const isDestructive = this.isDestructiveTool(tool);
|
|
100
116
|
const invocations = isDestructive
|
|
101
117
|
? Math.min(5, this.invocationsPerTool)
|
|
102
118
|
: this.invocationsPerTool;
|
|
119
|
+
// Check if definition tracking is available
|
|
120
|
+
const canTrackDefinitions = typeof context.listTools === "function";
|
|
103
121
|
this.log(`Testing ${tool.name} with ${invocations} invocations${isDestructive ? " (reduced - destructive)" : ""}`);
|
|
104
122
|
for (let i = 1; i <= invocations; i++) {
|
|
105
123
|
this.testCount++;
|
|
124
|
+
// Track tool definition BEFORE each invocation (if available)
|
|
125
|
+
// This detects rug pulls where description mutates after N calls
|
|
126
|
+
if (canTrackDefinitions) {
|
|
127
|
+
try {
|
|
128
|
+
const currentTools = await this.executeWithTimeout(context.listTools(), this.PER_INVOCATION_TIMEOUT);
|
|
129
|
+
const currentTool = currentTools.find((t) => t.name === tool.name);
|
|
130
|
+
if (currentTool) {
|
|
131
|
+
definitionSnapshots.push({
|
|
132
|
+
invocation: i,
|
|
133
|
+
description: currentTool.description,
|
|
134
|
+
inputSchema: currentTool.inputSchema,
|
|
135
|
+
timestamp: Date.now(),
|
|
136
|
+
});
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
catch {
|
|
140
|
+
// Definition tracking failed - continue with response tracking
|
|
141
|
+
this.log(`Warning: Failed to fetch tool definition for ${tool.name} at invocation ${i}`);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
106
144
|
try {
|
|
107
145
|
// P2-2: Use shorter per-invocation timeout (10s vs default 30s)
|
|
108
146
|
const response = await this.executeWithTimeout(context.callTool(tool.name, payload), this.PER_INVOCATION_TIMEOUT);
|
|
@@ -137,12 +175,59 @@ export class TemporalAssessor extends BaseAssessor {
|
|
|
137
175
|
await this.sleep(50);
|
|
138
176
|
}
|
|
139
177
|
}
|
|
178
|
+
// Analyze responses for temporal behavior changes
|
|
140
179
|
const result = this.analyzeResponses(tool, responses);
|
|
180
|
+
// Analyze definitions for mutation (rug pull via description change)
|
|
181
|
+
const definitionMutation = this.detectDefinitionMutation(definitionSnapshots);
|
|
141
182
|
return {
|
|
142
183
|
...result,
|
|
143
184
|
reducedInvocations: isDestructive,
|
|
185
|
+
// Add definition mutation results
|
|
186
|
+
definitionMutated: definitionMutation !== null,
|
|
187
|
+
definitionMutationAt: definitionMutation?.detectedAt ?? null,
|
|
188
|
+
definitionEvidence: definitionMutation
|
|
189
|
+
? {
|
|
190
|
+
baselineDescription: definitionMutation.baselineDescription,
|
|
191
|
+
mutatedDescription: definitionMutation.mutatedDescription,
|
|
192
|
+
baselineSchema: definitionMutation.baselineSchema,
|
|
193
|
+
mutatedSchema: definitionMutation.mutatedSchema,
|
|
194
|
+
}
|
|
195
|
+
: undefined,
|
|
196
|
+
// If definition mutated, mark as vulnerable with DEFINITION pattern
|
|
197
|
+
vulnerable: result.vulnerable || definitionMutation !== null,
|
|
198
|
+
pattern: definitionMutation !== null ? "RUG_PULL_DEFINITION" : result.pattern,
|
|
199
|
+
severity: definitionMutation !== null || result.vulnerable ? "HIGH" : "NONE",
|
|
144
200
|
};
|
|
145
201
|
}
|
|
202
|
+
/**
|
|
203
|
+
* Detect mutations in tool definition across invocation snapshots.
|
|
204
|
+
* DVMCP Challenge 4: Tool descriptions that mutate after N calls.
|
|
205
|
+
*/
|
|
206
|
+
detectDefinitionMutation(snapshots) {
|
|
207
|
+
if (snapshots.length < 2)
|
|
208
|
+
return null;
|
|
209
|
+
const baseline = snapshots[0];
|
|
210
|
+
for (let i = 1; i < snapshots.length; i++) {
|
|
211
|
+
const current = snapshots[i];
|
|
212
|
+
// Check if description changed
|
|
213
|
+
const descriptionChanged = baseline.description !== current.description;
|
|
214
|
+
// Check if schema changed (deep comparison)
|
|
215
|
+
const schemaChanged = JSON.stringify(baseline.inputSchema) !==
|
|
216
|
+
JSON.stringify(current.inputSchema);
|
|
217
|
+
if (descriptionChanged || schemaChanged) {
|
|
218
|
+
return {
|
|
219
|
+
detectedAt: current.invocation,
|
|
220
|
+
baselineDescription: baseline.description,
|
|
221
|
+
mutatedDescription: descriptionChanged
|
|
222
|
+
? current.description
|
|
223
|
+
: undefined,
|
|
224
|
+
baselineSchema: schemaChanged ? baseline.inputSchema : undefined,
|
|
225
|
+
mutatedSchema: schemaChanged ? current.inputSchema : undefined,
|
|
226
|
+
};
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
return null;
|
|
230
|
+
}
|
|
146
231
|
analyzeResponses(tool, responses) {
|
|
147
232
|
if (responses.length === 0) {
|
|
148
233
|
return {
|
|
@@ -380,31 +465,64 @@ export class TemporalAssessor extends BaseAssessor {
|
|
|
380
465
|
}
|
|
381
466
|
return "PASS";
|
|
382
467
|
}
|
|
383
|
-
generateExplanation(rugPullsDetected, results) {
|
|
468
|
+
generateExplanation(rugPullsDetected, definitionMutationsDetected, results) {
|
|
384
469
|
if (results.length === 0) {
|
|
385
470
|
return "No tools were tested for temporal vulnerabilities.";
|
|
386
471
|
}
|
|
387
|
-
|
|
388
|
-
|
|
472
|
+
const parts = [];
|
|
473
|
+
// Report response-based rug pulls
|
|
474
|
+
if (rugPullsDetected > 0) {
|
|
475
|
+
const responseVulnerableTools = results
|
|
476
|
+
.filter((r) => r.vulnerable && r.pattern === "RUG_PULL_TEMPORAL")
|
|
477
|
+
.map((r) => `${r.tool} (changed at invocation ${r.firstDeviationAt})`)
|
|
478
|
+
.join(", ");
|
|
479
|
+
if (responseVulnerableTools) {
|
|
480
|
+
parts.push(`CRITICAL: ${rugPullsDetected} tool(s) showed temporal response changes: ${responseVulnerableTools}`);
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
// Report definition mutations
|
|
484
|
+
if (definitionMutationsDetected > 0) {
|
|
485
|
+
const definitionVulnerableTools = results
|
|
486
|
+
.filter((r) => r.definitionMutated)
|
|
487
|
+
.map((r) => `${r.tool} (description changed at invocation ${r.definitionMutationAt})`)
|
|
488
|
+
.join(", ");
|
|
489
|
+
parts.push(`CRITICAL: ${definitionMutationsDetected} tool(s) mutated their definition/description: ${definitionVulnerableTools}`);
|
|
490
|
+
}
|
|
491
|
+
if (parts.length === 0) {
|
|
492
|
+
return `All ${results.length} tools showed consistent behavior and definitions across repeated invocations.`;
|
|
389
493
|
}
|
|
390
|
-
|
|
391
|
-
.filter((r) => r.vulnerable)
|
|
392
|
-
.map((r) => `${r.tool} (changed at invocation ${r.firstDeviationAt})`)
|
|
393
|
-
.join(", ");
|
|
394
|
-
return `CRITICAL: ${rugPullsDetected} tool(s) showed temporal behavior changes indicating potential rug pull vulnerability: ${vulnerableTools}`;
|
|
494
|
+
return parts.join(" ");
|
|
395
495
|
}
|
|
396
496
|
generateRecommendations(results) {
|
|
397
497
|
const recommendations = [];
|
|
398
|
-
|
|
399
|
-
|
|
498
|
+
// Response-based rug pulls
|
|
499
|
+
const responseVulnerableTools = results.filter((r) => r.vulnerable && r.pattern === "RUG_PULL_TEMPORAL");
|
|
500
|
+
if (responseVulnerableTools.length > 0) {
|
|
400
501
|
recommendations.push("Immediately investigate tools with temporal behavior changes - this pattern is characteristic of rug pull attacks.");
|
|
401
|
-
for (const tool of
|
|
502
|
+
for (const tool of responseVulnerableTools) {
|
|
402
503
|
recommendations.push(`Review ${tool.tool}: behavior changed after ${tool.firstDeviationAt} invocations. Compare safe vs malicious responses in evidence.`);
|
|
403
504
|
}
|
|
404
505
|
recommendations.push("Check for invocation counters, time-based triggers, or state accumulation in the tool implementation.");
|
|
405
506
|
}
|
|
507
|
+
// Definition mutation rug pulls
|
|
508
|
+
const definitionMutatedTools = results.filter((r) => r.definitionMutated);
|
|
509
|
+
if (definitionMutatedTools.length > 0) {
|
|
510
|
+
recommendations.push("CRITICAL: Tool definition/description mutations detected - this is a sophisticated rug pull attack that injects malicious instructions after N calls.");
|
|
511
|
+
for (const tool of definitionMutatedTools) {
|
|
512
|
+
const baseline = tool.definitionEvidence?.baselineDescription
|
|
513
|
+
? `"${tool.definitionEvidence.baselineDescription.substring(0, 100)}..."`
|
|
514
|
+
: "unknown";
|
|
515
|
+
const mutated = tool.definitionEvidence?.mutatedDescription
|
|
516
|
+
? `"${tool.definitionEvidence.mutatedDescription.substring(0, 100)}..."`
|
|
517
|
+
: "unknown";
|
|
518
|
+
recommendations.push(`${tool.tool}: Description changed at invocation ${tool.definitionMutationAt}. Baseline: ${baseline} → Mutated: ${mutated}`);
|
|
519
|
+
}
|
|
520
|
+
recommendations.push("Review tool source code for global state that mutates __doc__, description, or tool metadata based on call count.");
|
|
521
|
+
}
|
|
406
522
|
const errorTools = results.filter((r) => r.errorCount > 0);
|
|
407
|
-
if (errorTools.length > 0 &&
|
|
523
|
+
if (errorTools.length > 0 &&
|
|
524
|
+
responseVulnerableTools.length === 0 &&
|
|
525
|
+
definitionMutatedTools.length === 0) {
|
|
408
526
|
recommendations.push(`${errorTools.length} tool(s) had errors during repeated invocations. Review error handling and rate limiting.`);
|
|
409
527
|
}
|
|
410
528
|
return recommendations;
|
|
@@ -79,6 +79,11 @@ export declare class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
79
79
|
* Now includes alignment status with confidence-aware logic
|
|
80
80
|
*/
|
|
81
81
|
private assessTool;
|
|
82
|
+
/**
|
|
83
|
+
* Scan tool description for poisoning patterns (Issue #8)
|
|
84
|
+
* Detects hidden instructions, override commands, concealment, and exfiltration attempts
|
|
85
|
+
*/
|
|
86
|
+
private scanDescriptionForPoisoning;
|
|
82
87
|
/**
|
|
83
88
|
* Extract annotations from a tool
|
|
84
89
|
* MCP SDK may have annotations in different locations
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ToolAnnotationAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ToolAnnotationAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,KAAK,EACV,wBAAwB,EACxB,oBAAoB,EAKpB,uBAAuB,EAExB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,EACL,KAAK,gBAAgB,EAGtB,MAAM,8BAA8B,CAAC;
|
|
1
|
+
{"version":3,"file":"ToolAnnotationAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ToolAnnotationAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,KAAK,EACV,wBAAwB,EACxB,oBAAoB,EAKpB,uBAAuB,EAExB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,EACL,KAAK,gBAAgB,EAGtB,MAAM,8BAA8B,CAAC;AAgNtC;;GAEG;AACH,MAAM,WAAW,4BAA6B,SAAQ,oBAAoB;IACxE,eAAe,CAAC,EAAE;QAChB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,mBAAmB,EAAE,OAAO,CAAC;QAC7B,UAAU,EAAE,MAAM,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,oBAAoB,EAAE;YACpB,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,eAAe,CAAC,EAAE,OAAO,CAAC;YAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;SAC1B,CAAC;QACF,oBAAoB,EAAE,OAAO,CAAC;QAC9B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,MAAM,EAAE,iBAAiB,GAAG,eAAe,CAAC;KAC7C,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,gCAAiC,SAAQ,wBAAwB;IAChF,WAAW,EAAE,4BAA4B,EAAE,CAAC;IAC5C,cAAc,EAAE,OAAO,CAAC;IACxB,2BAA2B,EAAE,4BAA4B,EAAE,CAAC;CAC7D;AAKD,qBAAa,sBAAuB,SAAQ,YAAY;IACtD,OAAO,CAAC,YAAY,CAAC,CAAmB;IACxC,OAAO,CAAC,gBAAgB,CAAmB;gBAE/B,MAAM,EAAE,uBAAuB;IAM3C;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,gBAAgB,GAAG,IAAI;IAK7C;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAK/C;;OAEG;IACH,eAAe,IAAI,OAAO;IAO1B;;OAEG;IACG,MAAM,CACV,OAAO,EAAE,iBAAiB,GACzB,OAAO,CAAC,wBAAwB,GAAG,gCAAgC,CAAC;IAkSvE;;OAEG;YACW,0BAA0B;IA+IxC;;OAEG;IACH,OAAO,CAAC,2BAA2B;IAiCnC;;OAEG;IACH,OAAO,CAAC,+BAA+B;IAoFvC;;;OAGG;IACH,OAAO,CAAC,UAAU;IA2HlB;;;OAGG;IACH,OAAO,CAAC,2BAA2B;IA2DnC;;;;;;;;;OASG;IACH,OAAO,CAAC,kBAAkB;IAyE1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAuBzB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAgGrB;;;OAGG;IACH,OAAO,CAAC,yBAAyB;IA0DjC;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAiDxB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAmC3B;;OAEG;IACH,OAAO,CAAC,uBAAuB;CA2ChC"}
|
|
@@ -12,6 +12,190 @@
|
|
|
12
12
|
*/
|
|
13
13
|
import { BaseAssessor } from "./BaseAssessor.js";
|
|
14
14
|
import { getDefaultCompiledPatterns, matchToolPattern, } from "../config/annotationPatterns.js";
|
|
15
|
+
const DESCRIPTION_POISONING_PATTERNS = [
|
|
16
|
+
// Hidden instruction tags
|
|
17
|
+
{
|
|
18
|
+
name: "hidden_tag",
|
|
19
|
+
pattern: /<HIDDEN>[\s\S]*?<\/HIDDEN>/gi,
|
|
20
|
+
severity: "HIGH",
|
|
21
|
+
category: "hidden_instructions",
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
name: "important_tag",
|
|
25
|
+
pattern: /<IMPORTANT>[\s\S]*?<\/IMPORTANT>/gi,
|
|
26
|
+
severity: "HIGH",
|
|
27
|
+
category: "hidden_instructions",
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
name: "system_tag",
|
|
31
|
+
pattern: /<SYSTEM>[\s\S]*?<\/SYSTEM>/gi,
|
|
32
|
+
severity: "HIGH",
|
|
33
|
+
category: "hidden_instructions",
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
name: "instruction_tag",
|
|
37
|
+
pattern: /<INSTRUCTION>[\s\S]*?<\/INSTRUCTION>/gi,
|
|
38
|
+
severity: "HIGH",
|
|
39
|
+
category: "hidden_instructions",
|
|
40
|
+
},
|
|
41
|
+
// Override commands
|
|
42
|
+
{
|
|
43
|
+
name: "ignore_instructions",
|
|
44
|
+
pattern: /ignore\s+(all\s+)?(previous|prior|above)\s+instructions/i,
|
|
45
|
+
severity: "HIGH",
|
|
46
|
+
category: "override",
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
name: "disregard_instructions",
|
|
50
|
+
pattern: /disregard\s+(all\s+)?(previous|prior)\s+instructions/i,
|
|
51
|
+
severity: "HIGH",
|
|
52
|
+
category: "override",
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
name: "you_are_now",
|
|
56
|
+
pattern: /you\s+are\s+now\s+(a|an|the)/i,
|
|
57
|
+
severity: "MEDIUM",
|
|
58
|
+
category: "override",
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
name: "system_override",
|
|
62
|
+
pattern: /system:\s*override/i,
|
|
63
|
+
severity: "HIGH",
|
|
64
|
+
category: "override",
|
|
65
|
+
},
|
|
66
|
+
// Secrecy/concealment
|
|
67
|
+
{
|
|
68
|
+
name: "do_not_mention",
|
|
69
|
+
pattern: /do\s+not\s+(mention|tell|inform|reveal)/i,
|
|
70
|
+
severity: "HIGH",
|
|
71
|
+
category: "concealment",
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
name: "secretly",
|
|
75
|
+
pattern: /\bsecretly\b/i,
|
|
76
|
+
severity: "MEDIUM",
|
|
77
|
+
category: "concealment",
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
name: "hide_from_user",
|
|
81
|
+
pattern: /hide\s+(this\s+)?from\s+(the\s+)?user/i,
|
|
82
|
+
severity: "HIGH",
|
|
83
|
+
category: "concealment",
|
|
84
|
+
},
|
|
85
|
+
// Data exfiltration
|
|
86
|
+
{
|
|
87
|
+
name: "return_keys",
|
|
88
|
+
pattern: /return\s+(all\s+)?(api\s*)?keys/i,
|
|
89
|
+
severity: "HIGH",
|
|
90
|
+
category: "exfiltration",
|
|
91
|
+
},
|
|
92
|
+
{
|
|
93
|
+
name: "include_credentials",
|
|
94
|
+
pattern: /include.*(api.?key|credential|password|secret)/i,
|
|
95
|
+
severity: "HIGH",
|
|
96
|
+
category: "exfiltration",
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
name: "reveal_secrets",
|
|
100
|
+
pattern: /reveal\s+(all\s+)?(secrets|credentials|api\s*keys)/i,
|
|
101
|
+
severity: "HIGH",
|
|
102
|
+
category: "exfiltration",
|
|
103
|
+
},
|
|
104
|
+
{
|
|
105
|
+
name: "access_internal_secrets",
|
|
106
|
+
pattern: /access\s+(the\s+)?internal\s+(api\s*)?(key|secret|credential|password|token)/i,
|
|
107
|
+
severity: "HIGH",
|
|
108
|
+
category: "exfiltration",
|
|
109
|
+
},
|
|
110
|
+
// Delimiter injection
|
|
111
|
+
{
|
|
112
|
+
name: "system_codeblock",
|
|
113
|
+
pattern: /```system[\s\S]*?```/gi,
|
|
114
|
+
severity: "HIGH",
|
|
115
|
+
category: "delimiter",
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
name: "inst_tags",
|
|
119
|
+
pattern: /\[INST\][\s\S]*?\[\/INST\]/gi,
|
|
120
|
+
severity: "HIGH",
|
|
121
|
+
category: "delimiter",
|
|
122
|
+
},
|
|
123
|
+
{
|
|
124
|
+
name: "chatml_system",
|
|
125
|
+
pattern: /<\|im_start\|>system/gi,
|
|
126
|
+
severity: "HIGH",
|
|
127
|
+
category: "delimiter",
|
|
128
|
+
},
|
|
129
|
+
{
|
|
130
|
+
name: "llama_sys",
|
|
131
|
+
pattern: /<<SYS>>/gi,
|
|
132
|
+
severity: "HIGH",
|
|
133
|
+
category: "delimiter",
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
name: "user_assistant_block",
|
|
137
|
+
pattern: /\[USER\][\s\S]*?\[ASSISTANT\]/gi,
|
|
138
|
+
severity: "HIGH",
|
|
139
|
+
category: "delimiter",
|
|
140
|
+
},
|
|
141
|
+
// Role/persona injection (Warning #4)
|
|
142
|
+
{
|
|
143
|
+
name: "act_as",
|
|
144
|
+
pattern: /act\s+(like|as)\s+(a|an|the)/i,
|
|
145
|
+
severity: "MEDIUM",
|
|
146
|
+
category: "override",
|
|
147
|
+
},
|
|
148
|
+
{
|
|
149
|
+
name: "pretend_to_be",
|
|
150
|
+
pattern: /pretend\s+(to\s+be|you\s*'?re)/i,
|
|
151
|
+
severity: "MEDIUM",
|
|
152
|
+
category: "override",
|
|
153
|
+
},
|
|
154
|
+
{
|
|
155
|
+
name: "roleplay_as",
|
|
156
|
+
pattern: /role\s*play\s+(as|like)/i,
|
|
157
|
+
severity: "MEDIUM",
|
|
158
|
+
category: "override",
|
|
159
|
+
},
|
|
160
|
+
{
|
|
161
|
+
name: "new_task",
|
|
162
|
+
pattern: /new\s+(task|instruction|objective):\s*/i,
|
|
163
|
+
severity: "HIGH",
|
|
164
|
+
category: "override",
|
|
165
|
+
},
|
|
166
|
+
// Encoding bypass detection (Warning #1)
|
|
167
|
+
{
|
|
168
|
+
name: "base64_encoded_block",
|
|
169
|
+
pattern: /[A-Za-z0-9+/]{50,}={0,2}/g, // Large Base64 strings (50+ chars)
|
|
170
|
+
severity: "MEDIUM",
|
|
171
|
+
category: "encoding_bypass",
|
|
172
|
+
},
|
|
173
|
+
{
|
|
174
|
+
name: "unicode_escape_sequence",
|
|
175
|
+
pattern: /(?:\\u[0-9a-fA-F]{4}){3,}/gi, // 3+ consecutive Unicode escapes
|
|
176
|
+
severity: "MEDIUM",
|
|
177
|
+
category: "encoding_bypass",
|
|
178
|
+
},
|
|
179
|
+
{
|
|
180
|
+
name: "html_entity_block",
|
|
181
|
+
pattern: /(?:&#x?[0-9a-fA-F]+;){3,}/gi, // 3+ consecutive HTML entities
|
|
182
|
+
severity: "MEDIUM",
|
|
183
|
+
category: "encoding_bypass",
|
|
184
|
+
},
|
|
185
|
+
// Typoglycemia/evasion patterns (Warning #2)
|
|
186
|
+
{
|
|
187
|
+
name: "ignore_instructions_typo",
|
|
188
|
+
pattern: /ign[o0]r[e3]?\s+(all\s+)?(pr[e3]v[i1][o0]us|pr[i1][o0]r|ab[o0]v[e3])\s+[i1]nstruct[i1][o0]ns?/i,
|
|
189
|
+
severity: "HIGH",
|
|
190
|
+
category: "override",
|
|
191
|
+
},
|
|
192
|
+
{
|
|
193
|
+
name: "disregard_typo",
|
|
194
|
+
pattern: /d[i1]sr[e3]g[a4]rd\s+(all\s+)?(pr[e3]v[i1][o0]us|pr[i1][o0]r)\s+[i1]nstruct[i1][o0]ns?/i,
|
|
195
|
+
severity: "HIGH",
|
|
196
|
+
category: "override",
|
|
197
|
+
},
|
|
198
|
+
];
|
|
15
199
|
// NOTE: Pattern arrays moved to config/annotationPatterns.ts for configurability
|
|
16
200
|
// The patterns are now loaded from getDefaultCompiledPatterns() or custom config
|
|
17
201
|
export class ToolAnnotationAssessor extends BaseAssessor {
|
|
@@ -53,6 +237,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
53
237
|
let annotatedCount = 0;
|
|
54
238
|
let missingAnnotationsCount = 0;
|
|
55
239
|
let misalignedAnnotationsCount = 0;
|
|
240
|
+
let poisonedDescriptionsCount = 0;
|
|
56
241
|
// Track annotation sources
|
|
57
242
|
const annotationSourceCounts = {
|
|
58
243
|
mcp: 0,
|
|
@@ -128,6 +313,20 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
128
313
|
else {
|
|
129
314
|
annotationSourceCounts.none++;
|
|
130
315
|
}
|
|
316
|
+
// Track and emit poisoned description detection (Issue #8)
|
|
317
|
+
if (latestResult.descriptionPoisoning?.detected) {
|
|
318
|
+
poisonedDescriptionsCount++;
|
|
319
|
+
this.log(`POISONED DESCRIPTION DETECTED: ${tool.name} contains suspicious patterns`);
|
|
320
|
+
if (context.onProgress) {
|
|
321
|
+
context.onProgress({
|
|
322
|
+
type: "annotation_poisoned",
|
|
323
|
+
tool: tool.name,
|
|
324
|
+
description: tool.description,
|
|
325
|
+
patterns: latestResult.descriptionPoisoning.patterns,
|
|
326
|
+
riskLevel: latestResult.descriptionPoisoning.riskLevel,
|
|
327
|
+
});
|
|
328
|
+
}
|
|
329
|
+
}
|
|
131
330
|
// Emit annotation_missing event with tool details
|
|
132
331
|
if (!latestResult.hasAnnotations) {
|
|
133
332
|
if (context.onProgress && latestResult.inferredBehavior) {
|
|
@@ -231,7 +430,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
231
430
|
const recommendations = this.generateRecommendations(toolResults);
|
|
232
431
|
// Calculate new metrics and alignment breakdown
|
|
233
432
|
const { metrics, alignmentBreakdown } = this.calculateMetrics(toolResults, context.tools.length);
|
|
234
|
-
this.log(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned, ${alignmentBreakdown.reviewRecommended} need review`);
|
|
433
|
+
this.log(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned, ${alignmentBreakdown.reviewRecommended} need review, ${poisonedDescriptionsCount} poisoned`);
|
|
235
434
|
// Return enhanced assessment if Claude was used
|
|
236
435
|
if (useClaudeInference) {
|
|
237
436
|
const highConfidenceMisalignments = toolResults.filter((r) => r.claudeInference &&
|
|
@@ -249,6 +448,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
249
448
|
metrics,
|
|
250
449
|
alignmentBreakdown,
|
|
251
450
|
annotationSources: annotationSourceCounts,
|
|
451
|
+
poisonedDescriptionsDetected: poisonedDescriptionsCount,
|
|
252
452
|
claudeEnhanced: true,
|
|
253
453
|
highConfidenceMisalignments,
|
|
254
454
|
};
|
|
@@ -264,6 +464,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
264
464
|
metrics,
|
|
265
465
|
alignmentBreakdown,
|
|
266
466
|
annotationSources: annotationSourceCounts,
|
|
467
|
+
poisonedDescriptionsDetected: poisonedDescriptionsCount,
|
|
267
468
|
};
|
|
268
469
|
}
|
|
269
470
|
/**
|
|
@@ -516,6 +717,12 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
516
717
|
alignmentStatus = "MISALIGNED";
|
|
517
718
|
}
|
|
518
719
|
}
|
|
720
|
+
// Scan for description poisoning (Issue #8)
|
|
721
|
+
const descriptionPoisoning = this.scanDescriptionForPoisoning(tool);
|
|
722
|
+
if (descriptionPoisoning.detected) {
|
|
723
|
+
issues.push(`Tool description contains suspicious patterns: ${descriptionPoisoning.patterns.map((p) => p.name).join(", ")}`);
|
|
724
|
+
recommendations.push(`Review ${tool.name} description for potential prompt injection or hidden instructions`);
|
|
725
|
+
}
|
|
519
726
|
return {
|
|
520
727
|
toolName: tool.name,
|
|
521
728
|
hasAnnotations,
|
|
@@ -525,6 +732,49 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
525
732
|
alignmentStatus,
|
|
526
733
|
issues,
|
|
527
734
|
recommendations,
|
|
735
|
+
descriptionPoisoning,
|
|
736
|
+
};
|
|
737
|
+
}
|
|
738
|
+
/**
|
|
739
|
+
* Scan tool description for poisoning patterns (Issue #8)
|
|
740
|
+
* Detects hidden instructions, override commands, concealment, and exfiltration attempts
|
|
741
|
+
*/
|
|
742
|
+
scanDescriptionForPoisoning(tool) {
|
|
743
|
+
const description = tool.description || "";
|
|
744
|
+
const matches = [];
|
|
745
|
+
for (const patternDef of DESCRIPTION_POISONING_PATTERNS) {
|
|
746
|
+
// Create a fresh regex to reset lastIndex
|
|
747
|
+
const regex = new RegExp(patternDef.pattern.source, patternDef.pattern.flags);
|
|
748
|
+
// Loop to find all matches (not just first)
|
|
749
|
+
let match;
|
|
750
|
+
while ((match = regex.exec(description)) !== null) {
|
|
751
|
+
matches.push({
|
|
752
|
+
name: patternDef.name,
|
|
753
|
+
pattern: patternDef.pattern.toString(),
|
|
754
|
+
severity: patternDef.severity,
|
|
755
|
+
category: patternDef.category,
|
|
756
|
+
evidence: match[0].substring(0, 100) + (match[0].length > 100 ? "..." : ""),
|
|
757
|
+
});
|
|
758
|
+
// Prevent infinite loop for patterns without 'g' flag
|
|
759
|
+
if (!regex.global)
|
|
760
|
+
break;
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
// Determine overall risk level based on highest severity match
|
|
764
|
+
let riskLevel = "NONE";
|
|
765
|
+
if (matches.some((m) => m.severity === "HIGH")) {
|
|
766
|
+
riskLevel = "HIGH";
|
|
767
|
+
}
|
|
768
|
+
else if (matches.some((m) => m.severity === "MEDIUM")) {
|
|
769
|
+
riskLevel = "MEDIUM";
|
|
770
|
+
}
|
|
771
|
+
else if (matches.length > 0) {
|
|
772
|
+
riskLevel = "LOW";
|
|
773
|
+
}
|
|
774
|
+
return {
|
|
775
|
+
detected: matches.length > 0,
|
|
776
|
+
patterns: matches,
|
|
777
|
+
riskLevel,
|
|
528
778
|
};
|
|
529
779
|
}
|
|
530
780
|
/**
|
|
@@ -700,6 +950,11 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
700
950
|
if (totalTools === 0)
|
|
701
951
|
return "PASS";
|
|
702
952
|
const annotatedCount = results.filter((r) => r.hasAnnotations).length;
|
|
953
|
+
// Check for poisoned descriptions (Issue #8) - critical security issue
|
|
954
|
+
const poisonedCount = results.filter((r) => r.descriptionPoisoning?.detected === true).length;
|
|
955
|
+
if (poisonedCount > 0) {
|
|
956
|
+
return "FAIL";
|
|
957
|
+
}
|
|
703
958
|
// Only count actual MISALIGNED, not REVIEW_RECOMMENDED
|
|
704
959
|
const misalignedCount = results.filter((r) => r.alignmentStatus === "MISALIGNED").length;
|
|
705
960
|
// Count high-confidence destructive tools without proper hints
|