@bryan-thompson/inspector-assessment-client 1.18.1 → 1.19.1
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/dist/assets/{OAuthCallback-DhwTOA1q.js → OAuthCallback-BDIUPkR-.js} +1 -1
- package/dist/assets/{OAuthDebugCallback-DzopkA29.js → OAuthDebugCallback-DPLV3zir.js} +1 -1
- package/dist/assets/{index-CzoGuYPy.css → index-32-uLPhe.css} +3 -0
- package/dist/assets/{index-zBRaltBB.js → index-DKTSB7VQ.js} +460 -27
- package/dist/index.html +2 -2
- package/lib/lib/assessmentTypes.d.ts +45 -2
- package/lib/lib/assessmentTypes.d.ts.map +1 -1
- package/lib/lib/securityPatterns.d.ts.map +1 -1
- package/lib/lib/securityPatterns.js +82 -2
- package/lib/services/assessment/AssessmentOrchestrator.d.ts +1 -0
- package/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -1
- package/lib/services/assessment/AssessmentOrchestrator.js +5 -2
- package/lib/services/assessment/LanguageAwarePayloadGenerator.d.ts +41 -0
- package/lib/services/assessment/LanguageAwarePayloadGenerator.d.ts.map +1 -0
- package/lib/services/assessment/LanguageAwarePayloadGenerator.js +258 -0
- package/lib/services/assessment/ToolClassifier.d.ts +1 -0
- package/lib/services/assessment/ToolClassifier.d.ts.map +1 -1
- package/lib/services/assessment/ToolClassifier.js +26 -0
- package/lib/services/assessment/modules/ResourceAssessor.d.ts +5 -0
- package/lib/services/assessment/modules/ResourceAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/ResourceAssessor.js +161 -4
- package/lib/services/assessment/modules/SecurityAssessor.d.ts +1 -0
- package/lib/services/assessment/modules/SecurityAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/SecurityAssessor.js +49 -14
- package/lib/services/assessment/modules/TemporalAssessor.d.ts +5 -0
- package/lib/services/assessment/modules/TemporalAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/TemporalAssessor.js +133 -15
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +5 -0
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/ToolAnnotationAssessor.js +256 -1
- package/package.json +1 -1
|
@@ -13,7 +13,9 @@ import { BaseAssessor } from "./BaseAssessor.js";
|
|
|
13
13
|
import { getAllAttackPatterns, getPayloadsForAttack, } from "../../../lib/securityPatterns.js";
|
|
14
14
|
import { ToolClassifier, ToolCategory } from "../ToolClassifier.js";
|
|
15
15
|
import { createConcurrencyLimit } from "../lib/concurrencyLimit.js";
|
|
16
|
+
import { LanguageAwarePayloadGenerator } from "../LanguageAwarePayloadGenerator.js";
|
|
16
17
|
export class SecurityAssessor extends BaseAssessor {
|
|
18
|
+
languageGenerator = new LanguageAwarePayloadGenerator();
|
|
17
19
|
async assess(context) {
|
|
18
20
|
// Select tools for testing first
|
|
19
21
|
const toolsToTest = this.selectToolsForTesting(context.tools);
|
|
@@ -458,9 +460,7 @@ export class SecurityAssessor extends BaseAssessor {
|
|
|
458
460
|
/error GETting.*endpoint/i, // Transport layer GET errors (requires 'endpoint' to avoid false positives)
|
|
459
461
|
/service unavailable/i, // HTTP 503 (server down)
|
|
460
462
|
/gateway timeout/i, // HTTP 504 (gateway timeout)
|
|
461
|
-
/unknown tool:/i, //
|
|
462
|
-
/tool.*not found/i, // Alternative phrasing for missing tool
|
|
463
|
-
/tool.*does not exist/i, // Alternative phrasing for missing tool
|
|
463
|
+
/unknown tool:/i, // MCP spec format: "Unknown tool: <name>"
|
|
464
464
|
/no such tool/i, // Alternative phrasing for missing tool
|
|
465
465
|
];
|
|
466
466
|
// Check unambiguous patterns first
|
|
@@ -508,9 +508,7 @@ export class SecurityAssessor extends BaseAssessor {
|
|
|
508
508
|
/error GETting/i, // Transport layer GET errors
|
|
509
509
|
/service unavailable/i, // HTTP 503 (server down)
|
|
510
510
|
/gateway timeout/i, // HTTP 504 (gateway timeout)
|
|
511
|
-
/unknown tool:/i, //
|
|
512
|
-
/tool.*not found/i, // Alternative phrasing for missing tool
|
|
513
|
-
/tool.*does not exist/i, // Alternative phrasing for missing tool
|
|
511
|
+
/unknown tool:/i, // MCP spec format: "Unknown tool: <name>"
|
|
514
512
|
/no such tool/i, // Alternative phrasing for missing tool
|
|
515
513
|
];
|
|
516
514
|
// Check unambiguous patterns first
|
|
@@ -1089,10 +1087,14 @@ export class SecurityAssessor extends BaseAssessor {
|
|
|
1089
1087
|
/action\s+received:/i,
|
|
1090
1088
|
/input\s+received:/i,
|
|
1091
1089
|
/request\s+received:/i,
|
|
1092
|
-
// Explicit safety indicators in JSON responses
|
|
1093
|
-
|
|
1094
|
-
/"
|
|
1095
|
-
/"status"
|
|
1090
|
+
// Explicit safety indicators in JSON responses (context-aware to avoid matching unrelated fields)
|
|
1091
|
+
// Require safety-related context: message, result, status, stored, reflected, etc.
|
|
1092
|
+
/"safe"\s*:\s*true[^}]*("message"|"result"|"status"|"response")/i,
|
|
1093
|
+
/("message"|"result"|"status"|"response")[^}]*"safe"\s*:\s*true/i,
|
|
1094
|
+
/"vulnerable"\s*:\s*false[^}]*("safe"|"stored"|"reflected"|"status")/i,
|
|
1095
|
+
/("safe"|"stored"|"reflected"|"status")[^}]*"vulnerable"\s*:\s*false/i,
|
|
1096
|
+
/"status"\s*:\s*"acknowledged"[^}]*("message"|"result"|"safe")/i,
|
|
1097
|
+
/("message"|"result"|"safe")[^}]*"status"\s*:\s*"acknowledged"/i,
|
|
1096
1098
|
];
|
|
1097
1099
|
const reflectionPatterns = [
|
|
1098
1100
|
...statusPatterns,
|
|
@@ -1328,8 +1330,41 @@ export class SecurityAssessor extends BaseAssessor {
|
|
|
1328
1330
|
const params = {};
|
|
1329
1331
|
const targetParamTypes = payload.parameterTypes || [];
|
|
1330
1332
|
let payloadInjected = false;
|
|
1331
|
-
//
|
|
1332
|
-
|
|
1333
|
+
// NEW: Check for language-specific code execution parameters first
|
|
1334
|
+
// This enables detection of vulnerabilities in tools expecting Python/JS/SQL code
|
|
1335
|
+
for (const [key, prop] of Object.entries(schema.properties)) {
|
|
1336
|
+
const propSchema = prop;
|
|
1337
|
+
if (propSchema.type !== "string")
|
|
1338
|
+
continue;
|
|
1339
|
+
const detectedLanguage = this.languageGenerator.detectLanguage(key, tool.name, tool.description);
|
|
1340
|
+
// If we detect a specific language (not generic), use language-appropriate payloads
|
|
1341
|
+
if (detectedLanguage !== "generic" && !payloadInjected) {
|
|
1342
|
+
const languagePayloads = this.languageGenerator.getPayloadsForLanguage(detectedLanguage);
|
|
1343
|
+
if (languagePayloads.length > 0) {
|
|
1344
|
+
// Select a payload that targets similar behavior as the current attack pattern
|
|
1345
|
+
// (e.g., if testing command injection, use a command-executing payload)
|
|
1346
|
+
const payloadLower = payload.payload.toLowerCase();
|
|
1347
|
+
const isCommandTest = payloadLower.includes("whoami") ||
|
|
1348
|
+
payloadLower.includes("passwd") ||
|
|
1349
|
+
payloadLower.includes("id");
|
|
1350
|
+
// Find matching language payload based on test intent
|
|
1351
|
+
let selectedPayload = languagePayloads[0]; // Default to first
|
|
1352
|
+
if (isCommandTest) {
|
|
1353
|
+
// Prefer command execution payloads
|
|
1354
|
+
const cmdPayload = languagePayloads.find((lp) => lp.payload.includes("whoami") ||
|
|
1355
|
+
lp.payload.includes("subprocess") ||
|
|
1356
|
+
lp.payload.includes("execSync"));
|
|
1357
|
+
if (cmdPayload)
|
|
1358
|
+
selectedPayload = cmdPayload;
|
|
1359
|
+
}
|
|
1360
|
+
params[key] = selectedPayload.payload;
|
|
1361
|
+
payloadInjected = true;
|
|
1362
|
+
break;
|
|
1363
|
+
}
|
|
1364
|
+
}
|
|
1365
|
+
}
|
|
1366
|
+
// Fall back to parameterTypes matching if no language-specific payload was used
|
|
1367
|
+
if (!payloadInjected && targetParamTypes.length > 0) {
|
|
1333
1368
|
// Payload is parameter-specific (e.g., URLs only for "url" params)
|
|
1334
1369
|
for (const [key, prop] of Object.entries(schema.properties)) {
|
|
1335
1370
|
const propSchema = prop;
|
|
@@ -1343,8 +1378,8 @@ export class SecurityAssessor extends BaseAssessor {
|
|
|
1343
1378
|
}
|
|
1344
1379
|
}
|
|
1345
1380
|
}
|
|
1346
|
-
|
|
1347
|
-
|
|
1381
|
+
// Fall back to generic payload - inject into first string parameter (original behavior)
|
|
1382
|
+
if (!payloadInjected) {
|
|
1348
1383
|
for (const [key, prop] of Object.entries(schema.properties)) {
|
|
1349
1384
|
const propSchema = prop;
|
|
1350
1385
|
if (propSchema.type === "string" && !payloadInjected) {
|
|
@@ -28,6 +28,11 @@ export declare class TemporalAssessor extends BaseAssessor {
|
|
|
28
28
|
constructor(config: AssessmentConfiguration);
|
|
29
29
|
assess(context: AssessmentContext): Promise<TemporalAssessment>;
|
|
30
30
|
private assessTool;
|
|
31
|
+
/**
|
|
32
|
+
* Detect mutations in tool definition across invocation snapshots.
|
|
33
|
+
* DVMCP Challenge 4: Tool descriptions that mutate after N calls.
|
|
34
|
+
*/
|
|
35
|
+
private detectDefinitionMutation;
|
|
31
36
|
private analyzeResponses;
|
|
32
37
|
/**
|
|
33
38
|
* Generate a safe/neutral payload for a tool based on its input schema.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TemporalAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/TemporalAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EACL,uBAAuB,EAEvB,kBAAkB,EAEnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"TemporalAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/TemporalAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EACL,uBAAuB,EAEvB,kBAAkB,EAEnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AA+B9C,qBAAa,gBAAiB,SAAQ,YAAY;IAChD,OAAO,CAAC,kBAAkB,CAAS;IAGnC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAoBnC;IAGF,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAU;IAEjD;;;;;;;;;;OAUG;IACH,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CASrC;gBAEU,MAAM,EAAE,uBAAuB;IAKrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;YAqEvD,UAAU;IAuHxB;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAkChC,OAAO,CAAC,gBAAgB;IAmFxB;;;OAGG;IACH,OAAO,CAAC,mBAAmB;IAsC3B;;;;OAIG;IACH,OAAO,CAAC,iBAAiB;IAoDzB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAKzB;;;;OAIG;IACH,OAAO,CAAC,cAAc;IAYtB;;;;;;OAMG;IACH,OAAO,CAAC,cAAc;IAuBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IAiCzB,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,mBAAmB;IA+C3B,OAAO,CAAC,uBAAuB;CA+DhC"}
|
|
@@ -63,7 +63,15 @@ export class TemporalAssessor extends BaseAssessor {
|
|
|
63
63
|
async assess(context) {
|
|
64
64
|
const results = [];
|
|
65
65
|
let rugPullsDetected = 0;
|
|
66
|
-
|
|
66
|
+
let definitionMutationsDetected = 0;
|
|
67
|
+
// Check if definition tracking is available
|
|
68
|
+
const canTrackDefinitions = typeof context.listTools === "function";
|
|
69
|
+
if (canTrackDefinitions) {
|
|
70
|
+
this.log(`Starting temporal assessment with ${this.invocationsPerTool} invocations per tool (definition tracking enabled)`);
|
|
71
|
+
}
|
|
72
|
+
else {
|
|
73
|
+
this.log(`Starting temporal assessment with ${this.invocationsPerTool} invocations per tool (definition tracking unavailable)`);
|
|
74
|
+
}
|
|
67
75
|
for (const tool of context.tools) {
|
|
68
76
|
// Skip if tool selection is configured and this tool isn't selected
|
|
69
77
|
if (this.config.selectedToolsForTesting !== undefined &&
|
|
@@ -76,33 +84,63 @@ export class TemporalAssessor extends BaseAssessor {
|
|
|
76
84
|
rugPullsDetected++;
|
|
77
85
|
this.log(`RUG PULL DETECTED: ${tool.name} changed behavior at invocation ${result.firstDeviationAt}`);
|
|
78
86
|
}
|
|
87
|
+
if (result.definitionMutated) {
|
|
88
|
+
definitionMutationsDetected++;
|
|
89
|
+
this.log(`DEFINITION MUTATION DETECTED: ${tool.name} changed description at invocation ${result.definitionMutationAt}`);
|
|
90
|
+
}
|
|
79
91
|
// Respect delay between tests
|
|
80
92
|
if (this.config.delayBetweenTests) {
|
|
81
93
|
await this.sleep(this.config.delayBetweenTests);
|
|
82
94
|
}
|
|
83
95
|
}
|
|
84
|
-
|
|
96
|
+
// Status fails if either response or definition mutations detected
|
|
97
|
+
const totalVulnerabilities = rugPullsDetected + definitionMutationsDetected;
|
|
98
|
+
const status = this.determineTemporalStatus(totalVulnerabilities, results);
|
|
85
99
|
return {
|
|
86
100
|
toolsTested: results.length,
|
|
87
101
|
invocationsPerTool: this.invocationsPerTool,
|
|
88
102
|
rugPullsDetected,
|
|
103
|
+
definitionMutationsDetected,
|
|
89
104
|
details: results,
|
|
90
105
|
status,
|
|
91
|
-
explanation: this.generateExplanation(rugPullsDetected, results),
|
|
106
|
+
explanation: this.generateExplanation(rugPullsDetected, definitionMutationsDetected, results),
|
|
92
107
|
recommendations: this.generateRecommendations(results),
|
|
93
108
|
};
|
|
94
109
|
}
|
|
95
110
|
async assessTool(context, tool) {
|
|
96
111
|
const responses = [];
|
|
112
|
+
const definitionSnapshots = [];
|
|
97
113
|
const payload = this.generateSafePayload(tool);
|
|
98
114
|
// Reduce invocations for potentially destructive tools
|
|
99
115
|
const isDestructive = this.isDestructiveTool(tool);
|
|
100
116
|
const invocations = isDestructive
|
|
101
117
|
? Math.min(5, this.invocationsPerTool)
|
|
102
118
|
: this.invocationsPerTool;
|
|
119
|
+
// Check if definition tracking is available
|
|
120
|
+
const canTrackDefinitions = typeof context.listTools === "function";
|
|
103
121
|
this.log(`Testing ${tool.name} with ${invocations} invocations${isDestructive ? " (reduced - destructive)" : ""}`);
|
|
104
122
|
for (let i = 1; i <= invocations; i++) {
|
|
105
123
|
this.testCount++;
|
|
124
|
+
// Track tool definition BEFORE each invocation (if available)
|
|
125
|
+
// This detects rug pulls where the description or input schema mutates after N calls
|
|
126
|
+
if (canTrackDefinitions) {
|
|
127
|
+
try {
|
|
128
|
+
const currentTools = await this.executeWithTimeout(context.listTools(), this.PER_INVOCATION_TIMEOUT);
|
|
129
|
+
const currentTool = currentTools.find((t) => t.name === tool.name);
|
|
130
|
+
if (currentTool) {
|
|
131
|
+
definitionSnapshots.push({
|
|
132
|
+
invocation: i,
|
|
133
|
+
description: currentTool.description,
|
|
134
|
+
inputSchema: currentTool.inputSchema,
|
|
135
|
+
timestamp: Date.now(),
|
|
136
|
+
});
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
catch {
|
|
140
|
+
// Definition tracking failed - continue with response tracking
|
|
141
|
+
this.log(`Warning: Failed to fetch tool definition for ${tool.name} at invocation ${i}`);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
106
144
|
try {
|
|
107
145
|
// P2-2: Use shorter per-invocation timeout (10s vs default 30s)
|
|
108
146
|
const response = await this.executeWithTimeout(context.callTool(tool.name, payload), this.PER_INVOCATION_TIMEOUT);
|
|
@@ -137,12 +175,59 @@ export class TemporalAssessor extends BaseAssessor {
|
|
|
137
175
|
await this.sleep(50);
|
|
138
176
|
}
|
|
139
177
|
}
|
|
178
|
+
// Analyze responses for temporal behavior changes
|
|
140
179
|
const result = this.analyzeResponses(tool, responses);
|
|
180
|
+
// Analyze definitions for mutation (rug pull via description change)
|
|
181
|
+
const definitionMutation = this.detectDefinitionMutation(definitionSnapshots);
|
|
141
182
|
return {
|
|
142
183
|
...result,
|
|
143
184
|
reducedInvocations: isDestructive,
|
|
185
|
+
// Add definition mutation results
|
|
186
|
+
definitionMutated: definitionMutation !== null,
|
|
187
|
+
definitionMutationAt: definitionMutation?.detectedAt ?? null,
|
|
188
|
+
definitionEvidence: definitionMutation
|
|
189
|
+
? {
|
|
190
|
+
baselineDescription: definitionMutation.baselineDescription,
|
|
191
|
+
mutatedDescription: definitionMutation.mutatedDescription,
|
|
192
|
+
baselineSchema: definitionMutation.baselineSchema,
|
|
193
|
+
mutatedSchema: definitionMutation.mutatedSchema,
|
|
194
|
+
}
|
|
195
|
+
: undefined,
|
|
196
|
+
// If definition mutated, mark as vulnerable with DEFINITION pattern
|
|
197
|
+
vulnerable: result.vulnerable || definitionMutation !== null,
|
|
198
|
+
pattern: definitionMutation !== null ? "RUG_PULL_DEFINITION" : result.pattern,
|
|
199
|
+
severity: definitionMutation !== null || result.vulnerable ? "HIGH" : "NONE",
|
|
144
200
|
};
|
|
145
201
|
}
|
|
202
|
+
/**
|
|
203
|
+
* Detect mutations in tool definition across invocation snapshots.
|
|
204
|
+
* DVMCP Challenge 4: Tool descriptions that mutate after N calls.
|
|
205
|
+
*/
|
|
206
|
+
detectDefinitionMutation(snapshots) {
|
|
207
|
+
if (snapshots.length < 2)
|
|
208
|
+
return null;
|
|
209
|
+
const baseline = snapshots[0];
|
|
210
|
+
for (let i = 1; i < snapshots.length; i++) {
|
|
211
|
+
const current = snapshots[i];
|
|
212
|
+
// Check if description changed
|
|
213
|
+
const descriptionChanged = baseline.description !== current.description;
|
|
214
|
+
// Check if schema changed (compares JSON.stringify output, so property order matters)
|
|
215
|
+
const schemaChanged = JSON.stringify(baseline.inputSchema) !==
|
|
216
|
+
JSON.stringify(current.inputSchema);
|
|
217
|
+
if (descriptionChanged || schemaChanged) {
|
|
218
|
+
return {
|
|
219
|
+
detectedAt: current.invocation,
|
|
220
|
+
baselineDescription: baseline.description,
|
|
221
|
+
mutatedDescription: descriptionChanged
|
|
222
|
+
? current.description
|
|
223
|
+
: undefined,
|
|
224
|
+
baselineSchema: schemaChanged ? baseline.inputSchema : undefined,
|
|
225
|
+
mutatedSchema: schemaChanged ? current.inputSchema : undefined,
|
|
226
|
+
};
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
return null;
|
|
230
|
+
}
|
|
146
231
|
analyzeResponses(tool, responses) {
|
|
147
232
|
if (responses.length === 0) {
|
|
148
233
|
return {
|
|
@@ -380,31 +465,64 @@ export class TemporalAssessor extends BaseAssessor {
|
|
|
380
465
|
}
|
|
381
466
|
return "PASS";
|
|
382
467
|
}
|
|
383
|
-
generateExplanation(rugPullsDetected, results) {
|
|
468
|
+
generateExplanation(rugPullsDetected, definitionMutationsDetected, results) {
|
|
384
469
|
if (results.length === 0) {
|
|
385
470
|
return "No tools were tested for temporal vulnerabilities.";
|
|
386
471
|
}
|
|
387
|
-
|
|
388
|
-
|
|
472
|
+
const parts = [];
|
|
473
|
+
// Report response-based rug pulls
|
|
474
|
+
if (rugPullsDetected > 0) {
|
|
475
|
+
const responseVulnerableTools = results
|
|
476
|
+
.filter((r) => r.vulnerable && r.pattern === "RUG_PULL_TEMPORAL")
|
|
477
|
+
.map((r) => `${r.tool} (changed at invocation ${r.firstDeviationAt})`)
|
|
478
|
+
.join(", ");
|
|
479
|
+
if (responseVulnerableTools) {
|
|
480
|
+
parts.push(`CRITICAL: ${rugPullsDetected} tool(s) showed temporal response changes: ${responseVulnerableTools}`);
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
// Report definition mutations
|
|
484
|
+
if (definitionMutationsDetected > 0) {
|
|
485
|
+
const definitionVulnerableTools = results
|
|
486
|
+
.filter((r) => r.definitionMutated)
|
|
487
|
+
.map((r) => `${r.tool} (description changed at invocation ${r.definitionMutationAt})`)
|
|
488
|
+
.join(", ");
|
|
489
|
+
parts.push(`CRITICAL: ${definitionMutationsDetected} tool(s) mutated their definition/description: ${definitionVulnerableTools}`);
|
|
490
|
+
}
|
|
491
|
+
if (parts.length === 0) {
|
|
492
|
+
return `All ${results.length} tools showed consistent behavior and definitions across repeated invocations.`;
|
|
389
493
|
}
|
|
390
|
-
|
|
391
|
-
.filter((r) => r.vulnerable)
|
|
392
|
-
.map((r) => `${r.tool} (changed at invocation ${r.firstDeviationAt})`)
|
|
393
|
-
.join(", ");
|
|
394
|
-
return `CRITICAL: ${rugPullsDetected} tool(s) showed temporal behavior changes indicating potential rug pull vulnerability: ${vulnerableTools}`;
|
|
494
|
+
return parts.join(" ");
|
|
395
495
|
}
|
|
396
496
|
generateRecommendations(results) {
|
|
397
497
|
const recommendations = [];
|
|
398
|
-
|
|
399
|
-
|
|
498
|
+
// Response-based rug pulls
|
|
499
|
+
const responseVulnerableTools = results.filter((r) => r.vulnerable && r.pattern === "RUG_PULL_TEMPORAL");
|
|
500
|
+
if (responseVulnerableTools.length > 0) {
|
|
400
501
|
recommendations.push("Immediately investigate tools with temporal behavior changes - this pattern is characteristic of rug pull attacks.");
|
|
401
|
-
for (const tool of
|
|
502
|
+
for (const tool of responseVulnerableTools) {
|
|
402
503
|
recommendations.push(`Review ${tool.tool}: behavior changed after ${tool.firstDeviationAt} invocations. Compare safe vs malicious responses in evidence.`);
|
|
403
504
|
}
|
|
404
505
|
recommendations.push("Check for invocation counters, time-based triggers, or state accumulation in the tool implementation.");
|
|
405
506
|
}
|
|
507
|
+
// Definition mutation rug pulls
|
|
508
|
+
const definitionMutatedTools = results.filter((r) => r.definitionMutated);
|
|
509
|
+
if (definitionMutatedTools.length > 0) {
|
|
510
|
+
recommendations.push("CRITICAL: Tool definition/description mutations detected - this is a sophisticated rug pull attack that injects malicious instructions after N calls.");
|
|
511
|
+
for (const tool of definitionMutatedTools) {
|
|
512
|
+
const baseline = tool.definitionEvidence?.baselineDescription
|
|
513
|
+
? `"${tool.definitionEvidence.baselineDescription.substring(0, 100)}..."`
|
|
514
|
+
: "unknown";
|
|
515
|
+
const mutated = tool.definitionEvidence?.mutatedDescription
|
|
516
|
+
? `"${tool.definitionEvidence.mutatedDescription.substring(0, 100)}..."`
|
|
517
|
+
: "unknown";
|
|
518
|
+
recommendations.push(`${tool.tool}: Description changed at invocation ${tool.definitionMutationAt}. Baseline: ${baseline} → Mutated: ${mutated}`);
|
|
519
|
+
}
|
|
520
|
+
recommendations.push("Review tool source code for global state that mutates __doc__, description, or tool metadata based on call count.");
|
|
521
|
+
}
|
|
406
522
|
const errorTools = results.filter((r) => r.errorCount > 0);
|
|
407
|
-
if (errorTools.length > 0 &&
|
|
523
|
+
if (errorTools.length > 0 &&
|
|
524
|
+
responseVulnerableTools.length === 0 &&
|
|
525
|
+
definitionMutatedTools.length === 0) {
|
|
408
526
|
recommendations.push(`${errorTools.length} tool(s) had errors during repeated invocations. Review error handling and rate limiting.`);
|
|
409
527
|
}
|
|
410
528
|
return recommendations;
|
|
@@ -79,6 +79,11 @@ export declare class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
79
79
|
* Now includes alignment status with confidence-aware logic
|
|
80
80
|
*/
|
|
81
81
|
private assessTool;
|
|
82
|
+
/**
|
|
83
|
+
* Scan tool description for poisoning patterns (Issue #8)
|
|
84
|
+
* Detects hidden instructions, override commands, concealment, and exfiltration attempts
|
|
85
|
+
*/
|
|
86
|
+
private scanDescriptionForPoisoning;
|
|
82
87
|
/**
|
|
83
88
|
* Extract annotations from a tool
|
|
84
89
|
* MCP SDK may have annotations in different locations
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ToolAnnotationAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ToolAnnotationAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,KAAK,EACV,wBAAwB,EACxB,oBAAoB,EAKpB,uBAAuB,EAExB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,EACL,KAAK,gBAAgB,EAGtB,MAAM,8BAA8B,CAAC;
|
|
1
|
+
{"version":3,"file":"ToolAnnotationAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ToolAnnotationAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,KAAK,EACV,wBAAwB,EACxB,oBAAoB,EAKpB,uBAAuB,EAExB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,EACL,KAAK,gBAAgB,EAGtB,MAAM,8BAA8B,CAAC;AAgNtC;;GAEG;AACH,MAAM,WAAW,4BAA6B,SAAQ,oBAAoB;IACxE,eAAe,CAAC,EAAE;QAChB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,mBAAmB,EAAE,OAAO,CAAC;QAC7B,UAAU,EAAE,MAAM,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,oBAAoB,EAAE;YACpB,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,eAAe,CAAC,EAAE,OAAO,CAAC;YAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;SAC1B,CAAC;QACF,oBAAoB,EAAE,OAAO,CAAC;QAC9B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,MAAM,EAAE,iBAAiB,GAAG,eAAe,CAAC;KAC7C,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,gCAAiC,SAAQ,wBAAwB;IAChF,WAAW,EAAE,4BAA4B,EAAE,CAAC;IAC5C,cAAc,EAAE,OAAO,CAAC;IACxB,2BAA2B,EAAE,4BAA4B,EAAE,CAAC;CAC7D;AAKD,qBAAa,sBAAuB,SAAQ,YAAY;IACtD,OAAO,CAAC,YAAY,CAAC,CAAmB;IACxC,OAAO,CAAC,gBAAgB,CAAmB;gBAE/B,MAAM,EAAE,uBAAuB;IAM3C;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,gBAAgB,GAAG,IAAI;IAK7C;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAK/C;;OAEG;IACH,eAAe,IAAI,OAAO;IAO1B;;OAEG;IACG,MAAM,CACV,OAAO,EAAE,iBAAiB,GACzB,OAAO,CAAC,wBAAwB,GAAG,gCAAgC,CAAC;IAkSvE;;OAEG;YACW,0BAA0B;IA+IxC;;OAEG;IACH,OAAO,CAAC,2BAA2B;IAiCnC;;OAEG;IACH,OAAO,CAAC,+BAA+B;IAoFvC;;;OAGG;IACH,OAAO,CAAC,UAAU;IA2HlB;;;OAGG;IACH,OAAO,CAAC,2BAA2B;IA2DnC;;;;;;;;;OASG;IACH,OAAO,CAAC,kBAAkB;IAyE1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAuBzB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAgGrB;;;OAGG;IACH,OAAO,CAAC,yBAAyB;IA0DjC;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAiDxB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAmC3B;;OAEG;IACH,OAAO,CAAC,uBAAuB;CA2ChC"}
|