@bryan-thompson/inspector-assessment-client 1.18.1 → 1.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/dist/assets/{OAuthCallback-DhwTOA1q.js → OAuthCallback-BDIUPkR-.js} +1 -1
  2. package/dist/assets/{OAuthDebugCallback-DzopkA29.js → OAuthDebugCallback-DPLV3zir.js} +1 -1
  3. package/dist/assets/{index-CzoGuYPy.css → index-32-uLPhe.css} +3 -0
  4. package/dist/assets/{index-zBRaltBB.js → index-DKTSB7VQ.js} +460 -27
  5. package/dist/index.html +2 -2
  6. package/lib/lib/assessmentTypes.d.ts +45 -2
  7. package/lib/lib/assessmentTypes.d.ts.map +1 -1
  8. package/lib/lib/securityPatterns.d.ts.map +1 -1
  9. package/lib/lib/securityPatterns.js +82 -2
  10. package/lib/services/assessment/AssessmentOrchestrator.d.ts +1 -0
  11. package/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -1
  12. package/lib/services/assessment/AssessmentOrchestrator.js +5 -2
  13. package/lib/services/assessment/LanguageAwarePayloadGenerator.d.ts +41 -0
  14. package/lib/services/assessment/LanguageAwarePayloadGenerator.d.ts.map +1 -0
  15. package/lib/services/assessment/LanguageAwarePayloadGenerator.js +258 -0
  16. package/lib/services/assessment/ToolClassifier.d.ts +1 -0
  17. package/lib/services/assessment/ToolClassifier.d.ts.map +1 -1
  18. package/lib/services/assessment/ToolClassifier.js +26 -0
  19. package/lib/services/assessment/modules/ResourceAssessor.d.ts +5 -0
  20. package/lib/services/assessment/modules/ResourceAssessor.d.ts.map +1 -1
  21. package/lib/services/assessment/modules/ResourceAssessor.js +161 -4
  22. package/lib/services/assessment/modules/SecurityAssessor.d.ts +1 -0
  23. package/lib/services/assessment/modules/SecurityAssessor.d.ts.map +1 -1
  24. package/lib/services/assessment/modules/SecurityAssessor.js +49 -14
  25. package/lib/services/assessment/modules/TemporalAssessor.d.ts +5 -0
  26. package/lib/services/assessment/modules/TemporalAssessor.d.ts.map +1 -1
  27. package/lib/services/assessment/modules/TemporalAssessor.js +133 -15
  28. package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +5 -0
  29. package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -1
  30. package/lib/services/assessment/modules/ToolAnnotationAssessor.js +256 -1
  31. package/package.json +1 -1
@@ -13,7 +13,9 @@ import { BaseAssessor } from "./BaseAssessor.js";
13
13
  import { getAllAttackPatterns, getPayloadsForAttack, } from "../../../lib/securityPatterns.js";
14
14
  import { ToolClassifier, ToolCategory } from "../ToolClassifier.js";
15
15
  import { createConcurrencyLimit } from "../lib/concurrencyLimit.js";
16
+ import { LanguageAwarePayloadGenerator } from "../LanguageAwarePayloadGenerator.js";
16
17
  export class SecurityAssessor extends BaseAssessor {
18
+ languageGenerator = new LanguageAwarePayloadGenerator();
17
19
  async assess(context) {
18
20
  // Select tools for testing first
19
21
  const toolsToTest = this.selectToolsForTesting(context.tools);
@@ -458,9 +460,7 @@ export class SecurityAssessor extends BaseAssessor {
458
460
  /error GETting.*endpoint/i, // Transport layer GET errors (requires 'endpoint' to avoid false positives)
459
461
  /service unavailable/i, // HTTP 503 (server down)
460
462
  /gateway timeout/i, // HTTP 504 (gateway timeout)
461
- /unknown tool:/i, // Tool name not in current server's tool list (stale tool list)
462
- /tool.*not found/i, // Alternative phrasing for missing tool
463
- /tool.*does not exist/i, // Alternative phrasing for missing tool
463
+ /unknown tool:/i, // MCP spec format: "Unknown tool: <name>"
464
464
  /no such tool/i, // Alternative phrasing for missing tool
465
465
  ];
466
466
  // Check unambiguous patterns first
@@ -508,9 +508,7 @@ export class SecurityAssessor extends BaseAssessor {
508
508
  /error GETting/i, // Transport layer GET errors
509
509
  /service unavailable/i, // HTTP 503 (server down)
510
510
  /gateway timeout/i, // HTTP 504 (gateway timeout)
511
- /unknown tool:/i, // Tool name not in current server's tool list (stale tool list)
512
- /tool.*not found/i, // Alternative phrasing for missing tool
513
- /tool.*does not exist/i, // Alternative phrasing for missing tool
511
+ /unknown tool:/i, // MCP spec format: "Unknown tool: <name>"
514
512
  /no such tool/i, // Alternative phrasing for missing tool
515
513
  ];
516
514
  // Check unambiguous patterns first
@@ -1089,10 +1087,14 @@ export class SecurityAssessor extends BaseAssessor {
1089
1087
  /action\s+received:/i,
1090
1088
  /input\s+received:/i,
1091
1089
  /request\s+received:/i,
1092
- // Explicit safety indicators in JSON responses
1093
- /"safe":\s*true/i,
1094
- /"vulnerable":\s*false/i,
1095
- /"status":\s*"acknowledged"/i,
1090
+ // Explicit safety indicators in JSON responses (context-aware to avoid matching unrelated fields)
1091
+ // Require safety-related context: message, result, status, stored, reflected, etc.
1092
+ /"safe"\s*:\s*true[^}]*("message"|"result"|"status"|"response")/i,
1093
+ /("message"|"result"|"status"|"response")[^}]*"safe"\s*:\s*true/i,
1094
+ /"vulnerable"\s*:\s*false[^}]*("safe"|"stored"|"reflected"|"status")/i,
1095
+ /("safe"|"stored"|"reflected"|"status")[^}]*"vulnerable"\s*:\s*false/i,
1096
+ /"status"\s*:\s*"acknowledged"[^}]*("message"|"result"|"safe")/i,
1097
+ /("message"|"result"|"safe")[^}]*"status"\s*:\s*"acknowledged"/i,
1096
1098
  ];
1097
1099
  const reflectionPatterns = [
1098
1100
  ...statusPatterns,
@@ -1328,8 +1330,41 @@ export class SecurityAssessor extends BaseAssessor {
1328
1330
  const params = {};
1329
1331
  const targetParamTypes = payload.parameterTypes || [];
1330
1332
  let payloadInjected = false;
1331
- // Try to match payload to appropriate parameter by name
1332
- if (targetParamTypes.length > 0) {
1333
+ // NEW: Check for language-specific code execution parameters first
1334
+ // This enables detection of vulnerabilities in tools expecting Python/JS/SQL code
1335
+ for (const [key, prop] of Object.entries(schema.properties)) {
1336
+ const propSchema = prop;
1337
+ if (propSchema.type !== "string")
1338
+ continue;
1339
+ const detectedLanguage = this.languageGenerator.detectLanguage(key, tool.name, tool.description);
1340
+ // If we detect a specific language (not generic), use language-appropriate payloads
1341
+ if (detectedLanguage !== "generic" && !payloadInjected) {
1342
+ const languagePayloads = this.languageGenerator.getPayloadsForLanguage(detectedLanguage);
1343
+ if (languagePayloads.length > 0) {
1344
+ // Select a payload that targets similar behavior as the current attack pattern
1345
+ // (e.g., if testing command injection, use a command-executing payload)
1346
+ const payloadLower = payload.payload.toLowerCase();
1347
+ const isCommandTest = payloadLower.includes("whoami") ||
1348
+ payloadLower.includes("passwd") ||
1349
+ payloadLower.includes("id");
1350
+ // Find matching language payload based on test intent
1351
+ let selectedPayload = languagePayloads[0]; // Default to first
1352
+ if (isCommandTest) {
1353
+ // Prefer command execution payloads
1354
+ const cmdPayload = languagePayloads.find((lp) => lp.payload.includes("whoami") ||
1355
+ lp.payload.includes("subprocess") ||
1356
+ lp.payload.includes("execSync"));
1357
+ if (cmdPayload)
1358
+ selectedPayload = cmdPayload;
1359
+ }
1360
+ params[key] = selectedPayload.payload;
1361
+ payloadInjected = true;
1362
+ break;
1363
+ }
1364
+ }
1365
+ }
1366
+ // Fall back to parameterTypes matching if no language-specific payload was used
1367
+ if (!payloadInjected && targetParamTypes.length > 0) {
1333
1368
  // Payload is parameter-specific (e.g., URLs only for "url" params)
1334
1369
  for (const [key, prop] of Object.entries(schema.properties)) {
1335
1370
  const propSchema = prop;
@@ -1343,8 +1378,8 @@ export class SecurityAssessor extends BaseAssessor {
1343
1378
  }
1344
1379
  }
1345
1380
  }
1346
- else {
1347
- // Generic payload - inject into first string parameter (original behavior)
1381
+ // Fall back to generic payload - inject into first string parameter (original behavior)
1382
+ if (!payloadInjected) {
1348
1383
  for (const [key, prop] of Object.entries(schema.properties)) {
1349
1384
  const propSchema = prop;
1350
1385
  if (propSchema.type === "string" && !payloadInjected) {
@@ -28,6 +28,11 @@ export declare class TemporalAssessor extends BaseAssessor {
28
28
  constructor(config: AssessmentConfiguration);
29
29
  assess(context: AssessmentContext): Promise<TemporalAssessment>;
30
30
  private assessTool;
31
+ /**
32
+ * Detect mutations in tool definition across invocation snapshots.
33
+ * DVMCP Challenge 4: Tool descriptions that mutate after N calls.
34
+ */
35
+ private detectDefinitionMutation;
31
36
  private analyzeResponses;
32
37
  /**
33
38
  * Generate a safe/neutral payload for a tool based on its input schema.
@@ -1 +1 @@
1
- {"version":3,"file":"TemporalAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/TemporalAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EACL,uBAAuB,EAEvB,kBAAkB,EAEnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAY9C,qBAAa,gBAAiB,SAAQ,YAAY;IAChD,OAAO,CAAC,kBAAkB,CAAS;IAGnC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAoBnC;IAGF,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAU;IAEjD;;;;;;;;;;OAUG;IACH,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CASrC;gBAEU,MAAM,EAAE,uBAAuB;IAKrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;YA8CvD,UAAU;IAkExB,OAAO,CAAC,gBAAgB;IAmFxB;;;OAGG;IACH,OAAO,CAAC,mBAAmB;IAsC3B;;;;OAIG;IACH,OAAO,CAAC,iBAAiB;IAoDzB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAKzB;;;;OAIG;IACH,OAAO,CAAC,cAAc;IAYtB;;;;;;OAMG;IACH,OAAO,CAAC,cAAc;IAuBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IAiCzB,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,mBAAmB;IAoB3B,OAAO,CAAC,uBAAuB;CA8BhC"}
1
+ {"version":3,"file":"TemporalAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/TemporalAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EACL,uBAAuB,EAEvB,kBAAkB,EAEnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AA+B9C,qBAAa,gBAAiB,SAAQ,YAAY;IAChD,OAAO,CAAC,kBAAkB,CAAS;IAGnC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAoBnC;IAGF,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAU;IAEjD;;;;;;;;;;OAUG;IACH,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CASrC;gBAEU,MAAM,EAAE,uBAAuB;IAKrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;YAqEvD,UAAU;IAuHxB;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAkChC,OAAO,CAAC,gBAAgB;IAmFxB;;;OAGG;IACH,OAAO,CAAC,mBAAmB;IAsC3B;;;;OAIG;IACH,OAAO,CAAC,iBAAiB;IAoDzB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAKzB;;;;OAIG;IACH,OAAO,CAAC,cAAc;IAYtB;;;;;;OAMG;IACH,OAAO,CAAC,cAAc;IAuBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IAiCzB,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,mBAAmB;IA+C3B,OAAO,CAAC,uBAAuB;CA+DhC"}
@@ -63,7 +63,15 @@ export class TemporalAssessor extends BaseAssessor {
63
63
  async assess(context) {
64
64
  const results = [];
65
65
  let rugPullsDetected = 0;
66
- this.log(`Starting temporal assessment with ${this.invocationsPerTool} invocations per tool`);
66
+ let definitionMutationsDetected = 0;
67
+ // Check if definition tracking is available
68
+ const canTrackDefinitions = typeof context.listTools === "function";
69
+ if (canTrackDefinitions) {
70
+ this.log(`Starting temporal assessment with ${this.invocationsPerTool} invocations per tool (definition tracking enabled)`);
71
+ }
72
+ else {
73
+ this.log(`Starting temporal assessment with ${this.invocationsPerTool} invocations per tool (definition tracking unavailable)`);
74
+ }
67
75
  for (const tool of context.tools) {
68
76
  // Skip if tool selection is configured and this tool isn't selected
69
77
  if (this.config.selectedToolsForTesting !== undefined &&
@@ -76,33 +84,63 @@ export class TemporalAssessor extends BaseAssessor {
76
84
  rugPullsDetected++;
77
85
  this.log(`RUG PULL DETECTED: ${tool.name} changed behavior at invocation ${result.firstDeviationAt}`);
78
86
  }
87
+ if (result.definitionMutated) {
88
+ definitionMutationsDetected++;
89
+ this.log(`DEFINITION MUTATION DETECTED: ${tool.name} changed description at invocation ${result.definitionMutationAt}`);
90
+ }
79
91
  // Respect delay between tests
80
92
  if (this.config.delayBetweenTests) {
81
93
  await this.sleep(this.config.delayBetweenTests);
82
94
  }
83
95
  }
84
- const status = this.determineTemporalStatus(rugPullsDetected, results);
96
+ // Status fails if either response or definition mutations detected
97
+ const totalVulnerabilities = rugPullsDetected + definitionMutationsDetected;
98
+ const status = this.determineTemporalStatus(totalVulnerabilities, results);
85
99
  return {
86
100
  toolsTested: results.length,
87
101
  invocationsPerTool: this.invocationsPerTool,
88
102
  rugPullsDetected,
103
+ definitionMutationsDetected,
89
104
  details: results,
90
105
  status,
91
- explanation: this.generateExplanation(rugPullsDetected, results),
106
+ explanation: this.generateExplanation(rugPullsDetected, definitionMutationsDetected, results),
92
107
  recommendations: this.generateRecommendations(results),
93
108
  };
94
109
  }
95
110
  async assessTool(context, tool) {
96
111
  const responses = [];
112
+ const definitionSnapshots = [];
97
113
  const payload = this.generateSafePayload(tool);
98
114
  // Reduce invocations for potentially destructive tools
99
115
  const isDestructive = this.isDestructiveTool(tool);
100
116
  const invocations = isDestructive
101
117
  ? Math.min(5, this.invocationsPerTool)
102
118
  : this.invocationsPerTool;
119
+ // Check if definition tracking is available
120
+ const canTrackDefinitions = typeof context.listTools === "function";
103
121
  this.log(`Testing ${tool.name} with ${invocations} invocations${isDestructive ? " (reduced - destructive)" : ""}`);
104
122
  for (let i = 1; i <= invocations; i++) {
105
123
  this.testCount++;
124
+ // Track tool definition BEFORE each invocation (if available)
125
+ // This detects rug pulls where description mutates after N calls
126
+ if (canTrackDefinitions) {
127
+ try {
128
+ const currentTools = await this.executeWithTimeout(context.listTools(), this.PER_INVOCATION_TIMEOUT);
129
+ const currentTool = currentTools.find((t) => t.name === tool.name);
130
+ if (currentTool) {
131
+ definitionSnapshots.push({
132
+ invocation: i,
133
+ description: currentTool.description,
134
+ inputSchema: currentTool.inputSchema,
135
+ timestamp: Date.now(),
136
+ });
137
+ }
138
+ }
139
+ catch {
140
+ // Definition tracking failed - continue with response tracking
141
+ this.log(`Warning: Failed to fetch tool definition for ${tool.name} at invocation ${i}`);
142
+ }
143
+ }
106
144
  try {
107
145
  // P2-2: Use shorter per-invocation timeout (10s vs default 30s)
108
146
  const response = await this.executeWithTimeout(context.callTool(tool.name, payload), this.PER_INVOCATION_TIMEOUT);
@@ -137,12 +175,59 @@ export class TemporalAssessor extends BaseAssessor {
137
175
  await this.sleep(50);
138
176
  }
139
177
  }
178
+ // Analyze responses for temporal behavior changes
140
179
  const result = this.analyzeResponses(tool, responses);
180
+ // Analyze definitions for mutation (rug pull via description change)
181
+ const definitionMutation = this.detectDefinitionMutation(definitionSnapshots);
141
182
  return {
142
183
  ...result,
143
184
  reducedInvocations: isDestructive,
185
+ // Add definition mutation results
186
+ definitionMutated: definitionMutation !== null,
187
+ definitionMutationAt: definitionMutation?.detectedAt ?? null,
188
+ definitionEvidence: definitionMutation
189
+ ? {
190
+ baselineDescription: definitionMutation.baselineDescription,
191
+ mutatedDescription: definitionMutation.mutatedDescription,
192
+ baselineSchema: definitionMutation.baselineSchema,
193
+ mutatedSchema: definitionMutation.mutatedSchema,
194
+ }
195
+ : undefined,
196
+ // If definition mutated, mark as vulnerable with DEFINITION pattern
197
+ vulnerable: result.vulnerable || definitionMutation !== null,
198
+ pattern: definitionMutation !== null ? "RUG_PULL_DEFINITION" : result.pattern,
199
+ severity: definitionMutation !== null || result.vulnerable ? "HIGH" : "NONE",
144
200
  };
145
201
  }
202
+ /**
203
+ * Detect mutations in tool definition across invocation snapshots.
204
+ * DVMCP Challenge 4: Tool descriptions that mutate after N calls.
205
+ */
206
+ detectDefinitionMutation(snapshots) {
207
+ if (snapshots.length < 2)
208
+ return null;
209
+ const baseline = snapshots[0];
210
+ for (let i = 1; i < snapshots.length; i++) {
211
+ const current = snapshots[i];
212
+ // Check if description changed
213
+ const descriptionChanged = baseline.description !== current.description;
214
+ // Check if schema changed (deep comparison)
215
+ const schemaChanged = JSON.stringify(baseline.inputSchema) !==
216
+ JSON.stringify(current.inputSchema);
217
+ if (descriptionChanged || schemaChanged) {
218
+ return {
219
+ detectedAt: current.invocation,
220
+ baselineDescription: baseline.description,
221
+ mutatedDescription: descriptionChanged
222
+ ? current.description
223
+ : undefined,
224
+ baselineSchema: schemaChanged ? baseline.inputSchema : undefined,
225
+ mutatedSchema: schemaChanged ? current.inputSchema : undefined,
226
+ };
227
+ }
228
+ }
229
+ return null;
230
+ }
146
231
  analyzeResponses(tool, responses) {
147
232
  if (responses.length === 0) {
148
233
  return {
@@ -380,31 +465,64 @@ export class TemporalAssessor extends BaseAssessor {
380
465
  }
381
466
  return "PASS";
382
467
  }
383
- generateExplanation(rugPullsDetected, results) {
468
+ generateExplanation(rugPullsDetected, definitionMutationsDetected, results) {
384
469
  if (results.length === 0) {
385
470
  return "No tools were tested for temporal vulnerabilities.";
386
471
  }
387
- if (rugPullsDetected === 0) {
388
- return `All ${results.length} tools showed consistent behavior across repeated invocations.`;
472
+ const parts = [];
473
+ // Report response-based rug pulls
474
+ if (rugPullsDetected > 0) {
475
+ const responseVulnerableTools = results
476
+ .filter((r) => r.vulnerable && r.pattern === "RUG_PULL_TEMPORAL")
477
+ .map((r) => `${r.tool} (changed at invocation ${r.firstDeviationAt})`)
478
+ .join(", ");
479
+ if (responseVulnerableTools) {
480
+ parts.push(`CRITICAL: ${rugPullsDetected} tool(s) showed temporal response changes: ${responseVulnerableTools}`);
481
+ }
482
+ }
483
+ // Report definition mutations
484
+ if (definitionMutationsDetected > 0) {
485
+ const definitionVulnerableTools = results
486
+ .filter((r) => r.definitionMutated)
487
+ .map((r) => `${r.tool} (description changed at invocation ${r.definitionMutationAt})`)
488
+ .join(", ");
489
+ parts.push(`CRITICAL: ${definitionMutationsDetected} tool(s) mutated their definition/description: ${definitionVulnerableTools}`);
490
+ }
491
+ if (parts.length === 0) {
492
+ return `All ${results.length} tools showed consistent behavior and definitions across repeated invocations.`;
389
493
  }
390
- const vulnerableTools = results
391
- .filter((r) => r.vulnerable)
392
- .map((r) => `${r.tool} (changed at invocation ${r.firstDeviationAt})`)
393
- .join(", ");
394
- return `CRITICAL: ${rugPullsDetected} tool(s) showed temporal behavior changes indicating potential rug pull vulnerability: ${vulnerableTools}`;
494
+ return parts.join(" ");
395
495
  }
396
496
  generateRecommendations(results) {
397
497
  const recommendations = [];
398
- const vulnerableTools = results.filter((r) => r.vulnerable);
399
- if (vulnerableTools.length > 0) {
498
+ // Response-based rug pulls
499
+ const responseVulnerableTools = results.filter((r) => r.vulnerable && r.pattern === "RUG_PULL_TEMPORAL");
500
+ if (responseVulnerableTools.length > 0) {
400
501
  recommendations.push("Immediately investigate tools with temporal behavior changes - this pattern is characteristic of rug pull attacks.");
401
- for (const tool of vulnerableTools) {
502
+ for (const tool of responseVulnerableTools) {
402
503
  recommendations.push(`Review ${tool.tool}: behavior changed after ${tool.firstDeviationAt} invocations. Compare safe vs malicious responses in evidence.`);
403
504
  }
404
505
  recommendations.push("Check for invocation counters, time-based triggers, or state accumulation in the tool implementation.");
405
506
  }
507
+ // Definition mutation rug pulls
508
+ const definitionMutatedTools = results.filter((r) => r.definitionMutated);
509
+ if (definitionMutatedTools.length > 0) {
510
+ recommendations.push("CRITICAL: Tool definition/description mutations detected - this is a sophisticated rug pull attack that injects malicious instructions after N calls.");
511
+ for (const tool of definitionMutatedTools) {
512
+ const baseline = tool.definitionEvidence?.baselineDescription
513
+ ? `"${tool.definitionEvidence.baselineDescription.substring(0, 100)}..."`
514
+ : "unknown";
515
+ const mutated = tool.definitionEvidence?.mutatedDescription
516
+ ? `"${tool.definitionEvidence.mutatedDescription.substring(0, 100)}..."`
517
+ : "unknown";
518
+ recommendations.push(`${tool.tool}: Description changed at invocation ${tool.definitionMutationAt}. Baseline: ${baseline} → Mutated: ${mutated}`);
519
+ }
520
+ recommendations.push("Review tool source code for global state that mutates __doc__, description, or tool metadata based on call count.");
521
+ }
406
522
  const errorTools = results.filter((r) => r.errorCount > 0);
407
- if (errorTools.length > 0 && vulnerableTools.length === 0) {
523
+ if (errorTools.length > 0 &&
524
+ responseVulnerableTools.length === 0 &&
525
+ definitionMutatedTools.length === 0) {
408
526
  recommendations.push(`${errorTools.length} tool(s) had errors during repeated invocations. Review error handling and rate limiting.`);
409
527
  }
410
528
  return recommendations;
@@ -79,6 +79,11 @@ export declare class ToolAnnotationAssessor extends BaseAssessor {
79
79
  * Now includes alignment status with confidence-aware logic
80
80
  */
81
81
  private assessTool;
82
+ /**
83
+ * Scan tool description for poisoning patterns (Issue #8)
84
+ * Detects hidden instructions, override commands, concealment, and exfiltration attempts
85
+ */
86
+ private scanDescriptionForPoisoning;
82
87
  /**
83
88
  * Extract annotations from a tool
84
89
  * MCP SDK may have annotations in different locations
@@ -1 +1 @@
1
- {"version":3,"file":"ToolAnnotationAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ToolAnnotationAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,KAAK,EACV,wBAAwB,EACxB,oBAAoB,EAKpB,uBAAuB,EAExB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,EACL,KAAK,gBAAgB,EAGtB,MAAM,8BAA8B,CAAC;AAEtC;;GAEG;AACH,MAAM,WAAW,4BAA6B,SAAQ,oBAAoB;IACxE,eAAe,CAAC,EAAE;QAChB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,mBAAmB,EAAE,OAAO,CAAC;QAC7B,UAAU,EAAE,MAAM,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,oBAAoB,EAAE;YACpB,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,eAAe,CAAC,EAAE,OAAO,CAAC;YAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;SAC1B,CAAC;QACF,oBAAoB,EAAE,OAAO,CAAC;QAC9B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,MAAM,EAAE,iBAAiB,GAAG,eAAe,CAAC;KAC7C,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,gCAAiC,SAAQ,wBAAwB;IAChF,WAAW,EAAE,4BAA4B,EAAE,CAAC;IAC5C,cAAc,EAAE,OAAO,CAAC;IACxB,2BAA2B,EAAE,4BAA4B,EAAE,CAAC;CAC7D;AAKD,qBAAa,sBAAuB,SAAQ,YAAY;IACtD,OAAO,CAAC,YAAY,CAAC,CAAmB;IACxC,OAAO,CAAC,gBAAgB,CAAmB;gBAE/B,MAAM,EAAE,uBAAuB;IAM3C;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,gBAAgB,GAAG,IAAI;IAK7C;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAK/C;;OAEG;IACH,eAAe,IAAI,OAAO;IAO1B;;OAEG;IACG,MAAM,CACV,OAAO,EAAE,iBAAiB,GACzB,OAAO,CAAC,wBAAwB,GAAG,gCAAgC,CAAC;IA8QvE;;OAEG;YACW,0BAA0B;IA+IxC;;OAEG;IACH,OAAO,CAAC,2BAA2B;IAiCnC;;OAEG;IACH,OAAO,CAAC,+BAA+B;IAoFvC;;;OAGG;IACH,OAAO,CAAC,UAAU;IA+GlB;;;;;;;;;OASG;IACH,OAAO,CAAC,kBAAkB;IAyE1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAuBzB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAgGrB;;;OAGG;IACH,OAAO,CAAC,yBAAyB;IAkDjC;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAiDxB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAmC3B;;OAEG;IACH,OAAO,CAAC,uBAAuB;CA2ChC"}
1
+ {"version":3,"file":"ToolAnnotationAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ToolAnnotationAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,KAAK,EACV,wBAAwB,EACxB,oBAAoB,EAKpB,uBAAuB,EAExB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,EACL,KAAK,gBAAgB,EAGtB,MAAM,8BAA8B,CAAC;AAgNtC;;GAEG;AACH,MAAM,WAAW,4BAA6B,SAAQ,oBAAoB;IACxE,eAAe,CAAC,EAAE;QAChB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,mBAAmB,EAAE,OAAO,CAAC;QAC7B,UAAU,EAAE,MAAM,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,oBAAoB,EAAE;YACpB,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,eAAe,CAAC,EAAE,OAAO,CAAC;YAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;SAC1B,CAAC;QACF,oBAAoB,EAAE,OAAO,CAAC;QAC9B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,MAAM,EAAE,iBAAiB,GAAG,eAAe,CAAC;KAC7C,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,gCAAiC,SAAQ,wBAAwB;IAChF,WAAW,EAAE,4BAA4B,EAAE,CAAC;IAC5C,cAAc,EAAE,OAAO,CAAC;IACxB,2BAA2B,EAAE,4BAA4B,EAAE,CAAC;CAC7D;AAKD,qBAAa,sBAAuB,SAAQ,YAAY;IACtD,OAAO,CAAC,YAAY,CAAC,CAAmB;IACxC,OAAO,CAAC,gBAAgB,CAAmB;gBAE/B,MAAM,EAAE,uBAAuB;IAM3C;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,gBAAgB,GAAG,IAAI;IAK7C;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAK/C;;OAEG;IACH,eAAe,IAAI,OAAO;IAO1B;;OAEG;IACG,MAAM,CACV,OAAO,EAAE,iBAAiB,GACzB,OAAO,CAAC,wBAAwB,GAAG,gCAAgC,CAAC;IAkSvE;;OAEG;YACW,0BAA0B;IA+IxC;;OAEG;IACH,OAAO,CAAC,2BAA2B;IAiCnC;;OAEG;IACH,OAAO,CAAC,+BAA+B;IAoFvC;;;OAGG;IACH,OAAO,CAAC,UAAU;IA2HlB;;;OAGG;IACH,OAAO,CAAC,2BAA2B;IA2DnC;;;;;;;;;OASG;IACH,OAAO,CAAC,kBAAkB;IAyE1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAuBzB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAgGrB;;;OAGG;IACH,OAAO,CAAC,yBAAyB;IA0DjC;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAiDxB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAmC3B;;OAEG;IACH,OAAO,CAAC,uBAAuB;CA2ChC"}