@bryan-thompson/inspector-assessment-client 1.18.1 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/dist/assets/{OAuthCallback-DhwTOA1q.js → OAuthCallback-D_dKq_wM.js} +1 -1
  2. package/dist/assets/{OAuthDebugCallback-DzopkA29.js → OAuthDebugCallback-UqARwe_4.js} +1 -1
  3. package/dist/assets/{index-CzoGuYPy.css → index-32-uLPhe.css} +3 -0
  4. package/dist/assets/{index-zBRaltBB.js → index-B5_VY0TC.js} +458 -17
  5. package/dist/index.html +2 -2
  6. package/lib/lib/assessmentTypes.d.ts +45 -2
  7. package/lib/lib/assessmentTypes.d.ts.map +1 -1
  8. package/lib/lib/securityPatterns.d.ts.map +1 -1
  9. package/lib/lib/securityPatterns.js +82 -2
  10. package/lib/services/assessment/AssessmentOrchestrator.d.ts +1 -0
  11. package/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -1
  12. package/lib/services/assessment/AssessmentOrchestrator.js +5 -2
  13. package/lib/services/assessment/LanguageAwarePayloadGenerator.d.ts +41 -0
  14. package/lib/services/assessment/LanguageAwarePayloadGenerator.d.ts.map +1 -0
  15. package/lib/services/assessment/LanguageAwarePayloadGenerator.js +258 -0
  16. package/lib/services/assessment/ToolClassifier.d.ts +1 -0
  17. package/lib/services/assessment/ToolClassifier.d.ts.map +1 -1
  18. package/lib/services/assessment/ToolClassifier.js +26 -0
  19. package/lib/services/assessment/modules/ResourceAssessor.d.ts +5 -0
  20. package/lib/services/assessment/modules/ResourceAssessor.d.ts.map +1 -1
  21. package/lib/services/assessment/modules/ResourceAssessor.js +161 -4
  22. package/lib/services/assessment/modules/SecurityAssessor.d.ts +1 -0
  23. package/lib/services/assessment/modules/SecurityAssessor.d.ts.map +1 -1
  24. package/lib/services/assessment/modules/SecurityAssessor.js +47 -8
  25. package/lib/services/assessment/modules/TemporalAssessor.d.ts +5 -0
  26. package/lib/services/assessment/modules/TemporalAssessor.d.ts.map +1 -1
  27. package/lib/services/assessment/modules/TemporalAssessor.js +133 -15
  28. package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +5 -0
  29. package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -1
  30. package/lib/services/assessment/modules/ToolAnnotationAssessor.js +256 -1
  31. package/package.json +1 -1
@@ -13,7 +13,9 @@ import { BaseAssessor } from "./BaseAssessor.js";
13
13
  import { getAllAttackPatterns, getPayloadsForAttack, } from "../../../lib/securityPatterns.js";
14
14
  import { ToolClassifier, ToolCategory } from "../ToolClassifier.js";
15
15
  import { createConcurrencyLimit } from "../lib/concurrencyLimit.js";
16
+ import { LanguageAwarePayloadGenerator } from "../LanguageAwarePayloadGenerator.js";
16
17
  export class SecurityAssessor extends BaseAssessor {
18
+ languageGenerator = new LanguageAwarePayloadGenerator();
17
19
  async assess(context) {
18
20
  // Select tools for testing first
19
21
  const toolsToTest = this.selectToolsForTesting(context.tools);
@@ -1089,10 +1091,14 @@ export class SecurityAssessor extends BaseAssessor {
1089
1091
  /action\s+received:/i,
1090
1092
  /input\s+received:/i,
1091
1093
  /request\s+received:/i,
1092
- // Explicit safety indicators in JSON responses
1093
- /"safe":\s*true/i,
1094
- /"vulnerable":\s*false/i,
1095
- /"status":\s*"acknowledged"/i,
1094
+ // Explicit safety indicators in JSON responses (context-aware to avoid matching unrelated fields)
1095
+ // Require safety-related context: message, result, status, stored, reflected, etc.
1096
+ /"safe"\s*:\s*true[^}]*("message"|"result"|"status"|"response")/i,
1097
+ /("message"|"result"|"status"|"response")[^}]*"safe"\s*:\s*true/i,
1098
+ /"vulnerable"\s*:\s*false[^}]*("safe"|"stored"|"reflected"|"status")/i,
1099
+ /("safe"|"stored"|"reflected"|"status")[^}]*"vulnerable"\s*:\s*false/i,
1100
+ /"status"\s*:\s*"acknowledged"[^}]*("message"|"result"|"safe")/i,
1101
+ /("message"|"result"|"safe")[^}]*"status"\s*:\s*"acknowledged"/i,
1096
1102
  ];
1097
1103
  const reflectionPatterns = [
1098
1104
  ...statusPatterns,
@@ -1328,8 +1334,41 @@ export class SecurityAssessor extends BaseAssessor {
1328
1334
  const params = {};
1329
1335
  const targetParamTypes = payload.parameterTypes || [];
1330
1336
  let payloadInjected = false;
1331
- // Try to match payload to appropriate parameter by name
1332
- if (targetParamTypes.length > 0) {
1337
// Check for language-specific code execution parameters first.
// This enables detection of vulnerabilities in tools expecting Python/JS/SQL code:
// when a string parameter is recognised as taking code in a specific language,
// a payload written in that language is injected instead of the generic one.
for (const [key, prop] of Object.entries(schema.properties)) {
    const propSchema = prop;
    if (propSchema.type !== "string")
        continue;
    const detectedLanguage = this.languageGenerator.detectLanguage(key, tool.name, tool.description);
    // "generic" means no specific language was detected for this parameter.
    if (detectedLanguage === "generic")
        continue;
    const languagePayloads = this.languageGenerator.getPayloadsForLanguage(detectedLanguage);
    if (languagePayloads.length === 0)
        continue;
    // Decide whether the current attack pattern is a command-execution test.
    // "id" must appear as a whole word: a plain substring check would also fire
    // on unrelated payloads that merely contain "invalid", "hidden", "width", ...
    const payloadLower = payload.payload.toLowerCase();
    const isCommandTest = payloadLower.includes("whoami") ||
        payloadLower.includes("passwd") ||
        /\bid\b/.test(payloadLower);
    // Default to the first language payload; for command tests prefer one that
    // actually executes a command.
    let selectedPayload = languagePayloads[0];
    if (isCommandTest) {
        const cmdPayload = languagePayloads.find((lp) => lp.payload.includes("whoami") ||
            lp.payload.includes("subprocess") ||
            lp.payload.includes("execSync"));
        if (cmdPayload)
            selectedPayload = cmdPayload;
    }
    params[key] = selectedPayload.payload;
    payloadInjected = true;
    // Only the first matching parameter receives a language-specific payload.
    break;
}
1370
+ // Fall back to parameterTypes matching if no language-specific payload was used
1371
+ if (!payloadInjected && targetParamTypes.length > 0) {
1333
1372
  // Payload is parameter-specific (e.g., URLs only for "url" params)
1334
1373
  for (const [key, prop] of Object.entries(schema.properties)) {
1335
1374
  const propSchema = prop;
@@ -1343,8 +1382,8 @@ export class SecurityAssessor extends BaseAssessor {
1343
1382
  }
1344
1383
  }
1345
1384
  }
1346
- else {
1347
- // Generic payload - inject into first string parameter (original behavior)
1385
+ // Fall back to generic payload - inject into first string parameter (original behavior)
1386
+ if (!payloadInjected) {
1348
1387
  for (const [key, prop] of Object.entries(schema.properties)) {
1349
1388
  const propSchema = prop;
1350
1389
  if (propSchema.type === "string" && !payloadInjected) {
@@ -28,6 +28,11 @@ export declare class TemporalAssessor extends BaseAssessor {
28
28
  constructor(config: AssessmentConfiguration);
29
29
  assess(context: AssessmentContext): Promise<TemporalAssessment>;
30
30
  private assessTool;
31
+ /**
32
+ * Detect mutations in tool definition across invocation snapshots.
33
+ * DVMCP Challenge 4: Tool descriptions that mutate after N calls.
34
+ */
35
+ private detectDefinitionMutation;
31
36
  private analyzeResponses;
32
37
  /**
33
38
  * Generate a safe/neutral payload for a tool based on its input schema.
@@ -1 +1 @@
1
- {"version":3,"file":"TemporalAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/TemporalAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EACL,uBAAuB,EAEvB,kBAAkB,EAEnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAY9C,qBAAa,gBAAiB,SAAQ,YAAY;IAChD,OAAO,CAAC,kBAAkB,CAAS;IAGnC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAoBnC;IAGF,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAU;IAEjD;;;;;;;;;;OAUG;IACH,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CASrC;gBAEU,MAAM,EAAE,uBAAuB;IAKrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;YA8CvD,UAAU;IAkExB,OAAO,CAAC,gBAAgB;IAmFxB;;;OAGG;IACH,OAAO,CAAC,mBAAmB;IAsC3B;;;;OAIG;IACH,OAAO,CAAC,iBAAiB;IAoDzB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAKzB;;;;OAIG;IACH,OAAO,CAAC,cAAc;IAYtB;;;;;;OAMG;IACH,OAAO,CAAC,cAAc;IAuBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IAiCzB,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,mBAAmB;IAoB3B,OAAO,CAAC,uBAAuB;CA8BhC"}
1
+ {"version":3,"file":"TemporalAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/TemporalAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EACL,uBAAuB,EAEvB,kBAAkB,EAEnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AA+B9C,qBAAa,gBAAiB,SAAQ,YAAY;IAChD,OAAO,CAAC,kBAAkB,CAAS;IAGnC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAoBnC;IAGF,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAU;IAEjD;;;;;;;;;;OAUG;IACH,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CASrC;gBAEU,MAAM,EAAE,uBAAuB;IAKrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;YAqEvD,UAAU;IAuHxB;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAkChC,OAAO,CAAC,gBAAgB;IAmFxB;;;OAGG;IACH,OAAO,CAAC,mBAAmB;IAsC3B;;;;OAIG;IACH,OAAO,CAAC,iBAAiB;IAoDzB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAKzB;;;;OAIG;IACH,OAAO,CAAC,cAAc;IAYtB;;;;;;OAMG;IACH,OAAO,CAAC,cAAc;IAuBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IAiCzB,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,mBAAmB;IA+C3B,OAAO,CAAC,uBAAuB;CA+DhC"}
@@ -63,7 +63,15 @@ export class TemporalAssessor extends BaseAssessor {
63
63
  async assess(context) {
64
64
  const results = [];
65
65
  let rugPullsDetected = 0;
66
- this.log(`Starting temporal assessment with ${this.invocationsPerTool} invocations per tool`);
66
+ let definitionMutationsDetected = 0;
67
+ // Check if definition tracking is available
68
+ const canTrackDefinitions = typeof context.listTools === "function";
69
+ if (canTrackDefinitions) {
70
+ this.log(`Starting temporal assessment with ${this.invocationsPerTool} invocations per tool (definition tracking enabled)`);
71
+ }
72
+ else {
73
+ this.log(`Starting temporal assessment with ${this.invocationsPerTool} invocations per tool (definition tracking unavailable)`);
74
+ }
67
75
  for (const tool of context.tools) {
68
76
  // Skip if tool selection is configured and this tool isn't selected
69
77
  if (this.config.selectedToolsForTesting !== undefined &&
@@ -76,33 +84,63 @@ export class TemporalAssessor extends BaseAssessor {
76
84
  rugPullsDetected++;
77
85
  this.log(`RUG PULL DETECTED: ${tool.name} changed behavior at invocation ${result.firstDeviationAt}`);
78
86
  }
87
+ if (result.definitionMutated) {
88
+ definitionMutationsDetected++;
89
+ this.log(`DEFINITION MUTATION DETECTED: ${tool.name} changed description at invocation ${result.definitionMutationAt}`);
90
+ }
79
91
  // Respect delay between tests
80
92
  if (this.config.delayBetweenTests) {
81
93
  await this.sleep(this.config.delayBetweenTests);
82
94
  }
83
95
  }
84
- const status = this.determineTemporalStatus(rugPullsDetected, results);
96
+ // Status fails if either response or definition mutations detected
97
+ const totalVulnerabilities = rugPullsDetected + definitionMutationsDetected;
98
+ const status = this.determineTemporalStatus(totalVulnerabilities, results);
85
99
  return {
86
100
  toolsTested: results.length,
87
101
  invocationsPerTool: this.invocationsPerTool,
88
102
  rugPullsDetected,
103
+ definitionMutationsDetected,
89
104
  details: results,
90
105
  status,
91
- explanation: this.generateExplanation(rugPullsDetected, results),
106
+ explanation: this.generateExplanation(rugPullsDetected, definitionMutationsDetected, results),
92
107
  recommendations: this.generateRecommendations(results),
93
108
  };
94
109
  }
95
110
  async assessTool(context, tool) {
96
111
  const responses = [];
112
+ const definitionSnapshots = [];
97
113
  const payload = this.generateSafePayload(tool);
98
114
  // Reduce invocations for potentially destructive tools
99
115
  const isDestructive = this.isDestructiveTool(tool);
100
116
  const invocations = isDestructive
101
117
  ? Math.min(5, this.invocationsPerTool)
102
118
  : this.invocationsPerTool;
119
+ // Check if definition tracking is available
120
+ const canTrackDefinitions = typeof context.listTools === "function";
103
121
  this.log(`Testing ${tool.name} with ${invocations} invocations${isDestructive ? " (reduced - destructive)" : ""}`);
104
122
  for (let i = 1; i <= invocations; i++) {
105
123
  this.testCount++;
124
+ // Track tool definition BEFORE each invocation (if available)
125
+ // This detects rug pulls where description mutates after N calls
126
+ if (canTrackDefinitions) {
127
+ try {
128
+ const currentTools = await this.executeWithTimeout(context.listTools(), this.PER_INVOCATION_TIMEOUT);
129
+ const currentTool = currentTools.find((t) => t.name === tool.name);
130
+ if (currentTool) {
131
+ definitionSnapshots.push({
132
+ invocation: i,
133
+ description: currentTool.description,
134
+ inputSchema: currentTool.inputSchema,
135
+ timestamp: Date.now(),
136
+ });
137
+ }
138
+ }
139
+ catch {
140
+ // Definition tracking failed - continue with response tracking
141
+ this.log(`Warning: Failed to fetch tool definition for ${tool.name} at invocation ${i}`);
142
+ }
143
+ }
106
144
  try {
107
145
  // P2-2: Use shorter per-invocation timeout (10s vs default 30s)
108
146
  const response = await this.executeWithTimeout(context.callTool(tool.name, payload), this.PER_INVOCATION_TIMEOUT);
@@ -137,12 +175,59 @@ export class TemporalAssessor extends BaseAssessor {
137
175
  await this.sleep(50);
138
176
  }
139
177
  }
178
+ // Analyze responses for temporal behavior changes
140
179
  const result = this.analyzeResponses(tool, responses);
180
+ // Analyze definitions for mutation (rug pull via description change)
181
+ const definitionMutation = this.detectDefinitionMutation(definitionSnapshots);
141
182
  return {
142
183
  ...result,
143
184
  reducedInvocations: isDestructive,
185
+ // Add definition mutation results
186
+ definitionMutated: definitionMutation !== null,
187
+ definitionMutationAt: definitionMutation?.detectedAt ?? null,
188
+ definitionEvidence: definitionMutation
189
+ ? {
190
+ baselineDescription: definitionMutation.baselineDescription,
191
+ mutatedDescription: definitionMutation.mutatedDescription,
192
+ baselineSchema: definitionMutation.baselineSchema,
193
+ mutatedSchema: definitionMutation.mutatedSchema,
194
+ }
195
+ : undefined,
196
+ // If definition mutated, mark as vulnerable with DEFINITION pattern
197
+ vulnerable: result.vulnerable || definitionMutation !== null,
198
+ pattern: definitionMutation !== null ? "RUG_PULL_DEFINITION" : result.pattern,
199
+ severity: definitionMutation !== null || result.vulnerable ? "HIGH" : "NONE",
144
200
  };
145
201
  }
202
+ /**
203
+ * Detect mutations in tool definition across invocation snapshots.
204
+ * DVMCP Challenge 4: Tool descriptions that mutate after N calls.
205
+ */
206
+ detectDefinitionMutation(snapshots) {
207
+ if (snapshots.length < 2)
208
+ return null;
209
+ const baseline = snapshots[0];
210
+ for (let i = 1; i < snapshots.length; i++) {
211
+ const current = snapshots[i];
212
+ // Check if description changed
213
+ const descriptionChanged = baseline.description !== current.description;
214
+ // Check if schema changed (deep comparison)
215
+ const schemaChanged = JSON.stringify(baseline.inputSchema) !==
216
+ JSON.stringify(current.inputSchema);
217
+ if (descriptionChanged || schemaChanged) {
218
+ return {
219
+ detectedAt: current.invocation,
220
+ baselineDescription: baseline.description,
221
+ mutatedDescription: descriptionChanged
222
+ ? current.description
223
+ : undefined,
224
+ baselineSchema: schemaChanged ? baseline.inputSchema : undefined,
225
+ mutatedSchema: schemaChanged ? current.inputSchema : undefined,
226
+ };
227
+ }
228
+ }
229
+ return null;
230
+ }
146
231
  analyzeResponses(tool, responses) {
147
232
  if (responses.length === 0) {
148
233
  return {
@@ -380,31 +465,64 @@ export class TemporalAssessor extends BaseAssessor {
380
465
  }
381
466
  return "PASS";
382
467
  }
383
- generateExplanation(rugPullsDetected, results) {
468
+ generateExplanation(rugPullsDetected, definitionMutationsDetected, results) {
384
469
  if (results.length === 0) {
385
470
  return "No tools were tested for temporal vulnerabilities.";
386
471
  }
387
- if (rugPullsDetected === 0) {
388
- return `All ${results.length} tools showed consistent behavior across repeated invocations.`;
472
+ const parts = [];
473
+ // Report response-based rug pulls
474
+ if (rugPullsDetected > 0) {
475
+ const responseVulnerableTools = results
476
+ .filter((r) => r.vulnerable && r.pattern === "RUG_PULL_TEMPORAL")
477
+ .map((r) => `${r.tool} (changed at invocation ${r.firstDeviationAt})`)
478
+ .join(", ");
479
+ if (responseVulnerableTools) {
480
+ parts.push(`CRITICAL: ${rugPullsDetected} tool(s) showed temporal response changes: ${responseVulnerableTools}`);
481
+ }
482
+ }
483
+ // Report definition mutations
484
+ if (definitionMutationsDetected > 0) {
485
+ const definitionVulnerableTools = results
486
+ .filter((r) => r.definitionMutated)
487
+ .map((r) => `${r.tool} (description changed at invocation ${r.definitionMutationAt})`)
488
+ .join(", ");
489
+ parts.push(`CRITICAL: ${definitionMutationsDetected} tool(s) mutated their definition/description: ${definitionVulnerableTools}`);
490
+ }
491
+ if (parts.length === 0) {
492
+ return `All ${results.length} tools showed consistent behavior and definitions across repeated invocations.`;
389
493
  }
390
- const vulnerableTools = results
391
- .filter((r) => r.vulnerable)
392
- .map((r) => `${r.tool} (changed at invocation ${r.firstDeviationAt})`)
393
- .join(", ");
394
- return `CRITICAL: ${rugPullsDetected} tool(s) showed temporal behavior changes indicating potential rug pull vulnerability: ${vulnerableTools}`;
494
+ return parts.join(" ");
395
495
  }
396
496
  generateRecommendations(results) {
397
497
  const recommendations = [];
398
- const vulnerableTools = results.filter((r) => r.vulnerable);
399
- if (vulnerableTools.length > 0) {
498
+ // Response-based rug pulls
499
+ const responseVulnerableTools = results.filter((r) => r.vulnerable && r.pattern === "RUG_PULL_TEMPORAL");
500
+ if (responseVulnerableTools.length > 0) {
400
501
  recommendations.push("Immediately investigate tools with temporal behavior changes - this pattern is characteristic of rug pull attacks.");
401
- for (const tool of vulnerableTools) {
502
+ for (const tool of responseVulnerableTools) {
402
503
  recommendations.push(`Review ${tool.tool}: behavior changed after ${tool.firstDeviationAt} invocations. Compare safe vs malicious responses in evidence.`);
403
504
  }
404
505
  recommendations.push("Check for invocation counters, time-based triggers, or state accumulation in the tool implementation.");
405
506
  }
507
+ // Definition mutation rug pulls
508
+ const definitionMutatedTools = results.filter((r) => r.definitionMutated);
509
+ if (definitionMutatedTools.length > 0) {
510
+ recommendations.push("CRITICAL: Tool definition/description mutations detected - this is a sophisticated rug pull attack that injects malicious instructions after N calls.");
511
+ for (const tool of definitionMutatedTools) {
512
+ const baseline = tool.definitionEvidence?.baselineDescription
513
+ ? `"${tool.definitionEvidence.baselineDescription.substring(0, 100)}..."`
514
+ : "unknown";
515
+ const mutated = tool.definitionEvidence?.mutatedDescription
516
+ ? `"${tool.definitionEvidence.mutatedDescription.substring(0, 100)}..."`
517
+ : "unknown";
518
+ recommendations.push(`${tool.tool}: Description changed at invocation ${tool.definitionMutationAt}. Baseline: ${baseline} → Mutated: ${mutated}`);
519
+ }
520
+ recommendations.push("Review tool source code for global state that mutates __doc__, description, or tool metadata based on call count.");
521
+ }
406
522
  const errorTools = results.filter((r) => r.errorCount > 0);
407
- if (errorTools.length > 0 && vulnerableTools.length === 0) {
523
+ if (errorTools.length > 0 &&
524
+ responseVulnerableTools.length === 0 &&
525
+ definitionMutatedTools.length === 0) {
408
526
  recommendations.push(`${errorTools.length} tool(s) had errors during repeated invocations. Review error handling and rate limiting.`);
409
527
  }
410
528
  return recommendations;
@@ -79,6 +79,11 @@ export declare class ToolAnnotationAssessor extends BaseAssessor {
79
79
  * Now includes alignment status with confidence-aware logic
80
80
  */
81
81
  private assessTool;
82
+ /**
83
+ * Scan tool description for poisoning patterns (Issue #8)
84
+ * Detects hidden instructions, override commands, concealment, and exfiltration attempts
85
+ */
86
+ private scanDescriptionForPoisoning;
82
87
  /**
83
88
  * Extract annotations from a tool
84
89
  * MCP SDK may have annotations in different locations
@@ -1 +1 @@
1
- {"version":3,"file":"ToolAnnotationAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ToolAnnotationAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,KAAK,EACV,wBAAwB,EACxB,oBAAoB,EAKpB,uBAAuB,EAExB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,EACL,KAAK,gBAAgB,EAGtB,MAAM,8BAA8B,CAAC;AAEtC;;GAEG;AACH,MAAM,WAAW,4BAA6B,SAAQ,oBAAoB;IACxE,eAAe,CAAC,EAAE;QAChB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,mBAAmB,EAAE,OAAO,CAAC;QAC7B,UAAU,EAAE,MAAM,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,oBAAoB,EAAE;YACpB,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,eAAe,CAAC,EAAE,OAAO,CAAC;YAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;SAC1B,CAAC;QACF,oBAAoB,EAAE,OAAO,CAAC;QAC9B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,MAAM,EAAE,iBAAiB,GAAG,eAAe,CAAC;KAC7C,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,gCAAiC,SAAQ,wBAAwB;IAChF,WAAW,EAAE,4BAA4B,EAAE,CAAC;IAC5C,cAAc,EAAE,OAAO,CAAC;IACxB,2BAA2B,EAAE,4BAA4B,EAAE,CAAC;CAC7D;AAKD,qBAAa,sBAAuB,SAAQ,YAAY;IACtD,OAAO,CAAC,YAAY,CAAC,CAAmB;IACxC,OAAO,CAAC,gBAAgB,CAAmB;gBAE/B,MAAM,EAAE,uBAAuB;IAM3C;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,gBAAgB,GAAG,IAAI;IAK7C;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAK/C;;OAEG;IACH,eAAe,IAAI,OAAO;IAO1B;;OAEG;IACG,MAAM,CACV,OAAO,EAAE,iBAAiB,GACzB,OAAO,CAAC,wBAAwB,GAAG,gCAAgC,CAAC;IA8QvE;;OAEG;YACW,0BAA0B;IA+IxC;;OAEG;IACH,OAAO,CAAC,2BAA2B;IAiCnC;;OAEG;IACH,OAAO,CAAC,+BAA+B;IAoFvC;;;OAGG;IACH,OAAO,CAAC,UAAU;IA+GlB;;;;;;;;;OASG;IACH,OAAO,CAAC,kBAAkB;IAyE1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAuBzB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAgGrB;;;OAGG;IACH,OAAO,CAAC,yBAAyB;IAkDjC;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAiDxB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAmC3B;;OAEG;IACH,OAAO,CAAC,uBAAuB;CA2ChC"}
1
+ {"version":3,"file":"ToolAnnotationAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ToolAnnotationAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,KAAK,EACV,wBAAwB,EACxB,oBAAoB,EAKpB,uBAAuB,EAExB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,EACL,KAAK,gBAAgB,EAGtB,MAAM,8BAA8B,CAAC;AAgNtC;;GAEG;AACH,MAAM,WAAW,4BAA6B,SAAQ,oBAAoB;IACxE,eAAe,CAAC,EAAE;QAChB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,mBAAmB,EAAE,OAAO,CAAC;QAC7B,UAAU,EAAE,MAAM,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,oBAAoB,EAAE;YACpB,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,eAAe,CAAC,EAAE,OAAO,CAAC;YAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;SAC1B,CAAC;QACF,oBAAoB,EAAE,OAAO,CAAC;QAC9B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,MAAM,EAAE,iBAAiB,GAAG,eAAe,CAAC;KAC7C,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,gCAAiC,SAAQ,wBAAwB;IAChF,WAAW,EAAE,4BAA4B,EAAE,CAAC;IAC5C,cAAc,EAAE,OAAO,CAAC;IACxB,2BAA2B,EAAE,4BAA4B,EAAE,CAAC;CAC7D;AAKD,qBAAa,sBAAuB,SAAQ,YAAY;IACtD,OAAO,CAAC,YAAY,CAAC,CAAmB;IACxC,OAAO,CAAC,gBAAgB,CAAmB;gBAE/B,MAAM,EAAE,uBAAuB;IAM3C;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,gBAAgB,GAAG,IAAI;IAK7C;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAK/C;;OAEG;IACH,eAAe,IAAI,OAAO;IAO1B;;OAEG;IACG,MAAM,CACV,OAAO,EAAE,iBAAiB,GACzB,OAAO,CAAC,wBAAwB,GAAG,gCAAgC,CAAC;IAkSvE;;OAEG;YACW,0BAA0B;IA+IxC;;OAEG;IACH,OAAO,CAAC,2BAA2B;IAiCnC;;OAEG;IACH,OAAO,CAAC,+BAA+B;IAoFvC;;;OAGG;IACH,OAAO,CAAC,UAAU;IA2HlB;;;OAGG;IACH,OAAO,CAAC,2BAA2B;IA2DnC;;;;;;;;;OASG;IACH,OAAO,CAAC,kBAAkB;IAyE1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAuBzB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAgGrB;;;OAGG;IACH,OAAO,CAAC,yBAAyB;IA0DjC;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAiDxB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAmC3B;;OAEG;IACH,OAAO,CAAC,uBAAuB;CA2ChC"}
@@ -12,6 +12,190 @@
12
12
  */
13
13
  import { BaseAssessor } from "./BaseAssessor.js";
14
14
  import { getDefaultCompiledPatterns, matchToolPattern, } from "../config/annotationPatterns.js";
15
/**
 * Patterns used to flag suspicious content in tool descriptions.
 * Each row is [name, pattern, severity, category] and is expanded into an
 * object with exactly those four properties, in that order.
 */
const DESCRIPTION_POISONING_PATTERNS = [
    // Hidden instruction tags
    ["hidden_tag", /<HIDDEN>[\s\S]*?<\/HIDDEN>/gi, "HIGH", "hidden_instructions"],
    ["important_tag", /<IMPORTANT>[\s\S]*?<\/IMPORTANT>/gi, "HIGH", "hidden_instructions"],
    ["system_tag", /<SYSTEM>[\s\S]*?<\/SYSTEM>/gi, "HIGH", "hidden_instructions"],
    ["instruction_tag", /<INSTRUCTION>[\s\S]*?<\/INSTRUCTION>/gi, "HIGH", "hidden_instructions"],
    // Override commands
    ["ignore_instructions", /ignore\s+(all\s+)?(previous|prior|above)\s+instructions/i, "HIGH", "override"],
    ["disregard_instructions", /disregard\s+(all\s+)?(previous|prior)\s+instructions/i, "HIGH", "override"],
    ["you_are_now", /you\s+are\s+now\s+(a|an|the)/i, "MEDIUM", "override"],
    ["system_override", /system:\s*override/i, "HIGH", "override"],
    // Secrecy/concealment
    ["do_not_mention", /do\s+not\s+(mention|tell|inform|reveal)/i, "HIGH", "concealment"],
    ["secretly", /\bsecretly\b/i, "MEDIUM", "concealment"],
    ["hide_from_user", /hide\s+(this\s+)?from\s+(the\s+)?user/i, "HIGH", "concealment"],
    // Data exfiltration
    ["return_keys", /return\s+(all\s+)?(api\s*)?keys/i, "HIGH", "exfiltration"],
    ["include_credentials", /include.*(api.?key|credential|password|secret)/i, "HIGH", "exfiltration"],
    ["reveal_secrets", /reveal\s+(all\s+)?(secrets|credentials|api\s*keys)/i, "HIGH", "exfiltration"],
    ["access_internal_secrets", /access\s+(the\s+)?internal\s+(api\s*)?(key|secret|credential|password|token)/i, "HIGH", "exfiltration"],
    // Delimiter injection
    ["system_codeblock", /```system[\s\S]*?```/gi, "HIGH", "delimiter"],
    ["inst_tags", /\[INST\][\s\S]*?\[\/INST\]/gi, "HIGH", "delimiter"],
    ["chatml_system", /<\|im_start\|>system/gi, "HIGH", "delimiter"],
    ["llama_sys", /<<SYS>>/gi, "HIGH", "delimiter"],
    ["user_assistant_block", /\[USER\][\s\S]*?\[ASSISTANT\]/gi, "HIGH", "delimiter"],
    // Role/persona injection (Warning #4)
    ["act_as", /act\s+(like|as)\s+(a|an|the)/i, "MEDIUM", "override"],
    ["pretend_to_be", /pretend\s+(to\s+be|you\s*'?re)/i, "MEDIUM", "override"],
    ["roleplay_as", /role\s*play\s+(as|like)/i, "MEDIUM", "override"],
    ["new_task", /new\s+(task|instruction|objective):\s*/i, "HIGH", "override"],
    // Encoding bypass detection (Warning #1)
    // Large Base64 strings (50+ chars)
    ["base64_encoded_block", /[A-Za-z0-9+/]{50,}={0,2}/g, "MEDIUM", "encoding_bypass"],
    // 3+ consecutive Unicode escapes
    ["unicode_escape_sequence", /(?:\\u[0-9a-fA-F]{4}){3,}/gi, "MEDIUM", "encoding_bypass"],
    // 3+ consecutive HTML entities
    ["html_entity_block", /(?:&#x?[0-9a-fA-F]+;){3,}/gi, "MEDIUM", "encoding_bypass"],
    // Typoglycemia/evasion patterns (Warning #2)
    ["ignore_instructions_typo", /ign[o0]r[e3]?\s+(all\s+)?(pr[e3]v[i1][o0]us|pr[i1][o0]r|ab[o0]v[e3])\s+[i1]nstruct[i1][o0]ns?/i, "HIGH", "override"],
    ["disregard_typo", /d[i1]sr[e3]g[a4]rd\s+(all\s+)?(pr[e3]v[i1][o0]us|pr[i1][o0]r)\s+[i1]nstruct[i1][o0]ns?/i, "HIGH", "override"],
].map(([name, pattern, severity, category]) => ({ name, pattern, severity, category }));
15
199
  // NOTE: Pattern arrays moved to config/annotationPatterns.ts for configurability
16
200
  // The patterns are now loaded from getDefaultCompiledPatterns() or custom config
17
201
  export class ToolAnnotationAssessor extends BaseAssessor {
@@ -53,6 +237,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
53
237
  let annotatedCount = 0;
54
238
  let missingAnnotationsCount = 0;
55
239
  let misalignedAnnotationsCount = 0;
240
+ let poisonedDescriptionsCount = 0;
56
241
  // Track annotation sources
57
242
  const annotationSourceCounts = {
58
243
  mcp: 0,
@@ -128,6 +313,20 @@ export class ToolAnnotationAssessor extends BaseAssessor {
128
313
  else {
129
314
  annotationSourceCounts.none++;
130
315
  }
316
+ // Track and emit poisoned description detection (Issue #8)
317
+ if (latestResult.descriptionPoisoning?.detected) {
318
+ poisonedDescriptionsCount++;
319
+ this.log(`POISONED DESCRIPTION DETECTED: ${tool.name} contains suspicious patterns`);
320
+ if (context.onProgress) {
321
+ context.onProgress({
322
+ type: "annotation_poisoned",
323
+ tool: tool.name,
324
+ description: tool.description,
325
+ patterns: latestResult.descriptionPoisoning.patterns,
326
+ riskLevel: latestResult.descriptionPoisoning.riskLevel,
327
+ });
328
+ }
329
+ }
131
330
  // Emit annotation_missing event with tool details
132
331
  if (!latestResult.hasAnnotations) {
133
332
  if (context.onProgress && latestResult.inferredBehavior) {
@@ -231,7 +430,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
231
430
  const recommendations = this.generateRecommendations(toolResults);
232
431
  // Calculate new metrics and alignment breakdown
233
432
  const { metrics, alignmentBreakdown } = this.calculateMetrics(toolResults, context.tools.length);
234
- this.log(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned, ${alignmentBreakdown.reviewRecommended} need review`);
433
+ this.log(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned, ${alignmentBreakdown.reviewRecommended} need review, ${poisonedDescriptionsCount} poisoned`);
235
434
  // Return enhanced assessment if Claude was used
236
435
  if (useClaudeInference) {
237
436
  const highConfidenceMisalignments = toolResults.filter((r) => r.claudeInference &&
@@ -249,6 +448,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
249
448
  metrics,
250
449
  alignmentBreakdown,
251
450
  annotationSources: annotationSourceCounts,
451
+ poisonedDescriptionsDetected: poisonedDescriptionsCount,
252
452
  claudeEnhanced: true,
253
453
  highConfidenceMisalignments,
254
454
  };
@@ -264,6 +464,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
264
464
  metrics,
265
465
  alignmentBreakdown,
266
466
  annotationSources: annotationSourceCounts,
467
+ poisonedDescriptionsDetected: poisonedDescriptionsCount,
267
468
  };
268
469
  }
269
470
  /**
@@ -516,6 +717,12 @@ export class ToolAnnotationAssessor extends BaseAssessor {
516
717
  alignmentStatus = "MISALIGNED";
517
718
  }
518
719
  }
720
+ // Scan for description poisoning (Issue #8)
721
+ const descriptionPoisoning = this.scanDescriptionForPoisoning(tool);
722
+ if (descriptionPoisoning.detected) {
723
+ issues.push(`Tool description contains suspicious patterns: ${descriptionPoisoning.patterns.map((p) => p.name).join(", ")}`);
724
+ recommendations.push(`Review ${tool.name} description for potential prompt injection or hidden instructions`);
725
+ }
519
726
  return {
520
727
  toolName: tool.name,
521
728
  hasAnnotations,
@@ -525,6 +732,49 @@ export class ToolAnnotationAssessor extends BaseAssessor {
525
732
  alignmentStatus,
526
733
  issues,
527
734
  recommendations,
735
+ descriptionPoisoning,
736
+ };
737
+ }
738
+ /**
739
+ * Scan tool description for poisoning patterns (Issue #8)
740
+ * Detects hidden instructions, override commands, concealment, and exfiltration attempts
741
+ */
742
+ scanDescriptionForPoisoning(tool) {
743
+ const description = tool.description || "";
744
+ const matches = [];
745
+ for (const patternDef of DESCRIPTION_POISONING_PATTERNS) {
746
+ // Create a fresh regex to reset lastIndex
747
+ const regex = new RegExp(patternDef.pattern.source, patternDef.pattern.flags);
748
+ // Loop to find all matches (not just first)
749
+ let match;
750
+ while ((match = regex.exec(description)) !== null) {
751
+ matches.push({
752
+ name: patternDef.name,
753
+ pattern: patternDef.pattern.toString(),
754
+ severity: patternDef.severity,
755
+ category: patternDef.category,
756
+ evidence: match[0].substring(0, 100) + (match[0].length > 100 ? "..." : ""),
757
+ });
758
+ // Prevent infinite loop for patterns without 'g' flag
759
+ if (!regex.global)
760
+ break;
761
+ }
762
+ }
763
+ // Determine overall risk level based on highest severity match
764
+ let riskLevel = "NONE";
765
+ if (matches.some((m) => m.severity === "HIGH")) {
766
+ riskLevel = "HIGH";
767
+ }
768
+ else if (matches.some((m) => m.severity === "MEDIUM")) {
769
+ riskLevel = "MEDIUM";
770
+ }
771
+ else if (matches.length > 0) {
772
+ riskLevel = "LOW";
773
+ }
774
+ return {
775
+ detected: matches.length > 0,
776
+ patterns: matches,
777
+ riskLevel,
528
778
  };
529
779
  }
530
780
  /**
@@ -700,6 +950,11 @@ export class ToolAnnotationAssessor extends BaseAssessor {
700
950
  if (totalTools === 0)
701
951
  return "PASS";
702
952
  const annotatedCount = results.filter((r) => r.hasAnnotations).length;
953
+ // Check for poisoned descriptions (Issue #8) - critical security issue
954
+ const poisonedCount = results.filter((r) => r.descriptionPoisoning?.detected === true).length;
955
+ if (poisonedCount > 0) {
956
+ return "FAIL";
957
+ }
703
958
  // Only count actual MISALIGNED, not REVIEW_RECOMMENDED
704
959
  const misalignedCount = results.filter((r) => r.alignmentStatus === "MISALIGNED").length;
705
960
  // Count high-confidence destructive tools without proper hints