@bryan-thompson/inspector-assessment-client 1.26.0 → 1.26.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -72,6 +72,33 @@ export class TemporalAssessor extends BaseAssessor {
72
72
  "push",
73
73
  "enqueue",
74
74
  ];
/**
 * Issue #69: Tool-name keywords that mark a tool as resource-creating.
 *
 * A tool that creates a new resource is expected to answer with a fresh
 * ID/resource on every invocation, so its responses are judged with schema
 * comparison plus variance classification instead of exact equality.
 * Some entries (e.g., "create", "insert") may also appear in
 * DESTRUCTIVE_PATTERNS; they are listed here anyway so that such tools get
 * variance classification and do not produce false positives.
 *
 * Typical matches and the variance that is considered LEGITIMATE:
 * - create_billing_product → a new product_id per call
 * - generate_report        → a new report_id per call
 * - insert_record          → a new record_id per call
 */
RESOURCE_CREATING_PATTERNS = [
  "create", "new", "insert", "generate",
  "register", "allocate", "provision", "spawn",
  "instantiate", "init", "make",
];
75
102
  constructor(config) {
76
103
  super(config);
77
104
  this.invocationsPerTool = config.temporalInvocations ?? 25;
@@ -260,39 +287,61 @@ export class TemporalAssessor extends BaseAssessor {
260
287
  const baseline = this.normalizeResponse(responses[0].response);
261
288
  const deviations = [];
262
289
  const errors = [];
263
- // For stateful tools (search, list, etc.), use schema comparison instead of exact match
264
- // These tools legitimately return different content based on data state
290
+ // Issue #69: Track variance details for transparency
291
+ const varianceDetails = [];
292
+ // Determine comparison strategy
293
+ // 1. Stateful tools (search, list, etc.) - use schema comparison
294
+ // 2. Resource-creating tools (create, insert, etc.) - use variance classification
295
+ // 3. All other tools - use exact comparison
265
296
  const isStateful = this.isStatefulTool(tool);
297
+ const isResourceCreating = this.isResourceCreatingTool(tool);
266
298
  if (isStateful) {
267
299
  this.log(`${tool.name} classified as stateful - using schema comparison`);
268
300
  }
301
+ else if (isResourceCreating) {
302
+ this.log(`${tool.name} classified as resource-creating - using variance classification`);
303
+ }
269
304
  for (let i = 1; i < responses.length; i++) {
270
305
  if (responses[i].error) {
271
306
  errors.push(i + 1); // Track errors as potential indicators
272
307
  deviations.push(i + 1);
273
308
  }
274
- else {
275
- let isDifferent;
276
- if (isStateful) {
277
- // Schema-only comparison for stateful tools
278
- // Content can vary, but field names should remain consistent
279
- isDifferent = !this.compareSchemas(responses[0].response, responses[i].response);
280
- // Secondary detection: Check for content semantic changes (rug pull patterns)
281
- // This catches cases where schema is same but content shifts from helpful to harmful
282
- if (!isDifferent) {
283
- const contentChange = this.detectStatefulContentChange(responses[0].response, responses[i].response);
284
- if (contentChange.detected) {
285
- isDifferent = true;
286
- this.log(`${tool.name}: Content semantic change detected at invocation ${i + 1} - ${contentChange.reason}`);
287
- }
309
+ else if (isStateful) {
310
+ // Original stateful tool logic: schema comparison + behavioral content check
311
+ // Content variance is allowed as long as schema is consistent
312
+ let isDifferent = !this.compareSchemas(responses[0].response, responses[i].response);
313
+ // Secondary detection: Check for content semantic changes (rug pull patterns)
314
+ // This catches cases where schema is same but content shifts from helpful to harmful
315
+ if (!isDifferent) {
316
+ const contentChange = this.detectStatefulContentChange(responses[0].response, responses[i].response);
317
+ if (contentChange.detected) {
318
+ isDifferent = true;
319
+ this.log(`${tool.name}: Content semantic change detected at invocation ${i + 1} - ${contentChange.reason}`);
288
320
  }
289
321
  }
290
- else {
291
- // Exact comparison for non-stateful tools
292
- const normalized = this.normalizeResponse(responses[i].response);
293
- isDifferent = normalized !== baseline;
294
- }
295
322
  if (isDifferent) {
323
+ deviations.push(i + 1);
324
+ }
325
+ }
326
+ else if (isResourceCreating) {
327
+ // Issue #69: Use variance classification for resource-creating tools
328
+ // These need intelligent classification to distinguish ID variance from rug pulls
329
+ const classification = this.classifyVariance(tool, responses[0].response, responses[i].response);
330
+ varianceDetails.push({
331
+ invocation: i + 1,
332
+ classification,
333
+ });
334
+ // Only flag SUSPICIOUS and BEHAVIORAL as deviations
335
+ // LEGITIMATE variance is expected for resource-creating tools
336
+ if (classification.type !== "LEGITIMATE") {
337
+ deviations.push(i + 1);
338
+ this.log(`${tool.name}: ${classification.type} variance at invocation ${i + 1} - ${classification.reasons.join(", ")}`);
339
+ }
340
+ }
341
+ else {
342
+ // Exact comparison for non-stateful, non-resource-creating tools
343
+ const normalized = this.normalizeResponse(responses[i].response);
344
+ if (normalized !== baseline) {
296
345
  deviations.push(i + 1); // 1-indexed
297
346
  }
298
347
  }
@@ -302,6 +351,21 @@ export class TemporalAssessor extends BaseAssessor {
302
351
  // - deviationCount = total behavior changes (including errors)
303
352
  // - errorCount = how many of those were errors specifically
304
353
  const isVulnerable = deviations.length > 0;
354
+ // Generate appropriate note based on tool type and result
355
+ let note;
356
+ if (isStateful) {
357
+ // Preserve original stateful tool messages for backward compatibility
358
+ note = isVulnerable
359
+ ? "Stateful tool - secondary content analysis detected rug pull"
360
+ : "Stateful tool - content variation expected, schema consistent";
361
+ }
362
+ else if (isResourceCreating) {
363
+ note = isVulnerable
364
+ ? "Resource-creating tool - variance classification detected suspicious/behavioral change"
365
+ : "Resource-creating tool - ID/timestamp variance expected, no suspicious patterns";
366
+ }
367
+ // Issue #69: Get the first suspicious/behavioral classification for evidence
368
+ const firstSuspiciousClassification = varianceDetails.find((v) => v.classification.type !== "LEGITIMATE");
305
369
  return {
306
370
  tool: tool.name,
307
371
  vulnerable: isVulnerable,
@@ -317,12 +381,10 @@ export class TemporalAssessor extends BaseAssessor {
317
381
  maliciousResponseExample: responses[deviations[0] - 1]?.response ?? null,
318
382
  }
319
383
  : undefined,
320
- // Add note for stateful tools - different messages for pass vs fail
321
- note: isStateful
322
- ? isVulnerable
323
- ? "Stateful tool - secondary content analysis detected rug pull"
324
- : "Stateful tool - content variation expected, schema consistent"
325
- : undefined,
384
+ note,
385
+ // Issue #69: Include variance classification for transparency
386
+ varianceClassification: firstSuspiciousClassification?.classification,
387
+ varianceDetails: varianceDetails.length > 0 ? varianceDetails : undefined,
326
388
  };
327
389
  }
328
390
  /**
@@ -451,6 +513,184 @@ export class TemporalAssessor extends BaseAssessor {
451
513
  return wordBoundaryRegex.test(toolName);
452
514
  });
453
515
  }
516
+ /**
517
+ * Issue #69: Check if a tool creates new resources that legitimately vary per invocation.
518
+ * Resource-creating tools return different IDs, creation timestamps, etc.
519
+ * for each new resource - this is expected behavior, NOT a rug pull.
520
+ *
521
+ * Unlike isStatefulTool(), this DOES include patterns that overlap with DESTRUCTIVE_PATTERNS
522
+ * because resource-creating tools need intelligent variance classification, not exact comparison.
523
+ *
524
+ * Uses word-boundary matching like isStatefulTool() to prevent false matches.
525
+ * - "create_billing_product" matches "create" ✓
526
+ * - "recreate_view" does NOT match "create" ✓ (must be at word boundary)
527
+ */
528
+ isResourceCreatingTool(tool) {
529
+ const toolName = tool.name.toLowerCase();
530
+ return this.RESOURCE_CREATING_PATTERNS.some((pattern) => {
531
+ const wordBoundaryRegex = new RegExp(`(^|_|-)${pattern}($|_|-)`);
532
+ return wordBoundaryRegex.test(toolName);
533
+ });
534
+ }
535
+ /**
536
+ * Issue #69: Classify variance between two responses to reduce false positives.
537
+ * Returns LEGITIMATE for expected variance (IDs, timestamps), SUSPICIOUS for
538
+ * schema changes, and BEHAVIORAL for semantic changes (promotional keywords, errors).
539
+ */
540
+ classifyVariance(_tool, baseline, current) {
541
+ // 1. Schema comparison - structural changes are SUSPICIOUS
542
+ const schemaMatch = this.compareSchemas(baseline, current);
543
+ if (!schemaMatch) {
544
+ return {
545
+ type: "SUSPICIOUS",
546
+ confidence: "high",
547
+ reasons: ["Schema/field structure changed between invocations"],
548
+ suspiciousPatterns: ["schema_change"],
549
+ };
550
+ }
551
+ // 2. Content change detection - promotional/error keywords are BEHAVIORAL
552
+ const contentChange = this.detectStatefulContentChange(baseline, current);
553
+ if (contentChange.detected) {
554
+ return {
555
+ type: "BEHAVIORAL",
556
+ confidence: "high",
557
+ reasons: [`Behavioral change detected: ${contentChange.reason}`],
558
+ suspiciousPatterns: [contentChange.reason || "content_change"],
559
+ };
560
+ }
561
+ // 3. After normalization, if responses match = LEGITIMATE
562
+ const normalizedBaseline = this.normalizeResponse(baseline);
563
+ const normalizedCurrent = this.normalizeResponse(current);
564
+ if (normalizedBaseline === normalizedCurrent) {
565
+ return {
566
+ type: "LEGITIMATE",
567
+ confidence: "high",
568
+ reasons: ["All differences normalized (IDs, timestamps, counters)"],
569
+ };
570
+ }
571
+ // 4. Check for legitimate field variance (any _id, _at, token fields)
572
+ const variedFields = this.findVariedFields(baseline, current);
573
+ const unexplainedFields = variedFields.filter((f) => !this.isLegitimateFieldVariance(f));
574
+ if (unexplainedFields.length === 0) {
575
+ return {
576
+ type: "LEGITIMATE",
577
+ confidence: "high",
578
+ reasons: [
579
+ `Variance only in legitimate fields: ${variedFields.join(", ")}`,
580
+ ],
581
+ variedFields,
582
+ };
583
+ }
584
+ // 5. Some unexplained variance - flag as suspicious with low confidence
585
+ return {
586
+ type: "SUSPICIOUS",
587
+ confidence: "low",
588
+ reasons: [
589
+ `Unexplained variance in fields: ${unexplainedFields.join(", ")}`,
590
+ ],
591
+ variedFields,
592
+ suspiciousPatterns: ["unclassified_variance"],
593
+ };
594
+ }
595
+ /**
596
+ * Issue #69: Check if a field name represents legitimate variance.
597
+ * Fields containing IDs, timestamps, tokens, etc. are expected to vary.
598
+ */
599
+ isLegitimateFieldVariance(field) {
600
+ const fieldLower = field.toLowerCase();
601
+ // ID fields - any field ending in _id or containing "id" at word boundary
602
+ if (fieldLower.endsWith("_id") || fieldLower.endsWith("id"))
603
+ return true;
604
+ if (fieldLower.includes("_id_") || fieldLower.startsWith("id_"))
605
+ return true;
606
+ // Timestamp fields
607
+ if (fieldLower.endsWith("_at") || fieldLower.endsWith("at"))
608
+ return true;
609
+ if (fieldLower.includes("time") ||
610
+ fieldLower.includes("date") ||
611
+ fieldLower.includes("timestamp"))
612
+ return true;
613
+ // Token/session fields
614
+ if (fieldLower.includes("token") ||
615
+ fieldLower.includes("cursor") ||
616
+ fieldLower.includes("nonce"))
617
+ return true;
618
+ if (fieldLower.includes("session") || fieldLower.includes("correlation"))
619
+ return true;
620
+ // Pagination fields
621
+ if (fieldLower.includes("offset") ||
622
+ fieldLower.includes("page") ||
623
+ fieldLower.includes("next"))
624
+ return true;
625
+ // Counter/accumulation fields
626
+ if (fieldLower.includes("count") ||
627
+ fieldLower.includes("total") ||
628
+ fieldLower.includes("size"))
629
+ return true;
630
+ if (fieldLower.includes("length") || fieldLower.includes("index"))
631
+ return true;
632
+ // Array content fields (search results, items)
633
+ if (fieldLower.includes("results") ||
634
+ fieldLower.includes("items") ||
635
+ fieldLower.includes("data"))
636
+ return true;
637
+ // Hash/version fields
638
+ if (fieldLower.includes("hash") ||
639
+ fieldLower.includes("etag") ||
640
+ fieldLower.includes("version"))
641
+ return true;
642
+ return false;
643
+ }
644
+ /**
645
+ * Issue #69: Find which fields differ between two responses.
646
+ * Returns field paths that have different values.
647
+ */
648
+ findVariedFields(obj1, obj2, prefix = "") {
649
+ const varied = [];
650
+ // Handle primitives
651
+ if (typeof obj1 !== "object" || obj1 === null) {
652
+ if (obj1 !== obj2) {
653
+ return [prefix || "value"];
654
+ }
655
+ return [];
656
+ }
657
+ if (typeof obj2 !== "object" || obj2 === null) {
658
+ return [prefix || "value"];
659
+ }
660
+ // Handle arrays - just note if length or content differs
661
+ if (Array.isArray(obj1) || Array.isArray(obj2)) {
662
+ const arr1 = Array.isArray(obj1) ? obj1 : [];
663
+ const arr2 = Array.isArray(obj2) ? obj2 : [];
664
+ if (JSON.stringify(arr1) !== JSON.stringify(arr2)) {
665
+ return [prefix || "array"];
666
+ }
667
+ return [];
668
+ }
669
+ // Handle objects
670
+ const allKeys = new Set([
671
+ ...Object.keys(obj1),
672
+ ...Object.keys(obj2),
673
+ ]);
674
+ for (const key of allKeys) {
675
+ const val1 = obj1[key];
676
+ const val2 = obj2[key];
677
+ const fieldPath = prefix ? `${prefix}.${key}` : key;
678
+ if (JSON.stringify(val1) !== JSON.stringify(val2)) {
679
+ // If both are objects, recurse to find specific field
680
+ if (typeof val1 === "object" &&
681
+ val1 !== null &&
682
+ typeof val2 === "object" &&
683
+ val2 !== null) {
684
+ const nestedVaried = this.findVariedFields(val1, val2, fieldPath);
685
+ varied.push(...nestedVaried);
686
+ }
687
+ else {
688
+ varied.push(fieldPath);
689
+ }
690
+ }
691
+ }
692
+ return varied;
693
+ }
454
694
  /**
455
695
  * Compare response schemas (field names) rather than full content.
456
696
  * Stateful tools may have different values but should have consistent fields.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bryan-thompson/inspector-assessment-client",
3
- "version": "1.26.0",
3
+ "version": "1.26.2",
4
4
  "description": "Client-side application for the Enhanced MCP Inspector with assessment capabilities",
5
5
  "license": "MIT",
6
6
  "author": "Bryan Thompson <bryan@triepod.ai>",