@bryan-thompson/inspector-assessment-client 1.28.0 → 1.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/dist/assets/{OAuthCallback-JnKCxulS.js → OAuthCallback-9Gbb39Ii.js} +1 -1
  2. package/dist/assets/{OAuthDebugCallback-C2zSlEIQ.js → OAuthDebugCallback-B76J2MBn.js} +1 -1
  3. package/dist/assets/{index-C3xZdIFQ.js → index-CHTOR9VI.js} +4 -4
  4. package/dist/index.html +1 -1
  5. package/lib/lib/assessment/resultTypes.d.ts +8 -0
  6. package/lib/lib/assessment/resultTypes.d.ts.map +1 -1
  7. package/lib/lib/securityPatterns.d.ts +1 -1
  8. package/lib/lib/securityPatterns.js +1 -1
  9. package/lib/services/assessment/modules/TemporalAssessor.d.ts +5 -129
  10. package/lib/services/assessment/modules/TemporalAssessor.d.ts.map +1 -1
  11. package/lib/services/assessment/modules/TemporalAssessor.js +18 -554
  12. package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +10 -70
  13. package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -1
  14. package/lib/services/assessment/modules/ToolAnnotationAssessor.js +32 -625
  15. package/lib/services/assessment/modules/annotations/AlignmentChecker.d.ts +65 -0
  16. package/lib/services/assessment/modules/annotations/AlignmentChecker.d.ts.map +1 -0
  17. package/lib/services/assessment/modules/annotations/AlignmentChecker.js +289 -0
  18. package/lib/services/assessment/modules/annotations/ClaudeIntegration.d.ts +22 -0
  19. package/lib/services/assessment/modules/annotations/ClaudeIntegration.d.ts.map +1 -0
  20. package/lib/services/assessment/modules/annotations/ClaudeIntegration.js +139 -0
  21. package/lib/services/assessment/modules/annotations/EventEmitter.d.ts +20 -0
  22. package/lib/services/assessment/modules/annotations/EventEmitter.d.ts.map +1 -0
  23. package/lib/services/assessment/modules/annotations/EventEmitter.js +100 -0
  24. package/lib/services/assessment/modules/annotations/ExplanationGenerator.d.ts +25 -0
  25. package/lib/services/assessment/modules/annotations/ExplanationGenerator.d.ts.map +1 -0
  26. package/lib/services/assessment/modules/annotations/ExplanationGenerator.js +122 -0
  27. package/lib/services/assessment/modules/annotations/index.d.ts +5 -0
  28. package/lib/services/assessment/modules/annotations/index.d.ts.map +1 -1
  29. package/lib/services/assessment/modules/annotations/index.js +8 -0
  30. package/lib/services/assessment/modules/annotations/types.d.ts +33 -0
  31. package/lib/services/assessment/modules/annotations/types.d.ts.map +1 -0
  32. package/lib/services/assessment/modules/annotations/types.js +7 -0
  33. package/lib/services/assessment/modules/securityTests/SafeResponseDetector.d.ts +3 -0
  34. package/lib/services/assessment/modules/securityTests/SafeResponseDetector.d.ts.map +1 -1
  35. package/lib/services/assessment/modules/securityTests/SafeResponseDetector.js +14 -1
  36. package/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.d.ts +29 -0
  37. package/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.d.ts.map +1 -1
  38. package/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.js +71 -0
  39. package/lib/services/assessment/modules/securityTests/SecurityPayloadTester.d.ts.map +1 -1
  40. package/lib/services/assessment/modules/securityTests/SecurityPayloadTester.js +24 -0
  41. package/lib/services/assessment/modules/securityTests/SecurityResponseAnalyzer.d.ts +66 -0
  42. package/lib/services/assessment/modules/securityTests/SecurityResponseAnalyzer.d.ts.map +1 -1
  43. package/lib/services/assessment/modules/securityTests/SecurityResponseAnalyzer.js +228 -3
  44. package/lib/services/assessment/modules/temporal/MutationDetector.d.ts +75 -0
  45. package/lib/services/assessment/modules/temporal/MutationDetector.d.ts.map +1 -0
  46. package/lib/services/assessment/modules/temporal/MutationDetector.js +147 -0
  47. package/lib/services/assessment/modules/temporal/VarianceClassifier.d.ts +112 -0
  48. package/lib/services/assessment/modules/temporal/VarianceClassifier.d.ts.map +1 -0
  49. package/lib/services/assessment/modules/temporal/VarianceClassifier.js +427 -0
  50. package/lib/services/assessment/modules/temporal/index.d.ts +10 -0
  51. package/lib/services/assessment/modules/temporal/index.d.ts.map +1 -0
  52. package/lib/services/assessment/modules/temporal/index.js +9 -0
  53. package/package.json +1 -1
@@ -15,11 +15,25 @@
15
15
  * - BehaviorInference: Infers expected behavior from tool names
16
16
  * - AnnotationDeceptionDetector: Detects keyword-based misalignments
17
17
  * - DescriptionPoisoningDetector: Detects malicious content in descriptions
18
+ *
19
+ * Refactored in Issue #105 to delegate to extracted helper modules:
20
+ * - AlignmentChecker: Tool alignment assessment and metrics
21
+ * - ExplanationGenerator: Explanation and recommendation generation
22
+ * - EventEmitter: Progress event emission
23
+ * - ClaudeIntegration: Claude-enhanced behavior inference
18
24
  */
19
25
  import { BaseAssessor } from "./BaseAssessor.js";
20
26
  import { getDefaultCompiledPatterns, detectPersistenceModel, } from "../config/annotationPatterns.js";
21
- // Import from extracted modules
22
- import { scanDescriptionForPoisoning, detectAnnotationDeception, isActionableConfidence, inferBehavior, detectArchitecture, } from "./annotations/index.js";
27
+ // Import from extracted modules (Issue #105)
28
+ import { detectArchitecture,
29
+ // Alignment checking
30
+ assessSingleTool, determineAnnotationStatus, calculateMetrics,
31
+ // Explanation generation
32
+ generateExplanation, generateEnhancedExplanation, generateRecommendations, generateEnhancedRecommendations,
33
+ // Event emission
34
+ emitAnnotationEvents,
35
+ // Claude integration
36
+ enhanceWithClaudeInference, createPatternBasedInference, } from "./annotations/index.js";
23
37
  export class ToolAnnotationAssessor extends BaseAssessor {
24
38
  claudeBridge;
25
39
  compiledPatterns;
@@ -112,10 +126,11 @@ export class ToolAnnotationAssessor extends BaseAssessor {
112
126
  }
113
127
  for (const tool of context.tools) {
114
128
  this.testCount++;
115
- const result = this.assessTool(tool);
129
+ // Use extracted assessSingleTool function
130
+ const result = assessSingleTool(tool, this.compiledPatterns, this.persistenceContext);
116
131
  // Enhance with Claude inference if available
117
132
  if (useClaudeInference) {
118
- const enhancedResult = await this.enhanceWithClaudeInference(tool, result);
133
+ const enhancedResult = await enhanceWithClaudeInference(tool, result, this.claudeBridge, this.logger);
119
134
  toolResults.push(enhancedResult);
120
135
  if (enhancedResult.claudeInference &&
121
136
  enhancedResult.claudeInference.confidence >= 70 &&
@@ -127,26 +142,9 @@ export class ToolAnnotationAssessor extends BaseAssessor {
127
142
  }
128
143
  }
129
144
  else {
130
- const inferredBehavior = result.inferredBehavior ?? {
131
- expectedReadOnly: false,
132
- expectedDestructive: false,
133
- reason: "No behavior inference available",
134
- };
135
- toolResults.push({
136
- ...result,
137
- claudeInference: {
138
- expectedReadOnly: inferredBehavior.expectedReadOnly,
139
- expectedDestructive: inferredBehavior.expectedDestructive,
140
- confidence: 50,
141
- reasoning: inferredBehavior.reason,
142
- suggestedAnnotations: {
143
- readOnlyHint: inferredBehavior.expectedReadOnly,
144
- destructiveHint: inferredBehavior.expectedDestructive,
145
- },
146
- misalignmentDetected: result.issues.some((i) => i.includes("misaligned")),
147
- source: "pattern-based",
148
- },
149
- });
145
+ // Use pattern-based inference fallback
146
+ const enhancedResult = createPatternBasedInference(result);
147
+ toolResults.push(enhancedResult);
150
148
  if (result.issues.some((i) => i.includes("misaligned"))) {
151
149
  misalignedAnnotationsCount++;
152
150
  }
@@ -222,13 +220,14 @@ export class ToolAnnotationAssessor extends BaseAssessor {
222
220
  });
223
221
  }
224
222
  }
225
- // Emit annotation events
226
- this.emitAnnotationEvents(context, tool, latestResult);
227
- }
228
- const status = this.determineAnnotationStatus(toolResults, context.tools.length);
229
- const explanation = this.generateExplanation(annotatedCount, missingAnnotationsCount, misalignedAnnotationsCount, context.tools.length);
230
- const recommendations = this.generateRecommendations(toolResults);
231
- const { metrics, alignmentBreakdown } = this.calculateMetrics(toolResults, context.tools.length);
223
+ // Emit annotation events using extracted function
224
+ emitAnnotationEvents(context.onProgress, tool, latestResult);
225
+ }
226
+ // Use extracted functions for status, explanation, and recommendations
227
+ const status = determineAnnotationStatus(toolResults, context.tools.length);
228
+ const explanation = generateExplanation(annotatedCount, missingAnnotationsCount, misalignedAnnotationsCount, context.tools.length);
229
+ const recommendations = generateRecommendations(toolResults);
230
+ const { metrics, alignmentBreakdown } = calculateMetrics(toolResults, context.tools.length);
232
231
  this.logger.info(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned, ${alignmentBreakdown.reviewRecommended} need review, ${poisonedDescriptionsCount} poisoned`);
233
232
  if (useClaudeInference) {
234
233
  const highConfidenceMisalignments = toolResults.filter((r) => r.claudeInference &&
@@ -240,8 +239,8 @@ export class ToolAnnotationAssessor extends BaseAssessor {
240
239
  missingAnnotationsCount,
241
240
  misalignedAnnotationsCount,
242
241
  status,
243
- explanation: this.generateEnhancedExplanation(annotatedCount, missingAnnotationsCount, highConfidenceMisalignments.length, context.tools.length),
244
- recommendations: this.generateEnhancedRecommendations(toolResults),
242
+ explanation: generateEnhancedExplanation(annotatedCount, missingAnnotationsCount, highConfidenceMisalignments.length, context.tools.length),
243
+ recommendations: generateEnhancedRecommendations(toolResults),
245
244
  metrics,
246
245
  alignmentBreakdown,
247
246
  annotationSources: annotationSourceCounts,
@@ -288,596 +287,4 @@ export class ToolAnnotationAssessor extends BaseAssessor {
288
287
  },
289
288
  };
290
289
  }
291
- /**
292
- * Emit annotation-related progress events
293
- */
294
- emitAnnotationEvents(context, tool, result) {
295
- if (!context.onProgress || !result.inferredBehavior)
296
- return;
297
- const annotations = result.annotations;
298
- const inferred = result.inferredBehavior;
299
- const confidence = result.claudeInference?.confidence ?? 50;
300
- const toolParams = this.extractToolParams(tool.inputSchema);
301
- const toolAnnotations = this.extractAnnotations(tool);
302
- // Emit missing annotation event
303
- if (!result.hasAnnotations) {
304
- context.onProgress({
305
- type: "annotation_missing",
306
- tool: tool.name,
307
- title: toolAnnotations.title,
308
- description: tool.description,
309
- parameters: toolParams,
310
- inferredBehavior: {
311
- expectedReadOnly: inferred.expectedReadOnly,
312
- expectedDestructive: inferred.expectedDestructive,
313
- reason: inferred.reason,
314
- },
315
- });
316
- return;
317
- }
318
- // Emit aligned event
319
- if (result.alignmentStatus === "ALIGNED") {
320
- context.onProgress({
321
- type: "annotation_aligned",
322
- tool: tool.name,
323
- confidence: inferred.confidence ?? "medium",
324
- annotations: {
325
- readOnlyHint: annotations?.readOnlyHint,
326
- destructiveHint: annotations?.destructiveHint,
327
- openWorldHint: annotations?.openWorldHint,
328
- idempotentHint: annotations?.idempotentHint,
329
- },
330
- });
331
- return;
332
- }
333
- // Check readOnlyHint mismatch
334
- if (annotations?.readOnlyHint !== undefined &&
335
- annotations.readOnlyHint !== inferred.expectedReadOnly) {
336
- this.emitMismatchEvent(context, tool, toolParams, toolAnnotations, "readOnlyHint", annotations.readOnlyHint, inferred.expectedReadOnly, confidence, inferred, result.alignmentStatus);
337
- }
338
- // Check destructiveHint mismatch
339
- if (annotations?.destructiveHint !== undefined &&
340
- annotations.destructiveHint !== inferred.expectedDestructive) {
341
- this.emitMismatchEvent(context, tool, toolParams, toolAnnotations, "destructiveHint", annotations.destructiveHint, inferred.expectedDestructive, confidence, inferred, result.alignmentStatus);
342
- }
343
- }
344
- /**
345
- * Emit mismatch event (misaligned or review_recommended)
346
- */
347
- emitMismatchEvent(context, tool, toolParams, toolAnnotations, field, actual, expected, confidence, inferred, alignmentStatus) {
348
- if (!context.onProgress)
349
- return;
350
- if (alignmentStatus === "REVIEW_RECOMMENDED") {
351
- context.onProgress({
352
- type: "annotation_review_recommended",
353
- tool: tool.name,
354
- title: toolAnnotations.title,
355
- description: tool.description,
356
- parameters: toolParams,
357
- field,
358
- actual,
359
- inferred: expected,
360
- confidence: inferred.confidence,
361
- isAmbiguous: inferred.isAmbiguous,
362
- reason: inferred.reason,
363
- });
364
- }
365
- else if (!inferred.isAmbiguous &&
366
- isActionableConfidence(inferred.confidence)) {
367
- context.onProgress({
368
- type: "annotation_misaligned",
369
- tool: tool.name,
370
- title: toolAnnotations.title,
371
- description: tool.description,
372
- parameters: toolParams,
373
- field,
374
- actual,
375
- expected,
376
- confidence,
377
- reason: `Tool has ${field}=${actual}, but ${inferred.reason}`,
378
- });
379
- }
380
- }
381
- /**
382
- * Enhance tool assessment with Claude inference
383
- */
384
- async enhanceWithClaudeInference(tool, baseResult) {
385
- const inferredBehavior = baseResult.inferredBehavior ?? {
386
- expectedReadOnly: false,
387
- expectedDestructive: false,
388
- reason: "No behavior inference available",
389
- };
390
- if (!this.claudeBridge) {
391
- return {
392
- ...baseResult,
393
- claudeInference: {
394
- expectedReadOnly: inferredBehavior.expectedReadOnly,
395
- expectedDestructive: inferredBehavior.expectedDestructive,
396
- confidence: 50,
397
- reasoning: inferredBehavior.reason,
398
- suggestedAnnotations: {
399
- readOnlyHint: inferredBehavior.expectedReadOnly,
400
- destructiveHint: inferredBehavior.expectedDestructive,
401
- },
402
- misalignmentDetected: baseResult.issues.some((i) => i.includes("misaligned")),
403
- source: "pattern-based",
404
- },
405
- };
406
- }
407
- try {
408
- const currentAnnotations = baseResult.annotations
409
- ? {
410
- readOnlyHint: baseResult.annotations.readOnlyHint,
411
- destructiveHint: baseResult.annotations.destructiveHint,
412
- }
413
- : undefined;
414
- const inference = await this.claudeBridge.inferToolBehavior(tool, currentAnnotations);
415
- if (!inference) {
416
- return {
417
- ...baseResult,
418
- claudeInference: {
419
- expectedReadOnly: inferredBehavior.expectedReadOnly,
420
- expectedDestructive: inferredBehavior.expectedDestructive,
421
- confidence: 0,
422
- reasoning: "Claude inference unavailable. Using pattern-based analysis.",
423
- suggestedAnnotations: {},
424
- misalignmentDetected: false,
425
- source: "pattern-based",
426
- },
427
- };
428
- }
429
- const updatedIssues = [...baseResult.issues];
430
- const updatedRecommendations = [...baseResult.recommendations];
431
- if (inference.misalignmentDetected && inference.confidence >= 70) {
432
- const misalignmentMsg = inference.misalignmentDetails
433
- ? `Claude analysis (${inference.confidence}% confidence): ${inference.misalignmentDetails}`
434
- : `Claude analysis detected annotation misalignment with ${inference.confidence}% confidence`;
435
- if (!updatedIssues.some((i) => i.includes("Claude analysis"))) {
436
- updatedIssues.push(misalignmentMsg);
437
- }
438
- if (inference.suggestedAnnotations) {
439
- const { readOnlyHint, destructiveHint, idempotentHint } = inference.suggestedAnnotations;
440
- if (readOnlyHint !== undefined &&
441
- readOnlyHint !== baseResult.annotations?.readOnlyHint) {
442
- updatedRecommendations.push(`Claude suggests: Set readOnlyHint=${readOnlyHint} for ${tool.name}`);
443
- }
444
- if (destructiveHint !== undefined &&
445
- destructiveHint !== baseResult.annotations?.destructiveHint) {
446
- updatedRecommendations.push(`Claude suggests: Set destructiveHint=${destructiveHint} for ${tool.name}`);
447
- }
448
- if (idempotentHint !== undefined) {
449
- updatedRecommendations.push(`Claude suggests: Consider adding idempotentHint=${idempotentHint} for ${tool.name}`);
450
- }
451
- }
452
- }
453
- return {
454
- ...baseResult,
455
- issues: updatedIssues,
456
- recommendations: updatedRecommendations,
457
- claudeInference: {
458
- expectedReadOnly: inference.expectedReadOnly,
459
- expectedDestructive: inference.expectedDestructive,
460
- confidence: inference.confidence,
461
- reasoning: inference.reasoning,
462
- suggestedAnnotations: inference.suggestedAnnotations,
463
- misalignmentDetected: inference.misalignmentDetected,
464
- misalignmentDetails: inference.misalignmentDetails,
465
- source: "claude-inferred",
466
- },
467
- };
468
- }
469
- catch (error) {
470
- this.logger.error(`Claude inference failed for ${tool.name}`, { error });
471
- return {
472
- ...baseResult,
473
- claudeInference: {
474
- expectedReadOnly: inferredBehavior.expectedReadOnly,
475
- expectedDestructive: inferredBehavior.expectedDestructive,
476
- confidence: 50,
477
- reasoning: `Claude inference failed, using pattern-based: ${inferredBehavior.reason}`,
478
- suggestedAnnotations: {
479
- readOnlyHint: inferredBehavior.expectedReadOnly,
480
- destructiveHint: inferredBehavior.expectedDestructive,
481
- },
482
- misalignmentDetected: baseResult.issues.some((i) => i.includes("misaligned")),
483
- source: "pattern-based",
484
- },
485
- };
486
- }
487
- }
488
- /**
489
- * Assess a single tool's annotations
490
- */
491
- assessTool(tool) {
492
- const issues = [];
493
- const recommendations = [];
494
- const annotations = this.extractAnnotations(tool);
495
- const hasAnnotations = annotations.readOnlyHint !== undefined ||
496
- annotations.destructiveHint !== undefined;
497
- const inferredBehavior = inferBehavior(tool.name, tool.description, this.compiledPatterns, this.persistenceContext);
498
- let alignmentStatus = "ALIGNED";
499
- if (!hasAnnotations) {
500
- issues.push("Missing tool annotations (readOnlyHint, destructiveHint)");
501
- recommendations.push(`Add annotations to ${tool.name}: readOnlyHint=${inferredBehavior.expectedReadOnly}, destructiveHint=${inferredBehavior.expectedDestructive}`);
502
- alignmentStatus = "UNKNOWN";
503
- }
504
- else {
505
- // Check for high-confidence deception
506
- const deception = detectAnnotationDeception(tool.name, {
507
- readOnlyHint: annotations.readOnlyHint,
508
- destructiveHint: annotations.destructiveHint,
509
- });
510
- if (deception) {
511
- alignmentStatus = "MISALIGNED";
512
- issues.push(`DECEPTIVE ANNOTATION: ${deception.reason}`);
513
- recommendations.push(`CRITICAL: Fix deceptive ${deception.field} for ${tool.name} - tool name contains '${deception.matchedKeyword}' which contradicts the annotation`);
514
- if (deception.field === "readOnlyHint") {
515
- inferredBehavior.expectedReadOnly = false;
516
- inferredBehavior.confidence = "high";
517
- inferredBehavior.isAmbiguous = false;
518
- inferredBehavior.reason = deception.reason;
519
- }
520
- else {
521
- inferredBehavior.expectedDestructive = true;
522
- inferredBehavior.confidence = "high";
523
- inferredBehavior.isAmbiguous = false;
524
- inferredBehavior.reason = deception.reason;
525
- }
526
- }
527
- else {
528
- // Check for misaligned annotations
529
- const readOnlyMismatch = annotations.readOnlyHint !== undefined &&
530
- annotations.readOnlyHint !== inferredBehavior.expectedReadOnly;
531
- const destructiveMismatch = annotations.destructiveHint !== undefined &&
532
- annotations.destructiveHint !== inferredBehavior.expectedDestructive;
533
- if (readOnlyMismatch || destructiveMismatch) {
534
- if (!inferredBehavior.isAmbiguous &&
535
- isActionableConfidence(inferredBehavior.confidence)) {
536
- alignmentStatus = "MISALIGNED";
537
- if (readOnlyMismatch) {
538
- issues.push(`Potentially misaligned readOnlyHint: set to ${annotations.readOnlyHint}, expected ${inferredBehavior.expectedReadOnly} based on tool name pattern`);
539
- recommendations.push(`Verify readOnlyHint for ${tool.name}: currently ${annotations.readOnlyHint}, tool name suggests ${inferredBehavior.expectedReadOnly}`);
540
- }
541
- if (destructiveMismatch) {
542
- issues.push(`Potentially misaligned destructiveHint: set to ${annotations.destructiveHint}, expected ${inferredBehavior.expectedDestructive} based on tool name pattern`);
543
- recommendations.push(`Verify destructiveHint for ${tool.name}: currently ${annotations.destructiveHint}, tool name suggests ${inferredBehavior.expectedDestructive}`);
544
- }
545
- }
546
- }
547
- }
548
- }
549
- // Check for destructive tools without explicit hint
550
- if (inferredBehavior.expectedDestructive &&
551
- isActionableConfidence(inferredBehavior.confidence) &&
552
- annotations.destructiveHint !== true) {
553
- issues.push("Tool appears destructive but destructiveHint is not set to true");
554
- recommendations.push(`Set destructiveHint=true for ${tool.name} - this tool appears to perform destructive operations`);
555
- if (inferredBehavior.confidence === "high") {
556
- alignmentStatus = "MISALIGNED";
557
- }
558
- }
559
- // Scan for description poisoning
560
- const descriptionPoisoning = scanDescriptionForPoisoning(tool);
561
- if (descriptionPoisoning.detected) {
562
- issues.push(`Tool description contains suspicious patterns: ${descriptionPoisoning.patterns.map((p) => p.name).join(", ")}`);
563
- recommendations.push(`Review ${tool.name} description for potential prompt injection or hidden instructions`);
564
- }
565
- // Extract extended metadata (Issue #54)
566
- const extendedMetadata = this.extractExtendedMetadata(tool);
567
- return {
568
- toolName: tool.name,
569
- hasAnnotations,
570
- annotations: hasAnnotations ? annotations : undefined,
571
- annotationSource: annotations.source,
572
- inferredBehavior,
573
- alignmentStatus,
574
- issues,
575
- recommendations,
576
- descriptionPoisoning,
577
- extendedMetadata,
578
- };
579
- }
580
- /**
581
- * Extract annotations from a tool
582
- */
583
- extractAnnotations(tool) {
584
- const toolAny = tool;
585
- // Priority 1: Check annotations object (MCP 2024-11 spec)
586
- if (toolAny.annotations) {
587
- const hasAnnotations = toolAny.annotations.readOnlyHint !== undefined ||
588
- toolAny.annotations.destructiveHint !== undefined;
589
- if (hasAnnotations) {
590
- return {
591
- readOnlyHint: toolAny.annotations.readOnlyHint,
592
- destructiveHint: toolAny.annotations.destructiveHint,
593
- title: toolAny.annotations.title || toolAny.title,
594
- description: tool.description,
595
- idempotentHint: toolAny.annotations.idempotentHint,
596
- openWorldHint: toolAny.annotations.openWorldHint,
597
- source: "mcp",
598
- };
599
- }
600
- }
601
- // Priority 2: Check direct properties
602
- if (toolAny.readOnlyHint !== undefined ||
603
- toolAny.destructiveHint !== undefined) {
604
- return {
605
- readOnlyHint: toolAny.readOnlyHint,
606
- destructiveHint: toolAny.destructiveHint,
607
- title: toolAny.title,
608
- description: tool.description,
609
- idempotentHint: toolAny.idempotentHint,
610
- openWorldHint: toolAny.openWorldHint,
611
- source: "mcp",
612
- };
613
- }
614
- // Priority 3: Check metadata
615
- if (toolAny.metadata) {
616
- const hasMetadataAnnotations = toolAny.metadata.readOnlyHint !== undefined ||
617
- toolAny.metadata.destructiveHint !== undefined;
618
- if (hasMetadataAnnotations) {
619
- return {
620
- readOnlyHint: toolAny.metadata.readOnlyHint,
621
- destructiveHint: toolAny.metadata.destructiveHint,
622
- title: toolAny.metadata.title || toolAny.title,
623
- description: tool.description,
624
- idempotentHint: toolAny.metadata.idempotentHint,
625
- openWorldHint: toolAny.metadata.openWorldHint,
626
- source: "mcp",
627
- };
628
- }
629
- }
630
- return {
631
- title: toolAny.title,
632
- description: tool.description,
633
- source: "none",
634
- };
635
- }
636
- /**
637
- * Extract extended metadata from tool (Issue #54)
638
- * Extracts rate limits, permissions, return schemas, and bulk operation support
639
- */
640
- extractExtendedMetadata(tool) {
641
- const toolAny = tool;
642
- const metadata = {};
643
- // Rate limiting - check annotations, metadata, and direct props
644
- const rateLimit = toolAny.rateLimit ||
645
- toolAny.annotations?.rateLimit ||
646
- toolAny.metadata?.rateLimit;
647
- if (rateLimit && typeof rateLimit === "object") {
648
- metadata.rateLimit = {
649
- windowMs: rateLimit.windowMs,
650
- maxRequests: rateLimit.maxRequests,
651
- requestsPerMinute: rateLimit.requestsPerMinute,
652
- requestsPerSecond: rateLimit.requestsPerSecond,
653
- };
654
- }
655
- // Permissions - check requiredPermission, permissions, scopes
656
- const permissions = toolAny.requiredPermission ||
657
- toolAny.permissions ||
658
- toolAny.annotations?.permissions ||
659
- toolAny.metadata?.requiredPermission ||
660
- toolAny.metadata?.permissions;
661
- if (permissions) {
662
- const required = Array.isArray(permissions) ? permissions : [permissions];
663
- const scopes = toolAny.scopes ||
664
- toolAny.annotations?.scopes ||
665
- toolAny.metadata?.scopes;
666
- metadata.permissions = {
667
- required: required.filter((p) => typeof p === "string"),
668
- scopes: Array.isArray(scopes)
669
- ? scopes.filter((s) => typeof s === "string")
670
- : undefined,
671
- };
672
- }
673
- // Return schema - check outputSchema (MCP 2025-06-18 spec)
674
- if (toolAny.outputSchema) {
675
- metadata.returnSchema = {
676
- hasSchema: true,
677
- schema: toolAny.outputSchema,
678
- };
679
- }
680
- // Bulk operations - check metadata for batch support
681
- const bulkSupport = toolAny.supportsBulkOperations ||
682
- toolAny.annotations?.supportsBulkOperations ||
683
- toolAny.metadata?.supportsBulkOperations;
684
- const maxBatchSize = toolAny.metadata?.maxBatchSize;
685
- if (bulkSupport !== undefined || maxBatchSize !== undefined) {
686
- metadata.bulkOperations = {
687
- supported: !!bulkSupport,
688
- maxBatchSize: typeof maxBatchSize === "number" ? maxBatchSize : undefined,
689
- };
690
- }
691
- return Object.keys(metadata).length > 0 ? metadata : undefined;
692
- }
693
- /**
694
- * Extract parameters from tool input schema
695
- */
696
- extractToolParams(schema) {
697
- if (!schema || typeof schema !== "object")
698
- return [];
699
- const s = schema;
700
- if (!s.properties || typeof s.properties !== "object")
701
- return [];
702
- const required = new Set(Array.isArray(s.required) ? s.required : []);
703
- const properties = s.properties;
704
- return Object.entries(properties).map(([name, prop]) => {
705
- const param = {
706
- name,
707
- type: prop.type || "any",
708
- required: required.has(name),
709
- };
710
- if (prop.description) {
711
- param.description = prop.description;
712
- }
713
- return param;
714
- });
715
- }
716
- /**
717
- * Determine overall status
718
- */
719
- determineAnnotationStatus(results, totalTools) {
720
- if (totalTools === 0)
721
- return "PASS";
722
- const annotatedCount = results.filter((r) => r.hasAnnotations).length;
723
- const poisonedCount = results.filter((r) => r.descriptionPoisoning?.detected === true).length;
724
- if (poisonedCount > 0)
725
- return "FAIL";
726
- const misalignedCount = results.filter((r) => r.alignmentStatus === "MISALIGNED").length;
727
- const destructiveWithoutHint = results.filter((r) => r.inferredBehavior?.expectedDestructive === true &&
728
- r.inferredBehavior?.confidence === "high" &&
729
- r.annotations?.destructiveHint !== true).length;
730
- if (destructiveWithoutHint > 0)
731
- return "FAIL";
732
- if (misalignedCount > 0)
733
- return "FAIL";
734
- if (annotatedCount === totalTools)
735
- return "PASS";
736
- const annotationRate = annotatedCount / totalTools;
737
- if (annotationRate >= 0.8)
738
- return "NEED_MORE_INFO";
739
- if (annotationRate < 0.5)
740
- return "FAIL";
741
- return "NEED_MORE_INFO";
742
- }
743
- /**
744
- * Calculate metrics and alignment breakdown
745
- */
746
- calculateMetrics(results, totalTools) {
747
- const alignmentBreakdown = {
748
- aligned: results.filter((r) => r.alignmentStatus === "ALIGNED").length,
749
- misaligned: results.filter((r) => r.alignmentStatus === "MISALIGNED")
750
- .length,
751
- reviewRecommended: results.filter((r) => r.alignmentStatus === "REVIEW_RECOMMENDED").length,
752
- unknown: results.filter((r) => r.alignmentStatus === "UNKNOWN").length,
753
- };
754
- const annotatedCount = results.filter((r) => r.hasAnnotations).length;
755
- const metrics = {
756
- coverage: totalTools > 0 ? (annotatedCount / totalTools) * 100 : 100,
757
- consistency: totalTools > 0
758
- ? ((totalTools - alignmentBreakdown.misaligned) / totalTools) * 100
759
- : 100,
760
- correctness: annotatedCount > 0
761
- ? (alignmentBreakdown.aligned / annotatedCount) * 100
762
- : 0,
763
- reviewRequired: alignmentBreakdown.reviewRecommended,
764
- };
765
- return { metrics, alignmentBreakdown };
766
- }
767
- /**
768
- * Generate explanation
769
- */
770
- generateExplanation(annotatedCount, missingCount, misalignedCount, totalTools) {
771
- const parts = [];
772
- if (totalTools === 0) {
773
- return "No tools found to assess for annotations.";
774
- }
775
- parts.push(`Tool annotation coverage: ${annotatedCount}/${totalTools} tools have annotations.`);
776
- if (missingCount > 0) {
777
- parts.push(`${missingCount} tool(s) are missing required annotations (readOnlyHint, destructiveHint).`);
778
- }
779
- if (misalignedCount > 0) {
780
- parts.push(`${misalignedCount} tool(s) have potentially misaligned annotations based on naming patterns.`);
781
- }
782
- if (missingCount === 0 && misalignedCount === 0) {
783
- parts.push("All tools are properly annotated.");
784
- }
785
- return parts.join(" ");
786
- }
787
- /**
788
- * Generate enhanced explanation with Claude analysis
789
- */
790
- generateEnhancedExplanation(annotatedCount, missingCount, highConfidenceMisalignments, totalTools) {
791
- const parts = [];
792
- if (totalTools === 0) {
793
- return "No tools found to assess for annotations.";
794
- }
795
- parts.push(`Tool annotation coverage: ${annotatedCount}/${totalTools} tools have annotations.`);
796
- if (missingCount > 0) {
797
- parts.push(`${missingCount} tool(s) are missing required annotations (readOnlyHint, destructiveHint).`);
798
- }
799
- if (highConfidenceMisalignments > 0) {
800
- parts.push(`Claude analysis identified ${highConfidenceMisalignments} high-confidence annotation misalignment(s).`);
801
- }
802
- parts.push("Analysis enhanced with Claude semantic behavior inference.");
803
- return parts.join(" ");
804
- }
805
- /**
806
- * Generate recommendations
807
- */
808
- generateRecommendations(results) {
809
- const recommendations = [];
810
- const allRecs = new Set();
811
- for (const result of results) {
812
- for (const rec of result.recommendations) {
813
- allRecs.add(rec);
814
- }
815
- }
816
- const destructiveRecs = Array.from(allRecs).filter((r) => r.includes("destructive"));
817
- const otherRecs = Array.from(allRecs).filter((r) => !r.includes("destructive"));
818
- if (destructiveRecs.length > 0) {
819
- recommendations.push("PRIORITY: The following tools appear to perform destructive operations but lack proper destructiveHint annotation:");
820
- recommendations.push(...destructiveRecs.slice(0, 5));
821
- }
822
- if (otherRecs.length > 0) {
823
- recommendations.push(...otherRecs.slice(0, 5));
824
- }
825
- if (recommendations.length === 0) {
826
- recommendations.push("All tools have proper annotations. No action required.");
827
- }
828
- else {
829
- recommendations.push("Reference: MCP Directory Policy #17 requires tools to have readOnlyHint and destructiveHint annotations.");
830
- }
831
- return recommendations;
832
- }
833
- /**
834
- * Generate enhanced recommendations with Claude analysis
835
- */
836
- generateEnhancedRecommendations(results) {
837
- const recommendations = [];
838
- const claudeMisalignments = results.filter((r) => r.claudeInference &&
839
- r.claudeInference.source === "claude-inferred" &&
840
- r.claudeInference.confidence >= 70 &&
841
- r.claudeInference.misalignmentDetected);
842
- if (claudeMisalignments.length > 0) {
843
- recommendations.push("HIGH CONFIDENCE: Claude analysis identified the following annotation issues:");
844
- for (const result of claudeMisalignments.slice(0, 5)) {
845
- if (result.claudeInference) {
846
- recommendations.push(` - ${result.toolName}: ${result.claudeInference.reasoning}`);
847
- }
848
- }
849
- }
850
- const claudeSuggestions = results
851
- .filter((r) => r.claudeInference &&
852
- r.claudeInference.source === "claude-inferred" &&
853
- r.claudeInference.confidence >= 60)
854
- .flatMap((r) => r.recommendations.filter((rec) => rec.includes("Claude")));
855
- if (claudeSuggestions.length > 0) {
856
- recommendations.push(...claudeSuggestions.slice(0, 5));
857
- }
858
- const patternRecs = new Set();
859
- for (const result of results) {
860
- for (const rec of result.recommendations) {
861
- if (!rec.includes("Claude")) {
862
- patternRecs.add(rec);
863
- }
864
- }
865
- }
866
- const destructiveRecs = Array.from(patternRecs).filter((r) => r.includes("destructive"));
867
- const otherRecs = Array.from(patternRecs).filter((r) => !r.includes("destructive"));
868
- if (destructiveRecs.length > 0) {
869
- recommendations.push("PRIORITY: Potential destructive tools without proper hints:");
870
- recommendations.push(...destructiveRecs.slice(0, 3));
871
- }
872
- if (otherRecs.length > 0 && recommendations.length < 10) {
873
- recommendations.push(...otherRecs.slice(0, 3));
874
- }
875
- if (recommendations.length === 0) {
876
- recommendations.push("All tools have proper annotations. No action required.");
877
- }
878
- else {
879
- recommendations.push("Reference: MCP Directory Policy #17 requires tools to have readOnlyHint and destructiveHint annotations.");
880
- }
881
- return recommendations;
882
- }
883
290
  }