@bryan-thompson/inspector-assessment-client 1.28.0 → 1.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/{OAuthCallback-JnKCxulS.js → OAuthCallback-9Gbb39Ii.js} +1 -1
- package/dist/assets/{OAuthDebugCallback-C2zSlEIQ.js → OAuthDebugCallback-B76J2MBn.js} +1 -1
- package/dist/assets/{index-C3xZdIFQ.js → index-CHTOR9VI.js} +4 -4
- package/dist/index.html +1 -1
- package/lib/lib/assessment/resultTypes.d.ts +8 -0
- package/lib/lib/assessment/resultTypes.d.ts.map +1 -1
- package/lib/lib/securityPatterns.d.ts +1 -1
- package/lib/lib/securityPatterns.js +1 -1
- package/lib/services/assessment/modules/TemporalAssessor.d.ts +5 -129
- package/lib/services/assessment/modules/TemporalAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/TemporalAssessor.js +18 -554
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +10 -70
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/ToolAnnotationAssessor.js +32 -625
- package/lib/services/assessment/modules/annotations/AlignmentChecker.d.ts +65 -0
- package/lib/services/assessment/modules/annotations/AlignmentChecker.d.ts.map +1 -0
- package/lib/services/assessment/modules/annotations/AlignmentChecker.js +289 -0
- package/lib/services/assessment/modules/annotations/ClaudeIntegration.d.ts +22 -0
- package/lib/services/assessment/modules/annotations/ClaudeIntegration.d.ts.map +1 -0
- package/lib/services/assessment/modules/annotations/ClaudeIntegration.js +139 -0
- package/lib/services/assessment/modules/annotations/EventEmitter.d.ts +20 -0
- package/lib/services/assessment/modules/annotations/EventEmitter.d.ts.map +1 -0
- package/lib/services/assessment/modules/annotations/EventEmitter.js +100 -0
- package/lib/services/assessment/modules/annotations/ExplanationGenerator.d.ts +25 -0
- package/lib/services/assessment/modules/annotations/ExplanationGenerator.d.ts.map +1 -0
- package/lib/services/assessment/modules/annotations/ExplanationGenerator.js +122 -0
- package/lib/services/assessment/modules/annotations/index.d.ts +5 -0
- package/lib/services/assessment/modules/annotations/index.d.ts.map +1 -1
- package/lib/services/assessment/modules/annotations/index.js +8 -0
- package/lib/services/assessment/modules/annotations/types.d.ts +33 -0
- package/lib/services/assessment/modules/annotations/types.d.ts.map +1 -0
- package/lib/services/assessment/modules/annotations/types.js +7 -0
- package/lib/services/assessment/modules/securityTests/SafeResponseDetector.d.ts +3 -0
- package/lib/services/assessment/modules/securityTests/SafeResponseDetector.d.ts.map +1 -1
- package/lib/services/assessment/modules/securityTests/SafeResponseDetector.js +14 -1
- package/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.d.ts +29 -0
- package/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.d.ts.map +1 -1
- package/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.js +71 -0
- package/lib/services/assessment/modules/securityTests/SecurityPayloadTester.d.ts.map +1 -1
- package/lib/services/assessment/modules/securityTests/SecurityPayloadTester.js +24 -0
- package/lib/services/assessment/modules/securityTests/SecurityResponseAnalyzer.d.ts +66 -0
- package/lib/services/assessment/modules/securityTests/SecurityResponseAnalyzer.d.ts.map +1 -1
- package/lib/services/assessment/modules/securityTests/SecurityResponseAnalyzer.js +228 -3
- package/lib/services/assessment/modules/temporal/MutationDetector.d.ts +75 -0
- package/lib/services/assessment/modules/temporal/MutationDetector.d.ts.map +1 -0
- package/lib/services/assessment/modules/temporal/MutationDetector.js +147 -0
- package/lib/services/assessment/modules/temporal/VarianceClassifier.d.ts +112 -0
- package/lib/services/assessment/modules/temporal/VarianceClassifier.d.ts.map +1 -0
- package/lib/services/assessment/modules/temporal/VarianceClassifier.js +427 -0
- package/lib/services/assessment/modules/temporal/index.d.ts +10 -0
- package/lib/services/assessment/modules/temporal/index.d.ts.map +1 -0
- package/lib/services/assessment/modules/temporal/index.js +9 -0
- package/package.json +1 -1
|
@@ -15,11 +15,25 @@
|
|
|
15
15
|
* - BehaviorInference: Infers expected behavior from tool names
|
|
16
16
|
* - AnnotationDeceptionDetector: Detects keyword-based misalignments
|
|
17
17
|
* - DescriptionPoisoningDetector: Detects malicious content in descriptions
|
|
18
|
+
*
|
|
19
|
+
* Refactored in Issue #105 to delegate to extracted helper modules:
|
|
20
|
+
* - AlignmentChecker: Tool alignment assessment and metrics
|
|
21
|
+
* - ExplanationGenerator: Explanation and recommendation generation
|
|
22
|
+
* - EventEmitter: Progress event emission
|
|
23
|
+
* - ClaudeIntegration: Claude-enhanced behavior inference
|
|
18
24
|
*/
|
|
19
25
|
import { BaseAssessor } from "./BaseAssessor.js";
|
|
20
26
|
import { getDefaultCompiledPatterns, detectPersistenceModel, } from "../config/annotationPatterns.js";
|
|
21
|
-
// Import from extracted modules
|
|
22
|
-
import {
|
|
27
|
+
// Import from extracted modules (Issue #105)
|
|
28
|
+
import { detectArchitecture,
|
|
29
|
+
// Alignment checking
|
|
30
|
+
assessSingleTool, determineAnnotationStatus, calculateMetrics,
|
|
31
|
+
// Explanation generation
|
|
32
|
+
generateExplanation, generateEnhancedExplanation, generateRecommendations, generateEnhancedRecommendations,
|
|
33
|
+
// Event emission
|
|
34
|
+
emitAnnotationEvents,
|
|
35
|
+
// Claude integration
|
|
36
|
+
enhanceWithClaudeInference, createPatternBasedInference, } from "./annotations/index.js";
|
|
23
37
|
export class ToolAnnotationAssessor extends BaseAssessor {
|
|
24
38
|
claudeBridge;
|
|
25
39
|
compiledPatterns;
|
|
@@ -112,10 +126,11 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
112
126
|
}
|
|
113
127
|
for (const tool of context.tools) {
|
|
114
128
|
this.testCount++;
|
|
115
|
-
|
|
129
|
+
// Use extracted assessSingleTool function
|
|
130
|
+
const result = assessSingleTool(tool, this.compiledPatterns, this.persistenceContext);
|
|
116
131
|
// Enhance with Claude inference if available
|
|
117
132
|
if (useClaudeInference) {
|
|
118
|
-
const enhancedResult = await
|
|
133
|
+
const enhancedResult = await enhanceWithClaudeInference(tool, result, this.claudeBridge, this.logger);
|
|
119
134
|
toolResults.push(enhancedResult);
|
|
120
135
|
if (enhancedResult.claudeInference &&
|
|
121
136
|
enhancedResult.claudeInference.confidence >= 70 &&
|
|
@@ -127,26 +142,9 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
127
142
|
}
|
|
128
143
|
}
|
|
129
144
|
else {
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
reason: "No behavior inference available",
|
|
134
|
-
};
|
|
135
|
-
toolResults.push({
|
|
136
|
-
...result,
|
|
137
|
-
claudeInference: {
|
|
138
|
-
expectedReadOnly: inferredBehavior.expectedReadOnly,
|
|
139
|
-
expectedDestructive: inferredBehavior.expectedDestructive,
|
|
140
|
-
confidence: 50,
|
|
141
|
-
reasoning: inferredBehavior.reason,
|
|
142
|
-
suggestedAnnotations: {
|
|
143
|
-
readOnlyHint: inferredBehavior.expectedReadOnly,
|
|
144
|
-
destructiveHint: inferredBehavior.expectedDestructive,
|
|
145
|
-
},
|
|
146
|
-
misalignmentDetected: result.issues.some((i) => i.includes("misaligned")),
|
|
147
|
-
source: "pattern-based",
|
|
148
|
-
},
|
|
149
|
-
});
|
|
145
|
+
// Use pattern-based inference fallback
|
|
146
|
+
const enhancedResult = createPatternBasedInference(result);
|
|
147
|
+
toolResults.push(enhancedResult);
|
|
150
148
|
if (result.issues.some((i) => i.includes("misaligned"))) {
|
|
151
149
|
misalignedAnnotationsCount++;
|
|
152
150
|
}
|
|
@@ -222,13 +220,14 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
222
220
|
});
|
|
223
221
|
}
|
|
224
222
|
}
|
|
225
|
-
// Emit annotation events
|
|
226
|
-
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
const
|
|
230
|
-
const
|
|
231
|
-
const
|
|
223
|
+
// Emit annotation events using extracted function
|
|
224
|
+
emitAnnotationEvents(context.onProgress, tool, latestResult);
|
|
225
|
+
}
|
|
226
|
+
// Use extracted functions for status, explanation, and recommendations
|
|
227
|
+
const status = determineAnnotationStatus(toolResults, context.tools.length);
|
|
228
|
+
const explanation = generateExplanation(annotatedCount, missingAnnotationsCount, misalignedAnnotationsCount, context.tools.length);
|
|
229
|
+
const recommendations = generateRecommendations(toolResults);
|
|
230
|
+
const { metrics, alignmentBreakdown } = calculateMetrics(toolResults, context.tools.length);
|
|
232
231
|
this.logger.info(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned, ${alignmentBreakdown.reviewRecommended} need review, ${poisonedDescriptionsCount} poisoned`);
|
|
233
232
|
if (useClaudeInference) {
|
|
234
233
|
const highConfidenceMisalignments = toolResults.filter((r) => r.claudeInference &&
|
|
@@ -240,8 +239,8 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
240
239
|
missingAnnotationsCount,
|
|
241
240
|
misalignedAnnotationsCount,
|
|
242
241
|
status,
|
|
243
|
-
explanation:
|
|
244
|
-
recommendations:
|
|
242
|
+
explanation: generateEnhancedExplanation(annotatedCount, missingAnnotationsCount, highConfidenceMisalignments.length, context.tools.length),
|
|
243
|
+
recommendations: generateEnhancedRecommendations(toolResults),
|
|
245
244
|
metrics,
|
|
246
245
|
alignmentBreakdown,
|
|
247
246
|
annotationSources: annotationSourceCounts,
|
|
@@ -288,596 +287,4 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
288
287
|
},
|
|
289
288
|
};
|
|
290
289
|
}
|
|
291
|
-
/**
|
|
292
|
-
* Emit annotation-related progress events
|
|
293
|
-
*/
|
|
294
|
-
emitAnnotationEvents(context, tool, result) {
|
|
295
|
-
if (!context.onProgress || !result.inferredBehavior)
|
|
296
|
-
return;
|
|
297
|
-
const annotations = result.annotations;
|
|
298
|
-
const inferred = result.inferredBehavior;
|
|
299
|
-
const confidence = result.claudeInference?.confidence ?? 50;
|
|
300
|
-
const toolParams = this.extractToolParams(tool.inputSchema);
|
|
301
|
-
const toolAnnotations = this.extractAnnotations(tool);
|
|
302
|
-
// Emit missing annotation event
|
|
303
|
-
if (!result.hasAnnotations) {
|
|
304
|
-
context.onProgress({
|
|
305
|
-
type: "annotation_missing",
|
|
306
|
-
tool: tool.name,
|
|
307
|
-
title: toolAnnotations.title,
|
|
308
|
-
description: tool.description,
|
|
309
|
-
parameters: toolParams,
|
|
310
|
-
inferredBehavior: {
|
|
311
|
-
expectedReadOnly: inferred.expectedReadOnly,
|
|
312
|
-
expectedDestructive: inferred.expectedDestructive,
|
|
313
|
-
reason: inferred.reason,
|
|
314
|
-
},
|
|
315
|
-
});
|
|
316
|
-
return;
|
|
317
|
-
}
|
|
318
|
-
// Emit aligned event
|
|
319
|
-
if (result.alignmentStatus === "ALIGNED") {
|
|
320
|
-
context.onProgress({
|
|
321
|
-
type: "annotation_aligned",
|
|
322
|
-
tool: tool.name,
|
|
323
|
-
confidence: inferred.confidence ?? "medium",
|
|
324
|
-
annotations: {
|
|
325
|
-
readOnlyHint: annotations?.readOnlyHint,
|
|
326
|
-
destructiveHint: annotations?.destructiveHint,
|
|
327
|
-
openWorldHint: annotations?.openWorldHint,
|
|
328
|
-
idempotentHint: annotations?.idempotentHint,
|
|
329
|
-
},
|
|
330
|
-
});
|
|
331
|
-
return;
|
|
332
|
-
}
|
|
333
|
-
// Check readOnlyHint mismatch
|
|
334
|
-
if (annotations?.readOnlyHint !== undefined &&
|
|
335
|
-
annotations.readOnlyHint !== inferred.expectedReadOnly) {
|
|
336
|
-
this.emitMismatchEvent(context, tool, toolParams, toolAnnotations, "readOnlyHint", annotations.readOnlyHint, inferred.expectedReadOnly, confidence, inferred, result.alignmentStatus);
|
|
337
|
-
}
|
|
338
|
-
// Check destructiveHint mismatch
|
|
339
|
-
if (annotations?.destructiveHint !== undefined &&
|
|
340
|
-
annotations.destructiveHint !== inferred.expectedDestructive) {
|
|
341
|
-
this.emitMismatchEvent(context, tool, toolParams, toolAnnotations, "destructiveHint", annotations.destructiveHint, inferred.expectedDestructive, confidence, inferred, result.alignmentStatus);
|
|
342
|
-
}
|
|
343
|
-
}
|
|
344
|
-
/**
|
|
345
|
-
* Emit mismatch event (misaligned or review_recommended)
|
|
346
|
-
*/
|
|
347
|
-
emitMismatchEvent(context, tool, toolParams, toolAnnotations, field, actual, expected, confidence, inferred, alignmentStatus) {
|
|
348
|
-
if (!context.onProgress)
|
|
349
|
-
return;
|
|
350
|
-
if (alignmentStatus === "REVIEW_RECOMMENDED") {
|
|
351
|
-
context.onProgress({
|
|
352
|
-
type: "annotation_review_recommended",
|
|
353
|
-
tool: tool.name,
|
|
354
|
-
title: toolAnnotations.title,
|
|
355
|
-
description: tool.description,
|
|
356
|
-
parameters: toolParams,
|
|
357
|
-
field,
|
|
358
|
-
actual,
|
|
359
|
-
inferred: expected,
|
|
360
|
-
confidence: inferred.confidence,
|
|
361
|
-
isAmbiguous: inferred.isAmbiguous,
|
|
362
|
-
reason: inferred.reason,
|
|
363
|
-
});
|
|
364
|
-
}
|
|
365
|
-
else if (!inferred.isAmbiguous &&
|
|
366
|
-
isActionableConfidence(inferred.confidence)) {
|
|
367
|
-
context.onProgress({
|
|
368
|
-
type: "annotation_misaligned",
|
|
369
|
-
tool: tool.name,
|
|
370
|
-
title: toolAnnotations.title,
|
|
371
|
-
description: tool.description,
|
|
372
|
-
parameters: toolParams,
|
|
373
|
-
field,
|
|
374
|
-
actual,
|
|
375
|
-
expected,
|
|
376
|
-
confidence,
|
|
377
|
-
reason: `Tool has ${field}=${actual}, but ${inferred.reason}`,
|
|
378
|
-
});
|
|
379
|
-
}
|
|
380
|
-
}
|
|
381
|
-
/**
|
|
382
|
-
* Enhance tool assessment with Claude inference
|
|
383
|
-
*/
|
|
384
|
-
async enhanceWithClaudeInference(tool, baseResult) {
|
|
385
|
-
const inferredBehavior = baseResult.inferredBehavior ?? {
|
|
386
|
-
expectedReadOnly: false,
|
|
387
|
-
expectedDestructive: false,
|
|
388
|
-
reason: "No behavior inference available",
|
|
389
|
-
};
|
|
390
|
-
if (!this.claudeBridge) {
|
|
391
|
-
return {
|
|
392
|
-
...baseResult,
|
|
393
|
-
claudeInference: {
|
|
394
|
-
expectedReadOnly: inferredBehavior.expectedReadOnly,
|
|
395
|
-
expectedDestructive: inferredBehavior.expectedDestructive,
|
|
396
|
-
confidence: 50,
|
|
397
|
-
reasoning: inferredBehavior.reason,
|
|
398
|
-
suggestedAnnotations: {
|
|
399
|
-
readOnlyHint: inferredBehavior.expectedReadOnly,
|
|
400
|
-
destructiveHint: inferredBehavior.expectedDestructive,
|
|
401
|
-
},
|
|
402
|
-
misalignmentDetected: baseResult.issues.some((i) => i.includes("misaligned")),
|
|
403
|
-
source: "pattern-based",
|
|
404
|
-
},
|
|
405
|
-
};
|
|
406
|
-
}
|
|
407
|
-
try {
|
|
408
|
-
const currentAnnotations = baseResult.annotations
|
|
409
|
-
? {
|
|
410
|
-
readOnlyHint: baseResult.annotations.readOnlyHint,
|
|
411
|
-
destructiveHint: baseResult.annotations.destructiveHint,
|
|
412
|
-
}
|
|
413
|
-
: undefined;
|
|
414
|
-
const inference = await this.claudeBridge.inferToolBehavior(tool, currentAnnotations);
|
|
415
|
-
if (!inference) {
|
|
416
|
-
return {
|
|
417
|
-
...baseResult,
|
|
418
|
-
claudeInference: {
|
|
419
|
-
expectedReadOnly: inferredBehavior.expectedReadOnly,
|
|
420
|
-
expectedDestructive: inferredBehavior.expectedDestructive,
|
|
421
|
-
confidence: 0,
|
|
422
|
-
reasoning: "Claude inference unavailable. Using pattern-based analysis.",
|
|
423
|
-
suggestedAnnotations: {},
|
|
424
|
-
misalignmentDetected: false,
|
|
425
|
-
source: "pattern-based",
|
|
426
|
-
},
|
|
427
|
-
};
|
|
428
|
-
}
|
|
429
|
-
const updatedIssues = [...baseResult.issues];
|
|
430
|
-
const updatedRecommendations = [...baseResult.recommendations];
|
|
431
|
-
if (inference.misalignmentDetected && inference.confidence >= 70) {
|
|
432
|
-
const misalignmentMsg = inference.misalignmentDetails
|
|
433
|
-
? `Claude analysis (${inference.confidence}% confidence): ${inference.misalignmentDetails}`
|
|
434
|
-
: `Claude analysis detected annotation misalignment with ${inference.confidence}% confidence`;
|
|
435
|
-
if (!updatedIssues.some((i) => i.includes("Claude analysis"))) {
|
|
436
|
-
updatedIssues.push(misalignmentMsg);
|
|
437
|
-
}
|
|
438
|
-
if (inference.suggestedAnnotations) {
|
|
439
|
-
const { readOnlyHint, destructiveHint, idempotentHint } = inference.suggestedAnnotations;
|
|
440
|
-
if (readOnlyHint !== undefined &&
|
|
441
|
-
readOnlyHint !== baseResult.annotations?.readOnlyHint) {
|
|
442
|
-
updatedRecommendations.push(`Claude suggests: Set readOnlyHint=${readOnlyHint} for ${tool.name}`);
|
|
443
|
-
}
|
|
444
|
-
if (destructiveHint !== undefined &&
|
|
445
|
-
destructiveHint !== baseResult.annotations?.destructiveHint) {
|
|
446
|
-
updatedRecommendations.push(`Claude suggests: Set destructiveHint=${destructiveHint} for ${tool.name}`);
|
|
447
|
-
}
|
|
448
|
-
if (idempotentHint !== undefined) {
|
|
449
|
-
updatedRecommendations.push(`Claude suggests: Consider adding idempotentHint=${idempotentHint} for ${tool.name}`);
|
|
450
|
-
}
|
|
451
|
-
}
|
|
452
|
-
}
|
|
453
|
-
return {
|
|
454
|
-
...baseResult,
|
|
455
|
-
issues: updatedIssues,
|
|
456
|
-
recommendations: updatedRecommendations,
|
|
457
|
-
claudeInference: {
|
|
458
|
-
expectedReadOnly: inference.expectedReadOnly,
|
|
459
|
-
expectedDestructive: inference.expectedDestructive,
|
|
460
|
-
confidence: inference.confidence,
|
|
461
|
-
reasoning: inference.reasoning,
|
|
462
|
-
suggestedAnnotations: inference.suggestedAnnotations,
|
|
463
|
-
misalignmentDetected: inference.misalignmentDetected,
|
|
464
|
-
misalignmentDetails: inference.misalignmentDetails,
|
|
465
|
-
source: "claude-inferred",
|
|
466
|
-
},
|
|
467
|
-
};
|
|
468
|
-
}
|
|
469
|
-
catch (error) {
|
|
470
|
-
this.logger.error(`Claude inference failed for ${tool.name}`, { error });
|
|
471
|
-
return {
|
|
472
|
-
...baseResult,
|
|
473
|
-
claudeInference: {
|
|
474
|
-
expectedReadOnly: inferredBehavior.expectedReadOnly,
|
|
475
|
-
expectedDestructive: inferredBehavior.expectedDestructive,
|
|
476
|
-
confidence: 50,
|
|
477
|
-
reasoning: `Claude inference failed, using pattern-based: ${inferredBehavior.reason}`,
|
|
478
|
-
suggestedAnnotations: {
|
|
479
|
-
readOnlyHint: inferredBehavior.expectedReadOnly,
|
|
480
|
-
destructiveHint: inferredBehavior.expectedDestructive,
|
|
481
|
-
},
|
|
482
|
-
misalignmentDetected: baseResult.issues.some((i) => i.includes("misaligned")),
|
|
483
|
-
source: "pattern-based",
|
|
484
|
-
},
|
|
485
|
-
};
|
|
486
|
-
}
|
|
487
|
-
}
|
|
488
|
-
/**
|
|
489
|
-
* Assess a single tool's annotations
|
|
490
|
-
*/
|
|
491
|
-
assessTool(tool) {
|
|
492
|
-
const issues = [];
|
|
493
|
-
const recommendations = [];
|
|
494
|
-
const annotations = this.extractAnnotations(tool);
|
|
495
|
-
const hasAnnotations = annotations.readOnlyHint !== undefined ||
|
|
496
|
-
annotations.destructiveHint !== undefined;
|
|
497
|
-
const inferredBehavior = inferBehavior(tool.name, tool.description, this.compiledPatterns, this.persistenceContext);
|
|
498
|
-
let alignmentStatus = "ALIGNED";
|
|
499
|
-
if (!hasAnnotations) {
|
|
500
|
-
issues.push("Missing tool annotations (readOnlyHint, destructiveHint)");
|
|
501
|
-
recommendations.push(`Add annotations to ${tool.name}: readOnlyHint=${inferredBehavior.expectedReadOnly}, destructiveHint=${inferredBehavior.expectedDestructive}`);
|
|
502
|
-
alignmentStatus = "UNKNOWN";
|
|
503
|
-
}
|
|
504
|
-
else {
|
|
505
|
-
// Check for high-confidence deception
|
|
506
|
-
const deception = detectAnnotationDeception(tool.name, {
|
|
507
|
-
readOnlyHint: annotations.readOnlyHint,
|
|
508
|
-
destructiveHint: annotations.destructiveHint,
|
|
509
|
-
});
|
|
510
|
-
if (deception) {
|
|
511
|
-
alignmentStatus = "MISALIGNED";
|
|
512
|
-
issues.push(`DECEPTIVE ANNOTATION: ${deception.reason}`);
|
|
513
|
-
recommendations.push(`CRITICAL: Fix deceptive ${deception.field} for ${tool.name} - tool name contains '${deception.matchedKeyword}' which contradicts the annotation`);
|
|
514
|
-
if (deception.field === "readOnlyHint") {
|
|
515
|
-
inferredBehavior.expectedReadOnly = false;
|
|
516
|
-
inferredBehavior.confidence = "high";
|
|
517
|
-
inferredBehavior.isAmbiguous = false;
|
|
518
|
-
inferredBehavior.reason = deception.reason;
|
|
519
|
-
}
|
|
520
|
-
else {
|
|
521
|
-
inferredBehavior.expectedDestructive = true;
|
|
522
|
-
inferredBehavior.confidence = "high";
|
|
523
|
-
inferredBehavior.isAmbiguous = false;
|
|
524
|
-
inferredBehavior.reason = deception.reason;
|
|
525
|
-
}
|
|
526
|
-
}
|
|
527
|
-
else {
|
|
528
|
-
// Check for misaligned annotations
|
|
529
|
-
const readOnlyMismatch = annotations.readOnlyHint !== undefined &&
|
|
530
|
-
annotations.readOnlyHint !== inferredBehavior.expectedReadOnly;
|
|
531
|
-
const destructiveMismatch = annotations.destructiveHint !== undefined &&
|
|
532
|
-
annotations.destructiveHint !== inferredBehavior.expectedDestructive;
|
|
533
|
-
if (readOnlyMismatch || destructiveMismatch) {
|
|
534
|
-
if (!inferredBehavior.isAmbiguous &&
|
|
535
|
-
isActionableConfidence(inferredBehavior.confidence)) {
|
|
536
|
-
alignmentStatus = "MISALIGNED";
|
|
537
|
-
if (readOnlyMismatch) {
|
|
538
|
-
issues.push(`Potentially misaligned readOnlyHint: set to ${annotations.readOnlyHint}, expected ${inferredBehavior.expectedReadOnly} based on tool name pattern`);
|
|
539
|
-
recommendations.push(`Verify readOnlyHint for ${tool.name}: currently ${annotations.readOnlyHint}, tool name suggests ${inferredBehavior.expectedReadOnly}`);
|
|
540
|
-
}
|
|
541
|
-
if (destructiveMismatch) {
|
|
542
|
-
issues.push(`Potentially misaligned destructiveHint: set to ${annotations.destructiveHint}, expected ${inferredBehavior.expectedDestructive} based on tool name pattern`);
|
|
543
|
-
recommendations.push(`Verify destructiveHint for ${tool.name}: currently ${annotations.destructiveHint}, tool name suggests ${inferredBehavior.expectedDestructive}`);
|
|
544
|
-
}
|
|
545
|
-
}
|
|
546
|
-
}
|
|
547
|
-
}
|
|
548
|
-
}
|
|
549
|
-
// Check for destructive tools without explicit hint
|
|
550
|
-
if (inferredBehavior.expectedDestructive &&
|
|
551
|
-
isActionableConfidence(inferredBehavior.confidence) &&
|
|
552
|
-
annotations.destructiveHint !== true) {
|
|
553
|
-
issues.push("Tool appears destructive but destructiveHint is not set to true");
|
|
554
|
-
recommendations.push(`Set destructiveHint=true for ${tool.name} - this tool appears to perform destructive operations`);
|
|
555
|
-
if (inferredBehavior.confidence === "high") {
|
|
556
|
-
alignmentStatus = "MISALIGNED";
|
|
557
|
-
}
|
|
558
|
-
}
|
|
559
|
-
// Scan for description poisoning
|
|
560
|
-
const descriptionPoisoning = scanDescriptionForPoisoning(tool);
|
|
561
|
-
if (descriptionPoisoning.detected) {
|
|
562
|
-
issues.push(`Tool description contains suspicious patterns: ${descriptionPoisoning.patterns.map((p) => p.name).join(", ")}`);
|
|
563
|
-
recommendations.push(`Review ${tool.name} description for potential prompt injection or hidden instructions`);
|
|
564
|
-
}
|
|
565
|
-
// Extract extended metadata (Issue #54)
|
|
566
|
-
const extendedMetadata = this.extractExtendedMetadata(tool);
|
|
567
|
-
return {
|
|
568
|
-
toolName: tool.name,
|
|
569
|
-
hasAnnotations,
|
|
570
|
-
annotations: hasAnnotations ? annotations : undefined,
|
|
571
|
-
annotationSource: annotations.source,
|
|
572
|
-
inferredBehavior,
|
|
573
|
-
alignmentStatus,
|
|
574
|
-
issues,
|
|
575
|
-
recommendations,
|
|
576
|
-
descriptionPoisoning,
|
|
577
|
-
extendedMetadata,
|
|
578
|
-
};
|
|
579
|
-
}
|
|
580
|
-
/**
|
|
581
|
-
* Extract annotations from a tool
|
|
582
|
-
*/
|
|
583
|
-
extractAnnotations(tool) {
|
|
584
|
-
const toolAny = tool;
|
|
585
|
-
// Priority 1: Check annotations object (MCP 2024-11 spec)
|
|
586
|
-
if (toolAny.annotations) {
|
|
587
|
-
const hasAnnotations = toolAny.annotations.readOnlyHint !== undefined ||
|
|
588
|
-
toolAny.annotations.destructiveHint !== undefined;
|
|
589
|
-
if (hasAnnotations) {
|
|
590
|
-
return {
|
|
591
|
-
readOnlyHint: toolAny.annotations.readOnlyHint,
|
|
592
|
-
destructiveHint: toolAny.annotations.destructiveHint,
|
|
593
|
-
title: toolAny.annotations.title || toolAny.title,
|
|
594
|
-
description: tool.description,
|
|
595
|
-
idempotentHint: toolAny.annotations.idempotentHint,
|
|
596
|
-
openWorldHint: toolAny.annotations.openWorldHint,
|
|
597
|
-
source: "mcp",
|
|
598
|
-
};
|
|
599
|
-
}
|
|
600
|
-
}
|
|
601
|
-
// Priority 2: Check direct properties
|
|
602
|
-
if (toolAny.readOnlyHint !== undefined ||
|
|
603
|
-
toolAny.destructiveHint !== undefined) {
|
|
604
|
-
return {
|
|
605
|
-
readOnlyHint: toolAny.readOnlyHint,
|
|
606
|
-
destructiveHint: toolAny.destructiveHint,
|
|
607
|
-
title: toolAny.title,
|
|
608
|
-
description: tool.description,
|
|
609
|
-
idempotentHint: toolAny.idempotentHint,
|
|
610
|
-
openWorldHint: toolAny.openWorldHint,
|
|
611
|
-
source: "mcp",
|
|
612
|
-
};
|
|
613
|
-
}
|
|
614
|
-
// Priority 3: Check metadata
|
|
615
|
-
if (toolAny.metadata) {
|
|
616
|
-
const hasMetadataAnnotations = toolAny.metadata.readOnlyHint !== undefined ||
|
|
617
|
-
toolAny.metadata.destructiveHint !== undefined;
|
|
618
|
-
if (hasMetadataAnnotations) {
|
|
619
|
-
return {
|
|
620
|
-
readOnlyHint: toolAny.metadata.readOnlyHint,
|
|
621
|
-
destructiveHint: toolAny.metadata.destructiveHint,
|
|
622
|
-
title: toolAny.metadata.title || toolAny.title,
|
|
623
|
-
description: tool.description,
|
|
624
|
-
idempotentHint: toolAny.metadata.idempotentHint,
|
|
625
|
-
openWorldHint: toolAny.metadata.openWorldHint,
|
|
626
|
-
source: "mcp",
|
|
627
|
-
};
|
|
628
|
-
}
|
|
629
|
-
}
|
|
630
|
-
return {
|
|
631
|
-
title: toolAny.title,
|
|
632
|
-
description: tool.description,
|
|
633
|
-
source: "none",
|
|
634
|
-
};
|
|
635
|
-
}
|
|
636
|
-
/**
|
|
637
|
-
* Extract extended metadata from tool (Issue #54)
|
|
638
|
-
* Extracts rate limits, permissions, return schemas, and bulk operation support
|
|
639
|
-
*/
|
|
640
|
-
extractExtendedMetadata(tool) {
|
|
641
|
-
const toolAny = tool;
|
|
642
|
-
const metadata = {};
|
|
643
|
-
// Rate limiting - check annotations, metadata, and direct props
|
|
644
|
-
const rateLimit = toolAny.rateLimit ||
|
|
645
|
-
toolAny.annotations?.rateLimit ||
|
|
646
|
-
toolAny.metadata?.rateLimit;
|
|
647
|
-
if (rateLimit && typeof rateLimit === "object") {
|
|
648
|
-
metadata.rateLimit = {
|
|
649
|
-
windowMs: rateLimit.windowMs,
|
|
650
|
-
maxRequests: rateLimit.maxRequests,
|
|
651
|
-
requestsPerMinute: rateLimit.requestsPerMinute,
|
|
652
|
-
requestsPerSecond: rateLimit.requestsPerSecond,
|
|
653
|
-
};
|
|
654
|
-
}
|
|
655
|
-
// Permissions - check requiredPermission, permissions, scopes
|
|
656
|
-
const permissions = toolAny.requiredPermission ||
|
|
657
|
-
toolAny.permissions ||
|
|
658
|
-
toolAny.annotations?.permissions ||
|
|
659
|
-
toolAny.metadata?.requiredPermission ||
|
|
660
|
-
toolAny.metadata?.permissions;
|
|
661
|
-
if (permissions) {
|
|
662
|
-
const required = Array.isArray(permissions) ? permissions : [permissions];
|
|
663
|
-
const scopes = toolAny.scopes ||
|
|
664
|
-
toolAny.annotations?.scopes ||
|
|
665
|
-
toolAny.metadata?.scopes;
|
|
666
|
-
metadata.permissions = {
|
|
667
|
-
required: required.filter((p) => typeof p === "string"),
|
|
668
|
-
scopes: Array.isArray(scopes)
|
|
669
|
-
? scopes.filter((s) => typeof s === "string")
|
|
670
|
-
: undefined,
|
|
671
|
-
};
|
|
672
|
-
}
|
|
673
|
-
// Return schema - check outputSchema (MCP 2025-06-18 spec)
|
|
674
|
-
if (toolAny.outputSchema) {
|
|
675
|
-
metadata.returnSchema = {
|
|
676
|
-
hasSchema: true,
|
|
677
|
-
schema: toolAny.outputSchema,
|
|
678
|
-
};
|
|
679
|
-
}
|
|
680
|
-
// Bulk operations - check metadata for batch support
|
|
681
|
-
const bulkSupport = toolAny.supportsBulkOperations ||
|
|
682
|
-
toolAny.annotations?.supportsBulkOperations ||
|
|
683
|
-
toolAny.metadata?.supportsBulkOperations;
|
|
684
|
-
const maxBatchSize = toolAny.metadata?.maxBatchSize;
|
|
685
|
-
if (bulkSupport !== undefined || maxBatchSize !== undefined) {
|
|
686
|
-
metadata.bulkOperations = {
|
|
687
|
-
supported: !!bulkSupport,
|
|
688
|
-
maxBatchSize: typeof maxBatchSize === "number" ? maxBatchSize : undefined,
|
|
689
|
-
};
|
|
690
|
-
}
|
|
691
|
-
return Object.keys(metadata).length > 0 ? metadata : undefined;
|
|
692
|
-
}
|
|
693
|
-
/**
|
|
694
|
-
* Extract parameters from tool input schema
|
|
695
|
-
*/
|
|
696
|
-
extractToolParams(schema) {
|
|
697
|
-
if (!schema || typeof schema !== "object")
|
|
698
|
-
return [];
|
|
699
|
-
const s = schema;
|
|
700
|
-
if (!s.properties || typeof s.properties !== "object")
|
|
701
|
-
return [];
|
|
702
|
-
const required = new Set(Array.isArray(s.required) ? s.required : []);
|
|
703
|
-
const properties = s.properties;
|
|
704
|
-
return Object.entries(properties).map(([name, prop]) => {
|
|
705
|
-
const param = {
|
|
706
|
-
name,
|
|
707
|
-
type: prop.type || "any",
|
|
708
|
-
required: required.has(name),
|
|
709
|
-
};
|
|
710
|
-
if (prop.description) {
|
|
711
|
-
param.description = prop.description;
|
|
712
|
-
}
|
|
713
|
-
return param;
|
|
714
|
-
});
|
|
715
|
-
}
|
|
716
|
-
/**
|
|
717
|
-
* Determine overall status
|
|
718
|
-
*/
|
|
719
|
-
determineAnnotationStatus(results, totalTools) {
|
|
720
|
-
if (totalTools === 0)
|
|
721
|
-
return "PASS";
|
|
722
|
-
const annotatedCount = results.filter((r) => r.hasAnnotations).length;
|
|
723
|
-
const poisonedCount = results.filter((r) => r.descriptionPoisoning?.detected === true).length;
|
|
724
|
-
if (poisonedCount > 0)
|
|
725
|
-
return "FAIL";
|
|
726
|
-
const misalignedCount = results.filter((r) => r.alignmentStatus === "MISALIGNED").length;
|
|
727
|
-
const destructiveWithoutHint = results.filter((r) => r.inferredBehavior?.expectedDestructive === true &&
|
|
728
|
-
r.inferredBehavior?.confidence === "high" &&
|
|
729
|
-
r.annotations?.destructiveHint !== true).length;
|
|
730
|
-
if (destructiveWithoutHint > 0)
|
|
731
|
-
return "FAIL";
|
|
732
|
-
if (misalignedCount > 0)
|
|
733
|
-
return "FAIL";
|
|
734
|
-
if (annotatedCount === totalTools)
|
|
735
|
-
return "PASS";
|
|
736
|
-
const annotationRate = annotatedCount / totalTools;
|
|
737
|
-
if (annotationRate >= 0.8)
|
|
738
|
-
return "NEED_MORE_INFO";
|
|
739
|
-
if (annotationRate < 0.5)
|
|
740
|
-
return "FAIL";
|
|
741
|
-
return "NEED_MORE_INFO";
|
|
742
|
-
}
|
|
743
|
-
/**
|
|
744
|
-
* Calculate metrics and alignment breakdown
|
|
745
|
-
*/
|
|
746
|
-
calculateMetrics(results, totalTools) {
|
|
747
|
-
const alignmentBreakdown = {
|
|
748
|
-
aligned: results.filter((r) => r.alignmentStatus === "ALIGNED").length,
|
|
749
|
-
misaligned: results.filter((r) => r.alignmentStatus === "MISALIGNED")
|
|
750
|
-
.length,
|
|
751
|
-
reviewRecommended: results.filter((r) => r.alignmentStatus === "REVIEW_RECOMMENDED").length,
|
|
752
|
-
unknown: results.filter((r) => r.alignmentStatus === "UNKNOWN").length,
|
|
753
|
-
};
|
|
754
|
-
const annotatedCount = results.filter((r) => r.hasAnnotations).length;
|
|
755
|
-
const metrics = {
|
|
756
|
-
coverage: totalTools > 0 ? (annotatedCount / totalTools) * 100 : 100,
|
|
757
|
-
consistency: totalTools > 0
|
|
758
|
-
? ((totalTools - alignmentBreakdown.misaligned) / totalTools) * 100
|
|
759
|
-
: 100,
|
|
760
|
-
correctness: annotatedCount > 0
|
|
761
|
-
? (alignmentBreakdown.aligned / annotatedCount) * 100
|
|
762
|
-
: 0,
|
|
763
|
-
reviewRequired: alignmentBreakdown.reviewRecommended,
|
|
764
|
-
};
|
|
765
|
-
return { metrics, alignmentBreakdown };
|
|
766
|
-
}
|
|
767
|
-
/**
|
|
768
|
-
* Generate explanation
|
|
769
|
-
*/
|
|
770
|
-
generateExplanation(annotatedCount, missingCount, misalignedCount, totalTools) {
|
|
771
|
-
const parts = [];
|
|
772
|
-
if (totalTools === 0) {
|
|
773
|
-
return "No tools found to assess for annotations.";
|
|
774
|
-
}
|
|
775
|
-
parts.push(`Tool annotation coverage: ${annotatedCount}/${totalTools} tools have annotations.`);
|
|
776
|
-
if (missingCount > 0) {
|
|
777
|
-
parts.push(`${missingCount} tool(s) are missing required annotations (readOnlyHint, destructiveHint).`);
|
|
778
|
-
}
|
|
779
|
-
if (misalignedCount > 0) {
|
|
780
|
-
parts.push(`${misalignedCount} tool(s) have potentially misaligned annotations based on naming patterns.`);
|
|
781
|
-
}
|
|
782
|
-
if (missingCount === 0 && misalignedCount === 0) {
|
|
783
|
-
parts.push("All tools are properly annotated.");
|
|
784
|
-
}
|
|
785
|
-
return parts.join(" ");
|
|
786
|
-
}
|
|
787
|
-
/**
|
|
788
|
-
* Generate enhanced explanation with Claude analysis
|
|
789
|
-
*/
|
|
790
|
-
generateEnhancedExplanation(annotatedCount, missingCount, highConfidenceMisalignments, totalTools) {
|
|
791
|
-
const parts = [];
|
|
792
|
-
if (totalTools === 0) {
|
|
793
|
-
return "No tools found to assess for annotations.";
|
|
794
|
-
}
|
|
795
|
-
parts.push(`Tool annotation coverage: ${annotatedCount}/${totalTools} tools have annotations.`);
|
|
796
|
-
if (missingCount > 0) {
|
|
797
|
-
parts.push(`${missingCount} tool(s) are missing required annotations (readOnlyHint, destructiveHint).`);
|
|
798
|
-
}
|
|
799
|
-
if (highConfidenceMisalignments > 0) {
|
|
800
|
-
parts.push(`Claude analysis identified ${highConfidenceMisalignments} high-confidence annotation misalignment(s).`);
|
|
801
|
-
}
|
|
802
|
-
parts.push("Analysis enhanced with Claude semantic behavior inference.");
|
|
803
|
-
return parts.join(" ");
|
|
804
|
-
}
|
|
805
|
-
/**
|
|
806
|
-
* Generate recommendations
|
|
807
|
-
*/
|
|
808
|
-
generateRecommendations(results) {
|
|
809
|
-
const recommendations = [];
|
|
810
|
-
const allRecs = new Set();
|
|
811
|
-
for (const result of results) {
|
|
812
|
-
for (const rec of result.recommendations) {
|
|
813
|
-
allRecs.add(rec);
|
|
814
|
-
}
|
|
815
|
-
}
|
|
816
|
-
const destructiveRecs = Array.from(allRecs).filter((r) => r.includes("destructive"));
|
|
817
|
-
const otherRecs = Array.from(allRecs).filter((r) => !r.includes("destructive"));
|
|
818
|
-
if (destructiveRecs.length > 0) {
|
|
819
|
-
recommendations.push("PRIORITY: The following tools appear to perform destructive operations but lack proper destructiveHint annotation:");
|
|
820
|
-
recommendations.push(...destructiveRecs.slice(0, 5));
|
|
821
|
-
}
|
|
822
|
-
if (otherRecs.length > 0) {
|
|
823
|
-
recommendations.push(...otherRecs.slice(0, 5));
|
|
824
|
-
}
|
|
825
|
-
if (recommendations.length === 0) {
|
|
826
|
-
recommendations.push("All tools have proper annotations. No action required.");
|
|
827
|
-
}
|
|
828
|
-
else {
|
|
829
|
-
recommendations.push("Reference: MCP Directory Policy #17 requires tools to have readOnlyHint and destructiveHint annotations.");
|
|
830
|
-
}
|
|
831
|
-
return recommendations;
|
|
832
|
-
}
|
|
833
|
-
/**
|
|
834
|
-
* Generate enhanced recommendations with Claude analysis
|
|
835
|
-
*/
|
|
836
|
-
generateEnhancedRecommendations(results) {
|
|
837
|
-
const recommendations = [];
|
|
838
|
-
const claudeMisalignments = results.filter((r) => r.claudeInference &&
|
|
839
|
-
r.claudeInference.source === "claude-inferred" &&
|
|
840
|
-
r.claudeInference.confidence >= 70 &&
|
|
841
|
-
r.claudeInference.misalignmentDetected);
|
|
842
|
-
if (claudeMisalignments.length > 0) {
|
|
843
|
-
recommendations.push("HIGH CONFIDENCE: Claude analysis identified the following annotation issues:");
|
|
844
|
-
for (const result of claudeMisalignments.slice(0, 5)) {
|
|
845
|
-
if (result.claudeInference) {
|
|
846
|
-
recommendations.push(` - ${result.toolName}: ${result.claudeInference.reasoning}`);
|
|
847
|
-
}
|
|
848
|
-
}
|
|
849
|
-
}
|
|
850
|
-
const claudeSuggestions = results
|
|
851
|
-
.filter((r) => r.claudeInference &&
|
|
852
|
-
r.claudeInference.source === "claude-inferred" &&
|
|
853
|
-
r.claudeInference.confidence >= 60)
|
|
854
|
-
.flatMap((r) => r.recommendations.filter((rec) => rec.includes("Claude")));
|
|
855
|
-
if (claudeSuggestions.length > 0) {
|
|
856
|
-
recommendations.push(...claudeSuggestions.slice(0, 5));
|
|
857
|
-
}
|
|
858
|
-
const patternRecs = new Set();
|
|
859
|
-
for (const result of results) {
|
|
860
|
-
for (const rec of result.recommendations) {
|
|
861
|
-
if (!rec.includes("Claude")) {
|
|
862
|
-
patternRecs.add(rec);
|
|
863
|
-
}
|
|
864
|
-
}
|
|
865
|
-
}
|
|
866
|
-
const destructiveRecs = Array.from(patternRecs).filter((r) => r.includes("destructive"));
|
|
867
|
-
const otherRecs = Array.from(patternRecs).filter((r) => !r.includes("destructive"));
|
|
868
|
-
if (destructiveRecs.length > 0) {
|
|
869
|
-
recommendations.push("PRIORITY: Potential destructive tools without proper hints:");
|
|
870
|
-
recommendations.push(...destructiveRecs.slice(0, 3));
|
|
871
|
-
}
|
|
872
|
-
if (otherRecs.length > 0 && recommendations.length < 10) {
|
|
873
|
-
recommendations.push(...otherRecs.slice(0, 3));
|
|
874
|
-
}
|
|
875
|
-
if (recommendations.length === 0) {
|
|
876
|
-
recommendations.push("All tools have proper annotations. No action required.");
|
|
877
|
-
}
|
|
878
|
-
else {
|
|
879
|
-
recommendations.push("Reference: MCP Directory Policy #17 requires tools to have readOnlyHint and destructiveHint annotations.");
|
|
880
|
-
}
|
|
881
|
-
return recommendations;
|
|
882
|
-
}
|
|
883
290
|
}
|