@bryan-thompson/inspector-assessment-client 1.11.0 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/{OAuthCallback-DQS8ZWD9.js → OAuthCallback-DD8JgGmx.js} +1 -1
- package/dist/assets/{OAuthDebugCallback-CeG1zXQt.js → OAuthDebugCallback-CGeg00AP.js} +1 -1
- package/dist/assets/{index-C89qxkOz.js → index-sUICDw7A.js} +1194 -384
- package/dist/index.html +1 -1
- package/lib/lib/assessmentTypes.d.ts +108 -1
- package/lib/lib/assessmentTypes.d.ts.map +1 -1
- package/lib/lib/moduleScoring.d.ts +23 -0
- package/lib/lib/moduleScoring.d.ts.map +1 -0
- package/lib/lib/moduleScoring.js +53 -0
- package/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -1
- package/lib/services/assessment/AssessmentOrchestrator.js +22 -37
- package/lib/services/assessment/TestDataGenerator.d.ts +22 -0
- package/lib/services/assessment/TestDataGenerator.d.ts.map +1 -1
- package/lib/services/assessment/TestDataGenerator.js +78 -0
- package/lib/services/assessment/config/annotationPatterns.d.ts +70 -0
- package/lib/services/assessment/config/annotationPatterns.d.ts.map +1 -0
- package/lib/services/assessment/config/annotationPatterns.js +305 -0
- package/lib/services/assessment/modules/FunctionalityAssessor.d.ts +15 -0
- package/lib/services/assessment/modules/FunctionalityAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/FunctionalityAssessor.js +137 -6
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +20 -2
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/ToolAnnotationAssessor.js +266 -106
- package/package.json +1 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"annotationPatterns.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/config/annotationPatterns.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH;;;;GAIG;AACH,MAAM,WAAW,uBAAuB;IACtC,iFAAiF;IACjF,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,0FAA0F;IAC1F,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,8FAA8F;IAC9F,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,mFAAmF;IACnF,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,QAAQ,EAAE,UAAU,GAAG,aAAa,GAAG,OAAO,GAAG,WAAW,GAAG,SAAS,CAAC;IACzE,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACtC,WAAW,EAAE,OAAO,CAAC;CACtB;AAED;;;GAGG;AACH,eAAO,MAAM,2BAA2B,EAAE,uBAqKzC,CAAC;AAoBF;;GAEG;AACH,wBAAgB,eAAe,CAC7B,MAAM,EAAE,uBAAuB,GAC9B,gBAAgB,CAOlB;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAC/B,UAAU,CAAC,EAAE,MAAM,GAClB,uBAAuB,CAyBzB;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC9B,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,gBAAgB,GACzB,kBAAkB,CA0DpB;AAOD;;GAEG;AACH,wBAAgB,0BAA0B,IAAI,gBAAgB,CAK7D"}
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool Annotation Pattern Configuration
|
|
3
|
+
*
|
|
4
|
+
* Configurable pattern system for inferring expected tool behavior from names.
|
|
5
|
+
* Supports JSON configuration files for customization.
|
|
6
|
+
*/
|
|
7
|
+
import * as fs from "fs";
|
|
8
|
+
/**
 * Expand bare verbs into the two prefix spellings used by the defaults:
 * the underscore form first ("get_"), then the hyphen form ("get-").
 */
const withSeparators = (verbs) => verbs.flatMap((verb) => [`${verb}_`, `${verb}-`]);
/**
 * Default annotation patterns.
 *
 * Four categories of tool-name prefixes (readOnly, destructive, write,
 * ambiguous). Every verb appears twice, once per separator style.
 * Per the original authors, these patterns have been validated against
 * real-world MCP servers.
 */
export const DEFAULT_ANNOTATION_PATTERNS = {
    readOnly: withSeparators([
        "get",
        "list",
        "fetch",
        "read",
        "query",
        "search",
        "find",
        "show",
        "view",
        "describe",
        "check",
        "verify",
        "validate",
        "count",
        "status",
        "info",
        "lookup",
        "browse",
        "preview",
        "download",
    ]),
    destructive: withSeparators([
        "delete",
        "remove",
        "destroy",
        "drop",
        "purge",
        "clear",
        "wipe",
        "erase",
        "reset",
        "truncate",
        "revoke",
        "terminate",
        "cancel",
        "kill",
        "force",
    ]),
    write: withSeparators([
        "create",
        "add",
        "insert",
        "update",
        "modify",
        "edit",
        "change",
        "set",
        "put",
        "patch",
        "post",
        "write",
        "upload",
        "send",
        "submit",
        "publish",
        "enable",
        "disable",
        "start",
        "stop",
        "run",
        "execute",
    ]),
    ambiguous: withSeparators([
        "store",
        "queue",
        "cache",
        "process",
        "handle",
        "manage",
        "sync",
        "transfer",
        "push",
        "pop",
        "apply",
        "compute",
        "calculate",
        "transform",
        "convert",
        "evaluate",
        "log",
        "record",
        "track",
        "register",
        "save",
    ]),
};
|
|
178
|
+
/**
 * Turn a prefix string into a case-insensitive, start-anchored RegExp.
 *
 * One trailing "_" or "-" is dropped from the prefix and replaced by an
 * optional separator class, e.g. "get_" -> /^get[_-]?/i.
 * Because the separator is optional, any name that merely begins with the
 * stem also matches.
 */
function patternToRegex(pattern) {
    // Stem = the prefix without its final separator character (if any).
    const stem = pattern.replace(/[_-]$/, "");
    const source = ["^", escapeRegex(stem), "[_-]?"].join("");
    return new RegExp(source, "i");
}
|
|
188
|
+
/**
 * Backslash-escape every regex metacharacter in `str` so the result can be
 * embedded in a RegExp source and matched literally.
 */
function escapeRegex(str) {
    const metaCharacters = /[.*+?^${}()|[\]\\]/g;
    return str.replace(metaCharacters, (metaChar) => `\\${metaChar}`);
}
|
|
194
|
+
/**
 * Compile each category's string prefixes into RegExp objects so they can
 * be matched repeatedly without rebuilding the expressions.
 *
 * @param config - Object with readOnly, destructive, write and ambiguous
 *   arrays of prefix strings
 * @returns Object with the same four keys, each holding the compiled RegExps
 */
export function compilePatterns(config) {
    const { readOnly, destructive, write, ambiguous } = config;
    const compileList = (prefixes) => prefixes.map((prefix) => patternToRegex(prefix));
    return {
        readOnly: compileList(readOnly),
        destructive: compileList(destructive),
        write: compileList(write),
        ambiguous: compileList(ambiguous),
    };
}
|
|
205
|
+
/**
 * Load pattern configuration from a JSON file.
 * Partial configs are merged with defaults: a category present in the file
 * replaces the default list for that category; missing categories keep the
 * defaults.
 *
 * The file is validated before use. If it cannot be read or parsed, or its
 * top-level value is not an object, the defaults are returned and a warning
 * (including the failure reason) is logged. A category whose value is not an
 * array of strings is ignored with a warning and falls back to its default,
 * so a malformed entry cannot crash pattern compilation later.
 *
 * @param configPath - Path to JSON configuration file; when falsy the
 *   defaults are returned without touching the file system
 * @returns Merged configuration with defaults
 */
export function loadPatternConfig(configPath) {
    if (!configPath) {
        return DEFAULT_ANNOTATION_PATTERNS;
    }
    try {
        const configContent = fs.readFileSync(configPath, "utf-8");
        const userConfig = JSON.parse(configContent);
        const isPlainObject = userConfig !== null &&
            typeof userConfig === "object" &&
            !Array.isArray(userConfig);
        if (!isPlainObject) {
            console.warn(`Warning: Could not load pattern config from ${configPath}, using defaults (top-level JSON value must be an object)`);
            return DEFAULT_ANNOTATION_PATTERNS;
        }
        // Merge with defaults - user config overrides defaults for specified categories
        const merged = {};
        for (const category of ["readOnly", "destructive", "write", "ambiguous"]) {
            const override = userConfig[category];
            if (override == null) {
                // Category not specified (or explicitly null): keep the default list.
                merged[category] = DEFAULT_ANNOTATION_PATTERNS[category];
                continue;
            }
            const isStringList = Array.isArray(override) &&
                override.every((entry) => typeof entry === "string");
            if (!isStringList) {
                console.warn(`Warning: Ignoring "${category}" in pattern config ${configPath}: expected an array of strings, using defaults`);
                merged[category] = DEFAULT_ANNOTATION_PATTERNS[category];
                continue;
            }
            merged[category] = override;
        }
        return merged;
    }
    catch (error) {
        // Best effort by design: a broken config must not abort the run, but
        // the reason is surfaced so the user can fix the file.
        const detail = error instanceof Error ? error.message : String(error);
        console.warn(`Warning: Could not load pattern config from ${configPath}, using defaults (${detail})`);
        return DEFAULT_ANNOTATION_PATTERNS;
    }
}
|
|
232
|
+
/**
 * Classify a tool name by testing it against compiled pattern sets.
 *
 * Categories are tried in a fixed priority order and the first matching
 * pattern wins: ambiguous (low confidence, flagged ambiguous), then
 * destructive (high), readOnly (high) and write (medium). When nothing
 * matches, the result is category "unknown" with a null pattern, low
 * confidence and the ambiguity flag set.
 *
 * @param toolName - The tool name to match (lower-cased before testing)
 * @param patterns - Compiled pattern sets keyed by category
 * @returns Match result with category, matched pattern source, confidence,
 *   and ambiguity flag
 */
export function matchToolPattern(toolName, patterns) {
    const candidate = toolName.toLowerCase();
    // [category, confidence, isAmbiguous] in evaluation order; ambiguous
    // patterns are deliberately checked before everything else.
    const priority = [
        ["ambiguous", "low", true],
        ["destructive", "high", false],
        ["readOnly", "high", false],
        ["write", "medium", false],
    ];
    for (const [category, confidence, isAmbiguous] of priority) {
        for (const regex of patterns[category]) {
            if (regex.test(candidate)) {
                return { category, pattern: regex.source, confidence, isAmbiguous };
            }
        }
    }
    // No pattern match
    return {
        category: "unknown",
        pattern: null,
        confidence: "low",
        isAmbiguous: true,
    };
}
|
|
293
|
+
/**
 * Module-level cache for the compiled default patterns; stays null until
 * the first call to getDefaultCompiledPatterns().
 */
let defaultCompiledPatterns = null;
/**
 * Return the compiled form of DEFAULT_ANNOTATION_PATTERNS.
 * The defaults are compiled on the first call and the same object is
 * returned on every later call.
 */
export function getDefaultCompiledPatterns() {
    defaultCompiledPatterns ??= compilePatterns(DEFAULT_ANNOTATION_PATTERNS);
    return defaultCompiledPatterns;
}
|
|
@@ -6,6 +6,7 @@ import { FunctionalityAssessment } from "../../../lib/assessmentTypes.js";
|
|
|
6
6
|
import { BaseAssessor } from "./BaseAssessor.js";
|
|
7
7
|
import { AssessmentContext } from "../AssessmentOrchestrator.js";
|
|
8
8
|
export declare class FunctionalityAssessor extends BaseAssessor {
|
|
9
|
+
private toolClassifier;
|
|
9
10
|
/**
|
|
10
11
|
* Select tools for testing based on configuration
|
|
11
12
|
*/
|
|
@@ -14,6 +15,20 @@ export declare class FunctionalityAssessor extends BaseAssessor {
|
|
|
14
15
|
private testTool;
|
|
15
16
|
private generateMinimalParams;
|
|
16
17
|
private generateParamValue;
|
|
18
|
+
/**
|
|
19
|
+
* Field names that indicate specific data types regardless of tool category.
|
|
20
|
+
* These take precedence over category-specific generation.
|
|
21
|
+
*/
|
|
22
|
+
private static readonly SPECIFIC_FIELD_PATTERNS;
|
|
23
|
+
/**
|
|
24
|
+
* Generate smart parameter value with metadata about how it was generated.
|
|
25
|
+
* Returns value, source type, and reason for downstream consumers.
|
|
26
|
+
*/
|
|
27
|
+
private generateSmartParamValueWithMetadata;
|
|
28
|
+
/**
|
|
29
|
+
* Determine overall generation strategy based on field sources
|
|
30
|
+
*/
|
|
31
|
+
private determineStrategy;
|
|
17
32
|
generateTestInput(schema: any): unknown;
|
|
18
33
|
private generateExplanation;
|
|
19
34
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"FunctionalityAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/FunctionalityAssessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,
|
|
1
|
+
{"version":3,"file":"FunctionalityAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/FunctionalityAssessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,uBAAuB,EAGxB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAM9D,qBAAa,qBAAsB,SAAQ,YAAY;IACrD,OAAO,CAAC,cAAc,CAAwB;IAE9C;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAoCvB,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,uBAAuB,CAAC;YA0H5D,QAAQ;IAiFtB,OAAO,CAAC,qBAAqB;IA0D7B,OAAO,CAAC,kBAAkB;IAwF1B;;;OAGG;IACH,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,uBAAuB,CAe7C;IAEF;;;OAGG;IACH,OAAO,CAAC,mCAAmC;IAsF3C;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAWlB,iBAAiB,CAAC,MAAM,EAAE,GAAG,GAAG,OAAO;IAI9C,OAAO,CAAC,mBAAmB;CA+B5B"}
|
|
@@ -5,7 +5,10 @@
|
|
|
5
5
|
import { BaseAssessor } from "./BaseAssessor.js";
|
|
6
6
|
import { ResponseValidator } from "../ResponseValidator.js";
|
|
7
7
|
import { createConcurrencyLimit } from "../lib/concurrencyLimit.js";
|
|
8
|
+
import { ToolClassifier, ToolCategory } from "../ToolClassifier.js";
|
|
9
|
+
import { TestDataGenerator } from "../TestDataGenerator.js";
|
|
8
10
|
export class FunctionalityAssessor extends BaseAssessor {
|
|
11
|
+
toolClassifier = new ToolClassifier();
|
|
9
12
|
/**
|
|
10
13
|
* Select tools for testing based on configuration
|
|
11
14
|
*/
|
|
@@ -118,9 +121,9 @@ export class FunctionalityAssessor extends BaseAssessor {
|
|
|
118
121
|
}
|
|
119
122
|
async testTool(tool, callTool) {
|
|
120
123
|
const startTime = Date.now();
|
|
124
|
+
// Generate minimal valid parameters with metadata
|
|
125
|
+
const { params: testParams, metadata } = this.generateMinimalParams(tool);
|
|
121
126
|
try {
|
|
122
|
-
// Generate minimal valid parameters
|
|
123
|
-
const testParams = this.generateMinimalParams(tool);
|
|
124
127
|
this.log(`Testing tool: ${tool.name} with params: ${JSON.stringify(testParams)}`);
|
|
125
128
|
// Execute tool with timeout
|
|
126
129
|
const response = await this.executeWithTimeout(callTool(tool.name, testParams), this.config.testTimeout);
|
|
@@ -145,6 +148,7 @@ export class FunctionalityAssessor extends BaseAssessor {
|
|
|
145
148
|
executionTime,
|
|
146
149
|
testParameters: testParams,
|
|
147
150
|
response,
|
|
151
|
+
testInputMetadata: metadata,
|
|
148
152
|
};
|
|
149
153
|
}
|
|
150
154
|
// Real tool failure (not just validation)
|
|
@@ -156,6 +160,7 @@ export class FunctionalityAssessor extends BaseAssessor {
|
|
|
156
160
|
executionTime,
|
|
157
161
|
testParameters: testParams,
|
|
158
162
|
response,
|
|
163
|
+
testInputMetadata: metadata,
|
|
159
164
|
};
|
|
160
165
|
}
|
|
161
166
|
return {
|
|
@@ -165,6 +170,7 @@ export class FunctionalityAssessor extends BaseAssessor {
|
|
|
165
170
|
executionTime,
|
|
166
171
|
testParameters: testParams,
|
|
167
172
|
response,
|
|
173
|
+
testInputMetadata: metadata,
|
|
168
174
|
};
|
|
169
175
|
}
|
|
170
176
|
catch (error) {
|
|
@@ -174,28 +180,50 @@ export class FunctionalityAssessor extends BaseAssessor {
|
|
|
174
180
|
status: "broken",
|
|
175
181
|
error: this.extractErrorMessage(error),
|
|
176
182
|
executionTime: Date.now() - startTime,
|
|
183
|
+
testInputMetadata: metadata,
|
|
177
184
|
};
|
|
178
185
|
}
|
|
179
186
|
}
|
|
180
187
|
generateMinimalParams(tool) {
|
|
188
|
+
// Classify tool to get category for smart parameter generation
|
|
189
|
+
const classification = this.toolClassifier.classify(tool.name, tool.description || "");
|
|
190
|
+
const primaryCategory = classification.categories[0] || ToolCategory.GENERIC;
|
|
191
|
+
const emptyResult = {
|
|
192
|
+
params: {},
|
|
193
|
+
metadata: {
|
|
194
|
+
toolCategory: primaryCategory,
|
|
195
|
+
generationStrategy: "default",
|
|
196
|
+
fieldSources: {},
|
|
197
|
+
},
|
|
198
|
+
};
|
|
181
199
|
if (!tool.inputSchema)
|
|
182
|
-
return
|
|
200
|
+
return emptyResult;
|
|
183
201
|
const schema = typeof tool.inputSchema === "string"
|
|
184
202
|
? this.safeJsonParse(tool.inputSchema)
|
|
185
203
|
: tool.inputSchema;
|
|
186
204
|
if (!schema?.properties)
|
|
187
|
-
return
|
|
205
|
+
return emptyResult;
|
|
188
206
|
const params = {};
|
|
207
|
+
const fieldSources = {};
|
|
189
208
|
const required = schema.required || [];
|
|
190
209
|
// For functionality testing, only generate REQUIRED parameters
|
|
191
210
|
// This avoids triggering validation errors on optional parameters with complex rules
|
|
192
211
|
for (const [key, prop] of Object.entries(schema.properties)) {
|
|
193
212
|
// Only include required parameters for basic functionality testing
|
|
194
213
|
if (required.includes(key)) {
|
|
195
|
-
|
|
214
|
+
const { value, source, reason } = this.generateSmartParamValueWithMetadata(prop, key, primaryCategory);
|
|
215
|
+
params[key] = value;
|
|
216
|
+
fieldSources[key] = { field: key, value, source, reason };
|
|
196
217
|
}
|
|
197
218
|
}
|
|
198
|
-
return
|
|
219
|
+
return {
|
|
220
|
+
params,
|
|
221
|
+
metadata: {
|
|
222
|
+
toolCategory: primaryCategory,
|
|
223
|
+
generationStrategy: this.determineStrategy(fieldSources),
|
|
224
|
+
fieldSources,
|
|
225
|
+
},
|
|
226
|
+
};
|
|
199
227
|
}
|
|
200
228
|
generateParamValue(prop, fieldName, includeOptional = false) {
|
|
201
229
|
const type = prop.type;
|
|
@@ -269,6 +297,109 @@ export class FunctionalityAssessor extends BaseAssessor {
|
|
|
269
297
|
return {};
|
|
270
298
|
}
|
|
271
299
|
}
|
|
300
|
+
/**
|
|
301
|
+
* Field names that indicate specific data types regardless of tool category.
|
|
302
|
+
* These take precedence over category-specific generation.
|
|
303
|
+
*/
|
|
304
|
+
static SPECIFIC_FIELD_PATTERNS = [
|
|
305
|
+
/url/i,
|
|
306
|
+
/endpoint/i,
|
|
307
|
+
/link/i,
|
|
308
|
+
/email/i,
|
|
309
|
+
/mail/i,
|
|
310
|
+
/path/i,
|
|
311
|
+
/file/i,
|
|
312
|
+
/directory/i,
|
|
313
|
+
/folder/i,
|
|
314
|
+
/uuid/i,
|
|
315
|
+
/page_id/i,
|
|
316
|
+
/database_id/i,
|
|
317
|
+
/user_id/i,
|
|
318
|
+
/block_id/i,
|
|
319
|
+
];
|
|
320
|
+
/**
|
|
321
|
+
* Generate smart parameter value with metadata about how it was generated.
|
|
322
|
+
* Returns value, source type, and reason for downstream consumers.
|
|
323
|
+
*/
|
|
324
|
+
generateSmartParamValueWithMetadata(prop, fieldName, category) {
|
|
325
|
+
// Handle enum first
|
|
326
|
+
if (prop.enum && prop.enum.length > 0) {
|
|
327
|
+
return {
|
|
328
|
+
value: prop.enum[0],
|
|
329
|
+
source: "enum",
|
|
330
|
+
reason: `First enum value: ${prop.enum[0]}`,
|
|
331
|
+
};
|
|
332
|
+
}
|
|
333
|
+
// Handle format (uri, email, etc.)
|
|
334
|
+
if (prop.format === "uri") {
|
|
335
|
+
return {
|
|
336
|
+
value: "https://example.com",
|
|
337
|
+
source: "format",
|
|
338
|
+
reason: "URI format detected",
|
|
339
|
+
};
|
|
340
|
+
}
|
|
341
|
+
if (prop.format === "email") {
|
|
342
|
+
return {
|
|
343
|
+
value: "test@example.com",
|
|
344
|
+
source: "format",
|
|
345
|
+
reason: "Email format detected",
|
|
346
|
+
};
|
|
347
|
+
}
|
|
348
|
+
// For non-string types, use standard generation
|
|
349
|
+
if (prop.type !== "string") {
|
|
350
|
+
const value = this.generateParamValue(prop, fieldName);
|
|
351
|
+
return {
|
|
352
|
+
value,
|
|
353
|
+
source: "default",
|
|
354
|
+
reason: `Default for type: ${prop.type}`,
|
|
355
|
+
};
|
|
356
|
+
}
|
|
357
|
+
// Specific field names (url, email, path, etc.) take precedence over category
|
|
358
|
+
// These indicate explicit data type requirements regardless of tool category
|
|
359
|
+
const isSpecificFieldName = FunctionalityAssessor.SPECIFIC_FIELD_PATTERNS.some((pattern) => pattern.test(fieldName));
|
|
360
|
+
if (isSpecificFieldName) {
|
|
361
|
+
const fieldValue = TestDataGenerator.generateSingleValue(fieldName, prop);
|
|
362
|
+
return {
|
|
363
|
+
value: fieldValue,
|
|
364
|
+
source: "field-name",
|
|
365
|
+
reason: `Field name pattern: ${fieldName}`,
|
|
366
|
+
};
|
|
367
|
+
}
|
|
368
|
+
// Check category-specific data
|
|
369
|
+
const categoryData = TestDataGenerator.TOOL_CATEGORY_DATA[category];
|
|
370
|
+
if (categoryData?.default) {
|
|
371
|
+
return {
|
|
372
|
+
value: categoryData.default[0],
|
|
373
|
+
source: "category",
|
|
374
|
+
reason: `Category ${category} default value`,
|
|
375
|
+
};
|
|
376
|
+
}
|
|
377
|
+
// Fall back to field-name detection for generic fields
|
|
378
|
+
const fieldValue = TestDataGenerator.generateSingleValue(fieldName, prop);
|
|
379
|
+
if (fieldValue !== "test") {
|
|
380
|
+
return {
|
|
381
|
+
value: fieldValue,
|
|
382
|
+
source: "field-name",
|
|
383
|
+
reason: `Field name pattern: ${fieldName}`,
|
|
384
|
+
};
|
|
385
|
+
}
|
|
386
|
+
return {
|
|
387
|
+
value: "test",
|
|
388
|
+
source: "default",
|
|
389
|
+
reason: "No specific pattern matched",
|
|
390
|
+
};
|
|
391
|
+
}
|
|
392
|
+
/**
|
|
393
|
+
* Determine overall generation strategy based on field sources
|
|
394
|
+
*/
|
|
395
|
+
determineStrategy(fieldSources) {
|
|
396
|
+
const sources = Object.values(fieldSources).map((f) => f.source);
|
|
397
|
+
if (sources.includes("category"))
|
|
398
|
+
return "category-specific";
|
|
399
|
+
if (sources.includes("field-name"))
|
|
400
|
+
return "field-name-aware";
|
|
401
|
+
return "default";
|
|
402
|
+
}
|
|
272
403
|
// Public method for testing purposes - allows tests to verify parameter generation logic
|
|
273
404
|
// Always includes optional properties to test full schema
|
|
274
405
|
generateTestInput(schema) {
|
|
@@ -12,8 +12,9 @@
|
|
|
12
12
|
*/
|
|
13
13
|
import { BaseAssessor } from "./BaseAssessor.js";
|
|
14
14
|
import { AssessmentContext } from "../AssessmentOrchestrator.js";
|
|
15
|
-
import type { ToolAnnotationAssessment, ToolAnnotationResult } from "../../../lib/assessmentTypes.js";
|
|
15
|
+
import type { ToolAnnotationAssessment, ToolAnnotationResult, AssessmentConfiguration } from "../../../lib/assessmentTypes.js";
|
|
16
16
|
import type { ClaudeCodeBridge } from "../lib/claudeCodeBridge.js";
|
|
17
|
+
import { type CompiledPatterns } from "../config/annotationPatterns.js";
|
|
17
18
|
/**
|
|
18
19
|
* Enhanced tool annotation result with Claude inference
|
|
19
20
|
*/
|
|
@@ -43,6 +44,12 @@ export interface EnhancedToolAnnotationAssessment extends ToolAnnotationAssessme
|
|
|
43
44
|
}
|
|
44
45
|
export declare class ToolAnnotationAssessor extends BaseAssessor {
|
|
45
46
|
private claudeBridge?;
|
|
47
|
+
private compiledPatterns;
|
|
48
|
+
constructor(config: AssessmentConfiguration);
|
|
49
|
+
/**
|
|
50
|
+
* Set custom compiled patterns for behavior inference
|
|
51
|
+
*/
|
|
52
|
+
setPatterns(patterns: CompiledPatterns): void;
|
|
46
53
|
/**
|
|
47
54
|
* Set Claude Code Bridge for enhanced behavior inference
|
|
48
55
|
*/
|
|
@@ -69,6 +76,7 @@ export declare class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
69
76
|
private generateEnhancedRecommendations;
|
|
70
77
|
/**
|
|
71
78
|
* Assess a single tool's annotations
|
|
79
|
+
* Now includes alignment status with confidence-aware logic
|
|
72
80
|
*/
|
|
73
81
|
private assessTool;
|
|
74
82
|
/**
|
|
@@ -76,14 +84,24 @@ export declare class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
76
84
|
* MCP SDK may have annotations in different locations
|
|
77
85
|
*/
|
|
78
86
|
private extractAnnotations;
|
|
87
|
+
/**
|
|
88
|
+
* Extract parameters from tool input schema for event emission
|
|
89
|
+
*/
|
|
90
|
+
private extractToolParams;
|
|
79
91
|
/**
|
|
80
92
|
* Infer expected behavior from tool name and description
|
|
93
|
+
* Now returns confidence level and ambiguity flag for better handling
|
|
81
94
|
*/
|
|
82
95
|
private inferBehavior;
|
|
83
96
|
/**
|
|
84
|
-
* Determine overall status
|
|
97
|
+
* Determine overall status using alignment status.
|
|
98
|
+
* Only MISALIGNED counts as failure; REVIEW_RECOMMENDED does not fail.
|
|
85
99
|
*/
|
|
86
100
|
private determineAnnotationStatus;
|
|
101
|
+
/**
|
|
102
|
+
* Calculate metrics and alignment breakdown for the assessment
|
|
103
|
+
*/
|
|
104
|
+
private calculateMetrics;
|
|
87
105
|
/**
|
|
88
106
|
* Generate explanation
|
|
89
107
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ToolAnnotationAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ToolAnnotationAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,KAAK,EACV,wBAAwB,EACxB,oBAAoB,
|
|
1
|
+
{"version":3,"file":"ToolAnnotationAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ToolAnnotationAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,KAAK,EACV,wBAAwB,EACxB,oBAAoB,EAKpB,uBAAuB,EACxB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,EACL,KAAK,gBAAgB,EAGtB,MAAM,8BAA8B,CAAC;AAEtC;;GAEG;AACH,MAAM,WAAW,4BAA6B,SAAQ,oBAAoB;IACxE,eAAe,CAAC,EAAE;QAChB,gBAAgB,EAAE,OAAO,CAAC;QAC1B,mBAAmB,EAAE,OAAO,CAAC;QAC7B,UAAU,EAAE,MAAM,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,oBAAoB,EAAE;YACpB,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,eAAe,CAAC,EAAE,OAAO,CAAC;YAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;SAC1B,CAAC;QACF,oBAAoB,EAAE,OAAO,CAAC;QAC9B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,MAAM,EAAE,iBAAiB,GAAG,eAAe,CAAC;KAC7C,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,gCAAiC,SAAQ,wBAAwB;IAChF,WAAW,EAAE,4BAA4B,EAAE,CAAC;IAC5C,cAAc,EAAE,OAAO,CAAC;IACxB,2BAA2B,EAAE,4BAA4B,EAAE,CAAC;CAC7D;AAKD,qBAAa,sBAAuB,SAAQ,YAAY;IACtD,OAAO,CAAC,YAAY,CAAC,CAAmB;IACxC,OAAO,CAAC,gBAAgB,CAAmB;gBAE/B,MAAM,EAAE,uBAAuB;IAM3C;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,gBAAgB,GAAG,IAAI;IAK7C;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAK/C;;OAEG;IACH,eAAe,IAAI,OAAO;IAO1B;;OAEG;IACG,MAAM,CACV,OAAO,EAAE,iBAAiB,GACzB,OAAO,CAAC,wBAAwB,GAAG,gCAAgC,CAAC;IAqPvE;;OAEG;YACW,0BAA0B;IA+IxC;;OAEG;IACH,OAAO,CAAC,2BAA2B;IAiCnC;;OAEG;IACH,OAAO,CAAC,+BAA+B;IAoFvC;;;OAGG;IACH,OAAO,CAAC,UAAU;IA8GlB;;;OAGG;IACH,OAAO,CAAC,kBAAkB;IAyC1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAuBzB;;;OAGG;IACH,OAAO,CAAC,aAAa;IAgGrB;;;OAGG;IACH,OAAO,CAAC,yBAAyB;IAkDjC;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAiDxB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAmC3B;;OAEG;IACH,OAAO,CAAC,uBAAuB;CA2ChC"}
|