@dotsetlabs/bellwether 1.0.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +74 -0
- package/README.md +8 -2
- package/dist/baseline/accessors.d.ts +1 -1
- package/dist/baseline/accessors.js +1 -3
- package/dist/baseline/baseline-format.d.ts +287 -0
- package/dist/baseline/baseline-format.js +12 -0
- package/dist/baseline/comparator.js +249 -11
- package/dist/baseline/converter.d.ts +15 -15
- package/dist/baseline/converter.js +46 -34
- package/dist/baseline/diff.d.ts +1 -1
- package/dist/baseline/diff.js +45 -28
- package/dist/baseline/error-analyzer.d.ts +1 -1
- package/dist/baseline/error-analyzer.js +90 -17
- package/dist/baseline/incremental-checker.js +8 -5
- package/dist/baseline/index.d.ts +2 -12
- package/dist/baseline/index.js +3 -23
- package/dist/baseline/performance-tracker.d.ts +0 -1
- package/dist/baseline/performance-tracker.js +13 -20
- package/dist/baseline/response-fingerprint.js +39 -2
- package/dist/baseline/saver.js +41 -10
- package/dist/baseline/schema-compare.d.ts +22 -0
- package/dist/baseline/schema-compare.js +259 -16
- package/dist/baseline/types.d.ts +10 -7
- package/dist/cache/response-cache.d.ts +8 -0
- package/dist/cache/response-cache.js +110 -0
- package/dist/cli/commands/check.js +23 -6
- package/dist/cli/commands/explore.js +34 -14
- package/dist/cli/index.js +8 -0
- package/dist/config/template.js +8 -7
- package/dist/config/validator.d.ts +59 -59
- package/dist/config/validator.js +245 -90
- package/dist/constants/core.d.ts +4 -0
- package/dist/constants/core.js +8 -19
- package/dist/constants/registry.d.ts +17 -0
- package/dist/constants/registry.js +18 -0
- package/dist/constants/testing.d.ts +0 -369
- package/dist/constants/testing.js +18 -456
- package/dist/constants.d.ts +1 -1
- package/dist/constants.js +1 -1
- package/dist/docs/contract.js +131 -83
- package/dist/docs/report.js +8 -5
- package/dist/interview/insights.d.ts +17 -0
- package/dist/interview/insights.js +52 -0
- package/dist/interview/interviewer.js +52 -10
- package/dist/interview/prompt-test-generator.d.ts +12 -0
- package/dist/interview/prompt-test-generator.js +77 -0
- package/dist/interview/resource-test-generator.d.ts +12 -0
- package/dist/interview/resource-test-generator.js +20 -0
- package/dist/interview/schema-inferrer.js +26 -4
- package/dist/interview/schema-test-generator.js +278 -31
- package/dist/interview/stateful-test-runner.d.ts +3 -0
- package/dist/interview/stateful-test-runner.js +80 -0
- package/dist/interview/types.d.ts +12 -0
- package/dist/transport/mcp-client.js +1 -1
- package/dist/transport/sse-transport.d.ts +7 -3
- package/dist/transport/sse-transport.js +157 -67
- package/dist/version.js +1 -1
- package/man/bellwether.1 +1 -1
- package/man/bellwether.1.md +2 -2
- package/package.json +1 -1
- package/schemas/bellwether-check.schema.json +185 -0
- package/schemas/bellwether-explore.schema.json +837 -0
- package/scripts/completions/bellwether.bash +10 -4
- package/scripts/completions/bellwether.zsh +55 -2
|
@@ -24,6 +24,26 @@ function getPrimaryType(schema) {
|
|
|
24
24
|
* Used when generating array items or object properties.
|
|
25
25
|
*/
|
|
26
26
|
function generateDefaultValueForSchema(schema, fixtures) {
|
|
27
|
+
// Prefer conditional schema branches if present
|
|
28
|
+
if (schema.if && schema.then) {
|
|
29
|
+
const merged = mergeSchemas(schema, schema.then);
|
|
30
|
+
return generateDefaultValueForSchema(merged, fixtures);
|
|
31
|
+
}
|
|
32
|
+
if (schema.if && schema.else) {
|
|
33
|
+
const merged = mergeSchemas(schema, schema.else);
|
|
34
|
+
return generateDefaultValueForSchema(merged, fixtures);
|
|
35
|
+
}
|
|
36
|
+
// Handle compositional schemas by selecting a representative variant
|
|
37
|
+
if (schema.oneOf && schema.oneOf.length > 0) {
|
|
38
|
+
return generateDefaultValueForSchema(schema.oneOf[0], fixtures);
|
|
39
|
+
}
|
|
40
|
+
if (schema.anyOf && schema.anyOf.length > 0) {
|
|
41
|
+
return generateDefaultValueForSchema(schema.anyOf[0], fixtures);
|
|
42
|
+
}
|
|
43
|
+
if (schema.allOf && schema.allOf.length > 0) {
|
|
44
|
+
const merged = mergeAllOfSchemas(schema.allOf);
|
|
45
|
+
return generateDefaultValueForSchema(merged, fixtures);
|
|
46
|
+
}
|
|
27
47
|
const type = getPrimaryType(schema);
|
|
28
48
|
// Use schema example if available
|
|
29
49
|
if (schema.examples && schema.examples.length > 0) {
|
|
@@ -61,6 +81,27 @@ function generateDefaultValueForSchema(schema, fixtures) {
|
|
|
61
81
|
return 'test';
|
|
62
82
|
}
|
|
63
83
|
}
|
|
84
|
+
/**
|
|
85
|
+
* Merge a base schema with an override schema (used for if/then/else).
|
|
86
|
+
* Only merges a safe subset of fields used by the test generator.
|
|
87
|
+
*/
|
|
88
|
+
function mergeSchemas(base, override) {
|
|
89
|
+
return {
|
|
90
|
+
...base,
|
|
91
|
+
...override,
|
|
92
|
+
properties: {
|
|
93
|
+
...(base.properties ?? {}),
|
|
94
|
+
...(override.properties ?? {}),
|
|
95
|
+
},
|
|
96
|
+
required: Array.from(new Set([...(base.required ?? []), ...(override.required ?? [])])),
|
|
97
|
+
};
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* Merge allOf schemas into a single schema (best-effort).
|
|
101
|
+
*/
|
|
102
|
+
function mergeAllOfSchemas(schemas) {
|
|
103
|
+
return schemas.reduce((acc, schema) => mergeSchemas(acc, schema), {});
|
|
104
|
+
}
|
|
64
105
|
/**
|
|
65
106
|
* Generate a smart string value for a schema without property name context.
|
|
66
107
|
* Used for nested array items where we don't have a property name.
|
|
@@ -129,6 +170,7 @@ function generateMinimalObject(schema, fixtures) {
|
|
|
129
170
|
const result = {};
|
|
130
171
|
const requiredProps = schema.required ?? [];
|
|
131
172
|
const properties = schema.properties ?? {};
|
|
173
|
+
const patternProperties = schema.patternProperties ?? {};
|
|
132
174
|
// Only populate required properties
|
|
133
175
|
for (const propName of requiredProps) {
|
|
134
176
|
const propSchema = properties[propName];
|
|
@@ -141,8 +183,34 @@ function generateMinimalObject(schema, fixtures) {
|
|
|
141
183
|
result[propName] = 'test';
|
|
142
184
|
}
|
|
143
185
|
}
|
|
186
|
+
// If there are no required properties but patternProperties exist, add one matching key
|
|
187
|
+
if (requiredProps.length === 0 &&
|
|
188
|
+
Object.keys(result).length === 0 &&
|
|
189
|
+
Object.keys(patternProperties).length > 0) {
|
|
190
|
+
const [pattern, propSchema] = Object.entries(patternProperties)[0];
|
|
191
|
+
const key = generateKeyForPattern(pattern);
|
|
192
|
+
result[key] = generateDefaultValueForPropertySchema(key, propSchema, fixtures);
|
|
193
|
+
}
|
|
144
194
|
return result;
|
|
145
195
|
}
|
|
196
|
+
/**
|
|
197
|
+
* Generate a key that matches a regex pattern (best-effort).
|
|
198
|
+
*/
|
|
199
|
+
function generateKeyForPattern(pattern) {
|
|
200
|
+
try {
|
|
201
|
+
const regex = new RegExp(pattern);
|
|
202
|
+
const candidates = ['test', 'key', 'value', 'item', 'prop'];
|
|
203
|
+
for (const candidate of candidates) {
|
|
204
|
+
if (regex.test(candidate)) {
|
|
205
|
+
return candidate;
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
catch {
|
|
210
|
+
// Ignore invalid regex - fall back to a generic key
|
|
211
|
+
}
|
|
212
|
+
return 'test';
|
|
213
|
+
}
|
|
146
214
|
/**
|
|
147
215
|
* Generate an array with a specific number of items based on the item schema.
|
|
148
216
|
* Used for boundary testing at minItems/maxItems limits.
|
|
@@ -169,6 +237,23 @@ function generateDefaultValueForPropertySchema(propName, schema, fixtures) {
|
|
|
169
237
|
if (fixtureValue !== undefined) {
|
|
170
238
|
return fixtureValue;
|
|
171
239
|
}
|
|
240
|
+
// Prefer conditional branches
|
|
241
|
+
if (schema.if && schema.then) {
|
|
242
|
+
return generateDefaultValueForPropertySchema(propName, mergeSchemas(schema, schema.then), fixtures);
|
|
243
|
+
}
|
|
244
|
+
if (schema.if && schema.else) {
|
|
245
|
+
return generateDefaultValueForPropertySchema(propName, mergeSchemas(schema, schema.else), fixtures);
|
|
246
|
+
}
|
|
247
|
+
// Handle compositional schemas
|
|
248
|
+
if (schema.oneOf && schema.oneOf.length > 0) {
|
|
249
|
+
return generateDefaultValueForPropertySchema(propName, schema.oneOf[0], fixtures);
|
|
250
|
+
}
|
|
251
|
+
if (schema.anyOf && schema.anyOf.length > 0) {
|
|
252
|
+
return generateDefaultValueForPropertySchema(propName, schema.anyOf[0], fixtures);
|
|
253
|
+
}
|
|
254
|
+
if (schema.allOf && schema.allOf.length > 0) {
|
|
255
|
+
return generateDefaultValueForPropertySchema(propName, mergeAllOfSchemas(schema.allOf), fixtures);
|
|
256
|
+
}
|
|
172
257
|
const type = getPrimaryType(schema);
|
|
173
258
|
// Use schema example if available
|
|
174
259
|
if (schema.examples && schema.examples.length > 0) {
|
|
@@ -294,14 +379,34 @@ function generateDefaultValue(propName, prop, fixtures) {
|
|
|
294
379
|
*/
|
|
295
380
|
const DATE_FORMAT_PATTERNS = [
|
|
296
381
|
// ISO 8601 date patterns
|
|
297
|
-
{
|
|
382
|
+
{
|
|
383
|
+
pattern: /YYYY-MM-DD|ISO\s*8601\s*date|date.*format.*YYYY/i,
|
|
384
|
+
value: '2024-01-15',
|
|
385
|
+
formatName: 'ISO 8601 date',
|
|
386
|
+
},
|
|
298
387
|
{ pattern: /YYYY-MM|year-month|month.*format/i, value: '2024-01', formatName: 'year-month' },
|
|
299
|
-
{
|
|
388
|
+
{
|
|
389
|
+
pattern: /ISO\s*8601\s*(datetime|timestamp)|datetime.*format|timestamp.*ISO/i,
|
|
390
|
+
value: '2024-01-15T14:30:00Z',
|
|
391
|
+
formatName: 'ISO 8601 datetime',
|
|
392
|
+
},
|
|
300
393
|
// Unix timestamp patterns
|
|
301
|
-
{
|
|
302
|
-
|
|
394
|
+
{
|
|
395
|
+
pattern: /unix\s*timestamp|epoch\s*time|seconds\s*since/i,
|
|
396
|
+
value: '1705330200',
|
|
397
|
+
formatName: 'Unix timestamp',
|
|
398
|
+
},
|
|
399
|
+
{
|
|
400
|
+
pattern: /milliseconds?\s*(since|timestamp)|ms\s*timestamp/i,
|
|
401
|
+
value: '1705330200000',
|
|
402
|
+
formatName: 'Unix timestamp (ms)',
|
|
403
|
+
},
|
|
303
404
|
// Time patterns
|
|
304
|
-
{
|
|
405
|
+
{
|
|
406
|
+
pattern: /HH:MM:SS|time.*format.*HH|24.hour.*time/i,
|
|
407
|
+
value: '14:30:00',
|
|
408
|
+
formatName: '24-hour time',
|
|
409
|
+
},
|
|
305
410
|
{ pattern: /HH:MM|hour.*minute/i, value: '14:30', formatName: 'hour:minute' },
|
|
306
411
|
// Other date formats
|
|
307
412
|
{ pattern: /MM\/DD\/YYYY|US\s*date/i, value: '01/15/2024', formatName: 'US date' },
|
|
@@ -317,7 +422,11 @@ const SEMANTIC_FORMAT_PATTERNS = [
|
|
|
317
422
|
// Phone patterns
|
|
318
423
|
{ pattern: /phone.*number|telephone/i, value: '+1-555-123-4567', formatName: 'phone' },
|
|
319
424
|
// UUID patterns
|
|
320
|
-
{
|
|
425
|
+
{
|
|
426
|
+
pattern: /UUID|unique.*identifier/i,
|
|
427
|
+
value: '550e8400-e29b-41d4-a716-446655440000',
|
|
428
|
+
formatName: 'UUID',
|
|
429
|
+
},
|
|
321
430
|
// IP address patterns
|
|
322
431
|
{ pattern: /IP.*address|IPv4/i, value: '192.168.1.100', formatName: 'IP address' },
|
|
323
432
|
// JSON patterns
|
|
@@ -382,12 +491,16 @@ function generateSmartStringValue(propName, prop) {
|
|
|
382
491
|
if (lowerName.includes('email') || description.includes('email')) {
|
|
383
492
|
return 'test@example.com';
|
|
384
493
|
}
|
|
385
|
-
if (lowerName.includes('url') ||
|
|
386
|
-
|
|
494
|
+
if (lowerName.includes('url') ||
|
|
495
|
+
lowerName.includes('uri') ||
|
|
496
|
+
description.includes('url') ||
|
|
497
|
+
description.includes('uri')) {
|
|
387
498
|
return 'https://example.com';
|
|
388
499
|
}
|
|
389
|
-
if (lowerName.includes('path') ||
|
|
390
|
-
lowerName.includes('
|
|
500
|
+
if (lowerName.includes('path') ||
|
|
501
|
+
lowerName.includes('directory') ||
|
|
502
|
+
lowerName.includes('dir') ||
|
|
503
|
+
description.includes('path')) {
|
|
391
504
|
return '/tmp/test';
|
|
392
505
|
}
|
|
393
506
|
if (lowerName.includes('id') || description.includes('identifier')) {
|
|
@@ -608,7 +721,7 @@ function detectOperationBasedPattern(properties) {
|
|
|
608
721
|
* @returns Detection result with reason if self-stateful
|
|
609
722
|
*/
|
|
610
723
|
function detectSelfStatefulPattern(toolName, toolDescription, properties, requiredParams) {
|
|
611
|
-
const { DESCRIPTION_PATTERNS, STATE_PARAM_PATTERNS, STATEFUL_TOOL_NAME_PATTERNS
|
|
724
|
+
const { DESCRIPTION_PATTERNS, STATE_PARAM_PATTERNS, STATEFUL_TOOL_NAME_PATTERNS } = SELF_STATEFUL_DETECTION;
|
|
612
725
|
// Check description for state dependency patterns
|
|
613
726
|
if (toolDescription) {
|
|
614
727
|
for (const pattern of DESCRIPTION_PATTERNS) {
|
|
@@ -689,7 +802,7 @@ function getSchemaDepth(schema, currentDepth = 0) {
|
|
|
689
802
|
* @returns Detection result with complex array parameter names
|
|
690
803
|
*/
|
|
691
804
|
function detectComplexArraySchema(properties) {
|
|
692
|
-
const { MAX_SIMPLE_DEPTH, MIN_REQUIRED_PROPERTIES, STRUCTURED_DATA_PATTERNS
|
|
805
|
+
const { MAX_SIMPLE_DEPTH, MIN_REQUIRED_PROPERTIES, STRUCTURED_DATA_PATTERNS } = COMPLEX_SCHEMA_DETECTION;
|
|
693
806
|
const complexParams = [];
|
|
694
807
|
for (const [paramName, prop] of Object.entries(properties)) {
|
|
695
808
|
if (prop.type !== 'array' || !prop.items)
|
|
@@ -764,18 +877,14 @@ function detectFalsePositivePatterns(toolName, toolDescription, properties, requ
|
|
|
764
877
|
* For operation-based, self-stateful, or complex array tools: Tests use 'either' outcome
|
|
765
878
|
* since we cannot reliably predict success for these patterns.
|
|
766
879
|
*/
|
|
767
|
-
function generateHappyPathTests(toolName, toolDescription, properties, requiredParams, fixtures) {
|
|
880
|
+
function generateHappyPathTests(toolName, toolDescription, schema, properties, requiredParams, fixtures) {
|
|
768
881
|
const questions = [];
|
|
769
882
|
const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
|
|
770
883
|
// Detect all patterns that commonly cause false positives
|
|
771
884
|
const detection = detectFalsePositivePatterns(toolName, toolDescription, properties, requiredParams);
|
|
772
885
|
// Determine if we need to use 'either' outcome due to detected patterns
|
|
773
|
-
const needsFlexibleOutcome = detection.isOperationBased ||
|
|
774
|
-
|
|
775
|
-
detection.hasComplexArrays;
|
|
776
|
-
const happyPathOutcome = needsFlexibleOutcome
|
|
777
|
-
? 'either'
|
|
778
|
-
: 'success';
|
|
886
|
+
const needsFlexibleOutcome = detection.isOperationBased || detection.isSelfStateful || detection.hasComplexArrays;
|
|
887
|
+
const happyPathOutcome = needsFlexibleOutcome ? 'either' : 'success';
|
|
779
888
|
// Build suffix string for test descriptions
|
|
780
889
|
const suffixes = [];
|
|
781
890
|
if (detection.isOperationBased)
|
|
@@ -847,8 +956,129 @@ function generateHappyPathTests(toolName, toolDescription, properties, requiredP
|
|
|
847
956
|
metadata: buildMetadata(),
|
|
848
957
|
});
|
|
849
958
|
}
|
|
959
|
+
// Conditional schema tests (if/then/else)
|
|
960
|
+
if (schema && questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
|
|
961
|
+
const conditionalTests = generateConditionalHappyPathTests(schema, properties, requiredParams, fixtures, happyPathOutcome, buildMetadata());
|
|
962
|
+
for (const test of conditionalTests) {
|
|
963
|
+
if (questions.length >= SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY)
|
|
964
|
+
break;
|
|
965
|
+
addQuestion(questions, test);
|
|
966
|
+
}
|
|
967
|
+
}
|
|
968
|
+
// Variant coverage for oneOf/anyOf
|
|
969
|
+
if (questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
|
|
970
|
+
const variantTests = generateVariantHappyPathTests(properties, requiredParams, fixtures, happyPathOutcome, buildMetadata());
|
|
971
|
+
for (const test of variantTests) {
|
|
972
|
+
if (questions.length >= SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY)
|
|
973
|
+
break;
|
|
974
|
+
addQuestion(questions, test);
|
|
975
|
+
}
|
|
976
|
+
}
|
|
850
977
|
return questions.slice(0, SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY);
|
|
851
978
|
}
|
|
979
|
+
/**
|
|
980
|
+
* Generate happy path tests for conditional schemas (if/then/else).
|
|
981
|
+
* Best-effort: only handles const/enum conditions.
|
|
982
|
+
*/
|
|
983
|
+
function generateConditionalHappyPathTests(schema, properties, requiredParams, fixtures, expectedOutcome, metadata) {
|
|
984
|
+
const tests = [];
|
|
985
|
+
const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
|
|
986
|
+
if (!schema.if) {
|
|
987
|
+
return tests;
|
|
988
|
+
}
|
|
989
|
+
const ifArgs = buildArgsForCondition(schema.if, fixtures);
|
|
990
|
+
if (Object.keys(ifArgs).length > 0 && schema.then) {
|
|
991
|
+
const thenSchema = mergeSchemas(schema, schema.then);
|
|
992
|
+
const thenArgs = buildBaseArgs(thenSchema.properties ?? properties, thenSchema.required ?? requiredParams, fixtures);
|
|
993
|
+
tests.push({
|
|
994
|
+
description: `${CATEGORY_DESCRIPTIONS.HAPPY_PATH}: conditional (if/then)`,
|
|
995
|
+
category: 'happy_path',
|
|
996
|
+
args: { ...thenArgs, ...ifArgs },
|
|
997
|
+
expectedOutcome,
|
|
998
|
+
metadata,
|
|
999
|
+
});
|
|
1000
|
+
}
|
|
1001
|
+
if (Object.keys(ifArgs).length > 0 && schema.else) {
|
|
1002
|
+
const elseSchema = mergeSchemas(schema, schema.else);
|
|
1003
|
+
const elseArgs = buildBaseArgs(elseSchema.properties ?? properties, elseSchema.required ?? requiredParams, fixtures);
|
|
1004
|
+
const invertedArgs = invertConditionArgs(ifArgs);
|
|
1005
|
+
tests.push({
|
|
1006
|
+
description: `${CATEGORY_DESCRIPTIONS.HAPPY_PATH}: conditional (if/else)`,
|
|
1007
|
+
category: 'happy_path',
|
|
1008
|
+
args: { ...elseArgs, ...invertedArgs },
|
|
1009
|
+
expectedOutcome,
|
|
1010
|
+
metadata,
|
|
1011
|
+
});
|
|
1012
|
+
}
|
|
1013
|
+
return tests;
|
|
1014
|
+
}
|
|
1015
|
+
/**
|
|
1016
|
+
* Generate additional happy path tests for oneOf/anyOf variants.
|
|
1017
|
+
*/
|
|
1018
|
+
function generateVariantHappyPathTests(properties, requiredParams, fixtures, expectedOutcome, metadata) {
|
|
1019
|
+
const tests = [];
|
|
1020
|
+
const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
|
|
1021
|
+
for (const [propName, prop] of Object.entries(properties)) {
|
|
1022
|
+
const variants = prop.oneOf ?? prop.anyOf;
|
|
1023
|
+
if (!variants || variants.length < 2) {
|
|
1024
|
+
continue;
|
|
1025
|
+
}
|
|
1026
|
+
const baseArgs = buildBaseArgs(properties, requiredParams, fixtures);
|
|
1027
|
+
const variant = variants[1];
|
|
1028
|
+
baseArgs[propName] = generateDefaultValueForPropertySchema(propName, variant, fixtures);
|
|
1029
|
+
tests.push({
|
|
1030
|
+
description: `${CATEGORY_DESCRIPTIONS.HAPPY_PATH}: variant input for "${propName}"`,
|
|
1031
|
+
category: 'happy_path',
|
|
1032
|
+
args: baseArgs,
|
|
1033
|
+
expectedOutcome,
|
|
1034
|
+
metadata,
|
|
1035
|
+
});
|
|
1036
|
+
if (tests.length >= 2) {
|
|
1037
|
+
break;
|
|
1038
|
+
}
|
|
1039
|
+
}
|
|
1040
|
+
return tests;
|
|
1041
|
+
}
|
|
1042
|
+
/**
|
|
1043
|
+
* Build args to satisfy a conditional schema (if).
|
|
1044
|
+
*/
|
|
1045
|
+
function buildArgsForCondition(condition, fixtures) {
|
|
1046
|
+
const args = {};
|
|
1047
|
+
const props = condition.properties ?? {};
|
|
1048
|
+
for (const [name, prop] of Object.entries(props)) {
|
|
1049
|
+
if (prop.const !== undefined) {
|
|
1050
|
+
args[name] = prop.const;
|
|
1051
|
+
continue;
|
|
1052
|
+
}
|
|
1053
|
+
if (prop.enum && prop.enum.length > 0) {
|
|
1054
|
+
args[name] = prop.enum[0];
|
|
1055
|
+
continue;
|
|
1056
|
+
}
|
|
1057
|
+
args[name] = generateDefaultValueForPropertySchema(name, prop, fixtures);
|
|
1058
|
+
}
|
|
1059
|
+
return args;
|
|
1060
|
+
}
|
|
1061
|
+
/**
|
|
1062
|
+
* Invert simple condition arguments to trigger else branch.
|
|
1063
|
+
*/
|
|
1064
|
+
function invertConditionArgs(args) {
|
|
1065
|
+
const inverted = {};
|
|
1066
|
+
for (const [key, value] of Object.entries(args)) {
|
|
1067
|
+
if (typeof value === 'boolean') {
|
|
1068
|
+
inverted[key] = !value;
|
|
1069
|
+
}
|
|
1070
|
+
else if (typeof value === 'number') {
|
|
1071
|
+
inverted[key] = value + 1;
|
|
1072
|
+
}
|
|
1073
|
+
else if (typeof value === 'string') {
|
|
1074
|
+
inverted[key] = `${value}_alt`;
|
|
1075
|
+
}
|
|
1076
|
+
else {
|
|
1077
|
+
inverted[key] = value;
|
|
1078
|
+
}
|
|
1079
|
+
}
|
|
1080
|
+
return inverted;
|
|
1081
|
+
}
|
|
852
1082
|
/**
|
|
853
1083
|
* Generate boundary value tests.
|
|
854
1084
|
* Tests edge cases like empty strings, zero, large numbers.
|
|
@@ -874,7 +1104,8 @@ function generateBoundaryTests(properties, requiredParams, fixtures) {
|
|
|
874
1104
|
});
|
|
875
1105
|
}
|
|
876
1106
|
// Test long string if no maxLength
|
|
877
|
-
if (prop.maxLength === undefined &&
|
|
1107
|
+
if (prop.maxLength === undefined &&
|
|
1108
|
+
questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
|
|
878
1109
|
const longString = 'x'.repeat(BOUNDARY_VALUES.LONG_STRING_LENGTH);
|
|
879
1110
|
addQuestion(questions, {
|
|
880
1111
|
description: `${CATEGORY_DESCRIPTIONS.BOUNDARY}: long string for "${propName}"`,
|
|
@@ -1067,7 +1298,9 @@ function generateArrayTests(properties, requiredParams, fixtures) {
|
|
|
1067
1298
|
});
|
|
1068
1299
|
}
|
|
1069
1300
|
// Test with exact maxItems (if defined and reasonable)
|
|
1070
|
-
if (maxItems !== undefined &&
|
|
1301
|
+
if (maxItems !== undefined &&
|
|
1302
|
+
maxItems <= ARRAY_TESTS.MANY_ITEMS_COUNT &&
|
|
1303
|
+
questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
|
|
1071
1304
|
const maxItemsArray = generateArrayItems(prop.items, maxItems);
|
|
1072
1305
|
addQuestion(questions, {
|
|
1073
1306
|
description: `${CATEGORY_DESCRIPTIONS.ARRAY_HANDLING}: exact maxItems (${maxItems}) for "${propName}"`,
|
|
@@ -1124,7 +1357,7 @@ function generateNullabilityTests(properties, requiredParams, fixtures) {
|
|
|
1124
1357
|
* Tests that required parameters are properly validated.
|
|
1125
1358
|
* All error handling tests expect error - tool should reject missing required params.
|
|
1126
1359
|
*/
|
|
1127
|
-
function generateErrorHandlingTests(properties, requiredParams, fixtures) {
|
|
1360
|
+
function generateErrorHandlingTests(schema, properties, requiredParams, fixtures) {
|
|
1128
1361
|
const questions = [];
|
|
1129
1362
|
const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
|
|
1130
1363
|
// Test missing all required params
|
|
@@ -1149,6 +1382,24 @@ function generateErrorHandlingTests(properties, requiredParams, fixtures) {
|
|
|
1149
1382
|
expectedOutcome: 'error',
|
|
1150
1383
|
});
|
|
1151
1384
|
}
|
|
1385
|
+
// Test dependentRequired constraints
|
|
1386
|
+
const dependent = schema?.dependentRequired ?? {};
|
|
1387
|
+
for (const [prop, deps] of Object.entries(dependent)) {
|
|
1388
|
+
if (questions.length >= SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY)
|
|
1389
|
+
break;
|
|
1390
|
+
if (deps.length === 0)
|
|
1391
|
+
continue;
|
|
1392
|
+
const args = buildBaseArgs(properties, requiredParams, fixtures);
|
|
1393
|
+
args[prop] = args[prop] ?? generateDefaultValue(prop, properties[prop] ?? {}, fixtures);
|
|
1394
|
+
// Remove one dependency to trigger validation error
|
|
1395
|
+
delete args[deps[0]];
|
|
1396
|
+
addQuestion(questions, {
|
|
1397
|
+
description: `${CATEGORY_DESCRIPTIONS.MISSING_REQUIRED}: dependent "${deps[0]}" missing for "${prop}"`,
|
|
1398
|
+
category: 'error_handling',
|
|
1399
|
+
args,
|
|
1400
|
+
expectedOutcome: 'error',
|
|
1401
|
+
});
|
|
1402
|
+
}
|
|
1152
1403
|
return questions;
|
|
1153
1404
|
}
|
|
1154
1405
|
// ==================== Varied Tests for Simple Tools ====================
|
|
@@ -1165,12 +1416,8 @@ function generateVariedTestsForSimpleTools(toolName, toolDescription, properties
|
|
|
1165
1416
|
const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
|
|
1166
1417
|
// Detect patterns that affect expected outcome
|
|
1167
1418
|
const detection = detectFalsePositivePatterns(toolName, toolDescription, properties, requiredParams);
|
|
1168
|
-
const needsFlexibleOutcome = detection.isOperationBased ||
|
|
1169
|
-
|
|
1170
|
-
detection.hasComplexArrays;
|
|
1171
|
-
const variedTestOutcome = needsFlexibleOutcome
|
|
1172
|
-
? 'either'
|
|
1173
|
-
: 'success';
|
|
1419
|
+
const needsFlexibleOutcome = detection.isOperationBased || detection.isSelfStateful || detection.hasComplexArrays;
|
|
1420
|
+
const variedTestOutcome = needsFlexibleOutcome ? 'either' : 'success';
|
|
1174
1421
|
// Build metadata for varied tests if patterns detected
|
|
1175
1422
|
const variedMetadata = needsFlexibleOutcome
|
|
1176
1423
|
? {
|
|
@@ -1190,7 +1437,7 @@ function generateVariedTestsForSimpleTools(toolName, toolDescription, properties
|
|
|
1190
1437
|
}
|
|
1191
1438
|
: undefined;
|
|
1192
1439
|
// Get existing arg signatures to avoid duplicates
|
|
1193
|
-
const existingArgSignatures = new Set(existingQuestions.map(q => JSON.stringify(q.args)));
|
|
1440
|
+
const existingArgSignatures = new Set(existingQuestions.map((q) => JSON.stringify(q.args)));
|
|
1194
1441
|
// Variation strategies for simple/no-param tools
|
|
1195
1442
|
const variationStrategies = [];
|
|
1196
1443
|
// Strategy 1: Different timing contexts (useful for stateful tools)
|
|
@@ -1425,7 +1672,7 @@ export function generateSchemaTestsWithInferences(tool, options = {}) {
|
|
|
1425
1672
|
const properties = schema?.properties ?? {};
|
|
1426
1673
|
const requiredParams = (schema?.required ?? []);
|
|
1427
1674
|
// 1. Happy Path Tests (always included)
|
|
1428
|
-
questions.push(...generateHappyPathTests(tool.name, tool.description, properties, requiredParams, fixtures));
|
|
1675
|
+
questions.push(...generateHappyPathTests(tool.name, tool.description, schema, properties, requiredParams, fixtures));
|
|
1429
1676
|
// 2. Boundary Value Tests
|
|
1430
1677
|
questions.push(...generateBoundaryTests(properties, requiredParams, fixtures));
|
|
1431
1678
|
// 3. Type Coercion Tests (unless skipping error tests)
|
|
@@ -1442,7 +1689,7 @@ export function generateSchemaTestsWithInferences(tool, options = {}) {
|
|
|
1442
1689
|
questions.push(...generateNullabilityTests(properties, requiredParams, fixtures));
|
|
1443
1690
|
// 7. Error Handling Tests (unless skipped)
|
|
1444
1691
|
if (!options.skipErrorTests) {
|
|
1445
|
-
questions.push(...generateErrorHandlingTests(properties, requiredParams, fixtures));
|
|
1692
|
+
questions.push(...generateErrorHandlingTests(schema, properties, requiredParams, fixtures));
|
|
1446
1693
|
}
|
|
1447
1694
|
// 8. Semantic Validation Tests (unless skipped)
|
|
1448
1695
|
let semanticInferences = [];
|
|
@@ -6,6 +6,8 @@ import type { InterviewQuestion } from './types.js';
|
|
|
6
6
|
export declare class StatefulTestRunner {
|
|
7
7
|
private options;
|
|
8
8
|
private values;
|
|
9
|
+
private jsonPathValues;
|
|
10
|
+
private recentResponses;
|
|
9
11
|
constructor(options: {
|
|
10
12
|
shareOutputs: boolean;
|
|
11
13
|
});
|
|
@@ -15,5 +17,6 @@ export declare class StatefulTestRunner {
|
|
|
15
17
|
};
|
|
16
18
|
recordResponse(tool: MCPTool, response: MCPToolCallResult | null): string[];
|
|
17
19
|
private findMatchingValue;
|
|
20
|
+
private findByJsonPath;
|
|
18
21
|
}
|
|
19
22
|
//# sourceMappingURL=stateful-test-runner.d.ts.map
|
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
import { STATEFUL_TESTING } from '../constants.js';
|
|
2
2
|
import { extractTextContent } from './schema-inferrer.js';
|
|
3
|
+
import { getValueAtPath } from '../utils/jsonpath.js';
|
|
3
4
|
/**
|
|
4
5
|
* Maintains shared state between tool calls for stateful testing.
|
|
5
6
|
*/
|
|
6
7
|
export class StatefulTestRunner {
|
|
7
8
|
options;
|
|
8
9
|
values = new Map();
|
|
10
|
+
jsonPathValues = new Map();
|
|
11
|
+
recentResponses = [];
|
|
9
12
|
constructor(options) {
|
|
10
13
|
this.options = options;
|
|
11
14
|
}
|
|
@@ -48,21 +51,69 @@ export class StatefulTestRunner {
|
|
|
48
51
|
this.values.set(key, { value, sourceTool: tool.name });
|
|
49
52
|
providedKeys.push(key);
|
|
50
53
|
}
|
|
54
|
+
// Record JSONPath values for richer mapping
|
|
55
|
+
const pathValues = collectJsonPaths(parsed);
|
|
56
|
+
for (const [path, value] of Object.entries(pathValues)) {
|
|
57
|
+
if (this.jsonPathValues.size >= STATEFUL_TESTING.MAX_STORED_VALUES) {
|
|
58
|
+
break;
|
|
59
|
+
}
|
|
60
|
+
this.jsonPathValues.set(path, { value, sourceTool: tool.name });
|
|
61
|
+
}
|
|
62
|
+
// Keep a bounded list of recent responses for direct JSONPath lookup
|
|
63
|
+
this.recentResponses.unshift({ value: parsed, sourceTool: tool.name });
|
|
64
|
+
if (this.recentResponses.length > STATEFUL_TESTING.MAX_STORED_VALUES) {
|
|
65
|
+
this.recentResponses.pop();
|
|
66
|
+
}
|
|
51
67
|
return providedKeys;
|
|
52
68
|
}
|
|
53
69
|
findMatchingValue(paramName) {
|
|
70
|
+
if (looksLikeJsonPath(paramName)) {
|
|
71
|
+
const direct = this.findByJsonPath(paramName);
|
|
72
|
+
if (direct)
|
|
73
|
+
return direct;
|
|
74
|
+
}
|
|
54
75
|
const normalizedParam = normalizeKey(paramName);
|
|
55
76
|
for (const [key, value] of this.values.entries()) {
|
|
56
77
|
if (normalizeKey(key) === normalizedParam) {
|
|
57
78
|
return value;
|
|
58
79
|
}
|
|
59
80
|
}
|
|
81
|
+
for (const [key, value] of this.jsonPathValues.entries()) {
|
|
82
|
+
if (normalizeKey(key) === normalizedParam) {
|
|
83
|
+
return value;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
60
86
|
for (const [key, value] of this.values.entries()) {
|
|
61
87
|
const normalizedKey = normalizeKey(key);
|
|
62
88
|
if (normalizedKey.endsWith(normalizedParam)) {
|
|
63
89
|
return value;
|
|
64
90
|
}
|
|
65
91
|
}
|
|
92
|
+
for (const [key, value] of this.jsonPathValues.entries()) {
|
|
93
|
+
const normalizedKey = normalizeKey(key);
|
|
94
|
+
if (normalizedKey.endsWith(normalizedParam)) {
|
|
95
|
+
return value;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
if (!looksLikeJsonPath(paramName)) {
|
|
99
|
+
const pathMatch = this.findByJsonPath(`$.${paramName}`);
|
|
100
|
+
if (pathMatch)
|
|
101
|
+
return pathMatch;
|
|
102
|
+
}
|
|
103
|
+
return null;
|
|
104
|
+
}
|
|
105
|
+
findByJsonPath(path) {
|
|
106
|
+
const normalized = path.startsWith('$') ? path : `$.${path}`;
|
|
107
|
+
const stored = this.jsonPathValues.get(normalized);
|
|
108
|
+
if (stored) {
|
|
109
|
+
return stored;
|
|
110
|
+
}
|
|
111
|
+
for (const entry of this.recentResponses) {
|
|
112
|
+
const value = getValueAtPath(entry.value, normalized);
|
|
113
|
+
if (value !== undefined) {
|
|
114
|
+
return { value, sourceTool: entry.sourceTool };
|
|
115
|
+
}
|
|
116
|
+
}
|
|
66
117
|
return null;
|
|
67
118
|
}
|
|
68
119
|
}
|
|
@@ -103,4 +154,33 @@ function flattenValue(value, prefix = '') {
|
|
|
103
154
|
}
|
|
104
155
|
return result;
|
|
105
156
|
}
|
|
157
|
+
function looksLikeJsonPath(value) {
|
|
158
|
+
return value.startsWith('$') || value.includes('.') || value.includes('[');
|
|
159
|
+
}
|
|
160
|
+
function collectJsonPaths(value, path = '$', depth = 0, result = {}) {
|
|
161
|
+
if (depth > 4 || value === null || value === undefined) {
|
|
162
|
+
return result;
|
|
163
|
+
}
|
|
164
|
+
if (Array.isArray(value)) {
|
|
165
|
+
const sample = value.slice(0, 3);
|
|
166
|
+
sample.forEach((item, index) => {
|
|
167
|
+
collectJsonPaths(item, `${path}[${index}]`, depth + 1, result);
|
|
168
|
+
});
|
|
169
|
+
return result;
|
|
170
|
+
}
|
|
171
|
+
if (typeof value !== 'object') {
|
|
172
|
+
result[path] = value;
|
|
173
|
+
return result;
|
|
174
|
+
}
|
|
175
|
+
for (const [key, child] of Object.entries(value)) {
|
|
176
|
+
const childPath = `${path}.${key}`;
|
|
177
|
+
if (typeof child === 'object' && child !== null) {
|
|
178
|
+
collectJsonPaths(child, childPath, depth + 1, result);
|
|
179
|
+
}
|
|
180
|
+
else {
|
|
181
|
+
result[childPath] = child;
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
return result;
|
|
185
|
+
}
|
|
106
186
|
//# sourceMappingURL=stateful-test-runner.js.map
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
import type { DiscoveryResult } from '../discovery/types.js';
|
|
2
2
|
import type { MCPToolCallResult, MCPPromptGetResult, MCPResourceReadResult } from '../transport/types.js';
|
|
3
3
|
import type { InferredSchema } from '../baseline/response-fingerprint.js';
|
|
4
|
+
import type { ResponseSchemaEvolution } from '../baseline/response-schema-tracker.js';
|
|
5
|
+
import type { ErrorAnalysisSummary } from '../baseline/error-analyzer.js';
|
|
6
|
+
import type { DocumentationScore } from '../baseline/documentation-scorer.js';
|
|
7
|
+
import type { SemanticInference } from '../validation/semantic-types.js';
|
|
4
8
|
import type { Persona, QuestionCategory } from '../persona/types.js';
|
|
5
9
|
import type { Workflow, WorkflowResult, WorkflowTimeoutConfig } from '../workflow/types.js';
|
|
6
10
|
import type { LoadedScenarios, ScenarioResult } from '../scenarios/types.js';
|
|
@@ -401,6 +405,14 @@ export interface InterviewResult {
|
|
|
401
405
|
limitations: string[];
|
|
402
406
|
/** Overall recommendations */
|
|
403
407
|
recommendations: string[];
|
|
408
|
+
/** Semantic type inferences by tool */
|
|
409
|
+
semanticInferences?: Record<string, SemanticInference[]>;
|
|
410
|
+
/** Response schema evolution by tool */
|
|
411
|
+
schemaEvolution?: Record<string, ResponseSchemaEvolution>;
|
|
412
|
+
/** Enhanced error analysis summaries by tool */
|
|
413
|
+
errorAnalysisSummaries?: Record<string, ErrorAnalysisSummary>;
|
|
414
|
+
/** Documentation quality score */
|
|
415
|
+
documentationScore?: DocumentationScore;
|
|
404
416
|
/** Interview metadata */
|
|
405
417
|
metadata: InterviewMetadata;
|
|
406
418
|
}
|
|
@@ -29,7 +29,7 @@ export interface SSETransportConfig extends BaseTransportConfig {
|
|
|
29
29
|
* - POST {baseUrl}/message - Endpoint for sending messages
|
|
30
30
|
*/
|
|
31
31
|
export declare class SSETransport extends BaseTransport {
|
|
32
|
-
private
|
|
32
|
+
private streamAbortController;
|
|
33
33
|
private abortController;
|
|
34
34
|
private connected;
|
|
35
35
|
private reconnectAttempts;
|
|
@@ -55,6 +55,10 @@ export declare class SSETransport extends BaseTransport {
|
|
|
55
55
|
* Handle an incoming SSE message.
|
|
56
56
|
*/
|
|
57
57
|
private handleSSEMessage;
|
|
58
|
+
/**
|
|
59
|
+
* Stream and parse SSE events from a fetch response.
|
|
60
|
+
*/
|
|
61
|
+
private readSSEStream;
|
|
58
62
|
/**
|
|
59
63
|
* Handle reconnection after a connection error.
|
|
60
64
|
*
|
|
@@ -63,7 +67,7 @@ export declare class SSETransport extends BaseTransport {
|
|
|
63
67
|
* - Uses capped exponential backoff
|
|
64
68
|
* - Clears reconnect timer on close
|
|
65
69
|
* - Checks isClosing flag to prevent reconnection after close()
|
|
66
|
-
* - Explicitly
|
|
70
|
+
* - Explicitly aborts SSE stream on max attempts
|
|
67
71
|
*/
|
|
68
72
|
private handleReconnect;
|
|
69
73
|
/**
|
|
@@ -74,7 +78,7 @@ export declare class SSETransport extends BaseTransport {
|
|
|
74
78
|
* Close the SSE connection.
|
|
75
79
|
*
|
|
76
80
|
* RELIABILITY: Properly cleans up all resources including:
|
|
77
|
-
* -
|
|
81
|
+
* - SSE stream connection
|
|
78
82
|
* - Pending HTTP requests (via abort controller)
|
|
79
83
|
* - Reconnection timer
|
|
80
84
|
* - Sets isClosing flag to prevent reconnection attempts
|