@dotsetlabs/bellwether 1.0.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64):
  1. package/CHANGELOG.md +74 -0
  2. package/README.md +8 -2
  3. package/dist/baseline/accessors.d.ts +1 -1
  4. package/dist/baseline/accessors.js +1 -3
  5. package/dist/baseline/baseline-format.d.ts +287 -0
  6. package/dist/baseline/baseline-format.js +12 -0
  7. package/dist/baseline/comparator.js +249 -11
  8. package/dist/baseline/converter.d.ts +15 -15
  9. package/dist/baseline/converter.js +46 -34
  10. package/dist/baseline/diff.d.ts +1 -1
  11. package/dist/baseline/diff.js +45 -28
  12. package/dist/baseline/error-analyzer.d.ts +1 -1
  13. package/dist/baseline/error-analyzer.js +90 -17
  14. package/dist/baseline/incremental-checker.js +8 -5
  15. package/dist/baseline/index.d.ts +2 -12
  16. package/dist/baseline/index.js +3 -23
  17. package/dist/baseline/performance-tracker.d.ts +0 -1
  18. package/dist/baseline/performance-tracker.js +13 -20
  19. package/dist/baseline/response-fingerprint.js +39 -2
  20. package/dist/baseline/saver.js +41 -10
  21. package/dist/baseline/schema-compare.d.ts +22 -0
  22. package/dist/baseline/schema-compare.js +259 -16
  23. package/dist/baseline/types.d.ts +10 -7
  24. package/dist/cache/response-cache.d.ts +8 -0
  25. package/dist/cache/response-cache.js +110 -0
  26. package/dist/cli/commands/check.js +23 -6
  27. package/dist/cli/commands/explore.js +34 -14
  28. package/dist/cli/index.js +8 -0
  29. package/dist/config/template.js +8 -7
  30. package/dist/config/validator.d.ts +59 -59
  31. package/dist/config/validator.js +245 -90
  32. package/dist/constants/core.d.ts +4 -0
  33. package/dist/constants/core.js +8 -19
  34. package/dist/constants/registry.d.ts +17 -0
  35. package/dist/constants/registry.js +18 -0
  36. package/dist/constants/testing.d.ts +0 -369
  37. package/dist/constants/testing.js +18 -456
  38. package/dist/constants.d.ts +1 -1
  39. package/dist/constants.js +1 -1
  40. package/dist/docs/contract.js +131 -83
  41. package/dist/docs/report.js +8 -5
  42. package/dist/interview/insights.d.ts +17 -0
  43. package/dist/interview/insights.js +52 -0
  44. package/dist/interview/interviewer.js +52 -10
  45. package/dist/interview/prompt-test-generator.d.ts +12 -0
  46. package/dist/interview/prompt-test-generator.js +77 -0
  47. package/dist/interview/resource-test-generator.d.ts +12 -0
  48. package/dist/interview/resource-test-generator.js +20 -0
  49. package/dist/interview/schema-inferrer.js +26 -4
  50. package/dist/interview/schema-test-generator.js +278 -31
  51. package/dist/interview/stateful-test-runner.d.ts +3 -0
  52. package/dist/interview/stateful-test-runner.js +80 -0
  53. package/dist/interview/types.d.ts +12 -0
  54. package/dist/transport/mcp-client.js +1 -1
  55. package/dist/transport/sse-transport.d.ts +7 -3
  56. package/dist/transport/sse-transport.js +157 -67
  57. package/dist/version.js +1 -1
  58. package/man/bellwether.1 +1 -1
  59. package/man/bellwether.1.md +2 -2
  60. package/package.json +1 -1
  61. package/schemas/bellwether-check.schema.json +185 -0
  62. package/schemas/bellwether-explore.schema.json +837 -0
  63. package/scripts/completions/bellwether.bash +10 -4
  64. package/scripts/completions/bellwether.zsh +55 -2
@@ -24,6 +24,26 @@ function getPrimaryType(schema) {
24
24
  * Used when generating array items or object properties.
25
25
  */
26
26
  function generateDefaultValueForSchema(schema, fixtures) {
27
+ // Prefer conditional schema branches if present
28
+ if (schema.if && schema.then) {
29
+ const merged = mergeSchemas(schema, schema.then);
30
+ return generateDefaultValueForSchema(merged, fixtures);
31
+ }
32
+ if (schema.if && schema.else) {
33
+ const merged = mergeSchemas(schema, schema.else);
34
+ return generateDefaultValueForSchema(merged, fixtures);
35
+ }
36
+ // Handle compositional schemas by selecting a representative variant
37
+ if (schema.oneOf && schema.oneOf.length > 0) {
38
+ return generateDefaultValueForSchema(schema.oneOf[0], fixtures);
39
+ }
40
+ if (schema.anyOf && schema.anyOf.length > 0) {
41
+ return generateDefaultValueForSchema(schema.anyOf[0], fixtures);
42
+ }
43
+ if (schema.allOf && schema.allOf.length > 0) {
44
+ const merged = mergeAllOfSchemas(schema.allOf);
45
+ return generateDefaultValueForSchema(merged, fixtures);
46
+ }
27
47
  const type = getPrimaryType(schema);
28
48
  // Use schema example if available
29
49
  if (schema.examples && schema.examples.length > 0) {
@@ -61,6 +81,27 @@ function generateDefaultValueForSchema(schema, fixtures) {
61
81
  return 'test';
62
82
  }
63
83
  }
84
/**
 * Shallow-merge a base schema with an override schema (used for if/then/else
 * branches and for folding allOf members together).
 *
 * - Top-level keywords: the override wins over the base.
 * - `properties`: merged by property name (override wins per property).
 * - `required`: union of both lists, first occurrence order preserved.
 *
 * When `override` is one of the base schema's own conditional branches
 * (`base.then` or `base.else`), the conditional is considered resolved and the
 * base's `if`/`then`/`else` keywords are NOT copied into the result. Without
 * this, a caller that recurses on the merged schema while checking
 * `schema.if && schema.then` would see the same conditional again and never
 * terminate. Conditional keywords that the override itself declares (a nested
 * conditional inside the branch) are kept.
 *
 * Neither input is mutated.
 *
 * @param base - Schema providing the defaults
 * @param override - Schema whose keywords take precedence
 * @returns A new merged schema object
 */
function mergeSchemas(base, override) {
    const baseSchema = base ?? {};
    const resolvesConditional = override !== undefined &&
        override !== null &&
        (override === baseSchema.then || override === baseSchema.else);
    let inherited = baseSchema;
    if (resolvesConditional) {
        // Strip the already-resolved conditional so recursion on the result terminates
        const { if: _resolvedIf, then: _resolvedThen, else: _resolvedElse, ...rest } = baseSchema;
        inherited = rest;
    }
    return {
        ...inherited,
        ...override,
        properties: {
            ...(baseSchema.properties ?? {}),
            ...(override?.properties ?? {}),
        },
        required: Array.from(new Set([...(baseSchema.required ?? []), ...(override?.required ?? [])])),
    };
}
99
/**
 * Fold every member of an allOf list into one schema (best-effort).
 * Members are applied left to right on top of an empty schema, so later
 * members take precedence wherever mergeSchemas lets the override win.
 */
function mergeAllOfSchemas(schemas) {
    let combined = {};
    schemas.forEach((member) => {
        combined = mergeSchemas(combined, member);
    });
    return combined;
}
64
105
  /**
65
106
  * Generate a smart string value for a schema without property name context.
66
107
  * Used for nested array items where we don't have a property name.
@@ -129,6 +170,7 @@ function generateMinimalObject(schema, fixtures) {
129
170
  const result = {};
130
171
  const requiredProps = schema.required ?? [];
131
172
  const properties = schema.properties ?? {};
173
+ const patternProperties = schema.patternProperties ?? {};
132
174
  // Only populate required properties
133
175
  for (const propName of requiredProps) {
134
176
  const propSchema = properties[propName];
@@ -141,8 +183,34 @@ function generateMinimalObject(schema, fixtures) {
141
183
  result[propName] = 'test';
142
184
  }
143
185
  }
186
+ // If there are no required properties but patternProperties exist, add one matching key
187
+ if (requiredProps.length === 0 &&
188
+ Object.keys(result).length === 0 &&
189
+ Object.keys(patternProperties).length > 0) {
190
+ const [pattern, propSchema] = Object.entries(patternProperties)[0];
191
+ const key = generateKeyForPattern(pattern);
192
+ result[key] = generateDefaultValueForPropertySchema(key, propSchema, fixtures);
193
+ }
144
194
  return result;
145
195
  }
196
/**
 * Generate a property key that matches a regex pattern (best-effort).
 *
 * Strategy, in order:
 * 1. Try a fixed list of generic keys ('test', 'key', 'value', 'item', 'prop').
 * 2. If the pattern has an anchored literal prefix (e.g. "^x-") and/or an
 *    anchored literal suffix (e.g. "_id$"), try the generic keys wrapped in
 *    those literals, then the bare literals themselves.
 * 3. Try a few differently-shaped generic keys (capitalised, numeric, ...).
 *
 * Every candidate is verified against the compiled pattern before it is
 * returned, so a wrong guess about the pattern's structure is harmless.
 *
 * @param pattern - Regular expression source (a patternProperties key)
 * @returns A matching key, or 'test' when the pattern is invalid or no
 *          candidate matches
 */
function generateKeyForPattern(pattern) {
    const fallbackKey = 'test';
    let regex;
    try {
        regex = new RegExp(pattern);
    }
    catch {
        // Invalid regex - nothing to match against, fall back to a generic key
        return fallbackKey;
    }
    const genericKeys = ['test', 'key', 'value', 'item', 'prop'];
    for (const candidate of genericKeys) {
        if (regex.test(candidate)) {
            return candidate;
        }
    }
    // Literal text pinned to the start/end of the pattern, if any
    const literalPrefix = /^\^([A-Za-z0-9_-]+)/.exec(pattern)?.[1] ?? '';
    const literalSuffix = /([A-Za-z0-9_-]+)\$$/.exec(pattern)?.[1] ?? '';
    const derivedKeys = [];
    if (literalPrefix || literalSuffix) {
        for (const generic of genericKeys) {
            derivedKeys.push(`${literalPrefix}${generic}${literalSuffix}`);
        }
        if (literalPrefix) {
            derivedKeys.push(literalPrefix);
        }
        if (literalSuffix) {
            derivedKeys.push(literalSuffix);
        }
    }
    derivedKeys.push('Test', 'TEST', 'test1', 'test_key', 'test-key', '1', '123', 'a', 'A');
    for (const candidate of derivedKeys) {
        if (regex.test(candidate)) {
            return candidate;
        }
    }
    return fallbackKey;
}
146
214
  /**
147
215
  * Generate an array with a specific number of items based on the item schema.
148
216
  * Used for boundary testing at minItems/maxItems limits.
@@ -169,6 +237,23 @@ function generateDefaultValueForPropertySchema(propName, schema, fixtures) {
169
237
  if (fixtureValue !== undefined) {
170
238
  return fixtureValue;
171
239
  }
240
+ // Prefer conditional branches
241
+ if (schema.if && schema.then) {
242
+ return generateDefaultValueForPropertySchema(propName, mergeSchemas(schema, schema.then), fixtures);
243
+ }
244
+ if (schema.if && schema.else) {
245
+ return generateDefaultValueForPropertySchema(propName, mergeSchemas(schema, schema.else), fixtures);
246
+ }
247
+ // Handle compositional schemas
248
+ if (schema.oneOf && schema.oneOf.length > 0) {
249
+ return generateDefaultValueForPropertySchema(propName, schema.oneOf[0], fixtures);
250
+ }
251
+ if (schema.anyOf && schema.anyOf.length > 0) {
252
+ return generateDefaultValueForPropertySchema(propName, schema.anyOf[0], fixtures);
253
+ }
254
+ if (schema.allOf && schema.allOf.length > 0) {
255
+ return generateDefaultValueForPropertySchema(propName, mergeAllOfSchemas(schema.allOf), fixtures);
256
+ }
172
257
  const type = getPrimaryType(schema);
173
258
  // Use schema example if available
174
259
  if (schema.examples && schema.examples.length > 0) {
@@ -294,14 +379,34 @@ function generateDefaultValue(propName, prop, fixtures) {
294
379
  */
295
380
  const DATE_FORMAT_PATTERNS = [
296
381
  // ISO 8601 date patterns
297
- { pattern: /YYYY-MM-DD|ISO\s*8601\s*date|date.*format.*YYYY/i, value: '2024-01-15', formatName: 'ISO 8601 date' },
382
+ {
383
+ pattern: /YYYY-MM-DD|ISO\s*8601\s*date|date.*format.*YYYY/i,
384
+ value: '2024-01-15',
385
+ formatName: 'ISO 8601 date',
386
+ },
298
387
  { pattern: /YYYY-MM|year-month|month.*format/i, value: '2024-01', formatName: 'year-month' },
299
- { pattern: /ISO\s*8601\s*(datetime|timestamp)|datetime.*format|timestamp.*ISO/i, value: '2024-01-15T14:30:00Z', formatName: 'ISO 8601 datetime' },
388
+ {
389
+ pattern: /ISO\s*8601\s*(datetime|timestamp)|datetime.*format|timestamp.*ISO/i,
390
+ value: '2024-01-15T14:30:00Z',
391
+ formatName: 'ISO 8601 datetime',
392
+ },
300
393
  // Unix timestamp patterns
301
- { pattern: /unix\s*timestamp|epoch\s*time|seconds\s*since/i, value: '1705330200', formatName: 'Unix timestamp' },
302
- { pattern: /milliseconds?\s*(since|timestamp)|ms\s*timestamp/i, value: '1705330200000', formatName: 'Unix timestamp (ms)' },
394
+ {
395
+ pattern: /unix\s*timestamp|epoch\s*time|seconds\s*since/i,
396
+ value: '1705330200',
397
+ formatName: 'Unix timestamp',
398
+ },
399
+ {
400
+ pattern: /milliseconds?\s*(since|timestamp)|ms\s*timestamp/i,
401
+ value: '1705330200000',
402
+ formatName: 'Unix timestamp (ms)',
403
+ },
303
404
  // Time patterns
304
- { pattern: /HH:MM:SS|time.*format.*HH|24.hour.*time/i, value: '14:30:00', formatName: '24-hour time' },
405
+ {
406
+ pattern: /HH:MM:SS|time.*format.*HH|24.hour.*time/i,
407
+ value: '14:30:00',
408
+ formatName: '24-hour time',
409
+ },
305
410
  { pattern: /HH:MM|hour.*minute/i, value: '14:30', formatName: 'hour:minute' },
306
411
  // Other date formats
307
412
  { pattern: /MM\/DD\/YYYY|US\s*date/i, value: '01/15/2024', formatName: 'US date' },
@@ -317,7 +422,11 @@ const SEMANTIC_FORMAT_PATTERNS = [
317
422
  // Phone patterns
318
423
  { pattern: /phone.*number|telephone/i, value: '+1-555-123-4567', formatName: 'phone' },
319
424
  // UUID patterns
320
- { pattern: /UUID|unique.*identifier/i, value: '550e8400-e29b-41d4-a716-446655440000', formatName: 'UUID' },
425
+ {
426
+ pattern: /UUID|unique.*identifier/i,
427
+ value: '550e8400-e29b-41d4-a716-446655440000',
428
+ formatName: 'UUID',
429
+ },
321
430
  // IP address patterns
322
431
  { pattern: /IP.*address|IPv4/i, value: '192.168.1.100', formatName: 'IP address' },
323
432
  // JSON patterns
@@ -382,12 +491,16 @@ function generateSmartStringValue(propName, prop) {
382
491
  if (lowerName.includes('email') || description.includes('email')) {
383
492
  return 'test@example.com';
384
493
  }
385
- if (lowerName.includes('url') || lowerName.includes('uri') ||
386
- description.includes('url') || description.includes('uri')) {
494
+ if (lowerName.includes('url') ||
495
+ lowerName.includes('uri') ||
496
+ description.includes('url') ||
497
+ description.includes('uri')) {
387
498
  return 'https://example.com';
388
499
  }
389
- if (lowerName.includes('path') || lowerName.includes('directory') ||
390
- lowerName.includes('dir') || description.includes('path')) {
500
+ if (lowerName.includes('path') ||
501
+ lowerName.includes('directory') ||
502
+ lowerName.includes('dir') ||
503
+ description.includes('path')) {
391
504
  return '/tmp/test';
392
505
  }
393
506
  if (lowerName.includes('id') || description.includes('identifier')) {
@@ -608,7 +721,7 @@ function detectOperationBasedPattern(properties) {
608
721
  * @returns Detection result with reason if self-stateful
609
722
  */
610
723
  function detectSelfStatefulPattern(toolName, toolDescription, properties, requiredParams) {
611
- const { DESCRIPTION_PATTERNS, STATE_PARAM_PATTERNS, STATEFUL_TOOL_NAME_PATTERNS, } = SELF_STATEFUL_DETECTION;
724
+ const { DESCRIPTION_PATTERNS, STATE_PARAM_PATTERNS, STATEFUL_TOOL_NAME_PATTERNS } = SELF_STATEFUL_DETECTION;
612
725
  // Check description for state dependency patterns
613
726
  if (toolDescription) {
614
727
  for (const pattern of DESCRIPTION_PATTERNS) {
@@ -689,7 +802,7 @@ function getSchemaDepth(schema, currentDepth = 0) {
689
802
  * @returns Detection result with complex array parameter names
690
803
  */
691
804
  function detectComplexArraySchema(properties) {
692
- const { MAX_SIMPLE_DEPTH, MIN_REQUIRED_PROPERTIES, STRUCTURED_DATA_PATTERNS, } = COMPLEX_SCHEMA_DETECTION;
805
+ const { MAX_SIMPLE_DEPTH, MIN_REQUIRED_PROPERTIES, STRUCTURED_DATA_PATTERNS } = COMPLEX_SCHEMA_DETECTION;
693
806
  const complexParams = [];
694
807
  for (const [paramName, prop] of Object.entries(properties)) {
695
808
  if (prop.type !== 'array' || !prop.items)
@@ -764,18 +877,14 @@ function detectFalsePositivePatterns(toolName, toolDescription, properties, requ
764
877
  * For operation-based, self-stateful, or complex array tools: Tests use 'either' outcome
765
878
  * since we cannot reliably predict success for these patterns.
766
879
  */
767
- function generateHappyPathTests(toolName, toolDescription, properties, requiredParams, fixtures) {
880
+ function generateHappyPathTests(toolName, toolDescription, schema, properties, requiredParams, fixtures) {
768
881
  const questions = [];
769
882
  const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
770
883
  // Detect all patterns that commonly cause false positives
771
884
  const detection = detectFalsePositivePatterns(toolName, toolDescription, properties, requiredParams);
772
885
  // Determine if we need to use 'either' outcome due to detected patterns
773
- const needsFlexibleOutcome = detection.isOperationBased ||
774
- detection.isSelfStateful ||
775
- detection.hasComplexArrays;
776
- const happyPathOutcome = needsFlexibleOutcome
777
- ? 'either'
778
- : 'success';
886
+ const needsFlexibleOutcome = detection.isOperationBased || detection.isSelfStateful || detection.hasComplexArrays;
887
+ const happyPathOutcome = needsFlexibleOutcome ? 'either' : 'success';
779
888
  // Build suffix string for test descriptions
780
889
  const suffixes = [];
781
890
  if (detection.isOperationBased)
@@ -847,8 +956,129 @@ function generateHappyPathTests(toolName, toolDescription, properties, requiredP
847
956
  metadata: buildMetadata(),
848
957
  });
849
958
  }
959
+ // Conditional schema tests (if/then/else)
960
+ if (schema && questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
961
+ const conditionalTests = generateConditionalHappyPathTests(schema, properties, requiredParams, fixtures, happyPathOutcome, buildMetadata());
962
+ for (const test of conditionalTests) {
963
+ if (questions.length >= SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY)
964
+ break;
965
+ addQuestion(questions, test);
966
+ }
967
+ }
968
+ // Variant coverage for oneOf/anyOf
969
+ if (questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
970
+ const variantTests = generateVariantHappyPathTests(properties, requiredParams, fixtures, happyPathOutcome, buildMetadata());
971
+ for (const test of variantTests) {
972
+ if (questions.length >= SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY)
973
+ break;
974
+ addQuestion(questions, test);
975
+ }
976
+ }
850
977
  return questions.slice(0, SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY);
851
978
  }
979
/**
 * Build happy path tests that exercise a schema's if/then/else conditional.
 * Best-effort: the condition arguments come from buildArgsForCondition, and
 * the else branch is reached by perturbing those arguments with
 * invertConditionArgs.
 *
 * Returns an empty list when the schema has no `if`, or when no arguments
 * could be derived from the condition.
 */
function generateConditionalHappyPathTests(schema, properties, requiredParams, fixtures, expectedOutcome, metadata) {
    const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
    if (!schema.if) {
        return [];
    }
    const conditionArgs = buildArgsForCondition(schema.if, fixtures);
    const hasConditionArgs = Object.keys(conditionArgs).length > 0;
    if (!hasConditionArgs) {
        return [];
    }
    const conditionalTests = [];
    if (schema.then) {
        // Condition satisfied: base args come from the schema merged with its then-branch
        const mergedThen = mergeSchemas(schema, schema.then);
        const branchArgs = buildBaseArgs(mergedThen.properties ?? properties, mergedThen.required ?? requiredParams, fixtures);
        conditionalTests.push({
            description: `${CATEGORY_DESCRIPTIONS.HAPPY_PATH}: conditional (if/then)`,
            category: 'happy_path',
            args: { ...branchArgs, ...conditionArgs },
            expectedOutcome,
            metadata,
        });
    }
    if (schema.else) {
        // Condition violated: base args come from the else-branch, condition args are perturbed
        const mergedElse = mergeSchemas(schema, schema.else);
        const branchArgs = buildBaseArgs(mergedElse.properties ?? properties, mergedElse.required ?? requiredParams, fixtures);
        const perturbedArgs = invertConditionArgs(conditionArgs);
        conditionalTests.push({
            description: `${CATEGORY_DESCRIPTIONS.HAPPY_PATH}: conditional (if/else)`,
            category: 'happy_path',
            args: { ...branchArgs, ...perturbedArgs },
            expectedOutcome,
            metadata,
        });
    }
    return conditionalTests;
}
1015
/**
 * Produce extra happy path tests that feed the SECOND oneOf/anyOf variant of
 * a property, complementing the default generation which uses the first.
 * Properties with fewer than two variants are ignored, and at most two tests
 * are produced.
 */
function generateVariantHappyPathTests(properties, requiredParams, fixtures, expectedOutcome, metadata) {
    const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
    const variantTests = [];
    for (const [propName, prop] of Object.entries(properties)) {
        // Stop once two variant tests have been produced
        if (variantTests.length >= 2) {
            break;
        }
        const alternatives = prop.oneOf ?? prop.anyOf;
        const hasSecondVariant = Boolean(alternatives) && alternatives.length >= 2;
        if (!hasSecondVariant) {
            continue;
        }
        const args = buildBaseArgs(properties, requiredParams, fixtures);
        args[propName] = generateDefaultValueForPropertySchema(propName, alternatives[1], fixtures);
        variantTests.push({
            description: `${CATEGORY_DESCRIPTIONS.HAPPY_PATH}: variant input for "${propName}"`,
            category: 'happy_path',
            args,
            expectedOutcome,
            metadata,
        });
    }
    return variantTests;
}
1042
/**
 * Derive argument values that satisfy a conditional (`if`) sub-schema.
 * For each property named in the condition: use its `const` when defined,
 * otherwise the first `enum` entry, otherwise a generated default value.
 * A condition without `properties` yields an empty object.
 */
function buildArgsForCondition(condition, fixtures) {
    const conditionProps = condition.properties ?? {};
    const satisfyingArgs = {};
    Object.keys(conditionProps).forEach((name) => {
        const propSchema = conditionProps[name];
        if (propSchema.const !== undefined) {
            satisfyingArgs[name] = propSchema.const;
        }
        else if (propSchema.enum && propSchema.enum.length > 0) {
            satisfyingArgs[name] = propSchema.enum[0];
        }
        else {
            satisfyingArgs[name] = generateDefaultValueForPropertySchema(name, propSchema, fixtures);
        }
    });
    return satisfyingArgs;
}
1061
/**
 * Perturb simple condition arguments so they no longer equal the original
 * values (used to steer towards an else branch):
 * booleans are negated, numbers are incremented by one, strings get an
 * "_alt" suffix. Values of any other type are copied through unchanged.
 * Returns a new object; the input is not modified.
 */
function invertConditionArgs(args) {
    const flipped = {};
    for (const name of Object.keys(args)) {
        const current = args[name];
        switch (typeof current) {
            case 'boolean':
                flipped[name] = !current;
                break;
            case 'number':
                flipped[name] = current + 1;
                break;
            case 'string':
                flipped[name] = `${current}_alt`;
                break;
            default:
                flipped[name] = current;
        }
    }
    return flipped;
}
852
1082
  /**
853
1083
  * Generate boundary value tests.
854
1084
  * Tests edge cases like empty strings, zero, large numbers.
@@ -874,7 +1104,8 @@ function generateBoundaryTests(properties, requiredParams, fixtures) {
874
1104
  });
875
1105
  }
876
1106
  // Test long string if no maxLength
877
- if (prop.maxLength === undefined && questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
1107
+ if (prop.maxLength === undefined &&
1108
+ questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
878
1109
  const longString = 'x'.repeat(BOUNDARY_VALUES.LONG_STRING_LENGTH);
879
1110
  addQuestion(questions, {
880
1111
  description: `${CATEGORY_DESCRIPTIONS.BOUNDARY}: long string for "${propName}"`,
@@ -1067,7 +1298,9 @@ function generateArrayTests(properties, requiredParams, fixtures) {
1067
1298
  });
1068
1299
  }
1069
1300
  // Test with exact maxItems (if defined and reasonable)
1070
- if (maxItems !== undefined && maxItems <= ARRAY_TESTS.MANY_ITEMS_COUNT && questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
1301
+ if (maxItems !== undefined &&
1302
+ maxItems <= ARRAY_TESTS.MANY_ITEMS_COUNT &&
1303
+ questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
1071
1304
  const maxItemsArray = generateArrayItems(prop.items, maxItems);
1072
1305
  addQuestion(questions, {
1073
1306
  description: `${CATEGORY_DESCRIPTIONS.ARRAY_HANDLING}: exact maxItems (${maxItems}) for "${propName}"`,
@@ -1124,7 +1357,7 @@ function generateNullabilityTests(properties, requiredParams, fixtures) {
1124
1357
  * Tests that required parameters are properly validated.
1125
1358
  * All error handling tests expect error - tool should reject missing required params.
1126
1359
  */
1127
- function generateErrorHandlingTests(properties, requiredParams, fixtures) {
1360
+ function generateErrorHandlingTests(schema, properties, requiredParams, fixtures) {
1128
1361
  const questions = [];
1129
1362
  const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
1130
1363
  // Test missing all required params
@@ -1149,6 +1382,24 @@ function generateErrorHandlingTests(properties, requiredParams, fixtures) {
1149
1382
  expectedOutcome: 'error',
1150
1383
  });
1151
1384
  }
1385
+ // Test dependentRequired constraints
1386
+ const dependent = schema?.dependentRequired ?? {};
1387
+ for (const [prop, deps] of Object.entries(dependent)) {
1388
+ if (questions.length >= SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY)
1389
+ break;
1390
+ if (deps.length === 0)
1391
+ continue;
1392
+ const args = buildBaseArgs(properties, requiredParams, fixtures);
1393
+ args[prop] = args[prop] ?? generateDefaultValue(prop, properties[prop] ?? {}, fixtures);
1394
+ // Remove one dependency to trigger validation error
1395
+ delete args[deps[0]];
1396
+ addQuestion(questions, {
1397
+ description: `${CATEGORY_DESCRIPTIONS.MISSING_REQUIRED}: dependent "${deps[0]}" missing for "${prop}"`,
1398
+ category: 'error_handling',
1399
+ args,
1400
+ expectedOutcome: 'error',
1401
+ });
1402
+ }
1152
1403
  return questions;
1153
1404
  }
1154
1405
  // ==================== Varied Tests for Simple Tools ====================
@@ -1165,12 +1416,8 @@ function generateVariedTestsForSimpleTools(toolName, toolDescription, properties
1165
1416
  const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
1166
1417
  // Detect patterns that affect expected outcome
1167
1418
  const detection = detectFalsePositivePatterns(toolName, toolDescription, properties, requiredParams);
1168
- const needsFlexibleOutcome = detection.isOperationBased ||
1169
- detection.isSelfStateful ||
1170
- detection.hasComplexArrays;
1171
- const variedTestOutcome = needsFlexibleOutcome
1172
- ? 'either'
1173
- : 'success';
1419
+ const needsFlexibleOutcome = detection.isOperationBased || detection.isSelfStateful || detection.hasComplexArrays;
1420
+ const variedTestOutcome = needsFlexibleOutcome ? 'either' : 'success';
1174
1421
  // Build metadata for varied tests if patterns detected
1175
1422
  const variedMetadata = needsFlexibleOutcome
1176
1423
  ? {
@@ -1190,7 +1437,7 @@ function generateVariedTestsForSimpleTools(toolName, toolDescription, properties
1190
1437
  }
1191
1438
  : undefined;
1192
1439
  // Get existing arg signatures to avoid duplicates
1193
- const existingArgSignatures = new Set(existingQuestions.map(q => JSON.stringify(q.args)));
1440
+ const existingArgSignatures = new Set(existingQuestions.map((q) => JSON.stringify(q.args)));
1194
1441
  // Variation strategies for simple/no-param tools
1195
1442
  const variationStrategies = [];
1196
1443
  // Strategy 1: Different timing contexts (useful for stateful tools)
@@ -1425,7 +1672,7 @@ export function generateSchemaTestsWithInferences(tool, options = {}) {
1425
1672
  const properties = schema?.properties ?? {};
1426
1673
  const requiredParams = (schema?.required ?? []);
1427
1674
  // 1. Happy Path Tests (always included)
1428
- questions.push(...generateHappyPathTests(tool.name, tool.description, properties, requiredParams, fixtures));
1675
+ questions.push(...generateHappyPathTests(tool.name, tool.description, schema, properties, requiredParams, fixtures));
1429
1676
  // 2. Boundary Value Tests
1430
1677
  questions.push(...generateBoundaryTests(properties, requiredParams, fixtures));
1431
1678
  // 3. Type Coercion Tests (unless skipping error tests)
@@ -1442,7 +1689,7 @@ export function generateSchemaTestsWithInferences(tool, options = {}) {
1442
1689
  questions.push(...generateNullabilityTests(properties, requiredParams, fixtures));
1443
1690
  // 7. Error Handling Tests (unless skipped)
1444
1691
  if (!options.skipErrorTests) {
1445
- questions.push(...generateErrorHandlingTests(properties, requiredParams, fixtures));
1692
+ questions.push(...generateErrorHandlingTests(schema, properties, requiredParams, fixtures));
1446
1693
  }
1447
1694
  // 8. Semantic Validation Tests (unless skipped)
1448
1695
  let semanticInferences = [];
@@ -6,6 +6,8 @@ import type { InterviewQuestion } from './types.js';
6
6
  export declare class StatefulTestRunner {
7
7
  private options;
8
8
  private values;
9
+ private jsonPathValues;
10
+ private recentResponses;
9
11
  constructor(options: {
10
12
  shareOutputs: boolean;
11
13
  });
@@ -15,5 +17,6 @@ export declare class StatefulTestRunner {
15
17
  };
16
18
  recordResponse(tool: MCPTool, response: MCPToolCallResult | null): string[];
17
19
  private findMatchingValue;
20
+ private findByJsonPath;
18
21
  }
19
22
  //# sourceMappingURL=stateful-test-runner.d.ts.map
@@ -1,11 +1,14 @@
1
1
  import { STATEFUL_TESTING } from '../constants.js';
2
2
  import { extractTextContent } from './schema-inferrer.js';
3
+ import { getValueAtPath } from '../utils/jsonpath.js';
3
4
  /**
4
5
  * Maintains shared state between tool calls for stateful testing.
5
6
  */
6
7
  export class StatefulTestRunner {
7
8
  options;
8
9
  values = new Map();
10
+ jsonPathValues = new Map();
11
+ recentResponses = [];
9
12
  constructor(options) {
10
13
  this.options = options;
11
14
  }
@@ -48,21 +51,69 @@ export class StatefulTestRunner {
48
51
  this.values.set(key, { value, sourceTool: tool.name });
49
52
  providedKeys.push(key);
50
53
  }
54
+ // Record JSONPath values for richer mapping
55
+ const pathValues = collectJsonPaths(parsed);
56
+ for (const [path, value] of Object.entries(pathValues)) {
57
+ if (this.jsonPathValues.size >= STATEFUL_TESTING.MAX_STORED_VALUES) {
58
+ break;
59
+ }
60
+ this.jsonPathValues.set(path, { value, sourceTool: tool.name });
61
+ }
62
+ // Keep a bounded list of recent responses for direct JSONPath lookup
63
+ this.recentResponses.unshift({ value: parsed, sourceTool: tool.name });
64
+ if (this.recentResponses.length > STATEFUL_TESTING.MAX_STORED_VALUES) {
65
+ this.recentResponses.pop();
66
+ }
51
67
  return providedKeys;
52
68
  }
53
69
  findMatchingValue(paramName) {
70
+ if (looksLikeJsonPath(paramName)) {
71
+ const direct = this.findByJsonPath(paramName);
72
+ if (direct)
73
+ return direct;
74
+ }
54
75
  const normalizedParam = normalizeKey(paramName);
55
76
  for (const [key, value] of this.values.entries()) {
56
77
  if (normalizeKey(key) === normalizedParam) {
57
78
  return value;
58
79
  }
59
80
  }
81
+ for (const [key, value] of this.jsonPathValues.entries()) {
82
+ if (normalizeKey(key) === normalizedParam) {
83
+ return value;
84
+ }
85
+ }
60
86
  for (const [key, value] of this.values.entries()) {
61
87
  const normalizedKey = normalizeKey(key);
62
88
  if (normalizedKey.endsWith(normalizedParam)) {
63
89
  return value;
64
90
  }
65
91
  }
92
+ for (const [key, value] of this.jsonPathValues.entries()) {
93
+ const normalizedKey = normalizeKey(key);
94
+ if (normalizedKey.endsWith(normalizedParam)) {
95
+ return value;
96
+ }
97
+ }
98
+ if (!looksLikeJsonPath(paramName)) {
99
+ const pathMatch = this.findByJsonPath(`$.${paramName}`);
100
+ if (pathMatch)
101
+ return pathMatch;
102
+ }
103
+ return null;
104
+ }
105
+ findByJsonPath(path) {
106
+ const normalized = path.startsWith('$') ? path : `$.${path}`;
107
+ const stored = this.jsonPathValues.get(normalized);
108
+ if (stored) {
109
+ return stored;
110
+ }
111
+ for (const entry of this.recentResponses) {
112
+ const value = getValueAtPath(entry.value, normalized);
113
+ if (value !== undefined) {
114
+ return { value, sourceTool: entry.sourceTool };
115
+ }
116
+ }
66
117
  return null;
67
118
  }
68
119
  }
@@ -103,4 +154,33 @@ function flattenValue(value, prefix = '') {
103
154
  }
104
155
  return result;
105
156
  }
157
/**
 * Heuristic: a string is treated as a JSONPath when it starts with "$" or
 * contains a "." or "[" anywhere.
 */
function looksLikeJsonPath(value) {
    if (value.startsWith('$')) {
        return true;
    }
    return ['.', '['].some((marker) => value.includes(marker));
}
160
/**
 * Walk a value and record its leaves in `result`, keyed by a "$"-rooted
 * path string ("$.a.b", "$.list[0]").
 *
 * - Recursion stops once depth exceeds 4, and null/undefined values reached
 *   by recursion are not recorded.
 * - Only the first 3 items of any array are visited.
 * - Non-object children of an object (including null) are recorded directly.
 *
 * The same `result` object is filled in and returned.
 */
function collectJsonPaths(value, path = '$', depth = 0, result = {}) {
    const maxDepth = 4;
    const maxArraySamples = 3;
    if (depth > maxDepth || value == null) {
        return result;
    }
    if (Array.isArray(value)) {
        const sampled = Math.min(value.length, maxArraySamples);
        for (let index = 0; index < sampled; index++) {
            collectJsonPaths(value[index], `${path}[${index}]`, depth + 1, result);
        }
        return result;
    }
    if (typeof value !== 'object') {
        // Scalar reached directly (top-level scalar or array element)
        result[path] = value;
        return result;
    }
    for (const key of Object.keys(value)) {
        const child = value[key];
        const childPath = `${path}.${key}`;
        const isContainer = child !== null && typeof child === 'object';
        if (isContainer) {
            collectJsonPaths(child, childPath, depth + 1, result);
        }
        else {
            result[childPath] = child;
        }
    }
    return result;
}
106
186
  //# sourceMappingURL=stateful-test-runner.js.map
@@ -1,6 +1,10 @@
1
1
  import type { DiscoveryResult } from '../discovery/types.js';
2
2
  import type { MCPToolCallResult, MCPPromptGetResult, MCPResourceReadResult } from '../transport/types.js';
3
3
  import type { InferredSchema } from '../baseline/response-fingerprint.js';
4
+ import type { ResponseSchemaEvolution } from '../baseline/response-schema-tracker.js';
5
+ import type { ErrorAnalysisSummary } from '../baseline/error-analyzer.js';
6
+ import type { DocumentationScore } from '../baseline/documentation-scorer.js';
7
+ import type { SemanticInference } from '../validation/semantic-types.js';
4
8
  import type { Persona, QuestionCategory } from '../persona/types.js';
5
9
  import type { Workflow, WorkflowResult, WorkflowTimeoutConfig } from '../workflow/types.js';
6
10
  import type { LoadedScenarios, ScenarioResult } from '../scenarios/types.js';
@@ -401,6 +405,14 @@ export interface InterviewResult {
401
405
  limitations: string[];
402
406
  /** Overall recommendations */
403
407
  recommendations: string[];
408
+ /** Semantic type inferences by tool */
409
+ semanticInferences?: Record<string, SemanticInference[]>;
410
+ /** Response schema evolution by tool */
411
+ schemaEvolution?: Record<string, ResponseSchemaEvolution>;
412
+ /** Enhanced error analysis summaries by tool */
413
+ errorAnalysisSummaries?: Record<string, ErrorAnalysisSummary>;
414
+ /** Documentation quality score */
415
+ documentationScore?: DocumentationScore;
404
416
  /** Interview metadata */
405
417
  metadata: InterviewMetadata;
406
418
  }
@@ -18,7 +18,7 @@ const FILTERED_ENV_VARS = new Set([
18
18
  'COHERE_API_KEY',
19
19
  'HUGGINGFACE_API_KEY',
20
20
  'REPLICATE_API_TOKEN',
21
- // Cloud provider credentials
21
+ // Provider credentials
22
22
  'AWS_SECRET_ACCESS_KEY',
23
23
  'AWS_SESSION_TOKEN',
24
24
  'AZURE_CLIENT_SECRET',
@@ -29,7 +29,7 @@ export interface SSETransportConfig extends BaseTransportConfig {
29
29
  * - POST {baseUrl}/message - Endpoint for sending messages
30
30
  */
31
31
  export declare class SSETransport extends BaseTransport {
32
- private eventSource;
32
+ private streamAbortController;
33
33
  private abortController;
34
34
  private connected;
35
35
  private reconnectAttempts;
@@ -55,6 +55,10 @@ export declare class SSETransport extends BaseTransport {
55
55
  * Handle an incoming SSE message.
56
56
  */
57
57
  private handleSSEMessage;
58
+ /**
59
+ * Stream and parse SSE events from a fetch response.
60
+ */
61
+ private readSSEStream;
58
62
  /**
59
63
  * Handle reconnection after a connection error.
60
64
  *
@@ -63,7 +67,7 @@ export declare class SSETransport extends BaseTransport {
63
67
  * - Uses capped exponential backoff
64
68
  * - Clears reconnect timer on close
65
69
  * - Checks isClosing flag to prevent reconnection after close()
66
- * - Explicitly closes EventSource on max attempts
70
+ * - Explicitly aborts SSE stream on max attempts
67
71
  */
68
72
  private handleReconnect;
69
73
  /**
@@ -74,7 +78,7 @@ export declare class SSETransport extends BaseTransport {
74
78
  * Close the SSE connection.
75
79
  *
76
80
  * RELIABILITY: Properly cleans up all resources including:
77
- * - EventSource connection
81
+ * - SSE stream connection
78
82
  * - Pending HTTP requests (via abort controller)
79
83
  * - Reconnection timer
80
84
  * - Sets isClosing flag to prevent reconnection attempts