npm - @dotsetlabs/bellwether - Versions diffs - 1.0.3 → 2.0.0 - Mend

@dotsetlabs/bellwether 1.0.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

package/CHANGELOG.md +74 -0
package/README.md +8 -2
package/dist/baseline/accessors.d.ts +1 -1
package/dist/baseline/accessors.js +1 -3
package/dist/baseline/baseline-format.d.ts +287 -0
package/dist/baseline/baseline-format.js +12 -0
package/dist/baseline/comparator.js +249 -11
package/dist/baseline/converter.d.ts +15 -15
package/dist/baseline/converter.js +46 -34
package/dist/baseline/diff.d.ts +1 -1
package/dist/baseline/diff.js +45 -28
package/dist/baseline/error-analyzer.d.ts +1 -1
package/dist/baseline/error-analyzer.js +90 -17
package/dist/baseline/incremental-checker.js +8 -5
package/dist/baseline/index.d.ts +2 -12
package/dist/baseline/index.js +3 -23
package/dist/baseline/performance-tracker.d.ts +0 -1
package/dist/baseline/performance-tracker.js +13 -20
package/dist/baseline/response-fingerprint.js +39 -2
package/dist/baseline/saver.js +41 -10
package/dist/baseline/schema-compare.d.ts +22 -0
package/dist/baseline/schema-compare.js +259 -16
package/dist/baseline/types.d.ts +10 -7
package/dist/cache/response-cache.d.ts +8 -0
package/dist/cache/response-cache.js +110 -0
package/dist/cli/commands/check.js +23 -6
package/dist/cli/commands/explore.js +34 -14
package/dist/cli/index.js +8 -0
package/dist/config/template.js +8 -7
package/dist/config/validator.d.ts +59 -59
package/dist/config/validator.js +245 -90
package/dist/constants/core.d.ts +4 -0
package/dist/constants/core.js +8 -19
package/dist/constants/registry.d.ts +17 -0
package/dist/constants/registry.js +18 -0
package/dist/constants/testing.d.ts +0 -369
package/dist/constants/testing.js +18 -456
package/dist/constants.d.ts +1 -1
package/dist/constants.js +1 -1
package/dist/docs/contract.js +131 -83
package/dist/docs/report.js +8 -5
package/dist/interview/insights.d.ts +17 -0
package/dist/interview/insights.js +52 -0
package/dist/interview/interviewer.js +52 -10
package/dist/interview/prompt-test-generator.d.ts +12 -0
package/dist/interview/prompt-test-generator.js +77 -0
package/dist/interview/resource-test-generator.d.ts +12 -0
package/dist/interview/resource-test-generator.js +20 -0
package/dist/interview/schema-inferrer.js +26 -4
package/dist/interview/schema-test-generator.js +278 -31
package/dist/interview/stateful-test-runner.d.ts +3 -0
package/dist/interview/stateful-test-runner.js +80 -0
package/dist/interview/types.d.ts +12 -0
package/dist/transport/mcp-client.js +1 -1
package/dist/transport/sse-transport.d.ts +7 -3
package/dist/transport/sse-transport.js +157 -67
package/dist/version.js +1 -1
package/man/bellwether.1 +1 -1
package/man/bellwether.1.md +2 -2
package/package.json +1 -1
package/schemas/bellwether-check.schema.json +185 -0
package/schemas/bellwether-explore.schema.json +837 -0
package/scripts/completions/bellwether.bash +10 -4
package/scripts/completions/bellwether.zsh +55 -2

package/dist/interview/schema-test-generator.js CHANGED Viewed

@@ -24,6 +24,26 @@ function getPrimaryType(schema) {
  * Used when generating array items or object properties.
  */
 function generateDefaultValueForSchema(schema, fixtures) {
+    // Prefer conditional schema branches if present
+    if (schema.if && schema.then) {
+        const merged = mergeSchemas(schema, schema.then);
+        return generateDefaultValueForSchema(merged, fixtures);
+    }
+    if (schema.if && schema.else) {
+        const merged = mergeSchemas(schema, schema.else);
+        return generateDefaultValueForSchema(merged, fixtures);
+    }
+    // Handle compositional schemas by selecting a representative variant
+    if (schema.oneOf && schema.oneOf.length > 0) {
+        return generateDefaultValueForSchema(schema.oneOf[0], fixtures);
+    }
+    if (schema.anyOf && schema.anyOf.length > 0) {
+        return generateDefaultValueForSchema(schema.anyOf[0], fixtures);
+    }
+    if (schema.allOf && schema.allOf.length > 0) {
+        const merged = mergeAllOfSchemas(schema.allOf);
+        return generateDefaultValueForSchema(merged, fixtures);
+    }
     const type = getPrimaryType(schema);
     // Use schema example if available
     if (schema.examples && schema.examples.length > 0) {
@@ -61,6 +81,27 @@ function generateDefaultValueForSchema(schema, fixtures) {
             return 'test';
     }
 }
+/**
+ * Merge a base schema with an override schema (used for if/then/else).
+ * Only merges a safe subset of fields used by the test generator:
+ * top-level keys (override wins), `properties` (shallow, override wins per
+ * property name) and `required` (de-duplicated union, base entries first).
+ *
+ * When `override` is one of the base's own conditional branches (`base.then`
+ * or `base.else`), the base's `if`/`then`/`else` keywords are left out of the
+ * result. The value generators recurse on the merged schema whenever it still
+ * has `if` together with `then` or `else`; copying those keywords over made
+ * that recursion re-enter the same branch until the call stack overflowed.
+ * Conditional keywords declared by the branch itself are kept, so nested
+ * conditions are still resolved.
+ *
+ * @param {object} base - Schema providing the defaults.
+ * @param {object} override - Schema whose fields take precedence.
+ * @returns {object} A new schema object; neither input is mutated.
+ */
+function mergeSchemas(base, override) {
+    const inherited = { ...base };
+    if (override === base.then || override === base.else) {
+        // The condition has been resolved to this branch: do not carry it forward.
+        delete inherited.if;
+        delete inherited.then;
+        delete inherited.else;
+    }
+    return {
+        ...inherited,
+        ...override,
+        properties: {
+            ...(base.properties ?? {}),
+            ...(override.properties ?? {}),
+        },
+        required: Array.from(new Set([...(base.required ?? []), ...(override.required ?? [])])),
+    };
+}
+/**
+ * Collapse the members of an allOf list into one schema (best-effort).
+ * Members are folded left to right through mergeSchemas, starting from an
+ * empty schema.
+ */
+function mergeAllOfSchemas(schemas) {
+    let combined = {};
+    schemas.forEach((member) => {
+        combined = mergeSchemas(combined, member);
+    });
+    return combined;
+}
 /**
  * Generate a smart string value for a schema without property name context.
  * Used for nested array items where we don't have a property name.
@@ -129,6 +170,7 @@ function generateMinimalObject(schema, fixtures) {
     const result = {};
     const requiredProps = schema.required ?? [];
     const properties = schema.properties ?? {};
+    const patternProperties = schema.patternProperties ?? {};
     // Only populate required properties
     for (const propName of requiredProps) {
         const propSchema = properties[propName];
@@ -141,8 +183,34 @@ function generateMinimalObject(schema, fixtures) {
             result[propName] = 'test';
         }
     }
+    // If there are no required properties but patternProperties exist, add one matching key
+    if (requiredProps.length === 0 &&
+        Object.keys(result).length === 0 &&
+        Object.keys(patternProperties).length > 0) {
+        const [pattern, propSchema] = Object.entries(patternProperties)[0];
+        const key = generateKeyForPattern(pattern);
+        result[key] = generateDefaultValueForPropertySchema(key, propSchema, fixtures);
+    }
     return result;
 }
+/**
+ * Pick a property key that satisfies a patternProperties regex (best-effort).
+ * Tries a fixed list of plain words in order and returns the first one the
+ * pattern accepts. Returns 'test' when no word is accepted or the pattern
+ * does not compile.
+ */
+function generateKeyForPattern(pattern) {
+    const fallbackKey = 'test';
+    let matcher;
+    try {
+        matcher = new RegExp(pattern);
+    }
+    catch {
+        // Ignore invalid regex - fall back to a generic key
+        return fallbackKey;
+    }
+    const accepted = ['test', 'key', 'value', 'item', 'prop'].find((word) => matcher.test(word));
+    return accepted ?? fallbackKey;
+}
 /**
  * Generate an array with a specific number of items based on the item schema.
  * Used for boundary testing at minItems/maxItems limits.
@@ -169,6 +237,23 @@ function generateDefaultValueForPropertySchema(propName, schema, fixtures) {
     if (fixtureValue !== undefined) {
         return fixtureValue;
     }
+    // Prefer conditional branches
+    if (schema.if && schema.then) {
+        return generateDefaultValueForPropertySchema(propName, mergeSchemas(schema, schema.then), fixtures);
+    }
+    if (schema.if && schema.else) {
+        return generateDefaultValueForPropertySchema(propName, mergeSchemas(schema, schema.else), fixtures);
+    }
+    // Handle compositional schemas
+    if (schema.oneOf && schema.oneOf.length > 0) {
+        return generateDefaultValueForPropertySchema(propName, schema.oneOf[0], fixtures);
+    }
+    if (schema.anyOf && schema.anyOf.length > 0) {
+        return generateDefaultValueForPropertySchema(propName, schema.anyOf[0], fixtures);
+    }
+    if (schema.allOf && schema.allOf.length > 0) {
+        return generateDefaultValueForPropertySchema(propName, mergeAllOfSchemas(schema.allOf), fixtures);
+    }
     const type = getPrimaryType(schema);
     // Use schema example if available
     if (schema.examples && schema.examples.length > 0) {
@@ -294,14 +379,34 @@ function generateDefaultValue(propName, prop, fixtures) {
  */
 const DATE_FORMAT_PATTERNS = [
     // ISO 8601 date patterns
-    { pattern: /YYYY-MM-DD|ISO\s*8601\s*date|date.*format.*YYYY/i, value: '2024-01-15', formatName: 'ISO 8601 date' },
+    {
+        pattern: /YYYY-MM-DD|ISO\s*8601\s*date|date.*format.*YYYY/i,
+        value: '2024-01-15',
+        formatName: 'ISO 8601 date',
+    },
     { pattern: /YYYY-MM|year-month|month.*format/i, value: '2024-01', formatName: 'year-month' },
-    { pattern: /ISO\s*8601\s*(datetime|timestamp)|datetime.*format|timestamp.*ISO/i, value: '2024-01-15T14:30:00Z', formatName: 'ISO 8601 datetime' },
+    {
+        pattern: /ISO\s*8601\s*(datetime|timestamp)|datetime.*format|timestamp.*ISO/i,
+        value: '2024-01-15T14:30:00Z',
+        formatName: 'ISO 8601 datetime',
+    },
     // Unix timestamp patterns
-    { pattern: /unix\s*timestamp|epoch\s*time|seconds\s*since/i, value: '1705330200', formatName: 'Unix timestamp' },
-    { pattern: /milliseconds?\s*(since|timestamp)|ms\s*timestamp/i, value: '1705330200000', formatName: 'Unix timestamp (ms)' },
+    {
+        pattern: /unix\s*timestamp|epoch\s*time|seconds\s*since/i,
+        value: '1705330200',
+        formatName: 'Unix timestamp',
+    },
+    {
+        pattern: /milliseconds?\s*(since|timestamp)|ms\s*timestamp/i,
+        value: '1705330200000',
+        formatName: 'Unix timestamp (ms)',
+    },
     // Time patterns
-    { pattern: /HH:MM:SS|time.*format.*HH|24.hour.*time/i, value: '14:30:00', formatName: '24-hour time' },
+    {
+        pattern: /HH:MM:SS|time.*format.*HH|24.hour.*time/i,
+        value: '14:30:00',
+        formatName: '24-hour time',
+    },
     { pattern: /HH:MM|hour.*minute/i, value: '14:30', formatName: 'hour:minute' },
     // Other date formats
     { pattern: /MM\/DD\/YYYY|US\s*date/i, value: '01/15/2024', formatName: 'US date' },
@@ -317,7 +422,11 @@ const SEMANTIC_FORMAT_PATTERNS = [
     // Phone patterns
     { pattern: /phone.*number|telephone/i, value: '+1-555-123-4567', formatName: 'phone' },
     // UUID patterns
-    { pattern: /UUID|unique.*identifier/i, value: '550e8400-e29b-41d4-a716-446655440000', formatName: 'UUID' },
+    {
+        pattern: /UUID|unique.*identifier/i,
+        value: '550e8400-e29b-41d4-a716-446655440000',
+        formatName: 'UUID',
+    },
     // IP address patterns
     { pattern: /IP.*address|IPv4/i, value: '192.168.1.100', formatName: 'IP address' },
     // JSON patterns
@@ -382,12 +491,16 @@ function generateSmartStringValue(propName, prop) {
     if (lowerName.includes('email') || description.includes('email')) {
         return 'test@example.com';
     }
-    if (lowerName.includes('url') || lowerName.includes('uri') ||
-        description.includes('url') || description.includes('uri')) {
+    if (lowerName.includes('url') ||
+        lowerName.includes('uri') ||
+        description.includes('url') ||
+        description.includes('uri')) {
         return 'https://example.com';
     }
-    if (lowerName.includes('path') || lowerName.includes('directory') ||
-        lowerName.includes('dir') || description.includes('path')) {
+    if (lowerName.includes('path') ||
+        lowerName.includes('directory') ||
+        lowerName.includes('dir') ||
+        description.includes('path')) {
         return '/tmp/test';
     }
     if (lowerName.includes('id') || description.includes('identifier')) {
@@ -608,7 +721,7 @@ function detectOperationBasedPattern(properties) {
  * @returns Detection result with reason if self-stateful
  */
 function detectSelfStatefulPattern(toolName, toolDescription, properties, requiredParams) {
-    const { DESCRIPTION_PATTERNS, STATE_PARAM_PATTERNS, STATEFUL_TOOL_NAME_PATTERNS, } = SELF_STATEFUL_DETECTION;
+    const { DESCRIPTION_PATTERNS, STATE_PARAM_PATTERNS, STATEFUL_TOOL_NAME_PATTERNS } = SELF_STATEFUL_DETECTION;
     // Check description for state dependency patterns
     if (toolDescription) {
         for (const pattern of DESCRIPTION_PATTERNS) {
@@ -689,7 +802,7 @@ function getSchemaDepth(schema, currentDepth = 0) {
  * @returns Detection result with complex array parameter names
  */
 function detectComplexArraySchema(properties) {
-    const { MAX_SIMPLE_DEPTH, MIN_REQUIRED_PROPERTIES, STRUCTURED_DATA_PATTERNS, } = COMPLEX_SCHEMA_DETECTION;
+    const { MAX_SIMPLE_DEPTH, MIN_REQUIRED_PROPERTIES, STRUCTURED_DATA_PATTERNS } = COMPLEX_SCHEMA_DETECTION;
     const complexParams = [];
     for (const [paramName, prop] of Object.entries(properties)) {
         if (prop.type !== 'array' || !prop.items)
@@ -764,18 +877,14 @@ function detectFalsePositivePatterns(toolName, toolDescription, properties, requ
  * For operation-based, self-stateful, or complex array tools: Tests use 'either' outcome
  * since we cannot reliably predict success for these patterns.
  */
-function generateHappyPathTests(toolName, toolDescription, properties, requiredParams, fixtures) {
+function generateHappyPathTests(toolName, toolDescription, schema, properties, requiredParams, fixtures) {
     const questions = [];
     const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
     // Detect all patterns that commonly cause false positives
     const detection = detectFalsePositivePatterns(toolName, toolDescription, properties, requiredParams);
     // Determine if we need to use 'either' outcome due to detected patterns
-    const needsFlexibleOutcome = detection.isOperationBased ||
-        detection.isSelfStateful ||
-        detection.hasComplexArrays;
-    const happyPathOutcome = needsFlexibleOutcome
-        ? 'either'
-        : 'success';
+    const needsFlexibleOutcome = detection.isOperationBased || detection.isSelfStateful || detection.hasComplexArrays;
+    const happyPathOutcome = needsFlexibleOutcome ? 'either' : 'success';
     // Build suffix string for test descriptions
     const suffixes = [];
     if (detection.isOperationBased)
@@ -847,8 +956,129 @@ function generateHappyPathTests(toolName, toolDescription, properties, requiredP
             metadata: buildMetadata(),
         });
     }
+    // Conditional schema tests (if/then/else)
+    if (schema && questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
+        const conditionalTests = generateConditionalHappyPathTests(schema, properties, requiredParams, fixtures, happyPathOutcome, buildMetadata());
+        for (const test of conditionalTests) {
+            if (questions.length >= SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY)
+                break;
+            addQuestion(questions, test);
+        }
+    }
+    // Variant coverage for oneOf/anyOf
+    if (questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
+        const variantTests = generateVariantHappyPathTests(properties, requiredParams, fixtures, happyPathOutcome, buildMetadata());
+        for (const test of variantTests) {
+            if (questions.length >= SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY)
+                break;
+            addQuestion(questions, test);
+        }
+    }
     return questions.slice(0, SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY);
 }
+/**
+ * Build happy path tests that exercise a schema's if/then/else branches.
+ * Best-effort: the condition arguments come from buildArgsForCondition, and
+ * the else-branch test passes those same arguments through invertConditionArgs.
+ * Returns an empty list when the schema has no `if` or when the condition
+ * yields no arguments.
+ */
+function generateConditionalHappyPathTests(schema, properties, requiredParams, fixtures, expectedOutcome, metadata) {
+    const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
+    if (!schema.if) {
+        return [];
+    }
+    const conditionArgs = buildArgsForCondition(schema.if, fixtures);
+    if (Object.keys(conditionArgs).length === 0) {
+        return [];
+    }
+    const conditionalTests = [];
+    if (schema.then) {
+        const { properties: branchProps, required: branchRequired } = mergeSchemas(schema, schema.then);
+        const branchArgs = buildBaseArgs(branchProps ?? properties, branchRequired ?? requiredParams, fixtures);
+        conditionalTests.push({
+            description: `${CATEGORY_DESCRIPTIONS.HAPPY_PATH}: conditional (if/then)`,
+            category: 'happy_path',
+            // Condition arguments are spread last so they win over generated defaults.
+            args: { ...branchArgs, ...conditionArgs },
+            expectedOutcome,
+            metadata,
+        });
+    }
+    if (schema.else) {
+        const { properties: branchProps, required: branchRequired } = mergeSchemas(schema, schema.else);
+        const branchArgs = buildBaseArgs(branchProps ?? properties, branchRequired ?? requiredParams, fixtures);
+        const negatedArgs = invertConditionArgs(conditionArgs);
+        conditionalTests.push({
+            description: `${CATEGORY_DESCRIPTIONS.HAPPY_PATH}: conditional (if/else)`,
+            category: 'happy_path',
+            args: { ...branchArgs, ...negatedArgs },
+            expectedOutcome,
+            metadata,
+        });
+    }
+    return conditionalTests;
+}
+/**
+ * Build extra happy path tests that feed the second oneOf/anyOf variant of a
+ * property (oneOf is preferred when both are present).
+ * Every test starts from the base arguments and overrides only the property
+ * under test; at most two tests are produced.
+ */
+function generateVariantHappyPathTests(properties, requiredParams, fixtures, expectedOutcome, metadata) {
+    const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
+    const maxVariantTests = 2;
+    const variantTests = [];
+    for (const [propName, prop] of Object.entries(properties)) {
+        const alternatives = prop.oneOf ?? prop.anyOf;
+        // A second variant is needed; the first is already covered by the default generators.
+        if (!alternatives || alternatives.length < 2) {
+            continue;
+        }
+        const args = buildBaseArgs(properties, requiredParams, fixtures);
+        args[propName] = generateDefaultValueForPropertySchema(propName, alternatives[1], fixtures);
+        variantTests.push({
+            description: `${CATEGORY_DESCRIPTIONS.HAPPY_PATH}: variant input for "${propName}"`,
+            category: 'happy_path',
+            args,
+            expectedOutcome,
+            metadata,
+        });
+        if (variantTests.length >= maxVariantTests) {
+            break;
+        }
+    }
+    return variantTests;
+}
+/**
+ * Build arguments intended to satisfy a conditional schema's `if` clause.
+ * For each property of the condition: use its `const` when defined, else the
+ * first `enum` entry, else a generated default value.
+ */
+function buildArgsForCondition(condition, fixtures) {
+    const pickValue = (name, prop) => {
+        if (prop.const !== undefined) {
+            return prop.const;
+        }
+        if (prop.enum && prop.enum.length > 0) {
+            return prop.enum[0];
+        }
+        return generateDefaultValueForPropertySchema(name, prop, fixtures);
+    };
+    const conditionArgs = {};
+    for (const [name, prop] of Object.entries(condition.properties ?? {})) {
+        conditionArgs[name] = pickValue(name, prop);
+    }
+    return conditionArgs;
+}
+/**
+ * Perturb simple condition arguments so the else branch is triggered.
+ * Booleans are negated, numbers are incremented by one, strings get an
+ * `_alt` suffix; values of any other type are copied unchanged.
+ */
+function invertConditionArgs(args) {
+    const flip = (value) => {
+        switch (typeof value) {
+            case 'boolean':
+                return !value;
+            case 'number':
+                return value + 1;
+            case 'string':
+                return `${value}_alt`;
+            default:
+                return value;
+        }
+    };
+    const flipped = {};
+    for (const [key, value] of Object.entries(args)) {
+        flipped[key] = flip(value);
+    }
+    return flipped;
+}
 /**
  * Generate boundary value tests.
  * Tests edge cases like empty strings, zero, large numbers.
@@ -874,7 +1104,8 @@ function generateBoundaryTests(properties, requiredParams, fixtures) {
                     });
                 }
                 // Test long string if no maxLength
-                if (prop.maxLength === undefined && questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
+                if (prop.maxLength === undefined &&
+                    questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
                     const longString = 'x'.repeat(BOUNDARY_VALUES.LONG_STRING_LENGTH);
                     addQuestion(questions, {
                         description: `${CATEGORY_DESCRIPTIONS.BOUNDARY}: long string for "${propName}"`,
@@ -1067,7 +1298,9 @@ function generateArrayTests(properties, requiredParams, fixtures) {
                 });
             }
             // Test with exact maxItems (if defined and reasonable)
-            if (maxItems !== undefined && maxItems <= ARRAY_TESTS.MANY_ITEMS_COUNT && questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
+            if (maxItems !== undefined &&
+                maxItems <= ARRAY_TESTS.MANY_ITEMS_COUNT &&
+                questions.length < SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY) {
                 const maxItemsArray = generateArrayItems(prop.items, maxItems);
                 addQuestion(questions, {
                     description: `${CATEGORY_DESCRIPTIONS.ARRAY_HANDLING}: exact maxItems (${maxItems}) for "${propName}"`,
@@ -1124,7 +1357,7 @@ function generateNullabilityTests(properties, requiredParams, fixtures) {
  * Tests that required parameters are properly validated.
  * All error handling tests expect error - tool should reject missing required params.
  */
-function generateErrorHandlingTests(properties, requiredParams, fixtures) {
+function generateErrorHandlingTests(schema, properties, requiredParams, fixtures) {
     const questions = [];
     const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
     // Test missing all required params
@@ -1149,6 +1382,24 @@ function generateErrorHandlingTests(properties, requiredParams, fixtures) {
             expectedOutcome: 'error',
         });
     }
+    // Test dependentRequired constraints
+    const dependent = schema?.dependentRequired ?? {};
+    for (const [prop, deps] of Object.entries(dependent)) {
+        if (questions.length >= SCHEMA_TESTING.MAX_TESTS_PER_CATEGORY)
+            break;
+        if (deps.length === 0)
+            continue;
+        const args = buildBaseArgs(properties, requiredParams, fixtures);
+        args[prop] = args[prop] ?? generateDefaultValue(prop, properties[prop] ?? {}, fixtures);
+        // Remove one dependency to trigger validation error
+        delete args[deps[0]];
+        addQuestion(questions, {
+            description: `${CATEGORY_DESCRIPTIONS.MISSING_REQUIRED}: dependent "${deps[0]}" missing for "${prop}"`,
+            category: 'error_handling',
+            args,
+            expectedOutcome: 'error',
+        });
+    }
     return questions;
 }
 // ==================== Varied Tests for Simple Tools ====================
@@ -1165,12 +1416,8 @@ function generateVariedTestsForSimpleTools(toolName, toolDescription, properties
     const { CATEGORY_DESCRIPTIONS } = SCHEMA_TESTING;
     // Detect patterns that affect expected outcome
     const detection = detectFalsePositivePatterns(toolName, toolDescription, properties, requiredParams);
-    const needsFlexibleOutcome = detection.isOperationBased ||
-        detection.isSelfStateful ||
-        detection.hasComplexArrays;
-    const variedTestOutcome = needsFlexibleOutcome
-        ? 'either'
-        : 'success';
+    const needsFlexibleOutcome = detection.isOperationBased || detection.isSelfStateful || detection.hasComplexArrays;
+    const variedTestOutcome = needsFlexibleOutcome ? 'either' : 'success';
     // Build metadata for varied tests if patterns detected
     const variedMetadata = needsFlexibleOutcome
         ? {
@@ -1190,7 +1437,7 @@ function generateVariedTestsForSimpleTools(toolName, toolDescription, properties
         }
         : undefined;
     // Get existing arg signatures to avoid duplicates
-    const existingArgSignatures = new Set(existingQuestions.map(q => JSON.stringify(q.args)));
+    const existingArgSignatures = new Set(existingQuestions.map((q) => JSON.stringify(q.args)));
     // Variation strategies for simple/no-param tools
     const variationStrategies = [];
     // Strategy 1: Different timing contexts (useful for stateful tools)
@@ -1425,7 +1672,7 @@ export function generateSchemaTestsWithInferences(tool, options = {}) {
     const properties = schema?.properties ?? {};
     const requiredParams = (schema?.required ?? []);
     // 1. Happy Path Tests (always included)
-    questions.push(...generateHappyPathTests(tool.name, tool.description, properties, requiredParams, fixtures));
+    questions.push(...generateHappyPathTests(tool.name, tool.description, schema, properties, requiredParams, fixtures));
     // 2. Boundary Value Tests
     questions.push(...generateBoundaryTests(properties, requiredParams, fixtures));
     // 3. Type Coercion Tests (unless skipping error tests)
@@ -1442,7 +1689,7 @@ export function generateSchemaTestsWithInferences(tool, options = {}) {
     questions.push(...generateNullabilityTests(properties, requiredParams, fixtures));
     // 7. Error Handling Tests (unless skipped)
     if (!options.skipErrorTests) {
-        questions.push(...generateErrorHandlingTests(properties, requiredParams, fixtures));
+        questions.push(...generateErrorHandlingTests(schema, properties, requiredParams, fixtures));
     }
     // 8. Semantic Validation Tests (unless skipped)
     let semanticInferences = [];

package/dist/interview/stateful-test-runner.d.ts CHANGED Viewed

@@ -6,6 +6,8 @@ import type { InterviewQuestion } from './types.js';
 export declare class StatefulTestRunner {
     private options;
     private values;
+    private jsonPathValues;
+    private recentResponses;
     constructor(options: {
         shareOutputs: boolean;
     });
@@ -15,5 +17,6 @@ export declare class StatefulTestRunner {
     };
     recordResponse(tool: MCPTool, response: MCPToolCallResult | null): string[];
     private findMatchingValue;
+    private findByJsonPath;
 }
 //# sourceMappingURL=stateful-test-runner.d.ts.map

package/dist/interview/stateful-test-runner.js CHANGED Viewed

@@ -1,11 +1,14 @@
 import { STATEFUL_TESTING } from '../constants.js';
 import { extractTextContent } from './schema-inferrer.js';
+import { getValueAtPath } from '../utils/jsonpath.js';
 /**
  * Maintains shared state between tool calls for stateful testing.
  */
 export class StatefulTestRunner {
     options;
     values = new Map();
+    jsonPathValues = new Map();
+    recentResponses = [];
     constructor(options) {
         this.options = options;
     }
@@ -48,21 +51,69 @@ export class StatefulTestRunner {
             this.values.set(key, { value, sourceTool: tool.name });
             providedKeys.push(key);
         }
+        // Record JSONPath values for richer mapping
+        const pathValues = collectJsonPaths(parsed);
+        for (const [path, value] of Object.entries(pathValues)) {
+            if (this.jsonPathValues.size >= STATEFUL_TESTING.MAX_STORED_VALUES) {
+                break;
+            }
+            this.jsonPathValues.set(path, { value, sourceTool: tool.name });
+        }
+        // Keep a bounded list of recent responses for direct JSONPath lookup
+        this.recentResponses.unshift({ value: parsed, sourceTool: tool.name });
+        if (this.recentResponses.length > STATEFUL_TESTING.MAX_STORED_VALUES) {
+            this.recentResponses.pop();
+        }
         return providedKeys;
     }
+    /**
+     * Look up a previously recorded value for a parameter name.
+     * Lookup order: direct JSONPath resolution (only when the name looks like
+     * a JSONPath), exact normalized-key match in `values` then in
+     * `jsonPathValues`, normalized-key suffix match in `values` then in
+     * `jsonPathValues`, and finally a `$.<name>` JSONPath lookup for names
+     * that do not look like a JSONPath.
+     * Returns the matching stored entry, or null when nothing matches.
+     */
     findMatchingValue(paramName) {
+        // Names that already look like a JSONPath are resolved as a path first.
+        if (looksLikeJsonPath(paramName)) {
+            const direct = this.findByJsonPath(paramName);
+            if (direct)
+                return direct;
+        }
         const normalizedParam = normalizeKey(paramName);
+        // Exact matches on the normalized key take priority over suffix matches.
         for (const [key, value] of this.values.entries()) {
             if (normalizeKey(key) === normalizedParam) {
                 return value;
             }
         }
+        for (const [key, value] of this.jsonPathValues.entries()) {
+            if (normalizeKey(key) === normalizedParam) {
+                return value;
+            }
+        }
+        // Suffix matches: a stored key whose normalized form ends with the parameter name.
         for (const [key, value] of this.values.entries()) {
             const normalizedKey = normalizeKey(key);
             if (normalizedKey.endsWith(normalizedParam)) {
                 return value;
             }
         }
+        for (const [key, value] of this.jsonPathValues.entries()) {
+            const normalizedKey = normalizeKey(key);
+            if (normalizedKey.endsWith(normalizedParam)) {
+                return value;
+            }
+        }
+        // Last resort for plain names: treat the name as a top-level path.
+        if (!looksLikeJsonPath(paramName)) {
+            const pathMatch = this.findByJsonPath(`$.${paramName}`);
+            if (pathMatch)
+                return pathMatch;
+        }
+        return null;
+    }
+    /**
+     * Resolve a JSONPath against recorded data.
+     * A path that does not start with `$` is prefixed with `$.`. The exact
+     * path is looked up in `jsonPathValues` first; otherwise each entry of
+     * `recentResponses` is queried with getValueAtPath in list order
+     * (recordResponse inserts new responses at the front), and the first
+     * result other than undefined is used.
+     * Returns `{ value, sourceTool }`, or null when the path resolves nowhere.
+     */
+    findByJsonPath(path) {
+        const normalized = path.startsWith('$') ? path : `$.${path}`;
+        const stored = this.jsonPathValues.get(normalized);
+        if (stored) {
+            return stored;
+        }
+        // Not recorded as a flattened path: evaluate it against the kept responses.
+        for (const entry of this.recentResponses) {
+            const value = getValueAtPath(entry.value, normalized);
+            if (value !== undefined) {
+                return { value, sourceTool: entry.sourceTool };
+            }
+        }
         return null;
     }
 }
@@ -103,4 +154,33 @@ function flattenValue(value, prefix = '') {
     }
     return result;
 }
+/**
+ * Heuristic: treat a name as a JSONPath expression when it starts with `$`
+ * or contains a `.` or `[` anywhere.
+ */
+function looksLikeJsonPath(value) {
+    if (value.startsWith('$')) {
+        return true;
+    }
+    return ['.', '['].some((token) => value.includes(token));
+}
+/**
+ * Flatten a parsed response into a map of JSONPath-style keys to leaf values.
+ * Recursion stops once depth exceeds 4, and only the first 3 items of any
+ * array are visited. Top-level and array-item null/undefined values are
+ * skipped, while non-object children of an object (including null) are
+ * recorded under `<path>.<key>`.
+ * The `result` accumulator is mutated and returned.
+ */
+function collectJsonPaths(value, path = '$', depth = 0, result = {}) {
+    const maxDepth = 4;
+    const arraySampleSize = 3;
+    if (value === null || value === undefined || depth > maxDepth) {
+        return result;
+    }
+    if (typeof value !== 'object') {
+        // Leaf reached through recursion (array item or root primitive).
+        result[path] = value;
+        return result;
+    }
+    if (Array.isArray(value)) {
+        value.slice(0, arraySampleSize).forEach((item, index) => {
+            collectJsonPaths(item, `${path}[${index}]`, depth + 1, result);
+        });
+        return result;
+    }
+    for (const [key, child] of Object.entries(value)) {
+        const childPath = `${path}.${key}`;
+        const isContainer = child !== null && typeof child === 'object';
+        if (isContainer) {
+            collectJsonPaths(child, childPath, depth + 1, result);
+        }
+        else {
+            result[childPath] = child;
+        }
+    }
+    return result;
+}
 //# sourceMappingURL=stateful-test-runner.js.map

package/dist/interview/types.d.ts CHANGED Viewed

@@ -1,6 +1,10 @@
 import type { DiscoveryResult } from '../discovery/types.js';
 import type { MCPToolCallResult, MCPPromptGetResult, MCPResourceReadResult } from '../transport/types.js';
 import type { InferredSchema } from '../baseline/response-fingerprint.js';
+import type { ResponseSchemaEvolution } from '../baseline/response-schema-tracker.js';
+import type { ErrorAnalysisSummary } from '../baseline/error-analyzer.js';
+import type { DocumentationScore } from '../baseline/documentation-scorer.js';
+import type { SemanticInference } from '../validation/semantic-types.js';
 import type { Persona, QuestionCategory } from '../persona/types.js';
 import type { Workflow, WorkflowResult, WorkflowTimeoutConfig } from '../workflow/types.js';
 import type { LoadedScenarios, ScenarioResult } from '../scenarios/types.js';
@@ -401,6 +405,14 @@ export interface InterviewResult {
     limitations: string[];
     /** Overall recommendations */
     recommendations: string[];
+    /** Semantic type inferences by tool */
+    semanticInferences?: Record<string, SemanticInference[]>;
+    /** Response schema evolution by tool */
+    schemaEvolution?: Record<string, ResponseSchemaEvolution>;
+    /** Enhanced error analysis summaries by tool */
+    errorAnalysisSummaries?: Record<string, ErrorAnalysisSummary>;
+    /** Documentation quality score */
+    documentationScore?: DocumentationScore;
     /** Interview metadata */
     metadata: InterviewMetadata;
 }

package/dist/transport/mcp-client.js CHANGED Viewed

@@ -18,7 +18,7 @@ const FILTERED_ENV_VARS = new Set([
     'COHERE_API_KEY',
     'HUGGINGFACE_API_KEY',
     'REPLICATE_API_TOKEN',
-    // Cloud provider credentials
+    // Provider credentials
     'AWS_SECRET_ACCESS_KEY',
     'AWS_SESSION_TOKEN',
     'AZURE_CLIENT_SECRET',

package/dist/transport/sse-transport.d.ts CHANGED Viewed

@@ -29,7 +29,7 @@ export interface SSETransportConfig extends BaseTransportConfig {
  * - POST {baseUrl}/message - Endpoint for sending messages
  */
 export declare class SSETransport extends BaseTransport {
-    private eventSource;
+    private streamAbortController;
     private abortController;
     private connected;
     private reconnectAttempts;
@@ -55,6 +55,10 @@ export declare class SSETransport extends BaseTransport {
      * Handle an incoming SSE message.
      */
     private handleSSEMessage;
+    /**
+     * Stream and parse SSE events from a fetch response.
+     */
+    private readSSEStream;
     /**
      * Handle reconnection after a connection error.
      *
@@ -63,7 +67,7 @@ export declare class SSETransport extends BaseTransport {
      * - Uses capped exponential backoff
      * - Clears reconnect timer on close
      * - Checks isClosing flag to prevent reconnection after close()
-     * - Explicitly closes EventSource on max attempts
+     * - Explicitly aborts SSE stream on max attempts
      */
     private handleReconnect;
     /**
@@ -74,7 +78,7 @@ export declare class SSETransport extends BaseTransport {
      * Close the SSE connection.
      *
      * RELIABILITY: Properly cleans up all resources including:
-     * - EventSource connection
+     * - SSE stream connection
      * - Pending HTTP requests (via abort controller)
      * - Reconnection timer
      * - Sets isClosing flag to prevent reconnection attempts