npm - @dotsetlabs/bellwether - Versions diffs - 1.0.3 → 2.0.0 - Mend

@dotsetlabs/bellwether 1.0.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

package/CHANGELOG.md +74 -0
package/README.md +8 -2
package/dist/baseline/accessors.d.ts +1 -1
package/dist/baseline/accessors.js +1 -3
package/dist/baseline/baseline-format.d.ts +287 -0
package/dist/baseline/baseline-format.js +12 -0
package/dist/baseline/comparator.js +249 -11
package/dist/baseline/converter.d.ts +15 -15
package/dist/baseline/converter.js +46 -34
package/dist/baseline/diff.d.ts +1 -1
package/dist/baseline/diff.js +45 -28
package/dist/baseline/error-analyzer.d.ts +1 -1
package/dist/baseline/error-analyzer.js +90 -17
package/dist/baseline/incremental-checker.js +8 -5
package/dist/baseline/index.d.ts +2 -12
package/dist/baseline/index.js +3 -23
package/dist/baseline/performance-tracker.d.ts +0 -1
package/dist/baseline/performance-tracker.js +13 -20
package/dist/baseline/response-fingerprint.js +39 -2
package/dist/baseline/saver.js +41 -10
package/dist/baseline/schema-compare.d.ts +22 -0
package/dist/baseline/schema-compare.js +259 -16
package/dist/baseline/types.d.ts +10 -7
package/dist/cache/response-cache.d.ts +8 -0
package/dist/cache/response-cache.js +110 -0
package/dist/cli/commands/check.js +23 -6
package/dist/cli/commands/explore.js +34 -14
package/dist/cli/index.js +8 -0
package/dist/config/template.js +8 -7
package/dist/config/validator.d.ts +59 -59
package/dist/config/validator.js +245 -90
package/dist/constants/core.d.ts +4 -0
package/dist/constants/core.js +8 -19
package/dist/constants/registry.d.ts +17 -0
package/dist/constants/registry.js +18 -0
package/dist/constants/testing.d.ts +0 -369
package/dist/constants/testing.js +18 -456
package/dist/constants.d.ts +1 -1
package/dist/constants.js +1 -1
package/dist/docs/contract.js +131 -83
package/dist/docs/report.js +8 -5
package/dist/interview/insights.d.ts +17 -0
package/dist/interview/insights.js +52 -0
package/dist/interview/interviewer.js +52 -10
package/dist/interview/prompt-test-generator.d.ts +12 -0
package/dist/interview/prompt-test-generator.js +77 -0
package/dist/interview/resource-test-generator.d.ts +12 -0
package/dist/interview/resource-test-generator.js +20 -0
package/dist/interview/schema-inferrer.js +26 -4
package/dist/interview/schema-test-generator.js +278 -31
package/dist/interview/stateful-test-runner.d.ts +3 -0
package/dist/interview/stateful-test-runner.js +80 -0
package/dist/interview/types.d.ts +12 -0
package/dist/transport/mcp-client.js +1 -1
package/dist/transport/sse-transport.d.ts +7 -3
package/dist/transport/sse-transport.js +157 -67
package/dist/version.js +1 -1
package/man/bellwether.1 +1 -1
package/man/bellwether.1.md +2 -2
package/package.json +1 -1
package/schemas/bellwether-check.schema.json +185 -0
package/schemas/bellwether-explore.schema.json +837 -0
package/scripts/completions/bellwether.bash +10 -4
package/scripts/completions/bellwether.zsh +55 -2

package/dist/interview/insights.js ADDED Viewed

@@ -0,0 +1,52 @@
+import { generateSemanticTests } from '../validation/semantic-test-generator.js';
+import { SEMANTIC_VALIDATION } from '../constants.js';
+import { analyzeResponses } from '../baseline/response-fingerprint.js';
+import { buildSchemaEvolution } from '../baseline/response-schema-tracker.js';
+import { generateErrorSummary } from '../baseline/error-analyzer.js';
+import { scoreDocumentation } from '../baseline/documentation-scorer.js';
+/**
+ * Derive secondary insights from a completed interview result.
+ *
+ * Gathers per-tool semantic inferences, per-profile response-schema evolution
+ * and error summaries, plus a documentation score for the discovered tools.
+ * These insights are used for documentation and JSON report enrichment.
+ *
+ * @param result - Interview result; `discovery.tools` and `toolProfiles` are read.
+ * @returns Insight bundle; each per-name map is `undefined` when it has no entries.
+ */
+export function buildInterviewInsights(result) {
+    // Empty records are reported as undefined rather than as `{}`.
+    const orUndefined = (record) => (Object.keys(record).length > 0 ? record : undefined);
+    const semanticInferences = {};
+    for (const tool of result.discovery.tools) {
+        const { inferences } = generateSemanticTests(tool, {
+            minConfidence: SEMANTIC_VALIDATION.MIN_CONFIDENCE_THRESHOLD,
+            maxInvalidValuesPerParam: SEMANTIC_VALIDATION.MAX_INVALID_VALUES_PER_PARAM,
+            skipSemanticTests: false,
+        });
+        if (inferences.length > 0) {
+            semanticInferences[tool.name] = inferences;
+        }
+    }
+    const schemaEvolution = {};
+    const errorAnalysisSummaries = {};
+    for (const { name, interactions } of result.toolProfiles) {
+        // Mocked interactions are excluded from the response analysis.
+        const observed = interactions
+            .filter((interaction) => !interaction.mocked)
+            .map(({ response, error }) => ({ response, error }));
+        const analysis = analyzeResponses(observed);
+        if (analysis.schemas.length > 0) {
+            schemaEvolution[name] = buildSchemaEvolution(analysis.schemas);
+        }
+        if (analysis.errorPatterns.length > 0) {
+            const summary = generateErrorSummary(name, analysis.errorPatterns);
+            // A Map of category counts is flattened to a plain object; anything else passes through.
+            let categoryCounts = summary.categoryCounts;
+            if (categoryCounts instanceof Map) {
+                categoryCounts = Object.fromEntries(categoryCounts.entries());
+            }
+            errorAnalysisSummaries[name] = { ...summary, categoryCounts };
+        }
+    }
+    const documentationScore = scoreDocumentation(result.discovery.tools);
+    return {
+        semanticInferences: orUndefined(semanticInferences),
+        schemaEvolution: orUndefined(schemaEvolution),
+        errorAnalysisSummaries: orUndefined(errorAnalysisSummaries),
+        documentationScore,
+    };
+}
+//# sourceMappingURL=insights.js.map

package/dist/interview/interviewer.js CHANGED Viewed

@@ -6,6 +6,8 @@ import { evaluateAssertions } from '../scenarios/evaluator.js';
 import { withTimeout, DEFAULT_TIMEOUTS, parallelLimit, createMutex } from '../utils/index.js';
 import { INTERVIEW, WORKFLOW, DISPLAY_LIMITS, SCHEMA_TESTING, OUTCOME_ASSESSMENT, } from '../constants.js';
 import { generateSchemaTests } from './schema-test-generator.js';
+import { generatePromptTests } from './prompt-test-generator.js';
+import { generateResourceTests } from './resource-test-generator.js';
 import { WorkflowDiscoverer } from '../workflow/discovery.js';
 import { WorkflowExecutor } from '../workflow/executor.js';
 import { RateLimiter, calculateBackoffMs, isRateLimitError } from './rate-limiter.js';
@@ -682,6 +684,9 @@ export class Interviewer {
                 const promptInteractions = [];
                 // Check for custom scenarios for this prompt
                 const customScenarios = this.getScenariosForPrompt(prompt.name);
+                const deterministicQuestions = this.config.customScenariosOnly
+                    ? []
+                    : generatePromptTests(prompt, { maxTests: this.config.checkMode ? 3 : 2 });
                 // Build questions list - custom scenarios + LLM-generated (unless customScenariosOnly)
                 let questions = [];
                 if (customScenarios.length > 0) {
@@ -693,21 +698,30 @@ export class Interviewer {
                         description: s.description,
                         args: s.args,
                     }));
+                    // Add deterministic prompt tests
+                    if (deterministicQuestions.length > 0) {
+                        questions = mergePromptQuestions(questions, deterministicQuestions);
+                    }
                     // If not custom-only mode and not fast CI mode, also generate LLM questions
                     if (!this.config.customScenariosOnly && !this.config.checkMode && primaryOrchestrator) {
                         const llmQuestions = await primaryOrchestrator.generatePromptQuestions(prompt, 2);
-                        questions = [...questions, ...llmQuestions];
+                        questions = mergePromptQuestions(questions, llmQuestions);
                     }
                 }
                 else if (!this.config.customScenariosOnly &&
                     !this.config.checkMode &&
                     primaryOrchestrator) {
-                    // No custom scenarios - generate LLM questions as usual
-                    questions = await primaryOrchestrator.generatePromptQuestions(prompt, 2);
+                    // No custom scenarios - deterministic tests + LLM questions
+                    questions = mergePromptQuestions(questions, deterministicQuestions);
+                    const llmQuestions = await primaryOrchestrator.generatePromptQuestions(prompt, 2);
+                    questions = mergePromptQuestions(questions, llmQuestions);
                 }
                 else if (this.config.checkMode) {
-                    // Fast CI mode: use simple fallback question for prompt
-                    questions = [{ description: 'Basic prompt test', args: {} }];
+                    // Fast CI mode: use deterministic prompt tests
+                    questions =
+                        deterministicQuestions.length > 0
+                            ? deterministicQuestions
+                            : [{ description: 'Basic prompt test', args: {} }];
                 }
                 // If customScenariosOnly and no scenarios for this prompt, skip it
                 for (const question of questions) {
@@ -791,13 +805,17 @@ export class Interviewer {
                 // Generate resource questions (skip LLM in fast CI mode)
                 let questions;
                 if (this.config.checkMode || !primaryOrchestrator) {
-                    // Fast CI mode: use simple fallback question
-                    questions = [
-                        { description: 'Basic resource read test', category: 'happy_path' },
-                    ];
+                    // Fast CI mode: use deterministic resource tests
+                    const deterministic = generateResourceTests(resource, { maxTests: 2 });
+                    questions =
+                        deterministic.length > 0
+                            ? deterministic
+                            : [{ description: 'Basic resource read test', category: 'happy_path' }];
                 }
                 else {
-                    questions = await primaryOrchestrator.generateResourceQuestions(resource, 2);
+                    const deterministic = generateResourceTests(resource, { maxTests: 2 });
+                    const llmQuestions = await primaryOrchestrator.generateResourceQuestions(resource, 2);
+                    questions = mergeResourceQuestions(deterministic, llmQuestions);
                 }
                 for (const question of questions) {
                     const interactionStart = Date.now();
@@ -1944,6 +1962,30 @@ export class Interviewer {
         return { results, summary };
     }
 }
+/**
+ * Append prompt questions from `additions` to `base`, skipping duplicates.
+ *
+ * Two questions are duplicates when they share a description and equal args.
+ * Args are compared through a JSON form with sorted object keys, so `{a, b}`
+ * and `{b, a}` count as the same arguments. Entries already in `base` are kept
+ * untouched, including any duplicates among them.
+ *
+ * @param base - Questions to keep, in their existing order.
+ * @param additions - Candidates appended in order when not already present.
+ * @returns A new array; neither input array is mutated.
+ */
+function mergePromptQuestions(base, additions) {
+    // Sort object keys while serializing so property order cannot defeat de-duplication.
+    const sortKeys = (_key, value) => value !== null && typeof value === 'object' && !Array.isArray(value)
+        ? Object.fromEntries(Object.keys(value).sort().map((k) => [k, value[k]]))
+        : value;
+    const signatureOf = (q) => `${q.description}|${JSON.stringify(q.args, sortKeys)}`;
+    const merged = [...base];
+    const signatures = new Set(base.map((q) => signatureOf(q)));
+    for (const q of additions) {
+        const sig = signatureOf(q);
+        if (!signatures.has(sig)) {
+            merged.push(q);
+            signatures.add(sig);
+        }
+    }
+    return merged;
+}
+/**
+ * Combine two lists of resource questions without repeating an entry.
+ * A question is identified by its description together with its category.
+ * Everything in `base` is kept; an item from `additions` is appended only
+ * the first time its identity is seen.
+ *
+ * @param base - Questions that always appear first in the result.
+ * @param additions - Candidate questions appended in order when new.
+ * @returns A new array; the inputs are left unmodified.
+ */
+function mergeResourceQuestions(base, additions) {
+    const keyOf = (question) => `${question.description}|${question.category}`;
+    const combined = [...base];
+    const seen = new Set(base.map((question) => keyOf(question)));
+    for (const candidate of additions) {
+        const key = keyOf(candidate);
+        if (seen.has(key)) {
+            continue;
+        }
+        seen.add(key);
+        combined.push(candidate);
+    }
+    return combined;
+}
 function summarizeAssertions(interactions) {
     const allResults = interactions.filter((i) => !i.mocked).flatMap((i) => i.assertionResults ?? []);
     if (allResults.length === 0)

package/dist/interview/prompt-test-generator.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+import type { MCPPrompt } from '../transport/types.js';
+import type { PromptQuestion } from './types.js';
+/** Options accepted by `generatePromptTests`. */
+export interface PromptTestOptions {
+    /** Maximum tests to generate; the implementation falls back to 3 when omitted. */
+    maxTests?: number;
+}
+/**
+ * Generate deterministic prompt tests based on prompt arguments.
+ * Focuses on valid inputs to avoid false negatives.
+ *
+ * @param prompt - Prompt definition; its `arguments` list (if any) drives the generated values.
+ * @param options - Optional limits; see `PromptTestOptions`.
+ * @returns The generated questions, at most `maxTests` of them.
+ */
+export declare function generatePromptTests(prompt: MCPPrompt, options?: PromptTestOptions): PromptQuestion[];
+//# sourceMappingURL=prompt-test-generator.d.ts.map

package/dist/interview/prompt-test-generator.js ADDED Viewed

@@ -0,0 +1,77 @@
+/**
+ * Build a small, deterministic set of invocations for a prompt.
+ * Only plausible, valid-looking argument values are produced so that
+ * failures are not caused by the generated inputs themselves.
+ *
+ * Up to three tests are considered, in this order:
+ *   1. required arguments only (always generated),
+ *   2. required arguments plus the first two optional ones,
+ *   3. required arguments with alternate values.
+ *
+ * @param prompt - Prompt definition; `arguments` may be absent.
+ * @param options - `maxTests` caps the number of tests (default 3).
+ * @returns At most `maxTests` questions, each with a description and args.
+ */
+export function generatePromptTests(prompt, options = {}) {
+    const limit = options.maxTests ?? 3;
+    const specs = prompt.arguments ?? [];
+    const required = specs.filter((spec) => spec.required);
+    const optional = specs.filter((spec) => !spec.required);
+    const requiredValues = {};
+    required.forEach((spec) => {
+        requiredValues[spec.name] = generatePromptArgValue(spec.name, spec.description);
+    });
+    const tests = [
+        {
+            description: required.length > 0 ? 'Basic prompt invocation' : 'Prompt invocation (no args)',
+            args: requiredValues,
+        },
+    ];
+    const hasRoom = () => tests.length < limit;
+    // Variant 2: extend the required values with at most two optional arguments.
+    if (hasRoom() && optional.length > 0) {
+        const withOptional = { ...requiredValues };
+        optional.slice(0, 2).forEach((spec) => {
+            withOptional[spec.name] = generatePromptArgValue(spec.name, spec.description);
+        });
+        tests.push({
+            description: 'Prompt invocation with optional arguments',
+            args: withOptional,
+        });
+    }
+    // Variant 3: same required arguments, different values.
+    if (hasRoom() && Object.keys(requiredValues).length > 0) {
+        const alternates = {};
+        required.forEach((spec) => {
+            alternates[spec.name] = generateAlternateValue(requiredValues[spec.name], spec.name);
+        });
+        tests.push({
+            description: 'Prompt invocation with alternate values',
+            args: alternates,
+        });
+    }
+    return tests.slice(0, limit);
+}
+/**
+ * Pick a sample string value for a prompt argument from keyword hints.
+ * Matching is case-insensitive substring search on the argument name and,
+ * for most rules, on its description. Rules are tried in order and the
+ * first match wins; with no match the value is 'example'.
+ *
+ * @param name - Argument name.
+ * @param description - Optional argument description.
+ * @returns A sample value suited to the detected kind of argument.
+ */
+function generatePromptArgValue(name, description) {
+    const key = name.toLowerCase();
+    const hint = (description ?? '').toLowerCase();
+    // Ordered rules: keywords looked up in the name, keywords looked up in the description.
+    const rules = [
+        { inName: ['path'], inDescription: ['path'], value: '/tmp/example.txt' },
+        { inName: ['url', 'uri'], inDescription: ['url'], value: 'https://example.com' },
+        { inName: ['email'], inDescription: ['email'], value: 'test@example.com' },
+        { inName: ['date'], inDescription: ['date'], value: '2024-01-15' },
+        { inName: ['time'], inDescription: ['time'], value: '2024-01-15T14:30:00Z' },
+        { inName: ['id'], inDescription: ['identifier'], value: 'id_123' },
+        { inName: ['query'], inDescription: ['search'], value: 'example query' },
+    ];
+    const match = rules.find((rule) => rule.inName.some((word) => key.includes(word)) ||
+        rule.inDescription.some((word) => hint.includes(word)));
+    return match ? match.value : 'example';
+}
+/**
+ * Produce a second valid-looking value for an argument, different from `value`.
+ * The kind of value is recognized from its shape (URL, absolute path, email,
+ * ISO date or date-time) and, failing that, from an "id" hint in the name.
+ * Anything unrecognized gets a "-alt" suffix.
+ *
+ * @param value - The primary sample value (a string).
+ * @param name - Argument name, used only for the identifier heuristic.
+ * @returns An alternate sample value of the same kind.
+ */
+function generateAlternateValue(value, name) {
+    if (value.startsWith('http')) {
+        return 'https://example.org';
+    }
+    if (value.startsWith('/')) {
+        return '/tmp/alternate.txt';
+    }
+    if (value.includes('@')) {
+        return 'user@example.org';
+    }
+    // Date-like values must stay parseable: appending "-alt" would make them invalid.
+    // Checked before the name heuristic so names such as "valid_date" still get a date.
+    if (/^\d{4}-\d{2}-\d{2}T/.test(value)) {
+        return '2024-06-30T09:45:00Z';
+    }
+    if (/^\d{4}-\d{2}-\d{2}$/.test(value)) {
+        return '2024-06-30';
+    }
+    if (name.toLowerCase().includes('id')) {
+        return 'id_456';
+    }
+    return `${value}-alt`;
+}
+//# sourceMappingURL=prompt-test-generator.js.map

package/dist/interview/resource-test-generator.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+import type { MCPResource } from '../transport/types.js';
+import type { ResourceQuestion } from './types.js';
+/** Options accepted by `generateResourceTests`. */
+export interface ResourceTestOptions {
+    /** Maximum tests to generate; the implementation falls back to 2 when omitted. */
+    maxTests?: number;
+}
+/**
+ * Generate deterministic resource tests.
+ * Since resource reads are URI-based with no args, tests focus on consistency.
+ *
+ * @param resource - Resource definition; its `name` is embedded in each test description.
+ * @param options - Optional limits; see `ResourceTestOptions`.
+ * @returns The generated questions, at most `maxTests` of them (the implementation produces no more than two).
+ */
+export declare function generateResourceTests(resource: MCPResource, options?: ResourceTestOptions): ResourceQuestion[];
+//# sourceMappingURL=resource-test-generator.d.ts.map

package/dist/interview/resource-test-generator.js ADDED Viewed

@@ -0,0 +1,20 @@
+/**
+ * Build the deterministic checks for a resource.
+ * A resource read takes no arguments, so the checks are a plain read and,
+ * when the limit allows, a repeated read of the same resource.
+ *
+ * @param resource - Resource definition; `name` is embedded in the descriptions.
+ * @param options - `maxTests` caps the number of tests (default 2).
+ * @returns At most `maxTests` questions (never more than two).
+ */
+export function generateResourceTests(resource, options = {}) {
+    const limit = options.maxTests ?? 2;
+    const tests = [
+        {
+            description: `Basic resource read (${resource.name})`,
+            category: 'happy_path',
+        },
+    ];
+    // Add the repeat read only when the limit leaves room for a second test.
+    if (limit > tests.length) {
+        tests.push({
+            description: `Repeated resource read (${resource.name})`,
+            category: 'edge_case',
+        });
+    }
+    return tests.slice(0, limit);
+}
+//# sourceMappingURL=resource-test-generator.js.map

package/dist/interview/schema-inferrer.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { createHash } from 'crypto';
-import { inferSchemaFromValue, computeInferredSchemaHash } from '../baseline/response-fingerprint.js';
+import { inferSchemaFromValue, computeInferredSchemaHash, } from '../baseline/response-fingerprint.js';
 /**
  * Infer a response schema from an MCP tool response.
  */
@@ -26,7 +26,9 @@ export function inferResponseSchema(response) {
         };
     }
     const markdownStructure = detectMarkdownStructure(textContent);
-    if (markdownStructure.hasHeaders || markdownStructure.hasTables || markdownStructure.hasCodeBlocks) {
+    if (markdownStructure.hasHeaders ||
+        markdownStructure.hasTables ||
+        markdownStructure.hasCodeBlocks) {
         return {
             inferredType: 'markdown',
             markdownStructure,
@@ -43,10 +45,16 @@ export function extractTextContent(response) {
         return null;
     }
     const textBlocks = response.content
-        .filter((c) => c.type === 'text' && typeof c.text === 'string')
+        .filter((c) => typeof c.text === 'string')
         .map((c) => c.text);
     if (textBlocks.length === 0) {
-        return null;
+        const decodedBlocks = response.content
+            .map((c) => decodeDataBlock(c.data, c.mimeType))
+            .filter((v) => typeof v === 'string');
+        if (decodedBlocks.length === 0) {
+            return null;
+        }
+        return decodedBlocks.join('\n');
     }
     return textBlocks.join('\n');
 }
@@ -68,4 +76,18 @@ function detectMarkdownStructure(text) {
 function hashString(value) {
     return createHash('sha256').update(value).digest('hex');
 }
+/**
+ * Decode a base64 `data` payload into UTF-8 text when its MIME type is textual.
+ *
+ * @param data - Base64-encoded payload (standard or URL-safe alphabet; whitespace is ignored).
+ * @param mimeType - Declared MIME type; only types containing "json" or starting with "text/" are decoded.
+ * @returns The decoded text, or null when the payload is missing, not textual, or not well-formed base64.
+ */
+function decodeDataBlock(data, mimeType) {
+    if (!data || typeof data !== 'string')
+        return null;
+    // A non-string MIME type is treated like a missing one instead of throwing.
+    const mime = (typeof mimeType === 'string' ? mimeType : '').toLowerCase();
+    if (!mime.includes('json') && !mime.startsWith('text/')) {
+        return null;
+    }
+    // Buffer.from(..., 'base64') does not throw on malformed input; it silently skips
+    // invalid characters. Validate the shape first so garbage is rejected, not decoded.
+    const compact = data.replace(/\s+/g, '');
+    if (compact.length % 4 === 1 || !/^[A-Za-z0-9+/_-]+={0,2}$/.test(compact)) {
+        return null;
+    }
+    return Buffer.from(compact, 'base64').toString('utf8');
+}
 //# sourceMappingURL=schema-inferrer.js.map