npm - @dotsetlabs/bellwether - Versions diffs - 1.0.3 → 2.0.0 - Mend

@dotsetlabs/bellwether 1.0.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

package/CHANGELOG.md +74 -0
package/README.md +8 -2
package/dist/baseline/accessors.d.ts +1 -1
package/dist/baseline/accessors.js +1 -3
package/dist/baseline/baseline-format.d.ts +287 -0
package/dist/baseline/baseline-format.js +12 -0
package/dist/baseline/comparator.js +249 -11
package/dist/baseline/converter.d.ts +15 -15
package/dist/baseline/converter.js +46 -34
package/dist/baseline/diff.d.ts +1 -1
package/dist/baseline/diff.js +45 -28
package/dist/baseline/error-analyzer.d.ts +1 -1
package/dist/baseline/error-analyzer.js +90 -17
package/dist/baseline/incremental-checker.js +8 -5
package/dist/baseline/index.d.ts +2 -12
package/dist/baseline/index.js +3 -23
package/dist/baseline/performance-tracker.d.ts +0 -1
package/dist/baseline/performance-tracker.js +13 -20
package/dist/baseline/response-fingerprint.js +39 -2
package/dist/baseline/saver.js +41 -10
package/dist/baseline/schema-compare.d.ts +22 -0
package/dist/baseline/schema-compare.js +259 -16
package/dist/baseline/types.d.ts +10 -7
package/dist/cache/response-cache.d.ts +8 -0
package/dist/cache/response-cache.js +110 -0
package/dist/cli/commands/check.js +23 -6
package/dist/cli/commands/explore.js +34 -14
package/dist/cli/index.js +8 -0
package/dist/config/template.js +8 -7
package/dist/config/validator.d.ts +59 -59
package/dist/config/validator.js +245 -90
package/dist/constants/core.d.ts +4 -0
package/dist/constants/core.js +8 -19
package/dist/constants/registry.d.ts +17 -0
package/dist/constants/registry.js +18 -0
package/dist/constants/testing.d.ts +0 -369
package/dist/constants/testing.js +18 -456
package/dist/constants.d.ts +1 -1
package/dist/constants.js +1 -1
package/dist/docs/contract.js +131 -83
package/dist/docs/report.js +8 -5
package/dist/interview/insights.d.ts +17 -0
package/dist/interview/insights.js +52 -0
package/dist/interview/interviewer.js +52 -10
package/dist/interview/prompt-test-generator.d.ts +12 -0
package/dist/interview/prompt-test-generator.js +77 -0
package/dist/interview/resource-test-generator.d.ts +12 -0
package/dist/interview/resource-test-generator.js +20 -0
package/dist/interview/schema-inferrer.js +26 -4
package/dist/interview/schema-test-generator.js +278 -31
package/dist/interview/stateful-test-runner.d.ts +3 -0
package/dist/interview/stateful-test-runner.js +80 -0
package/dist/interview/types.d.ts +12 -0
package/dist/transport/mcp-client.js +1 -1
package/dist/transport/sse-transport.d.ts +7 -3
package/dist/transport/sse-transport.js +157 -67
package/dist/version.js +1 -1
package/man/bellwether.1 +1 -1
package/man/bellwether.1.md +2 -2
package/package.json +1 -1
package/schemas/bellwether-check.schema.json +185 -0
package/schemas/bellwether-explore.schema.json +837 -0
package/scripts/completions/bellwether.bash +10 -4
package/scripts/completions/bellwether.zsh +55 -2

package/dist/cache/response-cache.js CHANGED Viewed

@@ -3,6 +3,8 @@
  * Enables reuse of tool call results and LLM analysis across personas.
  */
 import { createHash } from 'crypto';
+import { existsSync, mkdirSync, readFileSync, readdirSync, unlinkSync, writeFileSync } from 'fs';
+import { join } from 'path';
 import { getLogger } from '../logging/logger.js';
 import { TIME_CONSTANTS, CACHE } from '../constants.js';
 const logger = getLogger('response-cache');
@@ -18,13 +20,19 @@ export class ResponseCache {
         evictions: 0,
     };
     totalSizeBytes = 0;
+    cacheDir;
     /**
      * Build a cache from an optional partial configuration.
      *
      * Recognised `config` fields and their fallbacks (applied with `??`, so
      * only `null`/`undefined` trigger the fallback):
      *   - defaultTTLMs: TIME_CONSTANTS.DEFAULT_CACHE_TTL
      *   - maxEntries:   CACHE.MAX_ENTRIES
      *   - maxSizeBytes: 50 * 1024 * 1024
      *   - enabled:      true
      *   - dir:          '' (empty string means "no on-disk persistence")
      */
     constructor(config = {}) {
         this.config = {
             defaultTTLMs: config.defaultTTLMs ?? TIME_CONSTANTS.DEFAULT_CACHE_TTL,
             maxEntries: config.maxEntries ?? CACHE.MAX_ENTRIES,
             maxSizeBytes: config.maxSizeBytes ?? 50 * 1024 * 1024, // 50MB
             enabled: config.enabled ?? true,
+            dir: config.dir ?? '',
         };
         // Disk persistence is active only when the cache is enabled AND a
         // non-empty directory was supplied; otherwise cacheDir stays undefined
         // and getCachePath() returns null for every key.
+        this.cacheDir = this.config.enabled ? this.config.dir || undefined : undefined;
+        if (this.cacheDir) {
             // ensureCacheDir() resets this.cacheDir to undefined if the
             // directory cannot be created.
+            this.ensureCacheDir(this.cacheDir);
+        }
     }
     /**
      * Generate a cache key from input data.
@@ -43,6 +51,13 @@ export class ResponseCache {
         }
         const entry = this.cache.get(key);
         if (!entry) {
+            const diskEntry = this.loadFromDisk(key);
+            if (diskEntry) {
+                this.cache.set(key, diskEntry);
+                this.totalSizeBytes += this.estimateSize(diskEntry.value);
+                this.stats.hits++;
+                return diskEntry.value;
+            }
             this.stats.misses++;
             return undefined;
         }
@@ -89,6 +104,7 @@ export class ResponseCache {
         this.totalSizeBytes += entrySize;
         this.cache.set(key, entry);
         logger.debug({ key, ttlMs: ttl, description: options?.description }, 'Cache entry set');
+        this.saveToDisk(entry);
     }
     /**
      * Check if key exists and is not expired.
@@ -115,8 +131,10 @@ export class ResponseCache {
         if (entry) {
             this.totalSizeBytes -= this.estimateSize(entry.value);
             this.cache.delete(key);
+            this.deleteFromDisk(key);
             return true;
         }
+        this.deleteFromDisk(key);
         return false;
     }
     /**
@@ -125,6 +143,16 @@ export class ResponseCache {
     clear() {
         // Drop every in-memory entry and reset the running size estimate.
         this.cache.clear();
         this.totalSizeBytes = 0;
         // When a cache directory is configured and present, also remove the
         // files in it. This is best-effort: errors never propagate.
+        if (this.cacheDir && existsSync(this.cacheDir)) {
+            try {
                 // NOTE(review): listCacheFiles() returns every regular file in
                 // the directory, not only `<key>.json` files, so files that
                 // were not written by this cache are unlinked as well.
                 // Confirm the directory is dedicated to the cache.
+                for (const file of listCacheFiles(this.cacheDir)) {
+                    unlinkSync(file);
+                }
+            }
+            catch {
                 // The try wraps the whole loop, so the first unlinkSync failure
                 // stops the cleanup and leaves the remaining files in place.
+                // Ignore disk cleanup errors
+            }
+        }
         logger.debug('Cache cleared');
     }
     /**
@@ -195,6 +223,88 @@ export class ResponseCache {
             return 1000; // Default estimate for non-serializable values
         }
     }
+    ensureCacheDir(dir) {
         // Make sure `dir` exists, creating it and any missing parent
         // directories (`recursive: true`) when it does not. Returns nothing.
+        try {
+            if (!existsSync(dir)) {
+                mkdirSync(dir, { recursive: true });
+            }
+        }
+        catch (error) {
             // Creation failure is not fatal: log a warning and switch disk
             // persistence off by clearing cacheDir, which makes getCachePath()
             // return null from now on.
+            logger.warn({ dir, error: String(error) }, 'Failed to create cache directory');
+            this.cacheDir = undefined;
+        }
+    }
+    getCachePath(key) {
         // Map a cache key to its file path `<cacheDir>/<key>.json`, or return
         // null when no cache directory is active (disk persistence disabled).
+        if (!this.cacheDir)
+            return null;
         // NOTE(review): `key` is interpolated into the file name as-is.
         // Presumably keys are hash digests produced elsewhere in this class;
         // confirm, because a key containing path separators or `..` would
         // resolve outside cacheDir.
+        return join(this.cacheDir, `${key}.json`);
+    }
+    saveToDisk(entry) {
         // Persist one cache entry as JSON at the path derived from entry.key.
         // Does nothing when disk persistence is disabled. Never throws:
         // failures are logged at debug level and otherwise ignored.
+        const path = this.getCachePath(entry.key);
+        if (!path)
+            return;
+        try {
             // Copy all entry fields, writing the three timestamps explicitly
             // as ISO-8601 strings; loadFromDisk() turns them back into Dates.
             // Calling toISOString() assumes each field is a Date; if it is
             // not, the resulting error lands in the catch below.
+            const serialized = JSON.stringify({
+                ...entry,
+                createdAt: entry.createdAt.toISOString(),
+                lastAccessedAt: entry.lastAccessedAt.toISOString(),
+                expiresAt: entry.expiresAt.toISOString(),
+            });
             // Synchronous write; an existing file for the same key is replaced.
+            writeFileSync(path, serialized, 'utf-8');
+        }
+        catch (error) {
             // Covers both serialization errors and filesystem write errors.
+            logger.debug({ key: entry.key, error: String(error) }, 'Failed to persist cache entry');
+        }
+    }
+    loadFromDisk(key) {
         // Try to read the persisted entry for `key`. Returns the revived entry,
         // or null when disk persistence is disabled, the file is missing, the
         // entry has expired, or the file cannot be read/parsed.
+        const path = this.getCachePath(key);
+        if (!path || !existsSync(path))
+            return null;
+        try {
+            const raw = readFileSync(path, 'utf-8');
+            const parsed = JSON.parse(raw);
             // Rebuild Date objects from the serialized timestamp fields.
+            const entry = {
+                ...parsed,
+                createdAt: new Date(parsed.createdAt),
+                lastAccessedAt: new Date(parsed.lastAccessedAt),
+                expiresAt: new Date(parsed.expiresAt),
+            };
             // NOTE(review): if `expiresAt` is missing or unparseable the result
             // is an Invalid Date, for which this `>` comparison is false, so
             // such an entry is treated as not expired. Confirm that is intended.
+            if (new Date() > entry.expiresAt) {
                 // Expired on disk: remove the stale file and report a miss.
+                this.deleteFromDisk(key);
+                return null;
+            }
             // Count this load as an access and write the updated bookkeeping
             // back to disk before handing the entry to the caller.
+            entry.hitCount = (entry.hitCount ?? 0) + 1;
+            entry.lastAccessedAt = new Date();
+            this.saveToDisk(entry);
+            return entry;
+        }
+        catch (error) {
             // Read or JSON parse failure: log at debug level and report a miss.
             // The unreadable file is left in place.
+            logger.debug({ key, error: String(error) }, 'Failed to load cache entry');
+            return null;
+        }
+    }
+    deleteFromDisk(key) {
         // Remove the persisted file for `key`, if there is one. Does nothing
         // when disk persistence is disabled or the file does not exist.
+        const path = this.getCachePath(key);
+        if (!path || !existsSync(path))
+            return;
+        try {
+            unlinkSync(path);
+        }
+        catch {
             // Best-effort: a failed unlink is neither logged nor rethrown.
+            // Ignore delete errors
+        }
+    }
+}
+function listCacheFiles(dir) {
     // Return the full paths of the regular files directly inside `dir`
     // (subdirectories are skipped and not descended into). Returns an empty
     // array when the directory cannot be read.
     // NOTE(review): entries are not filtered by name or extension, so files
     // other than the `<key>.json` files written by ResponseCache are returned
     // too, and ResponseCache.clear() unlinks everything this returns.
+    try {
+        const entries = readdirSync(dir, { withFileTypes: true });
+        return entries
+            .filter((entry) => entry.isFile())
+            .map((entry) => join(dir, entry.name));
+    }
+    catch {
+        return [];
+    }
 }
 /**
  * Stable, deterministic JSON stringify with deep key sorting.

package/dist/cli/commands/check.js CHANGED Viewed

@@ -26,6 +26,7 @@ import { loadWorkflowsFromFile, tryLoadDefaultWorkflows, DEFAULT_WORKFLOWS_FILE,
 import * as output from '../output.js';
 import { extractServerContextFromArgs } from '../utils/server-context.js';
 import { configureLogger } from '../../logging/logger.js';
+import { buildInterviewInsights } from '../../interview/insights.js';
 import { EXIT_CODES, SEVERITY_TO_EXIT_CODE, PATHS, SECURITY_TESTING, CHECK_SAMPLING, WORKFLOW, REPORT_SCHEMAS, PERCENTAGE_CONVERSION, } from '../../constants.js';
 export const checkCommand = new Command('check')
     .description('Check MCP server schema and detect drift (free, fast, deterministic)')
@@ -166,7 +167,7 @@ export const checkCommand = new Command('check')
     metricsCollector.startInterview();
     // Initialize cache
     resetGlobalCache();
-    const cache = getGlobalCache({ enabled: cacheEnabled });
+    const cache = getGlobalCache({ enabled: cacheEnabled, dir: config.cache.dir });
     if (cacheEnabled && verbose) {
         output.info('Response caching enabled');
     }
@@ -382,6 +383,8 @@ export const checkCommand = new Command('check')
         };
         output.info('Checking schemas...\n');
         const result = await interviewer.interview(mcpClient, discovery, progressCallback);
+        const insights = buildInterviewInsights(result);
+        const enrichedResult = { ...result, ...insights };
         progressBar.stop();
         if (!verbose) {
             output.newline();
@@ -448,7 +451,7 @@ export const checkCommand = new Command('check')
                 output.info(`Rate-limited tools: ${rateLimit.tools.slice(0, 5).join(', ')}${rateLimit.tools.length > 5 ? ' ...' : ''}`);
             }
         }
-        const checkSummary = buildCheckSummary(result);
+        const checkSummary = buildCheckSummary(enrichedResult);
         output.newline();
         output.lines(...checkSummary.lines);
         if (checkSummary.nextSteps.length > 0) {
@@ -629,12 +632,25 @@ export const checkCommand = new Command('check')
         }
         // Generate documentation (after security testing so findings can be included)
         output.info('Generating documentation...');
-        const writeDocs = outputFormat === 'both' || outputFormat === 'agents.md';
+        const writeDocs = outputFormat === 'both' || outputFormat === 'docs';
         const writeJson = outputFormat === 'both' || outputFormat === 'json';
         if (writeDocs) {
-            const contractMd = generateContractMd(result, {
+            const semanticMap = insights.semanticInferences
+                ? new Map(Object.entries(insights.semanticInferences))
+                : undefined;
+            const schemaEvolutionMap = insights.schemaEvolution
+                ? new Map(Object.entries(insights.schemaEvolution))
+                : undefined;
+            const errorAnalysisMap = insights.errorAnalysisSummaries
+                ? new Map(Object.entries(insights.errorAnalysisSummaries))
+                : undefined;
+            const contractMd = generateContractMd(enrichedResult, {
                 securityFingerprints: securityEnabled ? securityFingerprints : undefined,
                 workflowResults: workflowResults.length > 0 ? workflowResults : undefined,
+                semanticInferences: semanticMap,
+                schemaEvolution: schemaEvolutionMap,
+                errorAnalysisSummaries: errorAnalysisMap,
+                documentationScore: insights.documentationScore,
                 exampleLength,
                 fullExamples,
                 maxExamplesPerTool,
@@ -648,11 +664,12 @@ export const checkCommand = new Command('check')
         }
         if (writeJson) {
             // Add workflow results to the result object for the JSON report
-            const resultWithWorkflows = workflowResults.length > 0 ? { ...result, workflowResults } : result;
+            const resultWithWorkflows = workflowResults.length > 0 ? { ...enrichedResult, workflowResults } : enrichedResult;
             let jsonReport;
             try {
                 jsonReport = generateJsonReport(resultWithWorkflows, {
                     schemaUrl: REPORT_SCHEMAS.CHECK_REPORT_SCHEMA_URL,
+                    schemaPath: REPORT_SCHEMAS.CHECK_REPORT_SCHEMA_FILE,
                     validate: true,
                 });
             }
@@ -665,7 +682,7 @@ export const checkCommand = new Command('check')
             output.info(`Written: ${jsonPath}`);
         }
         // Create baseline from results
-        let currentBaseline = createBaseline(result, fullServerCommand);
+        let currentBaseline = createBaseline(enrichedResult, fullServerCommand);
         // Attach security fingerprints to tool fingerprints if security testing was run
         if (securityEnabled && securityFingerprints.size > 0) {
             currentBaseline = {

package/dist/cli/commands/explore.js CHANGED Viewed

@@ -13,22 +13,23 @@ import { MCPClient } from '../../transport/mcp-client.js';
 import { discover } from '../../discovery/discovery.js';
 import { Interviewer } from '../../interview/interviewer.js';
 import { generateAgentsMd, generateJsonReport } from '../../docs/generator.js';
-import { loadConfig, ConfigNotFoundError, parseCommandString } from '../../config/loader.js';
+import { loadConfig, ConfigNotFoundError, parseCommandString, } from '../../config/loader.js';
 import { validateConfigForExplore } from '../../config/validator.js';
 import { CostTracker, estimateInterviewCost, estimateInterviewTime, formatCostAndTimeEstimate, suggestOptimizations, formatOptimizationSuggestions, } from '../../cost/index.js';
 import { getMetricsCollector, resetMetricsCollector } from '../../metrics/collector.js';
-import { EXIT_CODES, WORKFLOW, PATHS } from '../../constants.js';
+import { EXIT_CODES, WORKFLOW, PATHS, REPORT_SCHEMAS } from '../../constants.js';
 import { FallbackLLMClient } from '../../llm/fallback.js';
 import { getGlobalCache, resetGlobalCache } from '../../cache/response-cache.js';
 import { InterviewProgressBar, formatExploreBanner } from '../utils/progress.js';
 import { parsePersonas } from '../../persona/builtins.js';
-import { loadScenariosFromFile, tryLoadDefaultScenarios, DEFAULT_SCENARIOS_FILE } from '../../scenarios/index.js';
-import { loadWorkflowsFromFile, tryLoadDefaultWorkflows, DEFAULT_WORKFLOWS_FILE } from '../../workflow/loader.js';
+import { loadScenariosFromFile, tryLoadDefaultScenarios, DEFAULT_SCENARIOS_FILE, } from '../../scenarios/index.js';
+import { loadWorkflowsFromFile, tryLoadDefaultWorkflows, DEFAULT_WORKFLOWS_FILE, } from '../../workflow/loader.js';
 import * as output from '../output.js';
 import { StreamingDisplay } from '../output.js';
-import { suppressLogs, restoreLogLevel, configureLogger } from '../../logging/logger.js';
+import { suppressLogs, restoreLogLevel, configureLogger, } from '../../logging/logger.js';
 import { extractServerContextFromArgs } from '../utils/server-context.js';
 import { isCI } from '../utils/env.js';
+import { buildInterviewInsights } from '../../interview/insights.js';
 /**
  * Wrapper to parse personas with warning output.
  */
@@ -120,7 +121,7 @@ export const exploreCommand = new Command('explore')
     metricsCollector.startInterview();
     // Initialize cache
     resetGlobalCache();
-    const cache = getGlobalCache({ enabled: cacheEnabled });
+    const cache = getGlobalCache({ enabled: cacheEnabled, dir: config.cache.dir });
     if (cacheEnabled && verbose) {
         output.info('Response caching enabled');
     }
@@ -138,7 +139,13 @@ export const exploreCommand = new Command('explore')
     };
     try {
         llmClient = new FallbackLLMClient({
-            providers: [{ provider, model, baseUrl: provider === 'ollama' ? config.llm.ollama.baseUrl : undefined }],
+            providers: [
+                {
+                    provider,
+                    model,
+                    baseUrl: provider === 'ollama' ? config.llm.ollama.baseUrl : undefined,
+                },
+            ],
             useOllamaFallback: true,
             onUsage: onUsageCallback,
         });
@@ -166,9 +173,12 @@ export const exploreCommand = new Command('explore')
         }
         // Discovery phase
         output.info('Discovering capabilities...');
-        const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : remoteUrl ?? serverCommand, transport === 'stdio' ? args : []);
+        const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : (remoteUrl ?? serverCommand), transport === 'stdio' ? args : []);
         const resourceCount = discovery.resources?.length ?? 0;
-        const discoveryParts = [`${discovery.tools.length} tools`, `${discovery.prompts.length} prompts`];
+        const discoveryParts = [
+            `${discovery.tools.length} tools`,
+            `${discovery.prompts.length} prompts`,
+        ];
         if (resourceCount > 0) {
             discoveryParts.push(`${resourceCount} resources`);
         }
@@ -277,13 +287,17 @@ export const exploreCommand = new Command('explore')
                     let prefix = '';
                     switch (opType) {
                         case 'generate-questions':
-                            prefix = context ? `\n  Generating questions for ${context}... ` : '\n  Generating questions... ';
+                            prefix = context
+                                ? `\n  Generating questions for ${context}... `
+                                : '\n  Generating questions... ';
                             break;
                         case 'analyze':
                             prefix = context ? `\n  Analyzing ${context}... ` : '\n  Analyzing... ';
                             break;
                         case 'synthesize-tool':
-                            prefix = context ? `\n  Synthesizing profile for ${context}... ` : '\n  Synthesizing profile... ';
+                            prefix = context
+                                ? `\n  Synthesizing profile for ${context}... `
+                                : '\n  Synthesizing profile... ';
                             break;
                         case 'synthesize-overall':
                             prefix = '\n  Synthesizing overall findings... ';
@@ -365,6 +379,8 @@ export const exploreCommand = new Command('explore')
         };
         output.info('Starting exploration...\n');
         const result = await interviewer.interview(mcpClient, discovery, progressCallback);
+        const insights = buildInterviewInsights(result);
+        const enrichedResult = { ...result, ...insights };
         progressBar.stop();
         if (!verbose) {
             output.newline();
@@ -375,16 +391,20 @@ export const exploreCommand = new Command('explore')
         if (docsDir !== outputDir) {
             mkdirSync(docsDir, { recursive: true });
         }
-        const writeDocs = outputFormat === 'both' || outputFormat === 'agents.md';
+        const writeDocs = outputFormat === 'both' || outputFormat === 'docs';
         const writeJson = outputFormat === 'both' || outputFormat === 'json';
         if (writeDocs) {
-            const agentsMd = generateAgentsMd(result);
+            const agentsMd = generateAgentsMd(enrichedResult);
             const agentsMdPath = join(docsDir, config.output.files.agentsDoc);
             writeFileSync(agentsMdPath, agentsMd);
             output.info(`Written: ${agentsMdPath}`);
         }
         if (writeJson) {
-            const jsonReport = generateJsonReport(result);
+            const jsonReport = generateJsonReport(enrichedResult, {
+                schemaUrl: REPORT_SCHEMAS.EXPLORE_REPORT_SCHEMA_URL,
+                schemaPath: REPORT_SCHEMAS.EXPLORE_REPORT_SCHEMA_FILE,
+                validate: true,
+            });
             const jsonPath = join(outputDir, config.output.files.exploreReport);
             writeFileSync(jsonPath, jsonReport);
             output.info(`Written: ${jsonPath}`);

package/dist/cli/index.js CHANGED Viewed

@@ -115,6 +115,14 @@ Check MCP servers for drift. Explore behavior. Generate documentation.
 Commands:
   check    - Schema validation and drift detection (free, fast, deterministic)
   explore  - LLM-powered behavioral exploration and documentation
+  discover - Quick capability discovery (no tests)
+  registry - Search the MCP Registry
+  baseline - Manage baselines (save/compare/accept/diff/show)
+  golden   - Golden output regression testing
+  contract - Contract validation (generate/validate/show)
+  watch    - Continuous checking on file changes
+  auth     - Manage LLM provider API keys
+  validate-config - Validate bellwether.yaml without running tests
 For more information on a specific command, use:
   bellwether <command> --help`)

package/dist/config/template.js CHANGED Viewed

@@ -23,13 +23,11 @@ export function generateConfigTemplate(options = {}) {
     // Override security.enabled if preset specifies it
     const securityEnabledValue = securityEnabled ? 'true' : String(defaults.check.security.enabled);
     const serverArgsYaml = serverArgs.length > 0
-        ? `\n  args:\n${serverArgs.map(arg => `    - "${arg}"`).join('\n')}`
+        ? `\n  args:\n${serverArgs.map((arg) => `    - "${arg}"`).join('\n')}`
         : '\n  args: []';
     const presetComment = preset ? `# Generated with: bellwether init --preset ${preset}\n` : '';
     // Generate env section if env vars were detected
-    const envVarsYaml = envVars.length > 0
-        ? `\n  env:\n${envVars.map(v => `    ${v}: "\${${v}}"`).join('\n')}`
-        : '';
+    const envVarsYaml = envVars.length > 0 ? `\n  env:\n${envVars.map((v) => `    ${v}: "\${${v}}"`).join('\n')}` : '';
     return `# Bellwether Configuration
 # Generated by: bellwether init
 # Docs: https://docs.bellwether.sh/guides/configuration
@@ -61,10 +59,12 @@ server:
   timeout: ${defaults.server.timeout}
   # Additional environment variables for the server process
-  # Use \${VAR} syntax to reference environment variables${envVarsYaml}${envVars.length === 0 ? `
+  # Use \${VAR} syntax to reference environment variables${envVarsYaml}${envVars.length === 0
+        ? `
   # env:
   #   NODE_ENV: production
-  #   API_KEY: "\${API_KEY}"` : ''}
+  #   API_KEY: "\${API_KEY}"`
+        : ''}
 # =============================================================================
 # SCENARIOS (used by both commands)
@@ -91,7 +91,8 @@ output:
   # These are kept in root by default for visibility
   docsDir: "${defaults.output.docsDir}"
-  # Output format: agents.md (markdown only), json (JSON only), or both
+  # Output format: docs (markdown only), json (JSON only), or both
+  # Legacy alias: agents.md (treated as docs)
   format: ${defaults.output.format}
   # Example output settings (for CONTRACT.md and AGENTS.md)