npm - @dotsetlabs/bellwether - Versions diffs - 1.0.3 → 2.0.0 - Mend

@dotsetlabs/bellwether 1.0.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64)

package/CHANGELOG.md +74 -0
package/README.md +8 -2
package/dist/baseline/accessors.d.ts +1 -1
package/dist/baseline/accessors.js +1 -3
package/dist/baseline/baseline-format.d.ts +287 -0
package/dist/baseline/baseline-format.js +12 -0
package/dist/baseline/comparator.js +249 -11
package/dist/baseline/converter.d.ts +15 -15
package/dist/baseline/converter.js +46 -34
package/dist/baseline/diff.d.ts +1 -1
package/dist/baseline/diff.js +45 -28
package/dist/baseline/error-analyzer.d.ts +1 -1
package/dist/baseline/error-analyzer.js +90 -17
package/dist/baseline/incremental-checker.js +8 -5
package/dist/baseline/index.d.ts +2 -12
package/dist/baseline/index.js +3 -23
package/dist/baseline/performance-tracker.d.ts +0 -1
package/dist/baseline/performance-tracker.js +13 -20
package/dist/baseline/response-fingerprint.js +39 -2
package/dist/baseline/saver.js +41 -10
package/dist/baseline/schema-compare.d.ts +22 -0
package/dist/baseline/schema-compare.js +259 -16
package/dist/baseline/types.d.ts +10 -7
package/dist/cache/response-cache.d.ts +8 -0
package/dist/cache/response-cache.js +110 -0
package/dist/cli/commands/check.js +23 -6
package/dist/cli/commands/explore.js +34 -14
package/dist/cli/index.js +8 -0
package/dist/config/template.js +8 -7
package/dist/config/validator.d.ts +59 -59
package/dist/config/validator.js +245 -90
package/dist/constants/core.d.ts +4 -0
package/dist/constants/core.js +8 -19
package/dist/constants/registry.d.ts +17 -0
package/dist/constants/registry.js +18 -0
package/dist/constants/testing.d.ts +0 -369
package/dist/constants/testing.js +18 -456
package/dist/constants.d.ts +1 -1
package/dist/constants.js +1 -1
package/dist/docs/contract.js +131 -83
package/dist/docs/report.js +8 -5
package/dist/interview/insights.d.ts +17 -0
package/dist/interview/insights.js +52 -0
package/dist/interview/interviewer.js +52 -10
package/dist/interview/prompt-test-generator.d.ts +12 -0
package/dist/interview/prompt-test-generator.js +77 -0
package/dist/interview/resource-test-generator.d.ts +12 -0
package/dist/interview/resource-test-generator.js +20 -0
package/dist/interview/schema-inferrer.js +26 -4
package/dist/interview/schema-test-generator.js +278 -31
package/dist/interview/stateful-test-runner.d.ts +3 -0
package/dist/interview/stateful-test-runner.js +80 -0
package/dist/interview/types.d.ts +12 -0
package/dist/transport/mcp-client.js +1 -1
package/dist/transport/sse-transport.d.ts +7 -3
package/dist/transport/sse-transport.js +157 -67
package/dist/version.js +1 -1
package/man/bellwether.1 +1 -1
package/man/bellwether.1.md +2 -2
package/package.json +1 -1
package/schemas/bellwether-check.schema.json +185 -0
package/schemas/bellwether-explore.schema.json +837 -0
package/scripts/completions/bellwether.bash +10 -4
package/scripts/completions/bellwether.zsh +55 -2

package/dist/baseline/diff.js CHANGED Viewed

@@ -73,16 +73,15 @@ export function formatDiffText(diff, useColors = true) {
         lines.push(red('─── Performance Regressions ───'));
         for (const regression of diff.performanceReport.regressions) {
             const percentStr = (regression.regressionPercent * 100).toFixed(1);
-            const confidenceNote = regression.isReliable
-                ? ''
-                : ` ${yellow('(low confidence)')}`;
+            const confidenceNote = regression.isReliable ? '' : ` ${yellow('(low confidence)')}`;
             lines.push(`  ${red('!')} ${regression.toolName}: ` +
                 `${regression.previousP50Ms.toFixed(0)}ms → ` +
                 `${regression.currentP50Ms.toFixed(0)}ms (+${percentStr}%)${confidenceNote}`);
         }
         lines.push('');
         // Show low confidence tools warning
-        if (diff.performanceReport.lowConfidenceTools && diff.performanceReport.lowConfidenceTools.length > 0) {
+        if (diff.performanceReport.lowConfidenceTools &&
+            diff.performanceReport.lowConfidenceTools.length > 0) {
             lines.push(yellow('  Note: Some tools have low confidence metrics.'));
             lines.push(yellow(`  Run with more samples for reliable baselines: ${diff.performanceReport.lowConfidenceTools.join(', ')}`));
             lines.push('');
@@ -94,7 +93,8 @@ export function formatDiffText(diff, useColors = true) {
         lines.push('');
     }
     // Performance confidence changes
-    if (diff.performanceReport?.confidenceChanges && diff.performanceReport.confidenceChanges.length > 0) {
+    if (diff.performanceReport?.confidenceChanges &&
+        diff.performanceReport.confidenceChanges.length > 0) {
         lines.push(cyan('─── Confidence Changes ───'));
         for (const change of diff.performanceReport.confidenceChanges) {
             const icon = change.improved ? green('↑') : change.degraded ? yellow('↓') : '→';
@@ -212,7 +212,8 @@ export function formatDiffText(diff, useColors = true) {
     lines.push(`  Info: ${diff.infoCount}`);
     if (diff.performanceReport) {
         lines.push(`  Performance regressions: ${diff.performanceReport.regressionCount}`);
-        if (diff.performanceReport.lowConfidenceTools && diff.performanceReport.lowConfidenceTools.length > 0) {
+        if (diff.performanceReport.lowConfidenceTools &&
+            diff.performanceReport.lowConfidenceTools.length > 0) {
             lines.push(`  Low confidence tools: ${diff.performanceReport.lowConfidenceTools.length}`);
         }
     }
@@ -276,7 +277,8 @@ export function formatDiffCompact(diff) {
     if (diff.performanceReport?.regressionCount ?? 0 > 0) {
         parts.push(`perf_regressions=${diff.performanceReport?.regressionCount}`);
     }
-    if (diff.performanceReport?.lowConfidenceTools && diff.performanceReport.lowConfidenceTools.length > 0) {
+    if (diff.performanceReport?.lowConfidenceTools &&
+        diff.performanceReport.lowConfidenceTools.length > 0) {
         parts.push(`low_confidence_tools=${diff.performanceReport.lowConfidenceTools.length}`);
     }
     if (diff.securityReport) {
@@ -336,8 +338,11 @@ export function formatDiffGitHubActions(diff) {
         lines.push(`::notice::Minor changes: ${diff.summary}`);
     }
     for (const change of diff.behaviorChanges) {
-        const level = change.severity === 'breaking' ? 'error' :
-            change.severity === 'warning' ? 'warning' : 'notice';
+        const level = change.severity === 'breaking'
+            ? 'error'
+            : change.severity === 'warning'
+                ? 'warning'
+                : 'notice';
         lines.push(`::${level}::${change.tool} - ${change.description}`);
     }
     for (const tool of diff.toolsRemoved) {
@@ -355,7 +360,8 @@ export function formatDiffGitHubActions(diff) {
         }
     }
     // Low confidence warning
-    if (diff.performanceReport?.lowConfidenceTools && diff.performanceReport.lowConfidenceTools.length > 0) {
+    if (diff.performanceReport?.lowConfidenceTools &&
+        diff.performanceReport.lowConfidenceTools.length > 0) {
         lines.push(`::notice::Low confidence metrics for ${diff.performanceReport.lowConfidenceTools.length} tool(s): ${diff.performanceReport.lowConfidenceTools.join(', ')}`);
     }
     // Security findings
@@ -440,7 +446,9 @@ export function formatDiffMarkdown(diff) {
                 toolDiff.schemaChanged ? 'Schema changed' : '',
                 toolDiff.descriptionChanged ? 'Description changed' : '',
                 `${toolDiff.changes.length} change(s)`,
-            ].filter(Boolean).join(', ');
+            ]
+                .filter(Boolean)
+                .join(', ');
             lines.push(`| ${toolDiff.tool} | ⚠️ Modified | ${details} |`);
         }
         lines.push('');
@@ -451,8 +459,7 @@ export function formatDiffMarkdown(diff) {
         lines.push('| Tool | Aspect | Severity | Description |');
         lines.push('|------|--------|----------|-------------|');
         for (const change of diff.behaviorChanges) {
-            const sevEmoji = change.severity === 'breaking' ? '🔴' :
-                change.severity === 'warning' ? '🟡' : '🟢';
+            const sevEmoji = change.severity === 'breaking' ? '🔴' : change.severity === 'warning' ? '🟡' : '🟢';
             lines.push(`| ${change.tool} | ${change.aspect} | ${sevEmoji} ${change.severity} | ${change.description} |`);
         }
         lines.push('');
@@ -503,7 +510,11 @@ export function formatDiffMarkdown(diff) {
                 lines.push('|------|--------|---------|');
                 for (const issue of schemaReport.toolsWithIssues) {
                     const statusIcon = issue.isBreaking ? '🔴' : issue.becameUnstable ? '🟡' : '🔵';
-                    const status = issue.isBreaking ? 'Breaking' : issue.becameUnstable ? 'Unstable' : 'Changed';
+                    const status = issue.isBreaking
+                        ? 'Breaking'
+                        : issue.becameUnstable
+                            ? 'Unstable'
+                            : 'Changed';
                     lines.push(`| ${issue.toolName} | ${statusIcon} ${status} | ${issue.summary} |`);
                 }
                 lines.push('');
@@ -525,11 +536,13 @@ export function formatDiffMarkdown(diff) {
                 lines.push(`⚠️ **Error behavior changed**: ${et.summary}`);
                 lines.push('');
             }
-            if (et.newCategories.length > 0 || et.resolvedCategories.length > 0 ||
-                et.increasingCategories.length > 0 || et.decreasingCategories.length > 0) {
+            if (et.newCategories.length > 0 ||
+                et.resolvedCategories.length > 0 ||
+                et.increasingCategories.length > 0 ||
+                et.decreasingCategories.length > 0) {
                 lines.push('| Category | Trend | Previous | Current | Change |');
                 lines.push('|----------|-------|----------|---------|--------|');
-                for (const trend of et.trends.filter(t => t.trend !== 'stable')) {
+                for (const trend of et.trends.filter((t) => t.trend !== 'stable')) {
                     const trendEmoji = getTrendEmoji(trend.trend);
                     const changeStr = trend.changePercent !== 0
                         ? `${trend.changePercent > 0 ? '+' : ''}${trend.changePercent}%`
@@ -543,7 +556,8 @@ export function formatDiffMarkdown(diff) {
     // Performance section
     if (diff.performanceReport) {
         const perfReport = diff.performanceReport;
-        if (perfReport.hasRegressions || perfReport.improvementCount > 0 ||
+        if (perfReport.hasRegressions ||
+            perfReport.improvementCount > 0 ||
             (perfReport.lowConfidenceTools && perfReport.lowConfidenceTools.length > 0)) {
             lines.push('### Performance');
             lines.push('');
@@ -605,7 +619,8 @@ export function formatDiffMarkdown(diff) {
     lines.push(`- Info: **${diff.infoCount}**`);
     if (diff.performanceReport) {
         lines.push(`- Performance regressions: **${diff.performanceReport.regressionCount}**`);
-        if (diff.performanceReport.lowConfidenceTools && diff.performanceReport.lowConfidenceTools.length > 0) {
+        if (diff.performanceReport.lowConfidenceTools &&
+            diff.performanceReport.lowConfidenceTools.length > 0) {
             lines.push(`- Low confidence tools: **${diff.performanceReport.lowConfidenceTools.length}**`);
         }
     }
@@ -674,9 +689,7 @@ function getTrendEmoji(trend) {
  */
 export function formatDiffJUnit(diff, suiteName = 'bellwether') {
     const timestamp = new Date().toISOString();
-    const totalTests = diff.toolsAdded.length +
-        diff.toolsRemoved.length +
-        diff.behaviorChanges.length;
+    const totalTests = diff.toolsAdded.length + diff.toolsRemoved.length + diff.behaviorChanges.length;
     const failures = diff.breakingCount;
     const errors = 0;
     const skipped = 0;
@@ -743,7 +756,8 @@ export function formatDiffJUnit(diff, suiteName = 'bellwether') {
             lines.push('    </testcase>');
         }
         // Low confidence tools
-        if (diff.performanceReport.lowConfidenceTools && diff.performanceReport.lowConfidenceTools.length > 0) {
+        if (diff.performanceReport.lowConfidenceTools &&
+            diff.performanceReport.lowConfidenceTools.length > 0) {
             for (const tool of diff.performanceReport.lowConfidenceTools) {
                 const name = escapeXml(`confidence-${tool}`);
                 lines.push(`    <testcase name="${name}" classname="drift.confidence">`);
@@ -805,7 +819,8 @@ export function formatDiffJUnit(diff, suiteName = 'bellwether') {
             lines.push('    </testcase>');
         }
         // Show stable schemas as passing tests
-        if (diff.schemaEvolutionReport.stableCount > 0 && diff.schemaEvolutionReport.toolsWithIssues.length === 0) {
+        if (diff.schemaEvolutionReport.stableCount > 0 &&
+            diff.schemaEvolutionReport.toolsWithIssues.length === 0) {
             lines.push(`    <testcase name="schema-stability-check" classname="drift.schema">`);
             lines.push(`      <system-out>${diff.schemaEvolutionReport.stableCount} tool(s) have stable response schemas</system-out>`);
             lines.push('    </testcase>');
@@ -830,7 +845,7 @@ export function formatDiffJUnit(diff, suiteName = 'bellwether') {
         }
         // Increasing error types (warnings)
         for (const category of et.increasingCategories) {
-            const trend = et.trends.find(t => t.category === category);
+            const trend = et.trends.find((t) => t.category === category);
             const name = escapeXml(`error-trend-increasing-${category}`);
             lines.push(`    <testcase name="${name}" classname="drift.errors">`);
             lines.push(`      <system-err>[WARNING] Error frequency increasing: ${escapeXml(category)}${trend ? ` (+${trend.changePercent}%)` : ''}</system-err>`);
@@ -878,7 +893,7 @@ export function formatDiffJUnit(diff, suiteName = 'bellwether') {
  * Format diff as SARIF (Static Analysis Results Interchange Format) for GitHub Code Scanning.
  *
  * SARIF is the standard format for GitHub's code scanning feature and can be
- * uploaded to show drift detection results in pull request reviews.
+ * used to show drift detection results in pull request reviews.
  *
  * @see https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html
  *
@@ -1313,11 +1328,13 @@ export function formatDiffSarif(diff, baselinePath = 'bellwether-baseline.json')
         }
         // Increasing error types
         for (const category of et.increasingCategories) {
-            const trend = et.trends.find(t => t.category === category);
+            const trend = et.trends.find((t) => t.category === category);
             results.push({
                 ruleId: 'BWH011',
                 level: 'warning',
-                message: { text: `Error frequency increasing: ${category}${trend ? ` (+${trend.changePercent}%)` : ''}` },
+                message: {
+                    text: `Error frequency increasing: ${category}${trend ? ` (+${trend.changePercent}%)` : ''}`,
+                },
                 locations: [
                     {
                         physicalLocation: {

package/dist/baseline/error-analyzer.d.ts CHANGED Viewed

@@ -69,7 +69,7 @@ export interface ErrorAnalysisSummary {
     /** Unique remediations suggested */
     remediations: string[];
     /** Counts by error category */
-    categoryCounts: Map<string, number>;
+    categoryCounts: Map<string, number> | Record<string, number>;
     /** Top root causes (most common) */
     topRootCauses: string[];
     /** Top remediations (most actionable) */

package/dist/baseline/error-analyzer.js CHANGED Viewed

@@ -22,7 +22,8 @@ export function analyzeError(errorMessage, context) {
     let statusCategory = categorizeHttpStatus(httpStatus);
     const wasExpected = context?.wasExpected ?? context?.expectedOutcome === 'error';
     // If the error was expected (validation test), recategorize it
-    if (wasExpected && (statusCategory === 'client_error_validation' || statusCategory === 'unknown')) {
+    if (wasExpected &&
+        (statusCategory === 'client_error_validation' || statusCategory === 'unknown')) {
         statusCategory = 'validation_expected';
     }
     const rootCause = wasExpected
@@ -96,7 +97,9 @@ export function generateErrorSummary(toolName, patterns) {
         }
     }
     // Count transient and actionable errors
-    const transientErrors = analyses.filter((a) => a.transient).reduce((sum, a) => sum + a.pattern.count, 0);
+    const transientErrors = analyses
+        .filter((a) => a.transient)
+        .reduce((sum, a) => sum + a.pattern.count, 0);
     const actionableCount = analyses.filter((a) => a.remediation && !a.remediation.includes('Review')).length;
     // Collect unique remediations with frequency
     const remediationCounts = new Map();
@@ -324,7 +327,9 @@ export function inferRootCause(message, category) {
     if (lower.includes('invalid') || lower.includes('malformed')) {
         return 'Invalid input format or value';
     }
-    if (lower.includes('not found') || lower.includes('does not exist') || lower.includes("doesn't exist")) {
+    if (lower.includes('not found') ||
+        lower.includes('does not exist') ||
+        lower.includes("doesn't exist")) {
         return 'Referenced resource does not exist';
     }
     if (lower.includes('already exists') || lower.includes('duplicate')) {
@@ -333,7 +338,9 @@ export function inferRootCause(message, category) {
     if (lower.includes('unauthorized') || lower.includes('authentication')) {
         return 'Authentication credentials missing or invalid';
     }
-    if (lower.includes('forbidden') || lower.includes('permission') || lower.includes('access denied')) {
+    if (lower.includes('forbidden') ||
+        lower.includes('permission') ||
+        lower.includes('access denied')) {
         return 'Insufficient permissions for this operation';
     }
     if (lower.includes('rate') || lower.includes('throttl') || lower.includes('too many')) {
@@ -480,15 +487,69 @@ export function extractRelatedParameters(message) {
  */
 function isCommonWord(word) {
     const commonWords = new Set([
-        'the', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
-        'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
-        'could', 'should', 'may', 'might', 'must', 'shall', 'can',
-        'need', 'not', 'and', 'but', 'or', 'if', 'then', 'else',
-        'for', 'with', 'from', 'this', 'that', 'these', 'those',
-        'error', 'message', 'failed', 'invalid', 'missing', 'required',
-        'found', 'exist', 'exists', 'value', 'input', 'output', 'type',
-        'string', 'number', 'boolean', 'object', 'array', 'null', 'undefined',
-        'field', 'parameter', 'property', 'argument', 'key',
+        'the',
+        'is',
+        'are',
+        'was',
+        'were',
+        'be',
+        'been',
+        'being',
+        'have',
+        'has',
+        'had',
+        'do',
+        'does',
+        'did',
+        'will',
+        'would',
+        'could',
+        'should',
+        'may',
+        'might',
+        'must',
+        'shall',
+        'can',
+        'need',
+        'not',
+        'and',
+        'but',
+        'or',
+        'if',
+        'then',
+        'else',
+        'for',
+        'with',
+        'from',
+        'this',
+        'that',
+        'these',
+        'those',
+        'error',
+        'message',
+        'failed',
+        'invalid',
+        'missing',
+        'required',
+        'found',
+        'exist',
+        'exists',
+        'value',
+        'input',
+        'output',
+        'type',
+        'string',
+        'number',
+        'boolean',
+        'object',
+        'array',
+        'null',
+        'undefined',
+        'field',
+        'parameter',
+        'property',
+        'argument',
+        'key',
     ]);
     return commonWords.has(word.toLowerCase());
 }
@@ -510,9 +571,19 @@ export function isTransientError(category, message) {
         return true;
     // Check for transient keywords
     const transientKeywords = [
-        'timeout', 'timed out', 'temporarily', 'retry', 'unavailable',
-        'connection', 'network', 'service unavailable', 'too many requests',
-        'try again', 'overloaded', 'busy', 'maintenance',
+        'timeout',
+        'timed out',
+        'temporarily',
+        'retry',
+        'unavailable',
+        'connection',
+        'network',
+        'service unavailable',
+        'too many requests',
+        'try again',
+        'overloaded',
+        'busy',
+        'maintenance',
     ];
     return transientKeywords.some((keyword) => lower.includes(keyword));
 }
@@ -661,7 +732,9 @@ export function formatErrorTrendReport(report, useColors = false) {
     lines.push('  Trend details:');
     for (const trend of report.trends.filter((t) => t.trend !== 'stable')) {
         const arrow = getTrendArrow(trend.trend);
-        const changeText = trend.changePercent !== 0 ? ` (${trend.changePercent > 0 ? '+' : ''}${trend.changePercent}%)` : '';
+        const changeText = trend.changePercent !== 0
+            ? ` (${trend.changePercent > 0 ? '+' : ''}${trend.changePercent}%)`
+            : '';
         lines.push(`    ${arrow} ${trend.category}: ${trend.previousCount} → ${trend.currentCount}${changeText}`);
     }
     return lines.join('\n');

package/dist/baseline/incremental-checker.js CHANGED Viewed

@@ -26,7 +26,7 @@ export function analyzeForIncremental(currentTools, baseline, options = {}) {
     // If no baseline or force retest, test everything
     if (!baseline || forceRetest) {
         return {
-            toolsToTest: currentTools.map(t => t.name),
+            toolsToTest: currentTools.map((t) => t.name),
             toolsToSkip: [],
             cachedFingerprints: [],
             changeSummary: {
@@ -34,7 +34,7 @@ export function analyzeForIncremental(currentTools, baseline, options = {}) {
                 changedTools: 0,
                 unchangedTools: 0,
                 removedTools: 0,
-                newToolNames: baseline ? [] : currentTools.map(t => t.name),
+                newToolNames: baseline ? [] : currentTools.map((t) => t.name),
                 changedToolNames: [],
                 removedToolNames: [],
             },
@@ -47,8 +47,8 @@ export function analyzeForIncremental(currentTools, baseline, options = {}) {
     const changedToolNames = [];
     const removedToolNames = [];
     // Build maps for comparison
-    const baselineToolMap = new Map(getToolFingerprints(baseline).map(t => [t.name, t]));
-    const currentToolSet = new Set(currentTools.map(t => t.name));
+    const baselineToolMap = new Map(getToolFingerprints(baseline).map((t) => [t.name, t]));
+    const currentToolSet = new Set(currentTools.map((t) => t.name));
     // Check current tools against baseline
     for (const tool of currentTools) {
         const baselineTool = baselineToolMap.get(tool.name);
@@ -65,7 +65,10 @@ export function analyzeForIncremental(currentTools, baseline, options = {}) {
         }
         // Check if schema changed
         const currentSchemaHash = computeSchemaHash(tool.inputSchema);
-        const baselineSchemaHash = baselineTool.schemaHash;
+        const baselineSchemaHash = baselineTool.inputSchemaHashAtTest ??
+            (baselineTool.inputSchema
+                ? computeSchemaHash(baselineTool.inputSchema)
+                : baselineTool.schemaHash);
         if (currentSchemaHash !== baselineSchemaHash) {
             // Schema changed - needs retesting
             toolsToTest.push(tool.name);

package/dist/baseline/index.d.ts CHANGED Viewed

@@ -6,26 +6,16 @@ export { getBaselineGeneratedAt, getBaselineHash, getBaselineServerCommand, getB
 export { createBaseline, saveBaseline, loadBaseline, verifyBaselineHash, baselineExists, recalculateBaselineHash, acceptDrift, hasAcceptance, clearAcceptance, type LoadBaselineOptions, type AcceptDriftOptions, } from './saver.js';
 export { compareWithBaseline, compareBaselines, hasBreakingChanges, hasSecurityChanges, filterByMinimumSeverity, checkBaselineVersionCompatibility, compareSeverity, severityMeetsThreshold, applyAspectOverride, applySeverityConfig, shouldFailOnDiff, } from './comparator.js';
 export { formatDiffText, formatDiffJson, formatDiffCompact, formatDiffGitHubActions, formatDiffMarkdown, formatDiffJUnit, formatDiffSarif, formatSecurityReport, } from './diff.js';
-export { createCloudBaseline, } from './converter.js';
+export { createBaselineFromInterview } from './converter.js';
 export { computeSchemaHash, compareSchemas, computeConsensusSchemaHash, type SchemaChangeType, type SchemaChange, type SchemaComparisonResult, } from './schema-compare.js';
 export { getBaselineVersion, parseVersion, areVersionsCompatible, compareVersions, getCompatibilityWarning, checkVersionCompatibility, assertVersionCompatibility, formatVersion, isCurrentVersion, isOlderVersion, isNewerVersion, requiresMigration, BaselineVersionError, type FormatVersion, type VersionCompatibility, } from './version.js';
 export { analyzeForIncremental, mergeFingerprints, formatIncrementalSummary, isIncrementalWorthwhile, addIncrementalMetadata, type IncrementalCheckResult, type IncrementalChangeSummary, type IncrementalCheckOptions, } from './incremental-checker.js';
 export { analyzeResponses, inferSchemaFromValue, compareFingerprints, compareErrorPatterns, computeInferredSchemaHash, type ResponseFingerprint, type ResponseContentType, type ResponseSize, type InferredSchema, type ErrorPattern, type ResponseAnalysis, type FingerprintDiff, type FingerprintChange, type ErrorPatternDiff, } from './response-fingerprint.js';
 export { compareInferredSchemas, buildSchemaEvolution, compareSchemaEvolution, formatSchemaEvolution, formatSchemaEvolutionDiff, hasSchemaEvolutionIssues, getSchemaStabilityGrade, type ResponseSchemaEvolution, type SchemaVersion as SchemaEvolutionVersion, type SchemaEvolutionDiff, type SchemaTypeChange, } from './response-schema-tracker.js';
-export { analyzeToolChangeImpact, analyzeDiffImpact, analyzeSchemaChanges, isBreakingChange, getBreakingChangeSummary, CHANGE_IMPACT, type SchemaChangeType as ImpactSchemaChangeType, type SchemaChangeDetail, type MigrationComplexity, type ChangeImpact, type DiffImpactAnalysis, type ActionItem, } from './change-impact-analyzer.js';
-export { calculateMetrics, createPerformanceBaseline, extractPerformanceBaselines, comparePerformance, generatePerformanceReport, formatMetrics, formatComparison, isPerformanceAcceptable, aggregateSamplesByTool, calculatePerformanceConfidence, calculateConfidenceFromMetrics, formatConfidenceLevel, hasReliableConfidence, PERFORMANCE, type LatencyTrend, type ToolPerformanceMetrics, type PerformanceBaseline, type PerformanceComparison, type PerformanceReport, type LatencySample, } from './performance-tracker.js';
-export { checkDeprecations, checkToolDeprecation, markAsDeprecated, clearDeprecation, getDeprecatedTools, getExpiredTools, getUpcomingRemovals, formatDeprecationWarning, formatDeprecationReport, shouldFailOnDeprecation, DEPRECATION, DEPRECATION_DEFAULTS, DEPRECATION_THRESHOLDS, type DeprecationStatus, type DeprecationWarning, type DeprecationReport, type DeprecationConfig, } from './deprecation-tracker.js';
-export { calculateHealthScore, formatHealthScore, meetsHealthThreshold, getHealthBadgeColor, createHealthHistoryEntry, HEALTH_SCORING, HEALTH_WEIGHTS, GRADE_THRESHOLDS, SEVERITY_THRESHOLDS, HEALTH_PENALTIES, type HealthTrend, type ActionPriority, type HealthActionItem, type HealthComponents, type HealthScore, type HealthHistory, type HealthInput, } from './health-scorer.js';
-export { buildServerTimeline, buildToolTimeline, formatTimeline, formatServerTimelineSummary, generateVisualTimeline, serializeTimeline, deserializeTimeline, serializeServerTimeline, deserializeServerTimeline, getMostActiveTools, getMostBreakingTools, getBreakingChanges, getVersionAtTime, getChangesBetween, hadBreakingChanges, type SchemaEventType, type SchemaVersion, type SchemaTimeline, type ServerTimeline, type DeprecationEvent, type TimelineStats, type TimelineBuildOptions, } from './schema-evolution.js';
-export { generateMigrationGuide, formatMigrationGuideMarkdown, formatMigrationGuideText, hasBreakingMigrationChanges, getBreakingTools, type MigrationEffort, type MigrationStepType, type CodeExample, type BreakingChange, type MigrationStep, type MigrationGuide, type MigrationStats, } from './migration-generator.js';
-export { generateToolScenarios, generateBaselineScenarios, formatScenariosAsYaml, formatScenariosReport, getScenariosByPriority, getScenariosByCategory, getCriticalScenarios, getSecurityScenarios, type ScenarioCategory, type ScenarioPriority, type TestScenario, type AutoGeneratedScenarios, type ScenarioGenerationSummary, type ScenarioGenerationResult, type ScenarioGenerationConfig, } from './scenario-generator.js';
-export { generatePRComment, generateCompactPRComment, generateCIStatusSummary, generateDiffTable, generateBadgeUrl, generateBadgeMarkdown, getBadgeColor, shouldBlockMerge, getSeverityEmoji, type BadgeColor, type CommentSection, type AffectedWorkflow, type PRComment, type PRCommentConfig, } from './pr-comment-generator.js';
+export { calculateMetrics, createPerformanceBaseline, extractPerformanceBaselines, comparePerformance, generatePerformanceReport, formatMetrics, formatComparison, isPerformanceAcceptable, aggregateSamplesByTool, calculatePerformanceConfidence, calculateConfidenceFromMetrics, formatConfidenceLevel, hasReliableConfidence, type LatencyTrend, type ToolPerformanceMetrics, type PerformanceBaseline, type PerformanceComparison, type PerformanceReport, type LatencySample, } from './performance-tracker.js';
 export type { SecurityCategory, RiskLevel, SecurityPayload, SecurityTestResult, SecurityFinding, SecurityFingerprint, SecurityDiff, SecurityTestOptions, SecurityTestContext, SecurityToolCallResult, SecurityReport, } from '../security/types.js';
 export { runSecurityTests, compareSecurityFingerprints, getRiskLevelFromScore, parseSecurityCategories, getPayloadsForCategory, getAllSecurityPayloads, getAllSecurityCategories, } from '../security/index.js';
 export type { HttpStatusCategory, ErrorSeverity, EnhancedErrorAnalysis, ErrorAnalysisSummary, ErrorTrend, ErrorTrendReport, } from './error-analyzer.js';
 export { analyzeError, analyzeErrorPatterns, generateErrorSummary, analyzeErrorTrends, extractHttpStatus, categorizeHttpStatus, inferRootCause, generateRemediation, extractRelatedParameters, isTransientError, assessErrorSeverity, mapStatusToErrorCategory, formatEnhancedError, formatErrorTrendReport, formatCategoryName, } from './error-analyzer.js';
 export { scoreDocumentation, scoreToolDocumentation, calculateDescriptionCoverage, calculateDescriptionQuality, calculateParameterDocumentation, calculateExampleCoverage, hasExamples, scoreToGrade, generateSuggestions, compareDocumentationScores, formatDocumentationScore, formatDocumentationScoreCompact, formatDocumentationScoreChange, toDocumentationScoreSummary, getGradeIndicator, getGradeBadgeColor, meetsDocumentationThreshold, meetsDocumentationGrade, type DocumentationIssueSeverity, type DocumentationIssueType, } from './documentation-scorer.js';
-export { calculateAICompatibilityScore, generateAICompatibilityMarkdown, type AICompatibilityScore, type ScoreComponent, type AICompatibilityRecommendation, type ToolAIScore, type AICompatibilityInput, } from './ai-compatibility-scorer.js';
-export { calculateRiskScore, generateRiskScoreMarkdown, type RegressionRiskScore, type RiskFactor, } from './risk-scorer.js';
-export { calculatePruningDecisions, calculateToolPruning, prioritizeTools, generatePruningSummary, generatePruningMarkdown, type TestCategory, type TestCategoryDecision, type ToolPruningDecision, type ToolCharacteristics, type PruningInput, type PruningSummary, } from './test-pruner.js';
 //# sourceMappingURL=index.d.ts.map

package/dist/baseline/index.js CHANGED Viewed

@@ -5,39 +5,19 @@ export { getBaselineGeneratedAt, getBaselineHash, getBaselineServerCommand, getB
 export { createBaseline, saveBaseline, loadBaseline, verifyBaselineHash, baselineExists, recalculateBaselineHash, acceptDrift, hasAcceptance, clearAcceptance, } from './saver.js';
 export { compareWithBaseline, compareBaselines, hasBreakingChanges, hasSecurityChanges, filterByMinimumSeverity, checkBaselineVersionCompatibility, compareSeverity, severityMeetsThreshold, applyAspectOverride, applySeverityConfig, shouldFailOnDiff, } from './comparator.js';
 export { formatDiffText, formatDiffJson, formatDiffCompact, formatDiffGitHubActions, formatDiffMarkdown, formatDiffJUnit, formatDiffSarif, formatSecurityReport, } from './diff.js';
-export { createCloudBaseline, } from './converter.js';
+export { createBaselineFromInterview } from './converter.js';
 export { computeSchemaHash, compareSchemas, computeConsensusSchemaHash, } from './schema-compare.js';
 export { getBaselineVersion, parseVersion, areVersionsCompatible, compareVersions, getCompatibilityWarning, checkVersionCompatibility, assertVersionCompatibility, formatVersion, isCurrentVersion, isOlderVersion, isNewerVersion, requiresMigration, BaselineVersionError, } from './version.js';
-// Legacy baseline migrations removed; cloud baseline is canonical.
+// Legacy baseline migrations removed; current baseline format is canonical.
 // Incremental checking
 export { analyzeForIncremental, mergeFingerprints, formatIncrementalSummary, isIncrementalWorthwhile, addIncrementalMetadata, } from './incremental-checker.js';
 export { analyzeResponses, inferSchemaFromValue, compareFingerprints, compareErrorPatterns, computeInferredSchemaHash, } from './response-fingerprint.js';
 // Response schema evolution tracking
 export { compareInferredSchemas, buildSchemaEvolution, compareSchemaEvolution, formatSchemaEvolution, formatSchemaEvolutionDiff, hasSchemaEvolutionIssues, getSchemaStabilityGrade, } from './response-schema-tracker.js';
-// Change impact analysis
-export { analyzeToolChangeImpact, analyzeDiffImpact, analyzeSchemaChanges, isBreakingChange, getBreakingChangeSummary, CHANGE_IMPACT, } from './change-impact-analyzer.js';
 // Performance tracking
-export { calculateMetrics, createPerformanceBaseline, extractPerformanceBaselines, comparePerformance, generatePerformanceReport, formatMetrics, formatComparison, isPerformanceAcceptable, aggregateSamplesByTool, calculatePerformanceConfidence, calculateConfidenceFromMetrics, formatConfidenceLevel, hasReliableConfidence, PERFORMANCE, } from './performance-tracker.js';
-// Deprecation tracking
-export { checkDeprecations, checkToolDeprecation, markAsDeprecated, clearDeprecation, getDeprecatedTools, getExpiredTools, getUpcomingRemovals, formatDeprecationWarning, formatDeprecationReport, shouldFailOnDeprecation, DEPRECATION, DEPRECATION_DEFAULTS, DEPRECATION_THRESHOLDS, } from './deprecation-tracker.js';
-// Health scoring
-export { calculateHealthScore, formatHealthScore, meetsHealthThreshold, getHealthBadgeColor, createHealthHistoryEntry, HEALTH_SCORING, HEALTH_WEIGHTS, GRADE_THRESHOLDS, SEVERITY_THRESHOLDS, HEALTH_PENALTIES, } from './health-scorer.js';
-// Schema evolution timeline
-export { buildServerTimeline, buildToolTimeline, formatTimeline, formatServerTimelineSummary, generateVisualTimeline, serializeTimeline, deserializeTimeline, serializeServerTimeline, deserializeServerTimeline, getMostActiveTools, getMostBreakingTools, getBreakingChanges, getVersionAtTime, getChangesBetween, hadBreakingChanges, } from './schema-evolution.js';
-// Migration guide generation
-export { generateMigrationGuide, formatMigrationGuideMarkdown, formatMigrationGuideText, hasBreakingMigrationChanges, getBreakingTools, } from './migration-generator.js';
-// Auto-generated test scenarios
-export { generateToolScenarios, generateBaselineScenarios, formatScenariosAsYaml, formatScenariosReport, getScenariosByPriority, getScenariosByCategory, getCriticalScenarios, getSecurityScenarios, } from './scenario-generator.js';
-// Enhanced PR comments
-export { generatePRComment, generateCompactPRComment, generateCIStatusSummary, generateDiffTable, generateBadgeUrl, generateBadgeMarkdown, getBadgeColor, shouldBlockMerge, getSeverityEmoji, } from './pr-comment-generator.js';
+export { calculateMetrics, createPerformanceBaseline, extractPerformanceBaselines, comparePerformance, generatePerformanceReport, formatMetrics, formatComparison, isPerformanceAcceptable, aggregateSamplesByTool, calculatePerformanceConfidence, calculateConfidenceFromMetrics, formatConfidenceLevel, hasReliableConfidence, } from './performance-tracker.js';
 export { runSecurityTests, compareSecurityFingerprints, getRiskLevelFromScore, parseSecurityCategories, getPayloadsForCategory, getAllSecurityPayloads, getAllSecurityCategories, } from '../security/index.js';
 export { analyzeError, analyzeErrorPatterns, generateErrorSummary, analyzeErrorTrends, extractHttpStatus, categorizeHttpStatus, inferRootCause, generateRemediation, extractRelatedParameters, isTransientError, assessErrorSeverity, mapStatusToErrorCategory, formatEnhancedError, formatErrorTrendReport, formatCategoryName, } from './error-analyzer.js';
 // Documentation quality scoring
 export { scoreDocumentation, scoreToolDocumentation, calculateDescriptionCoverage, calculateDescriptionQuality, calculateParameterDocumentation, calculateExampleCoverage, hasExamples, scoreToGrade, generateSuggestions, compareDocumentationScores, formatDocumentationScore, formatDocumentationScoreCompact, formatDocumentationScoreChange, toDocumentationScoreSummary, getGradeIndicator, getGradeBadgeColor, meetsDocumentationThreshold, meetsDocumentationGrade, } from './documentation-scorer.js';
-// AI Agent Compatibility Scoring
-export { calculateAICompatibilityScore, generateAICompatibilityMarkdown, } from './ai-compatibility-scorer.js';
-// Regression Risk Scoring
-export { calculateRiskScore, generateRiskScoreMarkdown, } from './risk-scorer.js';
-// Intelligent Test Pruning
-export { calculatePruningDecisions, calculateToolPruning, prioritizeTools, generatePruningSummary, generatePruningMarkdown, } from './test-pruner.js';
 //# sourceMappingURL=index.js.map

package/dist/baseline/performance-tracker.d.ts CHANGED Viewed

@@ -132,7 +132,6 @@ export interface LatencySample {
      */
     outcomeCorrect?: boolean;
 }
-export { PERFORMANCE_TRACKING as PERFORMANCE } from '../constants.js';
 /**
  * Calculate statistical confidence for performance metrics.
  *

package/dist/baseline/performance-tracker.js CHANGED Viewed

@@ -6,8 +6,6 @@
  */
 import { getBaselineGeneratedAt, getToolFingerprints } from './accessors.js';
 import { PERFORMANCE_TRACKING, PERFORMANCE_CONFIDENCE } from '../constants.js';
-// Re-export centralized constant for backwards compatibility
-export { PERFORMANCE_TRACKING as PERFORMANCE } from '../constants.js';
 /**
  * Calculate statistical confidence for performance metrics.
  *
@@ -45,14 +43,14 @@ export function calculatePerformanceConfidence(samples, options = {}) {
     }
     // Categorize samples by expected outcome
     // Happy path tests: expectedOutcome === 'success' or undefined (backward compat)
-    const happyPathSamples = samples.filter(s => s.expectedOutcome === 'success' || s.expectedOutcome === undefined);
+    const happyPathSamples = samples.filter((s) => s.expectedOutcome === 'success' || s.expectedOutcome === undefined);
     // Validation tests: expectedOutcome === 'error'
-    const validationTestSamples = samples.filter(s => s.expectedOutcome === 'error');
+    const validationTestSamples = samples.filter((s) => s.expectedOutcome === 'error');
     // Count validation samples that correctly rejected (error as expected = success)
-    const validationSuccesses = validationTestSamples.filter(s => !s.success && (s.outcomeCorrect === undefined || s.outcomeCorrect === true)).length;
+    const validationSuccesses = validationTestSamples.filter((s) => !s.success && (s.outcomeCorrect === undefined || s.outcomeCorrect === true)).length;
     // For confidence, only use happy path samples that succeeded
-    const successfulHappyPath = happyPathSamples.filter(s => s.success);
-    const allDurations = successfulHappyPath.map(s => s.durationMs);
+    const successfulHappyPath = happyPathSamples.filter((s) => s.success);
+    const allDurations = successfulHappyPath.map((s) => s.durationMs);
     // Handle all failures case
     if (allDurations.length === 0) {
         return {
@@ -69,12 +67,10 @@ export function calculatePerformanceConfidence(samples, options = {}) {
     // For variance calculation, exclude the first sample (cold start warmup)
     // This prevents JIT compilation, connection setup, and cache warming from
     // inflating the coefficient of variation and lowering confidence scores.
-    const durationsForVariance = excludeWarmup && allDurations.length > 1
-        ? allDurations.slice(1)
-        : allDurations;
+    const durationsForVariance = excludeWarmup && allDurations.length > 1 ? allDurations.slice(1) : allDurations;
     // Calculate variance using post-warmup samples only
     const meanForVariance = durationsForVariance.reduce((sum, d) => sum + d, 0) / durationsForVariance.length;
-    const squaredDiffs = durationsForVariance.map(d => Math.pow(d - meanForVariance, 2));
+    const squaredDiffs = durationsForVariance.map((d) => Math.pow(d - meanForVariance, 2));
     const variance = squaredDiffs.reduce((sum, d) => sum + d, 0) / durationsForVariance.length;
     const standardDeviation = Math.sqrt(variance);
     // Calculate coefficient of variation (CV = stdDev / mean)
@@ -186,8 +182,8 @@ export function calculateMetrics(samples) {
         return null;
     }
     const toolName = samples[0].toolName;
-    const successfulSamples = samples.filter(s => s.success);
-    const durations = successfulSamples.map(s => s.durationMs).sort((a, b) => a - b);
+    const successfulSamples = samples.filter((s) => s.success);
+    const durations = successfulSamples.map((s) => s.durationMs).sort((a, b) => a - b);
     if (durations.length === 0) {
         // All calls failed
         const confidence = calculatePerformanceConfidence(samples);
@@ -213,7 +209,7 @@ export function calculateMetrics(samples) {
     const minMs = durations[0];
     const maxMs = durations[durations.length - 1];
     // Calculate standard deviation
-    const squaredDiffs = durations.map(d => Math.pow(d - avgMs, 2));
+    const squaredDiffs = durations.map((d) => Math.pow(d - avgMs, 2));
     const avgSquaredDiff = squaredDiffs.reduce((sum, d) => sum + d, 0) / squaredDiffs.length;
     const stdDevMs = Math.sqrt(avgSquaredDiff);
     // Calculate confidence from samples
@@ -323,8 +319,8 @@ export function comparePerformance(current, baseline, regressionThreshold = PERF
     const trend = determineTrend(p50Regression);
     // Check for regression
     const maxRegression = baseline.maxAllowedRegression ?? regressionThreshold;
-    const hasRegression = p50Regression !== null && p50Regression > maxRegression ||
-        p95Regression !== null && p95Regression > maxRegression;
+    const hasRegression = (p50Regression !== null && p50Regression > maxRegression) ||
+        (p95Regression !== null && p95Regression > maxRegression);
     // Determine severity
     const severity = determinePerformanceSeverity(p50Regression, p95Regression, maxRegression);
     // Generate summary (include confidence note if low)
@@ -520,10 +516,7 @@ export function formatMetrics(metrics) {
  * Format performance comparison for display.
  */
 export function formatComparison(comparison) {
-    const lines = [
-        `Tool: ${comparison.toolName}`,
-        `  Trend: ${comparison.trend.toUpperCase()}`,
-    ];
+    const lines = [`Tool: ${comparison.toolName}`, `  Trend: ${comparison.trend.toUpperCase()}`];
     if (comparison.p50RegressionPercent !== null) {
         const sign = comparison.p50RegressionPercent >= 0 ? '+' : '';
         lines.push(`  p50 change: ${sign}${(comparison.p50RegressionPercent * 100).toFixed(1)}%`);