npm - @dotsetlabs/bellwether - Versions diffs - 2.0.0 → 2.0.1 - Mend

@dotsetlabs/bellwether 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

package/CHANGELOG.md +44 -0
package/README.md +9 -0
package/dist/auth/credentials.js +2 -0
package/dist/baseline/accessors.js +12 -0
package/dist/baseline/baseline-format.d.ts +48 -0
package/dist/baseline/comparator.js +263 -20
package/dist/baseline/converter.js +52 -4
package/dist/baseline/response-fingerprint.js +1 -1
package/dist/baseline/saver.js +34 -0
package/dist/baseline/types.d.ts +21 -1
package/dist/cache/response-cache.js +9 -2
package/dist/cli/commands/baseline.js +70 -35
package/dist/cli/commands/check.js +48 -9
package/dist/cli/commands/explore.js +36 -3
package/dist/cli/commands/init.js +10 -7
package/dist/cli/commands/watch.js +5 -5
package/dist/config/loader.js +2 -2
package/dist/constants/core.d.ts +1 -1
package/dist/constants/core.js +1 -1
package/dist/discovery/discovery.js +88 -14
package/dist/discovery/types.d.ts +5 -1
package/dist/docs/agents.js +138 -50
package/dist/docs/contract.js +63 -1
package/dist/errors/retry.js +11 -5
package/dist/interview/rate-limiter.js +7 -3
package/dist/llm/anthropic.js +14 -4
package/dist/llm/fallback.d.ts +1 -0
package/dist/llm/fallback.js +7 -1
package/dist/llm/openai.js +15 -4
package/dist/protocol/index.d.ts +2 -0
package/dist/protocol/index.js +2 -0
package/dist/protocol/version-registry.d.ts +66 -0
package/dist/protocol/version-registry.js +159 -0
package/dist/transport/http-transport.d.ts +11 -1
package/dist/transport/http-transport.js +21 -2
package/dist/transport/mcp-client.d.ts +29 -1
package/dist/transport/mcp-client.js +92 -7
package/dist/transport/sse-transport.js +5 -4
package/dist/transport/types.d.ts +134 -1
package/dist/utils/concurrency.d.ts +2 -0
package/dist/utils/concurrency.js +9 -2
package/dist/utils/markdown.js +13 -18
package/dist/utils/timeout.js +2 -1
package/dist/version.js +1 -1
package/man/bellwether.1 +1 -1
package/man/bellwether.1.md +2 -2
package/package.json +1 -1

package/dist/docs/agents.js CHANGED Viewed

@@ -1,6 +1,7 @@
-import { formatDateISO, formatDuration, escapeTableCell, mermaidLabel, validateJsonForCodeBlock } from '../utils/index.js';
-import { DISPLAY_LIMITS, MATH_FACTORS } from '../constants.js';
+import { formatDateISO, formatDuration, escapeTableCell, mermaidLabel, validateJsonForCodeBlock, } from '../utils/index.js';
+import { DISPLAY_LIMITS, MATH_FACTORS, MCP } from '../constants.js';
 import { calculatePerformanceMetrics, extractParameters, looksLikeError } from './shared.js';
+import { getFeatureFlags } from '../protocol/index.js';
 /**
  * Detect configuration issues based on error patterns.
  * Returns a warning message if issues detected, null otherwise.
@@ -17,7 +18,7 @@ function detectConfigurationIssues(profiles, metadata) {
     for (const profile of profiles) {
         for (const interaction of profile.interactions) {
             // Get response text content
-            const textContent = interaction.response?.content?.find(c => c.type === 'text');
+            const textContent = interaction.response?.content?.find((c) => c.type === 'text');
             const responseText = textContent && 'text' in textContent ? String(textContent.text) : '';
             const errorText = interaction.error || '';
             const combined = errorText + responseText;
@@ -33,8 +34,8 @@ function detectConfigurationIssues(profiles, metadata) {
     }
     // If we found access-related errors, show warning
     if (totalErrors > 0 && accessErrors / totalErrors > 0.5) {
-        return 'Most tool calls failed with access-related errors. The server may not have been configured with allowed directories. ' +
-            'For filesystem servers, try: `bellwether check npx @modelcontextprotocol/server-filesystem /path/to/allowed/dir`';
+        return ('Most tool calls failed with access-related errors. The server may not have been configured with allowed directories. ' +
+            'For filesystem servers, try: `bellwether check npx @modelcontextprotocol/server-filesystem /path/to/allowed/dir`');
     }
     // Also check synthesized limitations for access patterns (fallback)
     let accessRelatedLimitations = 0;
@@ -48,9 +49,11 @@ function detectConfigurationIssues(profiles, metadata) {
         }
     }
     // If most tools have access-related limitations and high error rate, show warning
-    if (totalLimitations > 0 && accessRelatedLimitations / totalLimitations > 0.5 && errorRate > 0.8) {
-        return 'Most tool calls failed, likely due to missing allowed directories configuration. ' +
-            'For filesystem servers, try: `bellwether check npx @modelcontextprotocol/server-filesystem /path/to/allowed/dir`';
+    if (totalLimitations > 0 &&
+        accessRelatedLimitations / totalLimitations > 0.5 &&
+        errorRate > 0.8) {
+        return ('Most tool calls failed, likely due to missing allowed directories configuration. ' +
+            'For filesystem servers, try: `bellwether check npx @modelcontextprotocol/server-filesystem /path/to/allowed/dir`');
     }
     return null;
 }
@@ -75,14 +78,25 @@ export function generateAgentsMd(result) {
     lines.push('');
     lines.push(summary);
     lines.push('');
+    const features = getFeatureFlags(discovery.protocolVersion);
     lines.push(`**Server Version:** ${discovery.serverInfo.version}`);
     lines.push(`**Protocol Version:** ${discovery.protocolVersion}`);
+    if (discovery.protocolVersion !== MCP.PROTOCOL_VERSION) {
+        lines.push(`*(Server protocol; bellwether supports up to ${MCP.PROTOCOL_VERSION})*`);
+    }
     // Show personas used
     if (metadata.personas && metadata.personas.length > 0) {
-        const personaNames = metadata.personas.map(p => p.name).join(', ');
+        const personaNames = metadata.personas.map((p) => p.name).join(', ');
         lines.push(`**Interview Personas:** ${personaNames}`);
     }
     lines.push('');
+    // Server instructions
+    if (discovery.instructions) {
+        lines.push('## Server Instructions');
+        lines.push('');
+        lines.push(discovery.instructions);
+        lines.push('');
+    }
     // Capabilities summary
     lines.push('## Capabilities');
     lines.push('');
@@ -95,6 +109,15 @@ export function generateAgentsMd(result) {
     if (discovery.capabilities.resources) {
         lines.push(`- **Resources:** ${(discovery.resources ?? []).length} available`);
     }
+    if (discovery.resourceTemplates && discovery.resourceTemplates.length > 0) {
+        lines.push(`- **Resource Templates:** ${discovery.resourceTemplates.length} available`);
+    }
+    if (discovery.capabilities.completions && features.completions) {
+        lines.push('- **Completions:** Supported');
+    }
+    if (discovery.capabilities.tasks && features.tasks) {
+        lines.push('- **Tasks:** Supported');
+    }
     if (discovery.capabilities.logging) {
         lines.push('- **Logging:** Supported');
     }
@@ -129,7 +152,23 @@ export function generateAgentsMd(result) {
             lines.push(profile.description);
             lines.push('');
             // Find the original tool to get schema
-            const tool = discovery.tools.find(t => t.name === profile.name);
+            const tool = discovery.tools.find((t) => t.name === profile.name);
+            // Show tool annotations (behavioral hints) — version-gated
+            if (features.toolAnnotations && tool?.annotations) {
+                const hints = [];
+                if (tool.annotations.readOnlyHint)
+                    hints.push('read-only');
+                if (tool.annotations.destructiveHint)
+                    hints.push('destructive');
+                if (tool.annotations.idempotentHint)
+                    hints.push('idempotent');
+                if (tool.annotations.openWorldHint)
+                    hints.push('open-world');
+                if (hints.length > 0) {
+                    lines.push(`**Behavioral Hints:** ${hints.join(', ')}`);
+                    lines.push('');
+                }
+            }
             if (tool?.inputSchema) {
                 lines.push('**Input Schema:**');
                 // Validate JSON and escape for code block
@@ -139,6 +178,15 @@ export function generateAgentsMd(result) {
                 lines.push('```');
                 lines.push('');
             }
+            // Show output schema if present — version-gated
+            if (features.structuredOutput && tool?.outputSchema) {
+                lines.push('**Output Schema:**');
+                const outputSchemaJson = validateJsonForCodeBlock(tool.outputSchema);
+                lines.push('```json');
+                lines.push(outputSchemaJson.content);
+                lines.push('```');
+                lines.push('');
+            }
             // Add sample response if we have successful calls
             const sampleResponse = generateSampleResponse(profile);
             if (sampleResponse.length > 0) {
@@ -203,14 +251,14 @@ export function generateAgentsMd(result) {
     }
     // Common Workflows section (summarized view of successful workflows)
     if (result.workflowResults && result.workflowResults.length > 0) {
-        const successfulWorkflows = result.workflowResults.filter(wr => wr.success);
+        const successfulWorkflows = result.workflowResults.filter((wr) => wr.success);
         if (successfulWorkflows.length > 0) {
             lines.push('## Common Workflows');
             lines.push('');
             lines.push('These workflows demonstrate recommended patterns for using tools together:');
             lines.push('');
             for (const wr of successfulWorkflows) {
-                const toolSequence = wr.workflow.steps.map(s => `\`${s.tool}\``).join(' → ');
+                const toolSequence = wr.workflow.steps.map((s) => `\`${s.tool}\``).join(' → ');
                 lines.push(`### ${wr.workflow.name}`);
                 lines.push('');
                 lines.push(wr.workflow.description);
@@ -418,6 +466,26 @@ export function generateAgentsMd(result) {
             }
         }
     }
+    // Resource Templates section
+    if (discovery.resourceTemplates && discovery.resourceTemplates.length > 0) {
+        lines.push('## Resource Templates');
+        lines.push('');
+        lines.push('Resource templates define URI patterns for dynamically-generated resources.');
+        lines.push('');
+        for (const template of discovery.resourceTemplates) {
+            lines.push(`### ${template.name}`);
+            lines.push('');
+            lines.push(`**URI Template:** \`${template.uriTemplate}\``);
+            if (template.mimeType) {
+                lines.push(`**MIME Type:** ${template.mimeType}`);
+            }
+            lines.push('');
+            if (template.description) {
+                lines.push(template.description);
+                lines.push('');
+            }
+        }
+    }
     // Overall limitations
     if (limitations.length > 0) {
         lines.push('## Known Limitations');
@@ -485,7 +553,7 @@ function generateQuickReference(tools, profiles) {
     lines.push('|------|------------|---------|');
     for (const tool of tools) {
         const params = extractParameters(tool.inputSchema);
-        const profile = profiles.find(p => p.name === tool.name);
+        const profile = profiles.find((p) => p.name === tool.name);
         const returnType = inferReturnTypeDetailed(profile);
         // Escape table cell content to prevent broken tables
         lines.push(`| \`${escapeTableCell(tool.name)}\` | ${escapeTableCell(params)} | ${escapeTableCell(returnType)} |`);
@@ -494,7 +562,7 @@ function generateQuickReference(tools, profiles) {
     // Only add example section if we have at least one successful example
     const successfulExamples = [];
     for (const tool of tools) {
-        const profile = profiles.find(p => p.name === tool.name);
+        const profile = profiles.find((p) => p.name === tool.name);
         const example = generateExampleSnippet(tool, profile);
         if (example) {
             successfulExamples.push({ tool, example });
@@ -554,10 +622,10 @@ function inferReturnTypeDetailed(profile) {
         return 'unknown';
     }
     // Look at successful interactions that don't have error-like content
-    const successful = profile.interactions.find(i => {
+    const successful = profile.interactions.find((i) => {
         if (i.error || !i.response || i.response.isError)
             return false;
-        const textContent = i.response.content?.find(c => c.type === 'text');
+        const textContent = i.response.content?.find((c) => c.type === 'text');
         if (textContent && 'text' in textContent) {
             if (looksLikeError(String(textContent.text)))
                 return false;
@@ -577,7 +645,7 @@ function inferReturnTypeDetailed(profile) {
         return 'empty response';
     }
     // Check content types
-    const types = new Set(content.map(c => c.type));
+    const types = new Set(content.map((c) => c.type));
     if (types.size === 1) {
         const type = content[0].type;
         if (type === 'text') {
@@ -634,10 +702,10 @@ function generateExampleSnippet(tool, profile) {
         return null;
     }
     // Find a successful interaction (not an error) that doesn't have error-like content
-    const successful = profile.interactions.find(i => {
+    const successful = profile.interactions.find((i) => {
         if (i.error || !i.response || i.response.isError)
             return false;
-        const textContent = i.response.content?.find(c => c.type === 'text');
+        const textContent = i.response.content?.find((c) => c.type === 'text');
         if (textContent && 'text' in textContent) {
             if (looksLikeError(String(textContent.text)))
                 return false;
@@ -659,11 +727,11 @@ function generateExampleSnippet(tool, profile) {
 function generateSampleResponse(profile) {
     const lines = [];
     // Find a successful interaction with a response that doesn't look like an error
-    const successful = profile.interactions.find(i => {
+    const successful = profile.interactions.find((i) => {
         if (i.error || !i.response || i.response.isError)
             return false;
         // Also check if the response content looks like an error
-        const textContent = i.response.content?.find(c => c.type === 'text');
+        const textContent = i.response.content?.find((c) => c.type === 'text');
         if (textContent && 'text' in textContent) {
             if (looksLikeError(String(textContent.text)))
                 return false;
@@ -673,7 +741,7 @@ function generateSampleResponse(profile) {
     if (!successful || !successful.response?.content) {
         return lines;
     }
-    const textContent = successful.response.content.find(c => c.type === 'text');
+    const textContent = successful.response.content.find((c) => c.type === 'text');
     if (!textContent || !('text' in textContent)) {
         return lines;
     }
@@ -710,16 +778,22 @@ function extractCommonConstraints(profiles) {
     const constraintCounts = new Map();
     const toolConstraints = new Map();
     const normalizeConstraint = (c) => {
-        return c.toLowerCase()
-            .replace(/['"`]/g, '')
-            .replace(/\s+/g, ' ')
-            .trim();
+        return c.toLowerCase().replace(/['"`]/g, '').replace(/\s+/g, ' ').trim();
     };
     // Common patterns that should be grouped
     const commonPatterns = [
-        { pattern: /directory.*restriction|access.*control|allowed.*director|within allowed/i, label: 'Directory access restrictions apply' },
-        { pattern: /path.*restriction|access denied.*path|outside.*allowed/i, label: 'Path access is restricted' },
-        { pattern: /requires.*parameter|parameter.*required|missing.*parameter/i, label: 'Validates required parameters' },
+        {
+            pattern: /directory.*restriction|access.*control|allowed.*director|within allowed/i,
+            label: 'Directory access restrictions apply',
+        },
+        {
+            pattern: /path.*restriction|access denied.*path|outside.*allowed/i,
+            label: 'Path access is restricted',
+        },
+        {
+            pattern: /requires.*parameter|parameter.*required|missing.*parameter/i,
+            label: 'Validates required parameters',
+        },
     ];
     for (const profile of profiles) {
         const toolSpecific = [];
@@ -767,11 +841,11 @@ function extractCommonConstraints(profiles) {
     // Remove common constraints from per-tool lists
     if (common.length > 0) {
         for (const [toolName, constraints] of toolConstraints) {
-            const filtered = constraints.filter(c => {
+            const filtered = constraints.filter((c) => {
                 const normalized = normalizeConstraint(c);
                 // Keep if it's tool-specific
-                return !common.some(common => normalizeConstraint(common) === normalized ||
-                    commonPatterns.some(p => p.label === common && p.pattern.test(c)));
+                return !common.some((common) => normalizeConstraint(common) === normalized ||
+                    commonPatterns.some((p) => p.label === common && p.pattern.test(c)));
             });
             toolConstraints.set(toolName, filtered);
         }
@@ -815,9 +889,9 @@ function generateSecuritySection(profiles) {
     lines.push('## Security Considerations');
     lines.push('');
     // Group by severity
-    const critical = securityFindings.filter(f => f.severity === 'critical');
-    const warnings = securityFindings.filter(f => f.severity === 'warning');
-    const info = securityFindings.filter(f => f.severity === 'info');
+    const critical = securityFindings.filter((f) => f.severity === 'critical');
+    const warnings = securityFindings.filter((f) => f.severity === 'warning');
+    const info = securityFindings.filter((f) => f.severity === 'info');
     if (critical.length > 0) {
         lines.push('### Critical Issues');
         lines.push('');
@@ -849,12 +923,28 @@ function generateSecuritySection(profiles) {
  */
 function classifySecuritySeverity(note) {
     const lowerNote = note.toLowerCase();
-    const criticalKeywords = ['injection', 'rce', 'remote code', 'arbitrary code', 'command execution', 'sql injection', 'xss'];
-    const warningKeywords = ['risk', 'vulnerab', 'dangerous', 'unsafe', 'leak', 'exposure', 'sensitive'];
-    if (criticalKeywords.some(kw => lowerNote.includes(kw))) {
+    const criticalKeywords = [
+        'injection',
+        'rce',
+        'remote code',
+        'arbitrary code',
+        'command execution',
+        'sql injection',
+        'xss',
+    ];
+    const warningKeywords = [
+        'risk',
+        'vulnerab',
+        'dangerous',
+        'unsafe',
+        'leak',
+        'exposure',
+        'sensitive',
+    ];
+    if (criticalKeywords.some((kw) => lowerNote.includes(kw))) {
         return 'critical';
     }
-    if (warningKeywords.some(kw => lowerNote.includes(kw))) {
+    if (warningKeywords.some((kw) => lowerNote.includes(kw))) {
         return 'warning';
     }
     return 'info';
@@ -882,7 +972,7 @@ function generatePerformanceSection(profiles) {
     }
     lines.push('');
     // Add timing breakdown if separate timing data is available
-    const metricsWithBreakdown = metrics.filter(m => m.avgToolMs !== undefined && m.avgAnalysisMs !== undefined);
+    const metricsWithBreakdown = metrics.filter((m) => m.avgToolMs !== undefined && m.avgAnalysisMs !== undefined);
     if (metricsWithBreakdown.length > 0) {
         lines.push('### Timing Breakdown');
         lines.push('');
@@ -891,19 +981,17 @@ function generatePerformanceSection(profiles) {
         lines.push('| Tool | Total Avg | Tool Exec | LLM Analysis | Tool % |');
         lines.push('|------|-----------|-----------|--------------|--------|');
         for (const m of metricsWithBreakdown) {
-            const toolPct = m.avgToolMs !== undefined && m.avgMs > 0
-                ? Math.round((m.avgToolMs / m.avgMs) * 100)
-                : 0;
+            const toolPct = m.avgToolMs !== undefined && m.avgMs > 0 ? Math.round((m.avgToolMs / m.avgMs) * 100) : 0;
             lines.push(`| \`${escapeTableCell(m.toolName)}\` | ${m.avgMs}ms | ${m.avgToolMs}ms | ${m.avgAnalysisMs}ms | ${toolPct}% |`);
         }
         lines.push('');
     }
     // Add performance insights
-    const slowTools = metrics.filter(m => m.avgMs > 1000);
-    const unreliableTools = metrics.filter(m => m.errorRate > 0.3);
+    const slowTools = metrics.filter((m) => m.avgMs > 1000);
+    const unreliableTools = metrics.filter((m) => m.errorRate > 0.3);
     // Identify tools where LLM analysis dominates (>70% of total time)
-    const llmDominatedTools = metricsWithBreakdown.filter(m => {
-        const toolPct = m.avgToolMs !== undefined && m.avgMs > 0 ? (m.avgToolMs / m.avgMs) : 0;
+    const llmDominatedTools = metricsWithBreakdown.filter((m) => {
+        const toolPct = m.avgToolMs !== undefined && m.avgMs > 0 ? m.avgToolMs / m.avgMs : 0;
         return toolPct < 0.3; // Tool execution is < 30% means LLM is > 70%
     });
     if (slowTools.length > 0 || unreliableTools.length > 0 || llmDominatedTools.length > 0) {
@@ -948,7 +1036,7 @@ function generatePerformanceSection(profiles) {
 function generateBehavioralMatrix(profiles, personas) {
     const lines = [];
     // Check if we have findings by persona
-    const hasPersonaFindings = profiles.some(p => p.findingsByPersona && p.findingsByPersona.length > 0);
+    const hasPersonaFindings = profiles.some((p) => p.findingsByPersona && p.findingsByPersona.length > 0);
     if (!hasPersonaFindings) {
         return [];
     }
@@ -957,14 +1045,14 @@ function generateBehavioralMatrix(profiles, personas) {
     lines.push('Summary of findings by tool and persona:');
     lines.push('');
     // Build header - escape persona names in case they contain special characters
-    const header = ['Tool', ...personas.map(p => escapeTableCell(p.name))];
+    const header = ['Tool', ...personas.map((p) => escapeTableCell(p.name))];
     lines.push(`| ${header.join(' | ')} |`);
     lines.push(`| ${header.map(() => '---').join(' | ')} |`);
     // Build rows
     for (const profile of profiles) {
         const row = [escapeTableCell(profile.name)];
         for (const persona of personas) {
-            const findings = profile.findingsByPersona?.find(f => f.personaId === persona.id);
+            const findings = profile.findingsByPersona?.find((f) => f.personaId === persona.id);
             if (findings) {
                 const count = findings.behavioralNotes.length +
                     findings.limitations.length +

package/dist/docs/contract.js CHANGED Viewed

@@ -5,7 +5,8 @@ import { formatDateISO, formatDuration, escapeTableCell, mermaidLabel, validateJ
 import { smartTruncate, getExampleLength } from '../utils/smart-truncate.js';
 import { calculatePerformanceMetrics, extractParameters, looksLikeError } from './shared.js';
 import { analyzeExternalDependencies, formatExternalDependenciesMarkdown, } from '../baseline/external-dependency-detector.js';
-import { SEMANTIC_VALIDATION, SCHEMA_EVOLUTION, ERROR_ANALYSIS, PERFORMANCE_CONFIDENCE, DOCUMENTATION_SCORING, EXAMPLE_OUTPUT, EXTERNAL_DEPENDENCIES, RELIABILITY_DISPLAY, CONFIDENCE_INDICATORS, DISPLAY_LIMITS, ISSUE_CLASSIFICATION, } from '../constants.js';
+import { SEMANTIC_VALIDATION, SCHEMA_EVOLUTION, ERROR_ANALYSIS, PERFORMANCE_CONFIDENCE, DOCUMENTATION_SCORING, EXAMPLE_OUTPUT, EXTERNAL_DEPENDENCIES, RELIABILITY_DISPLAY, CONFIDENCE_INDICATORS, DISPLAY_LIMITS, ISSUE_CLASSIFICATION, MCP, } from '../constants.js';
+import { getFeatureFlags } from '../protocol/index.js';
 /**
  * Classify issues by their source to help users understand which issues
  * are actual bugs vs expected behavior or environment issues.
@@ -147,11 +148,20 @@ export function generateContractMd(result, options) {
     // Overview
     lines.push('## Overview');
     lines.push('');
+    const features = getFeatureFlags(discovery.protocolVersion);
     lines.push(`**Server Version:** ${discovery.serverInfo.version}`);
     lines.push(`**Protocol Version:** ${discovery.protocolVersion}`);
+    if (discovery.protocolVersion !== MCP.PROTOCOL_VERSION) {
+        lines.push(`*(Server protocol; bellwether supports up to ${MCP.PROTOCOL_VERSION})*`);
+    }
     lines.push('');
     const performanceMetrics = calculatePerformanceMetrics(toolProfiles);
     const performanceByTool = new Map(performanceMetrics.map((metric) => [metric.toolName, metric]));
+    // Server instructions
+    if (discovery.instructions) {
+        lines.push(`**Server Instructions:** ${discovery.instructions}`);
+        lines.push('');
+    }
     // Capabilities summary
     lines.push('## Capabilities');
     lines.push('');
@@ -164,6 +174,15 @@ export function generateContractMd(result, options) {
     if (discovery.capabilities.resources) {
         lines.push(`- **Resources:** ${(discovery.resources ?? []).length} available`);
     }
+    if (discovery.resourceTemplates && discovery.resourceTemplates.length > 0) {
+        lines.push(`- **Resource Templates:** ${discovery.resourceTemplates.length} available`);
+    }
+    if (discovery.capabilities.completions && features.completions) {
+        lines.push('- **Completions:** Supported');
+    }
+    if (discovery.capabilities.tasks && features.tasks) {
+        lines.push('- **Tasks:** Supported');
+    }
     if (discovery.capabilities.logging) {
         lines.push('- **Logging:** Supported');
     }
@@ -321,6 +340,22 @@ export function generateContractMd(result, options) {
                     lines.push('');
                 }
             }
+            // Show tool annotations (behavioral hints) — version-gated
+            if (features.toolAnnotations && tool.annotations) {
+                const hints = [];
+                if (tool.annotations.readOnlyHint)
+                    hints.push('read-only');
+                if (tool.annotations.destructiveHint)
+                    hints.push('destructive');
+                if (tool.annotations.idempotentHint)
+                    hints.push('idempotent');
+                if (tool.annotations.openWorldHint)
+                    hints.push('open-world');
+                if (hints.length > 0) {
+                    lines.push(`**Behavioral Hints:** ${hints.join(', ')}`);
+                    lines.push('');
+                }
+            }
             if (tool.inputSchema) {
                 lines.push('**Input Schema:**');
                 const schemaJson = validateJsonForCodeBlock(tool.inputSchema);
@@ -329,6 +364,15 @@ export function generateContractMd(result, options) {
                 lines.push('```');
                 lines.push('');
             }
+            // Show output schema if present — version-gated
+            if (features.structuredOutput && tool.outputSchema) {
+                lines.push('**Output Schema:**');
+                const outputSchemaJson = validateJsonForCodeBlock(tool.outputSchema);
+                lines.push('```json');
+                lines.push(outputSchemaJson.content);
+                lines.push('```');
+                lines.push('');
+            }
             // Add example usage from successful interactions
             const examples = generateToolExamples(profile, maxExamplesPerTool, exampleLength);
             if (examples.length > 0) {
@@ -380,6 +424,24 @@ export function generateContractMd(result, options) {
             }
         }
     }
+    // Resource Templates section
+    if (discovery.resourceTemplates && discovery.resourceTemplates.length > 0) {
+        lines.push('## Resource Templates');
+        lines.push('');
+        for (const template of discovery.resourceTemplates) {
+            lines.push(`### ${template.name}`);
+            lines.push('');
+            lines.push(`**URI Template:** \`${template.uriTemplate}\``);
+            if (template.mimeType) {
+                lines.push(`**MIME Type:** ${template.mimeType}`);
+            }
+            lines.push('');
+            if (template.description) {
+                lines.push(template.description);
+                lines.push('');
+            }
+        }
+    }
     // Error Summary section
     const errorSummary = generateErrorSummarySection(toolProfiles);
     if (errorSummary.length > 0) {

package/dist/errors/retry.js CHANGED Viewed

@@ -166,11 +166,15 @@ export const LLM_RETRY_OPTIONS = {
             return true;
         }
         // Auth errors - don't retry
-        if (message.includes('401') || message.includes('unauthorized') || message.includes('api key')) {
+        if (message.includes('401') ||
+            message.includes('unauthorized') ||
+            message.includes('api key')) {
             return false;
         }
         // Quota errors - don't retry
-        if (message.includes('quota') || message.includes('insufficient') || message.includes('credit')) {
+        if (message.includes('quota') ||
+            message.includes('insufficient') ||
+            message.includes('credit')) {
             return false;
         }
         // Default: use standard isRetryable
@@ -256,9 +260,7 @@ export function createCircuitBreaker(name, options = {}) {
         // Check if circuit is open
         if (state.isOpen) {
             const now = new Date();
-            const timeSinceOpen = state.openedAt
-                ? now.getTime() - state.openedAt.getTime()
-                : 0;
+            const timeSinceOpen = state.openedAt ? now.getTime() - state.openedAt.getTime() : 0;
             if (timeSinceOpen < resetTimeMs) {
                 // Still in open state
                 throw new BellwetherError(`Circuit breaker '${name}' is open`, {
@@ -306,6 +308,10 @@ export function createCircuitBreaker(name, options = {}) {
             }
             state.failures++;
             state.lastFailure = now;
+            // If half-open test failed, reset openedAt to restart cooldown timer
+            if (state.isOpen) {
+                state.openedAt = now;
+            }
             // Check if we should open the circuit
             if (state.failures >= failureThreshold && !state.isOpen) {
                 state.isOpen = true;

package/dist/interview/rate-limiter.js CHANGED Viewed

@@ -8,6 +8,12 @@ export class RateLimiter {
     lastRefill;
     constructor(config) {
         this.config = config;
+        if (config.requestsPerSecond <= 0) {
+            throw new Error(`requestsPerSecond must be positive, got ${config.requestsPerSecond}`);
+        }
+        if (config.burstLimit <= 0) {
+            throw new Error(`burstLimit must be positive, got ${config.burstLimit}`);
+        }
         this.tokens = config.burstLimit;
         this.lastRefill = Date.now();
     }
@@ -45,9 +51,7 @@ export function calculateBackoffMs(attempt, strategy) {
     const baseDelay = RATE_LIMITING.BASE_DELAY_MS;
     const maxDelay = RATE_LIMITING.MAX_DELAY_MS;
     const jitter = RATE_LIMITING.JITTER_RATIO;
-    const rawDelay = strategy === 'linear'
-        ? baseDelay * (attempt + 1)
-        : baseDelay * Math.pow(2, attempt);
+    const rawDelay = strategy === 'linear' ? baseDelay * (attempt + 1) : baseDelay * Math.pow(2, attempt);
     const capped = Math.min(rawDelay, maxDelay);
     const jitterDelta = capped * jitter * (Math.random() - 0.5) * 2;
     return Math.max(0, Math.round(capped + jitterDelta));

package/dist/llm/anthropic.js CHANGED Viewed

@@ -334,11 +334,17 @@ export class AnthropicClient {
                 }
                 // Convert to typed errors for retry logic (same as chat method)
                 if (error instanceof Error) {
-                    const message = error.message.toLowerCase();
-                    if (message.includes('401') || message.includes('authentication')) {
+                    const status = getErrorStatus(error);
+                    const code = (getErrorCode(error) ?? '').toLowerCase();
+                    const type = (getErrorType(error) ?? '').toLowerCase();
+                    const message = getErrorMessage(error).toLowerCase();
+                    if (status === 401 || status === 403 || message.includes('authentication')) {
                         throw new LLMAuthError('anthropic', model);
                     }
-                    if (message.includes('429') || message.includes('rate limit')) {
+                    if (status === 429 ||
+                        code.includes('rate_limit') ||
+                        type.includes('rate_limit') ||
+                        message.includes('rate limit')) {
                         let retryAfterMs;
                         const apiError = error;
                         if (apiError.headers?.get) {
@@ -361,7 +367,11 @@ export class AnthropicClient {
                         }
                         throw new LLMRateLimitError('anthropic', retryAfterMs, model);
                     }
-                    if (message.includes('insufficient') || message.includes('credit')) {
+                    if (status === 402 ||
+                        code.includes('insufficient') ||
+                        type.includes('insufficient') ||
+                        message.includes('insufficient') ||
+                        message.includes('credit')) {
                         throw new LLMQuotaError('anthropic', model);
                     }
                     if (message.includes('econnrefused') || message.includes('fetch failed')) {

package/dist/llm/fallback.d.ts CHANGED Viewed

@@ -69,6 +69,7 @@ export declare class FallbackLLMClient implements LLMClient {
     /**
      * Check if a provider is currently healthy.
      */
+    private healthCheckInProgress;
     private isProviderHealthy;
     /**
      * Mark a provider as failed.

package/dist/llm/fallback.js CHANGED Viewed

@@ -60,6 +60,7 @@ export class FallbackLLMClient {
             try {
                 const ollamaClient = new OllamaClient({
                     model: this.config.ollamaModel,
+                    onUsage: this.config.onUsage,
                 });
                 this.clients.set('ollama', ollamaClient);
                 this.providerOrder.push('ollama');
@@ -112,18 +113,23 @@ export class FallbackLLMClient {
     /**
      * Check if a provider is currently healthy.
      */
+    healthCheckInProgress = false;
     isProviderHealthy(providerId) {
         const health = this.health.get(providerId);
         if (!health)
             return false;
         // If marked unhealthy, check if retry delay has passed
-        if (!health.healthy) {
+        if (!health.healthy && !this.healthCheckInProgress) {
             const timeSinceCheck = Date.now() - health.lastChecked.getTime();
             if (timeSinceCheck >= this.config.unhealthyRetryDelayMs) {
+                // Prevent concurrent health resets
+                this.healthCheckInProgress = true;
                 // Reset to allow retry
                 health.healthy = true;
                 health.consecutiveFailures = 0;
+                health.lastChecked = new Date();
                 logger.info({ provider: providerId }, 'Resetting unhealthy provider for retry');
+                this.healthCheckInProgress = false;
             }
         }
         return health.healthy;