@dotsetlabs/bellwether 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/CHANGELOG.md +44 -0
  2. package/README.md +9 -0
  3. package/dist/auth/credentials.js +2 -0
  4. package/dist/baseline/accessors.js +12 -0
  5. package/dist/baseline/baseline-format.d.ts +48 -0
  6. package/dist/baseline/comparator.js +263 -20
  7. package/dist/baseline/converter.js +52 -4
  8. package/dist/baseline/response-fingerprint.js +1 -1
  9. package/dist/baseline/saver.js +34 -0
  10. package/dist/baseline/types.d.ts +21 -1
  11. package/dist/cache/response-cache.js +9 -2
  12. package/dist/cli/commands/baseline.js +70 -35
  13. package/dist/cli/commands/check.js +48 -9
  14. package/dist/cli/commands/explore.js +36 -3
  15. package/dist/cli/commands/init.js +10 -7
  16. package/dist/cli/commands/watch.js +5 -5
  17. package/dist/config/loader.js +2 -2
  18. package/dist/constants/core.d.ts +1 -1
  19. package/dist/constants/core.js +1 -1
  20. package/dist/discovery/discovery.js +88 -14
  21. package/dist/discovery/types.d.ts +5 -1
  22. package/dist/docs/agents.js +138 -50
  23. package/dist/docs/contract.js +63 -1
  24. package/dist/errors/retry.js +11 -5
  25. package/dist/interview/rate-limiter.js +7 -3
  26. package/dist/llm/anthropic.js +14 -4
  27. package/dist/llm/fallback.d.ts +1 -0
  28. package/dist/llm/fallback.js +7 -1
  29. package/dist/llm/openai.js +15 -4
  30. package/dist/protocol/index.d.ts +2 -0
  31. package/dist/protocol/index.js +2 -0
  32. package/dist/protocol/version-registry.d.ts +66 -0
  33. package/dist/protocol/version-registry.js +159 -0
  34. package/dist/transport/http-transport.d.ts +11 -1
  35. package/dist/transport/http-transport.js +21 -2
  36. package/dist/transport/mcp-client.d.ts +29 -1
  37. package/dist/transport/mcp-client.js +92 -7
  38. package/dist/transport/sse-transport.js +5 -4
  39. package/dist/transport/types.d.ts +134 -1
  40. package/dist/utils/concurrency.d.ts +2 -0
  41. package/dist/utils/concurrency.js +9 -2
  42. package/dist/utils/markdown.js +13 -18
  43. package/dist/utils/timeout.js +2 -1
  44. package/dist/version.js +1 -1
  45. package/man/bellwether.1 +1 -1
  46. package/man/bellwether.1.md +2 -2
  47. package/package.json +1 -1
@@ -1,6 +1,7 @@
1
- import { formatDateISO, formatDuration, escapeTableCell, mermaidLabel, validateJsonForCodeBlock } from '../utils/index.js';
2
- import { DISPLAY_LIMITS, MATH_FACTORS } from '../constants.js';
1
+ import { formatDateISO, formatDuration, escapeTableCell, mermaidLabel, validateJsonForCodeBlock, } from '../utils/index.js';
2
+ import { DISPLAY_LIMITS, MATH_FACTORS, MCP } from '../constants.js';
3
3
  import { calculatePerformanceMetrics, extractParameters, looksLikeError } from './shared.js';
4
+ import { getFeatureFlags } from '../protocol/index.js';
4
5
  /**
5
6
  * Detect configuration issues based on error patterns.
6
7
  * Returns a warning message if issues detected, null otherwise.
@@ -17,7 +18,7 @@ function detectConfigurationIssues(profiles, metadata) {
17
18
  for (const profile of profiles) {
18
19
  for (const interaction of profile.interactions) {
19
20
  // Get response text content
20
- const textContent = interaction.response?.content?.find(c => c.type === 'text');
21
+ const textContent = interaction.response?.content?.find((c) => c.type === 'text');
21
22
  const responseText = textContent && 'text' in textContent ? String(textContent.text) : '';
22
23
  const errorText = interaction.error || '';
23
24
  const combined = errorText + responseText;
@@ -33,8 +34,8 @@ function detectConfigurationIssues(profiles, metadata) {
33
34
  }
34
35
  // If we found access-related errors, show warning
35
36
  if (totalErrors > 0 && accessErrors / totalErrors > 0.5) {
36
- return 'Most tool calls failed with access-related errors. The server may not have been configured with allowed directories. ' +
37
- 'For filesystem servers, try: `bellwether check npx @modelcontextprotocol/server-filesystem /path/to/allowed/dir`';
37
+ return ('Most tool calls failed with access-related errors. The server may not have been configured with allowed directories. ' +
38
+ 'For filesystem servers, try: `bellwether check npx @modelcontextprotocol/server-filesystem /path/to/allowed/dir`');
38
39
  }
39
40
  // Also check synthesized limitations for access patterns (fallback)
40
41
  let accessRelatedLimitations = 0;
@@ -48,9 +49,11 @@ function detectConfigurationIssues(profiles, metadata) {
48
49
  }
49
50
  }
50
51
  // If most tools have access-related limitations and high error rate, show warning
51
- if (totalLimitations > 0 && accessRelatedLimitations / totalLimitations > 0.5 && errorRate > 0.8) {
52
- return 'Most tool calls failed, likely due to missing allowed directories configuration. ' +
53
- 'For filesystem servers, try: `bellwether check npx @modelcontextprotocol/server-filesystem /path/to/allowed/dir`';
52
+ if (totalLimitations > 0 &&
53
+ accessRelatedLimitations / totalLimitations > 0.5 &&
54
+ errorRate > 0.8) {
55
+ return ('Most tool calls failed, likely due to missing allowed directories configuration. ' +
56
+ 'For filesystem servers, try: `bellwether check npx @modelcontextprotocol/server-filesystem /path/to/allowed/dir`');
54
57
  }
55
58
  return null;
56
59
  }
@@ -75,14 +78,25 @@ export function generateAgentsMd(result) {
75
78
  lines.push('');
76
79
  lines.push(summary);
77
80
  lines.push('');
81
+ const features = getFeatureFlags(discovery.protocolVersion);
78
82
  lines.push(`**Server Version:** ${discovery.serverInfo.version}`);
79
83
  lines.push(`**Protocol Version:** ${discovery.protocolVersion}`);
84
+ if (discovery.protocolVersion !== MCP.PROTOCOL_VERSION) {
85
+ lines.push(`*(Server protocol; bellwether supports up to ${MCP.PROTOCOL_VERSION})*`);
86
+ }
80
87
  // Show personas used
81
88
  if (metadata.personas && metadata.personas.length > 0) {
82
- const personaNames = metadata.personas.map(p => p.name).join(', ');
89
+ const personaNames = metadata.personas.map((p) => p.name).join(', ');
83
90
  lines.push(`**Interview Personas:** ${personaNames}`);
84
91
  }
85
92
  lines.push('');
93
+ // Server instructions
94
+ if (discovery.instructions) {
95
+ lines.push('## Server Instructions');
96
+ lines.push('');
97
+ lines.push(discovery.instructions);
98
+ lines.push('');
99
+ }
86
100
  // Capabilities summary
87
101
  lines.push('## Capabilities');
88
102
  lines.push('');
@@ -95,6 +109,15 @@ export function generateAgentsMd(result) {
95
109
  if (discovery.capabilities.resources) {
96
110
  lines.push(`- **Resources:** ${(discovery.resources ?? []).length} available`);
97
111
  }
112
+ if (discovery.resourceTemplates && discovery.resourceTemplates.length > 0) {
113
+ lines.push(`- **Resource Templates:** ${discovery.resourceTemplates.length} available`);
114
+ }
115
+ if (discovery.capabilities.completions && features.completions) {
116
+ lines.push('- **Completions:** Supported');
117
+ }
118
+ if (discovery.capabilities.tasks && features.tasks) {
119
+ lines.push('- **Tasks:** Supported');
120
+ }
98
121
  if (discovery.capabilities.logging) {
99
122
  lines.push('- **Logging:** Supported');
100
123
  }
@@ -129,7 +152,23 @@ export function generateAgentsMd(result) {
129
152
  lines.push(profile.description);
130
153
  lines.push('');
131
154
  // Find the original tool to get schema
132
- const tool = discovery.tools.find(t => t.name === profile.name);
155
+ const tool = discovery.tools.find((t) => t.name === profile.name);
156
+ // Show tool annotations (behavioral hints) — version-gated
157
+ if (features.toolAnnotations && tool?.annotations) {
158
+ const hints = [];
159
+ if (tool.annotations.readOnlyHint)
160
+ hints.push('read-only');
161
+ if (tool.annotations.destructiveHint)
162
+ hints.push('destructive');
163
+ if (tool.annotations.idempotentHint)
164
+ hints.push('idempotent');
165
+ if (tool.annotations.openWorldHint)
166
+ hints.push('open-world');
167
+ if (hints.length > 0) {
168
+ lines.push(`**Behavioral Hints:** ${hints.join(', ')}`);
169
+ lines.push('');
170
+ }
171
+ }
133
172
  if (tool?.inputSchema) {
134
173
  lines.push('**Input Schema:**');
135
174
  // Validate JSON and escape for code block
@@ -139,6 +178,15 @@ export function generateAgentsMd(result) {
139
178
  lines.push('```');
140
179
  lines.push('');
141
180
  }
181
+ // Show output schema if present — version-gated
182
+ if (features.structuredOutput && tool?.outputSchema) {
183
+ lines.push('**Output Schema:**');
184
+ const outputSchemaJson = validateJsonForCodeBlock(tool.outputSchema);
185
+ lines.push('```json');
186
+ lines.push(outputSchemaJson.content);
187
+ lines.push('```');
188
+ lines.push('');
189
+ }
142
190
  // Add sample response if we have successful calls
143
191
  const sampleResponse = generateSampleResponse(profile);
144
192
  if (sampleResponse.length > 0) {
@@ -203,14 +251,14 @@ export function generateAgentsMd(result) {
203
251
  }
204
252
  // Common Workflows section (summarized view of successful workflows)
205
253
  if (result.workflowResults && result.workflowResults.length > 0) {
206
- const successfulWorkflows = result.workflowResults.filter(wr => wr.success);
254
+ const successfulWorkflows = result.workflowResults.filter((wr) => wr.success);
207
255
  if (successfulWorkflows.length > 0) {
208
256
  lines.push('## Common Workflows');
209
257
  lines.push('');
210
258
  lines.push('These workflows demonstrate recommended patterns for using tools together:');
211
259
  lines.push('');
212
260
  for (const wr of successfulWorkflows) {
213
- const toolSequence = wr.workflow.steps.map(s => `\`${s.tool}\``).join(' → ');
261
+ const toolSequence = wr.workflow.steps.map((s) => `\`${s.tool}\``).join(' → ');
214
262
  lines.push(`### ${wr.workflow.name}`);
215
263
  lines.push('');
216
264
  lines.push(wr.workflow.description);
@@ -418,6 +466,26 @@ export function generateAgentsMd(result) {
418
466
  }
419
467
  }
420
468
  }
469
+ // Resource Templates section
470
+ if (discovery.resourceTemplates && discovery.resourceTemplates.length > 0) {
471
+ lines.push('## Resource Templates');
472
+ lines.push('');
473
+ lines.push('Resource templates define URI patterns for dynamically-generated resources.');
474
+ lines.push('');
475
+ for (const template of discovery.resourceTemplates) {
476
+ lines.push(`### ${template.name}`);
477
+ lines.push('');
478
+ lines.push(`**URI Template:** \`${template.uriTemplate}\``);
479
+ if (template.mimeType) {
480
+ lines.push(`**MIME Type:** ${template.mimeType}`);
481
+ }
482
+ lines.push('');
483
+ if (template.description) {
484
+ lines.push(template.description);
485
+ lines.push('');
486
+ }
487
+ }
488
+ }
421
489
  // Overall limitations
422
490
  if (limitations.length > 0) {
423
491
  lines.push('## Known Limitations');
@@ -485,7 +553,7 @@ function generateQuickReference(tools, profiles) {
485
553
  lines.push('|------|------------|---------|');
486
554
  for (const tool of tools) {
487
555
  const params = extractParameters(tool.inputSchema);
488
- const profile = profiles.find(p => p.name === tool.name);
556
+ const profile = profiles.find((p) => p.name === tool.name);
489
557
  const returnType = inferReturnTypeDetailed(profile);
490
558
  // Escape table cell content to prevent broken tables
491
559
  lines.push(`| \`${escapeTableCell(tool.name)}\` | ${escapeTableCell(params)} | ${escapeTableCell(returnType)} |`);
@@ -494,7 +562,7 @@ function generateQuickReference(tools, profiles) {
494
562
  // Only add example section if we have at least one successful example
495
563
  const successfulExamples = [];
496
564
  for (const tool of tools) {
497
- const profile = profiles.find(p => p.name === tool.name);
565
+ const profile = profiles.find((p) => p.name === tool.name);
498
566
  const example = generateExampleSnippet(tool, profile);
499
567
  if (example) {
500
568
  successfulExamples.push({ tool, example });
@@ -554,10 +622,10 @@ function inferReturnTypeDetailed(profile) {
554
622
  return 'unknown';
555
623
  }
556
624
  // Look at successful interactions that don't have error-like content
557
- const successful = profile.interactions.find(i => {
625
+ const successful = profile.interactions.find((i) => {
558
626
  if (i.error || !i.response || i.response.isError)
559
627
  return false;
560
- const textContent = i.response.content?.find(c => c.type === 'text');
628
+ const textContent = i.response.content?.find((c) => c.type === 'text');
561
629
  if (textContent && 'text' in textContent) {
562
630
  if (looksLikeError(String(textContent.text)))
563
631
  return false;
@@ -577,7 +645,7 @@ function inferReturnTypeDetailed(profile) {
577
645
  return 'empty response';
578
646
  }
579
647
  // Check content types
580
- const types = new Set(content.map(c => c.type));
648
+ const types = new Set(content.map((c) => c.type));
581
649
  if (types.size === 1) {
582
650
  const type = content[0].type;
583
651
  if (type === 'text') {
@@ -634,10 +702,10 @@ function generateExampleSnippet(tool, profile) {
634
702
  return null;
635
703
  }
636
704
  // Find a successful interaction (not an error) that doesn't have error-like content
637
- const successful = profile.interactions.find(i => {
705
+ const successful = profile.interactions.find((i) => {
638
706
  if (i.error || !i.response || i.response.isError)
639
707
  return false;
640
- const textContent = i.response.content?.find(c => c.type === 'text');
708
+ const textContent = i.response.content?.find((c) => c.type === 'text');
641
709
  if (textContent && 'text' in textContent) {
642
710
  if (looksLikeError(String(textContent.text)))
643
711
  return false;
@@ -659,11 +727,11 @@ function generateExampleSnippet(tool, profile) {
659
727
  function generateSampleResponse(profile) {
660
728
  const lines = [];
661
729
  // Find a successful interaction with a response that doesn't look like an error
662
- const successful = profile.interactions.find(i => {
730
+ const successful = profile.interactions.find((i) => {
663
731
  if (i.error || !i.response || i.response.isError)
664
732
  return false;
665
733
  // Also check if the response content looks like an error
666
- const textContent = i.response.content?.find(c => c.type === 'text');
734
+ const textContent = i.response.content?.find((c) => c.type === 'text');
667
735
  if (textContent && 'text' in textContent) {
668
736
  if (looksLikeError(String(textContent.text)))
669
737
  return false;
@@ -673,7 +741,7 @@ function generateSampleResponse(profile) {
673
741
  if (!successful || !successful.response?.content) {
674
742
  return lines;
675
743
  }
676
- const textContent = successful.response.content.find(c => c.type === 'text');
744
+ const textContent = successful.response.content.find((c) => c.type === 'text');
677
745
  if (!textContent || !('text' in textContent)) {
678
746
  return lines;
679
747
  }
@@ -710,16 +778,22 @@ function extractCommonConstraints(profiles) {
710
778
  const constraintCounts = new Map();
711
779
  const toolConstraints = new Map();
712
780
  const normalizeConstraint = (c) => {
713
- return c.toLowerCase()
714
- .replace(/['"`]/g, '')
715
- .replace(/\s+/g, ' ')
716
- .trim();
781
+ return c.toLowerCase().replace(/['"`]/g, '').replace(/\s+/g, ' ').trim();
717
782
  };
718
783
  // Common patterns that should be grouped
719
784
  const commonPatterns = [
720
- { pattern: /directory.*restriction|access.*control|allowed.*director|within allowed/i, label: 'Directory access restrictions apply' },
721
- { pattern: /path.*restriction|access denied.*path|outside.*allowed/i, label: 'Path access is restricted' },
722
- { pattern: /requires.*parameter|parameter.*required|missing.*parameter/i, label: 'Validates required parameters' },
785
+ {
786
+ pattern: /directory.*restriction|access.*control|allowed.*director|within allowed/i,
787
+ label: 'Directory access restrictions apply',
788
+ },
789
+ {
790
+ pattern: /path.*restriction|access denied.*path|outside.*allowed/i,
791
+ label: 'Path access is restricted',
792
+ },
793
+ {
794
+ pattern: /requires.*parameter|parameter.*required|missing.*parameter/i,
795
+ label: 'Validates required parameters',
796
+ },
723
797
  ];
724
798
  for (const profile of profiles) {
725
799
  const toolSpecific = [];
@@ -767,11 +841,11 @@ function extractCommonConstraints(profiles) {
767
841
  // Remove common constraints from per-tool lists
768
842
  if (common.length > 0) {
769
843
  for (const [toolName, constraints] of toolConstraints) {
770
- const filtered = constraints.filter(c => {
844
+ const filtered = constraints.filter((c) => {
771
845
  const normalized = normalizeConstraint(c);
772
846
  // Keep if it's tool-specific
773
- return !common.some(common => normalizeConstraint(common) === normalized ||
774
- commonPatterns.some(p => p.label === common && p.pattern.test(c)));
847
+ return !common.some((common) => normalizeConstraint(common) === normalized ||
848
+ commonPatterns.some((p) => p.label === common && p.pattern.test(c)));
775
849
  });
776
850
  toolConstraints.set(toolName, filtered);
777
851
  }
@@ -815,9 +889,9 @@ function generateSecuritySection(profiles) {
815
889
  lines.push('## Security Considerations');
816
890
  lines.push('');
817
891
  // Group by severity
818
- const critical = securityFindings.filter(f => f.severity === 'critical');
819
- const warnings = securityFindings.filter(f => f.severity === 'warning');
820
- const info = securityFindings.filter(f => f.severity === 'info');
892
+ const critical = securityFindings.filter((f) => f.severity === 'critical');
893
+ const warnings = securityFindings.filter((f) => f.severity === 'warning');
894
+ const info = securityFindings.filter((f) => f.severity === 'info');
821
895
  if (critical.length > 0) {
822
896
  lines.push('### Critical Issues');
823
897
  lines.push('');
@@ -849,12 +923,28 @@ function generateSecuritySection(profiles) {
849
923
  */
850
924
  function classifySecuritySeverity(note) {
851
925
  const lowerNote = note.toLowerCase();
852
- const criticalKeywords = ['injection', 'rce', 'remote code', 'arbitrary code', 'command execution', 'sql injection', 'xss'];
853
- const warningKeywords = ['risk', 'vulnerab', 'dangerous', 'unsafe', 'leak', 'exposure', 'sensitive'];
854
- if (criticalKeywords.some(kw => lowerNote.includes(kw))) {
926
+ const criticalKeywords = [
927
+ 'injection',
928
+ 'rce',
929
+ 'remote code',
930
+ 'arbitrary code',
931
+ 'command execution',
932
+ 'sql injection',
933
+ 'xss',
934
+ ];
935
+ const warningKeywords = [
936
+ 'risk',
937
+ 'vulnerab',
938
+ 'dangerous',
939
+ 'unsafe',
940
+ 'leak',
941
+ 'exposure',
942
+ 'sensitive',
943
+ ];
944
+ if (criticalKeywords.some((kw) => lowerNote.includes(kw))) {
855
945
  return 'critical';
856
946
  }
857
- if (warningKeywords.some(kw => lowerNote.includes(kw))) {
947
+ if (warningKeywords.some((kw) => lowerNote.includes(kw))) {
858
948
  return 'warning';
859
949
  }
860
950
  return 'info';
@@ -882,7 +972,7 @@ function generatePerformanceSection(profiles) {
882
972
  }
883
973
  lines.push('');
884
974
  // Add timing breakdown if separate timing data is available
885
- const metricsWithBreakdown = metrics.filter(m => m.avgToolMs !== undefined && m.avgAnalysisMs !== undefined);
975
+ const metricsWithBreakdown = metrics.filter((m) => m.avgToolMs !== undefined && m.avgAnalysisMs !== undefined);
886
976
  if (metricsWithBreakdown.length > 0) {
887
977
  lines.push('### Timing Breakdown');
888
978
  lines.push('');
@@ -891,19 +981,17 @@ function generatePerformanceSection(profiles) {
891
981
  lines.push('| Tool | Total Avg | Tool Exec | LLM Analysis | Tool % |');
892
982
  lines.push('|------|-----------|-----------|--------------|--------|');
893
983
  for (const m of metricsWithBreakdown) {
894
- const toolPct = m.avgToolMs !== undefined && m.avgMs > 0
895
- ? Math.round((m.avgToolMs / m.avgMs) * 100)
896
- : 0;
984
+ const toolPct = m.avgToolMs !== undefined && m.avgMs > 0 ? Math.round((m.avgToolMs / m.avgMs) * 100) : 0;
897
985
  lines.push(`| \`${escapeTableCell(m.toolName)}\` | ${m.avgMs}ms | ${m.avgToolMs}ms | ${m.avgAnalysisMs}ms | ${toolPct}% |`);
898
986
  }
899
987
  lines.push('');
900
988
  }
901
989
  // Add performance insights
902
- const slowTools = metrics.filter(m => m.avgMs > 1000);
903
- const unreliableTools = metrics.filter(m => m.errorRate > 0.3);
990
+ const slowTools = metrics.filter((m) => m.avgMs > 1000);
991
+ const unreliableTools = metrics.filter((m) => m.errorRate > 0.3);
904
992
  // Identify tools where LLM analysis dominates (>70% of total time)
905
- const llmDominatedTools = metricsWithBreakdown.filter(m => {
906
- const toolPct = m.avgToolMs !== undefined && m.avgMs > 0 ? (m.avgToolMs / m.avgMs) : 0;
993
+ const llmDominatedTools = metricsWithBreakdown.filter((m) => {
994
+ const toolPct = m.avgToolMs !== undefined && m.avgMs > 0 ? m.avgToolMs / m.avgMs : 0;
907
995
  return toolPct < 0.3; // Tool execution is < 30% means LLM is > 70%
908
996
  });
909
997
  if (slowTools.length > 0 || unreliableTools.length > 0 || llmDominatedTools.length > 0) {
@@ -948,7 +1036,7 @@ function generatePerformanceSection(profiles) {
948
1036
  function generateBehavioralMatrix(profiles, personas) {
949
1037
  const lines = [];
950
1038
  // Check if we have findings by persona
951
- const hasPersonaFindings = profiles.some(p => p.findingsByPersona && p.findingsByPersona.length > 0);
1039
+ const hasPersonaFindings = profiles.some((p) => p.findingsByPersona && p.findingsByPersona.length > 0);
952
1040
  if (!hasPersonaFindings) {
953
1041
  return [];
954
1042
  }
@@ -957,14 +1045,14 @@ function generateBehavioralMatrix(profiles, personas) {
957
1045
  lines.push('Summary of findings by tool and persona:');
958
1046
  lines.push('');
959
1047
  // Build header - escape persona names in case they contain special characters
960
- const header = ['Tool', ...personas.map(p => escapeTableCell(p.name))];
1048
+ const header = ['Tool', ...personas.map((p) => escapeTableCell(p.name))];
961
1049
  lines.push(`| ${header.join(' | ')} |`);
962
1050
  lines.push(`| ${header.map(() => '---').join(' | ')} |`);
963
1051
  // Build rows
964
1052
  for (const profile of profiles) {
965
1053
  const row = [escapeTableCell(profile.name)];
966
1054
  for (const persona of personas) {
967
- const findings = profile.findingsByPersona?.find(f => f.personaId === persona.id);
1055
+ const findings = profile.findingsByPersona?.find((f) => f.personaId === persona.id);
968
1056
  if (findings) {
969
1057
  const count = findings.behavioralNotes.length +
970
1058
  findings.limitations.length +
@@ -5,7 +5,8 @@ import { formatDateISO, formatDuration, escapeTableCell, mermaidLabel, validateJ
5
5
  import { smartTruncate, getExampleLength } from '../utils/smart-truncate.js';
6
6
  import { calculatePerformanceMetrics, extractParameters, looksLikeError } from './shared.js';
7
7
  import { analyzeExternalDependencies, formatExternalDependenciesMarkdown, } from '../baseline/external-dependency-detector.js';
8
- import { SEMANTIC_VALIDATION, SCHEMA_EVOLUTION, ERROR_ANALYSIS, PERFORMANCE_CONFIDENCE, DOCUMENTATION_SCORING, EXAMPLE_OUTPUT, EXTERNAL_DEPENDENCIES, RELIABILITY_DISPLAY, CONFIDENCE_INDICATORS, DISPLAY_LIMITS, ISSUE_CLASSIFICATION, } from '../constants.js';
8
+ import { SEMANTIC_VALIDATION, SCHEMA_EVOLUTION, ERROR_ANALYSIS, PERFORMANCE_CONFIDENCE, DOCUMENTATION_SCORING, EXAMPLE_OUTPUT, EXTERNAL_DEPENDENCIES, RELIABILITY_DISPLAY, CONFIDENCE_INDICATORS, DISPLAY_LIMITS, ISSUE_CLASSIFICATION, MCP, } from '../constants.js';
9
+ import { getFeatureFlags } from '../protocol/index.js';
9
10
  /**
10
11
  * Classify issues by their source to help users understand which issues
11
12
  * are actual bugs vs expected behavior or environment issues.
@@ -147,11 +148,20 @@ export function generateContractMd(result, options) {
147
148
  // Overview
148
149
  lines.push('## Overview');
149
150
  lines.push('');
151
+ const features = getFeatureFlags(discovery.protocolVersion);
150
152
  lines.push(`**Server Version:** ${discovery.serverInfo.version}`);
151
153
  lines.push(`**Protocol Version:** ${discovery.protocolVersion}`);
154
+ if (discovery.protocolVersion !== MCP.PROTOCOL_VERSION) {
155
+ lines.push(`*(Server protocol; bellwether supports up to ${MCP.PROTOCOL_VERSION})*`);
156
+ }
152
157
  lines.push('');
153
158
  const performanceMetrics = calculatePerformanceMetrics(toolProfiles);
154
159
  const performanceByTool = new Map(performanceMetrics.map((metric) => [metric.toolName, metric]));
160
+ // Server instructions
161
+ if (discovery.instructions) {
162
+ lines.push(`**Server Instructions:** ${discovery.instructions}`);
163
+ lines.push('');
164
+ }
155
165
  // Capabilities summary
156
166
  lines.push('## Capabilities');
157
167
  lines.push('');
@@ -164,6 +174,15 @@ export function generateContractMd(result, options) {
164
174
  if (discovery.capabilities.resources) {
165
175
  lines.push(`- **Resources:** ${(discovery.resources ?? []).length} available`);
166
176
  }
177
+ if (discovery.resourceTemplates && discovery.resourceTemplates.length > 0) {
178
+ lines.push(`- **Resource Templates:** ${discovery.resourceTemplates.length} available`);
179
+ }
180
+ if (discovery.capabilities.completions && features.completions) {
181
+ lines.push('- **Completions:** Supported');
182
+ }
183
+ if (discovery.capabilities.tasks && features.tasks) {
184
+ lines.push('- **Tasks:** Supported');
185
+ }
167
186
  if (discovery.capabilities.logging) {
168
187
  lines.push('- **Logging:** Supported');
169
188
  }
@@ -321,6 +340,22 @@ export function generateContractMd(result, options) {
321
340
  lines.push('');
322
341
  }
323
342
  }
343
+ // Show tool annotations (behavioral hints) — version-gated
344
+ if (features.toolAnnotations && tool.annotations) {
345
+ const hints = [];
346
+ if (tool.annotations.readOnlyHint)
347
+ hints.push('read-only');
348
+ if (tool.annotations.destructiveHint)
349
+ hints.push('destructive');
350
+ if (tool.annotations.idempotentHint)
351
+ hints.push('idempotent');
352
+ if (tool.annotations.openWorldHint)
353
+ hints.push('open-world');
354
+ if (hints.length > 0) {
355
+ lines.push(`**Behavioral Hints:** ${hints.join(', ')}`);
356
+ lines.push('');
357
+ }
358
+ }
324
359
  if (tool.inputSchema) {
325
360
  lines.push('**Input Schema:**');
326
361
  const schemaJson = validateJsonForCodeBlock(tool.inputSchema);
@@ -329,6 +364,15 @@ export function generateContractMd(result, options) {
329
364
  lines.push('```');
330
365
  lines.push('');
331
366
  }
367
+ // Show output schema if present — version-gated
368
+ if (features.structuredOutput && tool.outputSchema) {
369
+ lines.push('**Output Schema:**');
370
+ const outputSchemaJson = validateJsonForCodeBlock(tool.outputSchema);
371
+ lines.push('```json');
372
+ lines.push(outputSchemaJson.content);
373
+ lines.push('```');
374
+ lines.push('');
375
+ }
332
376
  // Add example usage from successful interactions
333
377
  const examples = generateToolExamples(profile, maxExamplesPerTool, exampleLength);
334
378
  if (examples.length > 0) {
@@ -380,6 +424,24 @@ export function generateContractMd(result, options) {
380
424
  }
381
425
  }
382
426
  }
427
+ // Resource Templates section
428
+ if (discovery.resourceTemplates && discovery.resourceTemplates.length > 0) {
429
+ lines.push('## Resource Templates');
430
+ lines.push('');
431
+ for (const template of discovery.resourceTemplates) {
432
+ lines.push(`### ${template.name}`);
433
+ lines.push('');
434
+ lines.push(`**URI Template:** \`${template.uriTemplate}\``);
435
+ if (template.mimeType) {
436
+ lines.push(`**MIME Type:** ${template.mimeType}`);
437
+ }
438
+ lines.push('');
439
+ if (template.description) {
440
+ lines.push(template.description);
441
+ lines.push('');
442
+ }
443
+ }
444
+ }
383
445
  // Error Summary section
384
446
  const errorSummary = generateErrorSummarySection(toolProfiles);
385
447
  if (errorSummary.length > 0) {
@@ -166,11 +166,15 @@ export const LLM_RETRY_OPTIONS = {
166
166
  return true;
167
167
  }
168
168
  // Auth errors - don't retry
169
- if (message.includes('401') || message.includes('unauthorized') || message.includes('api key')) {
169
+ if (message.includes('401') ||
170
+ message.includes('unauthorized') ||
171
+ message.includes('api key')) {
170
172
  return false;
171
173
  }
172
174
  // Quota errors - don't retry
173
- if (message.includes('quota') || message.includes('insufficient') || message.includes('credit')) {
175
+ if (message.includes('quota') ||
176
+ message.includes('insufficient') ||
177
+ message.includes('credit')) {
174
178
  return false;
175
179
  }
176
180
  // Default: use standard isRetryable
@@ -256,9 +260,7 @@ export function createCircuitBreaker(name, options = {}) {
256
260
  // Check if circuit is open
257
261
  if (state.isOpen) {
258
262
  const now = new Date();
259
- const timeSinceOpen = state.openedAt
260
- ? now.getTime() - state.openedAt.getTime()
261
- : 0;
263
+ const timeSinceOpen = state.openedAt ? now.getTime() - state.openedAt.getTime() : 0;
262
264
  if (timeSinceOpen < resetTimeMs) {
263
265
  // Still in open state
264
266
  throw new BellwetherError(`Circuit breaker '${name}' is open`, {
@@ -306,6 +308,10 @@ export function createCircuitBreaker(name, options = {}) {
306
308
  }
307
309
  state.failures++;
308
310
  state.lastFailure = now;
311
+ // If half-open test failed, reset openedAt to restart cooldown timer
312
+ if (state.isOpen) {
313
+ state.openedAt = now;
314
+ }
309
315
  // Check if we should open the circuit
310
316
  if (state.failures >= failureThreshold && !state.isOpen) {
311
317
  state.isOpen = true;
@@ -8,6 +8,12 @@ export class RateLimiter {
8
8
  lastRefill;
9
9
  constructor(config) {
10
10
  this.config = config;
11
+ if (config.requestsPerSecond <= 0) {
12
+ throw new Error(`requestsPerSecond must be positive, got ${config.requestsPerSecond}`);
13
+ }
14
+ if (config.burstLimit <= 0) {
15
+ throw new Error(`burstLimit must be positive, got ${config.burstLimit}`);
16
+ }
11
17
  this.tokens = config.burstLimit;
12
18
  this.lastRefill = Date.now();
13
19
  }
@@ -45,9 +51,7 @@ export function calculateBackoffMs(attempt, strategy) {
45
51
  const baseDelay = RATE_LIMITING.BASE_DELAY_MS;
46
52
  const maxDelay = RATE_LIMITING.MAX_DELAY_MS;
47
53
  const jitter = RATE_LIMITING.JITTER_RATIO;
48
- const rawDelay = strategy === 'linear'
49
- ? baseDelay * (attempt + 1)
50
- : baseDelay * Math.pow(2, attempt);
54
+ const rawDelay = strategy === 'linear' ? baseDelay * (attempt + 1) : baseDelay * Math.pow(2, attempt);
51
55
  const capped = Math.min(rawDelay, maxDelay);
52
56
  const jitterDelta = capped * jitter * (Math.random() - 0.5) * 2;
53
57
  return Math.max(0, Math.round(capped + jitterDelta));
@@ -334,11 +334,17 @@ export class AnthropicClient {
334
334
  }
335
335
  // Convert to typed errors for retry logic (same as chat method)
336
336
  if (error instanceof Error) {
337
- const message = error.message.toLowerCase();
338
- if (message.includes('401') || message.includes('authentication')) {
337
+ const status = getErrorStatus(error);
338
+ const code = (getErrorCode(error) ?? '').toLowerCase();
339
+ const type = (getErrorType(error) ?? '').toLowerCase();
340
+ const message = getErrorMessage(error).toLowerCase();
341
+ if (status === 401 || status === 403 || message.includes('authentication')) {
339
342
  throw new LLMAuthError('anthropic', model);
340
343
  }
341
- if (message.includes('429') || message.includes('rate limit')) {
344
+ if (status === 429 ||
345
+ code.includes('rate_limit') ||
346
+ type.includes('rate_limit') ||
347
+ message.includes('rate limit')) {
342
348
  let retryAfterMs;
343
349
  const apiError = error;
344
350
  if (apiError.headers?.get) {
@@ -361,7 +367,11 @@ export class AnthropicClient {
361
367
  }
362
368
  throw new LLMRateLimitError('anthropic', retryAfterMs, model);
363
369
  }
364
- if (message.includes('insufficient') || message.includes('credit')) {
370
+ if (status === 402 ||
371
+ code.includes('insufficient') ||
372
+ type.includes('insufficient') ||
373
+ message.includes('insufficient') ||
374
+ message.includes('credit')) {
365
375
  throw new LLMQuotaError('anthropic', model);
366
376
  }
367
377
  if (message.includes('econnrefused') || message.includes('fetch failed')) {
@@ -69,6 +69,7 @@ export declare class FallbackLLMClient implements LLMClient {
69
69
  /**
70
70
  * Check if a provider is currently healthy.
71
71
  */
72
+ private healthCheckInProgress;
72
73
  private isProviderHealthy;
73
74
  /**
74
75
  * Mark a provider as failed.
@@ -60,6 +60,7 @@ export class FallbackLLMClient {
60
60
  try {
61
61
  const ollamaClient = new OllamaClient({
62
62
  model: this.config.ollamaModel,
63
+ onUsage: this.config.onUsage,
63
64
  });
64
65
  this.clients.set('ollama', ollamaClient);
65
66
  this.providerOrder.push('ollama');
@@ -112,18 +113,23 @@ export class FallbackLLMClient {
112
113
  /**
113
114
  * Check if a provider is currently healthy.
114
115
  */
116
+ healthCheckInProgress = false;
115
117
  isProviderHealthy(providerId) {
116
118
  const health = this.health.get(providerId);
117
119
  if (!health)
118
120
  return false;
119
121
  // If marked unhealthy, check if retry delay has passed
120
- if (!health.healthy) {
122
+ if (!health.healthy && !this.healthCheckInProgress) {
121
123
  const timeSinceCheck = Date.now() - health.lastChecked.getTime();
122
124
  if (timeSinceCheck >= this.config.unhealthyRetryDelayMs) {
125
+ // Prevent concurrent health resets
126
+ this.healthCheckInProgress = true;
123
127
  // Reset to allow retry
124
128
  health.healthy = true;
125
129
  health.consecutiveFailures = 0;
130
+ health.lastChecked = new Date();
126
131
  logger.info({ provider: providerId }, 'Resetting unhealthy provider for retry');
132
+ this.healthCheckInProgress = false;
127
133
  }
128
134
  }
129
135
  return health.healthy;