@dotsetlabs/bellwether 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -0
- package/README.md +9 -0
- package/dist/auth/credentials.js +2 -0
- package/dist/baseline/accessors.js +12 -0
- package/dist/baseline/baseline-format.d.ts +48 -0
- package/dist/baseline/comparator.js +263 -20
- package/dist/baseline/converter.js +52 -4
- package/dist/baseline/response-fingerprint.js +1 -1
- package/dist/baseline/saver.js +34 -0
- package/dist/baseline/types.d.ts +21 -1
- package/dist/cache/response-cache.js +9 -2
- package/dist/cli/commands/baseline.js +70 -35
- package/dist/cli/commands/check.js +48 -9
- package/dist/cli/commands/explore.js +36 -3
- package/dist/cli/commands/init.js +10 -7
- package/dist/cli/commands/watch.js +5 -5
- package/dist/config/loader.js +2 -2
- package/dist/constants/core.d.ts +1 -1
- package/dist/constants/core.js +1 -1
- package/dist/discovery/discovery.js +88 -14
- package/dist/discovery/types.d.ts +5 -1
- package/dist/docs/agents.js +138 -50
- package/dist/docs/contract.js +63 -1
- package/dist/errors/retry.js +11 -5
- package/dist/interview/rate-limiter.js +7 -3
- package/dist/llm/anthropic.js +14 -4
- package/dist/llm/fallback.d.ts +1 -0
- package/dist/llm/fallback.js +7 -1
- package/dist/llm/openai.js +15 -4
- package/dist/protocol/index.d.ts +2 -0
- package/dist/protocol/index.js +2 -0
- package/dist/protocol/version-registry.d.ts +66 -0
- package/dist/protocol/version-registry.js +159 -0
- package/dist/transport/http-transport.d.ts +11 -1
- package/dist/transport/http-transport.js +21 -2
- package/dist/transport/mcp-client.d.ts +29 -1
- package/dist/transport/mcp-client.js +92 -7
- package/dist/transport/sse-transport.js +5 -4
- package/dist/transport/types.d.ts +134 -1
- package/dist/utils/concurrency.d.ts +2 -0
- package/dist/utils/concurrency.js +9 -2
- package/dist/utils/markdown.js +13 -18
- package/dist/utils/timeout.js +2 -1
- package/dist/version.js +1 -1
- package/man/bellwether.1 +1 -1
- package/man/bellwether.1.md +2 -2
- package/package.json +1 -1
package/dist/docs/agents.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { formatDateISO, formatDuration, escapeTableCell, mermaidLabel, validateJsonForCodeBlock } from '../utils/index.js';
|
|
2
|
-
import { DISPLAY_LIMITS, MATH_FACTORS } from '../constants.js';
|
|
1
|
+
import { formatDateISO, formatDuration, escapeTableCell, mermaidLabel, validateJsonForCodeBlock, } from '../utils/index.js';
|
|
2
|
+
import { DISPLAY_LIMITS, MATH_FACTORS, MCP } from '../constants.js';
|
|
3
3
|
import { calculatePerformanceMetrics, extractParameters, looksLikeError } from './shared.js';
|
|
4
|
+
import { getFeatureFlags } from '../protocol/index.js';
|
|
4
5
|
/**
|
|
5
6
|
* Detect configuration issues based on error patterns.
|
|
6
7
|
* Returns a warning message if issues detected, null otherwise.
|
|
@@ -17,7 +18,7 @@ function detectConfigurationIssues(profiles, metadata) {
|
|
|
17
18
|
for (const profile of profiles) {
|
|
18
19
|
for (const interaction of profile.interactions) {
|
|
19
20
|
// Get response text content
|
|
20
|
-
const textContent = interaction.response?.content?.find(c => c.type === 'text');
|
|
21
|
+
const textContent = interaction.response?.content?.find((c) => c.type === 'text');
|
|
21
22
|
const responseText = textContent && 'text' in textContent ? String(textContent.text) : '';
|
|
22
23
|
const errorText = interaction.error || '';
|
|
23
24
|
const combined = errorText + responseText;
|
|
@@ -33,8 +34,8 @@ function detectConfigurationIssues(profiles, metadata) {
|
|
|
33
34
|
}
|
|
34
35
|
// If we found access-related errors, show warning
|
|
35
36
|
if (totalErrors > 0 && accessErrors / totalErrors > 0.5) {
|
|
36
|
-
return 'Most tool calls failed with access-related errors. The server may not have been configured with allowed directories. ' +
|
|
37
|
-
'For filesystem servers, try: `bellwether check npx @modelcontextprotocol/server-filesystem /path/to/allowed/dir`';
|
|
37
|
+
return ('Most tool calls failed with access-related errors. The server may not have been configured with allowed directories. ' +
|
|
38
|
+
'For filesystem servers, try: `bellwether check npx @modelcontextprotocol/server-filesystem /path/to/allowed/dir`');
|
|
38
39
|
}
|
|
39
40
|
// Also check synthesized limitations for access patterns (fallback)
|
|
40
41
|
let accessRelatedLimitations = 0;
|
|
@@ -48,9 +49,11 @@ function detectConfigurationIssues(profiles, metadata) {
|
|
|
48
49
|
}
|
|
49
50
|
}
|
|
50
51
|
// If most tools have access-related limitations and high error rate, show warning
|
|
51
|
-
if (totalLimitations > 0 &&
|
|
52
|
-
|
|
53
|
-
|
|
52
|
+
if (totalLimitations > 0 &&
|
|
53
|
+
accessRelatedLimitations / totalLimitations > 0.5 &&
|
|
54
|
+
errorRate > 0.8) {
|
|
55
|
+
return ('Most tool calls failed, likely due to missing allowed directories configuration. ' +
|
|
56
|
+
'For filesystem servers, try: `bellwether check npx @modelcontextprotocol/server-filesystem /path/to/allowed/dir`');
|
|
54
57
|
}
|
|
55
58
|
return null;
|
|
56
59
|
}
|
|
@@ -75,14 +78,25 @@ export function generateAgentsMd(result) {
|
|
|
75
78
|
lines.push('');
|
|
76
79
|
lines.push(summary);
|
|
77
80
|
lines.push('');
|
|
81
|
+
const features = getFeatureFlags(discovery.protocolVersion);
|
|
78
82
|
lines.push(`**Server Version:** ${discovery.serverInfo.version}`);
|
|
79
83
|
lines.push(`**Protocol Version:** ${discovery.protocolVersion}`);
|
|
84
|
+
if (discovery.protocolVersion !== MCP.PROTOCOL_VERSION) {
|
|
85
|
+
lines.push(`*(Server protocol; bellwether supports up to ${MCP.PROTOCOL_VERSION})*`);
|
|
86
|
+
}
|
|
80
87
|
// Show personas used
|
|
81
88
|
if (metadata.personas && metadata.personas.length > 0) {
|
|
82
|
-
const personaNames = metadata.personas.map(p => p.name).join(', ');
|
|
89
|
+
const personaNames = metadata.personas.map((p) => p.name).join(', ');
|
|
83
90
|
lines.push(`**Interview Personas:** ${personaNames}`);
|
|
84
91
|
}
|
|
85
92
|
lines.push('');
|
|
93
|
+
// Server instructions
|
|
94
|
+
if (discovery.instructions) {
|
|
95
|
+
lines.push('## Server Instructions');
|
|
96
|
+
lines.push('');
|
|
97
|
+
lines.push(discovery.instructions);
|
|
98
|
+
lines.push('');
|
|
99
|
+
}
|
|
86
100
|
// Capabilities summary
|
|
87
101
|
lines.push('## Capabilities');
|
|
88
102
|
lines.push('');
|
|
@@ -95,6 +109,15 @@ export function generateAgentsMd(result) {
|
|
|
95
109
|
if (discovery.capabilities.resources) {
|
|
96
110
|
lines.push(`- **Resources:** ${(discovery.resources ?? []).length} available`);
|
|
97
111
|
}
|
|
112
|
+
if (discovery.resourceTemplates && discovery.resourceTemplates.length > 0) {
|
|
113
|
+
lines.push(`- **Resource Templates:** ${discovery.resourceTemplates.length} available`);
|
|
114
|
+
}
|
|
115
|
+
if (discovery.capabilities.completions && features.completions) {
|
|
116
|
+
lines.push('- **Completions:** Supported');
|
|
117
|
+
}
|
|
118
|
+
if (discovery.capabilities.tasks && features.tasks) {
|
|
119
|
+
lines.push('- **Tasks:** Supported');
|
|
120
|
+
}
|
|
98
121
|
if (discovery.capabilities.logging) {
|
|
99
122
|
lines.push('- **Logging:** Supported');
|
|
100
123
|
}
|
|
@@ -129,7 +152,23 @@ export function generateAgentsMd(result) {
|
|
|
129
152
|
lines.push(profile.description);
|
|
130
153
|
lines.push('');
|
|
131
154
|
// Find the original tool to get schema
|
|
132
|
-
const tool = discovery.tools.find(t => t.name === profile.name);
|
|
155
|
+
const tool = discovery.tools.find((t) => t.name === profile.name);
|
|
156
|
+
// Show tool annotations (behavioral hints) — version-gated
|
|
157
|
+
if (features.toolAnnotations && tool?.annotations) {
|
|
158
|
+
const hints = [];
|
|
159
|
+
if (tool.annotations.readOnlyHint)
|
|
160
|
+
hints.push('read-only');
|
|
161
|
+
if (tool.annotations.destructiveHint)
|
|
162
|
+
hints.push('destructive');
|
|
163
|
+
if (tool.annotations.idempotentHint)
|
|
164
|
+
hints.push('idempotent');
|
|
165
|
+
if (tool.annotations.openWorldHint)
|
|
166
|
+
hints.push('open-world');
|
|
167
|
+
if (hints.length > 0) {
|
|
168
|
+
lines.push(`**Behavioral Hints:** ${hints.join(', ')}`);
|
|
169
|
+
lines.push('');
|
|
170
|
+
}
|
|
171
|
+
}
|
|
133
172
|
if (tool?.inputSchema) {
|
|
134
173
|
lines.push('**Input Schema:**');
|
|
135
174
|
// Validate JSON and escape for code block
|
|
@@ -139,6 +178,15 @@ export function generateAgentsMd(result) {
|
|
|
139
178
|
lines.push('```');
|
|
140
179
|
lines.push('');
|
|
141
180
|
}
|
|
181
|
+
// Show output schema if present — version-gated
|
|
182
|
+
if (features.structuredOutput && tool?.outputSchema) {
|
|
183
|
+
lines.push('**Output Schema:**');
|
|
184
|
+
const outputSchemaJson = validateJsonForCodeBlock(tool.outputSchema);
|
|
185
|
+
lines.push('```json');
|
|
186
|
+
lines.push(outputSchemaJson.content);
|
|
187
|
+
lines.push('```');
|
|
188
|
+
lines.push('');
|
|
189
|
+
}
|
|
142
190
|
// Add sample response if we have successful calls
|
|
143
191
|
const sampleResponse = generateSampleResponse(profile);
|
|
144
192
|
if (sampleResponse.length > 0) {
|
|
@@ -203,14 +251,14 @@ export function generateAgentsMd(result) {
|
|
|
203
251
|
}
|
|
204
252
|
// Common Workflows section (summarized view of successful workflows)
|
|
205
253
|
if (result.workflowResults && result.workflowResults.length > 0) {
|
|
206
|
-
const successfulWorkflows = result.workflowResults.filter(wr => wr.success);
|
|
254
|
+
const successfulWorkflows = result.workflowResults.filter((wr) => wr.success);
|
|
207
255
|
if (successfulWorkflows.length > 0) {
|
|
208
256
|
lines.push('## Common Workflows');
|
|
209
257
|
lines.push('');
|
|
210
258
|
lines.push('These workflows demonstrate recommended patterns for using tools together:');
|
|
211
259
|
lines.push('');
|
|
212
260
|
for (const wr of successfulWorkflows) {
|
|
213
|
-
const toolSequence = wr.workflow.steps.map(s => `\`${s.tool}\``).join(' → ');
|
|
261
|
+
const toolSequence = wr.workflow.steps.map((s) => `\`${s.tool}\``).join(' → ');
|
|
214
262
|
lines.push(`### ${wr.workflow.name}`);
|
|
215
263
|
lines.push('');
|
|
216
264
|
lines.push(wr.workflow.description);
|
|
@@ -418,6 +466,26 @@ export function generateAgentsMd(result) {
|
|
|
418
466
|
}
|
|
419
467
|
}
|
|
420
468
|
}
|
|
469
|
+
// Resource Templates section
|
|
470
|
+
if (discovery.resourceTemplates && discovery.resourceTemplates.length > 0) {
|
|
471
|
+
lines.push('## Resource Templates');
|
|
472
|
+
lines.push('');
|
|
473
|
+
lines.push('Resource templates define URI patterns for dynamically-generated resources.');
|
|
474
|
+
lines.push('');
|
|
475
|
+
for (const template of discovery.resourceTemplates) {
|
|
476
|
+
lines.push(`### ${template.name}`);
|
|
477
|
+
lines.push('');
|
|
478
|
+
lines.push(`**URI Template:** \`${template.uriTemplate}\``);
|
|
479
|
+
if (template.mimeType) {
|
|
480
|
+
lines.push(`**MIME Type:** ${template.mimeType}`);
|
|
481
|
+
}
|
|
482
|
+
lines.push('');
|
|
483
|
+
if (template.description) {
|
|
484
|
+
lines.push(template.description);
|
|
485
|
+
lines.push('');
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
}
|
|
421
489
|
// Overall limitations
|
|
422
490
|
if (limitations.length > 0) {
|
|
423
491
|
lines.push('## Known Limitations');
|
|
@@ -485,7 +553,7 @@ function generateQuickReference(tools, profiles) {
|
|
|
485
553
|
lines.push('|------|------------|---------|');
|
|
486
554
|
for (const tool of tools) {
|
|
487
555
|
const params = extractParameters(tool.inputSchema);
|
|
488
|
-
const profile = profiles.find(p => p.name === tool.name);
|
|
556
|
+
const profile = profiles.find((p) => p.name === tool.name);
|
|
489
557
|
const returnType = inferReturnTypeDetailed(profile);
|
|
490
558
|
// Escape table cell content to prevent broken tables
|
|
491
559
|
lines.push(`| \`${escapeTableCell(tool.name)}\` | ${escapeTableCell(params)} | ${escapeTableCell(returnType)} |`);
|
|
@@ -494,7 +562,7 @@ function generateQuickReference(tools, profiles) {
|
|
|
494
562
|
// Only add example section if we have at least one successful example
|
|
495
563
|
const successfulExamples = [];
|
|
496
564
|
for (const tool of tools) {
|
|
497
|
-
const profile = profiles.find(p => p.name === tool.name);
|
|
565
|
+
const profile = profiles.find((p) => p.name === tool.name);
|
|
498
566
|
const example = generateExampleSnippet(tool, profile);
|
|
499
567
|
if (example) {
|
|
500
568
|
successfulExamples.push({ tool, example });
|
|
@@ -554,10 +622,10 @@ function inferReturnTypeDetailed(profile) {
|
|
|
554
622
|
return 'unknown';
|
|
555
623
|
}
|
|
556
624
|
// Look at successful interactions that don't have error-like content
|
|
557
|
-
const successful = profile.interactions.find(i => {
|
|
625
|
+
const successful = profile.interactions.find((i) => {
|
|
558
626
|
if (i.error || !i.response || i.response.isError)
|
|
559
627
|
return false;
|
|
560
|
-
const textContent = i.response.content?.find(c => c.type === 'text');
|
|
628
|
+
const textContent = i.response.content?.find((c) => c.type === 'text');
|
|
561
629
|
if (textContent && 'text' in textContent) {
|
|
562
630
|
if (looksLikeError(String(textContent.text)))
|
|
563
631
|
return false;
|
|
@@ -577,7 +645,7 @@ function inferReturnTypeDetailed(profile) {
|
|
|
577
645
|
return 'empty response';
|
|
578
646
|
}
|
|
579
647
|
// Check content types
|
|
580
|
-
const types = new Set(content.map(c => c.type));
|
|
648
|
+
const types = new Set(content.map((c) => c.type));
|
|
581
649
|
if (types.size === 1) {
|
|
582
650
|
const type = content[0].type;
|
|
583
651
|
if (type === 'text') {
|
|
@@ -634,10 +702,10 @@ function generateExampleSnippet(tool, profile) {
|
|
|
634
702
|
return null;
|
|
635
703
|
}
|
|
636
704
|
// Find a successful interaction (not an error) that doesn't have error-like content
|
|
637
|
-
const successful = profile.interactions.find(i => {
|
|
705
|
+
const successful = profile.interactions.find((i) => {
|
|
638
706
|
if (i.error || !i.response || i.response.isError)
|
|
639
707
|
return false;
|
|
640
|
-
const textContent = i.response.content?.find(c => c.type === 'text');
|
|
708
|
+
const textContent = i.response.content?.find((c) => c.type === 'text');
|
|
641
709
|
if (textContent && 'text' in textContent) {
|
|
642
710
|
if (looksLikeError(String(textContent.text)))
|
|
643
711
|
return false;
|
|
@@ -659,11 +727,11 @@ function generateExampleSnippet(tool, profile) {
|
|
|
659
727
|
function generateSampleResponse(profile) {
|
|
660
728
|
const lines = [];
|
|
661
729
|
// Find a successful interaction with a response that doesn't look like an error
|
|
662
|
-
const successful = profile.interactions.find(i => {
|
|
730
|
+
const successful = profile.interactions.find((i) => {
|
|
663
731
|
if (i.error || !i.response || i.response.isError)
|
|
664
732
|
return false;
|
|
665
733
|
// Also check if the response content looks like an error
|
|
666
|
-
const textContent = i.response.content?.find(c => c.type === 'text');
|
|
734
|
+
const textContent = i.response.content?.find((c) => c.type === 'text');
|
|
667
735
|
if (textContent && 'text' in textContent) {
|
|
668
736
|
if (looksLikeError(String(textContent.text)))
|
|
669
737
|
return false;
|
|
@@ -673,7 +741,7 @@ function generateSampleResponse(profile) {
|
|
|
673
741
|
if (!successful || !successful.response?.content) {
|
|
674
742
|
return lines;
|
|
675
743
|
}
|
|
676
|
-
const textContent = successful.response.content.find(c => c.type === 'text');
|
|
744
|
+
const textContent = successful.response.content.find((c) => c.type === 'text');
|
|
677
745
|
if (!textContent || !('text' in textContent)) {
|
|
678
746
|
return lines;
|
|
679
747
|
}
|
|
@@ -710,16 +778,22 @@ function extractCommonConstraints(profiles) {
|
|
|
710
778
|
const constraintCounts = new Map();
|
|
711
779
|
const toolConstraints = new Map();
|
|
712
780
|
const normalizeConstraint = (c) => {
|
|
713
|
-
return c.toLowerCase()
|
|
714
|
-
.replace(/['"`]/g, '')
|
|
715
|
-
.replace(/\s+/g, ' ')
|
|
716
|
-
.trim();
|
|
781
|
+
return c.toLowerCase().replace(/['"`]/g, '').replace(/\s+/g, ' ').trim();
|
|
717
782
|
};
|
|
718
783
|
// Common patterns that should be grouped
|
|
719
784
|
const commonPatterns = [
|
|
720
|
-
{
|
|
721
|
-
|
|
722
|
-
|
|
785
|
+
{
|
|
786
|
+
pattern: /directory.*restriction|access.*control|allowed.*director|within allowed/i,
|
|
787
|
+
label: 'Directory access restrictions apply',
|
|
788
|
+
},
|
|
789
|
+
{
|
|
790
|
+
pattern: /path.*restriction|access denied.*path|outside.*allowed/i,
|
|
791
|
+
label: 'Path access is restricted',
|
|
792
|
+
},
|
|
793
|
+
{
|
|
794
|
+
pattern: /requires.*parameter|parameter.*required|missing.*parameter/i,
|
|
795
|
+
label: 'Validates required parameters',
|
|
796
|
+
},
|
|
723
797
|
];
|
|
724
798
|
for (const profile of profiles) {
|
|
725
799
|
const toolSpecific = [];
|
|
@@ -767,11 +841,11 @@ function extractCommonConstraints(profiles) {
|
|
|
767
841
|
// Remove common constraints from per-tool lists
|
|
768
842
|
if (common.length > 0) {
|
|
769
843
|
for (const [toolName, constraints] of toolConstraints) {
|
|
770
|
-
const filtered = constraints.filter(c => {
|
|
844
|
+
const filtered = constraints.filter((c) => {
|
|
771
845
|
const normalized = normalizeConstraint(c);
|
|
772
846
|
// Keep if it's tool-specific
|
|
773
|
-
return !common.some(common => normalizeConstraint(common) === normalized ||
|
|
774
|
-
commonPatterns.some(p => p.label === common && p.pattern.test(c)));
|
|
847
|
+
return !common.some((common) => normalizeConstraint(common) === normalized ||
|
|
848
|
+
commonPatterns.some((p) => p.label === common && p.pattern.test(c)));
|
|
775
849
|
});
|
|
776
850
|
toolConstraints.set(toolName, filtered);
|
|
777
851
|
}
|
|
@@ -815,9 +889,9 @@ function generateSecuritySection(profiles) {
|
|
|
815
889
|
lines.push('## Security Considerations');
|
|
816
890
|
lines.push('');
|
|
817
891
|
// Group by severity
|
|
818
|
-
const critical = securityFindings.filter(f => f.severity === 'critical');
|
|
819
|
-
const warnings = securityFindings.filter(f => f.severity === 'warning');
|
|
820
|
-
const info = securityFindings.filter(f => f.severity === 'info');
|
|
892
|
+
const critical = securityFindings.filter((f) => f.severity === 'critical');
|
|
893
|
+
const warnings = securityFindings.filter((f) => f.severity === 'warning');
|
|
894
|
+
const info = securityFindings.filter((f) => f.severity === 'info');
|
|
821
895
|
if (critical.length > 0) {
|
|
822
896
|
lines.push('### Critical Issues');
|
|
823
897
|
lines.push('');
|
|
@@ -849,12 +923,28 @@ function generateSecuritySection(profiles) {
|
|
|
849
923
|
*/
|
|
850
924
|
function classifySecuritySeverity(note) {
|
|
851
925
|
const lowerNote = note.toLowerCase();
|
|
852
|
-
const criticalKeywords = [
|
|
853
|
-
|
|
854
|
-
|
|
926
|
+
const criticalKeywords = [
|
|
927
|
+
'injection',
|
|
928
|
+
'rce',
|
|
929
|
+
'remote code',
|
|
930
|
+
'arbitrary code',
|
|
931
|
+
'command execution',
|
|
932
|
+
'sql injection',
|
|
933
|
+
'xss',
|
|
934
|
+
];
|
|
935
|
+
const warningKeywords = [
|
|
936
|
+
'risk',
|
|
937
|
+
'vulnerab',
|
|
938
|
+
'dangerous',
|
|
939
|
+
'unsafe',
|
|
940
|
+
'leak',
|
|
941
|
+
'exposure',
|
|
942
|
+
'sensitive',
|
|
943
|
+
];
|
|
944
|
+
if (criticalKeywords.some((kw) => lowerNote.includes(kw))) {
|
|
855
945
|
return 'critical';
|
|
856
946
|
}
|
|
857
|
-
if (warningKeywords.some(kw => lowerNote.includes(kw))) {
|
|
947
|
+
if (warningKeywords.some((kw) => lowerNote.includes(kw))) {
|
|
858
948
|
return 'warning';
|
|
859
949
|
}
|
|
860
950
|
return 'info';
|
|
@@ -882,7 +972,7 @@ function generatePerformanceSection(profiles) {
|
|
|
882
972
|
}
|
|
883
973
|
lines.push('');
|
|
884
974
|
// Add timing breakdown if separate timing data is available
|
|
885
|
-
const metricsWithBreakdown = metrics.filter(m => m.avgToolMs !== undefined && m.avgAnalysisMs !== undefined);
|
|
975
|
+
const metricsWithBreakdown = metrics.filter((m) => m.avgToolMs !== undefined && m.avgAnalysisMs !== undefined);
|
|
886
976
|
if (metricsWithBreakdown.length > 0) {
|
|
887
977
|
lines.push('### Timing Breakdown');
|
|
888
978
|
lines.push('');
|
|
@@ -891,19 +981,17 @@ function generatePerformanceSection(profiles) {
|
|
|
891
981
|
lines.push('| Tool | Total Avg | Tool Exec | LLM Analysis | Tool % |');
|
|
892
982
|
lines.push('|------|-----------|-----------|--------------|--------|');
|
|
893
983
|
for (const m of metricsWithBreakdown) {
|
|
894
|
-
const toolPct = m.avgToolMs !== undefined && m.avgMs > 0
|
|
895
|
-
? Math.round((m.avgToolMs / m.avgMs) * 100)
|
|
896
|
-
: 0;
|
|
984
|
+
const toolPct = m.avgToolMs !== undefined && m.avgMs > 0 ? Math.round((m.avgToolMs / m.avgMs) * 100) : 0;
|
|
897
985
|
lines.push(`| \`${escapeTableCell(m.toolName)}\` | ${m.avgMs}ms | ${m.avgToolMs}ms | ${m.avgAnalysisMs}ms | ${toolPct}% |`);
|
|
898
986
|
}
|
|
899
987
|
lines.push('');
|
|
900
988
|
}
|
|
901
989
|
// Add performance insights
|
|
902
|
-
const slowTools = metrics.filter(m => m.avgMs > 1000);
|
|
903
|
-
const unreliableTools = metrics.filter(m => m.errorRate > 0.3);
|
|
990
|
+
const slowTools = metrics.filter((m) => m.avgMs > 1000);
|
|
991
|
+
const unreliableTools = metrics.filter((m) => m.errorRate > 0.3);
|
|
904
992
|
// Identify tools where LLM analysis dominates (>70% of total time)
|
|
905
|
-
const llmDominatedTools = metricsWithBreakdown.filter(m => {
|
|
906
|
-
const toolPct = m.avgToolMs !== undefined && m.avgMs > 0 ?
|
|
993
|
+
const llmDominatedTools = metricsWithBreakdown.filter((m) => {
|
|
994
|
+
const toolPct = m.avgToolMs !== undefined && m.avgMs > 0 ? m.avgToolMs / m.avgMs : 0;
|
|
907
995
|
return toolPct < 0.3; // Tool execution is < 30% means LLM is > 70%
|
|
908
996
|
});
|
|
909
997
|
if (slowTools.length > 0 || unreliableTools.length > 0 || llmDominatedTools.length > 0) {
|
|
@@ -948,7 +1036,7 @@ function generatePerformanceSection(profiles) {
|
|
|
948
1036
|
function generateBehavioralMatrix(profiles, personas) {
|
|
949
1037
|
const lines = [];
|
|
950
1038
|
// Check if we have findings by persona
|
|
951
|
-
const hasPersonaFindings = profiles.some(p => p.findingsByPersona && p.findingsByPersona.length > 0);
|
|
1039
|
+
const hasPersonaFindings = profiles.some((p) => p.findingsByPersona && p.findingsByPersona.length > 0);
|
|
952
1040
|
if (!hasPersonaFindings) {
|
|
953
1041
|
return [];
|
|
954
1042
|
}
|
|
@@ -957,14 +1045,14 @@ function generateBehavioralMatrix(profiles, personas) {
|
|
|
957
1045
|
lines.push('Summary of findings by tool and persona:');
|
|
958
1046
|
lines.push('');
|
|
959
1047
|
// Build header - escape persona names in case they contain special characters
|
|
960
|
-
const header = ['Tool', ...personas.map(p => escapeTableCell(p.name))];
|
|
1048
|
+
const header = ['Tool', ...personas.map((p) => escapeTableCell(p.name))];
|
|
961
1049
|
lines.push(`| ${header.join(' | ')} |`);
|
|
962
1050
|
lines.push(`| ${header.map(() => '---').join(' | ')} |`);
|
|
963
1051
|
// Build rows
|
|
964
1052
|
for (const profile of profiles) {
|
|
965
1053
|
const row = [escapeTableCell(profile.name)];
|
|
966
1054
|
for (const persona of personas) {
|
|
967
|
-
const findings = profile.findingsByPersona?.find(f => f.personaId === persona.id);
|
|
1055
|
+
const findings = profile.findingsByPersona?.find((f) => f.personaId === persona.id);
|
|
968
1056
|
if (findings) {
|
|
969
1057
|
const count = findings.behavioralNotes.length +
|
|
970
1058
|
findings.limitations.length +
|
package/dist/docs/contract.js
CHANGED
|
@@ -5,7 +5,8 @@ import { formatDateISO, formatDuration, escapeTableCell, mermaidLabel, validateJ
|
|
|
5
5
|
import { smartTruncate, getExampleLength } from '../utils/smart-truncate.js';
|
|
6
6
|
import { calculatePerformanceMetrics, extractParameters, looksLikeError } from './shared.js';
|
|
7
7
|
import { analyzeExternalDependencies, formatExternalDependenciesMarkdown, } from '../baseline/external-dependency-detector.js';
|
|
8
|
-
import { SEMANTIC_VALIDATION, SCHEMA_EVOLUTION, ERROR_ANALYSIS, PERFORMANCE_CONFIDENCE, DOCUMENTATION_SCORING, EXAMPLE_OUTPUT, EXTERNAL_DEPENDENCIES, RELIABILITY_DISPLAY, CONFIDENCE_INDICATORS, DISPLAY_LIMITS, ISSUE_CLASSIFICATION, } from '../constants.js';
|
|
8
|
+
import { SEMANTIC_VALIDATION, SCHEMA_EVOLUTION, ERROR_ANALYSIS, PERFORMANCE_CONFIDENCE, DOCUMENTATION_SCORING, EXAMPLE_OUTPUT, EXTERNAL_DEPENDENCIES, RELIABILITY_DISPLAY, CONFIDENCE_INDICATORS, DISPLAY_LIMITS, ISSUE_CLASSIFICATION, MCP, } from '../constants.js';
|
|
9
|
+
import { getFeatureFlags } from '../protocol/index.js';
|
|
9
10
|
/**
|
|
10
11
|
* Classify issues by their source to help users understand which issues
|
|
11
12
|
* are actual bugs vs expected behavior or environment issues.
|
|
@@ -147,11 +148,20 @@ export function generateContractMd(result, options) {
|
|
|
147
148
|
// Overview
|
|
148
149
|
lines.push('## Overview');
|
|
149
150
|
lines.push('');
|
|
151
|
+
const features = getFeatureFlags(discovery.protocolVersion);
|
|
150
152
|
lines.push(`**Server Version:** ${discovery.serverInfo.version}`);
|
|
151
153
|
lines.push(`**Protocol Version:** ${discovery.protocolVersion}`);
|
|
154
|
+
if (discovery.protocolVersion !== MCP.PROTOCOL_VERSION) {
|
|
155
|
+
lines.push(`*(Server protocol; bellwether supports up to ${MCP.PROTOCOL_VERSION})*`);
|
|
156
|
+
}
|
|
152
157
|
lines.push('');
|
|
153
158
|
const performanceMetrics = calculatePerformanceMetrics(toolProfiles);
|
|
154
159
|
const performanceByTool = new Map(performanceMetrics.map((metric) => [metric.toolName, metric]));
|
|
160
|
+
// Server instructions
|
|
161
|
+
if (discovery.instructions) {
|
|
162
|
+
lines.push(`**Server Instructions:** ${discovery.instructions}`);
|
|
163
|
+
lines.push('');
|
|
164
|
+
}
|
|
155
165
|
// Capabilities summary
|
|
156
166
|
lines.push('## Capabilities');
|
|
157
167
|
lines.push('');
|
|
@@ -164,6 +174,15 @@ export function generateContractMd(result, options) {
|
|
|
164
174
|
if (discovery.capabilities.resources) {
|
|
165
175
|
lines.push(`- **Resources:** ${(discovery.resources ?? []).length} available`);
|
|
166
176
|
}
|
|
177
|
+
if (discovery.resourceTemplates && discovery.resourceTemplates.length > 0) {
|
|
178
|
+
lines.push(`- **Resource Templates:** ${discovery.resourceTemplates.length} available`);
|
|
179
|
+
}
|
|
180
|
+
if (discovery.capabilities.completions && features.completions) {
|
|
181
|
+
lines.push('- **Completions:** Supported');
|
|
182
|
+
}
|
|
183
|
+
if (discovery.capabilities.tasks && features.tasks) {
|
|
184
|
+
lines.push('- **Tasks:** Supported');
|
|
185
|
+
}
|
|
167
186
|
if (discovery.capabilities.logging) {
|
|
168
187
|
lines.push('- **Logging:** Supported');
|
|
169
188
|
}
|
|
@@ -321,6 +340,22 @@ export function generateContractMd(result, options) {
|
|
|
321
340
|
lines.push('');
|
|
322
341
|
}
|
|
323
342
|
}
|
|
343
|
+
// Show tool annotations (behavioral hints) — version-gated
|
|
344
|
+
if (features.toolAnnotations && tool.annotations) {
|
|
345
|
+
const hints = [];
|
|
346
|
+
if (tool.annotations.readOnlyHint)
|
|
347
|
+
hints.push('read-only');
|
|
348
|
+
if (tool.annotations.destructiveHint)
|
|
349
|
+
hints.push('destructive');
|
|
350
|
+
if (tool.annotations.idempotentHint)
|
|
351
|
+
hints.push('idempotent');
|
|
352
|
+
if (tool.annotations.openWorldHint)
|
|
353
|
+
hints.push('open-world');
|
|
354
|
+
if (hints.length > 0) {
|
|
355
|
+
lines.push(`**Behavioral Hints:** ${hints.join(', ')}`);
|
|
356
|
+
lines.push('');
|
|
357
|
+
}
|
|
358
|
+
}
|
|
324
359
|
if (tool.inputSchema) {
|
|
325
360
|
lines.push('**Input Schema:**');
|
|
326
361
|
const schemaJson = validateJsonForCodeBlock(tool.inputSchema);
|
|
@@ -329,6 +364,15 @@ export function generateContractMd(result, options) {
|
|
|
329
364
|
lines.push('```');
|
|
330
365
|
lines.push('');
|
|
331
366
|
}
|
|
367
|
+
// Show output schema if present — version-gated
|
|
368
|
+
if (features.structuredOutput && tool.outputSchema) {
|
|
369
|
+
lines.push('**Output Schema:**');
|
|
370
|
+
const outputSchemaJson = validateJsonForCodeBlock(tool.outputSchema);
|
|
371
|
+
lines.push('```json');
|
|
372
|
+
lines.push(outputSchemaJson.content);
|
|
373
|
+
lines.push('```');
|
|
374
|
+
lines.push('');
|
|
375
|
+
}
|
|
332
376
|
// Add example usage from successful interactions
|
|
333
377
|
const examples = generateToolExamples(profile, maxExamplesPerTool, exampleLength);
|
|
334
378
|
if (examples.length > 0) {
|
|
@@ -380,6 +424,24 @@ export function generateContractMd(result, options) {
|
|
|
380
424
|
}
|
|
381
425
|
}
|
|
382
426
|
}
|
|
427
|
+
// Resource Templates section
|
|
428
|
+
if (discovery.resourceTemplates && discovery.resourceTemplates.length > 0) {
|
|
429
|
+
lines.push('## Resource Templates');
|
|
430
|
+
lines.push('');
|
|
431
|
+
for (const template of discovery.resourceTemplates) {
|
|
432
|
+
lines.push(`### ${template.name}`);
|
|
433
|
+
lines.push('');
|
|
434
|
+
lines.push(`**URI Template:** \`${template.uriTemplate}\``);
|
|
435
|
+
if (template.mimeType) {
|
|
436
|
+
lines.push(`**MIME Type:** ${template.mimeType}`);
|
|
437
|
+
}
|
|
438
|
+
lines.push('');
|
|
439
|
+
if (template.description) {
|
|
440
|
+
lines.push(template.description);
|
|
441
|
+
lines.push('');
|
|
442
|
+
}
|
|
443
|
+
}
|
|
444
|
+
}
|
|
383
445
|
// Error Summary section
|
|
384
446
|
const errorSummary = generateErrorSummarySection(toolProfiles);
|
|
385
447
|
if (errorSummary.length > 0) {
|
package/dist/errors/retry.js
CHANGED
|
@@ -166,11 +166,15 @@ export const LLM_RETRY_OPTIONS = {
|
|
|
166
166
|
return true;
|
|
167
167
|
}
|
|
168
168
|
// Auth errors - don't retry
|
|
169
|
-
if (message.includes('401') ||
|
|
169
|
+
if (message.includes('401') ||
|
|
170
|
+
message.includes('unauthorized') ||
|
|
171
|
+
message.includes('api key')) {
|
|
170
172
|
return false;
|
|
171
173
|
}
|
|
172
174
|
// Quota errors - don't retry
|
|
173
|
-
if (message.includes('quota') ||
|
|
175
|
+
if (message.includes('quota') ||
|
|
176
|
+
message.includes('insufficient') ||
|
|
177
|
+
message.includes('credit')) {
|
|
174
178
|
return false;
|
|
175
179
|
}
|
|
176
180
|
// Default: use standard isRetryable
|
|
@@ -256,9 +260,7 @@ export function createCircuitBreaker(name, options = {}) {
|
|
|
256
260
|
// Check if circuit is open
|
|
257
261
|
if (state.isOpen) {
|
|
258
262
|
const now = new Date();
|
|
259
|
-
const timeSinceOpen = state.openedAt
|
|
260
|
-
? now.getTime() - state.openedAt.getTime()
|
|
261
|
-
: 0;
|
|
263
|
+
const timeSinceOpen = state.openedAt ? now.getTime() - state.openedAt.getTime() : 0;
|
|
262
264
|
if (timeSinceOpen < resetTimeMs) {
|
|
263
265
|
// Still in open state
|
|
264
266
|
throw new BellwetherError(`Circuit breaker '${name}' is open`, {
|
|
@@ -306,6 +308,10 @@ export function createCircuitBreaker(name, options = {}) {
|
|
|
306
308
|
}
|
|
307
309
|
state.failures++;
|
|
308
310
|
state.lastFailure = now;
|
|
311
|
+
// If half-open test failed, reset openedAt to restart cooldown timer
|
|
312
|
+
if (state.isOpen) {
|
|
313
|
+
state.openedAt = now;
|
|
314
|
+
}
|
|
309
315
|
// Check if we should open the circuit
|
|
310
316
|
if (state.failures >= failureThreshold && !state.isOpen) {
|
|
311
317
|
state.isOpen = true;
|
package/dist/interview/rate-limiter.js
CHANGED
|
@@ -8,6 +8,12 @@ export class RateLimiter {
|
|
|
8
8
|
lastRefill;
|
|
9
9
|
constructor(config) {
|
|
10
10
|
this.config = config;
|
|
11
|
+
if (config.requestsPerSecond <= 0) {
|
|
12
|
+
throw new Error(`requestsPerSecond must be positive, got ${config.requestsPerSecond}`);
|
|
13
|
+
}
|
|
14
|
+
if (config.burstLimit <= 0) {
|
|
15
|
+
throw new Error(`burstLimit must be positive, got ${config.burstLimit}`);
|
|
16
|
+
}
|
|
11
17
|
this.tokens = config.burstLimit;
|
|
12
18
|
this.lastRefill = Date.now();
|
|
13
19
|
}
|
|
@@ -45,9 +51,7 @@ export function calculateBackoffMs(attempt, strategy) {
|
|
|
45
51
|
const baseDelay = RATE_LIMITING.BASE_DELAY_MS;
|
|
46
52
|
const maxDelay = RATE_LIMITING.MAX_DELAY_MS;
|
|
47
53
|
const jitter = RATE_LIMITING.JITTER_RATIO;
|
|
48
|
-
const rawDelay = strategy === 'linear'
|
|
49
|
-
? baseDelay * (attempt + 1)
|
|
50
|
-
: baseDelay * Math.pow(2, attempt);
|
|
54
|
+
const rawDelay = strategy === 'linear' ? baseDelay * (attempt + 1) : baseDelay * Math.pow(2, attempt);
|
|
51
55
|
const capped = Math.min(rawDelay, maxDelay);
|
|
52
56
|
const jitterDelta = capped * jitter * (Math.random() - 0.5) * 2;
|
|
53
57
|
return Math.max(0, Math.round(capped + jitterDelta));
|
package/dist/llm/anthropic.js
CHANGED
|
@@ -334,11 +334,17 @@ export class AnthropicClient {
|
|
|
334
334
|
}
|
|
335
335
|
// Convert to typed errors for retry logic (same as chat method)
|
|
336
336
|
if (error instanceof Error) {
|
|
337
|
-
const
|
|
338
|
-
|
|
337
|
+
const status = getErrorStatus(error);
|
|
338
|
+
const code = (getErrorCode(error) ?? '').toLowerCase();
|
|
339
|
+
const type = (getErrorType(error) ?? '').toLowerCase();
|
|
340
|
+
const message = getErrorMessage(error).toLowerCase();
|
|
341
|
+
if (status === 401 || status === 403 || message.includes('authentication')) {
|
|
339
342
|
throw new LLMAuthError('anthropic', model);
|
|
340
343
|
}
|
|
341
|
-
if (
|
|
344
|
+
if (status === 429 ||
|
|
345
|
+
code.includes('rate_limit') ||
|
|
346
|
+
type.includes('rate_limit') ||
|
|
347
|
+
message.includes('rate limit')) {
|
|
342
348
|
let retryAfterMs;
|
|
343
349
|
const apiError = error;
|
|
344
350
|
if (apiError.headers?.get) {
|
|
@@ -361,7 +367,11 @@ export class AnthropicClient {
|
|
|
361
367
|
}
|
|
362
368
|
throw new LLMRateLimitError('anthropic', retryAfterMs, model);
|
|
363
369
|
}
|
|
364
|
-
if (
|
|
370
|
+
if (status === 402 ||
|
|
371
|
+
code.includes('insufficient') ||
|
|
372
|
+
type.includes('insufficient') ||
|
|
373
|
+
message.includes('insufficient') ||
|
|
374
|
+
message.includes('credit')) {
|
|
365
375
|
throw new LLMQuotaError('anthropic', model);
|
|
366
376
|
}
|
|
367
377
|
if (message.includes('econnrefused') || message.includes('fetch failed')) {
|
package/dist/llm/fallback.d.ts
CHANGED
package/dist/llm/fallback.js
CHANGED
|
@@ -60,6 +60,7 @@ export class FallbackLLMClient {
|
|
|
60
60
|
try {
|
|
61
61
|
const ollamaClient = new OllamaClient({
|
|
62
62
|
model: this.config.ollamaModel,
|
|
63
|
+
onUsage: this.config.onUsage,
|
|
63
64
|
});
|
|
64
65
|
this.clients.set('ollama', ollamaClient);
|
|
65
66
|
this.providerOrder.push('ollama');
|
|
@@ -112,18 +113,23 @@ export class FallbackLLMClient {
|
|
|
112
113
|
/**
|
|
113
114
|
* Check if a provider is currently healthy.
|
|
114
115
|
*/
|
|
116
|
+
healthCheckInProgress = false;
|
|
115
117
|
isProviderHealthy(providerId) {
|
|
116
118
|
const health = this.health.get(providerId);
|
|
117
119
|
if (!health)
|
|
118
120
|
return false;
|
|
119
121
|
// If marked unhealthy, check if retry delay has passed
|
|
120
|
-
if (!health.healthy) {
|
|
122
|
+
if (!health.healthy && !this.healthCheckInProgress) {
|
|
121
123
|
const timeSinceCheck = Date.now() - health.lastChecked.getTime();
|
|
122
124
|
if (timeSinceCheck >= this.config.unhealthyRetryDelayMs) {
|
|
125
|
+
// Prevent concurrent health resets
|
|
126
|
+
this.healthCheckInProgress = true;
|
|
123
127
|
// Reset to allow retry
|
|
124
128
|
health.healthy = true;
|
|
125
129
|
health.consecutiveFailures = 0;
|
|
130
|
+
health.lastChecked = new Date();
|
|
126
131
|
logger.info({ provider: providerId }, 'Resetting unhealthy provider for retry');
|
|
132
|
+
this.healthCheckInProgress = false;
|
|
127
133
|
}
|
|
128
134
|
}
|
|
129
135
|
return health.healthy;
|