@dotsetlabs/bellwether 1.0.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +97 -0
- package/README.md +9 -2
- package/dist/baseline/accessors.d.ts +1 -1
- package/dist/baseline/accessors.js +1 -3
- package/dist/baseline/baseline-format.d.ts +287 -0
- package/dist/baseline/baseline-format.js +12 -0
- package/dist/baseline/comparator.js +249 -11
- package/dist/baseline/converter.d.ts +15 -15
- package/dist/baseline/converter.js +46 -34
- package/dist/baseline/diff.d.ts +1 -1
- package/dist/baseline/diff.js +45 -28
- package/dist/baseline/error-analyzer.d.ts +1 -1
- package/dist/baseline/error-analyzer.js +90 -17
- package/dist/baseline/incremental-checker.js +8 -5
- package/dist/baseline/index.d.ts +2 -12
- package/dist/baseline/index.js +3 -23
- package/dist/baseline/performance-tracker.d.ts +0 -1
- package/dist/baseline/performance-tracker.js +13 -20
- package/dist/baseline/response-fingerprint.js +39 -2
- package/dist/baseline/saver.js +41 -10
- package/dist/baseline/schema-compare.d.ts +22 -0
- package/dist/baseline/schema-compare.js +259 -16
- package/dist/baseline/types.d.ts +10 -7
- package/dist/cache/response-cache.d.ts +12 -2
- package/dist/cache/response-cache.js +178 -30
- package/dist/cli/commands/check.js +100 -54
- package/dist/cli/commands/explore.js +34 -14
- package/dist/cli/index.js +13 -3
- package/dist/config/template.js +8 -7
- package/dist/config/validator.d.ts +59 -59
- package/dist/config/validator.js +245 -90
- package/dist/constants/core.d.ts +4 -0
- package/dist/constants/core.js +8 -19
- package/dist/constants/registry.d.ts +17 -0
- package/dist/constants/registry.js +18 -0
- package/dist/constants/testing.d.ts +0 -369
- package/dist/constants/testing.js +18 -456
- package/dist/constants.d.ts +1 -1
- package/dist/constants.js +1 -1
- package/dist/docs/contract.js +131 -83
- package/dist/docs/report.js +8 -5
- package/dist/interview/insights.d.ts +17 -0
- package/dist/interview/insights.js +52 -0
- package/dist/interview/interviewer.js +119 -57
- package/dist/interview/orchestrator.js +49 -22
- package/dist/interview/prompt-test-generator.d.ts +12 -0
- package/dist/interview/prompt-test-generator.js +77 -0
- package/dist/interview/resource-test-generator.d.ts +12 -0
- package/dist/interview/resource-test-generator.js +20 -0
- package/dist/interview/schema-inferrer.js +26 -4
- package/dist/interview/schema-test-generator.js +278 -31
- package/dist/interview/stateful-test-runner.d.ts +3 -0
- package/dist/interview/stateful-test-runner.js +80 -0
- package/dist/interview/types.d.ts +12 -0
- package/dist/llm/anthropic.js +49 -16
- package/dist/llm/client.d.ts +2 -0
- package/dist/llm/client.js +61 -0
- package/dist/llm/ollama.js +9 -4
- package/dist/llm/openai.js +34 -23
- package/dist/transport/base-transport.d.ts +1 -1
- package/dist/transport/http-transport.d.ts +2 -2
- package/dist/transport/http-transport.js +26 -6
- package/dist/transport/mcp-client.d.ts +18 -6
- package/dist/transport/mcp-client.js +50 -20
- package/dist/transport/sse-transport.d.ts +8 -4
- package/dist/transport/sse-transport.js +161 -69
- package/dist/transport/stdio-transport.d.ts +1 -1
- package/dist/transport/stdio-transport.js +1 -1
- package/dist/utils/timeout.d.ts +10 -2
- package/dist/utils/timeout.js +9 -5
- package/dist/version.js +1 -1
- package/dist/workflow/executor.js +18 -13
- package/dist/workflow/loader.js +4 -1
- package/dist/workflow/state-tracker.js +22 -18
- package/man/bellwether.1 +204 -0
- package/man/bellwether.1.md +148 -0
- package/package.json +6 -7
- package/schemas/bellwether-check.schema.json +185 -0
- package/schemas/bellwether-explore.schema.json +837 -0
- package/scripts/completions/bellwether.bash +10 -4
- package/scripts/completions/bellwether.zsh +55 -2
package/dist/workflow/executor.js
CHANGED
|
@@ -63,7 +63,7 @@ export class WorkflowExecutor {
|
|
|
63
63
|
emitProgress(workflow, phase, currentStep, startTime, currentStepInfo) {
|
|
64
64
|
if (!this.onProgress)
|
|
65
65
|
return;
|
|
66
|
-
const stepsFailed = this.stepResults.filter(r => !r.success).length;
|
|
66
|
+
const stepsFailed = this.stepResults.filter((r) => !r.success).length;
|
|
67
67
|
this.onProgress({
|
|
68
68
|
phase,
|
|
69
69
|
workflow,
|
|
@@ -118,7 +118,9 @@ export class WorkflowExecutor {
|
|
|
118
118
|
if (requireSuccessfulDeps) {
|
|
119
119
|
const failedDependencies = this.getFailedDependencies(step, i);
|
|
120
120
|
if (failedDependencies.length > 0) {
|
|
121
|
-
const failedStepNames = failedDependencies
|
|
121
|
+
const failedStepNames = failedDependencies
|
|
122
|
+
.map((idx) => `step ${idx + 1} (${workflow.steps[idx]?.tool ?? 'unknown'})`)
|
|
123
|
+
.join(', ');
|
|
122
124
|
this.logger.debug({
|
|
123
125
|
stepIndex: i,
|
|
124
126
|
tool: step.tool,
|
|
@@ -229,7 +231,7 @@ export class WorkflowExecutor {
|
|
|
229
231
|
workflowId: workflow.id,
|
|
230
232
|
success,
|
|
231
233
|
stepsCompleted: this.stepResults.length,
|
|
232
|
-
stepsFailed: this.stepResults.filter(r => !r.success).length,
|
|
234
|
+
stepsFailed: this.stepResults.filter((r) => !r.success).length,
|
|
233
235
|
durationMs,
|
|
234
236
|
}, 'Workflow execution complete');
|
|
235
237
|
done();
|
|
@@ -251,7 +253,7 @@ export class WorkflowExecutor {
|
|
|
251
253
|
async executeStep(step, stepIndex, workflow) {
|
|
252
254
|
const startTime = Date.now();
|
|
253
255
|
// Verify tool exists
|
|
254
|
-
const tool = this.tools.find(t => t.name === step.tool);
|
|
256
|
+
const tool = this.tools.find((t) => t.name === step.tool);
|
|
255
257
|
if (!tool) {
|
|
256
258
|
return {
|
|
257
259
|
step,
|
|
@@ -284,7 +286,8 @@ export class WorkflowExecutor {
|
|
|
284
286
|
let error;
|
|
285
287
|
const stepTimeout = this.options.stepTimeout ?? DEFAULT_OPTIONS.stepTimeout;
|
|
286
288
|
try {
|
|
287
|
-
|
|
289
|
+
const abortController = new AbortController();
|
|
290
|
+
response = await withTimeout(this.client.callTool(step.tool, resolvedArgs, { signal: abortController.signal }), stepTimeout, `Tool call '${step.tool}'`, { abortController });
|
|
288
291
|
if (response.isError) {
|
|
289
292
|
error = this.extractErrorMessage(response);
|
|
290
293
|
}
|
|
@@ -296,7 +299,7 @@ export class WorkflowExecutor {
|
|
|
296
299
|
const assertionResults = step.assertions
|
|
297
300
|
? this.runAssertions(step.assertions, response)
|
|
298
301
|
: undefined;
|
|
299
|
-
const assertionsFailed = assertionResults?.some(r => !r.passed) ?? false;
|
|
302
|
+
const assertionsFailed = assertionResults?.some((r) => !r.passed) ?? false;
|
|
300
303
|
const success = !error && !assertionsFailed;
|
|
301
304
|
// Generate analysis if requested
|
|
302
305
|
let analysis;
|
|
@@ -359,7 +362,7 @@ export class WorkflowExecutor {
|
|
|
359
362
|
if (propertyPath.startsWith('result.') || propertyPath === 'result') {
|
|
360
363
|
// Extract text content from the response
|
|
361
364
|
const content = stepResult.response.content;
|
|
362
|
-
const textContent = content.find(c => c.type === 'text' && c.text !== undefined);
|
|
365
|
+
const textContent = content.find((c) => c.type === 'text' && c.text !== undefined);
|
|
363
366
|
if (!textContent || textContent.text === undefined) {
|
|
364
367
|
throw new Error(`Step ${stepIndex} response has no text content`);
|
|
365
368
|
}
|
|
@@ -418,7 +421,7 @@ export class WorkflowExecutor {
|
|
|
418
421
|
* Run assertions against a step response.
|
|
419
422
|
*/
|
|
420
423
|
runAssertions(assertions, response) {
|
|
421
|
-
return assertions.map(assertion => this.runAssertion(assertion, response));
|
|
424
|
+
return assertions.map((assertion) => this.runAssertion(assertion, response));
|
|
422
425
|
}
|
|
423
426
|
/**
|
|
424
427
|
* Run a single assertion.
|
|
@@ -435,7 +438,7 @@ export class WorkflowExecutor {
|
|
|
435
438
|
let actualValue;
|
|
436
439
|
try {
|
|
437
440
|
// Parse the response content as JSON
|
|
438
|
-
const textContent = response.content.find(c => c.type === 'text' && c.text !== undefined);
|
|
441
|
+
const textContent = response.content.find((c) => c.type === 'text' && c.text !== undefined);
|
|
439
442
|
if (!textContent || textContent.text === undefined) {
|
|
440
443
|
throw new Error('No text content in response');
|
|
441
444
|
}
|
|
@@ -484,14 +487,16 @@ export class WorkflowExecutor {
|
|
|
484
487
|
assertion,
|
|
485
488
|
passed,
|
|
486
489
|
actualValue,
|
|
487
|
-
message: passed
|
|
490
|
+
message: passed
|
|
491
|
+
? undefined
|
|
492
|
+
: (assertion.message ?? `Assertion failed: ${assertion.condition}`),
|
|
488
493
|
};
|
|
489
494
|
}
|
|
490
495
|
/**
|
|
491
496
|
* Extract error message from a tool response.
|
|
492
497
|
*/
|
|
493
498
|
extractErrorMessage(response) {
|
|
494
|
-
const textContent = response.content.find(c => c.type === 'text');
|
|
499
|
+
const textContent = response.content.find((c) => c.type === 'text');
|
|
495
500
|
if (textContent && 'text' in textContent) {
|
|
496
501
|
return String(textContent.text);
|
|
497
502
|
}
|
|
@@ -595,7 +600,7 @@ export class WorkflowExecutor {
|
|
|
595
600
|
if (!this.llm) {
|
|
596
601
|
return success
|
|
597
602
|
? `Workflow "${workflow.name}" completed successfully with ${stepResults.length} steps.`
|
|
598
|
-
: `Workflow "${workflow.name}" failed at step ${stepResults.findIndex(r => !r.success) + 1}.`;
|
|
603
|
+
: `Workflow "${workflow.name}" failed at step ${stepResults.findIndex((r) => !r.success) + 1}.`;
|
|
599
604
|
}
|
|
600
605
|
const prompt = buildWorkflowSummaryPrompt({ workflow, stepResults, success });
|
|
601
606
|
try {
|
|
@@ -604,7 +609,7 @@ export class WorkflowExecutor {
|
|
|
604
609
|
catch {
|
|
605
610
|
return success
|
|
606
611
|
? `Workflow "${workflow.name}" completed successfully with ${stepResults.length} steps.`
|
|
607
|
-
: `Workflow "${workflow.name}" failed at step ${stepResults.findIndex(r => !r.success) + 1}.`;
|
|
612
|
+
: `Workflow "${workflow.name}" failed at step ${stepResults.findIndex((r) => !r.success) + 1}.`;
|
|
608
613
|
}
|
|
609
614
|
}
|
|
610
615
|
}
|
package/dist/workflow/loader.js
CHANGED
|
@@ -6,8 +6,10 @@ import { join } from 'path';
|
|
|
6
6
|
import { parseAllDocuments } from 'yaml';
|
|
7
7
|
import { parseYamlSecure, YAML_SECURITY_LIMITS } from '../utils/yaml-parser.js';
|
|
8
8
|
import { PATHS } from '../constants.js';
|
|
9
|
+
import { getLogger } from '../logging/logger.js';
|
|
9
10
|
/** Default file name for workflow definitions */
|
|
10
11
|
export const DEFAULT_WORKFLOWS_FILE = PATHS.DEFAULT_WORKFLOWS_FILE;
|
|
12
|
+
const logger = getLogger('workflow');
|
|
11
13
|
/**
|
|
12
14
|
* Load workflows from a YAML file.
|
|
13
15
|
* Supports both single-document and multi-document YAML (separated by ---).
|
|
@@ -56,9 +58,10 @@ export function tryLoadDefaultWorkflows(directory) {
|
|
|
56
58
|
try {
|
|
57
59
|
return loadWorkflowsFromFile(path);
|
|
58
60
|
}
|
|
59
|
-
catch {
|
|
61
|
+
catch (error) {
|
|
60
62
|
// If the file exists but is invalid, return null rather than throwing
|
|
61
63
|
// This allows the interview to proceed without workflows
|
|
64
|
+
logger.warn({ path, error: error instanceof Error ? error.message : String(error) }, 'Failed to load default workflow file');
|
|
62
65
|
return null;
|
|
63
66
|
}
|
|
64
67
|
}
|
package/dist/workflow/state-tracker.js
CHANGED
|
@@ -74,7 +74,7 @@ export class StateTracker {
|
|
|
74
74
|
}
|
|
75
75
|
// Use specified probe tools if provided
|
|
76
76
|
if (this.options.probeTools?.length) {
|
|
77
|
-
this.probeTools = this.options.probeTools.filter(name => this.tools.some(t => t.name === name));
|
|
77
|
+
this.probeTools = this.options.probeTools.filter((name) => this.tools.some((t) => t.name === name));
|
|
78
78
|
}
|
|
79
79
|
this.logger.debug({
|
|
80
80
|
toolCount: this.tools.length,
|
|
@@ -89,9 +89,9 @@ export class StateTracker {
|
|
|
89
89
|
const name = tool.name;
|
|
90
90
|
const description = tool.description ?? '';
|
|
91
91
|
const combined = `${name} ${description}`;
|
|
92
|
-
const isReader = READER_PATTERNS.some(p => p.test(combined));
|
|
93
|
-
const isWriter = WRITER_PATTERNS.some(p => p.test(combined));
|
|
94
|
-
const isProbe = PROBE_PATTERNS.some(p => p.test(combined));
|
|
92
|
+
const isReader = READER_PATTERNS.some((p) => p.test(combined));
|
|
93
|
+
const isWriter = WRITER_PATTERNS.some((p) => p.test(combined));
|
|
94
|
+
const isProbe = PROBE_PATTERNS.some((p) => p.test(combined));
|
|
95
95
|
let role;
|
|
96
96
|
let confidence;
|
|
97
97
|
if (isReader && isWriter) {
|
|
@@ -194,8 +194,9 @@ export class StateTracker {
|
|
|
194
194
|
break;
|
|
195
195
|
}
|
|
196
196
|
try {
|
|
197
|
+
const abortController = new AbortController();
|
|
197
198
|
// Apply timeout to individual probe tool call
|
|
198
|
-
const result = await withTimeout(this.client.callTool(probeName, {}), this.probeTimeout, `Probe tool '${probeName}'`);
|
|
199
|
+
const result = await withTimeout(this.client.callTool(probeName, {}, { signal: abortController.signal }), this.probeTimeout, `Probe tool '${probeName}'`, { abortController });
|
|
199
200
|
const content = this.extractContent(result);
|
|
200
201
|
stateData[probeName] = content;
|
|
201
202
|
successCount++;
|
|
@@ -240,7 +241,7 @@ export class StateTracker {
|
|
|
240
241
|
* Extract content from a tool call result.
|
|
241
242
|
*/
|
|
242
243
|
extractContent(result) {
|
|
243
|
-
const textContent = result.content.find(c => c.type === 'text' && c.text !== undefined);
|
|
244
|
+
const textContent = result.content.find((c) => c.type === 'text' && c.text !== undefined);
|
|
244
245
|
if (!textContent || textContent.text === undefined) {
|
|
245
246
|
return null;
|
|
246
247
|
}
|
|
@@ -256,7 +257,10 @@ export class StateTracker {
|
|
|
256
257
|
*/
|
|
257
258
|
hashState(data) {
|
|
258
259
|
const json = JSON.stringify(data, null, 0);
|
|
259
|
-
return createHash('sha256')
|
|
260
|
+
return createHash('sha256')
|
|
261
|
+
.update(json)
|
|
262
|
+
.digest('hex')
|
|
263
|
+
.slice(0, DISPLAY_LIMITS.HASH_DISPLAY_LENGTH);
|
|
260
264
|
}
|
|
261
265
|
/**
|
|
262
266
|
* Compare two snapshots and identify changes.
|
|
@@ -351,7 +355,7 @@ export class StateTracker {
|
|
|
351
355
|
for (const stateType of stateTypes) {
|
|
352
356
|
const writers = writerSteps.get(stateType) ?? [];
|
|
353
357
|
// Find most recent writer for this state type
|
|
354
|
-
const recentWriters = writers.filter(w => w < i);
|
|
358
|
+
const recentWriters = writers.filter((w) => w < i);
|
|
355
359
|
if (recentWriters.length > 0) {
|
|
356
360
|
const producerStep = recentWriters[recentWriters.length - 1];
|
|
357
361
|
const producerTool = stepResults[producerStep].step.tool;
|
|
@@ -372,9 +376,9 @@ export class StateTracker {
|
|
|
372
376
|
* Verify dependencies using state snapshots.
|
|
373
377
|
*/
|
|
374
378
|
verifyDependencies(dependencies, _snapshots, changes) {
|
|
375
|
-
return dependencies.map(dep => {
|
|
379
|
+
return dependencies.map((dep) => {
|
|
376
380
|
// Check if the producer step caused any changes
|
|
377
|
-
const producerChanges = changes.filter(c => c.causedByStep === dep.producerStep);
|
|
381
|
+
const producerChanges = changes.filter((c) => c.causedByStep === dep.producerStep);
|
|
378
382
|
const verified = producerChanges.length > 0;
|
|
379
383
|
return {
|
|
380
384
|
...dep,
|
|
@@ -388,19 +392,19 @@ export class StateTracker {
|
|
|
388
392
|
async generateSummary(tracking) {
|
|
389
393
|
const parts = [];
|
|
390
394
|
// Summarize tool roles
|
|
391
|
-
const writers = tracking.toolRoles.filter(t => t.role === 'writer' || t.role === 'both');
|
|
392
|
-
const readers = tracking.toolRoles.filter(t => t.role === 'reader' || t.role === 'both');
|
|
395
|
+
const writers = tracking.toolRoles.filter((t) => t.role === 'writer' || t.role === 'both');
|
|
396
|
+
const readers = tracking.toolRoles.filter((t) => t.role === 'reader' || t.role === 'both');
|
|
393
397
|
if (writers.length > 0) {
|
|
394
|
-
parts.push(`State writers: ${writers.map(t => t.tool).join(', ')}`);
|
|
398
|
+
parts.push(`State writers: ${writers.map((t) => t.tool).join(', ')}`);
|
|
395
399
|
}
|
|
396
400
|
if (readers.length > 0) {
|
|
397
|
-
parts.push(`State readers: ${readers.map(t => t.tool).join(', ')}`);
|
|
401
|
+
parts.push(`State readers: ${readers.map((t) => t.tool).join(', ')}`);
|
|
398
402
|
}
|
|
399
403
|
// Summarize changes
|
|
400
404
|
if (tracking.changes.length > 0) {
|
|
401
|
-
const created = tracking.changes.filter(c => c.type === 'created').length;
|
|
402
|
-
const modified = tracking.changes.filter(c => c.type === 'modified').length;
|
|
403
|
-
const deleted = tracking.changes.filter(c => c.type === 'deleted').length;
|
|
405
|
+
const created = tracking.changes.filter((c) => c.type === 'created').length;
|
|
406
|
+
const modified = tracking.changes.filter((c) => c.type === 'modified').length;
|
|
407
|
+
const deleted = tracking.changes.filter((c) => c.type === 'deleted').length;
|
|
404
408
|
const changeParts = [];
|
|
405
409
|
if (created > 0)
|
|
406
410
|
changeParts.push(`${created} created`);
|
|
@@ -415,7 +419,7 @@ export class StateTracker {
|
|
|
415
419
|
}
|
|
416
420
|
// Summarize dependencies
|
|
417
421
|
if (tracking.dependencies.length > 0) {
|
|
418
|
-
const verified = tracking.dependencies.filter(d => d.verified).length;
|
|
422
|
+
const verified = tracking.dependencies.filter((d) => d.verified).length;
|
|
419
423
|
parts.push(`Dependencies: ${tracking.dependencies.length} inferred (${verified} verified)`);
|
|
420
424
|
}
|
|
421
425
|
return `${parts.join('. ')}.`;
|
package/man/bellwether.1
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
.TH "BELLWETHER" "1" "2026\-02\-04" "Bellwether 2.0.0" "User Commands"
|
|
2
|
+
.SH NAME
|
|
3
|
+
.PP
|
|
4
|
+
bellwether \[em] MCP server testing and validation tool
|
|
5
|
+
.SH SYNOPSIS
|
|
6
|
+
.PP
|
|
7
|
+
\f[B]bellwether\f[] [OPTIONS] COMMAND [ARGS...]
|
|
8
|
+
.PP
|
|
9
|
+
\f[B]bellwether\f[] \f[B]\-\-version\f[]
|
|
10
|
+
.PP
|
|
11
|
+
\f[B]bellwether\f[] \f[B]\-\-help\f[]
|
|
12
|
+
.SH DESCRIPTION
|
|
13
|
+
.PP
|
|
14
|
+
Bellwether is an open\-source MCP (Model Context Protocol) testing tool
|
|
15
|
+
that provides structural drift detection and behavioral documentation
|
|
16
|
+
for MCP servers.
|
|
17
|
+
.SH COMMANDS
|
|
18
|
+
.TP
|
|
19
|
+
.B \f[B]check\f[] [\f[I]options\f[]] [server\-command]
|
|
20
|
+
Schema validation and drift detection (free, fast, deterministic)
|
|
21
|
+
.RS
|
|
22
|
+
.RE
|
|
23
|
+
.TP
|
|
24
|
+
.B \f[B]explore\f[] [\f[I]options\f[]] [server\-command]
|
|
25
|
+
LLM\-powered behavioral exploration and documentation
|
|
26
|
+
.RS
|
|
27
|
+
.RE
|
|
28
|
+
.TP
|
|
29
|
+
.B \f[B]discover\f[] [\f[I]options\f[]] [server\-command]
|
|
30
|
+
Discover MCP server capabilities (tools, prompts, resources)
|
|
31
|
+
.RS
|
|
32
|
+
.RE
|
|
33
|
+
.TP
|
|
34
|
+
.B \f[B]watch\f[] [\f[I]options\f[]]
|
|
35
|
+
Watch for MCP server changes and auto\-check
|
|
36
|
+
.RS
|
|
37
|
+
.RE
|
|
38
|
+
.TP
|
|
39
|
+
.B \f[B]init\f[] [\f[I]options\f[]] [server\-command]
|
|
40
|
+
Initialize a bellwether.yaml configuration file
|
|
41
|
+
.RS
|
|
42
|
+
.RE
|
|
43
|
+
.TP
|
|
44
|
+
.B \f[B]auth\f[] \f[I]subcommand\f[] [\f[I]options\f[]]
|
|
45
|
+
Manage LLM provider API keys
|
|
46
|
+
.RS
|
|
47
|
+
.RE
|
|
48
|
+
.TP
|
|
49
|
+
.B \f[B]baseline\f[] \f[I]subcommand\f[] [\f[I]options\f[]]
|
|
50
|
+
Manage baselines for drift detection
|
|
51
|
+
.RS
|
|
52
|
+
.RE
|
|
53
|
+
.TP
|
|
54
|
+
.B \f[B]golden\f[] \f[I]subcommand\f[] [\f[I]options\f[]]
|
|
55
|
+
Manage golden outputs for validation
|
|
56
|
+
.RS
|
|
57
|
+
.RE
|
|
58
|
+
.TP
|
|
59
|
+
.B \f[B]registry\f[] [\f[I]options\f[]] \f[I]search\f[]
|
|
60
|
+
Search the MCP Registry for servers
|
|
61
|
+
.RS
|
|
62
|
+
.RE
|
|
63
|
+
.TP
|
|
64
|
+
.B \f[B]contract\f[] \f[I]subcommand\f[] [\f[I]options\f[]]
|
|
65
|
+
Validate MCP servers against contracts
|
|
66
|
+
.RS
|
|
67
|
+
.RE
|
|
68
|
+
.TP
|
|
69
|
+
.B \f[B]validate\-config\f[] [\f[I]options\f[]]
|
|
70
|
+
Validate bellwether.yaml configuration
|
|
71
|
+
.RS
|
|
72
|
+
.RE
|
|
73
|
+
.SH GLOBAL OPTIONS
|
|
74
|
+
.TP
|
|
75
|
+
.B \f[B]\-h\f[], \f[B]\-\-help\f[]
|
|
76
|
+
Show help message and exit
|
|
77
|
+
.RS
|
|
78
|
+
.RE
|
|
79
|
+
.TP
|
|
80
|
+
.B \f[B]\-\-version\f[]
|
|
81
|
+
Show version information and exit
|
|
82
|
+
.RS
|
|
83
|
+
.RE
|
|
84
|
+
.TP
|
|
85
|
+
.B \f[B]\-\-log\-level\f[] \f[I]LEVEL\f[]
|
|
86
|
+
Set log level: debug, info, warn, error, silent
|
|
87
|
+
.RS
|
|
88
|
+
.RE
|
|
89
|
+
.TP
|
|
90
|
+
.B \f[B]\-\-log\-file\f[] \f[I]PATH\f[]
|
|
91
|
+
Write logs to file instead of stderr
|
|
92
|
+
.RS
|
|
93
|
+
.RE
|
|
94
|
+
.SH EXAMPLES
|
|
95
|
+
.PP
|
|
96
|
+
Initialize configuration:
|
|
97
|
+
.IP
|
|
98
|
+
.nf
|
|
99
|
+
\f[C]
|
|
100
|
+
bellwether\ init\ npx\ \@modelcontextprotocol/server\-filesystem
|
|
101
|
+
\f[]
|
|
102
|
+
.fi
|
|
103
|
+
.PP
|
|
104
|
+
Run drift detection:
|
|
105
|
+
.IP
|
|
106
|
+
.nf
|
|
107
|
+
\f[C]
|
|
108
|
+
bellwether\ check
|
|
109
|
+
\f[]
|
|
110
|
+
.fi
|
|
111
|
+
.PP
|
|
112
|
+
Save baseline:
|
|
113
|
+
.IP
|
|
114
|
+
.nf
|
|
115
|
+
\f[C]
|
|
116
|
+
bellwether\ baseline\ save
|
|
117
|
+
\f[]
|
|
118
|
+
.fi
|
|
119
|
+
.PP
|
|
120
|
+
Explore with LLM:
|
|
121
|
+
.IP
|
|
122
|
+
.nf
|
|
123
|
+
\f[C]
|
|
124
|
+
bellwether\ explore
|
|
125
|
+
\f[]
|
|
126
|
+
.fi
|
|
127
|
+
.SH FILES
|
|
128
|
+
.TP
|
|
129
|
+
.B \f[I]bellwether.yaml\f[]
|
|
130
|
+
Configuration file for the project
|
|
131
|
+
.RS
|
|
132
|
+
.RE
|
|
133
|
+
.TP
|
|
134
|
+
.B \f[I]bellwether\-baseline.json\f[]
|
|
135
|
+
Saved baseline for drift detection
|
|
136
|
+
.RS
|
|
137
|
+
.RE
|
|
138
|
+
.TP
|
|
139
|
+
.B \f[I]CONTRACT.md\f[]
|
|
140
|
+
Generated contract documentation
|
|
141
|
+
.RS
|
|
142
|
+
.RE
|
|
143
|
+
.TP
|
|
144
|
+
.B \f[I]AGENTS.md\f[]
|
|
145
|
+
Generated behavioral documentation
|
|
146
|
+
.RS
|
|
147
|
+
.RE
|
|
148
|
+
.SH ENVIRONMENT
|
|
149
|
+
.TP
|
|
150
|
+
.B \f[I]OPENAI_API_KEY\f[]
|
|
151
|
+
API key for OpenAI (explore mode only)
|
|
152
|
+
.RS
|
|
153
|
+
.RE
|
|
154
|
+
.TP
|
|
155
|
+
.B \f[I]ANTHROPIC_API_KEY\f[]
|
|
156
|
+
API key for Anthropic (explore mode only)
|
|
157
|
+
.RS
|
|
158
|
+
.RE
|
|
159
|
+
.TP
|
|
160
|
+
.B \f[I]OLLAMA_BASE_URL\f[]
|
|
161
|
+
Ollama URL (default: http://localhost:11434)
|
|
162
|
+
.RS
|
|
163
|
+
.RE
|
|
164
|
+
.SH EXIT STATUS
|
|
165
|
+
.TP
|
|
166
|
+
.B \f[B]0\f[]
|
|
167
|
+
Success, no changes detected
|
|
168
|
+
.RS
|
|
169
|
+
.RE
|
|
170
|
+
.TP
|
|
171
|
+
.B \f[B]1\f[]
|
|
172
|
+
Info\-level changes only
|
|
173
|
+
.RS
|
|
174
|
+
.RE
|
|
175
|
+
.TP
|
|
176
|
+
.B \f[B]2\f[]
|
|
177
|
+
Warning\-level changes
|
|
178
|
+
.RS
|
|
179
|
+
.RE
|
|
180
|
+
.TP
|
|
181
|
+
.B \f[B]3\f[]
|
|
182
|
+
Breaking changes detected
|
|
183
|
+
.RS
|
|
184
|
+
.RE
|
|
185
|
+
.TP
|
|
186
|
+
.B \f[B]4\f[]
|
|
187
|
+
Runtime error
|
|
188
|
+
.RS
|
|
189
|
+
.RE
|
|
190
|
+
.TP
|
|
191
|
+
.B \f[B]5\f[]
|
|
192
|
+
Low confidence metrics
|
|
193
|
+
.RS
|
|
194
|
+
.RE
|
|
195
|
+
.SH SEE ALSO
|
|
196
|
+
.PP
|
|
197
|
+
Project homepage: <https://github.com/dotsetlabs/bellwether>
|
|
198
|
+
.PP
|
|
199
|
+
Documentation: <https://docs.bellwether.sh>
|
|
200
|
+
.PP
|
|
201
|
+
MCP Specification: <https://modelcontextprotocol.io>
|
|
202
|
+
.SH AUTHORS
|
|
203
|
+
.PP
|
|
204
|
+
Dotset Labs LLC <hello@dotsetlabs.com>
|
package/man/bellwether.1.md
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: BELLWETHER
|
|
3
|
+
section: 1
|
|
4
|
+
header: User Commands
|
|
5
|
+
footer: Bellwether 2.0.0
|
|
6
|
+
date: 2026-02-04
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# NAME
|
|
10
|
+
|
|
11
|
+
bellwether — MCP server testing and validation tool
|
|
12
|
+
|
|
13
|
+
# SYNOPSIS
|
|
14
|
+
|
|
15
|
+
**bellwether** [OPTIONS] COMMAND [ARGS...]
|
|
16
|
+
|
|
17
|
+
**bellwether** **--version**
|
|
18
|
+
|
|
19
|
+
**bellwether** **--help**
|
|
20
|
+
|
|
21
|
+
# DESCRIPTION
|
|
22
|
+
|
|
23
|
+
Bellwether is an open-source MCP (Model Context Protocol) testing tool that provides
|
|
24
|
+
structural drift detection and behavioral documentation for MCP servers.
|
|
25
|
+
|
|
26
|
+
# COMMANDS
|
|
27
|
+
|
|
28
|
+
**check** [*options*] [server-command]
|
|
29
|
+
: Schema validation and drift detection (free, fast, deterministic)
|
|
30
|
+
|
|
31
|
+
**explore** [*options*] [server-command]
|
|
32
|
+
: LLM-powered behavioral exploration and documentation
|
|
33
|
+
|
|
34
|
+
**discover** [*options*] [server-command]
|
|
35
|
+
: Discover MCP server capabilities (tools, prompts, resources)
|
|
36
|
+
|
|
37
|
+
**watch** [*options*]
|
|
38
|
+
: Watch for MCP server changes and auto-check
|
|
39
|
+
|
|
40
|
+
**init** [*options*] [server-command]
|
|
41
|
+
: Initialize a bellwether.yaml configuration file
|
|
42
|
+
|
|
43
|
+
**auth** *subcommand* [*options*]
|
|
44
|
+
: Manage LLM provider API keys
|
|
45
|
+
|
|
46
|
+
**baseline** *subcommand* [*options*]
|
|
47
|
+
: Manage baselines for drift detection
|
|
48
|
+
|
|
49
|
+
**golden** *subcommand* [*options*]
|
|
50
|
+
: Manage golden outputs for validation
|
|
51
|
+
|
|
52
|
+
**registry** [*options*] *search*
|
|
53
|
+
: Search the MCP Registry for servers
|
|
54
|
+
|
|
55
|
+
**contract** *subcommand* [*options*]
|
|
56
|
+
: Validate MCP servers against contracts
|
|
57
|
+
|
|
58
|
+
**validate-config** [*options*]
|
|
59
|
+
: Validate bellwether.yaml configuration
|
|
60
|
+
|
|
61
|
+
# GLOBAL OPTIONS
|
|
62
|
+
|
|
63
|
+
**-h**, **--help**
|
|
64
|
+
: Show help message and exit
|
|
65
|
+
|
|
66
|
+
**--version**
|
|
67
|
+
: Show version information and exit
|
|
68
|
+
|
|
69
|
+
**--log-level** *LEVEL*
|
|
70
|
+
: Set log level: debug, info, warn, error, silent
|
|
71
|
+
|
|
72
|
+
**--log-file** *PATH*
|
|
73
|
+
: Write logs to file instead of stderr
|
|
74
|
+
|
|
75
|
+
# EXAMPLES
|
|
76
|
+
|
|
77
|
+
Initialize configuration:
|
|
78
|
+
|
|
79
|
+
bellwether init npx @modelcontextprotocol/server-filesystem
|
|
80
|
+
|
|
81
|
+
Run drift detection:
|
|
82
|
+
|
|
83
|
+
bellwether check
|
|
84
|
+
|
|
85
|
+
Save baseline:
|
|
86
|
+
|
|
87
|
+
bellwether baseline save
|
|
88
|
+
|
|
89
|
+
Explore with LLM:
|
|
90
|
+
|
|
91
|
+
bellwether explore
|
|
92
|
+
|
|
93
|
+
# FILES
|
|
94
|
+
|
|
95
|
+
*bellwether.yaml*
|
|
96
|
+
: Configuration file for the project
|
|
97
|
+
|
|
98
|
+
*bellwether-baseline.json*
|
|
99
|
+
: Saved baseline for drift detection
|
|
100
|
+
|
|
101
|
+
*CONTRACT.md*
|
|
102
|
+
: Generated contract documentation
|
|
103
|
+
|
|
104
|
+
*AGENTS.md*
|
|
105
|
+
: Generated behavioral documentation
|
|
106
|
+
|
|
107
|
+
# ENVIRONMENT
|
|
108
|
+
|
|
109
|
+
*OPENAI_API_KEY*
|
|
110
|
+
: API key for OpenAI (explore mode only)
|
|
111
|
+
|
|
112
|
+
*ANTHROPIC_API_KEY*
|
|
113
|
+
: API key for Anthropic (explore mode only)
|
|
114
|
+
|
|
115
|
+
*OLLAMA_BASE_URL*
|
|
116
|
+
: Ollama URL (default: http://localhost:11434)
|
|
117
|
+
|
|
118
|
+
# EXIT STATUS
|
|
119
|
+
|
|
120
|
+
**0**
|
|
121
|
+
: Success, no changes detected
|
|
122
|
+
|
|
123
|
+
**1**
|
|
124
|
+
: Info-level changes only
|
|
125
|
+
|
|
126
|
+
**2**
|
|
127
|
+
: Warning-level changes
|
|
128
|
+
|
|
129
|
+
**3**
|
|
130
|
+
: Breaking changes detected
|
|
131
|
+
|
|
132
|
+
**4**
|
|
133
|
+
: Runtime error
|
|
134
|
+
|
|
135
|
+
**5**
|
|
136
|
+
: Low confidence metrics
|
|
137
|
+
|
|
138
|
+
# SEE ALSO
|
|
139
|
+
|
|
140
|
+
Project homepage: <https://github.com/dotsetlabs/bellwether>
|
|
141
|
+
|
|
142
|
+
Documentation: <https://docs.bellwether.sh>
|
|
143
|
+
|
|
144
|
+
MCP Specification: <https://modelcontextprotocol.io>
|
|
145
|
+
|
|
146
|
+
# AUTHORS
|
|
147
|
+
|
|
148
|
+
Dotset Labs LLC <hello@dotsetlabs.com>
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@dotsetlabs/bellwether",
|
|
3
|
-
"version": "1.0.2",
|
|
3
|
+
"version": "2.0.0",
|
|
4
4
|
"description": "The open-source MCP testing tool. Structural drift detection and behavioral documentation for Model Context Protocol servers.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -33,10 +33,11 @@
|
|
|
33
33
|
"format": "prettier --write \"src/**/*.ts\"",
|
|
34
34
|
"format:check": "prettier --check \"src/**/*.ts\"",
|
|
35
35
|
"clean": "rm -rf dist",
|
|
36
|
-
"docs:generate": "
|
|
36
|
+
"docs:generate": "npm --prefix website run build",
|
|
37
|
+
"docs:dev": "npm --prefix website run start",
|
|
37
38
|
"man:generate": "./scripts/generate-manpage.sh",
|
|
38
39
|
"prepare": "husky install || true",
|
|
39
|
-
"prepublishOnly": "npm run build"
|
|
40
|
+
"prepublishOnly": "npm run build && npm run man:generate"
|
|
40
41
|
},
|
|
41
42
|
"keywords": [
|
|
42
43
|
"mcp",
|
|
@@ -59,8 +60,7 @@
|
|
|
59
60
|
"license": "MIT",
|
|
60
61
|
"repository": {
|
|
61
62
|
"type": "git",
|
|
62
|
-
"url": "https://github.com/dotsetlabs/bellwether",
|
|
63
|
-
"directory": "cli"
|
|
63
|
+
"url": "https://github.com/dotsetlabs/bellwether"
|
|
64
64
|
},
|
|
65
65
|
"funding": {
|
|
66
66
|
"type": "github",
|
|
@@ -97,10 +97,9 @@
|
|
|
97
97
|
"@typescript-eslint/parser": "^6.21.0",
|
|
98
98
|
"eslint": "^8.57.1",
|
|
99
99
|
"husky": "^9.1.0",
|
|
100
|
-
"lint-staged": "^
|
|
100
|
+
"lint-staged": "^16.2.7",
|
|
101
101
|
"prettier": "^3.3.0",
|
|
102
102
|
"tsx": "^4.21.0",
|
|
103
|
-
"typedoc": "^0.28.16",
|
|
104
103
|
"typescript": "^5.3.0",
|
|
105
104
|
"vitest": "^4.0.17"
|
|
106
105
|
},
|