@dotsetlabs/bellwether 1.0.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/CHANGELOG.md +97 -0
  2. package/README.md +9 -2
  3. package/dist/baseline/accessors.d.ts +1 -1
  4. package/dist/baseline/accessors.js +1 -3
  5. package/dist/baseline/baseline-format.d.ts +287 -0
  6. package/dist/baseline/baseline-format.js +12 -0
  7. package/dist/baseline/comparator.js +249 -11
  8. package/dist/baseline/converter.d.ts +15 -15
  9. package/dist/baseline/converter.js +46 -34
  10. package/dist/baseline/diff.d.ts +1 -1
  11. package/dist/baseline/diff.js +45 -28
  12. package/dist/baseline/error-analyzer.d.ts +1 -1
  13. package/dist/baseline/error-analyzer.js +90 -17
  14. package/dist/baseline/incremental-checker.js +8 -5
  15. package/dist/baseline/index.d.ts +2 -12
  16. package/dist/baseline/index.js +3 -23
  17. package/dist/baseline/performance-tracker.d.ts +0 -1
  18. package/dist/baseline/performance-tracker.js +13 -20
  19. package/dist/baseline/response-fingerprint.js +39 -2
  20. package/dist/baseline/saver.js +41 -10
  21. package/dist/baseline/schema-compare.d.ts +22 -0
  22. package/dist/baseline/schema-compare.js +259 -16
  23. package/dist/baseline/types.d.ts +10 -7
  24. package/dist/cache/response-cache.d.ts +12 -2
  25. package/dist/cache/response-cache.js +178 -30
  26. package/dist/cli/commands/check.js +100 -54
  27. package/dist/cli/commands/explore.js +34 -14
  28. package/dist/cli/index.js +13 -3
  29. package/dist/config/template.js +8 -7
  30. package/dist/config/validator.d.ts +59 -59
  31. package/dist/config/validator.js +245 -90
  32. package/dist/constants/core.d.ts +4 -0
  33. package/dist/constants/core.js +8 -19
  34. package/dist/constants/registry.d.ts +17 -0
  35. package/dist/constants/registry.js +18 -0
  36. package/dist/constants/testing.d.ts +0 -369
  37. package/dist/constants/testing.js +18 -456
  38. package/dist/constants.d.ts +1 -1
  39. package/dist/constants.js +1 -1
  40. package/dist/docs/contract.js +131 -83
  41. package/dist/docs/report.js +8 -5
  42. package/dist/interview/insights.d.ts +17 -0
  43. package/dist/interview/insights.js +52 -0
  44. package/dist/interview/interviewer.js +119 -57
  45. package/dist/interview/orchestrator.js +49 -22
  46. package/dist/interview/prompt-test-generator.d.ts +12 -0
  47. package/dist/interview/prompt-test-generator.js +77 -0
  48. package/dist/interview/resource-test-generator.d.ts +12 -0
  49. package/dist/interview/resource-test-generator.js +20 -0
  50. package/dist/interview/schema-inferrer.js +26 -4
  51. package/dist/interview/schema-test-generator.js +278 -31
  52. package/dist/interview/stateful-test-runner.d.ts +3 -0
  53. package/dist/interview/stateful-test-runner.js +80 -0
  54. package/dist/interview/types.d.ts +12 -0
  55. package/dist/llm/anthropic.js +49 -16
  56. package/dist/llm/client.d.ts +2 -0
  57. package/dist/llm/client.js +61 -0
  58. package/dist/llm/ollama.js +9 -4
  59. package/dist/llm/openai.js +34 -23
  60. package/dist/transport/base-transport.d.ts +1 -1
  61. package/dist/transport/http-transport.d.ts +2 -2
  62. package/dist/transport/http-transport.js +26 -6
  63. package/dist/transport/mcp-client.d.ts +18 -6
  64. package/dist/transport/mcp-client.js +50 -20
  65. package/dist/transport/sse-transport.d.ts +8 -4
  66. package/dist/transport/sse-transport.js +161 -69
  67. package/dist/transport/stdio-transport.d.ts +1 -1
  68. package/dist/transport/stdio-transport.js +1 -1
  69. package/dist/utils/timeout.d.ts +10 -2
  70. package/dist/utils/timeout.js +9 -5
  71. package/dist/version.js +1 -1
  72. package/dist/workflow/executor.js +18 -13
  73. package/dist/workflow/loader.js +4 -1
  74. package/dist/workflow/state-tracker.js +22 -18
  75. package/man/bellwether.1 +204 -0
  76. package/man/bellwether.1.md +148 -0
  77. package/package.json +6 -7
  78. package/schemas/bellwether-check.schema.json +185 -0
  79. package/schemas/bellwether-explore.schema.json +837 -0
  80. package/scripts/completions/bellwether.bash +10 -4
  81. package/scripts/completions/bellwether.zsh +55 -2
@@ -63,7 +63,7 @@ export class WorkflowExecutor {
63
63
  emitProgress(workflow, phase, currentStep, startTime, currentStepInfo) {
64
64
  if (!this.onProgress)
65
65
  return;
66
- const stepsFailed = this.stepResults.filter(r => !r.success).length;
66
+ const stepsFailed = this.stepResults.filter((r) => !r.success).length;
67
67
  this.onProgress({
68
68
  phase,
69
69
  workflow,
@@ -118,7 +118,9 @@ export class WorkflowExecutor {
118
118
  if (requireSuccessfulDeps) {
119
119
  const failedDependencies = this.getFailedDependencies(step, i);
120
120
  if (failedDependencies.length > 0) {
121
- const failedStepNames = failedDependencies.map((idx) => `step ${idx + 1} (${workflow.steps[idx]?.tool ?? 'unknown'})`).join(', ');
121
+ const failedStepNames = failedDependencies
122
+ .map((idx) => `step ${idx + 1} (${workflow.steps[idx]?.tool ?? 'unknown'})`)
123
+ .join(', ');
122
124
  this.logger.debug({
123
125
  stepIndex: i,
124
126
  tool: step.tool,
@@ -229,7 +231,7 @@ export class WorkflowExecutor {
229
231
  workflowId: workflow.id,
230
232
  success,
231
233
  stepsCompleted: this.stepResults.length,
232
- stepsFailed: this.stepResults.filter(r => !r.success).length,
234
+ stepsFailed: this.stepResults.filter((r) => !r.success).length,
233
235
  durationMs,
234
236
  }, 'Workflow execution complete');
235
237
  done();
@@ -251,7 +253,7 @@ export class WorkflowExecutor {
251
253
  async executeStep(step, stepIndex, workflow) {
252
254
  const startTime = Date.now();
253
255
  // Verify tool exists
254
- const tool = this.tools.find(t => t.name === step.tool);
256
+ const tool = this.tools.find((t) => t.name === step.tool);
255
257
  if (!tool) {
256
258
  return {
257
259
  step,
@@ -284,7 +286,8 @@ export class WorkflowExecutor {
284
286
  let error;
285
287
  const stepTimeout = this.options.stepTimeout ?? DEFAULT_OPTIONS.stepTimeout;
286
288
  try {
287
- response = await withTimeout(this.client.callTool(step.tool, resolvedArgs), stepTimeout, `Tool call '${step.tool}'`);
289
+ const abortController = new AbortController();
290
+ response = await withTimeout(this.client.callTool(step.tool, resolvedArgs, { signal: abortController.signal }), stepTimeout, `Tool call '${step.tool}'`, { abortController });
288
291
  if (response.isError) {
289
292
  error = this.extractErrorMessage(response);
290
293
  }
@@ -296,7 +299,7 @@ export class WorkflowExecutor {
296
299
  const assertionResults = step.assertions
297
300
  ? this.runAssertions(step.assertions, response)
298
301
  : undefined;
299
- const assertionsFailed = assertionResults?.some(r => !r.passed) ?? false;
302
+ const assertionsFailed = assertionResults?.some((r) => !r.passed) ?? false;
300
303
  const success = !error && !assertionsFailed;
301
304
  // Generate analysis if requested
302
305
  let analysis;
@@ -359,7 +362,7 @@ export class WorkflowExecutor {
359
362
  if (propertyPath.startsWith('result.') || propertyPath === 'result') {
360
363
  // Extract text content from the response
361
364
  const content = stepResult.response.content;
362
- const textContent = content.find(c => c.type === 'text' && c.text !== undefined);
365
+ const textContent = content.find((c) => c.type === 'text' && c.text !== undefined);
363
366
  if (!textContent || textContent.text === undefined) {
364
367
  throw new Error(`Step ${stepIndex} response has no text content`);
365
368
  }
@@ -418,7 +421,7 @@ export class WorkflowExecutor {
418
421
  * Run assertions against a step response.
419
422
  */
420
423
  runAssertions(assertions, response) {
421
- return assertions.map(assertion => this.runAssertion(assertion, response));
424
+ return assertions.map((assertion) => this.runAssertion(assertion, response));
422
425
  }
423
426
  /**
424
427
  * Run a single assertion.
@@ -435,7 +438,7 @@ export class WorkflowExecutor {
435
438
  let actualValue;
436
439
  try {
437
440
  // Parse the response content as JSON
438
- const textContent = response.content.find(c => c.type === 'text' && c.text !== undefined);
441
+ const textContent = response.content.find((c) => c.type === 'text' && c.text !== undefined);
439
442
  if (!textContent || textContent.text === undefined) {
440
443
  throw new Error('No text content in response');
441
444
  }
@@ -484,14 +487,16 @@ export class WorkflowExecutor {
484
487
  assertion,
485
488
  passed,
486
489
  actualValue,
487
- message: passed ? undefined : (assertion.message ?? `Assertion failed: ${assertion.condition}`),
490
+ message: passed
491
+ ? undefined
492
+ : (assertion.message ?? `Assertion failed: ${assertion.condition}`),
488
493
  };
489
494
  }
490
495
  /**
491
496
  * Extract error message from a tool response.
492
497
  */
493
498
  extractErrorMessage(response) {
494
- const textContent = response.content.find(c => c.type === 'text');
499
+ const textContent = response.content.find((c) => c.type === 'text');
495
500
  if (textContent && 'text' in textContent) {
496
501
  return String(textContent.text);
497
502
  }
@@ -595,7 +600,7 @@ export class WorkflowExecutor {
595
600
  if (!this.llm) {
596
601
  return success
597
602
  ? `Workflow "${workflow.name}" completed successfully with ${stepResults.length} steps.`
598
- : `Workflow "${workflow.name}" failed at step ${stepResults.findIndex(r => !r.success) + 1}.`;
603
+ : `Workflow "${workflow.name}" failed at step ${stepResults.findIndex((r) => !r.success) + 1}.`;
599
604
  }
600
605
  const prompt = buildWorkflowSummaryPrompt({ workflow, stepResults, success });
601
606
  try {
@@ -604,7 +609,7 @@ export class WorkflowExecutor {
604
609
  catch {
605
610
  return success
606
611
  ? `Workflow "${workflow.name}" completed successfully with ${stepResults.length} steps.`
607
- : `Workflow "${workflow.name}" failed at step ${stepResults.findIndex(r => !r.success) + 1}.`;
612
+ : `Workflow "${workflow.name}" failed at step ${stepResults.findIndex((r) => !r.success) + 1}.`;
608
613
  }
609
614
  }
610
615
  }
@@ -6,8 +6,10 @@ import { join } from 'path';
6
6
  import { parseAllDocuments } from 'yaml';
7
7
  import { parseYamlSecure, YAML_SECURITY_LIMITS } from '../utils/yaml-parser.js';
8
8
  import { PATHS } from '../constants.js';
9
+ import { getLogger } from '../logging/logger.js';
9
10
  /** Default file name for workflow definitions */
10
11
  export const DEFAULT_WORKFLOWS_FILE = PATHS.DEFAULT_WORKFLOWS_FILE;
12
+ const logger = getLogger('workflow');
11
13
  /**
12
14
  * Load workflows from a YAML file.
13
15
  * Supports both single-document and multi-document YAML (separated by ---).
@@ -56,9 +58,10 @@ export function tryLoadDefaultWorkflows(directory) {
56
58
  try {
57
59
  return loadWorkflowsFromFile(path);
58
60
  }
59
- catch {
61
+ catch (error) {
60
62
  // If the file exists but is invalid, return null rather than throwing
61
63
  // This allows the interview to proceed without workflows
64
+ logger.warn({ path, error: error instanceof Error ? error.message : String(error) }, 'Failed to load default workflow file');
62
65
  return null;
63
66
  }
64
67
  }
@@ -74,7 +74,7 @@ export class StateTracker {
74
74
  }
75
75
  // Use specified probe tools if provided
76
76
  if (this.options.probeTools?.length) {
77
- this.probeTools = this.options.probeTools.filter(name => this.tools.some(t => t.name === name));
77
+ this.probeTools = this.options.probeTools.filter((name) => this.tools.some((t) => t.name === name));
78
78
  }
79
79
  this.logger.debug({
80
80
  toolCount: this.tools.length,
@@ -89,9 +89,9 @@ export class StateTracker {
89
89
  const name = tool.name;
90
90
  const description = tool.description ?? '';
91
91
  const combined = `${name} ${description}`;
92
- const isReader = READER_PATTERNS.some(p => p.test(combined));
93
- const isWriter = WRITER_PATTERNS.some(p => p.test(combined));
94
- const isProbe = PROBE_PATTERNS.some(p => p.test(combined));
92
+ const isReader = READER_PATTERNS.some((p) => p.test(combined));
93
+ const isWriter = WRITER_PATTERNS.some((p) => p.test(combined));
94
+ const isProbe = PROBE_PATTERNS.some((p) => p.test(combined));
95
95
  let role;
96
96
  let confidence;
97
97
  if (isReader && isWriter) {
@@ -194,8 +194,9 @@ export class StateTracker {
194
194
  break;
195
195
  }
196
196
  try {
197
+ const abortController = new AbortController();
197
198
  // Apply timeout to individual probe tool call
198
- const result = await withTimeout(this.client.callTool(probeName, {}), this.probeTimeout, `Probe tool '${probeName}'`);
199
+ const result = await withTimeout(this.client.callTool(probeName, {}, { signal: abortController.signal }), this.probeTimeout, `Probe tool '${probeName}'`, { abortController });
199
200
  const content = this.extractContent(result);
200
201
  stateData[probeName] = content;
201
202
  successCount++;
@@ -240,7 +241,7 @@ export class StateTracker {
240
241
  * Extract content from a tool call result.
241
242
  */
242
243
  extractContent(result) {
243
- const textContent = result.content.find(c => c.type === 'text' && c.text !== undefined);
244
+ const textContent = result.content.find((c) => c.type === 'text' && c.text !== undefined);
244
245
  if (!textContent || textContent.text === undefined) {
245
246
  return null;
246
247
  }
@@ -256,7 +257,10 @@ export class StateTracker {
256
257
  */
257
258
  hashState(data) {
258
259
  const json = JSON.stringify(data, null, 0);
259
- return createHash('sha256').update(json).digest('hex').slice(0, DISPLAY_LIMITS.HASH_DISPLAY_LENGTH);
260
+ return createHash('sha256')
261
+ .update(json)
262
+ .digest('hex')
263
+ .slice(0, DISPLAY_LIMITS.HASH_DISPLAY_LENGTH);
260
264
  }
261
265
  /**
262
266
  * Compare two snapshots and identify changes.
@@ -351,7 +355,7 @@ export class StateTracker {
351
355
  for (const stateType of stateTypes) {
352
356
  const writers = writerSteps.get(stateType) ?? [];
353
357
  // Find most recent writer for this state type
354
- const recentWriters = writers.filter(w => w < i);
358
+ const recentWriters = writers.filter((w) => w < i);
355
359
  if (recentWriters.length > 0) {
356
360
  const producerStep = recentWriters[recentWriters.length - 1];
357
361
  const producerTool = stepResults[producerStep].step.tool;
@@ -372,9 +376,9 @@ export class StateTracker {
372
376
  * Verify dependencies using state snapshots.
373
377
  */
374
378
  verifyDependencies(dependencies, _snapshots, changes) {
375
- return dependencies.map(dep => {
379
+ return dependencies.map((dep) => {
376
380
  // Check if the producer step caused any changes
377
- const producerChanges = changes.filter(c => c.causedByStep === dep.producerStep);
381
+ const producerChanges = changes.filter((c) => c.causedByStep === dep.producerStep);
378
382
  const verified = producerChanges.length > 0;
379
383
  return {
380
384
  ...dep,
@@ -388,19 +392,19 @@ export class StateTracker {
388
392
  async generateSummary(tracking) {
389
393
  const parts = [];
390
394
  // Summarize tool roles
391
- const writers = tracking.toolRoles.filter(t => t.role === 'writer' || t.role === 'both');
392
- const readers = tracking.toolRoles.filter(t => t.role === 'reader' || t.role === 'both');
395
+ const writers = tracking.toolRoles.filter((t) => t.role === 'writer' || t.role === 'both');
396
+ const readers = tracking.toolRoles.filter((t) => t.role === 'reader' || t.role === 'both');
393
397
  if (writers.length > 0) {
394
- parts.push(`State writers: ${writers.map(t => t.tool).join(', ')}`);
398
+ parts.push(`State writers: ${writers.map((t) => t.tool).join(', ')}`);
395
399
  }
396
400
  if (readers.length > 0) {
397
- parts.push(`State readers: ${readers.map(t => t.tool).join(', ')}`);
401
+ parts.push(`State readers: ${readers.map((t) => t.tool).join(', ')}`);
398
402
  }
399
403
  // Summarize changes
400
404
  if (tracking.changes.length > 0) {
401
- const created = tracking.changes.filter(c => c.type === 'created').length;
402
- const modified = tracking.changes.filter(c => c.type === 'modified').length;
403
- const deleted = tracking.changes.filter(c => c.type === 'deleted').length;
405
+ const created = tracking.changes.filter((c) => c.type === 'created').length;
406
+ const modified = tracking.changes.filter((c) => c.type === 'modified').length;
407
+ const deleted = tracking.changes.filter((c) => c.type === 'deleted').length;
404
408
  const changeParts = [];
405
409
  if (created > 0)
406
410
  changeParts.push(`${created} created`);
@@ -415,7 +419,7 @@ export class StateTracker {
415
419
  }
416
420
  // Summarize dependencies
417
421
  if (tracking.dependencies.length > 0) {
418
- const verified = tracking.dependencies.filter(d => d.verified).length;
422
+ const verified = tracking.dependencies.filter((d) => d.verified).length;
419
423
  parts.push(`Dependencies: ${tracking.dependencies.length} inferred (${verified} verified)`);
420
424
  }
421
425
  return `${parts.join('. ')}.`;
@@ -0,0 +1,204 @@
1
+ .TH "BELLWETHER" "1" "2026\-02\-04" "Bellwether 2.0.0" "User Commands"
2
+ .SH NAME
3
+ .PP
4
+ bellwether \[em] MCP server testing and validation tool
5
+ .SH SYNOPSIS
6
+ .PP
7
+ \f[B]bellwether\f[] [OPTIONS] COMMAND [ARGS...]
8
+ .PP
9
+ \f[B]bellwether\f[] \f[B]\-\-version\f[]
10
+ .PP
11
+ \f[B]bellwether\f[] \f[B]\-\-help\f[]
12
+ .SH DESCRIPTION
13
+ .PP
14
+ Bellwether is an open\-source MCP (Model Context Protocol) testing tool
15
+ that provides structural drift detection and behavioral documentation
16
+ for MCP servers.
17
+ .SH COMMANDS
18
+ .TP
19
+ .B \f[B]check\f[] [\f[I]options\f[]] [server\-command]
20
+ Schema validation and drift detection (free, fast, deterministic)
21
+ .RS
22
+ .RE
23
+ .TP
24
+ .B \f[B]explore\f[] [\f[I]options\f[]] [server\-command]
25
+ LLM\-powered behavioral exploration and documentation
26
+ .RS
27
+ .RE
28
+ .TP
29
+ .B \f[B]discover\f[] [\f[I]options\f[]] [server\-command]
30
+ Discover MCP server capabilities (tools, prompts, resources)
31
+ .RS
32
+ .RE
33
+ .TP
34
+ .B \f[B]watch\f[] [\f[I]options\f[]]
35
+ Watch for MCP server changes and auto\-check
36
+ .RS
37
+ .RE
38
+ .TP
39
+ .B \f[B]init\f[] [\f[I]options\f[]] [server\-command]
40
+ Initialize a bellwether.yaml configuration file
41
+ .RS
42
+ .RE
43
+ .TP
44
+ .B \f[B]auth\f[] \f[I]subcommand\f[] [\f[I]options\f[]]
45
+ Manage LLM provider API keys
46
+ .RS
47
+ .RE
48
+ .TP
49
+ .B \f[B]baseline\f[] \f[I]subcommand\f[] [\f[I]options\f[]]
50
+ Manage baselines for drift detection
51
+ .RS
52
+ .RE
53
+ .TP
54
+ .B \f[B]golden\f[] \f[I]subcommand\f[] [\f[I]options\f[]]
55
+ Manage golden outputs for validation
56
+ .RS
57
+ .RE
58
+ .TP
59
+ .B \f[B]registry\f[] [\f[I]options\f[]] \f[I]search\f[]
60
+ Search the MCP Registry for servers
61
+ .RS
62
+ .RE
63
+ .TP
64
+ .B \f[B]contract\f[] \f[I]subcommand\f[] [\f[I]options\f[]]
65
+ Validate MCP servers against contracts
66
+ .RS
67
+ .RE
68
+ .TP
69
+ .B \f[B]validate\-config\f[] [\f[I]options\f[]]
70
+ Validate bellwether.yaml configuration
71
+ .RS
72
+ .RE
73
+ .SH GLOBAL OPTIONS
74
+ .TP
75
+ .B \f[B]\-h\f[], \f[B]\-\-help\f[]
76
+ Show help message and exit
77
+ .RS
78
+ .RE
79
+ .TP
80
+ .B \f[B]\-\-version\f[]
81
+ Show version information and exit
82
+ .RS
83
+ .RE
84
+ .TP
85
+ .B \f[B]\-\-log\-level\f[] \f[I]LEVEL\f[]
86
+ Set log level: debug, info, warn, error, silent
87
+ .RS
88
+ .RE
89
+ .TP
90
+ .B \f[B]\-\-log\-file\f[] \f[I]PATH\f[]
91
+ Write logs to file instead of stderr
92
+ .RS
93
+ .RE
94
+ .SH EXAMPLES
95
+ .PP
96
+ Initialize configuration:
97
+ .IP
98
+ .nf
99
+ \f[C]
100
+ bellwether\ init\ npx\ \@modelcontextprotocol/server\-filesystem
101
+ \f[]
102
+ .fi
103
+ .PP
104
+ Run drift detection:
105
+ .IP
106
+ .nf
107
+ \f[C]
108
+ bellwether\ check
109
+ \f[]
110
+ .fi
111
+ .PP
112
+ Save baseline:
113
+ .IP
114
+ .nf
115
+ \f[C]
116
+ bellwether\ baseline\ save
117
+ \f[]
118
+ .fi
119
+ .PP
120
+ Explore with LLM:
121
+ .IP
122
+ .nf
123
+ \f[C]
124
+ bellwether\ explore
125
+ \f[]
126
+ .fi
127
+ .SH FILES
128
+ .TP
129
+ .B \f[I]bellwether.yaml\f[]
130
+ Configuration file for the project
131
+ .RS
132
+ .RE
133
+ .TP
134
+ .B \f[I]bellwether\-baseline.json\f[]
135
+ Saved baseline for drift detection
136
+ .RS
137
+ .RE
138
+ .TP
139
+ .B \f[I]CONTRACT.md\f[]
140
+ Generated contract documentation
141
+ .RS
142
+ .RE
143
+ .TP
144
+ .B \f[I]AGENTS.md\f[]
145
+ Generated behavioral documentation
146
+ .RS
147
+ .RE
148
+ .SH ENVIRONMENT
149
+ .TP
150
+ .B \f[I]OPENAI_API_KEY\f[]
151
+ API key for OpenAI (explore mode only)
152
+ .RS
153
+ .RE
154
+ .TP
155
+ .B \f[I]ANTHROPIC_API_KEY\f[]
156
+ API key for Anthropic (explore mode only)
157
+ .RS
158
+ .RE
159
+ .TP
160
+ .B \f[I]OLLAMA_BASE_URL\f[]
161
+ Ollama URL (default: http://localhost:11434)
162
+ .RS
163
+ .RE
164
+ .SH EXIT STATUS
165
+ .TP
166
+ .B \f[B]0\f[]
167
+ Success, no changes detected
168
+ .RS
169
+ .RE
170
+ .TP
171
+ .B \f[B]1\f[]
172
+ Info\-level changes only
173
+ .RS
174
+ .RE
175
+ .TP
176
+ .B \f[B]2\f[]
177
+ Warning\-level changes
178
+ .RS
179
+ .RE
180
+ .TP
181
+ .B \f[B]3\f[]
182
+ Breaking changes detected
183
+ .RS
184
+ .RE
185
+ .TP
186
+ .B \f[B]4\f[]
187
+ Runtime error
188
+ .RS
189
+ .RE
190
+ .TP
191
+ .B \f[B]5\f[]
192
+ Low confidence metrics
193
+ .RS
194
+ .RE
195
+ .SH SEE ALSO
196
+ .PP
197
+ Project homepage: <https://github.com/dotsetlabs/bellwether>
198
+ .PP
199
+ Documentation: <https://docs.bellwether.sh>
200
+ .PP
201
+ MCP Specification: <https://modelcontextprotocol.io>
202
+ .SH AUTHORS
203
+ .PP
204
+ Dotset Labs LLC <hello@dotsetlabs.com>
@@ -0,0 +1,148 @@
1
+ ---
2
+ title: BELLWETHER
3
+ section: 1
4
+ header: User Commands
5
+ footer: Bellwether 2.0.0
6
+ date: 2026-02-04
7
+ ---
8
+
9
+ # NAME
10
+
11
+ bellwether — MCP server testing and validation tool
12
+
13
+ # SYNOPSIS
14
+
15
+ **bellwether** [OPTIONS] COMMAND [ARGS...]
16
+
17
+ **bellwether** **--version**
18
+
19
+ **bellwether** **--help**
20
+
21
+ # DESCRIPTION
22
+
23
+ Bellwether is an open-source MCP (Model Context Protocol) testing tool that provides
24
+ structural drift detection and behavioral documentation for MCP servers.
25
+
26
+ # COMMANDS
27
+
28
+ **check** [*options*] [server-command]
29
+ : Schema validation and drift detection (free, fast, deterministic)
30
+
31
+ **explore** [*options*] [server-command]
32
+ : LLM-powered behavioral exploration and documentation
33
+
34
+ **discover** [*options*] [server-command]
35
+ : Discover MCP server capabilities (tools, prompts, resources)
36
+
37
+ **watch** [*options*]
38
+ : Watch for MCP server changes and auto-check
39
+
40
+ **init** [*options*] [server-command]
41
+ : Initialize a bellwether.yaml configuration file
42
+
43
+ **auth** *subcommand* [*options*]
44
+ : Manage LLM provider API keys
45
+
46
+ **baseline** *subcommand* [*options*]
47
+ : Manage baselines for drift detection
48
+
49
+ **golden** *subcommand* [*options*]
50
+ : Manage golden outputs for validation
51
+
52
+ **registry** [*options*] *search*
53
+ : Search the MCP Registry for servers
54
+
55
+ **contract** *subcommand* [*options*]
56
+ : Validate MCP servers against contracts
57
+
58
+ **validate-config** [*options*]
59
+ : Validate bellwether.yaml configuration
60
+
61
+ # GLOBAL OPTIONS
62
+
63
+ **-h**, **--help**
64
+ : Show help message and exit
65
+
66
+ **--version**
67
+ : Show version information and exit
68
+
69
+ **--log-level** *LEVEL*
70
+ : Set log level: debug, info, warn, error, silent
71
+
72
+ **--log-file** *PATH*
73
+ : Write logs to file instead of stderr
74
+
75
+ # EXAMPLES
76
+
77
+ Initialize configuration:
78
+
79
+ bellwether init npx @modelcontextprotocol/server-filesystem
80
+
81
+ Run drift detection:
82
+
83
+ bellwether check
84
+
85
+ Save baseline:
86
+
87
+ bellwether baseline save
88
+
89
+ Explore with LLM:
90
+
91
+ bellwether explore
92
+
93
+ # FILES
94
+
95
+ *bellwether.yaml*
96
+ : Configuration file for the project
97
+
98
+ *bellwether-baseline.json*
99
+ : Saved baseline for drift detection
100
+
101
+ *CONTRACT.md*
102
+ : Generated contract documentation
103
+
104
+ *AGENTS.md*
105
+ : Generated behavioral documentation
106
+
107
+ # ENVIRONMENT
108
+
109
+ *OPENAI_API_KEY*
110
+ : API key for OpenAI (explore mode only)
111
+
112
+ *ANTHROPIC_API_KEY*
113
+ : API key for Anthropic (explore mode only)
114
+
115
+ *OLLAMA_BASE_URL*
116
+ : Ollama URL (default: http://localhost:11434)
117
+
118
+ # EXIT STATUS
119
+
120
+ **0**
121
+ : Success, no changes detected
122
+
123
+ **1**
124
+ : Info-level changes only
125
+
126
+ **2**
127
+ : Warning-level changes
128
+
129
+ **3**
130
+ : Breaking changes detected
131
+
132
+ **4**
133
+ : Runtime error
134
+
135
+ **5**
136
+ : Low confidence metrics
137
+
138
+ # SEE ALSO
139
+
140
+ Project homepage: <https://github.com/dotsetlabs/bellwether>
141
+
142
+ Documentation: <https://docs.bellwether.sh>
143
+
144
+ MCP Specification: <https://modelcontextprotocol.io>
145
+
146
+ # AUTHORS
147
+
148
+ Dotset Labs LLC <hello@dotsetlabs.com>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dotsetlabs/bellwether",
3
- "version": "1.0.2",
3
+ "version": "2.0.0",
4
4
  "description": "The open-source MCP testing tool. Structural drift detection and behavioral documentation for Model Context Protocol servers.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -33,10 +33,11 @@
33
33
  "format": "prettier --write \"src/**/*.ts\"",
34
34
  "format:check": "prettier --check \"src/**/*.ts\"",
35
35
  "clean": "rm -rf dist",
36
- "docs:generate": "typedoc",
36
+ "docs:generate": "npm --prefix website run build",
37
+ "docs:dev": "npm --prefix website run start",
37
38
  "man:generate": "./scripts/generate-manpage.sh",
38
39
  "prepare": "husky install || true",
39
- "prepublishOnly": "npm run build"
40
+ "prepublishOnly": "npm run build && npm run man:generate"
40
41
  },
41
42
  "keywords": [
42
43
  "mcp",
@@ -59,8 +60,7 @@
59
60
  "license": "MIT",
60
61
  "repository": {
61
62
  "type": "git",
62
- "url": "https://github.com/dotsetlabs/bellwether",
63
- "directory": "cli"
63
+ "url": "https://github.com/dotsetlabs/bellwether"
64
64
  },
65
65
  "funding": {
66
66
  "type": "github",
@@ -97,10 +97,9 @@
97
97
  "@typescript-eslint/parser": "^6.21.0",
98
98
  "eslint": "^8.57.1",
99
99
  "husky": "^9.1.0",
100
- "lint-staged": "^15.2.0",
100
+ "lint-staged": "^16.2.7",
101
101
  "prettier": "^3.3.0",
102
102
  "tsx": "^4.21.0",
103
- "typedoc": "^0.28.16",
104
103
  "typescript": "^5.3.0",
105
104
  "vitest": "^4.0.17"
106
105
  },