@dotsetlabs/bellwether 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/.dockerignore +25 -0
  2. package/CHANGELOG.md +63 -0
  3. package/Dockerfile +43 -0
  4. package/dist/auth/keychain.js +14 -7
  5. package/dist/baseline/change-impact-analyzer.js +1 -1
  6. package/dist/baseline/comparator.js +2 -2
  7. package/dist/baseline/deprecation-tracker.js +1 -1
  8. package/dist/baseline/diff.js +4 -4
  9. package/dist/baseline/golden-output.js +2 -2
  10. package/dist/baseline/migration-generator.js +2 -2
  11. package/dist/baseline/performance-tracker.js +1 -1
  12. package/dist/baseline/pr-comment-generator.js +4 -4
  13. package/dist/baseline/risk-scorer.js +1 -1
  14. package/dist/baseline/schema-evolution.js +1 -1
  15. package/dist/cli/commands/baseline.js +30 -11
  16. package/dist/cli/commands/check.js +160 -8
  17. package/dist/cli/commands/contract.js +4 -4
  18. package/dist/cli/commands/discover.js +1 -1
  19. package/dist/cli/commands/explore.js +1 -1
  20. package/dist/cli/commands/registry.js +143 -5
  21. package/dist/cli/commands/watch.js +5 -5
  22. package/dist/cli/index.d.ts +6 -0
  23. package/dist/cli/index.js +8 -2
  24. package/dist/cli/output.d.ts +1 -0
  25. package/dist/cli/output.js +34 -10
  26. package/dist/cli/utils/progress.js +10 -10
  27. package/dist/config/loader.js +33 -4
  28. package/dist/config/template.d.ts +2 -0
  29. package/dist/config/template.js +8 -2
  30. package/dist/constants/core.d.ts +9 -0
  31. package/dist/constants/core.js +9 -0
  32. package/dist/constants/testing.js +0 -1
  33. package/dist/contract/validator.js +1 -1
  34. package/dist/discovery/discovery.js +4 -4
  35. package/dist/docs/agents.js +1 -1
  36. package/dist/docs/contract.js +5 -5
  37. package/dist/index.d.ts +4 -0
  38. package/dist/index.js +4 -0
  39. package/dist/interview/interviewer.js +1 -1
  40. package/dist/interview/orchestrator.js +1 -1
  41. package/dist/llm/anthropic.js +1 -1
  42. package/dist/llm/token-budget.js +1 -1
  43. package/dist/registry/client.d.ts +2 -0
  44. package/dist/registry/client.js +38 -1
  45. package/dist/security/security-tester.js +2 -2
  46. package/dist/transport/http-transport.js +1 -1
  47. package/dist/transport/mcp-client.js +2 -2
  48. package/dist/transport/stdio-transport.js +1 -1
  49. package/dist/utils/markdown.js +3 -3
  50. package/dist/utils/sanitize.js +1 -1
  51. package/dist/utils/smart-truncate.js +1 -1
  52. package/dist/version.js +1 -1
  53. package/dist/workflow/auto-generator.js +3 -3
  54. package/dist/workflow/state-tracker.js +1 -1
  55. package/package.json +34 -6
  56. package/scripts/completions/bellwether.bash +61 -0
  57. package/scripts/completions/bellwether.zsh +94 -0
package/.dockerignore ADDED
@@ -0,0 +1,25 @@
# Dependencies and package-manager logs
node_modules
npm-debug.log

# Version control and CI metadata
.git
.gitignore
.github

# Tooling configuration
.eslintrc.json
.prettierrc
tsconfig.json
typedoc.json
vitest.config.ts

# Test, coverage and analysis artifacts
.nyc_output
coverage
.dccache
test/
*.test.ts
*.spec.ts

# Editor settings
.vscode
.idea

# TypeScript sources (the image only ships the compiled output)
src/

# Exclude Markdown files at the context root, then re-include the documents
# that must remain in the build context. Negations have to come last because
# the last matching pattern wins.
*.md
!LICENSE
!README.md
!CHANGELOG.md
package/CHANGELOG.md CHANGED
@@ -2,6 +2,69 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [1.0.2] - 2026-01-30
11
+
12
+ ### Added
13
+
14
+ - Added SARIF and JUnit output format support for `bellwether check` without baseline comparison
15
+ - Use `--format sarif` for GitHub Code Scanning integration
16
+ - Use `--format junit` for CI/CD test reporting
17
+ - Added registry validation indicators showing environment variable requirements
18
+ - Servers requiring setup now display ⚙ indicator
19
+ - Environment variables show ✓/✗ status based on whether they're set
20
+ - Automatic detection of common service patterns (postgres→DATABASE_URL, etc.)
21
+ - Setup hints displayed for unconfigured servers
22
+
23
+ ### Changed
24
+
25
+ - Security and thorough presets now enable security testing by default (`check.security.enabled: true`)
26
+
27
+ ### Fixed
28
+
29
+ - Fixed baseline path resolution in `baseline compare` to be consistent with `baseline show`
30
+ - Now checks both output directory and current working directory before failing
31
+ - Fixed `bellwether auth status` requiring a config file
32
+ - Auth commands now work without bellwether.yaml present
33
+ - Fixed ANSI escape codes appearing in non-TTY output (e.g., when piping to files)
34
+ - StreamingDisplay now checks for TTY before applying ANSI styling
35
+ - Automatically respects `NO_COLOR` and `FORCE_COLOR=0` environment variables
36
+
37
+ ## [1.0.1] - 2026-01-29
38
+
39
+ ### Added
40
+
41
+ - Added `$VAR` syntax support for environment variable interpolation in config files
42
+ - Added rate limiting to registry client (5 req/s default)
43
+ - Added `AnthropicClient` and `OllamaClient` exports to public API
44
+ - Added `repository.directory` and `funding` fields to package.json
45
+ - Added required permissions documentation to GitHub Action
46
+ - Added debug logging for all credential operations
47
+ - Added warning when environment variables in config are not resolved
48
+
49
+ ### Changed
50
+
51
+ - Optimized GitHub Action to run check once; SARIF and JUnit are now converted from JSON output
52
+ - Removed test coverage exclusion for CLI entry point
53
+ - Removed unnecessary type casts in check.ts and security-tester.ts
54
+ - Replaced magic number 100 with PERCENTAGE_CONVERSION.DIVISOR constant
55
+ - Removed dead code sections from constants
56
+ - Refactored string concatenation to template literals in CLI output modules
57
+
58
+ ### Fixed
59
+
60
+ - Fixed version fallback inconsistency (0.13.0 → 1.0.1)
61
+ - Fixed missing pino-pretty dependency
62
+ - Fixed non-null assertion for remoteUrl in check.ts (added proper null check)
63
+ - Fixed non-null assertion for incrementalResult in check.ts
64
+ - Added debug logging to catch blocks in keychain.ts (graceful degradation with visibility)
65
+ - Fixed flaky test in workflow executor (timing assertion)
66
+ - Fixed test failures in baseline-accept tests (process.exit mock)
67
+
5
68
  ## [1.0.0] - 2026-01-27
6
69
 
7
70
  ### Breaking Changes
package/Dockerfile ADDED
@@ -0,0 +1,43 @@
# Bellwether MCP Testing Tool
# https://github.com/dotsetlabs/bellwether

FROM node:20-alpine

LABEL maintainer="Dotset Labs <hello@dotsetlabs.com>"
LABEL description="Bellwether - MCP Server Testing & Validation"
LABEL org.opencontainers.image.source="https://github.com/dotsetlabs/bellwether"

# Install git for npm dependencies that may need it
RUN apk add --no-cache git

# Create app directory
WORKDIR /app

# Copy package files first so the dependency layer is cached independently
# of application changes
COPY package*.json ./

# Install production dependencies only
RUN npm ci --omit=dev

# Copy built application
COPY dist/ ./dist/
COPY schemas/ ./schemas/
COPY LICENSE README.md CHANGELOG.md ./

# Create non-root user.
# -G is required: without it BusyBox `adduser -S` assigns the `nogroup`
# primary group, leaving the dedicated `bellwether` group unused by the user.
RUN addgroup -g 1001 -S bellwether && \
    adduser -S -u 1001 -G bellwether bellwether

# Set proper permissions
RUN chown -R bellwether:bellwether /app

# Switch to non-root user
USER bellwether

# Set environment
ENV NODE_ENV=production
ENV BELLWETHER_DOCKER=1

# Entry point
ENTRYPOINT ["node", "dist/cli/index.js"]
CMD ["--help"]
@@ -12,6 +12,8 @@ import { homedir } from 'os';
12
12
  import { join } from 'path';
13
13
  import { createRequire } from 'module';
14
14
  import { createCipheriv, createDecipheriv, randomBytes } from 'crypto';
15
+ import { getLogger } from '../logging/logger.js';
16
+ const logger = getLogger('keychain');
15
17
  // Create require function for loading CommonJS optional dependencies in ESM
16
18
  const require = createRequire(import.meta.url);
17
19
  // Service name for keychain entries
@@ -80,7 +82,8 @@ export function decryptEnvValue(value) {
80
82
  decipher.setAuthTag(tag);
81
83
  return Buffer.concat([decipher.update(data), decipher.final()]).toString('utf8');
82
84
  }
83
- catch {
85
+ catch (error) {
86
+ logger.debug({ error }, 'Failed to decrypt env value');
84
87
  return undefined;
85
88
  }
86
89
  }
@@ -100,8 +103,8 @@ class KeytarBackend {
100
103
  // Using require() for optional dependency
101
104
  this.keytar = require('keytar');
102
105
  }
103
- catch {
104
- // keytar not available - will use fallback
106
+ catch (error) {
107
+ logger.debug({ error }, 'keytar not available, will use file fallback');
105
108
  this.keytar = null;
106
109
  }
107
110
  })();
@@ -154,7 +157,8 @@ class FileBackend {
154
157
  this.envLines = [];
155
158
  }
156
159
  }
157
- catch {
160
+ catch (error) {
161
+ logger.debug({ error }, 'Failed to load credentials file');
158
162
  this.envLines = [];
159
163
  }
160
164
  return this.envLines;
@@ -264,7 +268,8 @@ export class KeychainService {
264
268
  require('keytar');
265
269
  return true;
266
270
  }
267
- catch {
271
+ catch (error) {
272
+ logger.debug({ error }, 'Secure keychain (keytar) not available');
268
273
  return false;
269
274
  }
270
275
  }
@@ -295,7 +300,8 @@ export class KeychainService {
295
300
  try {
296
301
  return await this.backend.getPassword(SERVICE_NAME, account);
297
302
  }
298
- catch {
303
+ catch (error) {
304
+ logger.debug({ error, provider }, 'Keychain get failed, trying file backend');
299
305
  // If keytar fails, try file backend
300
306
  if (!this.useFileBackend) {
301
307
  this.enableFileBackend();
@@ -337,7 +343,8 @@ export class KeychainService {
337
343
  try {
338
344
  return await this.backend.deletePassword(SERVICE_NAME, account);
339
345
  }
340
- catch {
346
+ catch (error) {
347
+ logger.debug({ error, provider }, 'Keychain delete failed, trying file backend');
341
348
  // If keytar fails, try file backend
342
349
  if (!this.useFileBackend) {
343
350
  this.enableFileBackend();
@@ -543,7 +543,7 @@ function generateImpactSummary(diff, toolImpacts, brokenWorkflows) {
543
543
  if (brokenWorkflows.length > 0) {
544
544
  parts.push(`${brokenWorkflows.length} workflow(s) may be affected`);
545
545
  }
546
- return parts.length > 0 ? parts.join('. ') + '.' : 'No changes detected.';
546
+ return parts.length > 0 ? `${parts.join('. ')}.` : 'No changes detected.';
547
547
  }
548
548
  /**
549
549
  * Check if a behavior change is actually breaking based on semantic analysis.
@@ -337,7 +337,7 @@ function formatSchemaChangeValue(value) {
337
337
  // For objects, show a compact representation
338
338
  try {
339
339
  const json = JSON.stringify(value);
340
- return json.length > 50 ? json.slice(0, 47) + '...' : json;
340
+ return json.length > 50 ? `${json.slice(0, 47)}...` : json;
341
341
  }
342
342
  catch {
343
343
  return String(value);
@@ -425,7 +425,7 @@ function generateSummary(toolsAdded, toolsRemoved, toolsModified, changes, sever
425
425
  if (warningChanges > 0) {
426
426
  parts.push(`${warningChanges} warning(s)`);
427
427
  }
428
- return parts.join('. ') + '.';
428
+ return `${parts.join('. ')}.`;
429
429
  }
430
430
  export function hasBreakingChanges(diff) {
431
431
  return diff.severity === 'breaking';
@@ -231,7 +231,7 @@ function generateDeprecationSummary(warnings, deprecatedCount, expiredCount, gra
231
231
  if (criticalTools.length > 0) {
232
232
  parts.push(`${criticalTools.length} tool(s) will be removed within ${DEPRECATION_THRESHOLDS.CRITICAL_REMOVAL_DAYS} days`);
233
233
  }
234
- return parts.join(', ') + '.';
234
+ return `${parts.join(', ')}.`;
235
235
  }
236
236
  /**
237
237
  * Get all deprecated tools from a baseline.
@@ -59,10 +59,10 @@ export function formatDiffText(diff, useColors = true) {
59
59
  const sevColor = getSeverityColor(change.severity, useColors);
60
60
  lines.push(` ${sevColor(`[${change.severity.toUpperCase()}]`)} ${change.aspect}`);
61
61
  if (change.before) {
62
- lines.push(` ${red('- ' + change.before)}`);
62
+ lines.push(` ${red(`- ${change.before}`)}`);
63
63
  }
64
64
  if (change.after) {
65
- lines.push(` ${green('+ ' + change.after)}`);
65
+ lines.push(` ${green(`+ ${change.after}`)}`);
66
66
  }
67
67
  }
68
68
  lines.push('');
@@ -144,10 +144,10 @@ export function formatDiffText(diff, useColors = true) {
144
144
  lines.push(` ${issueIcon} ${bold(issue.toolName)}`);
145
145
  lines.push(` ${issue.summary}`);
146
146
  if (issue.fieldsRemoved.length > 0) {
147
- lines.push(` ${red('- Removed: ' + issue.fieldsRemoved.join(', '))}`);
147
+ lines.push(` ${red(`- Removed: ${issue.fieldsRemoved.join(', ')}`)}`);
148
148
  }
149
149
  if (issue.fieldsAdded.length > 0) {
150
- lines.push(` ${green('+ Added: ' + issue.fieldsAdded.join(', '))}`);
150
+ lines.push(` ${green(`+ Added: ${issue.fieldsAdded.join(', ')}`)}`);
151
151
  }
152
152
  }
153
153
  lines.push('');
@@ -562,7 +562,7 @@ function isPathAllowed(path, allowedPaths) {
562
562
  // Normalize pattern by stripping leading $. if present
563
563
  const normalizedPattern = pattern.replace(/^\$\.?/, '');
564
564
  // Simple glob matching: * matches any segment
565
- const regex = new RegExp('^' + normalizedPattern.replace(/\*/g, '[^.]+').replace(/\./g, '\\.') + '$');
565
+ const regex = new RegExp(`^${normalizedPattern.replace(/\*/g, '[^.]+').replace(/\./g, '\\.')}$`);
566
566
  return regex.test(normalizedPath);
567
567
  });
568
568
  }
@@ -583,7 +583,7 @@ function truncateForDisplay(value, maxLength = 50) {
583
583
  const str = typeof value === 'string' ? value : JSON.stringify(value);
584
584
  if (str.length <= maxLength)
585
585
  return str;
586
- return str.slice(0, maxLength - 3) + '...';
586
+ return `${str.slice(0, maxLength - 3)}...`;
587
587
  }
588
588
  /**
589
589
  * Determine severity based on differences.
@@ -472,12 +472,12 @@ export function formatMigrationGuideMarkdown(guide) {
472
472
  lines.push(`**${example.title}**`);
473
473
  lines.push('');
474
474
  lines.push('Before:');
475
- lines.push('```' + example.language);
475
+ lines.push(`\`\`\`${example.language}`);
476
476
  lines.push(example.before);
477
477
  lines.push('```');
478
478
  lines.push('');
479
479
  lines.push('After:');
480
- lines.push('```' + example.language);
480
+ lines.push(`\`\`\`${example.language}`);
481
481
  lines.push(example.after);
482
482
  lines.push('```');
483
483
  lines.push('');
@@ -494,7 +494,7 @@ function generateReportSummary(regressions, improvements, stable, total) {
494
494
  if (parts.length === 0) {
495
495
  return `No performance data for ${total} tool(s).`;
496
496
  }
497
- return parts.join(', ') + '.';
497
+ return `${parts.join(', ')}.`;
498
498
  }
499
499
  /**
500
500
  * Format performance metrics for display.
@@ -199,12 +199,12 @@ function generateMigrationSection(guide, config) {
199
199
  if (step.codeExamples && step.codeExamples.length > 0) {
200
200
  const example = step.codeExamples[0];
201
201
  lines.push('');
202
- lines.push(' ```' + (example.language || ''));
202
+ lines.push(` \`\`\`${example.language || ''}`);
203
203
  lines.push(' // Before:');
204
- lines.push(' ' + example.before.split('\n').join('\n '));
204
+ lines.push(` ${example.before.split('\n').join('\n ')}`);
205
205
  lines.push('');
206
206
  lines.push(' // After:');
207
- lines.push(' ' + example.after.split('\n').join('\n '));
207
+ lines.push(` ${example.after.split('\n').join('\n ')}`);
208
208
  lines.push(' ```');
209
209
  }
210
210
  lines.push('');
@@ -283,7 +283,7 @@ function formatAspect(aspect) {
283
283
  function truncate(value, maxLength = PR_COMMENTS.VALUE_TRUNCATE_LENGTH) {
284
284
  if (value.length <= maxLength)
285
285
  return value;
286
- return value.substring(0, maxLength - 3) + '...';
286
+ return `${value.substring(0, maxLength - 3)}...`;
287
287
  }
288
288
  /**
289
289
  * Render a collapsible section.
@@ -429,6 +429,6 @@ export function generateRiskScoreMarkdown(riskScore) {
429
429
  function generateScoreBar(score, width = 10) {
430
430
  const filled = Math.round((score / 100) * width);
431
431
  const empty = width - filled;
432
- return '[' + '█'.repeat(filled) + '░'.repeat(empty) + ']';
432
+ return `[${'█'.repeat(filled)}${'░'.repeat(empty)}]`;
433
433
  }
434
434
  //# sourceMappingURL=risk-scorer.js.map
@@ -365,7 +365,7 @@ export function generateVisualTimeline(timeline, width = SCHEMA_EVOLUTION.DEFAUL
365
365
  const marker = v.hasBreakingChanges ? '◆' : '●';
366
366
  bar += marker + '─'.repeat(segmentWidth - 1);
367
367
  }
368
- lines.push(' ' + bar);
368
+ lines.push(` ${bar}`);
369
369
  // Version labels
370
370
  let labels = ' ';
371
371
  for (const v of displayVersions) {
@@ -146,12 +146,31 @@ baselineCommand
146
146
  output.error('No baseline path provided. Set baseline.path or baseline.comparePath in config, or pass a path argument.');
147
147
  process.exit(EXIT_CODES.ERROR);
148
148
  }
149
- const baselineBaseDir = baselinePath ? process.cwd() : outputDir;
150
- const fullBaselinePath = resolvedBaselinePath.startsWith('/')
151
- ? resolvedBaselinePath
152
- : join(baselineBaseDir, resolvedBaselinePath);
149
+ // Resolve baseline path consistently with 'show' command:
150
+ // 1. If absolute path, use as-is
151
+ // 2. First try relative to outputDir (e.g., .bellwether/)
152
+ // 3. Fall back to relative to cwd
153
+ let fullBaselinePath;
154
+ if (resolvedBaselinePath.startsWith('/')) {
155
+ fullBaselinePath = resolvedBaselinePath;
156
+ }
157
+ else {
158
+ const outputDirPath = join(outputDir, resolvedBaselinePath);
159
+ const cwdPath = join(process.cwd(), resolvedBaselinePath);
160
+ if (existsSync(outputDirPath)) {
161
+ fullBaselinePath = outputDirPath;
162
+ }
163
+ else if (existsSync(cwdPath)) {
164
+ fullBaselinePath = cwdPath;
165
+ }
166
+ else {
167
+ // Default to outputDir path for error message consistency
168
+ fullBaselinePath = outputDirPath;
169
+ }
170
+ }
153
171
  if (!existsSync(fullBaselinePath)) {
154
172
  output.error(`Baseline not found: ${fullBaselinePath}`);
173
+ output.error('\nRun `bellwether baseline save` to create a baseline.');
155
174
  process.exit(EXIT_CODES.ERROR);
156
175
  }
157
176
  let previousBaseline;
@@ -203,13 +222,13 @@ baselineCommand
203
222
  // Format and output
204
223
  switch (format) {
205
224
  case 'json':
206
- console.log(formatDiffJson(diff));
225
+ output.info(formatDiffJson(diff));
207
226
  break;
208
227
  case 'markdown':
209
- console.log(formatDiffMarkdown(diff));
228
+ output.info(formatDiffMarkdown(diff));
210
229
  break;
211
230
  case 'compact':
212
- console.log(formatDiffCompact(diff));
231
+ output.info(formatDiffCompact(diff));
213
232
  break;
214
233
  default:
215
234
  output.info('--- Drift Report ---');
@@ -271,7 +290,7 @@ baselineCommand
271
290
  }
272
291
  // Raw JSON output
273
292
  if (options.json) {
274
- console.log(JSON.stringify(baseline, null, 2));
293
+ output.info(JSON.stringify(baseline, null, 2));
275
294
  return;
276
295
  }
277
296
  // Formatted output
@@ -411,13 +430,13 @@ baselineCommand
411
430
  // Format and output
412
431
  switch (format) {
413
432
  case 'json':
414
- console.log(formatDiffJson(diff));
433
+ output.info(formatDiffJson(diff));
415
434
  break;
416
435
  case 'markdown':
417
- console.log(formatDiffMarkdown(diff));
436
+ output.info(formatDiffMarkdown(diff));
418
437
  break;
419
438
  case 'compact':
420
- console.log(formatDiffCompact(diff));
439
+ output.info(formatDiffCompact(diff));
421
440
  break;
422
441
  default:
423
442
  output.info(formatDiffText(diff));
@@ -26,7 +26,7 @@ import { loadWorkflowsFromFile, tryLoadDefaultWorkflows, DEFAULT_WORKFLOWS_FILE,
26
26
  import * as output from '../output.js';
27
27
  import { extractServerContextFromArgs } from '../utils/server-context.js';
28
28
  import { configureLogger } from '../../logging/logger.js';
29
- import { EXIT_CODES, SEVERITY_TO_EXIT_CODE, PATHS, SECURITY_TESTING, CHECK_SAMPLING, WORKFLOW, REPORT_SCHEMAS, } from '../../constants.js';
29
+ import { EXIT_CODES, SEVERITY_TO_EXIT_CODE, PATHS, SECURITY_TESTING, CHECK_SAMPLING, WORKFLOW, REPORT_SCHEMAS, PERCENTAGE_CONVERSION, } from '../../constants.js';
30
30
  export const checkCommand = new Command('check')
31
31
  .description('Check MCP server schema and detect drift (free, fast, deterministic)')
32
32
  .allowUnknownOption() // Allow server flags like -y for npx to pass through
@@ -112,7 +112,7 @@ export const checkCommand = new Command('check')
112
112
  const incrementalCacheHours = config.check.incrementalCacheHours;
113
113
  const parallelEnabled = config.check.parallel;
114
114
  const parallelWorkers = config.check.parallelWorkers;
115
- const performanceThreshold = config.check.performanceThreshold / 100;
115
+ const performanceThreshold = config.check.performanceThreshold / PERCENTAGE_CONVERSION.DIVISOR;
116
116
  const diffFormat = options.format ?? config.check.diffFormat;
117
117
  // Resolve security options from config
118
118
  const securityEnabled = config.check.security.enabled;
@@ -171,6 +171,10 @@ export const checkCommand = new Command('check')
171
171
  await mcpClient.connect(serverCommand, args, config.server.env);
172
172
  }
173
173
  else {
174
+ if (!remoteUrl) {
175
+ output.error('No server URL specified for remote transport');
176
+ process.exit(EXIT_CODES.ERROR);
177
+ }
174
178
  await mcpClient.connectRemote(remoteUrl, {
175
179
  transport,
176
180
  sessionId: remoteSessionId || undefined,
@@ -224,18 +228,19 @@ export const checkCommand = new Command('check')
224
228
  }
225
229
  else {
226
230
  incrementalBaseline = loadBaseline(baselinePath);
227
- incrementalResult = analyzeForIncremental(discovery.tools, incrementalBaseline, { maxCacheAgeHours: incrementalCacheHours });
228
- const summary = formatIncrementalSummary(incrementalResult.changeSummary);
231
+ const result = analyzeForIncremental(discovery.tools, incrementalBaseline, { maxCacheAgeHours: incrementalCacheHours });
232
+ incrementalResult = result;
233
+ const summary = formatIncrementalSummary(result.changeSummary);
229
234
  output.info(`Incremental analysis: ${summary}`);
230
- if (incrementalResult.toolsToTest.length === 0) {
235
+ if (result.toolsToTest.length === 0) {
231
236
  output.info('All tools unchanged. Using cached results.');
232
237
  // Still need to generate output with cached data
233
238
  // Skip to comparison section
234
239
  }
235
240
  else {
236
- output.info(`Testing ${incrementalResult.toolsToTest.length} tools (${incrementalResult.toolsToSkip.length} cached)\n`);
241
+ output.info(`Testing ${result.toolsToTest.length} tools (${result.toolsToSkip.length} cached)\n`);
237
242
  // Filter discovery to only include tools that need testing
238
- discovery.tools = discovery.tools.filter(t => incrementalResult.toolsToTest.includes(t.name));
243
+ discovery.tools = discovery.tools.filter(t => result.toolsToTest.includes(t.name));
239
244
  }
240
245
  }
241
246
  }
@@ -452,7 +457,7 @@ export const checkCommand = new Command('check')
452
457
  const fingerprint = await runSecurityTests({
453
458
  toolName: tool.name,
454
459
  toolDescription: tool.description || '',
455
- inputSchema: tool.inputSchema,
460
+ inputSchema: tool.inputSchema ?? {},
456
461
  callTool: async (args) => {
457
462
  try {
458
463
  const response = await mcpClient.callTool(tool.name, args);
@@ -763,6 +768,16 @@ export const checkCommand = new Command('check')
763
768
  saveBaseline(currentBaseline, saveBaselinePath);
764
769
  output.info(`\nBaseline saved: ${saveBaselinePath}`);
765
770
  }
771
+ // Output formatted results for sarif/junit when no baseline comparison
772
+ // This allows CI systems to consume check results even without drift detection
773
+ if (!baselinePath) {
774
+ const formattedCheckResults = formatCheckResults(currentBaseline, diffFormat);
775
+ if (formattedCheckResults) {
776
+ output.info('\n--- Check Results ---');
777
+ // Output directly to stdout for machine-readable formats
778
+ console.log(formattedCheckResults);
779
+ }
780
+ }
766
781
  // Handle baseline comparison
767
782
  if (baselinePath) {
768
783
  if (!existsSync(baselinePath)) {
@@ -914,4 +929,141 @@ function formatDiff(diff, format, baselinePath) {
914
929
  return formatDiffText(diff);
915
930
  }
916
931
  }
/**
 * Format check results as JUnit XML (for CI systems that expect test results).
 * This is used when --format junit is specified but no baseline comparison occurs.
 *
 * Every tool becomes one <testcase>. Each tool that has at least one
 * critical/high security finding additionally gets a failing
 * "<tool>-security" test case listing those findings.
 *
 * @param {object} baseline - Baseline handed to getToolFingerprints().
 * @returns {string} The JUnit XML document.
 */
function formatCheckResultsJUnit(baseline) {
    const RELIABILITY_THRESHOLD = 0.9;
    // Tool names and finding text originate from the server under test, so they
    // must be escaped before being placed in attributes or element content.
    const escapeXml = (value) => String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
    const isSevere = (finding) => finding.riskLevel === 'critical' || finding.riskLevel === 'high';
    const tools = getToolFingerprints(baseline);
    // Compute the critical/high findings once per tool and reuse them for both
    // the suite counters and the emitted test cases.
    const securityCases = tools
        .map((tool) => ({
            tool,
            severe: (tool.securityFingerprint?.findings ?? []).filter(isSevere),
        }))
        .filter(({ severe }) => severe.length > 0);
    // "tests" must count every emitted <testcase>, including the security ones.
    const totalCases = tools.length + securityCases.length;
    const lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<testsuites>',
        `  <testsuite name="bellwether-check" tests="${totalCases}" failures="${securityCases.length}" errors="0">`,
    ];
    for (const tool of tools) {
        const successRate = tool.baselineSuccessRate ?? 1;
        lines.push(`    <testcase name="${escapeXml(tool.name)}" classname="mcp-tools" time="0">`);
        lines.push(`      <system-out>Success rate: ${(successRate * 100).toFixed(0)}%</system-out>`);
        if (successRate < RELIABILITY_THRESHOLD) {
            lines.push('      <system-err>Tool has success rate below 90%</system-err>');
        }
        lines.push('    </testcase>');
    }
    // Add security findings as test cases if present
    if (securityCases.length > 0) {
        lines.push('    <!-- Security findings -->');
        for (const { tool, severe } of securityCases) {
            lines.push(`    <testcase name="${escapeXml(tool.name)}-security" classname="security">`);
            lines.push(`      <failure message="${severe.length} critical/high security findings">`);
            for (const finding of severe) {
                // cweId is optional; omit the suffix instead of printing "(undefined)".
                const cweSuffix = finding.cweId ? ` (${escapeXml(finding.cweId)})` : '';
                lines.push(`        ${finding.riskLevel.toUpperCase()}: ${escapeXml(finding.title)}${cweSuffix}`);
            }
            lines.push('      </failure>');
            lines.push('    </testcase>');
        }
    }
    lines.push('  </testsuite>');
    lines.push('</testsuites>');
    return lines.join('\n');
}
/**
 * Format check results as SARIF (for GitHub Code Scanning and other tools).
 * This is used when --format sarif is specified but no baseline comparison occurs.
 *
 * Emits one result per security finding (rule id = the finding's CWE id, or
 * BWH-SEC when it has none) and one BWH-REL result per tool whose success
 * rate is below 90%. Every rule id referenced by a result is declared in
 * tool.driver.rules.
 *
 * @param {object} baseline - Baseline handed to getToolFingerprints().
 * @returns {string} Pretty-printed SARIF 2.1.0 JSON document.
 */
function formatCheckResultsSarif(baseline) {
    const RELIABILITY_THRESHOLD = 0.9;
    // Map a finding's risk level onto a SARIF result level.
    const toSarifLevel = (riskLevel) => {
        if (riskLevel === 'critical' || riskLevel === 'high') {
            return 'error';
        }
        return riskLevel === 'medium' ? 'warning' : 'note';
    };
    const tools = getToolFingerprints(baseline);
    // `server` is accessed defensively so the 'mcp-server' fallback is reachable.
    const serverUri = baseline.metadata?.serverCommand || baseline.server?.name || 'mcp-server';
    // All results point at the server as a whole; there is no finer location.
    const serverLocation = () => [{
            physicalLocation: {
                artifactLocation: { uri: serverUri },
                region: { startLine: 1 },
            },
        }];
    const rules = [
        {
            id: 'BWH-SEC',
            name: 'SecurityFinding',
            shortDescription: { text: 'Security vulnerability detected' },
            defaultConfiguration: { level: 'warning' },
        },
        {
            id: 'BWH-REL',
            name: 'LowReliability',
            shortDescription: { text: 'Tool reliability below threshold' },
            defaultConfiguration: { level: 'warning' },
        },
    ];
    const declaredRuleIds = new Set(rules.map((rule) => rule.id));
    const results = [];
    // Add results for tools with security findings
    for (const tool of tools) {
        const findings = tool.securityFingerprint?.findings ?? [];
        for (const finding of findings) {
            const ruleId = finding.cweId || 'BWH-SEC';
            // Declare CWE-based rules on first use so each result's ruleId
            // resolves to a descriptor in tool.driver.rules.
            if (!declaredRuleIds.has(ruleId)) {
                declaredRuleIds.add(ruleId);
                rules.push({
                    id: ruleId,
                    name: 'SecurityFinding',
                    shortDescription: { text: `Security vulnerability detected (${ruleId})` },
                    defaultConfiguration: { level: 'warning' },
                });
            }
            results.push({
                ruleId,
                level: toSarifLevel(finding.riskLevel),
                message: { text: `[${tool.name}] ${finding.title}: ${finding.description}` },
                locations: serverLocation(),
            });
        }
    }
    // Add results for tools with low success rate
    for (const tool of tools) {
        const successRate = tool.baselineSuccessRate ?? 1;
        if (successRate < RELIABILITY_THRESHOLD) {
            results.push({
                ruleId: 'BWH-REL',
                level: 'warning',
                message: { text: `Tool "${tool.name}" has ${(successRate * 100).toFixed(0)}% success rate` },
                locations: serverLocation(),
            });
        }
    }
    const sarif = {
        $schema: 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
        version: '2.1.0',
        runs: [{
                tool: {
                    driver: {
                        name: 'bellwether',
                        // NOTE(review): hard-coded driver version; consider sourcing it
                        // from the package version constant so it tracks releases.
                        version: '1.0.0',
                        informationUri: 'https://github.com/dotsetlabs/bellwether',
                        rules,
                    },
                },
                results,
            }],
    };
    return JSON.stringify(sarif, null, 2);
}
/**
 * Format check results using the specified output format.
 * Used when no baseline comparison occurs.
 *
 * @param {object} baseline - Baseline forwarded to the format-specific formatter.
 * @param {string} [format] - Requested output format (case-insensitive).
 * @returns {string|null} Formatted document for junit/sarif formats, or null
 *   when the format needs no special output (including a missing format).
 */
function formatCheckResults(baseline, format) {
    // A missing or non-string format is treated like any other unrecognized
    // format instead of throwing on .toLowerCase().
    const normalizedFormat = typeof format === 'string' ? format.toLowerCase() : '';
    switch (normalizedFormat) {
        case 'junit':
        case 'junit-xml':
        case 'xml':
            return formatCheckResultsJUnit(baseline);
        case 'sarif':
            return formatCheckResultsSarif(baseline);
        default:
            return null; // No special formatting needed for other formats
    }
}
917
1069
  //# sourceMappingURL=check.js.map