@dotsetlabs/bellwether 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +25 -0
- package/CHANGELOG.md +63 -0
- package/Dockerfile +43 -0
- package/dist/auth/keychain.js +14 -7
- package/dist/baseline/change-impact-analyzer.js +1 -1
- package/dist/baseline/comparator.js +2 -2
- package/dist/baseline/deprecation-tracker.js +1 -1
- package/dist/baseline/diff.js +4 -4
- package/dist/baseline/golden-output.js +2 -2
- package/dist/baseline/migration-generator.js +2 -2
- package/dist/baseline/performance-tracker.js +1 -1
- package/dist/baseline/pr-comment-generator.js +4 -4
- package/dist/baseline/risk-scorer.js +1 -1
- package/dist/baseline/schema-evolution.js +1 -1
- package/dist/cli/commands/baseline.js +30 -11
- package/dist/cli/commands/check.js +160 -8
- package/dist/cli/commands/contract.js +4 -4
- package/dist/cli/commands/discover.js +1 -1
- package/dist/cli/commands/explore.js +1 -1
- package/dist/cli/commands/registry.js +143 -5
- package/dist/cli/commands/watch.js +5 -5
- package/dist/cli/index.d.ts +6 -0
- package/dist/cli/index.js +8 -2
- package/dist/cli/output.d.ts +1 -0
- package/dist/cli/output.js +34 -10
- package/dist/cli/utils/progress.js +10 -10
- package/dist/config/loader.js +33 -4
- package/dist/config/template.d.ts +2 -0
- package/dist/config/template.js +8 -2
- package/dist/constants/core.d.ts +9 -0
- package/dist/constants/core.js +9 -0
- package/dist/constants/testing.js +0 -1
- package/dist/contract/validator.js +1 -1
- package/dist/discovery/discovery.js +4 -4
- package/dist/docs/agents.js +1 -1
- package/dist/docs/contract.js +5 -5
- package/dist/index.d.ts +4 -0
- package/dist/index.js +4 -0
- package/dist/interview/interviewer.js +1 -1
- package/dist/interview/orchestrator.js +1 -1
- package/dist/llm/anthropic.js +1 -1
- package/dist/llm/token-budget.js +1 -1
- package/dist/registry/client.d.ts +2 -0
- package/dist/registry/client.js +38 -1
- package/dist/security/security-tester.js +2 -2
- package/dist/transport/http-transport.js +1 -1
- package/dist/transport/mcp-client.js +2 -2
- package/dist/transport/stdio-transport.js +1 -1
- package/dist/utils/markdown.js +3 -3
- package/dist/utils/sanitize.js +1 -1
- package/dist/utils/smart-truncate.js +1 -1
- package/dist/version.js +1 -1
- package/dist/workflow/auto-generator.js +3 -3
- package/dist/workflow/state-tracker.js +1 -1
- package/package.json +34 -6
- package/scripts/completions/bellwether.bash +61 -0
- package/scripts/completions/bellwether.zsh +94 -0
package/.dockerignore
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
node_modules
|
|
2
|
+
npm-debug.log
|
|
3
|
+
.git
|
|
4
|
+
.gitignore
|
|
5
|
+
README.md
|
|
6
|
+
CHANGELOG.md
|
|
7
|
+
.eslintrc.json
|
|
8
|
+
.prettierrc
|
|
9
|
+
.github
|
|
10
|
+
.nyc_output
|
|
11
|
+
coverage
|
|
12
|
+
.vscode
|
|
13
|
+
.idea
|
|
14
|
+
test/
|
|
15
|
+
src/
|
|
16
|
+
*.test.ts
|
|
17
|
+
*.spec.ts
|
|
18
|
+
tsconfig.json
|
|
19
|
+
typedoc.json
|
|
20
|
+
vitest.config.ts
|
|
21
|
+
.dccache
|
|
22
|
+
*.md
|
|
23
|
+
!LICENSE
|
|
24
|
+
!README.md
|
|
25
|
+
!CHANGELOG.md
|
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,69 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [1.0.2] - 2026-01-30
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Added SARIF and JUnit output format support for `bellwether check` without baseline comparison
|
|
15
|
+
- Use `--format sarif` for GitHub Code Scanning integration
|
|
16
|
+
- Use `--format junit` for CI/CD test reporting
|
|
17
|
+
- Added registry validation indicators showing environment variable requirements
|
|
18
|
+
- Servers requiring setup now display ⚙ indicator
|
|
19
|
+
- Environment variables show ✓/✗ status based on whether they're set
|
|
20
|
+
- Automatic detection of common service patterns (postgres→DATABASE_URL, etc.)
|
|
21
|
+
- Setup hints displayed for unconfigured servers
|
|
22
|
+
|
|
23
|
+
### Changed
|
|
24
|
+
|
|
25
|
+
- Security and thorough presets now enable security testing by default (`check.security.enabled: true`)
|
|
26
|
+
|
|
27
|
+
### Fixed
|
|
28
|
+
|
|
29
|
+
- Fixed baseline path resolution in `baseline compare` to be consistent with `baseline show`
|
|
30
|
+
- Now checks both output directory and current working directory before failing
|
|
31
|
+
- Fixed `bellwether auth status` requiring a config file
|
|
32
|
+
- Auth commands now work without bellwether.yaml present
|
|
33
|
+
- Fixed ANSI escape codes appearing in non-TTY output (e.g., when piping to files)
|
|
34
|
+
- StreamingDisplay now checks for TTY before applying ANSI styling
|
|
35
|
+
- Automatically respects `NO_COLOR` and `FORCE_COLOR=0` environment variables
|
|
36
|
+
|
|
37
|
+
## [1.0.1] - 2026-01-29
|
|
38
|
+
|
|
39
|
+
### Added
|
|
40
|
+
|
|
41
|
+
- Added `$VAR` syntax support for environment variable interpolation in config files
|
|
42
|
+
- Added rate limiting to registry client (5 req/s default)
|
|
43
|
+
- Added `AnthropicClient` and `OllamaClient` exports to public API
|
|
44
|
+
- Added `repository.directory` and `funding` fields to package.json
|
|
45
|
+
- Added required permissions documentation to GitHub Action
|
|
46
|
+
- Added debug logging for all credential operations
|
|
47
|
+
- Added warning when environment variables in config are not resolved
|
|
48
|
+
|
|
49
|
+
### Changed
|
|
50
|
+
|
|
51
|
+
- Optimized GitHub Action to run check once; SARIF and JUnit are now converted from JSON output
|
|
52
|
+
- Removed test coverage exclusion for CLI entry point
|
|
53
|
+
- Removed unnecessary type casts in check.ts and security-tester.ts
|
|
54
|
+
- Replaced magic number 100 with PERCENTAGE_CONVERSION.DIVISOR constant
|
|
55
|
+
- Removed dead code sections from constants
|
|
56
|
+
- Refactored string concatenation to template literals in CLI output modules
|
|
57
|
+
|
|
58
|
+
### Fixed
|
|
59
|
+
|
|
60
|
+
- Fixed version fallback inconsistency (0.13.0 → 1.0.1)
|
|
61
|
+
- Fixed missing pino-pretty dependency
|
|
62
|
+
- Fixed non-null assertion for remoteUrl in check.ts (added proper null check)
|
|
63
|
+
- Fixed non-null assertion for incrementalResult in check.ts
|
|
64
|
+
- Added debug logging to catch blocks in keychain.ts (graceful degradation with visibility)
|
|
65
|
+
- Fixed flaky test in workflow executor (timing assertion)
|
|
66
|
+
- Fixed test failures in baseline-accept tests (process.exit mock)
|
|
67
|
+
|
|
5
68
|
## [1.0.0] - 2026-01-27
|
|
6
69
|
|
|
7
70
|
### Breaking Changes
|
package/Dockerfile
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Bellwether MCP Testing Tool
|
|
2
|
+
# https://github.com/dotsetlabs/bellwether
|
|
3
|
+
|
|
4
|
+
FROM node:20-alpine
|
|
5
|
+
|
|
6
|
+
LABEL maintainer="Dotset Labs <hello@dotsetlabs.com>"
|
|
7
|
+
LABEL description="Bellwether - MCP Server Testing & Validation"
|
|
8
|
+
LABEL org.opencontainers.image.source="https://github.com/dotsetlabs/bellwether"
|
|
9
|
+
|
|
10
|
+
# Install git for npm dependencies that may need it
|
|
11
|
+
RUN apk add --no-cache git
|
|
12
|
+
|
|
13
|
+
# Create app directory
|
|
14
|
+
WORKDIR /app
|
|
15
|
+
|
|
16
|
+
# Copy package files
|
|
17
|
+
COPY package*.json ./
|
|
18
|
+
|
|
19
|
+
# Install production dependencies only
|
|
20
|
+
RUN npm ci --omit=dev
|
|
21
|
+
|
|
22
|
+
# Copy built application
|
|
23
|
+
COPY dist/ ./dist/
|
|
24
|
+
COPY schemas/ ./schemas/
|
|
25
|
+
COPY LICENSE README.md CHANGELOG.md ./
|
|
26
|
+
|
|
27
|
+
# Create non-root user
|
|
28
|
+
RUN addgroup -g 1001 -S bellwether && \
|
|
29
|
+
adduser -S bellwether -u 1001
|
|
30
|
+
|
|
31
|
+
# Set proper permissions
|
|
32
|
+
RUN chown -R bellwether:bellwether /app
|
|
33
|
+
|
|
34
|
+
# Switch to non-root user
|
|
35
|
+
USER bellwether
|
|
36
|
+
|
|
37
|
+
# Set environment
|
|
38
|
+
ENV NODE_ENV=production
|
|
39
|
+
ENV BELLWETHER_DOCKER=1
|
|
40
|
+
|
|
41
|
+
# Entry point
|
|
42
|
+
ENTRYPOINT ["node", "dist/cli/index.js"]
|
|
43
|
+
CMD ["--help"]
|
package/dist/auth/keychain.js
CHANGED
|
@@ -12,6 +12,8 @@ import { homedir } from 'os';
|
|
|
12
12
|
import { join } from 'path';
|
|
13
13
|
import { createRequire } from 'module';
|
|
14
14
|
import { createCipheriv, createDecipheriv, randomBytes } from 'crypto';
|
|
15
|
+
import { getLogger } from '../logging/logger.js';
|
|
16
|
+
const logger = getLogger('keychain');
|
|
15
17
|
// Create require function for loading CommonJS optional dependencies in ESM
|
|
16
18
|
const require = createRequire(import.meta.url);
|
|
17
19
|
// Service name for keychain entries
|
|
@@ -80,7 +82,8 @@ export function decryptEnvValue(value) {
|
|
|
80
82
|
decipher.setAuthTag(tag);
|
|
81
83
|
return Buffer.concat([decipher.update(data), decipher.final()]).toString('utf8');
|
|
82
84
|
}
|
|
83
|
-
catch {
|
|
85
|
+
catch (error) {
|
|
86
|
+
logger.debug({ error }, 'Failed to decrypt env value');
|
|
84
87
|
return undefined;
|
|
85
88
|
}
|
|
86
89
|
}
|
|
@@ -100,8 +103,8 @@ class KeytarBackend {
|
|
|
100
103
|
// Using require() for optional dependency
|
|
101
104
|
this.keytar = require('keytar');
|
|
102
105
|
}
|
|
103
|
-
catch {
|
|
104
|
-
|
|
106
|
+
catch (error) {
|
|
107
|
+
logger.debug({ error }, 'keytar not available, will use file fallback');
|
|
105
108
|
this.keytar = null;
|
|
106
109
|
}
|
|
107
110
|
})();
|
|
@@ -154,7 +157,8 @@ class FileBackend {
|
|
|
154
157
|
this.envLines = [];
|
|
155
158
|
}
|
|
156
159
|
}
|
|
157
|
-
catch {
|
|
160
|
+
catch (error) {
|
|
161
|
+
logger.debug({ error }, 'Failed to load credentials file');
|
|
158
162
|
this.envLines = [];
|
|
159
163
|
}
|
|
160
164
|
return this.envLines;
|
|
@@ -264,7 +268,8 @@ export class KeychainService {
|
|
|
264
268
|
require('keytar');
|
|
265
269
|
return true;
|
|
266
270
|
}
|
|
267
|
-
catch {
|
|
271
|
+
catch (error) {
|
|
272
|
+
logger.debug({ error }, 'Secure keychain (keytar) not available');
|
|
268
273
|
return false;
|
|
269
274
|
}
|
|
270
275
|
}
|
|
@@ -295,7 +300,8 @@ export class KeychainService {
|
|
|
295
300
|
try {
|
|
296
301
|
return await this.backend.getPassword(SERVICE_NAME, account);
|
|
297
302
|
}
|
|
298
|
-
catch {
|
|
303
|
+
catch (error) {
|
|
304
|
+
logger.debug({ error, provider }, 'Keychain get failed, trying file backend');
|
|
299
305
|
// If keytar fails, try file backend
|
|
300
306
|
if (!this.useFileBackend) {
|
|
301
307
|
this.enableFileBackend();
|
|
@@ -337,7 +343,8 @@ export class KeychainService {
|
|
|
337
343
|
try {
|
|
338
344
|
return await this.backend.deletePassword(SERVICE_NAME, account);
|
|
339
345
|
}
|
|
340
|
-
catch {
|
|
346
|
+
catch (error) {
|
|
347
|
+
logger.debug({ error, provider }, 'Keychain delete failed, trying file backend');
|
|
341
348
|
// If keytar fails, try file backend
|
|
342
349
|
if (!this.useFileBackend) {
|
|
343
350
|
this.enableFileBackend();
|
|
@@ -543,7 +543,7 @@ function generateImpactSummary(diff, toolImpacts, brokenWorkflows) {
|
|
|
543
543
|
if (brokenWorkflows.length > 0) {
|
|
544
544
|
parts.push(`${brokenWorkflows.length} workflow(s) may be affected`);
|
|
545
545
|
}
|
|
546
|
-
return parts.length > 0 ? parts.join('. ')
|
|
546
|
+
return parts.length > 0 ? `${parts.join('. ')}.` : 'No changes detected.';
|
|
547
547
|
}
|
|
548
548
|
/**
|
|
549
549
|
* Check if a behavior change is actually breaking based on semantic analysis.
|
|
@@ -337,7 +337,7 @@ function formatSchemaChangeValue(value) {
|
|
|
337
337
|
// For objects, show a compact representation
|
|
338
338
|
try {
|
|
339
339
|
const json = JSON.stringify(value);
|
|
340
|
-
return json.length > 50 ? json.slice(0, 47)
|
|
340
|
+
return json.length > 50 ? `${json.slice(0, 47)}...` : json;
|
|
341
341
|
}
|
|
342
342
|
catch {
|
|
343
343
|
return String(value);
|
|
@@ -425,7 +425,7 @@ function generateSummary(toolsAdded, toolsRemoved, toolsModified, changes, sever
|
|
|
425
425
|
if (warningChanges > 0) {
|
|
426
426
|
parts.push(`${warningChanges} warning(s)`);
|
|
427
427
|
}
|
|
428
|
-
return parts.join('. ')
|
|
428
|
+
return `${parts.join('. ')}.`;
|
|
429
429
|
}
|
|
430
430
|
export function hasBreakingChanges(diff) {
|
|
431
431
|
return diff.severity === 'breaking';
|
|
@@ -231,7 +231,7 @@ function generateDeprecationSummary(warnings, deprecatedCount, expiredCount, gra
|
|
|
231
231
|
if (criticalTools.length > 0) {
|
|
232
232
|
parts.push(`${criticalTools.length} tool(s) will be removed within ${DEPRECATION_THRESHOLDS.CRITICAL_REMOVAL_DAYS} days`);
|
|
233
233
|
}
|
|
234
|
-
return parts.join(', ')
|
|
234
|
+
return `${parts.join(', ')}.`;
|
|
235
235
|
}
|
|
236
236
|
/**
|
|
237
237
|
* Get all deprecated tools from a baseline.
|
package/dist/baseline/diff.js
CHANGED
|
@@ -59,10 +59,10 @@ export function formatDiffText(diff, useColors = true) {
|
|
|
59
59
|
const sevColor = getSeverityColor(change.severity, useColors);
|
|
60
60
|
lines.push(` ${sevColor(`[${change.severity.toUpperCase()}]`)} ${change.aspect}`);
|
|
61
61
|
if (change.before) {
|
|
62
|
-
lines.push(` ${red(
|
|
62
|
+
lines.push(` ${red(`- ${change.before}`)}`);
|
|
63
63
|
}
|
|
64
64
|
if (change.after) {
|
|
65
|
-
lines.push(` ${green(
|
|
65
|
+
lines.push(` ${green(`+ ${change.after}`)}`);
|
|
66
66
|
}
|
|
67
67
|
}
|
|
68
68
|
lines.push('');
|
|
@@ -144,10 +144,10 @@ export function formatDiffText(diff, useColors = true) {
|
|
|
144
144
|
lines.push(` ${issueIcon} ${bold(issue.toolName)}`);
|
|
145
145
|
lines.push(` ${issue.summary}`);
|
|
146
146
|
if (issue.fieldsRemoved.length > 0) {
|
|
147
|
-
lines.push(` ${red(
|
|
147
|
+
lines.push(` ${red(`- Removed: ${issue.fieldsRemoved.join(', ')}`)}`);
|
|
148
148
|
}
|
|
149
149
|
if (issue.fieldsAdded.length > 0) {
|
|
150
|
-
lines.push(` ${green(
|
|
150
|
+
lines.push(` ${green(`+ Added: ${issue.fieldsAdded.join(', ')}`)}`);
|
|
151
151
|
}
|
|
152
152
|
}
|
|
153
153
|
lines.push('');
|
|
@@ -562,7 +562,7 @@ function isPathAllowed(path, allowedPaths) {
|
|
|
562
562
|
// Normalize pattern by stripping leading $. if present
|
|
563
563
|
const normalizedPattern = pattern.replace(/^\$\.?/, '');
|
|
564
564
|
// Simple glob matching: * matches any segment
|
|
565
|
-
const regex = new RegExp(
|
|
565
|
+
const regex = new RegExp(`^${normalizedPattern.replace(/\*/g, '[^.]+').replace(/\./g, '\\.')}$`);
|
|
566
566
|
return regex.test(normalizedPath);
|
|
567
567
|
});
|
|
568
568
|
}
|
|
@@ -583,7 +583,7 @@ function truncateForDisplay(value, maxLength = 50) {
|
|
|
583
583
|
const str = typeof value === 'string' ? value : JSON.stringify(value);
|
|
584
584
|
if (str.length <= maxLength)
|
|
585
585
|
return str;
|
|
586
|
-
return str.slice(0, maxLength - 3)
|
|
586
|
+
return `${str.slice(0, maxLength - 3)}...`;
|
|
587
587
|
}
|
|
588
588
|
/**
|
|
589
589
|
* Determine severity based on differences.
|
|
@@ -472,12 +472,12 @@ export function formatMigrationGuideMarkdown(guide) {
|
|
|
472
472
|
lines.push(`**${example.title}**`);
|
|
473
473
|
lines.push('');
|
|
474
474
|
lines.push('Before:');
|
|
475
|
-
lines.push(
|
|
475
|
+
lines.push(`\`\`\`${example.language}`);
|
|
476
476
|
lines.push(example.before);
|
|
477
477
|
lines.push('```');
|
|
478
478
|
lines.push('');
|
|
479
479
|
lines.push('After:');
|
|
480
|
-
lines.push(
|
|
480
|
+
lines.push(`\`\`\`${example.language}`);
|
|
481
481
|
lines.push(example.after);
|
|
482
482
|
lines.push('```');
|
|
483
483
|
lines.push('');
|
|
@@ -494,7 +494,7 @@ function generateReportSummary(regressions, improvements, stable, total) {
|
|
|
494
494
|
if (parts.length === 0) {
|
|
495
495
|
return `No performance data for ${total} tool(s).`;
|
|
496
496
|
}
|
|
497
|
-
return parts.join(', ')
|
|
497
|
+
return `${parts.join(', ')}.`;
|
|
498
498
|
}
|
|
499
499
|
/**
|
|
500
500
|
* Format performance metrics for display.
|
|
@@ -199,12 +199,12 @@ function generateMigrationSection(guide, config) {
|
|
|
199
199
|
if (step.codeExamples && step.codeExamples.length > 0) {
|
|
200
200
|
const example = step.codeExamples[0];
|
|
201
201
|
lines.push('');
|
|
202
|
-
lines.push(
|
|
202
|
+
lines.push(` \`\`\`${example.language || ''}`);
|
|
203
203
|
lines.push(' // Before:');
|
|
204
|
-
lines.push(
|
|
204
|
+
lines.push(` ${example.before.split('\n').join('\n ')}`);
|
|
205
205
|
lines.push('');
|
|
206
206
|
lines.push(' // After:');
|
|
207
|
-
lines.push(
|
|
207
|
+
lines.push(` ${example.after.split('\n').join('\n ')}`);
|
|
208
208
|
lines.push(' ```');
|
|
209
209
|
}
|
|
210
210
|
lines.push('');
|
|
@@ -283,7 +283,7 @@ function formatAspect(aspect) {
|
|
|
283
283
|
function truncate(value, maxLength = PR_COMMENTS.VALUE_TRUNCATE_LENGTH) {
|
|
284
284
|
if (value.length <= maxLength)
|
|
285
285
|
return value;
|
|
286
|
-
return value.substring(0, maxLength - 3)
|
|
286
|
+
return `${value.substring(0, maxLength - 3)}...`;
|
|
287
287
|
}
|
|
288
288
|
/**
|
|
289
289
|
* Render a collapsible section.
|
|
@@ -429,6 +429,6 @@ export function generateRiskScoreMarkdown(riskScore) {
|
|
|
429
429
|
function generateScoreBar(score, width = 10) {
|
|
430
430
|
const filled = Math.round((score / 100) * width);
|
|
431
431
|
const empty = width - filled;
|
|
432
|
-
return
|
|
432
|
+
return `[${'█'.repeat(filled)}${'░'.repeat(empty)}]`;
|
|
433
433
|
}
|
|
434
434
|
//# sourceMappingURL=risk-scorer.js.map
|
|
@@ -365,7 +365,7 @@ export function generateVisualTimeline(timeline, width = SCHEMA_EVOLUTION.DEFAUL
|
|
|
365
365
|
const marker = v.hasBreakingChanges ? '◆' : '●';
|
|
366
366
|
bar += marker + '─'.repeat(segmentWidth - 1);
|
|
367
367
|
}
|
|
368
|
-
lines.push(
|
|
368
|
+
lines.push(` ${bar}`);
|
|
369
369
|
// Version labels
|
|
370
370
|
let labels = ' ';
|
|
371
371
|
for (const v of displayVersions) {
|
|
@@ -146,12 +146,31 @@ baselineCommand
|
|
|
146
146
|
output.error('No baseline path provided. Set baseline.path or baseline.comparePath in config, or pass a path argument.');
|
|
147
147
|
process.exit(EXIT_CODES.ERROR);
|
|
148
148
|
}
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
149
|
+
// Resolve baseline path consistently with 'show' command:
|
|
150
|
+
// 1. If absolute path, use as-is
|
|
151
|
+
// 2. First try relative to outputDir (e.g., .bellwether/)
|
|
152
|
+
// 3. Fall back to relative to cwd
|
|
153
|
+
let fullBaselinePath;
|
|
154
|
+
if (resolvedBaselinePath.startsWith('/')) {
|
|
155
|
+
fullBaselinePath = resolvedBaselinePath;
|
|
156
|
+
}
|
|
157
|
+
else {
|
|
158
|
+
const outputDirPath = join(outputDir, resolvedBaselinePath);
|
|
159
|
+
const cwdPath = join(process.cwd(), resolvedBaselinePath);
|
|
160
|
+
if (existsSync(outputDirPath)) {
|
|
161
|
+
fullBaselinePath = outputDirPath;
|
|
162
|
+
}
|
|
163
|
+
else if (existsSync(cwdPath)) {
|
|
164
|
+
fullBaselinePath = cwdPath;
|
|
165
|
+
}
|
|
166
|
+
else {
|
|
167
|
+
// Default to outputDir path for error message consistency
|
|
168
|
+
fullBaselinePath = outputDirPath;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
153
171
|
if (!existsSync(fullBaselinePath)) {
|
|
154
172
|
output.error(`Baseline not found: ${fullBaselinePath}`);
|
|
173
|
+
output.error('\nRun `bellwether baseline save` to create a baseline.');
|
|
155
174
|
process.exit(EXIT_CODES.ERROR);
|
|
156
175
|
}
|
|
157
176
|
let previousBaseline;
|
|
@@ -203,13 +222,13 @@ baselineCommand
|
|
|
203
222
|
// Format and output
|
|
204
223
|
switch (format) {
|
|
205
224
|
case 'json':
|
|
206
|
-
|
|
225
|
+
output.info(formatDiffJson(diff));
|
|
207
226
|
break;
|
|
208
227
|
case 'markdown':
|
|
209
|
-
|
|
228
|
+
output.info(formatDiffMarkdown(diff));
|
|
210
229
|
break;
|
|
211
230
|
case 'compact':
|
|
212
|
-
|
|
231
|
+
output.info(formatDiffCompact(diff));
|
|
213
232
|
break;
|
|
214
233
|
default:
|
|
215
234
|
output.info('--- Drift Report ---');
|
|
@@ -271,7 +290,7 @@ baselineCommand
|
|
|
271
290
|
}
|
|
272
291
|
// Raw JSON output
|
|
273
292
|
if (options.json) {
|
|
274
|
-
|
|
293
|
+
output.info(JSON.stringify(baseline, null, 2));
|
|
275
294
|
return;
|
|
276
295
|
}
|
|
277
296
|
// Formatted output
|
|
@@ -411,13 +430,13 @@ baselineCommand
|
|
|
411
430
|
// Format and output
|
|
412
431
|
switch (format) {
|
|
413
432
|
case 'json':
|
|
414
|
-
|
|
433
|
+
output.info(formatDiffJson(diff));
|
|
415
434
|
break;
|
|
416
435
|
case 'markdown':
|
|
417
|
-
|
|
436
|
+
output.info(formatDiffMarkdown(diff));
|
|
418
437
|
break;
|
|
419
438
|
case 'compact':
|
|
420
|
-
|
|
439
|
+
output.info(formatDiffCompact(diff));
|
|
421
440
|
break;
|
|
422
441
|
default:
|
|
423
442
|
output.info(formatDiffText(diff));
|
|
@@ -26,7 +26,7 @@ import { loadWorkflowsFromFile, tryLoadDefaultWorkflows, DEFAULT_WORKFLOWS_FILE,
|
|
|
26
26
|
import * as output from '../output.js';
|
|
27
27
|
import { extractServerContextFromArgs } from '../utils/server-context.js';
|
|
28
28
|
import { configureLogger } from '../../logging/logger.js';
|
|
29
|
-
import { EXIT_CODES, SEVERITY_TO_EXIT_CODE, PATHS, SECURITY_TESTING, CHECK_SAMPLING, WORKFLOW, REPORT_SCHEMAS, } from '../../constants.js';
|
|
29
|
+
import { EXIT_CODES, SEVERITY_TO_EXIT_CODE, PATHS, SECURITY_TESTING, CHECK_SAMPLING, WORKFLOW, REPORT_SCHEMAS, PERCENTAGE_CONVERSION, } from '../../constants.js';
|
|
30
30
|
export const checkCommand = new Command('check')
|
|
31
31
|
.description('Check MCP server schema and detect drift (free, fast, deterministic)')
|
|
32
32
|
.allowUnknownOption() // Allow server flags like -y for npx to pass through
|
|
@@ -112,7 +112,7 @@ export const checkCommand = new Command('check')
|
|
|
112
112
|
const incrementalCacheHours = config.check.incrementalCacheHours;
|
|
113
113
|
const parallelEnabled = config.check.parallel;
|
|
114
114
|
const parallelWorkers = config.check.parallelWorkers;
|
|
115
|
-
const performanceThreshold = config.check.performanceThreshold /
|
|
115
|
+
const performanceThreshold = config.check.performanceThreshold / PERCENTAGE_CONVERSION.DIVISOR;
|
|
116
116
|
const diffFormat = options.format ?? config.check.diffFormat;
|
|
117
117
|
// Resolve security options from config
|
|
118
118
|
const securityEnabled = config.check.security.enabled;
|
|
@@ -171,6 +171,10 @@ export const checkCommand = new Command('check')
|
|
|
171
171
|
await mcpClient.connect(serverCommand, args, config.server.env);
|
|
172
172
|
}
|
|
173
173
|
else {
|
|
174
|
+
if (!remoteUrl) {
|
|
175
|
+
output.error('No server URL specified for remote transport');
|
|
176
|
+
process.exit(EXIT_CODES.ERROR);
|
|
177
|
+
}
|
|
174
178
|
await mcpClient.connectRemote(remoteUrl, {
|
|
175
179
|
transport,
|
|
176
180
|
sessionId: remoteSessionId || undefined,
|
|
@@ -224,18 +228,19 @@ export const checkCommand = new Command('check')
|
|
|
224
228
|
}
|
|
225
229
|
else {
|
|
226
230
|
incrementalBaseline = loadBaseline(baselinePath);
|
|
227
|
-
|
|
228
|
-
|
|
231
|
+
const result = analyzeForIncremental(discovery.tools, incrementalBaseline, { maxCacheAgeHours: incrementalCacheHours });
|
|
232
|
+
incrementalResult = result;
|
|
233
|
+
const summary = formatIncrementalSummary(result.changeSummary);
|
|
229
234
|
output.info(`Incremental analysis: ${summary}`);
|
|
230
|
-
if (
|
|
235
|
+
if (result.toolsToTest.length === 0) {
|
|
231
236
|
output.info('All tools unchanged. Using cached results.');
|
|
232
237
|
// Still need to generate output with cached data
|
|
233
238
|
// Skip to comparison section
|
|
234
239
|
}
|
|
235
240
|
else {
|
|
236
|
-
output.info(`Testing ${
|
|
241
|
+
output.info(`Testing ${result.toolsToTest.length} tools (${result.toolsToSkip.length} cached)\n`);
|
|
237
242
|
// Filter discovery to only include tools that need testing
|
|
238
|
-
discovery.tools = discovery.tools.filter(t =>
|
|
243
|
+
discovery.tools = discovery.tools.filter(t => result.toolsToTest.includes(t.name));
|
|
239
244
|
}
|
|
240
245
|
}
|
|
241
246
|
}
|
|
@@ -452,7 +457,7 @@ export const checkCommand = new Command('check')
|
|
|
452
457
|
const fingerprint = await runSecurityTests({
|
|
453
458
|
toolName: tool.name,
|
|
454
459
|
toolDescription: tool.description || '',
|
|
455
|
-
inputSchema: tool.inputSchema,
|
|
460
|
+
inputSchema: tool.inputSchema ?? {},
|
|
456
461
|
callTool: async (args) => {
|
|
457
462
|
try {
|
|
458
463
|
const response = await mcpClient.callTool(tool.name, args);
|
|
@@ -763,6 +768,16 @@ export const checkCommand = new Command('check')
|
|
|
763
768
|
saveBaseline(currentBaseline, saveBaselinePath);
|
|
764
769
|
output.info(`\nBaseline saved: ${saveBaselinePath}`);
|
|
765
770
|
}
|
|
771
|
+
// Output formatted results for sarif/junit when no baseline comparison
|
|
772
|
+
// This allows CI systems to consume check results even without drift detection
|
|
773
|
+
if (!baselinePath) {
|
|
774
|
+
const formattedCheckResults = formatCheckResults(currentBaseline, diffFormat);
|
|
775
|
+
if (formattedCheckResults) {
|
|
776
|
+
output.info('\n--- Check Results ---');
|
|
777
|
+
// Output directly to stdout for machine-readable formats
|
|
778
|
+
console.log(formattedCheckResults);
|
|
779
|
+
}
|
|
780
|
+
}
|
|
766
781
|
// Handle baseline comparison
|
|
767
782
|
if (baselinePath) {
|
|
768
783
|
if (!existsSync(baselinePath)) {
|
|
@@ -914,4 +929,141 @@ function formatDiff(diff, format, baselinePath) {
|
|
|
914
929
|
return formatDiffText(diff);
|
|
915
930
|
}
|
|
916
931
|
}
|
|
932
|
+
/**
 * Format check results as JUnit XML (for CI systems that expect test results).
 * This is used when --format junit is specified but no baseline comparison occurs.
 *
 * One `<testcase>` is emitted per tool fingerprint, carrying its success rate.
 * Each tool that has at least one critical/high security finding additionally
 * gets a failing `<name>-security` testcase that lists those findings.
 *
 * All dynamic values (tool names, finding titles, CWE ids) are XML-escaped so
 * that a single `<`, `&` or quote in server-provided data cannot produce a
 * malformed report.
 *
 * @param {object} baseline - Baseline passed to `getToolFingerprints`.
 * @returns {string} The JUnit XML document.
 */
function formatCheckResultsJUnit(baseline) {
    const XML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&apos;' };
    const escapeXml = (value) => String(value).replace(/[&<>"']/g, (ch) => XML_ENTITIES[ch]);
    const isSevere = (finding) => finding.riskLevel === 'critical' || finding.riskLevel === 'high';
    const tools = getToolFingerprints(baseline);
    const lines = [];
    const securityFailures = tools.filter(t => t.securityFingerprint?.findings?.some(isSevere)).length;
    // Every tool yields one testcase and every tool with severe findings yields
    // one more, so the suite total must include both.
    const totalTests = tools.length + securityFailures;
    lines.push('<?xml version="1.0" encoding="UTF-8"?>');
    lines.push('<testsuites>');
    lines.push(` <testsuite name="bellwether-check" tests="${totalTests}" failures="${securityFailures}" errors="0">`);
    for (const tool of tools) {
        // A tool without a recorded success rate is treated as fully passing.
        const successRate = tool.baselineSuccessRate ?? 1;
        lines.push(` <testcase name="${escapeXml(tool.name)}" classname="mcp-tools" time="0">`);
        lines.push(` <system-out>Success rate: ${(successRate * 100).toFixed(0)}%</system-out>`);
        if (successRate < 0.9) {
            lines.push(` <system-err>Tool has success rate below 90%</system-err>`);
        }
        lines.push(' </testcase>');
    }
    // Add security findings as test cases if present
    const securityTools = tools.filter(t => t.securityFingerprint?.findings?.length);
    if (securityTools.length > 0) {
        lines.push(` <!-- Security findings -->`);
        for (const tool of securityTools) {
            const severeFindings = (tool.securityFingerprint?.findings ?? []).filter(isSevere);
            if (severeFindings.length === 0) {
                continue;
            }
            lines.push(` <testcase name="${escapeXml(tool.name)}-security" classname="security">`);
            lines.push(` <failure message="${severeFindings.length} critical/high security findings">`);
            for (const finding of severeFindings) {
                // Only mention the CWE id when the finding actually has one.
                const cweSuffix = finding.cweId ? ` (${finding.cweId})` : '';
                lines.push(` ${escapeXml(`${finding.riskLevel.toUpperCase()}: ${finding.title}${cweSuffix}`)}`);
            }
            lines.push(` </failure>`);
            lines.push(' </testcase>');
        }
    }
    lines.push(' </testsuite>');
    lines.push('</testsuites>');
    return lines.join('\n');
}
|
|
975
|
+
/**
 * Format check results as SARIF (for GitHub Code Scanning and other tools).
 * This is used when --format sarif is specified but no baseline comparison occurs.
 *
 * Emits one result per security finding (critical/high -> error, medium ->
 * warning, anything else -> note) and one `BWH-REL` warning per tool whose
 * success rate is below 90%. Every result points at the server (its command,
 * else its name, else the literal 'mcp-server') at line 1.
 *
 * Findings that carry a CWE id use it as their `ruleId`; a matching rule
 * descriptor is added to `tool.driver.rules` so that every `ruleId` in the
 * results resolves to a declared rule.
 *
 * NOTE(review): the driver version is a hard-coded '1.0.0' and may not match
 * the released package version — confirm whether it should come from the
 * package's version constant.
 *
 * @param {object} baseline - Baseline passed to `getToolFingerprints`.
 * @returns {string} The SARIF 2.1.0 log, pretty-printed as JSON.
 */
function formatCheckResultsSarif(baseline) {
    const tools = getToolFingerprints(baseline);
    // `server` may be absent; fall through to the generic label instead of throwing.
    const serverUri = baseline.metadata?.serverCommand || baseline.server?.name || 'mcp-server';
    const buildLocations = () => [{
            physicalLocation: {
                artifactLocation: { uri: serverUri },
                region: { startLine: 1 },
            },
        }];
    const results = [];
    // Rule descriptors for CWE ids referenced by results, keyed by id to dedupe.
    const cweRules = new Map();
    // Add results for tools with security findings
    for (const tool of tools) {
        for (const finding of tool.securityFingerprint?.findings ?? []) {
            const level = finding.riskLevel === 'critical' || finding.riskLevel === 'high'
                ? 'error'
                : finding.riskLevel === 'medium'
                    ? 'warning'
                    : 'note';
            const ruleId = finding.cweId || 'BWH-SEC';
            if (finding.cweId && !cweRules.has(ruleId)) {
                cweRules.set(ruleId, {
                    id: ruleId,
                    name: 'SecurityFinding',
                    shortDescription: { text: 'Security vulnerability detected' },
                    defaultConfiguration: { level: 'warning' },
                });
            }
            // Only append the description when the finding actually has one.
            const detail = finding.description ? `: ${finding.description}` : '';
            results.push({
                ruleId,
                level,
                message: { text: `[${tool.name}] ${finding.title}${detail}` },
                locations: buildLocations(),
            });
        }
    }
    // Add results for tools with low success rate
    for (const tool of tools) {
        const successRate = tool.baselineSuccessRate ?? 1;
        if (successRate < 0.9) {
            results.push({
                ruleId: 'BWH-REL',
                level: 'warning',
                message: { text: `Tool "${tool.name}" has ${(successRate * 100).toFixed(0)}% success rate` },
                locations: buildLocations(),
            });
        }
    }
    const sarif = {
        $schema: 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
        version: '2.1.0',
        runs: [{
                tool: {
                    driver: {
                        name: 'bellwether',
                        version: '1.0.0',
                        informationUri: 'https://github.com/dotsetlabs/bellwether',
                        rules: [
                            {
                                id: 'BWH-SEC',
                                name: 'SecurityFinding',
                                shortDescription: { text: 'Security vulnerability detected' },
                                defaultConfiguration: { level: 'warning' },
                            },
                            {
                                id: 'BWH-REL',
                                name: 'LowReliability',
                                shortDescription: { text: 'Tool reliability below threshold' },
                                defaultConfiguration: { level: 'warning' },
                            },
                            ...cweRules.values(),
                        ],
                    },
                },
                results,
            }],
    };
    return JSON.stringify(sarif, null, 2);
}
|
|
1053
|
+
/**
 * Format check results using the specified output format.
 * Used when no baseline comparison occurs.
 *
 * The format name is matched case-insensitively. `junit`, `junit-xml` and
 * `xml` produce JUnit XML; `sarif` produces a SARIF log. Any other value —
 * including a missing or non-string format — yields `null`, meaning no
 * machine-readable report applies.
 *
 * @param {object} baseline - Baseline handed to the selected formatter.
 * @param {string} [format] - Requested output format name.
 * @returns {string|null} The formatted report, or `null` for other formats.
 */
function formatCheckResults(baseline, format) {
    // A format may be unresolved (undefined/null); treat that as "no special
    // formatting" rather than throwing on `.toLowerCase()`.
    const normalizedFormat = typeof format === 'string' ? format.toLowerCase() : '';
    switch (normalizedFormat) {
        case 'junit':
        case 'junit-xml':
        case 'xml':
            return formatCheckResultsJUnit(baseline);
        case 'sarif':
            return formatCheckResultsSarif(baseline);
        default:
            return null; // No special formatting needed for other formats
    }
}
|
|
917
1069
|
//# sourceMappingURL=check.js.map
|