@dotsetlabs/bellwether 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -0
- package/README.md +3 -2
- package/dist/cache/response-cache.d.ts +4 -2
- package/dist/cache/response-cache.js +68 -30
- package/dist/cli/commands/baseline.js +23 -4
- package/dist/cli/commands/check.js +212 -36
- package/dist/cli/commands/registry.js +143 -5
- package/dist/cli/index.js +6 -4
- package/dist/cli/output.d.ts +1 -0
- package/dist/cli/output.js +32 -8
- package/dist/config/template.d.ts +2 -0
- package/dist/config/template.js +8 -2
- package/dist/interview/interviewer.js +70 -50
- package/dist/interview/orchestrator.js +49 -22
- package/dist/llm/anthropic.js +49 -16
- package/dist/llm/client.d.ts +2 -0
- package/dist/llm/client.js +61 -0
- package/dist/llm/ollama.js +9 -4
- package/dist/llm/openai.js +34 -23
- package/dist/transport/base-transport.d.ts +1 -1
- package/dist/transport/http-transport.d.ts +2 -2
- package/dist/transport/http-transport.js +26 -6
- package/dist/transport/mcp-client.d.ts +18 -6
- package/dist/transport/mcp-client.js +49 -19
- package/dist/transport/sse-transport.d.ts +1 -1
- package/dist/transport/sse-transport.js +4 -2
- package/dist/transport/stdio-transport.d.ts +1 -1
- package/dist/transport/stdio-transport.js +1 -1
- package/dist/utils/timeout.d.ts +10 -2
- package/dist/utils/timeout.js +9 -5
- package/dist/version.js +1 -1
- package/dist/workflow/executor.js +18 -13
- package/dist/workflow/loader.js +4 -1
- package/dist/workflow/state-tracker.js +22 -18
- package/man/bellwether.1 +204 -0
- package/man/bellwether.1.md +148 -0
- package/package.json +8 -9
package/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,56 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [1.0.3] - 2026-02-02
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Added `version` input to GitHub Action for explicit npm version selection
|
|
15
|
+
- Action now derives version from ref (e.g., `v1.0.3`) or accepts explicit `inputs.version`
|
|
16
|
+
- Provides clear error message when version cannot be determined
|
|
17
|
+
- Added `signal` option to LLM completion requests for request cancellation via AbortSignal
|
|
18
|
+
- Added AbortController integration to timeout utilities for proper request cancellation
|
|
19
|
+
- Added JSON extraction from mixed LLM responses (handles prose around JSON blocks)
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
|
|
23
|
+
- Improved timeout handling with AbortController propagation across LLM and transport layers
|
|
24
|
+
- Improved error handling and resource cleanup in interview, orchestrator, and transport modules
|
|
25
|
+
- Refactored response cache, workflow executor, and state tracker for better reliability
|
|
26
|
+
- Updated CI/CD and GitHub/GitLab integration documentation
|
|
27
|
+
|
|
28
|
+
### Fixed
|
|
29
|
+
|
|
30
|
+
- Fixed GitHub Action stderr handling in check command output capture
|
|
31
|
+
- Fixed various code formatting and linting issues across LLM clients and transport modules
|
|
32
|
+
|
|
33
|
+
## [1.0.2] - 2026-01-30
|
|
34
|
+
|
|
35
|
+
### Added
|
|
36
|
+
|
|
37
|
+
- Added SARIF and JUnit output format support for `bellwether check` without baseline comparison
|
|
38
|
+
- Use `--format sarif` for GitHub Code Scanning integration
|
|
39
|
+
- Use `--format junit` for CI/CD test reporting
|
|
40
|
+
- Added registry validation indicators showing environment variable requirements
|
|
41
|
+
- Servers requiring setup now display ⚙ indicator
|
|
42
|
+
- Environment variables show ✓/✗ status based on whether they're set
|
|
43
|
+
- Automatic detection of common service patterns (postgres→DATABASE_URL, etc.)
|
|
44
|
+
- Setup hints displayed for unconfigured servers
|
|
45
|
+
|
|
46
|
+
### Changed
|
|
47
|
+
|
|
48
|
+
- Security and thorough presets now enable security testing by default (`check.security.enabled: true`)
|
|
49
|
+
|
|
50
|
+
### Fixed
|
|
51
|
+
|
|
52
|
+
- Fixed baseline path resolution in `baseline compare` to be consistent with `baseline show`
|
|
53
|
+
- Now checks both output directory and current working directory before failing
|
|
54
|
+
- Fixed `bellwether auth status` requiring a config file
|
|
55
|
+
- Auth commands now work without bellwether.yaml present
|
|
56
|
+
- Fixed ANSI escape codes appearing in non-TTY output (e.g., when piping to files)
|
|
57
|
+
- StreamingDisplay now checks for TTY before applying ANSI styling
|
|
58
|
+
- Automatically respects `NO_COLOR` and `FORCE_COLOR=0` environment variables
|
|
59
|
+
|
|
10
60
|
## [1.0.1] - 2026-01-29
|
|
11
61
|
|
|
12
62
|
### Added
|
package/README.md
CHANGED
|
@@ -124,8 +124,9 @@ Requires LLM (Ollama for free local, or OpenAI/Anthropic). Generates `AGENTS.md`
|
|
|
124
124
|
## GitHub Action
|
|
125
125
|
|
|
126
126
|
```yaml
|
|
127
|
-
- uses: dotsetlabs/bellwether@v1
|
|
127
|
+
- uses: dotsetlabs/bellwether@v1.0.2
|
|
128
128
|
with:
|
|
129
|
+
version: '1.0.2'
|
|
129
130
|
server-command: 'npx @mcp/your-server'
|
|
130
131
|
baseline-path: './bellwether-baseline.json'
|
|
131
132
|
fail-on-severity: 'warning'
|
|
@@ -167,7 +168,7 @@ bellwether init --preset local npx @mcp/server # Local Ollama (free)
|
|
|
167
168
|
|
|
168
169
|
```bash
|
|
169
170
|
git clone https://github.com/dotsetlabs/bellwether
|
|
170
|
-
cd bellwether
|
|
171
|
+
cd bellwether
|
|
171
172
|
npm install
|
|
172
173
|
npm run build
|
|
173
174
|
npm test
|
|
@@ -10,6 +10,8 @@ export interface CacheEntry<T> {
|
|
|
10
10
|
value: T;
|
|
11
11
|
/** When the entry was created */
|
|
12
12
|
createdAt: Date;
|
|
13
|
+
/** When the entry was last accessed */
|
|
14
|
+
lastAccessedAt: Date;
|
|
13
15
|
/** When the entry expires */
|
|
14
16
|
expiresAt: Date;
|
|
15
17
|
/** Cache key (hash) */
|
|
@@ -99,9 +101,9 @@ export declare class ResponseCache {
|
|
|
99
101
|
*/
|
|
100
102
|
private evictIfNeeded;
|
|
101
103
|
/**
|
|
102
|
-
* Evict the
|
|
104
|
+
* Evict the least recently used entry (LRU based on last access time).
|
|
103
105
|
*/
|
|
104
|
-
private
|
|
106
|
+
private evictLeastRecentlyUsed;
|
|
105
107
|
/**
|
|
106
108
|
* Estimate the size of a value in bytes.
|
|
107
109
|
*/
|
|
@@ -30,21 +30,9 @@ export class ResponseCache {
|
|
|
30
30
|
* Generate a cache key from input data.
|
|
31
31
|
*/
|
|
32
32
|
generateKey(...parts) {
|
|
33
|
-
const serialized = parts.map((p) =>
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
if (typeof p === 'undefined')
|
|
37
|
-
return 'undefined';
|
|
38
|
-
if (p === null)
|
|
39
|
-
return 'null';
|
|
40
|
-
try {
|
|
41
|
-
return JSON.stringify(p, Object.keys(p).sort());
|
|
42
|
-
}
|
|
43
|
-
catch {
|
|
44
|
-
return String(p);
|
|
45
|
-
}
|
|
46
|
-
}).join('|');
|
|
47
|
-
return createHash('sha256').update(serialized).digest('hex').slice(0, 16);
|
|
33
|
+
const serialized = parts.map((p) => stableStringify(p)).join('|');
|
|
34
|
+
// Use 128-bit hash (32 hex chars) to reduce collision risk.
|
|
35
|
+
return createHash('sha256').update(serialized).digest('hex').slice(0, 32);
|
|
48
36
|
}
|
|
49
37
|
/**
|
|
50
38
|
* Get an entry from cache.
|
|
@@ -66,6 +54,7 @@ export class ResponseCache {
|
|
|
66
54
|
return undefined;
|
|
67
55
|
}
|
|
68
56
|
entry.hitCount++;
|
|
57
|
+
entry.lastAccessedAt = new Date();
|
|
69
58
|
this.stats.hits++;
|
|
70
59
|
logger.debug({ key, hitCount: entry.hitCount }, 'Cache hit');
|
|
71
60
|
return entry.value;
|
|
@@ -86,6 +75,7 @@ export class ResponseCache {
|
|
|
86
75
|
const entry = {
|
|
87
76
|
value,
|
|
88
77
|
createdAt: now,
|
|
78
|
+
lastAccessedAt: now,
|
|
89
79
|
expiresAt: new Date(now.getTime() + ttl),
|
|
90
80
|
key,
|
|
91
81
|
description: options?.description,
|
|
@@ -168,31 +158,30 @@ export class ResponseCache {
|
|
|
168
158
|
evictIfNeeded(newEntrySize) {
|
|
169
159
|
// Check entry count
|
|
170
160
|
while (this.cache.size >= this.config.maxEntries) {
|
|
171
|
-
this.
|
|
161
|
+
this.evictLeastRecentlyUsed();
|
|
172
162
|
}
|
|
173
163
|
// Check size
|
|
174
|
-
while (this.totalSizeBytes + newEntrySize > this.config.maxSizeBytes &&
|
|
175
|
-
this.
|
|
176
|
-
this.evictOldest();
|
|
164
|
+
while (this.totalSizeBytes + newEntrySize > this.config.maxSizeBytes && this.cache.size > 0) {
|
|
165
|
+
this.evictLeastRecentlyUsed();
|
|
177
166
|
}
|
|
178
167
|
}
|
|
179
168
|
/**
|
|
180
|
-
* Evict the
|
|
169
|
+
* Evict the least recently used entry (LRU based on last access time).
|
|
181
170
|
*/
|
|
182
|
-
|
|
183
|
-
let
|
|
184
|
-
let
|
|
171
|
+
evictLeastRecentlyUsed() {
|
|
172
|
+
let lruKey;
|
|
173
|
+
let oldestAccessTime = Infinity;
|
|
185
174
|
for (const [key, entry] of this.cache) {
|
|
186
|
-
const time = entry.
|
|
187
|
-
if (time <
|
|
188
|
-
|
|
189
|
-
|
|
175
|
+
const time = entry.lastAccessedAt.getTime();
|
|
176
|
+
if (time < oldestAccessTime) {
|
|
177
|
+
oldestAccessTime = time;
|
|
178
|
+
lruKey = key;
|
|
190
179
|
}
|
|
191
180
|
}
|
|
192
|
-
if (
|
|
193
|
-
this.delete(
|
|
181
|
+
if (lruKey) {
|
|
182
|
+
this.delete(lruKey);
|
|
194
183
|
this.stats.evictions++;
|
|
195
|
-
logger.debug({ key:
|
|
184
|
+
logger.debug({ key: lruKey }, 'Evicted cache entry');
|
|
196
185
|
}
|
|
197
186
|
}
|
|
198
187
|
/**
|
|
@@ -207,6 +196,55 @@ export class ResponseCache {
|
|
|
207
196
|
}
|
|
208
197
|
}
|
|
209
198
|
}
|
|
199
|
+
/**
 * Stable, deterministic JSON stringify with deep key sorting.
 *
 * Conversion rules applied before `JSON.stringify`:
 * - `null`, `undefined`, strings, numbers and booleans pass through unchanged
 * - bigint values become their decimal string
 * - symbols and functions become `String(value)`
 * - `Date` becomes its ISO string (`'Invalid Date'` for an invalid date)
 * - arrays are converted element by element
 * - other objects are rebuilt with their own enumerable keys in sorted order
 * - a container that contains itself (directly or indirectly) is replaced
 *   by the string `'[Circular]'` at the point where the cycle closes
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string} JSON text, or the literal string `'undefined'` when the
 *   value has no JSON representation (e.g. a top-level `undefined`).
 */
function stableStringify(value) {
    // Only containers on the *current recursion path* are tracked. Each one is
    // removed again once fully processed, so an object that is merely
    // referenced twice is serialized twice instead of being reported as
    // circular.
    const ancestors = new WeakSet();
    const normalize = (input) => {
        if (input === null || input === undefined) {
            return input;
        }
        const type = typeof input;
        if (type === 'string' || type === 'number' || type === 'boolean') {
            return input;
        }
        if (type === 'bigint') {
            return input.toString();
        }
        if (type === 'symbol' || type === 'function') {
            return String(input);
        }
        if (input instanceof Date) {
            // toISOString() throws a RangeError on an invalid date.
            return Number.isNaN(input.getTime()) ? 'Invalid Date' : input.toISOString();
        }
        // Everything left is an object; arrays take part in cycle detection
        // too, otherwise a self-containing array recurses without end.
        if (ancestors.has(input)) {
            return '[Circular]';
        }
        ancestors.add(input);
        try {
            if (Array.isArray(input)) {
                return input.map((item) => normalize(item));
            }
            const normalized = {};
            for (const key of Object.keys(input).sort()) {
                normalized[key] = normalize(input[key]);
            }
            return normalized;
        }
        finally {
            ancestors.delete(input);
        }
    };
    const json = JSON.stringify(normalize(value));
    return json === undefined ? 'undefined' : json;
}
|
|
210
248
|
/**
|
|
211
249
|
* Specialized cache for tool responses.
|
|
212
250
|
*/
|
|
@@ -146,12 +146,31 @@ baselineCommand
|
|
|
146
146
|
output.error('No baseline path provided. Set baseline.path or baseline.comparePath in config, or pass a path argument.');
|
|
147
147
|
process.exit(EXIT_CODES.ERROR);
|
|
148
148
|
}
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
149
|
+
// Resolve baseline path consistently with 'show' command:
|
|
150
|
+
// 1. If absolute path, use as-is
|
|
151
|
+
// 2. First try relative to outputDir (e.g., .bellwether/)
|
|
152
|
+
// 3. Fall back to relative to cwd
|
|
153
|
+
let fullBaselinePath;
|
|
154
|
+
if (resolvedBaselinePath.startsWith('/')) {
|
|
155
|
+
fullBaselinePath = resolvedBaselinePath;
|
|
156
|
+
}
|
|
157
|
+
else {
|
|
158
|
+
const outputDirPath = join(outputDir, resolvedBaselinePath);
|
|
159
|
+
const cwdPath = join(process.cwd(), resolvedBaselinePath);
|
|
160
|
+
if (existsSync(outputDirPath)) {
|
|
161
|
+
fullBaselinePath = outputDirPath;
|
|
162
|
+
}
|
|
163
|
+
else if (existsSync(cwdPath)) {
|
|
164
|
+
fullBaselinePath = cwdPath;
|
|
165
|
+
}
|
|
166
|
+
else {
|
|
167
|
+
// Default to outputDir path for error message consistency
|
|
168
|
+
fullBaselinePath = outputDirPath;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
153
171
|
if (!existsSync(fullBaselinePath)) {
|
|
154
172
|
output.error(`Baseline not found: ${fullBaselinePath}`);
|
|
173
|
+
output.error('\nRun `bellwether baseline save` to create a baseline.');
|
|
155
174
|
process.exit(EXIT_CODES.ERROR);
|
|
156
175
|
}
|
|
157
176
|
let previousBaseline;
|
|
@@ -13,7 +13,7 @@ import { MCPClient } from '../../transport/mcp-client.js';
|
|
|
13
13
|
import { discover } from '../../discovery/discovery.js';
|
|
14
14
|
import { Interviewer } from '../../interview/interviewer.js';
|
|
15
15
|
import { generateContractMd, generateJsonReport } from '../../docs/generator.js';
|
|
16
|
-
import { loadConfig, ConfigNotFoundError, parseCommandString } from '../../config/loader.js';
|
|
16
|
+
import { loadConfig, ConfigNotFoundError, parseCommandString, } from '../../config/loader.js';
|
|
17
17
|
import { validateConfigForCheck, getConfigWarnings } from '../../config/validator.js';
|
|
18
18
|
import { createBaseline, loadBaseline, saveBaseline, getToolFingerprints, toToolCapability, compareBaselines, acceptDrift, formatDiffText, formatDiffJson, formatDiffCompact, formatDiffGitHubActions, formatDiffMarkdown, formatDiffJUnit, formatDiffSarif, applySeverityConfig, shouldFailOnDiff, analyzeForIncremental, formatIncrementalSummary, runSecurityTests, parseSecurityCategories, getAllSecurityCategories, } from '../../baseline/index.js';
|
|
19
19
|
import { convertAssertions } from '../../baseline/converter.js';
|
|
@@ -21,7 +21,7 @@ import { getMetricsCollector, resetMetricsCollector } from '../../metrics/collec
|
|
|
21
21
|
import { getGlobalCache, resetGlobalCache } from '../../cache/response-cache.js';
|
|
22
22
|
import { InterviewProgressBar, formatCheckBanner } from '../utils/progress.js';
|
|
23
23
|
import { buildCheckSummary, colorizeConfidence, formatConfidenceLevel, formatToolResultLine, } from '../output/terminal-reporter.js';
|
|
24
|
-
import { loadScenariosFromFile, tryLoadDefaultScenarios, DEFAULT_SCENARIOS_FILE } from '../../scenarios/index.js';
|
|
24
|
+
import { loadScenariosFromFile, tryLoadDefaultScenarios, DEFAULT_SCENARIOS_FILE, } from '../../scenarios/index.js';
|
|
25
25
|
import { loadWorkflowsFromFile, tryLoadDefaultWorkflows, DEFAULT_WORKFLOWS_FILE, WorkflowExecutor, generateWorkflowsFromTools, generateWorkflowYamlContent, } from '../../workflow/index.js';
|
|
26
26
|
import * as output from '../output.js';
|
|
27
27
|
import { extractServerContextFromArgs } from '../utils/server-context.js';
|
|
@@ -73,14 +73,6 @@ export const checkCommand = new Command('check')
|
|
|
73
73
|
output.error(error instanceof Error ? error.message : String(error));
|
|
74
74
|
process.exit(EXIT_CODES.ERROR);
|
|
75
75
|
}
|
|
76
|
-
const warnings = getConfigWarnings(config);
|
|
77
|
-
if (warnings.length > 0) {
|
|
78
|
-
output.warn('Configuration warnings:');
|
|
79
|
-
for (const warning of warnings) {
|
|
80
|
-
output.warn(` - ${warning}`);
|
|
81
|
-
}
|
|
82
|
-
output.newline();
|
|
83
|
-
}
|
|
84
76
|
// Extract settings from config
|
|
85
77
|
const timeout = config.server.timeout;
|
|
86
78
|
const outputDir = config.output.dir;
|
|
@@ -105,7 +97,8 @@ export const checkCommand = new Command('check')
|
|
|
105
97
|
minimumSeverity: options.minSeverity ?? config.baseline.severity.minimumSeverity,
|
|
106
98
|
failOnSeverity: options.failOnSeverity ?? config.baseline.severity.failOnSeverity,
|
|
107
99
|
suppressWarnings: config.baseline.severity.suppressWarnings,
|
|
108
|
-
aspectOverrides: config.baseline.severity
|
|
100
|
+
aspectOverrides: config.baseline.severity
|
|
101
|
+
.aspectOverrides,
|
|
109
102
|
};
|
|
110
103
|
// Resolve check options from config (no CLI overrides for these)
|
|
111
104
|
const incrementalEnabled = config.check.incremental;
|
|
@@ -114,9 +107,26 @@ export const checkCommand = new Command('check')
|
|
|
114
107
|
const parallelWorkers = config.check.parallelWorkers;
|
|
115
108
|
const performanceThreshold = config.check.performanceThreshold / PERCENTAGE_CONVERSION.DIVISOR;
|
|
116
109
|
const diffFormat = options.format ?? config.check.diffFormat;
|
|
110
|
+
const machineReadableFormats = new Set(['json', 'junit', 'sarif']);
|
|
111
|
+
const machineReadable = machineReadableFormats.has(String(diffFormat).toLowerCase());
|
|
112
|
+
if (machineReadable) {
|
|
113
|
+
// Suppress standard CLI output to keep stdout clean for machine-readable formats.
|
|
114
|
+
output.configureOutput({ quiet: true });
|
|
115
|
+
}
|
|
116
|
+
const warnings = getConfigWarnings(config);
|
|
117
|
+
if (warnings.length > 0) {
|
|
118
|
+
output.warn('Configuration warnings:');
|
|
119
|
+
for (const warning of warnings) {
|
|
120
|
+
output.warn(` - ${warning}`);
|
|
121
|
+
}
|
|
122
|
+
if (!machineReadable) {
|
|
123
|
+
output.newline();
|
|
124
|
+
}
|
|
125
|
+
}
|
|
117
126
|
// Resolve security options from config
|
|
118
127
|
const securityEnabled = config.check.security.enabled;
|
|
119
|
-
let securityCategories = config.check.security
|
|
128
|
+
let securityCategories = config.check.security
|
|
129
|
+
.categories;
|
|
120
130
|
// Validate security categories
|
|
121
131
|
try {
|
|
122
132
|
securityCategories = parseSecurityCategories(securityCategories.join(','));
|
|
@@ -141,13 +151,15 @@ export const checkCommand = new Command('check')
|
|
|
141
151
|
? `${serverCommand} ${args.join(' ')}`.trim()
|
|
142
152
|
: (remoteUrl ?? 'unknown');
|
|
143
153
|
// Display startup banner
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
154
|
+
if (!machineReadable) {
|
|
155
|
+
const banner = formatCheckBanner({
|
|
156
|
+
serverCommand: serverIdentifier,
|
|
157
|
+
});
|
|
158
|
+
output.info(banner);
|
|
159
|
+
output.newline();
|
|
160
|
+
output.info('Check: Schema validation and drift detection (free, deterministic)');
|
|
161
|
+
output.newline();
|
|
162
|
+
}
|
|
151
163
|
// Initialize metrics collector
|
|
152
164
|
resetMetricsCollector();
|
|
153
165
|
const metricsCollector = getMetricsCollector();
|
|
@@ -182,9 +194,12 @@ export const checkCommand = new Command('check')
|
|
|
182
194
|
}
|
|
183
195
|
// Discovery phase
|
|
184
196
|
output.info('Discovering capabilities...');
|
|
185
|
-
const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : remoteUrl ?? serverCommand, transport === 'stdio' ? args : []);
|
|
197
|
+
const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : (remoteUrl ?? serverCommand), transport === 'stdio' ? args : []);
|
|
186
198
|
const resourceCount = discovery.resources?.length ?? 0;
|
|
187
|
-
const discoveryParts = [
|
|
199
|
+
const discoveryParts = [
|
|
200
|
+
`${discovery.tools.length} tools`,
|
|
201
|
+
`${discovery.prompts.length} prompts`,
|
|
202
|
+
];
|
|
188
203
|
if (resourceCount > 0) {
|
|
189
204
|
discoveryParts.push(`${resourceCount} resources`);
|
|
190
205
|
}
|
|
@@ -228,7 +243,9 @@ export const checkCommand = new Command('check')
|
|
|
228
243
|
}
|
|
229
244
|
else {
|
|
230
245
|
incrementalBaseline = loadBaseline(baselinePath);
|
|
231
|
-
const result = analyzeForIncremental(discovery.tools, incrementalBaseline, {
|
|
246
|
+
const result = analyzeForIncremental(discovery.tools, incrementalBaseline, {
|
|
247
|
+
maxCacheAgeHours: incrementalCacheHours,
|
|
248
|
+
});
|
|
232
249
|
incrementalResult = result;
|
|
233
250
|
const summary = formatIncrementalSummary(result.changeSummary);
|
|
234
251
|
output.info(`Incremental analysis: ${summary}`);
|
|
@@ -240,7 +257,7 @@ export const checkCommand = new Command('check')
|
|
|
240
257
|
else {
|
|
241
258
|
output.info(`Testing ${result.toolsToTest.length} tools (${result.toolsToSkip.length} cached)\n`);
|
|
242
259
|
// Filter discovery to only include tools that need testing
|
|
243
|
-
discovery.tools = discovery.tools.filter(t => result.toolsToTest.includes(t.name));
|
|
260
|
+
discovery.tools = discovery.tools.filter((t) => result.toolsToTest.includes(t.name));
|
|
244
261
|
}
|
|
245
262
|
}
|
|
246
263
|
}
|
|
@@ -323,7 +340,7 @@ export const checkCommand = new Command('check')
|
|
|
323
340
|
interviewer.setServerContext(serverContext);
|
|
324
341
|
}
|
|
325
342
|
// Set up progress display
|
|
326
|
-
const progressBar = new InterviewProgressBar({ enabled: !verbose });
|
|
343
|
+
const progressBar = new InterviewProgressBar({ enabled: !verbose && !machineReadable });
|
|
327
344
|
const reportedTools = new Set();
|
|
328
345
|
const progressCallback = (progress) => {
|
|
329
346
|
if (verbose) {
|
|
@@ -462,7 +479,7 @@ export const checkCommand = new Command('check')
|
|
|
462
479
|
try {
|
|
463
480
|
const response = await mcpClient.callTool(tool.name, args);
|
|
464
481
|
const content = response.content
|
|
465
|
-
.map((c) => c.type === 'text' ? c.text : '')
|
|
482
|
+
.map((c) => (c.type === 'text' ? c.text : ''))
|
|
466
483
|
.join('\n');
|
|
467
484
|
return {
|
|
468
485
|
isError: response.isError ?? false,
|
|
@@ -580,7 +597,7 @@ export const checkCommand = new Command('check')
|
|
|
580
597
|
const workflowResult = await workflowExecutor.execute(workflow);
|
|
581
598
|
workflowResults.push(workflowResult);
|
|
582
599
|
const statusIcon = workflowResult.success ? '\u2713' : '\u2717';
|
|
583
|
-
const stepsInfo = `${workflowResult.steps.filter(s => s.success).length}/${workflow.steps.length} steps`;
|
|
600
|
+
const stepsInfo = `${workflowResult.steps.filter((s) => s.success).length}/${workflow.steps.length} steps`;
|
|
584
601
|
if (workflowResult.success) {
|
|
585
602
|
output.success(` ${statusIcon} ${workflow.name} (${stepsInfo}) - ${workflowResult.durationMs}ms`);
|
|
586
603
|
}
|
|
@@ -599,7 +616,7 @@ export const checkCommand = new Command('check')
|
|
|
599
616
|
}
|
|
600
617
|
}
|
|
601
618
|
// Workflow summary
|
|
602
|
-
const passed = workflowResults.filter(r => r.success).length;
|
|
619
|
+
const passed = workflowResults.filter((r) => r.success).length;
|
|
603
620
|
const failed = workflowResults.length - passed;
|
|
604
621
|
output.newline();
|
|
605
622
|
if (failed === 0) {
|
|
@@ -631,9 +648,7 @@ export const checkCommand = new Command('check')
|
|
|
631
648
|
}
|
|
632
649
|
if (writeJson) {
|
|
633
650
|
// Add workflow results to the result object for the JSON report
|
|
634
|
-
const resultWithWorkflows = workflowResults.length > 0
|
|
635
|
-
? { ...result, workflowResults }
|
|
636
|
-
: result;
|
|
651
|
+
const resultWithWorkflows = workflowResults.length > 0 ? { ...result, workflowResults } : result;
|
|
637
652
|
let jsonReport;
|
|
638
653
|
try {
|
|
639
654
|
jsonReport = generateJsonReport(resultWithWorkflows, {
|
|
@@ -671,10 +686,7 @@ export const checkCommand = new Command('check')
|
|
|
671
686
|
if (incrementalResult && incrementalResult.cachedFingerprints.length > 0) {
|
|
672
687
|
// Merge new fingerprints with cached ones
|
|
673
688
|
const cachedTools = incrementalResult.cachedFingerprints.map(toToolCapability);
|
|
674
|
-
const mergedTools = [
|
|
675
|
-
...currentBaseline.capabilities.tools,
|
|
676
|
-
...cachedTools,
|
|
677
|
-
].sort((a, b) => a.name.localeCompare(b.name));
|
|
689
|
+
const mergedTools = [...currentBaseline.capabilities.tools, ...cachedTools].sort((a, b) => a.name.localeCompare(b.name));
|
|
678
690
|
currentBaseline = {
|
|
679
691
|
...currentBaseline,
|
|
680
692
|
capabilities: {
|
|
@@ -768,6 +780,18 @@ export const checkCommand = new Command('check')
|
|
|
768
780
|
saveBaseline(currentBaseline, saveBaselinePath);
|
|
769
781
|
output.info(`\nBaseline saved: ${saveBaselinePath}`);
|
|
770
782
|
}
|
|
783
|
+
// Output formatted results for sarif/junit when no baseline comparison
|
|
784
|
+
// This allows CI systems to consume check results even without drift detection
|
|
785
|
+
if (!baselinePath) {
|
|
786
|
+
const formattedCheckResults = formatCheckResults(currentBaseline, diffFormat);
|
|
787
|
+
if (formattedCheckResults) {
|
|
788
|
+
if (!machineReadable) {
|
|
789
|
+
output.info('\n--- Check Results ---');
|
|
790
|
+
}
|
|
791
|
+
// Output directly to stdout for machine-readable formats
|
|
792
|
+
console.log(formattedCheckResults);
|
|
793
|
+
}
|
|
794
|
+
}
|
|
771
795
|
// Handle baseline comparison
|
|
772
796
|
if (baselinePath) {
|
|
773
797
|
if (!existsSync(baselinePath)) {
|
|
@@ -780,10 +804,17 @@ export const checkCommand = new Command('check')
|
|
|
780
804
|
});
|
|
781
805
|
// Apply severity configuration (filtering, overrides)
|
|
782
806
|
const diff = applySeverityConfig(rawDiff, severityConfig);
|
|
783
|
-
|
|
807
|
+
if (!machineReadable) {
|
|
808
|
+
output.info('\n--- Drift Report ---');
|
|
809
|
+
}
|
|
784
810
|
// Select formatter based on --format option
|
|
785
811
|
const formattedDiff = formatDiff(diff, diffFormat, baselinePath);
|
|
786
|
-
|
|
812
|
+
if (machineReadable) {
|
|
813
|
+
console.log(formattedDiff);
|
|
814
|
+
}
|
|
815
|
+
else {
|
|
816
|
+
output.info(formattedDiff);
|
|
817
|
+
}
|
|
787
818
|
// Report performance regressions if detected
|
|
788
819
|
if (diff.performanceReport?.hasRegressions) {
|
|
789
820
|
output.warn('\n--- Performance Regressions ---');
|
|
@@ -919,4 +950,149 @@ function formatDiff(diff, format, baselinePath) {
|
|
|
919
950
|
return formatDiffText(diff);
|
|
920
951
|
}
|
|
921
952
|
}
|
|
953
|
+
/**
 * Escape the five XML special characters so a value can be placed safely in
 * element text or a double-quoted attribute.
 */
function escapeJUnitXml(value) {
    return String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
}
/**
 * Format check results as JUnit XML (for CI systems that expect test results).
 * This is used when --format junit is specified but no baseline comparison occurs.
 *
 * Emits one test case per tool (with its success rate, plus a system-err note
 * when the rate is below 90%) and one failing "<tool>-security" test case for
 * every tool that has at least one critical/high security finding.
 *
 * @param baseline - Baseline whose tool fingerprints are reported.
 * @returns {string} The JUnit XML document.
 */
function formatCheckResultsJUnit(baseline) {
    const tools = getToolFingerprints(baseline);
    const isSevere = (finding) => finding.riskLevel === 'critical' || finding.riskLevel === 'high';
    const securityTools = tools.filter((t) => t.securityFingerprint?.findings?.length);
    // Compute the critical/high findings once per tool and reuse them for both
    // the suite counters and the test case bodies.
    const severeReports = securityTools
        .map((tool) => ({
        tool,
        severe: (tool.securityFingerprint?.findings ?? []).filter(isSevere),
    }))
        .filter((report) => report.severe.length > 0);
    const securityFailures = severeReports.length;
    // `tests` must count every <testcase> emitted: one per tool plus one per
    // failing security test case.
    const totalTests = tools.length + securityFailures;
    const lines = [];
    lines.push('<?xml version="1.0" encoding="UTF-8"?>');
    lines.push('<testsuites>');
    lines.push(`  <testsuite name="bellwether-check" tests="${totalTests}" failures="${securityFailures}" errors="0">`);
    for (const tool of tools) {
        const successRate = tool.baselineSuccessRate ?? 1;
        lines.push(`    <testcase name="${escapeJUnitXml(tool.name)}" classname="mcp-tools" time="0">`);
        lines.push(`      <system-out>Success rate: ${(successRate * 100).toFixed(0)}%</system-out>`);
        if (successRate < 0.9) {
            lines.push(`      <system-err>Tool has success rate below 90%</system-err>`);
        }
        lines.push('    </testcase>');
    }
    // Add security findings as test cases if present
    if (securityTools.length > 0) {
        lines.push(`    <!-- Security findings -->`);
        for (const { tool, severe } of severeReports) {
            lines.push(`    <testcase name="${escapeJUnitXml(tool.name)}-security" classname="security">`);
            lines.push(`      <failure message="${severe.length} critical/high security findings">`);
            for (const finding of severe) {
                // The CWE id is optional; omit the parenthesis rather than print "(undefined)".
                const cweSuffix = finding.cweId ? ` (${escapeJUnitXml(finding.cweId)})` : '';
                lines.push(`        ${finding.riskLevel.toUpperCase()}: ${escapeJUnitXml(finding.title)}${cweSuffix}`);
            }
            lines.push(`      </failure>`);
            lines.push('    </testcase>');
        }
    }
    lines.push('  </testsuite>');
    lines.push('</testsuites>');
    return lines.join('\n');
}
|
|
996
|
+
/**
 * Format check results as SARIF (for GitHub Code Scanning and other tools).
 * This is used when --format sarif is specified but no baseline comparison occurs.
 *
 * Produces one result per security finding (rule id = the finding's CWE id,
 * or `BWH-SEC` when it has none) followed by one `BWH-REL` result per tool
 * whose success rate is below 90%. Every rule id referenced by a result is
 * declared in `tool.driver.rules`.
 *
 * @param baseline - Baseline whose tool fingerprints are reported.
 * @returns {string} Pretty-printed SARIF 2.1.0 JSON.
 */
function formatCheckResultsSarif(baseline) {
    const tools = getToolFingerprints(baseline);
    // `server` is optional-chained like `metadata` so a baseline without it
    // falls through to the default instead of throwing.
    const serverUri = baseline.metadata?.serverCommand || baseline.server?.name || 'mcp-server';
    // Every result points at the same synthetic location.
    const buildLocations = () => [
        {
            physicalLocation: {
                artifactLocation: { uri: serverUri },
                region: { startLine: 1 },
            },
        },
    ];
    const toLevel = (riskLevel) => {
        if (riskLevel === 'critical' || riskLevel === 'high') {
            return 'error';
        }
        return riskLevel === 'medium' ? 'warning' : 'note';
    };
    const results = [];
    // Rule ids taken from findings that are not among the built-in rules.
    const extraRuleIds = new Set();
    // Add results for tools with security findings
    for (const tool of tools) {
        for (const finding of tool.securityFingerprint?.findings ?? []) {
            const ruleId = finding.cweId || 'BWH-SEC';
            if (ruleId !== 'BWH-SEC' && ruleId !== 'BWH-REL') {
                extraRuleIds.add(ruleId);
            }
            results.push({
                ruleId,
                level: toLevel(finding.riskLevel),
                message: { text: `[${tool.name}] ${finding.title}: ${finding.description}` },
                locations: buildLocations(),
            });
        }
    }
    // Add results for tools with low success rate
    for (const tool of tools) {
        const successRate = tool.baselineSuccessRate ?? 1;
        if (successRate < 0.9) {
            results.push({
                ruleId: 'BWH-REL',
                level: 'warning',
                message: {
                    text: `Tool "${tool.name}" has ${(successRate * 100).toFixed(0)}% success rate`,
                },
                locations: buildLocations(),
            });
        }
    }
    const rules = [
        {
            id: 'BWH-SEC',
            name: 'SecurityFinding',
            shortDescription: { text: 'Security vulnerability detected' },
            defaultConfiguration: { level: 'warning' },
        },
        {
            id: 'BWH-REL',
            name: 'LowReliability',
            shortDescription: { text: 'Tool reliability below threshold' },
            defaultConfiguration: { level: 'warning' },
        },
        // Declare the CWE-based rule ids as well, so no result references a
        // rule that is missing from the driver's rule list.
        ...[...extraRuleIds].map((id) => ({
            id,
            name: 'SecurityFinding',
            shortDescription: { text: `Security vulnerability detected (${id})` },
            defaultConfiguration: { level: 'warning' },
        })),
    ];
    const sarif = {
        $schema: 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
        version: '2.1.0',
        runs: [
            {
                tool: {
                    driver: {
                        name: 'bellwether',
                        // NOTE(review): hard-coded; looks like it should track the
                        // package version - confirm and wire to the version module.
                        version: '1.0.0',
                        informationUri: 'https://github.com/dotsetlabs/bellwether',
                        rules,
                    },
                },
                results,
            },
        ],
    };
    return JSON.stringify(sarif, null, 2);
}
|
|
1082
|
+
/**
 * Format check results using the specified output format.
 * Used when no baseline comparison occurs.
 *
 * @param baseline - Baseline passed through to the format-specific formatter.
 * @param format - Output format name, matched case-insensitively.
 * @returns {string|null} JUnit XML for `junit`/`junit-xml`/`xml`, SARIF JSON
 *   for `sarif`, or `null` when the format needs no special output.
 */
function formatCheckResults(baseline, format) {
    // Coerce with String() so a missing or non-string format selects the
    // default branch instead of throwing on `.toLowerCase()`.
    switch (String(format).toLowerCase()) {
        case 'junit':
        case 'junit-xml':
        case 'xml':
            return formatCheckResultsJUnit(baseline);
        case 'sarif':
            return formatCheckResultsSarif(baseline);
        default:
            return null; // No special formatting needed for other formats
    }
}
|
|
922
1098
|
//# sourceMappingURL=check.js.map
|