@dotsetlabs/bellwether 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/README.md +3 -2
- package/dist/cache/response-cache.d.ts +4 -2
- package/dist/cache/response-cache.js +68 -30
- package/dist/cli/commands/check.js +78 -49
- package/dist/cli/index.js +5 -3
- package/dist/interview/interviewer.js +70 -50
- package/dist/interview/orchestrator.js +49 -22
- package/dist/llm/anthropic.js +49 -16
- package/dist/llm/client.d.ts +2 -0
- package/dist/llm/client.js +61 -0
- package/dist/llm/ollama.js +9 -4
- package/dist/llm/openai.js +34 -23
- package/dist/transport/base-transport.d.ts +1 -1
- package/dist/transport/http-transport.d.ts +2 -2
- package/dist/transport/http-transport.js +26 -6
- package/dist/transport/mcp-client.d.ts +18 -6
- package/dist/transport/mcp-client.js +49 -19
- package/dist/transport/sse-transport.d.ts +1 -1
- package/dist/transport/sse-transport.js +4 -2
- package/dist/transport/stdio-transport.d.ts +1 -1
- package/dist/transport/stdio-transport.js +1 -1
- package/dist/utils/timeout.d.ts +10 -2
- package/dist/utils/timeout.js +9 -5
- package/dist/version.js +1 -1
- package/dist/workflow/executor.js +18 -13
- package/dist/workflow/loader.js +4 -1
- package/dist/workflow/state-tracker.js +22 -18
- package/man/bellwether.1 +204 -0
- package/man/bellwether.1.md +148 -0
- package/package.json +6 -7
package/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [1.0.3] - 2026-02-02
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Added `version` input to GitHub Action for explicit npm version selection
|
|
15
|
+
- Action now derives version from ref (e.g., `v1.0.3`) or accepts explicit `inputs.version`
|
|
16
|
+
- Provides clear error message when version cannot be determined
|
|
17
|
+
- Added `signal` option to LLM completion requests for request cancellation via AbortSignal
|
|
18
|
+
- Added AbortController integration to timeout utilities for proper request cancellation
|
|
19
|
+
- Added JSON extraction from mixed LLM responses (handles prose around JSON blocks)
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
|
|
23
|
+
- Improved timeout handling with AbortController propagation across LLM and transport layers
|
|
24
|
+
- Improved error handling and resource cleanup in interview, orchestrator, and transport modules
|
|
25
|
+
- Refactored response cache, workflow executor, and state tracker for better reliability
|
|
26
|
+
- Updated CI/CD and GitHub/GitLab integration documentation
|
|
27
|
+
|
|
28
|
+
### Fixed
|
|
29
|
+
|
|
30
|
+
- Fixed GitHub Action stderr handling in check command output capture
|
|
31
|
+
- Fixed various code formatting and linting issues across LLM clients and transport modules
|
|
32
|
+
|
|
10
33
|
## [1.0.2] - 2026-01-30
|
|
11
34
|
|
|
12
35
|
### Added
|
package/README.md
CHANGED
|
@@ -124,8 +124,9 @@ Requires LLM (Ollama for free local, or OpenAI/Anthropic). Generates `AGENTS.md`
|
|
|
124
124
|
## GitHub Action
|
|
125
125
|
|
|
126
126
|
```yaml
|
|
127
|
-
- uses: dotsetlabs/bellwether@v1
|
|
127
|
+
- uses: dotsetlabs/bellwether@v1.0.2
|
|
128
128
|
with:
|
|
129
|
+
version: '1.0.2'
|
|
129
130
|
server-command: 'npx @mcp/your-server'
|
|
130
131
|
baseline-path: './bellwether-baseline.json'
|
|
131
132
|
fail-on-severity: 'warning'
|
|
@@ -167,7 +168,7 @@ bellwether init --preset local npx @mcp/server # Local Ollama (free)
|
|
|
167
168
|
|
|
168
169
|
```bash
|
|
169
170
|
git clone https://github.com/dotsetlabs/bellwether
|
|
170
|
-
cd bellwether
|
|
171
|
+
cd bellwether
|
|
171
172
|
npm install
|
|
172
173
|
npm run build
|
|
173
174
|
npm test
|
package/dist/cache/response-cache.d.ts
CHANGED
|
@@ -10,6 +10,8 @@ export interface CacheEntry<T> {
|
|
|
10
10
|
value: T;
|
|
11
11
|
/** When the entry was created */
|
|
12
12
|
createdAt: Date;
|
|
13
|
+
/** When the entry was last accessed */
|
|
14
|
+
lastAccessedAt: Date;
|
|
13
15
|
/** When the entry expires */
|
|
14
16
|
expiresAt: Date;
|
|
15
17
|
/** Cache key (hash) */
|
|
@@ -99,9 +101,9 @@ export declare class ResponseCache {
|
|
|
99
101
|
*/
|
|
100
102
|
private evictIfNeeded;
|
|
101
103
|
/**
|
|
102
|
-
* Evict the
|
|
104
|
+
* Evict the least recently used entry (LRU based on last access time).
|
|
103
105
|
*/
|
|
104
|
-
private evictOldest;
|
|
106
|
+
private evictLeastRecentlyUsed;
|
|
105
107
|
/**
|
|
106
108
|
* Estimate the size of a value in bytes.
|
|
107
109
|
*/
|
package/dist/cache/response-cache.js
CHANGED
|
@@ -30,21 +30,9 @@ export class ResponseCache {
|
|
|
30
30
|
* Generate a cache key from input data.
|
|
31
31
|
*/
|
|
32
32
|
generateKey(...parts) {
|
|
33
|
-
const serialized = parts.map((p) =>
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
if (typeof p === 'undefined')
|
|
37
|
-
return 'undefined';
|
|
38
|
-
if (p === null)
|
|
39
|
-
return 'null';
|
|
40
|
-
try {
|
|
41
|
-
return JSON.stringify(p, Object.keys(p).sort());
|
|
42
|
-
}
|
|
43
|
-
catch {
|
|
44
|
-
return String(p);
|
|
45
|
-
}
|
|
46
|
-
}).join('|');
|
|
47
|
-
return createHash('sha256').update(serialized).digest('hex').slice(0, 16);
|
|
33
|
+
const serialized = parts.map((p) => stableStringify(p)).join('|');
|
|
34
|
+
// Use 128-bit hash (32 hex chars) to reduce collision risk.
|
|
35
|
+
return createHash('sha256').update(serialized).digest('hex').slice(0, 32);
|
|
48
36
|
}
|
|
49
37
|
/**
|
|
50
38
|
* Get an entry from cache.
|
|
@@ -66,6 +54,7 @@ export class ResponseCache {
|
|
|
66
54
|
return undefined;
|
|
67
55
|
}
|
|
68
56
|
entry.hitCount++;
|
|
57
|
+
entry.lastAccessedAt = new Date();
|
|
69
58
|
this.stats.hits++;
|
|
70
59
|
logger.debug({ key, hitCount: entry.hitCount }, 'Cache hit');
|
|
71
60
|
return entry.value;
|
|
@@ -86,6 +75,7 @@ export class ResponseCache {
|
|
|
86
75
|
const entry = {
|
|
87
76
|
value,
|
|
88
77
|
createdAt: now,
|
|
78
|
+
lastAccessedAt: now,
|
|
89
79
|
expiresAt: new Date(now.getTime() + ttl),
|
|
90
80
|
key,
|
|
91
81
|
description: options?.description,
|
|
@@ -168,31 +158,30 @@ export class ResponseCache {
|
|
|
168
158
|
evictIfNeeded(newEntrySize) {
|
|
169
159
|
// Check entry count
|
|
170
160
|
while (this.cache.size >= this.config.maxEntries) {
|
|
171
|
-
this.evictOldest();
|
|
161
|
+
this.evictLeastRecentlyUsed();
|
|
172
162
|
}
|
|
173
163
|
// Check size
|
|
174
|
-
while (this.totalSizeBytes + newEntrySize > this.config.maxSizeBytes &&
|
|
175
|
-
this.cache.size > 0) {
|
|
176
|
-
this.evictOldest();
|
|
164
|
+
while (this.totalSizeBytes + newEntrySize > this.config.maxSizeBytes && this.cache.size > 0) {
|
|
165
|
+
this.evictLeastRecentlyUsed();
|
|
177
166
|
}
|
|
178
167
|
}
|
|
179
168
|
/**
|
|
180
|
-
* Evict the
|
|
169
|
+
* Evict the least recently used entry (LRU based on last access time).
|
|
181
170
|
*/
|
|
182
|
-
|
|
183
|
-
let
|
|
184
|
-
let
|
|
171
|
+
evictLeastRecentlyUsed() {
|
|
172
|
+
let lruKey;
|
|
173
|
+
let oldestAccessTime = Infinity;
|
|
185
174
|
for (const [key, entry] of this.cache) {
|
|
186
|
-
const time = entry.
|
|
187
|
-
if (time <
|
|
188
|
-
|
|
189
|
-
|
|
175
|
+
const time = entry.lastAccessedAt.getTime();
|
|
176
|
+
if (time < oldestAccessTime) {
|
|
177
|
+
oldestAccessTime = time;
|
|
178
|
+
lruKey = key;
|
|
190
179
|
}
|
|
191
180
|
}
|
|
192
|
-
if (
|
|
193
|
-
this.delete(
|
|
181
|
+
if (lruKey) {
|
|
182
|
+
this.delete(lruKey);
|
|
194
183
|
this.stats.evictions++;
|
|
195
|
-
logger.debug({ key:
|
|
184
|
+
logger.debug({ key: lruKey }, 'Evicted cache entry');
|
|
196
185
|
}
|
|
197
186
|
}
|
|
198
187
|
/**
|
|
@@ -207,6 +196,55 @@ export class ResponseCache {
|
|
|
207
196
|
}
|
|
208
197
|
}
|
|
209
198
|
}
|
|
199
|
+
/**
|
|
200
|
+
* Stable, deterministic JSON stringify with deep key sorting.
|
|
201
|
+
* Falls back to string conversion for unsupported types.
|
|
202
|
+
*/
|
|
203
|
+
function stableStringify(value) {
|
|
204
|
+
const seen = new WeakSet();
|
|
205
|
+
const normalize = (input) => {
|
|
206
|
+
if (input === null || input === undefined)
|
|
207
|
+
return input;
|
|
208
|
+
const type = typeof input;
|
|
209
|
+
if (type === 'string' || type === 'number' || type === 'boolean') {
|
|
210
|
+
return input;
|
|
211
|
+
}
|
|
212
|
+
if (type === 'bigint') {
|
|
213
|
+
return input.toString();
|
|
214
|
+
}
|
|
215
|
+
if (type === 'symbol' || type === 'function') {
|
|
216
|
+
return String(input);
|
|
217
|
+
}
|
|
218
|
+
if (input instanceof Date) {
|
|
219
|
+
return input.toISOString();
|
|
220
|
+
}
|
|
221
|
+
if (Array.isArray(input)) {
|
|
222
|
+
return input.map((item) => normalize(item));
|
|
223
|
+
}
|
|
224
|
+
if (typeof input === 'object') {
|
|
225
|
+
const obj = input;
|
|
226
|
+
if (seen.has(obj)) {
|
|
227
|
+
return '[Circular]';
|
|
228
|
+
}
|
|
229
|
+
seen.add(obj);
|
|
230
|
+
const keys = Object.keys(obj).sort();
|
|
231
|
+
const normalized = {};
|
|
232
|
+
for (const key of keys) {
|
|
233
|
+
normalized[key] = normalize(obj[key]);
|
|
234
|
+
}
|
|
235
|
+
return normalized;
|
|
236
|
+
}
|
|
237
|
+
try {
|
|
238
|
+
return JSON.parse(JSON.stringify(input));
|
|
239
|
+
}
|
|
240
|
+
catch {
|
|
241
|
+
return String(input);
|
|
242
|
+
}
|
|
243
|
+
};
|
|
244
|
+
const normalized = normalize(value);
|
|
245
|
+
const json = JSON.stringify(normalized);
|
|
246
|
+
return json === undefined ? 'undefined' : json;
|
|
247
|
+
}
|
|
210
248
|
/**
|
|
211
249
|
* Specialized cache for tool responses.
|
|
212
250
|
*/
|
package/dist/cli/commands/check.js
CHANGED
|
@@ -13,7 +13,7 @@ import { MCPClient } from '../../transport/mcp-client.js';
|
|
|
13
13
|
import { discover } from '../../discovery/discovery.js';
|
|
14
14
|
import { Interviewer } from '../../interview/interviewer.js';
|
|
15
15
|
import { generateContractMd, generateJsonReport } from '../../docs/generator.js';
|
|
16
|
-
import { loadConfig, ConfigNotFoundError, parseCommandString } from '../../config/loader.js';
|
|
16
|
+
import { loadConfig, ConfigNotFoundError, parseCommandString, } from '../../config/loader.js';
|
|
17
17
|
import { validateConfigForCheck, getConfigWarnings } from '../../config/validator.js';
|
|
18
18
|
import { createBaseline, loadBaseline, saveBaseline, getToolFingerprints, toToolCapability, compareBaselines, acceptDrift, formatDiffText, formatDiffJson, formatDiffCompact, formatDiffGitHubActions, formatDiffMarkdown, formatDiffJUnit, formatDiffSarif, applySeverityConfig, shouldFailOnDiff, analyzeForIncremental, formatIncrementalSummary, runSecurityTests, parseSecurityCategories, getAllSecurityCategories, } from '../../baseline/index.js';
|
|
19
19
|
import { convertAssertions } from '../../baseline/converter.js';
|
|
@@ -21,7 +21,7 @@ import { getMetricsCollector, resetMetricsCollector } from '../../metrics/collec
|
|
|
21
21
|
import { getGlobalCache, resetGlobalCache } from '../../cache/response-cache.js';
|
|
22
22
|
import { InterviewProgressBar, formatCheckBanner } from '../utils/progress.js';
|
|
23
23
|
import { buildCheckSummary, colorizeConfidence, formatConfidenceLevel, formatToolResultLine, } from '../output/terminal-reporter.js';
|
|
24
|
-
import { loadScenariosFromFile, tryLoadDefaultScenarios, DEFAULT_SCENARIOS_FILE } from '../../scenarios/index.js';
|
|
24
|
+
import { loadScenariosFromFile, tryLoadDefaultScenarios, DEFAULT_SCENARIOS_FILE, } from '../../scenarios/index.js';
|
|
25
25
|
import { loadWorkflowsFromFile, tryLoadDefaultWorkflows, DEFAULT_WORKFLOWS_FILE, WorkflowExecutor, generateWorkflowsFromTools, generateWorkflowYamlContent, } from '../../workflow/index.js';
|
|
26
26
|
import * as output from '../output.js';
|
|
27
27
|
import { extractServerContextFromArgs } from '../utils/server-context.js';
|
|
@@ -73,14 +73,6 @@ export const checkCommand = new Command('check')
|
|
|
73
73
|
output.error(error instanceof Error ? error.message : String(error));
|
|
74
74
|
process.exit(EXIT_CODES.ERROR);
|
|
75
75
|
}
|
|
76
|
-
const warnings = getConfigWarnings(config);
|
|
77
|
-
if (warnings.length > 0) {
|
|
78
|
-
output.warn('Configuration warnings:');
|
|
79
|
-
for (const warning of warnings) {
|
|
80
|
-
output.warn(` - ${warning}`);
|
|
81
|
-
}
|
|
82
|
-
output.newline();
|
|
83
|
-
}
|
|
84
76
|
// Extract settings from config
|
|
85
77
|
const timeout = config.server.timeout;
|
|
86
78
|
const outputDir = config.output.dir;
|
|
@@ -105,7 +97,8 @@ export const checkCommand = new Command('check')
|
|
|
105
97
|
minimumSeverity: options.minSeverity ?? config.baseline.severity.minimumSeverity,
|
|
106
98
|
failOnSeverity: options.failOnSeverity ?? config.baseline.severity.failOnSeverity,
|
|
107
99
|
suppressWarnings: config.baseline.severity.suppressWarnings,
|
|
108
|
-
aspectOverrides: config.baseline.severity.aspectOverrides,
|
|
100
|
+
aspectOverrides: config.baseline.severity
|
|
101
|
+
.aspectOverrides,
|
|
109
102
|
};
|
|
110
103
|
// Resolve check options from config (no CLI overrides for these)
|
|
111
104
|
const incrementalEnabled = config.check.incremental;
|
|
@@ -114,9 +107,26 @@ export const checkCommand = new Command('check')
|
|
|
114
107
|
const parallelWorkers = config.check.parallelWorkers;
|
|
115
108
|
const performanceThreshold = config.check.performanceThreshold / PERCENTAGE_CONVERSION.DIVISOR;
|
|
116
109
|
const diffFormat = options.format ?? config.check.diffFormat;
|
|
110
|
+
const machineReadableFormats = new Set(['json', 'junit', 'sarif']);
|
|
111
|
+
const machineReadable = machineReadableFormats.has(String(diffFormat).toLowerCase());
|
|
112
|
+
if (machineReadable) {
|
|
113
|
+
// Suppress standard CLI output to keep stdout clean for machine-readable formats.
|
|
114
|
+
output.configureOutput({ quiet: true });
|
|
115
|
+
}
|
|
116
|
+
const warnings = getConfigWarnings(config);
|
|
117
|
+
if (warnings.length > 0) {
|
|
118
|
+
output.warn('Configuration warnings:');
|
|
119
|
+
for (const warning of warnings) {
|
|
120
|
+
output.warn(` - ${warning}`);
|
|
121
|
+
}
|
|
122
|
+
if (!machineReadable) {
|
|
123
|
+
output.newline();
|
|
124
|
+
}
|
|
125
|
+
}
|
|
117
126
|
// Resolve security options from config
|
|
118
127
|
const securityEnabled = config.check.security.enabled;
|
|
119
|
-
let securityCategories = config.check.security.categories;
|
|
128
|
+
let securityCategories = config.check.security
|
|
129
|
+
.categories;
|
|
120
130
|
// Validate security categories
|
|
121
131
|
try {
|
|
122
132
|
securityCategories = parseSecurityCategories(securityCategories.join(','));
|
|
@@ -141,13 +151,15 @@ export const checkCommand = new Command('check')
|
|
|
141
151
|
? `${serverCommand} ${args.join(' ')}`.trim()
|
|
142
152
|
: (remoteUrl ?? 'unknown');
|
|
143
153
|
// Display startup banner
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
154
|
+
if (!machineReadable) {
|
|
155
|
+
const banner = formatCheckBanner({
|
|
156
|
+
serverCommand: serverIdentifier,
|
|
157
|
+
});
|
|
158
|
+
output.info(banner);
|
|
159
|
+
output.newline();
|
|
160
|
+
output.info('Check: Schema validation and drift detection (free, deterministic)');
|
|
161
|
+
output.newline();
|
|
162
|
+
}
|
|
151
163
|
// Initialize metrics collector
|
|
152
164
|
resetMetricsCollector();
|
|
153
165
|
const metricsCollector = getMetricsCollector();
|
|
@@ -182,9 +194,12 @@ export const checkCommand = new Command('check')
|
|
|
182
194
|
}
|
|
183
195
|
// Discovery phase
|
|
184
196
|
output.info('Discovering capabilities...');
|
|
185
|
-
const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : remoteUrl ?? serverCommand, transport === 'stdio' ? args : []);
|
|
197
|
+
const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : (remoteUrl ?? serverCommand), transport === 'stdio' ? args : []);
|
|
186
198
|
const resourceCount = discovery.resources?.length ?? 0;
|
|
187
|
-
const discoveryParts = [
|
|
199
|
+
const discoveryParts = [
|
|
200
|
+
`${discovery.tools.length} tools`,
|
|
201
|
+
`${discovery.prompts.length} prompts`,
|
|
202
|
+
];
|
|
188
203
|
if (resourceCount > 0) {
|
|
189
204
|
discoveryParts.push(`${resourceCount} resources`);
|
|
190
205
|
}
|
|
@@ -228,7 +243,9 @@ export const checkCommand = new Command('check')
|
|
|
228
243
|
}
|
|
229
244
|
else {
|
|
230
245
|
incrementalBaseline = loadBaseline(baselinePath);
|
|
231
|
-
const result = analyzeForIncremental(discovery.tools, incrementalBaseline, {
|
|
246
|
+
const result = analyzeForIncremental(discovery.tools, incrementalBaseline, {
|
|
247
|
+
maxCacheAgeHours: incrementalCacheHours,
|
|
248
|
+
});
|
|
232
249
|
incrementalResult = result;
|
|
233
250
|
const summary = formatIncrementalSummary(result.changeSummary);
|
|
234
251
|
output.info(`Incremental analysis: ${summary}`);
|
|
@@ -240,7 +257,7 @@ export const checkCommand = new Command('check')
|
|
|
240
257
|
else {
|
|
241
258
|
output.info(`Testing ${result.toolsToTest.length} tools (${result.toolsToSkip.length} cached)\n`);
|
|
242
259
|
// Filter discovery to only include tools that need testing
|
|
243
|
-
discovery.tools = discovery.tools.filter(t => result.toolsToTest.includes(t.name));
|
|
260
|
+
discovery.tools = discovery.tools.filter((t) => result.toolsToTest.includes(t.name));
|
|
244
261
|
}
|
|
245
262
|
}
|
|
246
263
|
}
|
|
@@ -323,7 +340,7 @@ export const checkCommand = new Command('check')
|
|
|
323
340
|
interviewer.setServerContext(serverContext);
|
|
324
341
|
}
|
|
325
342
|
// Set up progress display
|
|
326
|
-
const progressBar = new InterviewProgressBar({ enabled: !verbose });
|
|
343
|
+
const progressBar = new InterviewProgressBar({ enabled: !verbose && !machineReadable });
|
|
327
344
|
const reportedTools = new Set();
|
|
328
345
|
const progressCallback = (progress) => {
|
|
329
346
|
if (verbose) {
|
|
@@ -462,7 +479,7 @@ export const checkCommand = new Command('check')
|
|
|
462
479
|
try {
|
|
463
480
|
const response = await mcpClient.callTool(tool.name, args);
|
|
464
481
|
const content = response.content
|
|
465
|
-
.map((c) => c.type === 'text' ? c.text : '')
|
|
482
|
+
.map((c) => (c.type === 'text' ? c.text : ''))
|
|
466
483
|
.join('\n');
|
|
467
484
|
return {
|
|
468
485
|
isError: response.isError ?? false,
|
|
@@ -580,7 +597,7 @@ export const checkCommand = new Command('check')
|
|
|
580
597
|
const workflowResult = await workflowExecutor.execute(workflow);
|
|
581
598
|
workflowResults.push(workflowResult);
|
|
582
599
|
const statusIcon = workflowResult.success ? '\u2713' : '\u2717';
|
|
583
|
-
const stepsInfo = `${workflowResult.steps.filter(s => s.success).length}/${workflow.steps.length} steps`;
|
|
600
|
+
const stepsInfo = `${workflowResult.steps.filter((s) => s.success).length}/${workflow.steps.length} steps`;
|
|
584
601
|
if (workflowResult.success) {
|
|
585
602
|
output.success(` ${statusIcon} ${workflow.name} (${stepsInfo}) - ${workflowResult.durationMs}ms`);
|
|
586
603
|
}
|
|
@@ -599,7 +616,7 @@ export const checkCommand = new Command('check')
|
|
|
599
616
|
}
|
|
600
617
|
}
|
|
601
618
|
// Workflow summary
|
|
602
|
-
const passed = workflowResults.filter(r => r.success).length;
|
|
619
|
+
const passed = workflowResults.filter((r) => r.success).length;
|
|
603
620
|
const failed = workflowResults.length - passed;
|
|
604
621
|
output.newline();
|
|
605
622
|
if (failed === 0) {
|
|
@@ -631,9 +648,7 @@ export const checkCommand = new Command('check')
|
|
|
631
648
|
}
|
|
632
649
|
if (writeJson) {
|
|
633
650
|
// Add workflow results to the result object for the JSON report
|
|
634
|
-
const resultWithWorkflows = workflowResults.length > 0
|
|
635
|
-
? { ...result, workflowResults }
|
|
636
|
-
: result;
|
|
651
|
+
const resultWithWorkflows = workflowResults.length > 0 ? { ...result, workflowResults } : result;
|
|
637
652
|
let jsonReport;
|
|
638
653
|
try {
|
|
639
654
|
jsonReport = generateJsonReport(resultWithWorkflows, {
|
|
@@ -671,10 +686,7 @@ export const checkCommand = new Command('check')
|
|
|
671
686
|
if (incrementalResult && incrementalResult.cachedFingerprints.length > 0) {
|
|
672
687
|
// Merge new fingerprints with cached ones
|
|
673
688
|
const cachedTools = incrementalResult.cachedFingerprints.map(toToolCapability);
|
|
674
|
-
const mergedTools = [
|
|
675
|
-
...currentBaseline.capabilities.tools,
|
|
676
|
-
...cachedTools,
|
|
677
|
-
].sort((a, b) => a.name.localeCompare(b.name));
|
|
689
|
+
const mergedTools = [...currentBaseline.capabilities.tools, ...cachedTools].sort((a, b) => a.name.localeCompare(b.name));
|
|
678
690
|
currentBaseline = {
|
|
679
691
|
...currentBaseline,
|
|
680
692
|
capabilities: {
|
|
@@ -773,7 +785,9 @@ export const checkCommand = new Command('check')
|
|
|
773
785
|
if (!baselinePath) {
|
|
774
786
|
const formattedCheckResults = formatCheckResults(currentBaseline, diffFormat);
|
|
775
787
|
if (formattedCheckResults) {
|
|
776
|
-
|
|
788
|
+
if (!machineReadable) {
|
|
789
|
+
output.info('\n--- Check Results ---');
|
|
790
|
+
}
|
|
777
791
|
// Output directly to stdout for machine-readable formats
|
|
778
792
|
console.log(formattedCheckResults);
|
|
779
793
|
}
|
|
@@ -790,10 +804,17 @@ export const checkCommand = new Command('check')
|
|
|
790
804
|
});
|
|
791
805
|
// Apply severity configuration (filtering, overrides)
|
|
792
806
|
const diff = applySeverityConfig(rawDiff, severityConfig);
|
|
793
|
-
|
|
807
|
+
if (!machineReadable) {
|
|
808
|
+
output.info('\n--- Drift Report ---');
|
|
809
|
+
}
|
|
794
810
|
// Select formatter based on --format option
|
|
795
811
|
const formattedDiff = formatDiff(diff, diffFormat, baselinePath);
|
|
796
|
-
|
|
812
|
+
if (machineReadable) {
|
|
813
|
+
console.log(formattedDiff);
|
|
814
|
+
}
|
|
815
|
+
else {
|
|
816
|
+
output.info(formattedDiff);
|
|
817
|
+
}
|
|
797
818
|
// Report performance regressions if detected
|
|
798
819
|
if (diff.performanceReport?.hasRegressions) {
|
|
799
820
|
output.warn('\n--- Performance Regressions ---');
|
|
@@ -936,7 +957,7 @@ function formatDiff(diff, format, baselinePath) {
|
|
|
936
957
|
function formatCheckResultsJUnit(baseline) {
|
|
937
958
|
const tools = getToolFingerprints(baseline);
|
|
938
959
|
const lines = [];
|
|
939
|
-
const securityFailures = tools.filter(t => t.securityFingerprint?.findings?.some(f => f.riskLevel === 'critical' || f.riskLevel === 'high')).length;
|
|
960
|
+
const securityFailures = tools.filter((t) => t.securityFingerprint?.findings?.some((f) => f.riskLevel === 'critical' || f.riskLevel === 'high')).length;
|
|
940
961
|
lines.push('<?xml version="1.0" encoding="UTF-8"?>');
|
|
941
962
|
lines.push('<testsuites>');
|
|
942
963
|
lines.push(` <testsuite name="bellwether-check" tests="${tools.length}" failures="${securityFailures}" errors="0">`);
|
|
@@ -951,16 +972,16 @@ function formatCheckResultsJUnit(baseline) {
|
|
|
951
972
|
lines.push(' </testcase>');
|
|
952
973
|
}
|
|
953
974
|
// Add security findings as test cases if present
|
|
954
|
-
const securityTools = tools.filter(t => t.securityFingerprint?.findings?.length);
|
|
975
|
+
const securityTools = tools.filter((t) => t.securityFingerprint?.findings?.length);
|
|
955
976
|
if (securityTools.length > 0) {
|
|
956
977
|
lines.push(` <!-- Security findings -->`);
|
|
957
978
|
for (const tool of securityTools) {
|
|
958
979
|
const findings = tool.securityFingerprint?.findings ?? [];
|
|
959
|
-
const criticalHigh = findings.filter(f => f.riskLevel === 'critical' || f.riskLevel === 'high').length;
|
|
980
|
+
const criticalHigh = findings.filter((f) => f.riskLevel === 'critical' || f.riskLevel === 'high').length;
|
|
960
981
|
if (criticalHigh > 0) {
|
|
961
982
|
lines.push(` <testcase name="${tool.name}-security" classname="security">`);
|
|
962
983
|
lines.push(` <failure message="${criticalHigh} critical/high security findings">`);
|
|
963
|
-
for (const finding of findings.filter(f => f.riskLevel === 'critical' || f.riskLevel === 'high')) {
|
|
984
|
+
for (const finding of findings.filter((f) => f.riskLevel === 'critical' || f.riskLevel === 'high')) {
|
|
964
985
|
lines.push(` ${finding.riskLevel.toUpperCase()}: ${finding.title} (${finding.cweId})`);
|
|
965
986
|
}
|
|
966
987
|
lines.push(` </failure>`);
|
|
@@ -981,7 +1002,7 @@ function formatCheckResultsSarif(baseline) {
|
|
|
981
1002
|
const serverUri = baseline.metadata?.serverCommand || baseline.server.name || 'mcp-server';
|
|
982
1003
|
const results = [];
|
|
983
1004
|
// Add results for tools with security findings
|
|
984
|
-
const securityTools = tools.filter(t => t.securityFingerprint?.findings?.length);
|
|
1005
|
+
const securityTools = tools.filter((t) => t.securityFingerprint?.findings?.length);
|
|
985
1006
|
for (const tool of securityTools) {
|
|
986
1007
|
const findings = tool.securityFingerprint?.findings ?? [];
|
|
987
1008
|
for (const finding of findings) {
|
|
@@ -994,12 +1015,14 @@ function formatCheckResultsSarif(baseline) {
|
|
|
994
1015
|
ruleId: finding.cweId || 'BWH-SEC',
|
|
995
1016
|
level,
|
|
996
1017
|
message: { text: `[${tool.name}] ${finding.title}: ${finding.description}` },
|
|
997
|
-
locations: [
|
|
1018
|
+
locations: [
|
|
1019
|
+
{
|
|
998
1020
|
physicalLocation: {
|
|
999
1021
|
artifactLocation: { uri: serverUri },
|
|
1000
1022
|
region: { startLine: 1 },
|
|
1001
1023
|
},
|
|
1002
|
-
}
|
|
1024
|
+
},
|
|
1025
|
+
],
|
|
1003
1026
|
});
|
|
1004
1027
|
}
|
|
1005
1028
|
}
|
|
@@ -1010,20 +1033,25 @@ function formatCheckResultsSarif(baseline) {
|
|
|
1010
1033
|
results.push({
|
|
1011
1034
|
ruleId: 'BWH-REL',
|
|
1012
1035
|
level: 'warning',
|
|
1013
|
-
message: {
|
|
1014
|
-
|
|
1036
|
+
message: {
|
|
1037
|
+
text: `Tool "${tool.name}" has ${(successRate * 100).toFixed(0)}% success rate`,
|
|
1038
|
+
},
|
|
1039
|
+
locations: [
|
|
1040
|
+
{
|
|
1015
1041
|
physicalLocation: {
|
|
1016
1042
|
artifactLocation: { uri: serverUri },
|
|
1017
1043
|
region: { startLine: 1 },
|
|
1018
1044
|
},
|
|
1019
|
-
}
|
|
1045
|
+
},
|
|
1046
|
+
],
|
|
1020
1047
|
});
|
|
1021
1048
|
}
|
|
1022
1049
|
}
|
|
1023
1050
|
const sarif = {
|
|
1024
1051
|
$schema: 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
|
|
1025
1052
|
version: '2.1.0',
|
|
1026
|
-
runs: [
|
|
1053
|
+
runs: [
|
|
1054
|
+
{
|
|
1027
1055
|
tool: {
|
|
1028
1056
|
driver: {
|
|
1029
1057
|
name: 'bellwether',
|
|
@@ -1046,7 +1074,8 @@ function formatCheckResultsSarif(baseline) {
|
|
|
1046
1074
|
},
|
|
1047
1075
|
},
|
|
1048
1076
|
results,
|
|
1049
|
-
}
|
|
1077
|
+
},
|
|
1078
|
+
],
|
|
1050
1079
|
};
|
|
1051
1080
|
return JSON.stringify(sarif, null, 2);
|
|
1052
1081
|
}
|
package/dist/cli/index.js
CHANGED
|
@@ -16,7 +16,7 @@ if (existsSync(globalEnvPath)) {
|
|
|
16
16
|
config({ path: globalEnvPath, quiet: true });
|
|
17
17
|
}
|
|
18
18
|
// Then load project .env (overrides global settings)
|
|
19
|
-
config({ quiet: true });
|
|
19
|
+
config({ quiet: true, override: true });
|
|
20
20
|
function normalizeEncryptedEnvVar(key) {
|
|
21
21
|
const value = process.env[key];
|
|
22
22
|
if (!value || !isEncryptedEnvValue(value)) {
|
|
@@ -167,9 +167,11 @@ program.configureHelp({
|
|
|
167
167
|
subcommandTerm: (cmd) => `${cmd.name()} ${cmd.usage()}`,
|
|
168
168
|
});
|
|
169
169
|
// Load keychain credentials, then parse commands
|
|
170
|
-
loadKeychainCredentials()
|
|
170
|
+
loadKeychainCredentials()
|
|
171
|
+
.then(() => {
|
|
171
172
|
program.parse();
|
|
172
|
-
})
|
|
173
|
+
})
|
|
174
|
+
.catch(() => {
|
|
173
175
|
// If keychain loading fails, still parse commands
|
|
174
176
|
program.parse();
|
|
175
177
|
});
|