@dotsetlabs/bellwether 1.0.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +97 -0
- package/README.md +9 -2
- package/dist/baseline/accessors.d.ts +1 -1
- package/dist/baseline/accessors.js +1 -3
- package/dist/baseline/baseline-format.d.ts +287 -0
- package/dist/baseline/baseline-format.js +12 -0
- package/dist/baseline/comparator.js +249 -11
- package/dist/baseline/converter.d.ts +15 -15
- package/dist/baseline/converter.js +46 -34
- package/dist/baseline/diff.d.ts +1 -1
- package/dist/baseline/diff.js +45 -28
- package/dist/baseline/error-analyzer.d.ts +1 -1
- package/dist/baseline/error-analyzer.js +90 -17
- package/dist/baseline/incremental-checker.js +8 -5
- package/dist/baseline/index.d.ts +2 -12
- package/dist/baseline/index.js +3 -23
- package/dist/baseline/performance-tracker.d.ts +0 -1
- package/dist/baseline/performance-tracker.js +13 -20
- package/dist/baseline/response-fingerprint.js +39 -2
- package/dist/baseline/saver.js +41 -10
- package/dist/baseline/schema-compare.d.ts +22 -0
- package/dist/baseline/schema-compare.js +259 -16
- package/dist/baseline/types.d.ts +10 -7
- package/dist/cache/response-cache.d.ts +12 -2
- package/dist/cache/response-cache.js +178 -30
- package/dist/cli/commands/check.js +100 -54
- package/dist/cli/commands/explore.js +34 -14
- package/dist/cli/index.js +13 -3
- package/dist/config/template.js +8 -7
- package/dist/config/validator.d.ts +59 -59
- package/dist/config/validator.js +245 -90
- package/dist/constants/core.d.ts +4 -0
- package/dist/constants/core.js +8 -19
- package/dist/constants/registry.d.ts +17 -0
- package/dist/constants/registry.js +18 -0
- package/dist/constants/testing.d.ts +0 -369
- package/dist/constants/testing.js +18 -456
- package/dist/constants.d.ts +1 -1
- package/dist/constants.js +1 -1
- package/dist/docs/contract.js +131 -83
- package/dist/docs/report.js +8 -5
- package/dist/interview/insights.d.ts +17 -0
- package/dist/interview/insights.js +52 -0
- package/dist/interview/interviewer.js +119 -57
- package/dist/interview/orchestrator.js +49 -22
- package/dist/interview/prompt-test-generator.d.ts +12 -0
- package/dist/interview/prompt-test-generator.js +77 -0
- package/dist/interview/resource-test-generator.d.ts +12 -0
- package/dist/interview/resource-test-generator.js +20 -0
- package/dist/interview/schema-inferrer.js +26 -4
- package/dist/interview/schema-test-generator.js +278 -31
- package/dist/interview/stateful-test-runner.d.ts +3 -0
- package/dist/interview/stateful-test-runner.js +80 -0
- package/dist/interview/types.d.ts +12 -0
- package/dist/llm/anthropic.js +49 -16
- package/dist/llm/client.d.ts +2 -0
- package/dist/llm/client.js +61 -0
- package/dist/llm/ollama.js +9 -4
- package/dist/llm/openai.js +34 -23
- package/dist/transport/base-transport.d.ts +1 -1
- package/dist/transport/http-transport.d.ts +2 -2
- package/dist/transport/http-transport.js +26 -6
- package/dist/transport/mcp-client.d.ts +18 -6
- package/dist/transport/mcp-client.js +50 -20
- package/dist/transport/sse-transport.d.ts +8 -4
- package/dist/transport/sse-transport.js +161 -69
- package/dist/transport/stdio-transport.d.ts +1 -1
- package/dist/transport/stdio-transport.js +1 -1
- package/dist/utils/timeout.d.ts +10 -2
- package/dist/utils/timeout.js +9 -5
- package/dist/version.js +1 -1
- package/dist/workflow/executor.js +18 -13
- package/dist/workflow/loader.js +4 -1
- package/dist/workflow/state-tracker.js +22 -18
- package/man/bellwether.1 +204 -0
- package/man/bellwether.1.md +148 -0
- package/package.json +6 -7
- package/schemas/bellwether-check.schema.json +185 -0
- package/schemas/bellwether-explore.schema.json +837 -0
- package/scripts/completions/bellwether.bash +10 -4
- package/scripts/completions/bellwether.zsh +55 -2
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
* Enables reuse of tool call results and LLM analysis across personas.
|
|
4
4
|
*/
|
|
5
5
|
import { createHash } from 'crypto';
|
|
6
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, unlinkSync, writeFileSync } from 'fs';
|
|
7
|
+
import { join } from 'path';
|
|
6
8
|
import { getLogger } from '../logging/logger.js';
|
|
7
9
|
import { TIME_CONSTANTS, CACHE } from '../constants.js';
|
|
8
10
|
const logger = getLogger('response-cache');
|
|
@@ -18,33 +20,27 @@ export class ResponseCache {
|
|
|
18
20
|
evictions: 0,
|
|
19
21
|
};
|
|
20
22
|
totalSizeBytes = 0;
|
|
23
|
+
cacheDir;
|
|
21
24
|
constructor(config = {}) {
|
|
22
25
|
this.config = {
|
|
23
26
|
defaultTTLMs: config.defaultTTLMs ?? TIME_CONSTANTS.DEFAULT_CACHE_TTL,
|
|
24
27
|
maxEntries: config.maxEntries ?? CACHE.MAX_ENTRIES,
|
|
25
28
|
maxSizeBytes: config.maxSizeBytes ?? 50 * 1024 * 1024, // 50MB
|
|
26
29
|
enabled: config.enabled ?? true,
|
|
30
|
+
dir: config.dir ?? '',
|
|
27
31
|
};
|
|
32
|
+
this.cacheDir = this.config.enabled ? this.config.dir || undefined : undefined;
|
|
33
|
+
if (this.cacheDir) {
|
|
34
|
+
this.ensureCacheDir(this.cacheDir);
|
|
35
|
+
}
|
|
28
36
|
}
|
|
29
37
|
/**
|
|
30
38
|
* Generate a cache key from input data.
|
|
31
39
|
*/
|
|
32
40
|
generateKey(...parts) {
|
|
33
|
-
const serialized = parts.map((p) =>
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
if (typeof p === 'undefined')
|
|
37
|
-
return 'undefined';
|
|
38
|
-
if (p === null)
|
|
39
|
-
return 'null';
|
|
40
|
-
try {
|
|
41
|
-
return JSON.stringify(p, Object.keys(p).sort());
|
|
42
|
-
}
|
|
43
|
-
catch {
|
|
44
|
-
return String(p);
|
|
45
|
-
}
|
|
46
|
-
}).join('|');
|
|
47
|
-
return createHash('sha256').update(serialized).digest('hex').slice(0, 16);
|
|
41
|
+
const serialized = parts.map((p) => stableStringify(p)).join('|');
|
|
42
|
+
// Use 128-bit hash (32 hex chars) to reduce collision risk.
|
|
43
|
+
return createHash('sha256').update(serialized).digest('hex').slice(0, 32);
|
|
48
44
|
}
|
|
49
45
|
/**
|
|
50
46
|
* Get an entry from cache.
|
|
@@ -55,6 +51,13 @@ export class ResponseCache {
|
|
|
55
51
|
}
|
|
56
52
|
const entry = this.cache.get(key);
|
|
57
53
|
if (!entry) {
|
|
54
|
+
const diskEntry = this.loadFromDisk(key);
|
|
55
|
+
if (diskEntry) {
|
|
56
|
+
this.cache.set(key, diskEntry);
|
|
57
|
+
this.totalSizeBytes += this.estimateSize(diskEntry.value);
|
|
58
|
+
this.stats.hits++;
|
|
59
|
+
return diskEntry.value;
|
|
60
|
+
}
|
|
58
61
|
this.stats.misses++;
|
|
59
62
|
return undefined;
|
|
60
63
|
}
|
|
@@ -66,6 +69,7 @@ export class ResponseCache {
|
|
|
66
69
|
return undefined;
|
|
67
70
|
}
|
|
68
71
|
entry.hitCount++;
|
|
72
|
+
entry.lastAccessedAt = new Date();
|
|
69
73
|
this.stats.hits++;
|
|
70
74
|
logger.debug({ key, hitCount: entry.hitCount }, 'Cache hit');
|
|
71
75
|
return entry.value;
|
|
@@ -86,6 +90,7 @@ export class ResponseCache {
|
|
|
86
90
|
const entry = {
|
|
87
91
|
value,
|
|
88
92
|
createdAt: now,
|
|
93
|
+
lastAccessedAt: now,
|
|
89
94
|
expiresAt: new Date(now.getTime() + ttl),
|
|
90
95
|
key,
|
|
91
96
|
description: options?.description,
|
|
@@ -99,6 +104,7 @@ export class ResponseCache {
|
|
|
99
104
|
this.totalSizeBytes += entrySize;
|
|
100
105
|
this.cache.set(key, entry);
|
|
101
106
|
logger.debug({ key, ttlMs: ttl, description: options?.description }, 'Cache entry set');
|
|
107
|
+
this.saveToDisk(entry);
|
|
102
108
|
}
|
|
103
109
|
/**
|
|
104
110
|
* Check if key exists and is not expired.
|
|
@@ -125,8 +131,10 @@ export class ResponseCache {
|
|
|
125
131
|
if (entry) {
|
|
126
132
|
this.totalSizeBytes -= this.estimateSize(entry.value);
|
|
127
133
|
this.cache.delete(key);
|
|
134
|
+
this.deleteFromDisk(key);
|
|
128
135
|
return true;
|
|
129
136
|
}
|
|
137
|
+
this.deleteFromDisk(key);
|
|
130
138
|
return false;
|
|
131
139
|
}
|
|
132
140
|
/**
|
|
@@ -135,6 +143,16 @@ export class ResponseCache {
|
|
|
135
143
|
clear() {
|
|
136
144
|
this.cache.clear();
|
|
137
145
|
this.totalSizeBytes = 0;
|
|
146
|
+
if (this.cacheDir && existsSync(this.cacheDir)) {
|
|
147
|
+
try {
|
|
148
|
+
for (const file of listCacheFiles(this.cacheDir)) {
|
|
149
|
+
unlinkSync(file);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
catch {
|
|
153
|
+
// Ignore disk cleanup errors
|
|
154
|
+
}
|
|
155
|
+
}
|
|
138
156
|
logger.debug('Cache cleared');
|
|
139
157
|
}
|
|
140
158
|
/**
|
|
@@ -168,31 +186,30 @@ export class ResponseCache {
|
|
|
168
186
|
evictIfNeeded(newEntrySize) {
|
|
169
187
|
// Check entry count
|
|
170
188
|
while (this.cache.size >= this.config.maxEntries) {
|
|
171
|
-
this.
|
|
189
|
+
this.evictLeastRecentlyUsed();
|
|
172
190
|
}
|
|
173
191
|
// Check size
|
|
174
|
-
while (this.totalSizeBytes + newEntrySize > this.config.maxSizeBytes &&
|
|
175
|
-
this.
|
|
176
|
-
this.evictOldest();
|
|
192
|
+
while (this.totalSizeBytes + newEntrySize > this.config.maxSizeBytes && this.cache.size > 0) {
|
|
193
|
+
this.evictLeastRecentlyUsed();
|
|
177
194
|
}
|
|
178
195
|
}
|
|
179
196
|
/**
|
|
180
|
-
* Evict the
|
|
197
|
+
* Evict the least recently used entry (LRU based on last access time).
|
|
181
198
|
*/
|
|
182
|
-
|
|
183
|
-
let
|
|
184
|
-
let
|
|
199
|
+
evictLeastRecentlyUsed() {
|
|
200
|
+
let lruKey;
|
|
201
|
+
let oldestAccessTime = Infinity;
|
|
185
202
|
for (const [key, entry] of this.cache) {
|
|
186
|
-
const time = entry.
|
|
187
|
-
if (time <
|
|
188
|
-
|
|
189
|
-
|
|
203
|
+
const time = entry.lastAccessedAt.getTime();
|
|
204
|
+
if (time < oldestAccessTime) {
|
|
205
|
+
oldestAccessTime = time;
|
|
206
|
+
lruKey = key;
|
|
190
207
|
}
|
|
191
208
|
}
|
|
192
|
-
if (
|
|
193
|
-
this.delete(
|
|
209
|
+
if (lruKey) {
|
|
210
|
+
this.delete(lruKey);
|
|
194
211
|
this.stats.evictions++;
|
|
195
|
-
logger.debug({ key:
|
|
212
|
+
logger.debug({ key: lruKey }, 'Evicted cache entry');
|
|
196
213
|
}
|
|
197
214
|
}
|
|
198
215
|
/**
|
|
@@ -206,6 +223,137 @@ export class ResponseCache {
|
|
|
206
223
|
return 1000; // Default estimate for non-serializable values
|
|
207
224
|
}
|
|
208
225
|
}
|
|
226
|
+
ensureCacheDir(dir) {
|
|
227
|
+
try {
|
|
228
|
+
if (!existsSync(dir)) {
|
|
229
|
+
mkdirSync(dir, { recursive: true });
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
catch (error) {
|
|
233
|
+
logger.warn({ dir, error: String(error) }, 'Failed to create cache directory');
|
|
234
|
+
this.cacheDir = undefined;
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
getCachePath(key) {
|
|
238
|
+
if (!this.cacheDir)
|
|
239
|
+
return null;
|
|
240
|
+
return join(this.cacheDir, `${key}.json`);
|
|
241
|
+
}
|
|
242
|
+
saveToDisk(entry) {
|
|
243
|
+
const path = this.getCachePath(entry.key);
|
|
244
|
+
if (!path)
|
|
245
|
+
return;
|
|
246
|
+
try {
|
|
247
|
+
const serialized = JSON.stringify({
|
|
248
|
+
...entry,
|
|
249
|
+
createdAt: entry.createdAt.toISOString(),
|
|
250
|
+
lastAccessedAt: entry.lastAccessedAt.toISOString(),
|
|
251
|
+
expiresAt: entry.expiresAt.toISOString(),
|
|
252
|
+
});
|
|
253
|
+
writeFileSync(path, serialized, 'utf-8');
|
|
254
|
+
}
|
|
255
|
+
catch (error) {
|
|
256
|
+
logger.debug({ key: entry.key, error: String(error) }, 'Failed to persist cache entry');
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
loadFromDisk(key) {
|
|
260
|
+
const path = this.getCachePath(key);
|
|
261
|
+
if (!path || !existsSync(path))
|
|
262
|
+
return null;
|
|
263
|
+
try {
|
|
264
|
+
const raw = readFileSync(path, 'utf-8');
|
|
265
|
+
const parsed = JSON.parse(raw);
|
|
266
|
+
const entry = {
|
|
267
|
+
...parsed,
|
|
268
|
+
createdAt: new Date(parsed.createdAt),
|
|
269
|
+
lastAccessedAt: new Date(parsed.lastAccessedAt),
|
|
270
|
+
expiresAt: new Date(parsed.expiresAt),
|
|
271
|
+
};
|
|
272
|
+
if (new Date() > entry.expiresAt) {
|
|
273
|
+
this.deleteFromDisk(key);
|
|
274
|
+
return null;
|
|
275
|
+
}
|
|
276
|
+
entry.hitCount = (entry.hitCount ?? 0) + 1;
|
|
277
|
+
entry.lastAccessedAt = new Date();
|
|
278
|
+
this.saveToDisk(entry);
|
|
279
|
+
return entry;
|
|
280
|
+
}
|
|
281
|
+
catch (error) {
|
|
282
|
+
logger.debug({ key, error: String(error) }, 'Failed to load cache entry');
|
|
283
|
+
return null;
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
deleteFromDisk(key) {
|
|
287
|
+
const path = this.getCachePath(key);
|
|
288
|
+
if (!path || !existsSync(path))
|
|
289
|
+
return;
|
|
290
|
+
try {
|
|
291
|
+
unlinkSync(path);
|
|
292
|
+
}
|
|
293
|
+
catch {
|
|
294
|
+
// Ignore delete errors
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
function listCacheFiles(dir) {
|
|
299
|
+
try {
|
|
300
|
+
const entries = readdirSync(dir, { withFileTypes: true });
|
|
301
|
+
return entries
|
|
302
|
+
.filter((entry) => entry.isFile())
|
|
303
|
+
.map((entry) => join(dir, entry.name));
|
|
304
|
+
}
|
|
305
|
+
catch {
|
|
306
|
+
return [];
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
/**
|
|
310
|
+
* Stable, deterministic JSON stringify with deep key sorting.
|
|
311
|
+
* Falls back to string conversion for unsupported types.
|
|
312
|
+
*/
|
|
313
|
+
function stableStringify(value) {
|
|
314
|
+
const seen = new WeakSet();
|
|
315
|
+
const normalize = (input) => {
|
|
316
|
+
if (input === null || input === undefined)
|
|
317
|
+
return input;
|
|
318
|
+
const type = typeof input;
|
|
319
|
+
if (type === 'string' || type === 'number' || type === 'boolean') {
|
|
320
|
+
return input;
|
|
321
|
+
}
|
|
322
|
+
if (type === 'bigint') {
|
|
323
|
+
return input.toString();
|
|
324
|
+
}
|
|
325
|
+
if (type === 'symbol' || type === 'function') {
|
|
326
|
+
return String(input);
|
|
327
|
+
}
|
|
328
|
+
if (input instanceof Date) {
|
|
329
|
+
return input.toISOString();
|
|
330
|
+
}
|
|
331
|
+
if (Array.isArray(input)) {
|
|
332
|
+
return input.map((item) => normalize(item));
|
|
333
|
+
}
|
|
334
|
+
if (typeof input === 'object') {
|
|
335
|
+
const obj = input;
|
|
336
|
+
if (seen.has(obj)) {
|
|
337
|
+
return '[Circular]';
|
|
338
|
+
}
|
|
339
|
+
seen.add(obj);
|
|
340
|
+
const keys = Object.keys(obj).sort();
|
|
341
|
+
const normalized = {};
|
|
342
|
+
for (const key of keys) {
|
|
343
|
+
normalized[key] = normalize(obj[key]);
|
|
344
|
+
}
|
|
345
|
+
return normalized;
|
|
346
|
+
}
|
|
347
|
+
try {
|
|
348
|
+
return JSON.parse(JSON.stringify(input));
|
|
349
|
+
}
|
|
350
|
+
catch {
|
|
351
|
+
return String(input);
|
|
352
|
+
}
|
|
353
|
+
};
|
|
354
|
+
const normalized = normalize(value);
|
|
355
|
+
const json = JSON.stringify(normalized);
|
|
356
|
+
return json === undefined ? 'undefined' : json;
|
|
209
357
|
}
|
|
210
358
|
/**
|
|
211
359
|
* Specialized cache for tool responses.
|
|
@@ -13,7 +13,7 @@ import { MCPClient } from '../../transport/mcp-client.js';
|
|
|
13
13
|
import { discover } from '../../discovery/discovery.js';
|
|
14
14
|
import { Interviewer } from '../../interview/interviewer.js';
|
|
15
15
|
import { generateContractMd, generateJsonReport } from '../../docs/generator.js';
|
|
16
|
-
import { loadConfig, ConfigNotFoundError, parseCommandString } from '../../config/loader.js';
|
|
16
|
+
import { loadConfig, ConfigNotFoundError, parseCommandString, } from '../../config/loader.js';
|
|
17
17
|
import { validateConfigForCheck, getConfigWarnings } from '../../config/validator.js';
|
|
18
18
|
import { createBaseline, loadBaseline, saveBaseline, getToolFingerprints, toToolCapability, compareBaselines, acceptDrift, formatDiffText, formatDiffJson, formatDiffCompact, formatDiffGitHubActions, formatDiffMarkdown, formatDiffJUnit, formatDiffSarif, applySeverityConfig, shouldFailOnDiff, analyzeForIncremental, formatIncrementalSummary, runSecurityTests, parseSecurityCategories, getAllSecurityCategories, } from '../../baseline/index.js';
|
|
19
19
|
import { convertAssertions } from '../../baseline/converter.js';
|
|
@@ -21,11 +21,12 @@ import { getMetricsCollector, resetMetricsCollector } from '../../metrics/collec
|
|
|
21
21
|
import { getGlobalCache, resetGlobalCache } from '../../cache/response-cache.js';
|
|
22
22
|
import { InterviewProgressBar, formatCheckBanner } from '../utils/progress.js';
|
|
23
23
|
import { buildCheckSummary, colorizeConfidence, formatConfidenceLevel, formatToolResultLine, } from '../output/terminal-reporter.js';
|
|
24
|
-
import { loadScenariosFromFile, tryLoadDefaultScenarios, DEFAULT_SCENARIOS_FILE } from '../../scenarios/index.js';
|
|
24
|
+
import { loadScenariosFromFile, tryLoadDefaultScenarios, DEFAULT_SCENARIOS_FILE, } from '../../scenarios/index.js';
|
|
25
25
|
import { loadWorkflowsFromFile, tryLoadDefaultWorkflows, DEFAULT_WORKFLOWS_FILE, WorkflowExecutor, generateWorkflowsFromTools, generateWorkflowYamlContent, } from '../../workflow/index.js';
|
|
26
26
|
import * as output from '../output.js';
|
|
27
27
|
import { extractServerContextFromArgs } from '../utils/server-context.js';
|
|
28
28
|
import { configureLogger } from '../../logging/logger.js';
|
|
29
|
+
import { buildInterviewInsights } from '../../interview/insights.js';
|
|
29
30
|
import { EXIT_CODES, SEVERITY_TO_EXIT_CODE, PATHS, SECURITY_TESTING, CHECK_SAMPLING, WORKFLOW, REPORT_SCHEMAS, PERCENTAGE_CONVERSION, } from '../../constants.js';
|
|
30
31
|
export const checkCommand = new Command('check')
|
|
31
32
|
.description('Check MCP server schema and detect drift (free, fast, deterministic)')
|
|
@@ -73,14 +74,6 @@ export const checkCommand = new Command('check')
|
|
|
73
74
|
output.error(error instanceof Error ? error.message : String(error));
|
|
74
75
|
process.exit(EXIT_CODES.ERROR);
|
|
75
76
|
}
|
|
76
|
-
const warnings = getConfigWarnings(config);
|
|
77
|
-
if (warnings.length > 0) {
|
|
78
|
-
output.warn('Configuration warnings:');
|
|
79
|
-
for (const warning of warnings) {
|
|
80
|
-
output.warn(` - ${warning}`);
|
|
81
|
-
}
|
|
82
|
-
output.newline();
|
|
83
|
-
}
|
|
84
77
|
// Extract settings from config
|
|
85
78
|
const timeout = config.server.timeout;
|
|
86
79
|
const outputDir = config.output.dir;
|
|
@@ -105,7 +98,8 @@ export const checkCommand = new Command('check')
|
|
|
105
98
|
minimumSeverity: options.minSeverity ?? config.baseline.severity.minimumSeverity,
|
|
106
99
|
failOnSeverity: options.failOnSeverity ?? config.baseline.severity.failOnSeverity,
|
|
107
100
|
suppressWarnings: config.baseline.severity.suppressWarnings,
|
|
108
|
-
aspectOverrides: config.baseline.severity
|
|
101
|
+
aspectOverrides: config.baseline.severity
|
|
102
|
+
.aspectOverrides,
|
|
109
103
|
};
|
|
110
104
|
// Resolve check options from config (no CLI overrides for these)
|
|
111
105
|
const incrementalEnabled = config.check.incremental;
|
|
@@ -114,9 +108,26 @@ export const checkCommand = new Command('check')
|
|
|
114
108
|
const parallelWorkers = config.check.parallelWorkers;
|
|
115
109
|
const performanceThreshold = config.check.performanceThreshold / PERCENTAGE_CONVERSION.DIVISOR;
|
|
116
110
|
const diffFormat = options.format ?? config.check.diffFormat;
|
|
111
|
+
const machineReadableFormats = new Set(['json', 'junit', 'sarif']);
|
|
112
|
+
const machineReadable = machineReadableFormats.has(String(diffFormat).toLowerCase());
|
|
113
|
+
if (machineReadable) {
|
|
114
|
+
// Suppress standard CLI output to keep stdout clean for machine-readable formats.
|
|
115
|
+
output.configureOutput({ quiet: true });
|
|
116
|
+
}
|
|
117
|
+
const warnings = getConfigWarnings(config);
|
|
118
|
+
if (warnings.length > 0) {
|
|
119
|
+
output.warn('Configuration warnings:');
|
|
120
|
+
for (const warning of warnings) {
|
|
121
|
+
output.warn(` - ${warning}`);
|
|
122
|
+
}
|
|
123
|
+
if (!machineReadable) {
|
|
124
|
+
output.newline();
|
|
125
|
+
}
|
|
126
|
+
}
|
|
117
127
|
// Resolve security options from config
|
|
118
128
|
const securityEnabled = config.check.security.enabled;
|
|
119
|
-
let securityCategories = config.check.security
|
|
129
|
+
let securityCategories = config.check.security
|
|
130
|
+
.categories;
|
|
120
131
|
// Validate security categories
|
|
121
132
|
try {
|
|
122
133
|
securityCategories = parseSecurityCategories(securityCategories.join(','));
|
|
@@ -141,20 +152,22 @@ export const checkCommand = new Command('check')
|
|
|
141
152
|
? `${serverCommand} ${args.join(' ')}`.trim()
|
|
142
153
|
: (remoteUrl ?? 'unknown');
|
|
143
154
|
// Display startup banner
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
155
|
+
if (!machineReadable) {
|
|
156
|
+
const banner = formatCheckBanner({
|
|
157
|
+
serverCommand: serverIdentifier,
|
|
158
|
+
});
|
|
159
|
+
output.info(banner);
|
|
160
|
+
output.newline();
|
|
161
|
+
output.info('Check: Schema validation and drift detection (free, deterministic)');
|
|
162
|
+
output.newline();
|
|
163
|
+
}
|
|
151
164
|
// Initialize metrics collector
|
|
152
165
|
resetMetricsCollector();
|
|
153
166
|
const metricsCollector = getMetricsCollector();
|
|
154
167
|
metricsCollector.startInterview();
|
|
155
168
|
// Initialize cache
|
|
156
169
|
resetGlobalCache();
|
|
157
|
-
const cache = getGlobalCache({ enabled: cacheEnabled });
|
|
170
|
+
const cache = getGlobalCache({ enabled: cacheEnabled, dir: config.cache.dir });
|
|
158
171
|
if (cacheEnabled && verbose) {
|
|
159
172
|
output.info('Response caching enabled');
|
|
160
173
|
}
|
|
@@ -182,9 +195,12 @@ export const checkCommand = new Command('check')
|
|
|
182
195
|
}
|
|
183
196
|
// Discovery phase
|
|
184
197
|
output.info('Discovering capabilities...');
|
|
185
|
-
const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : remoteUrl ?? serverCommand, transport === 'stdio' ? args : []);
|
|
198
|
+
const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : (remoteUrl ?? serverCommand), transport === 'stdio' ? args : []);
|
|
186
199
|
const resourceCount = discovery.resources?.length ?? 0;
|
|
187
|
-
const discoveryParts = [
|
|
200
|
+
const discoveryParts = [
|
|
201
|
+
`${discovery.tools.length} tools`,
|
|
202
|
+
`${discovery.prompts.length} prompts`,
|
|
203
|
+
];
|
|
188
204
|
if (resourceCount > 0) {
|
|
189
205
|
discoveryParts.push(`${resourceCount} resources`);
|
|
190
206
|
}
|
|
@@ -228,7 +244,9 @@ export const checkCommand = new Command('check')
|
|
|
228
244
|
}
|
|
229
245
|
else {
|
|
230
246
|
incrementalBaseline = loadBaseline(baselinePath);
|
|
231
|
-
const result = analyzeForIncremental(discovery.tools, incrementalBaseline, {
|
|
247
|
+
const result = analyzeForIncremental(discovery.tools, incrementalBaseline, {
|
|
248
|
+
maxCacheAgeHours: incrementalCacheHours,
|
|
249
|
+
});
|
|
232
250
|
incrementalResult = result;
|
|
233
251
|
const summary = formatIncrementalSummary(result.changeSummary);
|
|
234
252
|
output.info(`Incremental analysis: ${summary}`);
|
|
@@ -240,7 +258,7 @@ export const checkCommand = new Command('check')
|
|
|
240
258
|
else {
|
|
241
259
|
output.info(`Testing ${result.toolsToTest.length} tools (${result.toolsToSkip.length} cached)\n`);
|
|
242
260
|
// Filter discovery to only include tools that need testing
|
|
243
|
-
discovery.tools = discovery.tools.filter(t => result.toolsToTest.includes(t.name));
|
|
261
|
+
discovery.tools = discovery.tools.filter((t) => result.toolsToTest.includes(t.name));
|
|
244
262
|
}
|
|
245
263
|
}
|
|
246
264
|
}
|
|
@@ -323,7 +341,7 @@ export const checkCommand = new Command('check')
|
|
|
323
341
|
interviewer.setServerContext(serverContext);
|
|
324
342
|
}
|
|
325
343
|
// Set up progress display
|
|
326
|
-
const progressBar = new InterviewProgressBar({ enabled: !verbose });
|
|
344
|
+
const progressBar = new InterviewProgressBar({ enabled: !verbose && !machineReadable });
|
|
327
345
|
const reportedTools = new Set();
|
|
328
346
|
const progressCallback = (progress) => {
|
|
329
347
|
if (verbose) {
|
|
@@ -365,6 +383,8 @@ export const checkCommand = new Command('check')
|
|
|
365
383
|
};
|
|
366
384
|
output.info('Checking schemas...\n');
|
|
367
385
|
const result = await interviewer.interview(mcpClient, discovery, progressCallback);
|
|
386
|
+
const insights = buildInterviewInsights(result);
|
|
387
|
+
const enrichedResult = { ...result, ...insights };
|
|
368
388
|
progressBar.stop();
|
|
369
389
|
if (!verbose) {
|
|
370
390
|
output.newline();
|
|
@@ -431,7 +451,7 @@ export const checkCommand = new Command('check')
|
|
|
431
451
|
output.info(`Rate-limited tools: ${rateLimit.tools.slice(0, 5).join(', ')}${rateLimit.tools.length > 5 ? ' ...' : ''}`);
|
|
432
452
|
}
|
|
433
453
|
}
|
|
434
|
-
const checkSummary = buildCheckSummary(
|
|
454
|
+
const checkSummary = buildCheckSummary(enrichedResult);
|
|
435
455
|
output.newline();
|
|
436
456
|
output.lines(...checkSummary.lines);
|
|
437
457
|
if (checkSummary.nextSteps.length > 0) {
|
|
@@ -462,7 +482,7 @@ export const checkCommand = new Command('check')
|
|
|
462
482
|
try {
|
|
463
483
|
const response = await mcpClient.callTool(tool.name, args);
|
|
464
484
|
const content = response.content
|
|
465
|
-
.map((c) => c.type === 'text' ? c.text : '')
|
|
485
|
+
.map((c) => (c.type === 'text' ? c.text : ''))
|
|
466
486
|
.join('\n');
|
|
467
487
|
return {
|
|
468
488
|
isError: response.isError ?? false,
|
|
@@ -580,7 +600,7 @@ export const checkCommand = new Command('check')
|
|
|
580
600
|
const workflowResult = await workflowExecutor.execute(workflow);
|
|
581
601
|
workflowResults.push(workflowResult);
|
|
582
602
|
const statusIcon = workflowResult.success ? '\u2713' : '\u2717';
|
|
583
|
-
const stepsInfo = `${workflowResult.steps.filter(s => s.success).length}/${workflow.steps.length} steps`;
|
|
603
|
+
const stepsInfo = `${workflowResult.steps.filter((s) => s.success).length}/${workflow.steps.length} steps`;
|
|
584
604
|
if (workflowResult.success) {
|
|
585
605
|
output.success(` ${statusIcon} ${workflow.name} (${stepsInfo}) - ${workflowResult.durationMs}ms`);
|
|
586
606
|
}
|
|
@@ -599,7 +619,7 @@ export const checkCommand = new Command('check')
|
|
|
599
619
|
}
|
|
600
620
|
}
|
|
601
621
|
// Workflow summary
|
|
602
|
-
const passed = workflowResults.filter(r => r.success).length;
|
|
622
|
+
const passed = workflowResults.filter((r) => r.success).length;
|
|
603
623
|
const failed = workflowResults.length - passed;
|
|
604
624
|
output.newline();
|
|
605
625
|
if (failed === 0) {
|
|
@@ -612,12 +632,25 @@ export const checkCommand = new Command('check')
|
|
|
612
632
|
}
|
|
613
633
|
// Generate documentation (after security testing so findings can be included)
|
|
614
634
|
output.info('Generating documentation...');
|
|
615
|
-
const writeDocs = outputFormat === 'both' || outputFormat === '
|
|
635
|
+
const writeDocs = outputFormat === 'both' || outputFormat === 'docs';
|
|
616
636
|
const writeJson = outputFormat === 'both' || outputFormat === 'json';
|
|
617
637
|
if (writeDocs) {
|
|
618
|
-
const
|
|
638
|
+
const semanticMap = insights.semanticInferences
|
|
639
|
+
? new Map(Object.entries(insights.semanticInferences))
|
|
640
|
+
: undefined;
|
|
641
|
+
const schemaEvolutionMap = insights.schemaEvolution
|
|
642
|
+
? new Map(Object.entries(insights.schemaEvolution))
|
|
643
|
+
: undefined;
|
|
644
|
+
const errorAnalysisMap = insights.errorAnalysisSummaries
|
|
645
|
+
? new Map(Object.entries(insights.errorAnalysisSummaries))
|
|
646
|
+
: undefined;
|
|
647
|
+
const contractMd = generateContractMd(enrichedResult, {
|
|
619
648
|
securityFingerprints: securityEnabled ? securityFingerprints : undefined,
|
|
620
649
|
workflowResults: workflowResults.length > 0 ? workflowResults : undefined,
|
|
650
|
+
semanticInferences: semanticMap,
|
|
651
|
+
schemaEvolution: schemaEvolutionMap,
|
|
652
|
+
errorAnalysisSummaries: errorAnalysisMap,
|
|
653
|
+
documentationScore: insights.documentationScore,
|
|
621
654
|
exampleLength,
|
|
622
655
|
fullExamples,
|
|
623
656
|
maxExamplesPerTool,
|
|
@@ -631,13 +664,12 @@ export const checkCommand = new Command('check')
|
|
|
631
664
|
}
|
|
632
665
|
if (writeJson) {
|
|
633
666
|
// Add workflow results to the result object for the JSON report
|
|
634
|
-
const resultWithWorkflows = workflowResults.length > 0
|
|
635
|
-
? { ...result, workflowResults }
|
|
636
|
-
: result;
|
|
667
|
+
const resultWithWorkflows = workflowResults.length > 0 ? { ...enrichedResult, workflowResults } : enrichedResult;
|
|
637
668
|
let jsonReport;
|
|
638
669
|
try {
|
|
639
670
|
jsonReport = generateJsonReport(resultWithWorkflows, {
|
|
640
671
|
schemaUrl: REPORT_SCHEMAS.CHECK_REPORT_SCHEMA_URL,
|
|
672
|
+
schemaPath: REPORT_SCHEMAS.CHECK_REPORT_SCHEMA_FILE,
|
|
641
673
|
validate: true,
|
|
642
674
|
});
|
|
643
675
|
}
|
|
@@ -650,7 +682,7 @@ export const checkCommand = new Command('check')
|
|
|
650
682
|
output.info(`Written: ${jsonPath}`);
|
|
651
683
|
}
|
|
652
684
|
// Create baseline from results
|
|
653
|
-
let currentBaseline = createBaseline(
|
|
685
|
+
let currentBaseline = createBaseline(enrichedResult, fullServerCommand);
|
|
654
686
|
// Attach security fingerprints to tool fingerprints if security testing was run
|
|
655
687
|
if (securityEnabled && securityFingerprints.size > 0) {
|
|
656
688
|
currentBaseline = {
|
|
@@ -671,10 +703,7 @@ export const checkCommand = new Command('check')
|
|
|
671
703
|
if (incrementalResult && incrementalResult.cachedFingerprints.length > 0) {
|
|
672
704
|
// Merge new fingerprints with cached ones
|
|
673
705
|
const cachedTools = incrementalResult.cachedFingerprints.map(toToolCapability);
|
|
674
|
-
const mergedTools = [
|
|
675
|
-
...currentBaseline.capabilities.tools,
|
|
676
|
-
...cachedTools,
|
|
677
|
-
].sort((a, b) => a.name.localeCompare(b.name));
|
|
706
|
+
const mergedTools = [...currentBaseline.capabilities.tools, ...cachedTools].sort((a, b) => a.name.localeCompare(b.name));
|
|
678
707
|
currentBaseline = {
|
|
679
708
|
...currentBaseline,
|
|
680
709
|
capabilities: {
|
|
@@ -773,7 +802,9 @@ export const checkCommand = new Command('check')
|
|
|
773
802
|
if (!baselinePath) {
|
|
774
803
|
const formattedCheckResults = formatCheckResults(currentBaseline, diffFormat);
|
|
775
804
|
if (formattedCheckResults) {
|
|
776
|
-
|
|
805
|
+
if (!machineReadable) {
|
|
806
|
+
output.info('\n--- Check Results ---');
|
|
807
|
+
}
|
|
777
808
|
// Output directly to stdout for machine-readable formats
|
|
778
809
|
console.log(formattedCheckResults);
|
|
779
810
|
}
|
|
@@ -790,10 +821,17 @@ export const checkCommand = new Command('check')
|
|
|
790
821
|
});
|
|
791
822
|
// Apply severity configuration (filtering, overrides)
|
|
792
823
|
const diff = applySeverityConfig(rawDiff, severityConfig);
|
|
793
|
-
|
|
824
|
+
if (!machineReadable) {
|
|
825
|
+
output.info('\n--- Drift Report ---');
|
|
826
|
+
}
|
|
794
827
|
// Select formatter based on --format option
|
|
795
828
|
const formattedDiff = formatDiff(diff, diffFormat, baselinePath);
|
|
796
|
-
|
|
829
|
+
if (machineReadable) {
|
|
830
|
+
console.log(formattedDiff);
|
|
831
|
+
}
|
|
832
|
+
else {
|
|
833
|
+
output.info(formattedDiff);
|
|
834
|
+
}
|
|
797
835
|
// Report performance regressions if detected
|
|
798
836
|
if (diff.performanceReport?.hasRegressions) {
|
|
799
837
|
output.warn('\n--- Performance Regressions ---');
|
|
@@ -936,7 +974,7 @@ function formatDiff(diff, format, baselinePath) {
|
|
|
936
974
|
function formatCheckResultsJUnit(baseline) {
|
|
937
975
|
const tools = getToolFingerprints(baseline);
|
|
938
976
|
const lines = [];
|
|
939
|
-
const securityFailures = tools.filter(t => t.securityFingerprint?.findings?.some(f => f.riskLevel === 'critical' || f.riskLevel === 'high')).length;
|
|
977
|
+
const securityFailures = tools.filter((t) => t.securityFingerprint?.findings?.some((f) => f.riskLevel === 'critical' || f.riskLevel === 'high')).length;
|
|
940
978
|
lines.push('<?xml version="1.0" encoding="UTF-8"?>');
|
|
941
979
|
lines.push('<testsuites>');
|
|
942
980
|
lines.push(` <testsuite name="bellwether-check" tests="${tools.length}" failures="${securityFailures}" errors="0">`);
|
|
@@ -951,16 +989,16 @@ function formatCheckResultsJUnit(baseline) {
|
|
|
951
989
|
lines.push(' </testcase>');
|
|
952
990
|
}
|
|
953
991
|
// Add security findings as test cases if present
|
|
954
|
-
const securityTools = tools.filter(t => t.securityFingerprint?.findings?.length);
|
|
992
|
+
const securityTools = tools.filter((t) => t.securityFingerprint?.findings?.length);
|
|
955
993
|
if (securityTools.length > 0) {
|
|
956
994
|
lines.push(` <!-- Security findings -->`);
|
|
957
995
|
for (const tool of securityTools) {
|
|
958
996
|
const findings = tool.securityFingerprint?.findings ?? [];
|
|
959
|
-
const criticalHigh = findings.filter(f => f.riskLevel === 'critical' || f.riskLevel === 'high').length;
|
|
997
|
+
const criticalHigh = findings.filter((f) => f.riskLevel === 'critical' || f.riskLevel === 'high').length;
|
|
960
998
|
if (criticalHigh > 0) {
|
|
961
999
|
lines.push(` <testcase name="${tool.name}-security" classname="security">`);
|
|
962
1000
|
lines.push(` <failure message="${criticalHigh} critical/high security findings">`);
|
|
963
|
-
for (const finding of findings.filter(f => f.riskLevel === 'critical' || f.riskLevel === 'high')) {
|
|
1001
|
+
for (const finding of findings.filter((f) => f.riskLevel === 'critical' || f.riskLevel === 'high')) {
|
|
964
1002
|
lines.push(` ${finding.riskLevel.toUpperCase()}: ${finding.title} (${finding.cweId})`);
|
|
965
1003
|
}
|
|
966
1004
|
lines.push(` </failure>`);
|
|
@@ -981,7 +1019,7 @@ function formatCheckResultsSarif(baseline) {
|
|
|
981
1019
|
const serverUri = baseline.metadata?.serverCommand || baseline.server.name || 'mcp-server';
|
|
982
1020
|
const results = [];
|
|
983
1021
|
// Add results for tools with security findings
|
|
984
|
-
const securityTools = tools.filter(t => t.securityFingerprint?.findings?.length);
|
|
1022
|
+
const securityTools = tools.filter((t) => t.securityFingerprint?.findings?.length);
|
|
985
1023
|
for (const tool of securityTools) {
|
|
986
1024
|
const findings = tool.securityFingerprint?.findings ?? [];
|
|
987
1025
|
for (const finding of findings) {
|
|
@@ -994,12 +1032,14 @@ function formatCheckResultsSarif(baseline) {
|
|
|
994
1032
|
ruleId: finding.cweId || 'BWH-SEC',
|
|
995
1033
|
level,
|
|
996
1034
|
message: { text: `[${tool.name}] ${finding.title}: ${finding.description}` },
|
|
997
|
-
locations: [
|
|
1035
|
+
locations: [
|
|
1036
|
+
{
|
|
998
1037
|
physicalLocation: {
|
|
999
1038
|
artifactLocation: { uri: serverUri },
|
|
1000
1039
|
region: { startLine: 1 },
|
|
1001
1040
|
},
|
|
1002
|
-
}
|
|
1041
|
+
},
|
|
1042
|
+
],
|
|
1003
1043
|
});
|
|
1004
1044
|
}
|
|
1005
1045
|
}
|
|
@@ -1010,20 +1050,25 @@ function formatCheckResultsSarif(baseline) {
|
|
|
1010
1050
|
results.push({
|
|
1011
1051
|
ruleId: 'BWH-REL',
|
|
1012
1052
|
level: 'warning',
|
|
1013
|
-
message: {
|
|
1014
|
-
|
|
1053
|
+
message: {
|
|
1054
|
+
text: `Tool "${tool.name}" has ${(successRate * 100).toFixed(0)}% success rate`,
|
|
1055
|
+
},
|
|
1056
|
+
locations: [
|
|
1057
|
+
{
|
|
1015
1058
|
physicalLocation: {
|
|
1016
1059
|
artifactLocation: { uri: serverUri },
|
|
1017
1060
|
region: { startLine: 1 },
|
|
1018
1061
|
},
|
|
1019
|
-
}
|
|
1062
|
+
},
|
|
1063
|
+
],
|
|
1020
1064
|
});
|
|
1021
1065
|
}
|
|
1022
1066
|
}
|
|
1023
1067
|
const sarif = {
|
|
1024
1068
|
$schema: 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
|
|
1025
1069
|
version: '2.1.0',
|
|
1026
|
-
runs: [
|
|
1070
|
+
runs: [
|
|
1071
|
+
{
|
|
1027
1072
|
tool: {
|
|
1028
1073
|
driver: {
|
|
1029
1074
|
name: 'bellwether',
|
|
@@ -1046,7 +1091,8 @@ function formatCheckResultsSarif(baseline) {
|
|
|
1046
1091
|
},
|
|
1047
1092
|
},
|
|
1048
1093
|
results,
|
|
1049
|
-
}
|
|
1094
|
+
},
|
|
1095
|
+
],
|
|
1050
1096
|
};
|
|
1051
1097
|
return JSON.stringify(sarif, null, 2);
|
|
1052
1098
|
}
|