@dotsetlabs/bellwether 1.0.2 → 2.0.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/CHANGELOG.md +97 -0
  2. package/README.md +9 -2
  3. package/dist/baseline/accessors.d.ts +1 -1
  4. package/dist/baseline/accessors.js +1 -3
  5. package/dist/baseline/baseline-format.d.ts +287 -0
  6. package/dist/baseline/baseline-format.js +12 -0
  7. package/dist/baseline/comparator.js +249 -11
  8. package/dist/baseline/converter.d.ts +15 -15
  9. package/dist/baseline/converter.js +46 -34
  10. package/dist/baseline/diff.d.ts +1 -1
  11. package/dist/baseline/diff.js +45 -28
  12. package/dist/baseline/error-analyzer.d.ts +1 -1
  13. package/dist/baseline/error-analyzer.js +90 -17
  14. package/dist/baseline/incremental-checker.js +8 -5
  15. package/dist/baseline/index.d.ts +2 -12
  16. package/dist/baseline/index.js +3 -23
  17. package/dist/baseline/performance-tracker.d.ts +0 -1
  18. package/dist/baseline/performance-tracker.js +13 -20
  19. package/dist/baseline/response-fingerprint.js +39 -2
  20. package/dist/baseline/saver.js +41 -10
  21. package/dist/baseline/schema-compare.d.ts +22 -0
  22. package/dist/baseline/schema-compare.js +259 -16
  23. package/dist/baseline/types.d.ts +10 -7
  24. package/dist/cache/response-cache.d.ts +12 -2
  25. package/dist/cache/response-cache.js +178 -30
  26. package/dist/cli/commands/check.js +100 -54
  27. package/dist/cli/commands/explore.js +34 -14
  28. package/dist/cli/index.js +13 -3
  29. package/dist/config/template.js +8 -7
  30. package/dist/config/validator.d.ts +59 -59
  31. package/dist/config/validator.js +245 -90
  32. package/dist/constants/core.d.ts +4 -0
  33. package/dist/constants/core.js +8 -19
  34. package/dist/constants/registry.d.ts +17 -0
  35. package/dist/constants/registry.js +18 -0
  36. package/dist/constants/testing.d.ts +0 -369
  37. package/dist/constants/testing.js +18 -456
  38. package/dist/constants.d.ts +1 -1
  39. package/dist/constants.js +1 -1
  40. package/dist/docs/contract.js +131 -83
  41. package/dist/docs/report.js +8 -5
  42. package/dist/interview/insights.d.ts +17 -0
  43. package/dist/interview/insights.js +52 -0
  44. package/dist/interview/interviewer.js +119 -57
  45. package/dist/interview/orchestrator.js +49 -22
  46. package/dist/interview/prompt-test-generator.d.ts +12 -0
  47. package/dist/interview/prompt-test-generator.js +77 -0
  48. package/dist/interview/resource-test-generator.d.ts +12 -0
  49. package/dist/interview/resource-test-generator.js +20 -0
  50. package/dist/interview/schema-inferrer.js +26 -4
  51. package/dist/interview/schema-test-generator.js +278 -31
  52. package/dist/interview/stateful-test-runner.d.ts +3 -0
  53. package/dist/interview/stateful-test-runner.js +80 -0
  54. package/dist/interview/types.d.ts +12 -0
  55. package/dist/llm/anthropic.js +49 -16
  56. package/dist/llm/client.d.ts +2 -0
  57. package/dist/llm/client.js +61 -0
  58. package/dist/llm/ollama.js +9 -4
  59. package/dist/llm/openai.js +34 -23
  60. package/dist/transport/base-transport.d.ts +1 -1
  61. package/dist/transport/http-transport.d.ts +2 -2
  62. package/dist/transport/http-transport.js +26 -6
  63. package/dist/transport/mcp-client.d.ts +18 -6
  64. package/dist/transport/mcp-client.js +50 -20
  65. package/dist/transport/sse-transport.d.ts +8 -4
  66. package/dist/transport/sse-transport.js +161 -69
  67. package/dist/transport/stdio-transport.d.ts +1 -1
  68. package/dist/transport/stdio-transport.js +1 -1
  69. package/dist/utils/timeout.d.ts +10 -2
  70. package/dist/utils/timeout.js +9 -5
  71. package/dist/version.js +1 -1
  72. package/dist/workflow/executor.js +18 -13
  73. package/dist/workflow/loader.js +4 -1
  74. package/dist/workflow/state-tracker.js +22 -18
  75. package/man/bellwether.1 +204 -0
  76. package/man/bellwether.1.md +148 -0
  77. package/package.json +6 -7
  78. package/schemas/bellwether-check.schema.json +185 -0
  79. package/schemas/bellwether-explore.schema.json +837 -0
  80. package/scripts/completions/bellwether.bash +10 -4
  81. package/scripts/completions/bellwether.zsh +55 -2
@@ -3,6 +3,8 @@
3
3
  * Enables reuse of tool call results and LLM analysis across personas.
4
4
  */
5
5
  import { createHash } from 'crypto';
6
+ import { existsSync, mkdirSync, readFileSync, readdirSync, unlinkSync, writeFileSync } from 'fs';
7
+ import { join } from 'path';
6
8
  import { getLogger } from '../logging/logger.js';
7
9
  import { TIME_CONSTANTS, CACHE } from '../constants.js';
8
10
  const logger = getLogger('response-cache');
@@ -18,33 +20,27 @@ export class ResponseCache {
18
20
  evictions: 0,
19
21
  };
20
22
  totalSizeBytes = 0;
23
+ cacheDir;
21
24
  constructor(config = {}) {
22
25
  this.config = {
23
26
  defaultTTLMs: config.defaultTTLMs ?? TIME_CONSTANTS.DEFAULT_CACHE_TTL,
24
27
  maxEntries: config.maxEntries ?? CACHE.MAX_ENTRIES,
25
28
  maxSizeBytes: config.maxSizeBytes ?? 50 * 1024 * 1024, // 50MB
26
29
  enabled: config.enabled ?? true,
30
+ dir: config.dir ?? '',
27
31
  };
32
+ this.cacheDir = this.config.enabled ? this.config.dir || undefined : undefined;
33
+ if (this.cacheDir) {
34
+ this.ensureCacheDir(this.cacheDir);
35
+ }
28
36
  }
29
37
  /**
30
38
  * Generate a cache key from input data.
31
39
  */
32
40
  generateKey(...parts) {
33
- const serialized = parts.map((p) => {
34
- if (typeof p === 'string')
35
- return p;
36
- if (typeof p === 'undefined')
37
- return 'undefined';
38
- if (p === null)
39
- return 'null';
40
- try {
41
- return JSON.stringify(p, Object.keys(p).sort());
42
- }
43
- catch {
44
- return String(p);
45
- }
46
- }).join('|');
47
- return createHash('sha256').update(serialized).digest('hex').slice(0, 16);
41
+ const serialized = parts.map((p) => stableStringify(p)).join('|');
42
+ // Use 128-bit hash (32 hex chars) to reduce collision risk.
43
+ return createHash('sha256').update(serialized).digest('hex').slice(0, 32);
48
44
  }
49
45
  /**
50
46
  * Get an entry from cache.
@@ -55,6 +51,13 @@ export class ResponseCache {
55
51
  }
56
52
  const entry = this.cache.get(key);
57
53
  if (!entry) {
54
+ const diskEntry = this.loadFromDisk(key);
55
+ if (diskEntry) {
56
+ this.cache.set(key, diskEntry);
57
+ this.totalSizeBytes += this.estimateSize(diskEntry.value);
58
+ this.stats.hits++;
59
+ return diskEntry.value;
60
+ }
58
61
  this.stats.misses++;
59
62
  return undefined;
60
63
  }
@@ -66,6 +69,7 @@ export class ResponseCache {
66
69
  return undefined;
67
70
  }
68
71
  entry.hitCount++;
72
+ entry.lastAccessedAt = new Date();
69
73
  this.stats.hits++;
70
74
  logger.debug({ key, hitCount: entry.hitCount }, 'Cache hit');
71
75
  return entry.value;
@@ -86,6 +90,7 @@ export class ResponseCache {
86
90
  const entry = {
87
91
  value,
88
92
  createdAt: now,
93
+ lastAccessedAt: now,
89
94
  expiresAt: new Date(now.getTime() + ttl),
90
95
  key,
91
96
  description: options?.description,
@@ -99,6 +104,7 @@ export class ResponseCache {
99
104
  this.totalSizeBytes += entrySize;
100
105
  this.cache.set(key, entry);
101
106
  logger.debug({ key, ttlMs: ttl, description: options?.description }, 'Cache entry set');
107
+ this.saveToDisk(entry);
102
108
  }
103
109
  /**
104
110
  * Check if key exists and is not expired.
@@ -125,8 +131,10 @@ export class ResponseCache {
125
131
  if (entry) {
126
132
  this.totalSizeBytes -= this.estimateSize(entry.value);
127
133
  this.cache.delete(key);
134
+ this.deleteFromDisk(key);
128
135
  return true;
129
136
  }
137
+ this.deleteFromDisk(key);
130
138
  return false;
131
139
  }
132
140
  /**
@@ -135,6 +143,16 @@ export class ResponseCache {
135
143
  clear() {
136
144
  this.cache.clear();
137
145
  this.totalSizeBytes = 0;
146
+ if (this.cacheDir && existsSync(this.cacheDir)) {
147
+ try {
148
+ for (const file of listCacheFiles(this.cacheDir)) {
149
+ unlinkSync(file);
150
+ }
151
+ }
152
+ catch {
153
+ // Ignore disk cleanup errors
154
+ }
155
+ }
138
156
  logger.debug('Cache cleared');
139
157
  }
140
158
  /**
@@ -168,31 +186,30 @@ export class ResponseCache {
168
186
  evictIfNeeded(newEntrySize) {
169
187
  // Check entry count
170
188
  while (this.cache.size >= this.config.maxEntries) {
171
- this.evictOldest();
189
+ this.evictLeastRecentlyUsed();
172
190
  }
173
191
  // Check size
174
- while (this.totalSizeBytes + newEntrySize > this.config.maxSizeBytes &&
175
- this.cache.size > 0) {
176
- this.evictOldest();
192
+ while (this.totalSizeBytes + newEntrySize > this.config.maxSizeBytes && this.cache.size > 0) {
193
+ this.evictLeastRecentlyUsed();
177
194
  }
178
195
  }
179
196
  /**
180
- * Evict the oldest entry (LRU based on creation time).
197
+ * Evict the least recently used entry (LRU based on last access time).
181
198
  */
182
- evictOldest() {
183
- let oldestKey;
184
- let oldestTime = Infinity;
199
+ evictLeastRecentlyUsed() {
200
+ let lruKey;
201
+ let oldestAccessTime = Infinity;
185
202
  for (const [key, entry] of this.cache) {
186
- const time = entry.createdAt.getTime();
187
- if (time < oldestTime) {
188
- oldestTime = time;
189
- oldestKey = key;
203
+ const time = entry.lastAccessedAt.getTime();
204
+ if (time < oldestAccessTime) {
205
+ oldestAccessTime = time;
206
+ lruKey = key;
190
207
  }
191
208
  }
192
- if (oldestKey) {
193
- this.delete(oldestKey);
209
+ if (lruKey) {
210
+ this.delete(lruKey);
194
211
  this.stats.evictions++;
195
- logger.debug({ key: oldestKey }, 'Evicted cache entry');
212
+ logger.debug({ key: lruKey }, 'Evicted cache entry');
196
213
  }
197
214
  }
198
215
  /**
@@ -206,6 +223,137 @@ export class ResponseCache {
206
223
  return 1000; // Default estimate for non-serializable values
207
224
  }
208
225
  }
226
+ ensureCacheDir(dir) {
227
+ try {
228
+ if (!existsSync(dir)) {
229
+ mkdirSync(dir, { recursive: true });
230
+ }
231
+ }
232
+ catch (error) {
233
+ logger.warn({ dir, error: String(error) }, 'Failed to create cache directory');
234
+ this.cacheDir = undefined;
235
+ }
236
+ }
237
+ getCachePath(key) {
238
+ if (!this.cacheDir)
239
+ return null;
240
+ return join(this.cacheDir, `${key}.json`);
241
+ }
242
+ saveToDisk(entry) {
243
+ const path = this.getCachePath(entry.key);
244
+ if (!path)
245
+ return;
246
+ try {
247
+ const serialized = JSON.stringify({
248
+ ...entry,
249
+ createdAt: entry.createdAt.toISOString(),
250
+ lastAccessedAt: entry.lastAccessedAt.toISOString(),
251
+ expiresAt: entry.expiresAt.toISOString(),
252
+ });
253
+ writeFileSync(path, serialized, 'utf-8');
254
+ }
255
+ catch (error) {
256
+ logger.debug({ key: entry.key, error: String(error) }, 'Failed to persist cache entry');
257
+ }
258
+ }
259
+ loadFromDisk(key) {
260
+ const path = this.getCachePath(key);
261
+ if (!path || !existsSync(path))
262
+ return null;
263
+ try {
264
+ const raw = readFileSync(path, 'utf-8');
265
+ const parsed = JSON.parse(raw);
266
+ const entry = {
267
+ ...parsed,
268
+ createdAt: new Date(parsed.createdAt),
269
+ lastAccessedAt: new Date(parsed.lastAccessedAt),
270
+ expiresAt: new Date(parsed.expiresAt),
271
+ };
272
+ if (new Date() > entry.expiresAt) {
273
+ this.deleteFromDisk(key);
274
+ return null;
275
+ }
276
+ entry.hitCount = (entry.hitCount ?? 0) + 1;
277
+ entry.lastAccessedAt = new Date();
278
+ this.saveToDisk(entry);
279
+ return entry;
280
+ }
281
+ catch (error) {
282
+ logger.debug({ key, error: String(error) }, 'Failed to load cache entry');
283
+ return null;
284
+ }
285
+ }
286
+ deleteFromDisk(key) {
287
+ const path = this.getCachePath(key);
288
+ if (!path || !existsSync(path))
289
+ return;
290
+ try {
291
+ unlinkSync(path);
292
+ }
293
+ catch {
294
+ // Ignore delete errors
295
+ }
296
+ }
297
+ }
298
+ function listCacheFiles(dir) {
299
+ try {
300
+ const entries = readdirSync(dir, { withFileTypes: true });
301
+ return entries
302
+ .filter((entry) => entry.isFile())
303
+ .map((entry) => join(dir, entry.name));
304
+ }
305
+ catch {
306
+ return [];
307
+ }
308
+ }
309
+ /**
310
+ * Stable, deterministic JSON stringify with deep key sorting.
311
+ * Falls back to string conversion for unsupported types.
312
+ */
313
+ function stableStringify(value) {
314
+ const seen = new WeakSet();
315
+ const normalize = (input) => {
316
+ if (input === null || input === undefined)
317
+ return input;
318
+ const type = typeof input;
319
+ if (type === 'string' || type === 'number' || type === 'boolean') {
320
+ return input;
321
+ }
322
+ if (type === 'bigint') {
323
+ return input.toString();
324
+ }
325
+ if (type === 'symbol' || type === 'function') {
326
+ return String(input);
327
+ }
328
+ if (input instanceof Date) {
329
+ return input.toISOString();
330
+ }
331
+ if (Array.isArray(input)) {
332
+ return input.map((item) => normalize(item));
333
+ }
334
+ if (typeof input === 'object') {
335
+ const obj = input;
336
+ if (seen.has(obj)) {
337
+ return '[Circular]';
338
+ }
339
+ seen.add(obj);
340
+ const keys = Object.keys(obj).sort();
341
+ const normalized = {};
342
+ for (const key of keys) {
343
+ normalized[key] = normalize(obj[key]);
344
+ }
345
+ return normalized;
346
+ }
347
+ try {
348
+ return JSON.parse(JSON.stringify(input));
349
+ }
350
+ catch {
351
+ return String(input);
352
+ }
353
+ };
354
+ const normalized = normalize(value);
355
+ const json = JSON.stringify(normalized);
356
+ return json === undefined ? 'undefined' : json;
209
357
  }
210
358
  /**
211
359
  * Specialized cache for tool responses.
@@ -13,7 +13,7 @@ import { MCPClient } from '../../transport/mcp-client.js';
13
13
  import { discover } from '../../discovery/discovery.js';
14
14
  import { Interviewer } from '../../interview/interviewer.js';
15
15
  import { generateContractMd, generateJsonReport } from '../../docs/generator.js';
16
- import { loadConfig, ConfigNotFoundError, parseCommandString } from '../../config/loader.js';
16
+ import { loadConfig, ConfigNotFoundError, parseCommandString, } from '../../config/loader.js';
17
17
  import { validateConfigForCheck, getConfigWarnings } from '../../config/validator.js';
18
18
  import { createBaseline, loadBaseline, saveBaseline, getToolFingerprints, toToolCapability, compareBaselines, acceptDrift, formatDiffText, formatDiffJson, formatDiffCompact, formatDiffGitHubActions, formatDiffMarkdown, formatDiffJUnit, formatDiffSarif, applySeverityConfig, shouldFailOnDiff, analyzeForIncremental, formatIncrementalSummary, runSecurityTests, parseSecurityCategories, getAllSecurityCategories, } from '../../baseline/index.js';
19
19
  import { convertAssertions } from '../../baseline/converter.js';
@@ -21,11 +21,12 @@ import { getMetricsCollector, resetMetricsCollector } from '../../metrics/collec
21
21
  import { getGlobalCache, resetGlobalCache } from '../../cache/response-cache.js';
22
22
  import { InterviewProgressBar, formatCheckBanner } from '../utils/progress.js';
23
23
  import { buildCheckSummary, colorizeConfidence, formatConfidenceLevel, formatToolResultLine, } from '../output/terminal-reporter.js';
24
- import { loadScenariosFromFile, tryLoadDefaultScenarios, DEFAULT_SCENARIOS_FILE } from '../../scenarios/index.js';
24
+ import { loadScenariosFromFile, tryLoadDefaultScenarios, DEFAULT_SCENARIOS_FILE, } from '../../scenarios/index.js';
25
25
  import { loadWorkflowsFromFile, tryLoadDefaultWorkflows, DEFAULT_WORKFLOWS_FILE, WorkflowExecutor, generateWorkflowsFromTools, generateWorkflowYamlContent, } from '../../workflow/index.js';
26
26
  import * as output from '../output.js';
27
27
  import { extractServerContextFromArgs } from '../utils/server-context.js';
28
28
  import { configureLogger } from '../../logging/logger.js';
29
+ import { buildInterviewInsights } from '../../interview/insights.js';
29
30
  import { EXIT_CODES, SEVERITY_TO_EXIT_CODE, PATHS, SECURITY_TESTING, CHECK_SAMPLING, WORKFLOW, REPORT_SCHEMAS, PERCENTAGE_CONVERSION, } from '../../constants.js';
30
31
  export const checkCommand = new Command('check')
31
32
  .description('Check MCP server schema and detect drift (free, fast, deterministic)')
@@ -73,14 +74,6 @@ export const checkCommand = new Command('check')
73
74
  output.error(error instanceof Error ? error.message : String(error));
74
75
  process.exit(EXIT_CODES.ERROR);
75
76
  }
76
- const warnings = getConfigWarnings(config);
77
- if (warnings.length > 0) {
78
- output.warn('Configuration warnings:');
79
- for (const warning of warnings) {
80
- output.warn(` - ${warning}`);
81
- }
82
- output.newline();
83
- }
84
77
  // Extract settings from config
85
78
  const timeout = config.server.timeout;
86
79
  const outputDir = config.output.dir;
@@ -105,7 +98,8 @@ export const checkCommand = new Command('check')
105
98
  minimumSeverity: options.minSeverity ?? config.baseline.severity.minimumSeverity,
106
99
  failOnSeverity: options.failOnSeverity ?? config.baseline.severity.failOnSeverity,
107
100
  suppressWarnings: config.baseline.severity.suppressWarnings,
108
- aspectOverrides: config.baseline.severity.aspectOverrides,
101
+ aspectOverrides: config.baseline.severity
102
+ .aspectOverrides,
109
103
  };
110
104
  // Resolve check options from config (no CLI overrides for these)
111
105
  const incrementalEnabled = config.check.incremental;
@@ -114,9 +108,26 @@ export const checkCommand = new Command('check')
114
108
  const parallelWorkers = config.check.parallelWorkers;
115
109
  const performanceThreshold = config.check.performanceThreshold / PERCENTAGE_CONVERSION.DIVISOR;
116
110
  const diffFormat = options.format ?? config.check.diffFormat;
111
+ const machineReadableFormats = new Set(['json', 'junit', 'sarif']);
112
+ const machineReadable = machineReadableFormats.has(String(diffFormat).toLowerCase());
113
+ if (machineReadable) {
114
+ // Suppress standard CLI output to keep stdout clean for machine-readable formats.
115
+ output.configureOutput({ quiet: true });
116
+ }
117
+ const warnings = getConfigWarnings(config);
118
+ if (warnings.length > 0) {
119
+ output.warn('Configuration warnings:');
120
+ for (const warning of warnings) {
121
+ output.warn(` - ${warning}`);
122
+ }
123
+ if (!machineReadable) {
124
+ output.newline();
125
+ }
126
+ }
117
127
  // Resolve security options from config
118
128
  const securityEnabled = config.check.security.enabled;
119
- let securityCategories = config.check.security.categories;
129
+ let securityCategories = config.check.security
130
+ .categories;
120
131
  // Validate security categories
121
132
  try {
122
133
  securityCategories = parseSecurityCategories(securityCategories.join(','));
@@ -141,20 +152,22 @@ export const checkCommand = new Command('check')
141
152
  ? `${serverCommand} ${args.join(' ')}`.trim()
142
153
  : (remoteUrl ?? 'unknown');
143
154
  // Display startup banner
144
- const banner = formatCheckBanner({
145
- serverCommand: serverIdentifier,
146
- });
147
- output.info(banner);
148
- output.newline();
149
- output.info('Check: Schema validation and drift detection (free, deterministic)');
150
- output.newline();
155
+ if (!machineReadable) {
156
+ const banner = formatCheckBanner({
157
+ serverCommand: serverIdentifier,
158
+ });
159
+ output.info(banner);
160
+ output.newline();
161
+ output.info('Check: Schema validation and drift detection (free, deterministic)');
162
+ output.newline();
163
+ }
151
164
  // Initialize metrics collector
152
165
  resetMetricsCollector();
153
166
  const metricsCollector = getMetricsCollector();
154
167
  metricsCollector.startInterview();
155
168
  // Initialize cache
156
169
  resetGlobalCache();
157
- const cache = getGlobalCache({ enabled: cacheEnabled });
170
+ const cache = getGlobalCache({ enabled: cacheEnabled, dir: config.cache.dir });
158
171
  if (cacheEnabled && verbose) {
159
172
  output.info('Response caching enabled');
160
173
  }
@@ -182,9 +195,12 @@ export const checkCommand = new Command('check')
182
195
  }
183
196
  // Discovery phase
184
197
  output.info('Discovering capabilities...');
185
- const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : remoteUrl ?? serverCommand, transport === 'stdio' ? args : []);
198
+ const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : (remoteUrl ?? serverCommand), transport === 'stdio' ? args : []);
186
199
  const resourceCount = discovery.resources?.length ?? 0;
187
- const discoveryParts = [`${discovery.tools.length} tools`, `${discovery.prompts.length} prompts`];
200
+ const discoveryParts = [
201
+ `${discovery.tools.length} tools`,
202
+ `${discovery.prompts.length} prompts`,
203
+ ];
188
204
  if (resourceCount > 0) {
189
205
  discoveryParts.push(`${resourceCount} resources`);
190
206
  }
@@ -228,7 +244,9 @@ export const checkCommand = new Command('check')
228
244
  }
229
245
  else {
230
246
  incrementalBaseline = loadBaseline(baselinePath);
231
- const result = analyzeForIncremental(discovery.tools, incrementalBaseline, { maxCacheAgeHours: incrementalCacheHours });
247
+ const result = analyzeForIncremental(discovery.tools, incrementalBaseline, {
248
+ maxCacheAgeHours: incrementalCacheHours,
249
+ });
232
250
  incrementalResult = result;
233
251
  const summary = formatIncrementalSummary(result.changeSummary);
234
252
  output.info(`Incremental analysis: ${summary}`);
@@ -240,7 +258,7 @@ export const checkCommand = new Command('check')
240
258
  else {
241
259
  output.info(`Testing ${result.toolsToTest.length} tools (${result.toolsToSkip.length} cached)\n`);
242
260
  // Filter discovery to only include tools that need testing
243
- discovery.tools = discovery.tools.filter(t => result.toolsToTest.includes(t.name));
261
+ discovery.tools = discovery.tools.filter((t) => result.toolsToTest.includes(t.name));
244
262
  }
245
263
  }
246
264
  }
@@ -323,7 +341,7 @@ export const checkCommand = new Command('check')
323
341
  interviewer.setServerContext(serverContext);
324
342
  }
325
343
  // Set up progress display
326
- const progressBar = new InterviewProgressBar({ enabled: !verbose });
344
+ const progressBar = new InterviewProgressBar({ enabled: !verbose && !machineReadable });
327
345
  const reportedTools = new Set();
328
346
  const progressCallback = (progress) => {
329
347
  if (verbose) {
@@ -365,6 +383,8 @@ export const checkCommand = new Command('check')
365
383
  };
366
384
  output.info('Checking schemas...\n');
367
385
  const result = await interviewer.interview(mcpClient, discovery, progressCallback);
386
+ const insights = buildInterviewInsights(result);
387
+ const enrichedResult = { ...result, ...insights };
368
388
  progressBar.stop();
369
389
  if (!verbose) {
370
390
  output.newline();
@@ -431,7 +451,7 @@ export const checkCommand = new Command('check')
431
451
  output.info(`Rate-limited tools: ${rateLimit.tools.slice(0, 5).join(', ')}${rateLimit.tools.length > 5 ? ' ...' : ''}`);
432
452
  }
433
453
  }
434
- const checkSummary = buildCheckSummary(result);
454
+ const checkSummary = buildCheckSummary(enrichedResult);
435
455
  output.newline();
436
456
  output.lines(...checkSummary.lines);
437
457
  if (checkSummary.nextSteps.length > 0) {
@@ -462,7 +482,7 @@ export const checkCommand = new Command('check')
462
482
  try {
463
483
  const response = await mcpClient.callTool(tool.name, args);
464
484
  const content = response.content
465
- .map((c) => c.type === 'text' ? c.text : '')
485
+ .map((c) => (c.type === 'text' ? c.text : ''))
466
486
  .join('\n');
467
487
  return {
468
488
  isError: response.isError ?? false,
@@ -580,7 +600,7 @@ export const checkCommand = new Command('check')
580
600
  const workflowResult = await workflowExecutor.execute(workflow);
581
601
  workflowResults.push(workflowResult);
582
602
  const statusIcon = workflowResult.success ? '\u2713' : '\u2717';
583
- const stepsInfo = `${workflowResult.steps.filter(s => s.success).length}/${workflow.steps.length} steps`;
603
+ const stepsInfo = `${workflowResult.steps.filter((s) => s.success).length}/${workflow.steps.length} steps`;
584
604
  if (workflowResult.success) {
585
605
  output.success(` ${statusIcon} ${workflow.name} (${stepsInfo}) - ${workflowResult.durationMs}ms`);
586
606
  }
@@ -599,7 +619,7 @@ export const checkCommand = new Command('check')
599
619
  }
600
620
  }
601
621
  // Workflow summary
602
- const passed = workflowResults.filter(r => r.success).length;
622
+ const passed = workflowResults.filter((r) => r.success).length;
603
623
  const failed = workflowResults.length - passed;
604
624
  output.newline();
605
625
  if (failed === 0) {
@@ -612,12 +632,25 @@ export const checkCommand = new Command('check')
612
632
  }
613
633
  // Generate documentation (after security testing so findings can be included)
614
634
  output.info('Generating documentation...');
615
- const writeDocs = outputFormat === 'both' || outputFormat === 'agents.md';
635
+ const writeDocs = outputFormat === 'both' || outputFormat === 'docs';
616
636
  const writeJson = outputFormat === 'both' || outputFormat === 'json';
617
637
  if (writeDocs) {
618
- const contractMd = generateContractMd(result, {
638
+ const semanticMap = insights.semanticInferences
639
+ ? new Map(Object.entries(insights.semanticInferences))
640
+ : undefined;
641
+ const schemaEvolutionMap = insights.schemaEvolution
642
+ ? new Map(Object.entries(insights.schemaEvolution))
643
+ : undefined;
644
+ const errorAnalysisMap = insights.errorAnalysisSummaries
645
+ ? new Map(Object.entries(insights.errorAnalysisSummaries))
646
+ : undefined;
647
+ const contractMd = generateContractMd(enrichedResult, {
619
648
  securityFingerprints: securityEnabled ? securityFingerprints : undefined,
620
649
  workflowResults: workflowResults.length > 0 ? workflowResults : undefined,
650
+ semanticInferences: semanticMap,
651
+ schemaEvolution: schemaEvolutionMap,
652
+ errorAnalysisSummaries: errorAnalysisMap,
653
+ documentationScore: insights.documentationScore,
621
654
  exampleLength,
622
655
  fullExamples,
623
656
  maxExamplesPerTool,
@@ -631,13 +664,12 @@ export const checkCommand = new Command('check')
631
664
  }
632
665
  if (writeJson) {
633
666
  // Add workflow results to the result object for the JSON report
634
- const resultWithWorkflows = workflowResults.length > 0
635
- ? { ...result, workflowResults }
636
- : result;
667
+ const resultWithWorkflows = workflowResults.length > 0 ? { ...enrichedResult, workflowResults } : enrichedResult;
637
668
  let jsonReport;
638
669
  try {
639
670
  jsonReport = generateJsonReport(resultWithWorkflows, {
640
671
  schemaUrl: REPORT_SCHEMAS.CHECK_REPORT_SCHEMA_URL,
672
+ schemaPath: REPORT_SCHEMAS.CHECK_REPORT_SCHEMA_FILE,
641
673
  validate: true,
642
674
  });
643
675
  }
@@ -650,7 +682,7 @@ export const checkCommand = new Command('check')
650
682
  output.info(`Written: ${jsonPath}`);
651
683
  }
652
684
  // Create baseline from results
653
- let currentBaseline = createBaseline(result, fullServerCommand);
685
+ let currentBaseline = createBaseline(enrichedResult, fullServerCommand);
654
686
  // Attach security fingerprints to tool fingerprints if security testing was run
655
687
  if (securityEnabled && securityFingerprints.size > 0) {
656
688
  currentBaseline = {
@@ -671,10 +703,7 @@ export const checkCommand = new Command('check')
671
703
  if (incrementalResult && incrementalResult.cachedFingerprints.length > 0) {
672
704
  // Merge new fingerprints with cached ones
673
705
  const cachedTools = incrementalResult.cachedFingerprints.map(toToolCapability);
674
- const mergedTools = [
675
- ...currentBaseline.capabilities.tools,
676
- ...cachedTools,
677
- ].sort((a, b) => a.name.localeCompare(b.name));
706
+ const mergedTools = [...currentBaseline.capabilities.tools, ...cachedTools].sort((a, b) => a.name.localeCompare(b.name));
678
707
  currentBaseline = {
679
708
  ...currentBaseline,
680
709
  capabilities: {
@@ -773,7 +802,9 @@ export const checkCommand = new Command('check')
773
802
  if (!baselinePath) {
774
803
  const formattedCheckResults = formatCheckResults(currentBaseline, diffFormat);
775
804
  if (formattedCheckResults) {
776
- output.info('\n--- Check Results ---');
805
+ if (!machineReadable) {
806
+ output.info('\n--- Check Results ---');
807
+ }
777
808
  // Output directly to stdout for machine-readable formats
778
809
  console.log(formattedCheckResults);
779
810
  }
@@ -790,10 +821,17 @@ export const checkCommand = new Command('check')
790
821
  });
791
822
  // Apply severity configuration (filtering, overrides)
792
823
  const diff = applySeverityConfig(rawDiff, severityConfig);
793
- output.info('\n--- Drift Report ---');
824
+ if (!machineReadable) {
825
+ output.info('\n--- Drift Report ---');
826
+ }
794
827
  // Select formatter based on --format option
795
828
  const formattedDiff = formatDiff(diff, diffFormat, baselinePath);
796
- output.info(formattedDiff);
829
+ if (machineReadable) {
830
+ console.log(formattedDiff);
831
+ }
832
+ else {
833
+ output.info(formattedDiff);
834
+ }
797
835
  // Report performance regressions if detected
798
836
  if (diff.performanceReport?.hasRegressions) {
799
837
  output.warn('\n--- Performance Regressions ---');
@@ -936,7 +974,7 @@ function formatDiff(diff, format, baselinePath) {
936
974
  function formatCheckResultsJUnit(baseline) {
937
975
  const tools = getToolFingerprints(baseline);
938
976
  const lines = [];
939
- const securityFailures = tools.filter(t => t.securityFingerprint?.findings?.some(f => f.riskLevel === 'critical' || f.riskLevel === 'high')).length;
977
+ const securityFailures = tools.filter((t) => t.securityFingerprint?.findings?.some((f) => f.riskLevel === 'critical' || f.riskLevel === 'high')).length;
940
978
  lines.push('<?xml version="1.0" encoding="UTF-8"?>');
941
979
  lines.push('<testsuites>');
942
980
  lines.push(` <testsuite name="bellwether-check" tests="${tools.length}" failures="${securityFailures}" errors="0">`);
@@ -951,16 +989,16 @@ function formatCheckResultsJUnit(baseline) {
951
989
  lines.push(' </testcase>');
952
990
  }
953
991
  // Add security findings as test cases if present
954
- const securityTools = tools.filter(t => t.securityFingerprint?.findings?.length);
992
+ const securityTools = tools.filter((t) => t.securityFingerprint?.findings?.length);
955
993
  if (securityTools.length > 0) {
956
994
  lines.push(` <!-- Security findings -->`);
957
995
  for (const tool of securityTools) {
958
996
  const findings = tool.securityFingerprint?.findings ?? [];
959
- const criticalHigh = findings.filter(f => f.riskLevel === 'critical' || f.riskLevel === 'high').length;
997
+ const criticalHigh = findings.filter((f) => f.riskLevel === 'critical' || f.riskLevel === 'high').length;
960
998
  if (criticalHigh > 0) {
961
999
  lines.push(` <testcase name="${tool.name}-security" classname="security">`);
962
1000
  lines.push(` <failure message="${criticalHigh} critical/high security findings">`);
963
- for (const finding of findings.filter(f => f.riskLevel === 'critical' || f.riskLevel === 'high')) {
1001
+ for (const finding of findings.filter((f) => f.riskLevel === 'critical' || f.riskLevel === 'high')) {
964
1002
  lines.push(` ${finding.riskLevel.toUpperCase()}: ${finding.title} (${finding.cweId})`);
965
1003
  }
966
1004
  lines.push(` </failure>`);
@@ -981,7 +1019,7 @@ function formatCheckResultsSarif(baseline) {
981
1019
  const serverUri = baseline.metadata?.serverCommand || baseline.server.name || 'mcp-server';
982
1020
  const results = [];
983
1021
  // Add results for tools with security findings
984
- const securityTools = tools.filter(t => t.securityFingerprint?.findings?.length);
1022
+ const securityTools = tools.filter((t) => t.securityFingerprint?.findings?.length);
985
1023
  for (const tool of securityTools) {
986
1024
  const findings = tool.securityFingerprint?.findings ?? [];
987
1025
  for (const finding of findings) {
@@ -994,12 +1032,14 @@ function formatCheckResultsSarif(baseline) {
994
1032
  ruleId: finding.cweId || 'BWH-SEC',
995
1033
  level,
996
1034
  message: { text: `[${tool.name}] ${finding.title}: ${finding.description}` },
997
- locations: [{
1035
+ locations: [
1036
+ {
998
1037
  physicalLocation: {
999
1038
  artifactLocation: { uri: serverUri },
1000
1039
  region: { startLine: 1 },
1001
1040
  },
1002
- }],
1041
+ },
1042
+ ],
1003
1043
  });
1004
1044
  }
1005
1045
  }
@@ -1010,20 +1050,25 @@ function formatCheckResultsSarif(baseline) {
1010
1050
  results.push({
1011
1051
  ruleId: 'BWH-REL',
1012
1052
  level: 'warning',
1013
- message: { text: `Tool "${tool.name}" has ${(successRate * 100).toFixed(0)}% success rate` },
1014
- locations: [{
1053
+ message: {
1054
+ text: `Tool "${tool.name}" has ${(successRate * 100).toFixed(0)}% success rate`,
1055
+ },
1056
+ locations: [
1057
+ {
1015
1058
  physicalLocation: {
1016
1059
  artifactLocation: { uri: serverUri },
1017
1060
  region: { startLine: 1 },
1018
1061
  },
1019
- }],
1062
+ },
1063
+ ],
1020
1064
  });
1021
1065
  }
1022
1066
  }
1023
1067
  const sarif = {
1024
1068
  $schema: 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
1025
1069
  version: '2.1.0',
1026
- runs: [{
1070
+ runs: [
1071
+ {
1027
1072
  tool: {
1028
1073
  driver: {
1029
1074
  name: 'bellwether',
@@ -1046,7 +1091,8 @@ function formatCheckResultsSarif(baseline) {
1046
1091
  },
1047
1092
  },
1048
1093
  results,
1049
- }],
1094
+ },
1095
+ ],
1050
1096
  };
1051
1097
  return JSON.stringify(sarif, null, 2);
1052
1098
  }