@dotsetlabs/bellwether 2.0.0 → 2.1.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/CHANGELOG.md +55 -0
  2. package/README.md +9 -0
  3. package/dist/auth/credentials.js +2 -0
  4. package/dist/baseline/accessors.js +12 -0
  5. package/dist/baseline/baseline-format.d.ts +48 -0
  6. package/dist/baseline/comparator.js +263 -20
  7. package/dist/baseline/converter.js +52 -4
  8. package/dist/baseline/dependency-analyzer.js +46 -25
  9. package/dist/baseline/diff.js +51 -39
  10. package/dist/baseline/documentation-scorer.d.ts +1 -1
  11. package/dist/baseline/documentation-scorer.js +4 -4
  12. package/dist/baseline/error-analyzer.js +1 -1
  13. package/dist/baseline/external-dependency-detector.js +16 -7
  14. package/dist/baseline/performance-tracker.js +2 -2
  15. package/dist/baseline/response-fingerprint.js +1 -1
  16. package/dist/baseline/response-schema-tracker.js +17 -22
  17. package/dist/baseline/saver.js +34 -0
  18. package/dist/baseline/types.d.ts +21 -1
  19. package/dist/cache/response-cache.js +9 -2
  20. package/dist/cli/commands/auth.js +15 -18
  21. package/dist/cli/commands/baseline-accept.js +1 -1
  22. package/dist/cli/commands/baseline.js +71 -36
  23. package/dist/cli/commands/check.js +54 -14
  24. package/dist/cli/commands/discover.js +2 -2
  25. package/dist/cli/commands/explore.js +38 -5
  26. package/dist/cli/commands/golden.js +20 -23
  27. package/dist/cli/commands/init.js +10 -7
  28. package/dist/cli/commands/registry.js +37 -35
  29. package/dist/cli/commands/watch.js +5 -5
  30. package/dist/cli/output/terminal-reporter.js +9 -9
  31. package/dist/cli/output.d.ts +1 -1
  32. package/dist/cli/output.js +9 -11
  33. package/dist/config/loader.js +2 -2
  34. package/dist/config/validator.d.ts +33 -33
  35. package/dist/constants/core.d.ts +4 -8
  36. package/dist/constants/core.js +4 -8
  37. package/dist/constants/testing.d.ts +11 -11
  38. package/dist/constants/testing.js +11 -11
  39. package/dist/contract/validator.js +7 -7
  40. package/dist/discovery/discovery.js +88 -14
  41. package/dist/discovery/types.d.ts +5 -1
  42. package/dist/docs/agents.js +145 -57
  43. package/dist/docs/contract.js +136 -40
  44. package/dist/errors/retry.js +11 -5
  45. package/dist/interview/dependency-resolver.d.ts +3 -2
  46. package/dist/interview/dependency-resolver.js +31 -2
  47. package/dist/interview/interviewer.js +10 -2
  48. package/dist/interview/rate-limiter.js +7 -3
  49. package/dist/interview/stateful-test-runner.d.ts +1 -0
  50. package/dist/interview/stateful-test-runner.js +4 -0
  51. package/dist/interview/types.d.ts +3 -0
  52. package/dist/llm/anthropic.js +14 -4
  53. package/dist/llm/fallback.d.ts +1 -0
  54. package/dist/llm/fallback.js +7 -1
  55. package/dist/llm/openai.js +15 -4
  56. package/dist/prompts/templates.js +30 -15
  57. package/dist/protocol/index.d.ts +2 -0
  58. package/dist/protocol/index.js +2 -0
  59. package/dist/protocol/version-registry.d.ts +66 -0
  60. package/dist/protocol/version-registry.js +159 -0
  61. package/dist/scenarios/evaluator.js +9 -10
  62. package/dist/transport/http-transport.d.ts +11 -1
  63. package/dist/transport/http-transport.js +21 -2
  64. package/dist/transport/mcp-client.d.ts +29 -1
  65. package/dist/transport/mcp-client.js +92 -7
  66. package/dist/transport/sse-transport.js +5 -4
  67. package/dist/transport/types.d.ts +134 -1
  68. package/dist/utils/concurrency.d.ts +2 -0
  69. package/dist/utils/concurrency.js +9 -2
  70. package/dist/utils/markdown.js +13 -18
  71. package/dist/utils/timeout.js +2 -1
  72. package/dist/version.js +1 -1
  73. package/man/bellwether.1 +1 -1
  74. package/man/bellwether.1.md +2 -2
  75. package/package.json +2 -1
@@ -27,7 +27,8 @@ import * as output from '../output.js';
27
27
  import { extractServerContextFromArgs } from '../utils/server-context.js';
28
28
  import { configureLogger } from '../../logging/logger.js';
29
29
  import { buildInterviewInsights } from '../../interview/insights.js';
30
- import { EXIT_CODES, SEVERITY_TO_EXIT_CODE, PATHS, SECURITY_TESTING, CHECK_SAMPLING, WORKFLOW, REPORT_SCHEMAS, PERCENTAGE_CONVERSION, } from '../../constants.js';
30
+ import { EXIT_CODES, SEVERITY_TO_EXIT_CODE, PATHS, SECURITY_TESTING, CHECK_SAMPLING, WORKFLOW, REPORT_SCHEMAS, PERCENTAGE_CONVERSION, MCP, } from '../../constants.js';
31
+ import { getFeatureFlags, getExcludedFeatureNames } from '../../protocol/index.js';
31
32
  export const checkCommand = new Command('check')
32
33
  .description('Check MCP server schema and detect drift (free, fast, deterministic)')
33
34
  .allowUnknownOption() // Allow server flags like -y for npx to pass through
@@ -177,6 +178,7 @@ export const checkCommand = new Command('check')
177
178
  debug: logLevel === 'debug',
178
179
  transport,
179
180
  });
181
+ let pendingExitCode;
180
182
  try {
181
183
  // Connect to MCP server
182
184
  output.info('Connecting to MCP server...');
@@ -197,6 +199,7 @@ export const checkCommand = new Command('check')
197
199
  output.info('Discovering capabilities...');
198
200
  const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : (remoteUrl ?? serverCommand), transport === 'stdio' ? args : []);
199
201
  const resourceCount = discovery.resources?.length ?? 0;
202
+ const resourceTemplateCount = discovery.resourceTemplates?.length ?? 0;
200
203
  const discoveryParts = [
201
204
  `${discovery.tools.length} tools`,
202
205
  `${discovery.prompts.length} prompts`,
@@ -204,11 +207,34 @@ export const checkCommand = new Command('check')
204
207
  if (resourceCount > 0) {
205
208
  discoveryParts.push(`${resourceCount} resources`);
206
209
  }
210
+ if (resourceTemplateCount > 0) {
211
+ discoveryParts.push(`${resourceTemplateCount} resource templates`);
212
+ }
207
213
  output.info(`Found ${discoveryParts.join(', ')}\n`);
214
+ // Show server instructions if provided
215
+ if (discovery.instructions) {
216
+ output.info(`Server instructions: ${discovery.instructions}\n`);
217
+ }
218
+ // Show protocol version context
219
+ const features = getFeatureFlags(discovery.protocolVersion);
220
+ if (discovery.protocolVersion !== MCP.PROTOCOL_VERSION) {
221
+ output.info(`Protocol Version: ${discovery.protocolVersion} (bellwether supports up to ${MCP.PROTOCOL_VERSION})`);
222
+ const excluded = getExcludedFeatureNames(discovery.protocolVersion);
223
+ if (excluded.length > 0) {
224
+ output.info(` Version-gated features excluded: ${excluded.join(', ')}`);
225
+ }
226
+ }
227
+ // Show new capabilities (completions, tasks) — gated by protocol version
228
+ if (discovery.capabilities.completions && features.completions) {
229
+ output.info('Server supports: Completions (autocomplete)');
230
+ }
231
+ if (discovery.capabilities.tasks && features.tasks) {
232
+ output.info('Server supports: Tasks');
233
+ }
208
234
  // Output discovery warnings (Issue D: anomaly detection)
209
235
  if (discovery.warnings && discovery.warnings.length > 0) {
210
236
  for (const warning of discovery.warnings) {
211
- output.warn(`⚠ ${warning.message}`);
237
+ output.warn(`[warn] ${warning.message}`);
212
238
  }
213
239
  output.newline();
214
240
  }
@@ -217,7 +243,7 @@ export const checkCommand = new Command('check')
217
243
  output.warn('Transport errors during discovery:');
218
244
  for (const err of discovery.transportErrors.slice(0, 3)) {
219
245
  const typeLabel = err.category.replace(/_/g, ' ');
220
- output.warn(` ${typeLabel}: ${err.message.substring(0, 100)}`);
246
+ output.warn(` [fail] ${typeLabel}: ${err.message.substring(0, 100)}`);
221
247
  }
222
248
  if (discovery.transportErrors.length > 3) {
223
249
  output.warn(` ... and ${discovery.transportErrors.length - 3} more`);
@@ -327,6 +353,7 @@ export const checkCommand = new Command('check')
327
353
  externalServices: config.check.externalServices,
328
354
  assertions: config.check.assertions,
329
355
  rateLimit: config.check.rateLimit,
356
+ testFixtures: config.check.testFixtures,
330
357
  });
331
358
  // Log sampling configuration
332
359
  if (minSamples > CHECK_SAMPLING.DEFAULT_MIN_SAMPLES) {
@@ -403,7 +430,7 @@ export const checkCommand = new Command('check')
403
430
  if (result.scenarioResults && result.scenarioResults.length > 0) {
404
431
  const passed = result.scenarioResults.filter((r) => r.passed).length;
405
432
  const failed = result.scenarioResults.length - passed;
406
- const statusIcon = failed === 0 ? '\u2713' : '\u2717';
433
+ const statusIcon = failed === 0 ? '[PASS]' : '[FAIL]';
407
434
  output.info(`\nCustom scenarios: ${passed}/${result.scenarioResults.length} passed ${statusIcon}`);
408
435
  if (failed > 0) {
409
436
  output.info('\nFailed scenarios:');
@@ -599,7 +626,7 @@ export const checkCommand = new Command('check')
599
626
  try {
600
627
  const workflowResult = await workflowExecutor.execute(workflow);
601
628
  workflowResults.push(workflowResult);
602
- const statusIcon = workflowResult.success ? '\u2713' : '\u2717';
629
+ const statusIcon = workflowResult.success ? '[PASS]' : '[FAIL]';
603
630
  const stepsInfo = `${workflowResult.steps.filter((s) => s.success).length}/${workflow.steps.length} steps`;
604
631
  if (workflowResult.success) {
605
632
  output.success(` ${statusIcon} ${workflow.name} (${stepsInfo}) - ${workflowResult.durationMs}ms`);
@@ -615,7 +642,7 @@ export const checkCommand = new Command('check')
615
642
  }
616
643
  }
617
644
  catch (error) {
618
- output.error(` \u2717 ${workflow.name} - Error: ${error instanceof Error ? error.message : error}`);
645
+ output.error(` [FAIL] ${workflow.name} - Error: ${error instanceof Error ? error.message : error}`);
619
646
  }
620
647
  }
621
648
  // Workflow summary
@@ -881,35 +908,40 @@ export const checkCommand = new Command('check')
881
908
  else if (!options.acceptDrift) {
882
909
  // Check if diff meets failure threshold based on severity config
883
910
  const shouldFail = shouldFailOnDiff(diff, severityConfig.failOnSeverity);
884
- const exitCode = SEVERITY_TO_EXIT_CODE[diff.severity] ?? EXIT_CODES.CLEAN;
911
+ const driftExitCode = SEVERITY_TO_EXIT_CODE[diff.severity] ?? EXIT_CODES.CLEAN;
885
912
  if (diff.severity === 'breaking') {
886
913
  output.error('\nBreaking changes detected!');
887
914
  output.error('Use --accept-drift to accept these changes as intentional.');
888
915
  if (failOnDrift || shouldFail) {
889
- process.exit(exitCode);
916
+ pendingExitCode = driftExitCode;
917
+ return;
890
918
  }
891
919
  }
892
920
  else if (diff.severity === 'warning') {
893
921
  output.warn('\nWarning-level changes detected.');
894
922
  output.warn('Use --accept-drift to accept these changes as intentional.');
895
923
  if (failOnDrift || shouldFail) {
896
- process.exit(exitCode);
924
+ pendingExitCode = driftExitCode;
925
+ return;
897
926
  }
898
927
  }
899
928
  else if (diff.severity === 'info') {
900
929
  output.info('\nInfo-level changes detected (non-breaking).');
901
930
  if (shouldFail) {
902
- process.exit(exitCode);
931
+ pendingExitCode = driftExitCode;
932
+ return;
903
933
  }
904
934
  }
905
935
  // Exit with appropriate code based on severity
906
936
  // This provides semantic exit codes for CI/CD even when not failing
907
- process.exit(exitCode);
937
+ pendingExitCode = driftExitCode;
938
+ return;
908
939
  }
909
940
  }
910
941
  if (config.check.assertions.strict && (result.metadata.assertions?.failed ?? 0) > 0) {
911
942
  output.error('\nAssertion failures detected and check.assertions.strict is enabled.');
912
- process.exit(EXIT_CODES.ERROR);
943
+ pendingExitCode = EXIT_CODES.BREAKING;
944
+ return;
913
945
  }
914
946
  }
915
947
  catch (error) {
@@ -931,10 +963,18 @@ export const checkCommand = new Command('check')
931
963
  output.error(' - The server command was not found');
932
964
  output.error(' - Check that the command is installed and in PATH');
933
965
  }
934
- process.exit(EXIT_CODES.ERROR);
966
+ pendingExitCode = EXIT_CODES.ERROR;
935
967
  }
936
968
  finally {
937
- await mcpClient.disconnect();
969
+ try {
970
+ await mcpClient.disconnect();
971
+ }
972
+ catch {
973
+ /* ignore cleanup errors */
974
+ }
975
+ if (pendingExitCode !== undefined) {
976
+ process.exit(pendingExitCode);
977
+ }
938
978
  }
939
979
  });
940
980
  /**
@@ -60,7 +60,7 @@ async function discoverAction(command, args, options) {
60
60
  if (result.warnings && result.warnings.length > 0) {
61
61
  output.newline();
62
62
  for (const warning of result.warnings) {
63
- output.warn(`⚠ ${warning.message}`);
63
+ output.warn(`[warn] ${warning.message}`);
64
64
  }
65
65
  }
66
66
  // Output transport errors from discovery
@@ -69,7 +69,7 @@ async function discoverAction(command, args, options) {
69
69
  output.warn('Transport errors during discovery:');
70
70
  for (const err of result.transportErrors.slice(0, 3)) {
71
71
  const typeLabel = err.category.replace(/_/g, ' ');
72
- output.warn(` ${typeLabel}: ${err.message.substring(0, 100)}`);
72
+ output.warn(` [fail] ${typeLabel}: ${err.message.substring(0, 100)}`);
73
73
  }
74
74
  if (result.transportErrors.length > 3) {
75
75
  output.warn(` ... and ${result.transportErrors.length - 3} more`);
@@ -17,7 +17,8 @@ import { loadConfig, ConfigNotFoundError, parseCommandString, } from '../../conf
17
17
  import { validateConfigForExplore } from '../../config/validator.js';
18
18
  import { CostTracker, estimateInterviewCost, estimateInterviewTime, formatCostAndTimeEstimate, suggestOptimizations, formatOptimizationSuggestions, } from '../../cost/index.js';
19
19
  import { getMetricsCollector, resetMetricsCollector } from '../../metrics/collector.js';
20
- import { EXIT_CODES, WORKFLOW, PATHS, REPORT_SCHEMAS } from '../../constants.js';
20
+ import { EXIT_CODES, WORKFLOW, PATHS, REPORT_SCHEMAS, MCP } from '../../constants.js';
21
+ import { getExcludedFeatureNames } from '../../protocol/index.js';
21
22
  import { FallbackLLMClient } from '../../llm/fallback.js';
22
23
  import { getGlobalCache, resetGlobalCache } from '../../cache/response-cache.js';
23
24
  import { InterviewProgressBar, formatExploreBanner } from '../utils/progress.js';
@@ -159,6 +160,20 @@ export const exploreCommand = new Command('explore')
159
160
  output.error(' - Ollama: No API key needed (ensure Ollama is running)');
160
161
  process.exit(EXIT_CODES.ERROR);
161
162
  }
163
+ let pendingExitCode;
164
+ // Handle SIGINT/SIGTERM for graceful shutdown
165
+ const signalCleanup = async () => {
166
+ output.info('\n\nInterrupted. Cleaning up...');
167
+ try {
168
+ await mcpClient.disconnect();
169
+ }
170
+ catch {
171
+ /* ignore cleanup errors */
172
+ }
173
+ process.exit(EXIT_CODES.ERROR);
174
+ };
175
+ process.on('SIGINT', signalCleanup);
176
+ process.on('SIGTERM', signalCleanup);
162
177
  try {
163
178
  // Connect to MCP server
164
179
  output.info('Connecting to MCP server...');
@@ -183,6 +198,14 @@ export const exploreCommand = new Command('explore')
183
198
  discoveryParts.push(`${resourceCount} resources`);
184
199
  }
185
200
  output.info(`Found ${discoveryParts.join(', ')}\n`);
201
+ // Show protocol version context
202
+ if (discovery.protocolVersion !== MCP.PROTOCOL_VERSION) {
203
+ output.info(`Protocol Version: ${discovery.protocolVersion} (bellwether supports up to ${MCP.PROTOCOL_VERSION})`);
204
+ const excluded = getExcludedFeatureNames(discovery.protocolVersion);
205
+ if (excluded.length > 0) {
206
+ output.info(` Version-gated features excluded: ${excluded.join(', ')}`);
207
+ }
208
+ }
186
209
  // Update metrics
187
210
  metricsCollector.updateInterviewCounters({
188
211
  toolsDiscovered: discovery.tools.length,
@@ -423,7 +446,7 @@ export const exploreCommand = new Command('explore')
423
446
  if (result.scenarioResults && result.scenarioResults.length > 0) {
424
447
  const passed = result.scenarioResults.filter((r) => r.passed).length;
425
448
  const failed = result.scenarioResults.length - passed;
426
- const statusIcon = failed === 0 ? '\u2713' : '\u2717';
449
+ const statusIcon = failed === 0 ? '[PASS]' : '[FAIL]';
427
450
  output.info(`\nCustom scenarios: ${passed}/${result.scenarioResults.length} passed ${statusIcon}`);
428
451
  if (failed > 0) {
429
452
  output.info('\nFailed scenarios:');
@@ -441,7 +464,7 @@ export const exploreCommand = new Command('explore')
441
464
  if (result.workflowResults && result.workflowResults.length > 0) {
442
465
  const successful = result.workflowResults.filter((wr) => wr.success).length;
443
466
  const failed = result.workflowResults.length - successful;
444
- const statusIcon = failed === 0 ? '\u2713' : '\u2717';
467
+ const statusIcon = failed === 0 ? '[PASS]' : '[FAIL]';
445
468
  output.info(`\nWorkflows: ${successful}/${result.workflowResults.length} passed ${statusIcon}`);
446
469
  if (failed > 0) {
447
470
  output.info('\nFailed workflows:');
@@ -477,11 +500,21 @@ export const exploreCommand = new Command('explore')
477
500
  output.error(' - Missing or invalid API key');
478
501
  output.error(' - Run "bellwether auth" to configure API keys');
479
502
  }
480
- process.exit(EXIT_CODES.ERROR);
503
+ pendingExitCode = EXIT_CODES.ERROR;
481
504
  }
482
505
  finally {
506
+ process.removeListener('SIGINT', signalCleanup);
507
+ process.removeListener('SIGTERM', signalCleanup);
483
508
  restoreLogLevel();
484
- await mcpClient.disconnect();
509
+ try {
510
+ await mcpClient.disconnect();
511
+ }
512
+ catch {
513
+ /* ignore cleanup errors */
514
+ }
515
+ if (pendingExitCode !== undefined) {
516
+ process.exit(pendingExitCode);
517
+ }
485
518
  }
486
519
  });
487
520
  //# sourceMappingURL=explore.js.map
@@ -17,8 +17,7 @@ import { getGoldenStorePath, saveGoldenOutput, createGoldenOutput, listGoldenOut
17
17
  import * as output from '../output.js';
18
18
  import { EXIT_CODES, PATHS } from '../../constants.js';
19
19
  import { formatDateISO } from '../../utils/index.js';
20
- export const goldenCommand = new Command('golden')
21
- .description('Manage golden outputs for tool validation');
20
+ export const goldenCommand = new Command('golden').description('Manage golden outputs for tool validation');
22
21
  // Save command
23
22
  goldenCommand
24
23
  .command('save')
@@ -56,12 +55,8 @@ goldenCommand
56
55
  }
57
56
  const argsJson = options.args ?? config.golden.defaultArgs;
58
57
  const mode = options.mode ?? config.golden.mode;
59
- const normalizeTimestamps = options.normalizeTimestamps === false
60
- ? false
61
- : config.golden.normalizeTimestamps;
62
- const normalizeUuids = options.normalizeUuids === false
63
- ? false
64
- : config.golden.normalizeUuids;
58
+ const normalizeTimestamps = options.normalizeTimestamps === false ? false : config.golden.normalizeTimestamps;
59
+ const normalizeUuids = options.normalizeUuids === false ? false : config.golden.normalizeUuids;
65
60
  // Parse tool arguments
66
61
  let toolArgs;
67
62
  try {
@@ -100,10 +95,10 @@ goldenCommand
100
95
  await mcpClient.connect(serverCommand, args, config.server.env);
101
96
  // Discover tools
102
97
  const discovery = await discover(mcpClient, serverCommand, args);
103
- const tool = discovery.tools.find(t => t.name === options.tool);
98
+ const tool = discovery.tools.find((t) => t.name === options.tool);
104
99
  if (!tool) {
105
100
  output.error(`Tool not found: ${options.tool}`);
106
- output.info(`Available tools: ${discovery.tools.map(t => t.name).join(', ')}`);
101
+ output.info(`Available tools: ${discovery.tools.map((t) => t.name).join(', ')}`);
107
102
  process.exit(EXIT_CODES.ERROR);
108
103
  }
109
104
  // Call the tool
@@ -111,7 +106,7 @@ goldenCommand
111
106
  const response = await mcpClient.callTool(options.tool, toolArgs);
112
107
  if (response.isError) {
113
108
  output.error('Tool returned an error:');
114
- const textContent = response.content.find(c => c.type === 'text');
109
+ const textContent = response.content.find((c) => c.type === 'text');
115
110
  if (textContent && 'text' in textContent) {
116
111
  output.error(String(textContent.text));
117
112
  }
@@ -192,7 +187,7 @@ goldenCommand
192
187
  }
193
188
  const goldens = listGoldenOutputs(storePath);
194
189
  const filteredGoldens = options.tool
195
- ? goldens.filter(g => g.toolName === options.tool)
190
+ ? goldens.filter((g) => g.toolName === options.tool)
196
191
  : goldens;
197
192
  if (filteredGoldens.length === 0) {
198
193
  if (options.tool) {
@@ -222,14 +217,14 @@ goldenCommand
222
217
  const response = await mcpClient.callTool(golden.toolName, golden.inputArgs);
223
218
  const result = compareWithGolden(golden, response);
224
219
  results.push(result);
225
- const icon = result.passed ? '\u2713' : '\u2717';
220
+ const icon = result.passed ? '[PASS]' : '[FAIL]';
226
221
  if (result.passed) {
227
222
  output.success(` ${icon} ${result.summary}`);
228
223
  }
229
224
  else {
230
225
  output.error(` ${icon} ${result.summary}`);
231
- if (result.differences.filter(d => !d.allowed).length <= 5) {
232
- for (const diff of result.differences.filter(d => !d.allowed)) {
226
+ if (result.differences.filter((d) => !d.allowed).length <= 5) {
227
+ for (const diff of result.differences.filter((d) => !d.allowed)) {
233
228
  output.warn(` - ${diff.description} at ${diff.path}`);
234
229
  }
235
230
  }
@@ -242,22 +237,24 @@ goldenCommand
242
237
  severity: 'breaking',
243
238
  mode: golden.tolerance.mode,
244
239
  goldenCapturedAt: golden.capturedAt,
245
- differences: [{
240
+ differences: [
241
+ {
246
242
  type: 'changed',
247
243
  path: '$',
248
244
  expected: 'successful response',
249
245
  actual: `error: ${error instanceof Error ? error.message : String(error)}`,
250
246
  allowed: false,
251
247
  description: 'Tool call failed',
252
- }],
248
+ },
249
+ ],
253
250
  summary: `Tool call failed: ${error instanceof Error ? error.message : String(error)}`,
254
251
  });
255
- output.error(` \u2717 Tool call failed: ${error instanceof Error ? error.message : String(error)}`);
252
+ output.error(` [FAIL] Tool call failed: ${error instanceof Error ? error.message : String(error)}`);
256
253
  }
257
254
  }
258
255
  output.newline();
259
256
  // Summary
260
- const passed = results.filter(r => r.passed).length;
257
+ const passed = results.filter((r) => r.passed).length;
261
258
  const failed = results.length - passed;
262
259
  if (format === 'json') {
263
260
  output.info(JSON.stringify(results, null, 2));
@@ -372,13 +369,13 @@ function formatResultsMarkdown(results) {
372
369
  lines.push('| Tool | Status | Mode | Differences |');
373
370
  lines.push('|------|--------|------|-------------|');
374
371
  for (const result of results) {
375
- const status = result.passed ? 'Match' : `✗ ${result.severity}`;
376
- const diffCount = result.differences.filter(d => !d.allowed).length;
372
+ const status = result.passed ? 'Match' : `${result.severity}`;
373
+ const diffCount = result.differences.filter((d) => !d.allowed).length;
377
374
  lines.push(`| \`${result.toolName}\` | ${status} | ${result.mode} | ${diffCount} |`);
378
375
  }
379
376
  lines.push('');
380
377
  // Details for failed comparisons
381
- const failed = results.filter(r => !r.passed);
378
+ const failed = results.filter((r) => !r.passed);
382
379
  if (failed.length > 0) {
383
380
  lines.push('### Drift Details');
384
381
  lines.push('');
@@ -390,7 +387,7 @@ function formatResultsMarkdown(results) {
390
387
  lines.push(`**Severity:** ${result.severity}`);
391
388
  lines.push('');
392
389
  lines.push('**Changes:**');
393
- for (const diff of result.differences.filter(d => !d.allowed)) {
390
+ for (const diff of result.differences.filter((d) => !d.allowed)) {
394
391
  lines.push(`- ${diff.description}`);
395
392
  if (diff.expected !== undefined) {
396
393
  lines.push(` - Expected: \`${String(diff.expected)}\``);
@@ -17,12 +17,7 @@ import * as output from '../output.js';
17
17
  * Returns an array of variable names found.
18
18
  */
19
19
  function detectEnvVars(cwd) {
20
- const envExampleFiles = [
21
- '.env.example',
22
- '.env.sample',
23
- 'env.example',
24
- 'env.sample',
25
- ];
20
+ const envExampleFiles = ['.env.example', '.env.sample', 'env.example', 'env.sample'];
26
21
  for (const filename of envExampleFiles) {
27
22
  const filepath = join(cwd, filename);
28
23
  if (existsSync(filepath)) {
@@ -129,7 +124,15 @@ export const initCommand = new Command('init')
129
124
  });
130
125
  }
131
126
  // Write config file
132
- writeFileSync(configPath, content);
127
+ try {
128
+ writeFileSync(configPath, content);
129
+ }
130
+ catch (error) {
131
+ output.error(`Failed to write config file: ${error instanceof Error ? error.message : String(error)}`);
132
+ output.error(` Path: ${configPath}`);
133
+ output.error(' Check that the directory exists and you have write permissions.');
134
+ return;
135
+ }
133
136
  // Show success message
134
137
  output.success(`Created: ${configPath}`);
135
138
  output.newline();
@@ -3,7 +3,7 @@
3
3
  */
4
4
  import { Command } from 'commander';
5
5
  import chalk from 'chalk';
6
- import { RegistryClient, generateRunCommand, } from '../../registry/index.js';
6
+ import { RegistryClient, generateRunCommand } from '../../registry/index.js';
7
7
  import { EXIT_CODES } from '../../constants.js';
8
8
  import { loadConfig, ConfigNotFoundError } from '../../config/loader.js';
9
9
  import * as output from '../output.js';
@@ -108,7 +108,7 @@ function isLikelyEnvVar(name) {
108
108
  /auth/i,
109
109
  /^[A-Z][A-Z0-9_]+$/, // ALL_CAPS_PATTERN
110
110
  ];
111
- return envPatterns.some(pattern => pattern.test(name));
111
+ return envPatterns.some((pattern) => pattern.test(name));
112
112
  }
113
113
  /**
114
114
  * Extract likely environment variable name from argument.
@@ -160,36 +160,38 @@ function analyzeServerRequirements(entry) {
160
160
  // Only look at the actual server name part (after last /) to avoid false matches
161
161
  // e.g., "io.github.user/postgres" should match "postgres", not "github"
162
162
  const fullName = entry.server.name.toLowerCase();
163
- const serverNamePart = fullName.includes('/') ? fullName.split('/').pop() || fullName : fullName;
163
+ const serverNamePart = fullName.includes('/')
164
+ ? fullName.split('/').pop() || fullName
165
+ : fullName;
164
166
  const serviceEnvVars = {
165
- 'openai': ['OPENAI_API_KEY'],
166
- 'anthropic': ['ANTHROPIC_API_KEY'],
167
- 'github': ['GITHUB_TOKEN', 'GITHUB_PERSONAL_ACCESS_TOKEN'],
168
- 'gitlab': ['GITLAB_TOKEN', 'GITLAB_PERSONAL_ACCESS_TOKEN'],
169
- 'slack': ['SLACK_TOKEN', 'SLACK_BOT_TOKEN'],
170
- 'discord': ['DISCORD_TOKEN', 'DISCORD_BOT_TOKEN'],
171
- 'postgres': ['DATABASE_URL', 'POSTGRES_CONNECTION_STRING'],
172
- 'mysql': ['DATABASE_URL', 'MYSQL_CONNECTION_STRING'],
173
- 'redis': ['REDIS_URL'],
174
- 'mongodb': ['MONGODB_URI'],
175
- 'aws': ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'],
176
- 'azure': ['AZURE_SUBSCRIPTION_ID', 'AZURE_CLIENT_ID'],
177
- 'gcp': ['GOOGLE_APPLICATION_CREDENTIALS'],
178
- 'google': ['GOOGLE_API_KEY'],
179
- 'stripe': ['STRIPE_API_KEY'],
180
- 'twilio': ['TWILIO_ACCOUNT_SID', 'TWILIO_AUTH_TOKEN'],
181
- 'sendgrid': ['SENDGRID_API_KEY'],
182
- 'mailgun': ['MAILGUN_API_KEY'],
183
- 'firebase': ['FIREBASE_PROJECT_ID'],
184
- 'supabase': ['SUPABASE_URL', 'SUPABASE_KEY'],
185
- 'notion': ['NOTION_API_KEY'],
186
- 'airtable': ['AIRTABLE_API_KEY'],
187
- 'letta': ['LETTA_API_KEY'],
188
- 'brave': ['BRAVE_API_KEY'],
189
- 'puppeteer': [],
190
- 'playwright': [],
191
- 'filesystem': [],
192
- 'everything': [],
167
+ openai: ['OPENAI_API_KEY'],
168
+ anthropic: ['ANTHROPIC_API_KEY'],
169
+ github: ['GITHUB_TOKEN', 'GITHUB_PERSONAL_ACCESS_TOKEN'],
170
+ gitlab: ['GITLAB_TOKEN', 'GITLAB_PERSONAL_ACCESS_TOKEN'],
171
+ slack: ['SLACK_TOKEN', 'SLACK_BOT_TOKEN'],
172
+ discord: ['DISCORD_TOKEN', 'DISCORD_BOT_TOKEN'],
173
+ postgres: ['DATABASE_URL', 'POSTGRES_CONNECTION_STRING'],
174
+ mysql: ['DATABASE_URL', 'MYSQL_CONNECTION_STRING'],
175
+ redis: ['REDIS_URL'],
176
+ mongodb: ['MONGODB_URI'],
177
+ aws: ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'],
178
+ azure: ['AZURE_SUBSCRIPTION_ID', 'AZURE_CLIENT_ID'],
179
+ gcp: ['GOOGLE_APPLICATION_CREDENTIALS'],
180
+ google: ['GOOGLE_API_KEY'],
181
+ stripe: ['STRIPE_API_KEY'],
182
+ twilio: ['TWILIO_ACCOUNT_SID', 'TWILIO_AUTH_TOKEN'],
183
+ sendgrid: ['SENDGRID_API_KEY'],
184
+ mailgun: ['MAILGUN_API_KEY'],
185
+ firebase: ['FIREBASE_PROJECT_ID'],
186
+ supabase: ['SUPABASE_URL', 'SUPABASE_KEY'],
187
+ notion: ['NOTION_API_KEY'],
188
+ airtable: ['AIRTABLE_API_KEY'],
189
+ letta: ['LETTA_API_KEY'],
190
+ brave: ['BRAVE_API_KEY'],
191
+ puppeteer: [],
192
+ playwright: [],
193
+ filesystem: [],
194
+ everything: [],
193
195
  'sequential-thinking': [],
194
196
  };
195
197
  for (const [service, vars] of Object.entries(serviceEnvVars)) {
@@ -225,10 +227,10 @@ function displayServer(entry) {
225
227
  nameLine += chalk.gray(` v${server.version}`);
226
228
  }
227
229
  if (meta?.status === 'active') {
228
- nameLine += chalk.green(' ');
230
+ nameLine += chalk.green(' [active]');
229
231
  }
230
232
  if (requirements.needsSetup) {
231
- nameLine += chalk.yellow(' '); // Setup required indicator
233
+ nameLine += chalk.yellow(' [setup required]');
232
234
  }
233
235
  output.info(nameLine);
234
236
  // Description
@@ -265,12 +267,12 @@ function displayServer(entry) {
265
267
  if (requirements.envVars.length > 0) {
266
268
  output.info(chalk.yellow(' Environment:'));
267
269
  for (const envVar of requirements.envVars) {
268
- const isSet = process.env[envVar] ? chalk.green('') : chalk.red('');
270
+ const isSet = process.env[envVar] ? chalk.green('set') : chalk.red('missing');
269
271
  output.info(chalk.yellow(` ${isSet} ${envVar}`));
270
272
  }
271
273
  }
272
274
  // Setup hints (new)
273
- if (requirements.setupHints.length > 0 && requirements.envVars.some(v => !process.env[v])) {
275
+ if (requirements.setupHints.length > 0 && requirements.envVars.some((v) => !process.env[v])) {
274
276
  output.info(chalk.gray(' Setup:'));
275
277
  for (const hint of requirements.setupHints) {
276
278
  output.info(chalk.gray(` → ${hint}`));
@@ -99,7 +99,7 @@ export const watchCommand = new Command('watch')
99
99
  sessionId: remoteSessionId || undefined,
100
100
  });
101
101
  }
102
- const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : remoteUrl ?? serverCommand, transport === 'stdio' ? args : []);
102
+ const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : (remoteUrl ?? serverCommand), transport === 'stdio' ? args : []);
103
103
  output.info(`Found ${discovery.tools.length} tools`);
104
104
  if (discovery.tools.length === 0) {
105
105
  output.info('No tools found. Skipping.');
@@ -190,7 +190,7 @@ export const watchCommand = new Command('watch')
190
190
  walkDir(fullPath);
191
191
  }
192
192
  else if (entry.isFile()) {
193
- if (extensions.some(ext => entry.name.endsWith(ext))) {
193
+ if (extensions.some((ext) => entry.name.endsWith(ext))) {
194
194
  const stat = statSync(fullPath);
195
195
  const modTime = stat.mtimeMs;
196
196
  const lastMod = fileModTimes.get(fullPath);
@@ -250,14 +250,14 @@ export const watchCommand = new Command('watch')
250
250
  }, interval);
251
251
  // Handle exit
252
252
  const cleanup = () => {
253
+ // Remove signal handlers first to prevent re-entry
254
+ process.removeListener('SIGINT', cleanup);
255
+ process.removeListener('SIGTERM', cleanup);
253
256
  output.info('\n\nExiting watch mode.');
254
257
  if (currentInterval) {
255
258
  clearInterval(currentInterval);
256
259
  currentInterval = null;
257
260
  }
258
- // Remove signal handlers to prevent accumulation
259
- process.removeListener('SIGINT', cleanup);
260
- process.removeListener('SIGTERM', cleanup);
261
261
  process.exit(EXIT_CODES.CLEAN);
262
262
  };
263
263
  process.on('SIGINT', cleanup);
@@ -22,21 +22,21 @@ export function formatToolResultLine(summary) {
22
22
  }
23
23
  export function buildCheckSummary(result) {
24
24
  const toolProfiles = result.toolProfiles;
25
- const skipped = toolProfiles.filter(p => p.skipped).map(p => p.name);
26
- const mocked = toolProfiles.filter(p => p.mocked).map(p => p.name);
27
- const issueTools = toolProfiles.filter(profileHasIssues).map(p => p.name);
28
- const fullyTested = toolProfiles.filter(p => !p.skipped && !p.mocked).length;
25
+ const skipped = toolProfiles.filter((p) => p.skipped).map((p) => p.name);
26
+ const mocked = toolProfiles.filter((p) => p.mocked).map((p) => p.name);
27
+ const issueTools = toolProfiles.filter(profileHasIssues).map((p) => p.name);
28
+ const fullyTested = toolProfiles.filter((p) => !p.skipped && !p.mocked).length;
29
29
  const lines = [];
30
30
  lines.push('Summary:');
31
- lines.push(`✓ ${fullyTested} tools fully tested`);
31
+ lines.push(`[PASS] ${fullyTested} tools fully tested`);
32
32
  if (skipped.length > 0) {
33
- lines.push(`⚠ ${skipped.length} tools skipped`);
33
+ lines.push(`[WARN] ${skipped.length} tools skipped`);
34
34
  }
35
35
  if (mocked.length > 0) {
36
- lines.push(`⚠ ${mocked.length} tools mocked`);
36
+ lines.push(`[WARN] ${mocked.length} tools mocked`);
37
37
  }
38
38
  if (issueTools.length > 0) {
39
- lines.push(`✗ ${issueTools.length} tools have issues`);
39
+ lines.push(`[FAIL] ${issueTools.length} tools have issues`);
40
40
  }
41
41
  const nextSteps = [];
42
42
  const externalServices = result.metadata.externalServices;
@@ -64,7 +64,7 @@ export function colorizeConfidence(label, _level) {
64
64
  return label;
65
65
  }
66
66
  export function profileHasIssues(profile) {
67
- return profile.interactions.some(i => !i.mocked && i.outcomeAssessment && !i.outcomeAssessment.correct);
67
+ return profile.interactions.some((i) => !i.mocked && i.outcomeAssessment && !i.outcomeAssessment.correct);
68
68
  }
69
69
  function resolveStatusSymbol(summary) {
70
70
  if (summary.skipped) {
@@ -193,7 +193,7 @@ export interface DiffSummary {
193
193
  behaviorChanges: number;
194
194
  }
195
195
  /**
196
- * Get the icon for a severity level.
196
+ * Get the label for a severity level.
197
197
  */
198
198
  export declare function getSeverityIcon(severity: string): string;
199
199
  /**