@dotsetlabs/bellwether 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +55 -0
- package/README.md +9 -0
- package/dist/auth/credentials.js +2 -0
- package/dist/baseline/accessors.js +12 -0
- package/dist/baseline/baseline-format.d.ts +48 -0
- package/dist/baseline/comparator.js +263 -20
- package/dist/baseline/converter.js +52 -4
- package/dist/baseline/dependency-analyzer.js +46 -25
- package/dist/baseline/diff.js +51 -39
- package/dist/baseline/documentation-scorer.d.ts +1 -1
- package/dist/baseline/documentation-scorer.js +4 -4
- package/dist/baseline/error-analyzer.js +1 -1
- package/dist/baseline/external-dependency-detector.js +16 -7
- package/dist/baseline/performance-tracker.js +2 -2
- package/dist/baseline/response-fingerprint.js +1 -1
- package/dist/baseline/response-schema-tracker.js +17 -22
- package/dist/baseline/saver.js +34 -0
- package/dist/baseline/types.d.ts +21 -1
- package/dist/cache/response-cache.js +9 -2
- package/dist/cli/commands/auth.js +15 -18
- package/dist/cli/commands/baseline-accept.js +1 -1
- package/dist/cli/commands/baseline.js +71 -36
- package/dist/cli/commands/check.js +54 -14
- package/dist/cli/commands/discover.js +2 -2
- package/dist/cli/commands/explore.js +38 -5
- package/dist/cli/commands/golden.js +20 -23
- package/dist/cli/commands/init.js +10 -7
- package/dist/cli/commands/registry.js +37 -35
- package/dist/cli/commands/watch.js +5 -5
- package/dist/cli/output/terminal-reporter.js +9 -9
- package/dist/cli/output.d.ts +1 -1
- package/dist/cli/output.js +9 -11
- package/dist/config/loader.js +2 -2
- package/dist/config/validator.d.ts +33 -33
- package/dist/constants/core.d.ts +4 -8
- package/dist/constants/core.js +4 -8
- package/dist/constants/testing.d.ts +11 -11
- package/dist/constants/testing.js +11 -11
- package/dist/contract/validator.js +7 -7
- package/dist/discovery/discovery.js +88 -14
- package/dist/discovery/types.d.ts +5 -1
- package/dist/docs/agents.js +145 -57
- package/dist/docs/contract.js +136 -40
- package/dist/errors/retry.js +11 -5
- package/dist/interview/dependency-resolver.d.ts +3 -2
- package/dist/interview/dependency-resolver.js +31 -2
- package/dist/interview/interviewer.js +10 -2
- package/dist/interview/rate-limiter.js +7 -3
- package/dist/interview/stateful-test-runner.d.ts +1 -0
- package/dist/interview/stateful-test-runner.js +4 -0
- package/dist/interview/types.d.ts +3 -0
- package/dist/llm/anthropic.js +14 -4
- package/dist/llm/fallback.d.ts +1 -0
- package/dist/llm/fallback.js +7 -1
- package/dist/llm/openai.js +15 -4
- package/dist/prompts/templates.js +30 -15
- package/dist/protocol/index.d.ts +2 -0
- package/dist/protocol/index.js +2 -0
- package/dist/protocol/version-registry.d.ts +66 -0
- package/dist/protocol/version-registry.js +159 -0
- package/dist/scenarios/evaluator.js +9 -10
- package/dist/transport/http-transport.d.ts +11 -1
- package/dist/transport/http-transport.js +21 -2
- package/dist/transport/mcp-client.d.ts +29 -1
- package/dist/transport/mcp-client.js +92 -7
- package/dist/transport/sse-transport.js +5 -4
- package/dist/transport/types.d.ts +134 -1
- package/dist/utils/concurrency.d.ts +2 -0
- package/dist/utils/concurrency.js +9 -2
- package/dist/utils/markdown.js +13 -18
- package/dist/utils/timeout.js +2 -1
- package/dist/version.js +1 -1
- package/man/bellwether.1 +1 -1
- package/man/bellwether.1.md +2 -2
- package/package.json +2 -1
|
@@ -27,7 +27,8 @@ import * as output from '../output.js';
|
|
|
27
27
|
import { extractServerContextFromArgs } from '../utils/server-context.js';
|
|
28
28
|
import { configureLogger } from '../../logging/logger.js';
|
|
29
29
|
import { buildInterviewInsights } from '../../interview/insights.js';
|
|
30
|
-
import { EXIT_CODES, SEVERITY_TO_EXIT_CODE, PATHS, SECURITY_TESTING, CHECK_SAMPLING, WORKFLOW, REPORT_SCHEMAS, PERCENTAGE_CONVERSION, } from '../../constants.js';
|
|
30
|
+
import { EXIT_CODES, SEVERITY_TO_EXIT_CODE, PATHS, SECURITY_TESTING, CHECK_SAMPLING, WORKFLOW, REPORT_SCHEMAS, PERCENTAGE_CONVERSION, MCP, } from '../../constants.js';
|
|
31
|
+
import { getFeatureFlags, getExcludedFeatureNames } from '../../protocol/index.js';
|
|
31
32
|
export const checkCommand = new Command('check')
|
|
32
33
|
.description('Check MCP server schema and detect drift (free, fast, deterministic)')
|
|
33
34
|
.allowUnknownOption() // Allow server flags like -y for npx to pass through
|
|
@@ -177,6 +178,7 @@ export const checkCommand = new Command('check')
|
|
|
177
178
|
debug: logLevel === 'debug',
|
|
178
179
|
transport,
|
|
179
180
|
});
|
|
181
|
+
let pendingExitCode;
|
|
180
182
|
try {
|
|
181
183
|
// Connect to MCP server
|
|
182
184
|
output.info('Connecting to MCP server...');
|
|
@@ -197,6 +199,7 @@ export const checkCommand = new Command('check')
|
|
|
197
199
|
output.info('Discovering capabilities...');
|
|
198
200
|
const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : (remoteUrl ?? serverCommand), transport === 'stdio' ? args : []);
|
|
199
201
|
const resourceCount = discovery.resources?.length ?? 0;
|
|
202
|
+
const resourceTemplateCount = discovery.resourceTemplates?.length ?? 0;
|
|
200
203
|
const discoveryParts = [
|
|
201
204
|
`${discovery.tools.length} tools`,
|
|
202
205
|
`${discovery.prompts.length} prompts`,
|
|
@@ -204,11 +207,34 @@ export const checkCommand = new Command('check')
|
|
|
204
207
|
if (resourceCount > 0) {
|
|
205
208
|
discoveryParts.push(`${resourceCount} resources`);
|
|
206
209
|
}
|
|
210
|
+
if (resourceTemplateCount > 0) {
|
|
211
|
+
discoveryParts.push(`${resourceTemplateCount} resource templates`);
|
|
212
|
+
}
|
|
207
213
|
output.info(`Found ${discoveryParts.join(', ')}\n`);
|
|
214
|
+
// Show server instructions if provided
|
|
215
|
+
if (discovery.instructions) {
|
|
216
|
+
output.info(`Server instructions: ${discovery.instructions}\n`);
|
|
217
|
+
}
|
|
218
|
+
// Show protocol version context
|
|
219
|
+
const features = getFeatureFlags(discovery.protocolVersion);
|
|
220
|
+
if (discovery.protocolVersion !== MCP.PROTOCOL_VERSION) {
|
|
221
|
+
output.info(`Protocol Version: ${discovery.protocolVersion} (bellwether supports up to ${MCP.PROTOCOL_VERSION})`);
|
|
222
|
+
const excluded = getExcludedFeatureNames(discovery.protocolVersion);
|
|
223
|
+
if (excluded.length > 0) {
|
|
224
|
+
output.info(` Version-gated features excluded: ${excluded.join(', ')}`);
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
// Show new capabilities (completions, tasks) — gated by protocol version
|
|
228
|
+
if (discovery.capabilities.completions && features.completions) {
|
|
229
|
+
output.info('Server supports: Completions (autocomplete)');
|
|
230
|
+
}
|
|
231
|
+
if (discovery.capabilities.tasks && features.tasks) {
|
|
232
|
+
output.info('Server supports: Tasks');
|
|
233
|
+
}
|
|
208
234
|
// Output discovery warnings (Issue D: anomaly detection)
|
|
209
235
|
if (discovery.warnings && discovery.warnings.length > 0) {
|
|
210
236
|
for (const warning of discovery.warnings) {
|
|
211
|
-
output.warn(
|
|
237
|
+
output.warn(`[warn] ${warning.message}`);
|
|
212
238
|
}
|
|
213
239
|
output.newline();
|
|
214
240
|
}
|
|
@@ -217,7 +243,7 @@ export const checkCommand = new Command('check')
|
|
|
217
243
|
output.warn('Transport errors during discovery:');
|
|
218
244
|
for (const err of discovery.transportErrors.slice(0, 3)) {
|
|
219
245
|
const typeLabel = err.category.replace(/_/g, ' ');
|
|
220
|
-
output.warn(`
|
|
246
|
+
output.warn(` [fail] ${typeLabel}: ${err.message.substring(0, 100)}`);
|
|
221
247
|
}
|
|
222
248
|
if (discovery.transportErrors.length > 3) {
|
|
223
249
|
output.warn(` ... and ${discovery.transportErrors.length - 3} more`);
|
|
@@ -327,6 +353,7 @@ export const checkCommand = new Command('check')
|
|
|
327
353
|
externalServices: config.check.externalServices,
|
|
328
354
|
assertions: config.check.assertions,
|
|
329
355
|
rateLimit: config.check.rateLimit,
|
|
356
|
+
testFixtures: config.check.testFixtures,
|
|
330
357
|
});
|
|
331
358
|
// Log sampling configuration
|
|
332
359
|
if (minSamples > CHECK_SAMPLING.DEFAULT_MIN_SAMPLES) {
|
|
@@ -403,7 +430,7 @@ export const checkCommand = new Command('check')
|
|
|
403
430
|
if (result.scenarioResults && result.scenarioResults.length > 0) {
|
|
404
431
|
const passed = result.scenarioResults.filter((r) => r.passed).length;
|
|
405
432
|
const failed = result.scenarioResults.length - passed;
|
|
406
|
-
const statusIcon = failed === 0 ? '
|
|
433
|
+
const statusIcon = failed === 0 ? '[PASS]' : '[FAIL]';
|
|
407
434
|
output.info(`\nCustom scenarios: ${passed}/${result.scenarioResults.length} passed ${statusIcon}`);
|
|
408
435
|
if (failed > 0) {
|
|
409
436
|
output.info('\nFailed scenarios:');
|
|
@@ -599,7 +626,7 @@ export const checkCommand = new Command('check')
|
|
|
599
626
|
try {
|
|
600
627
|
const workflowResult = await workflowExecutor.execute(workflow);
|
|
601
628
|
workflowResults.push(workflowResult);
|
|
602
|
-
const statusIcon = workflowResult.success ? '
|
|
629
|
+
const statusIcon = workflowResult.success ? '[PASS]' : '[FAIL]';
|
|
603
630
|
const stepsInfo = `${workflowResult.steps.filter((s) => s.success).length}/${workflow.steps.length} steps`;
|
|
604
631
|
if (workflowResult.success) {
|
|
605
632
|
output.success(` ${statusIcon} ${workflow.name} (${stepsInfo}) - ${workflowResult.durationMs}ms`);
|
|
@@ -615,7 +642,7 @@ export const checkCommand = new Command('check')
|
|
|
615
642
|
}
|
|
616
643
|
}
|
|
617
644
|
catch (error) {
|
|
618
|
-
output.error(`
|
|
645
|
+
output.error(` [FAIL] ${workflow.name} - Error: ${error instanceof Error ? error.message : error}`);
|
|
619
646
|
}
|
|
620
647
|
}
|
|
621
648
|
// Workflow summary
|
|
@@ -881,35 +908,40 @@ export const checkCommand = new Command('check')
|
|
|
881
908
|
else if (!options.acceptDrift) {
|
|
882
909
|
// Check if diff meets failure threshold based on severity config
|
|
883
910
|
const shouldFail = shouldFailOnDiff(diff, severityConfig.failOnSeverity);
|
|
884
|
-
const
|
|
911
|
+
const driftExitCode = SEVERITY_TO_EXIT_CODE[diff.severity] ?? EXIT_CODES.CLEAN;
|
|
885
912
|
if (diff.severity === 'breaking') {
|
|
886
913
|
output.error('\nBreaking changes detected!');
|
|
887
914
|
output.error('Use --accept-drift to accept these changes as intentional.');
|
|
888
915
|
if (failOnDrift || shouldFail) {
|
|
889
|
-
|
|
916
|
+
pendingExitCode = driftExitCode;
|
|
917
|
+
return;
|
|
890
918
|
}
|
|
891
919
|
}
|
|
892
920
|
else if (diff.severity === 'warning') {
|
|
893
921
|
output.warn('\nWarning-level changes detected.');
|
|
894
922
|
output.warn('Use --accept-drift to accept these changes as intentional.');
|
|
895
923
|
if (failOnDrift || shouldFail) {
|
|
896
|
-
|
|
924
|
+
pendingExitCode = driftExitCode;
|
|
925
|
+
return;
|
|
897
926
|
}
|
|
898
927
|
}
|
|
899
928
|
else if (diff.severity === 'info') {
|
|
900
929
|
output.info('\nInfo-level changes detected (non-breaking).');
|
|
901
930
|
if (shouldFail) {
|
|
902
|
-
|
|
931
|
+
pendingExitCode = driftExitCode;
|
|
932
|
+
return;
|
|
903
933
|
}
|
|
904
934
|
}
|
|
905
935
|
// Exit with appropriate code based on severity
|
|
906
936
|
// This provides semantic exit codes for CI/CD even when not failing
|
|
907
|
-
|
|
937
|
+
pendingExitCode = driftExitCode;
|
|
938
|
+
return;
|
|
908
939
|
}
|
|
909
940
|
}
|
|
910
941
|
if (config.check.assertions.strict && (result.metadata.assertions?.failed ?? 0) > 0) {
|
|
911
942
|
output.error('\nAssertion failures detected and check.assertions.strict is enabled.');
|
|
912
|
-
|
|
943
|
+
pendingExitCode = EXIT_CODES.BREAKING;
|
|
944
|
+
return;
|
|
913
945
|
}
|
|
914
946
|
}
|
|
915
947
|
catch (error) {
|
|
@@ -931,10 +963,18 @@ export const checkCommand = new Command('check')
|
|
|
931
963
|
output.error(' - The server command was not found');
|
|
932
964
|
output.error(' - Check that the command is installed and in PATH');
|
|
933
965
|
}
|
|
934
|
-
|
|
966
|
+
pendingExitCode = EXIT_CODES.ERROR;
|
|
935
967
|
}
|
|
936
968
|
finally {
|
|
937
|
-
|
|
969
|
+
try {
|
|
970
|
+
await mcpClient.disconnect();
|
|
971
|
+
}
|
|
972
|
+
catch {
|
|
973
|
+
/* ignore cleanup errors */
|
|
974
|
+
}
|
|
975
|
+
if (pendingExitCode !== undefined) {
|
|
976
|
+
process.exit(pendingExitCode);
|
|
977
|
+
}
|
|
938
978
|
}
|
|
939
979
|
});
|
|
940
980
|
/**
|
|
@@ -60,7 +60,7 @@ async function discoverAction(command, args, options) {
|
|
|
60
60
|
if (result.warnings && result.warnings.length > 0) {
|
|
61
61
|
output.newline();
|
|
62
62
|
for (const warning of result.warnings) {
|
|
63
|
-
output.warn(
|
|
63
|
+
output.warn(`[warn] ${warning.message}`);
|
|
64
64
|
}
|
|
65
65
|
}
|
|
66
66
|
// Output transport errors from discovery
|
|
@@ -69,7 +69,7 @@ async function discoverAction(command, args, options) {
|
|
|
69
69
|
output.warn('Transport errors during discovery:');
|
|
70
70
|
for (const err of result.transportErrors.slice(0, 3)) {
|
|
71
71
|
const typeLabel = err.category.replace(/_/g, ' ');
|
|
72
|
-
output.warn(`
|
|
72
|
+
output.warn(` [fail] ${typeLabel}: ${err.message.substring(0, 100)}`);
|
|
73
73
|
}
|
|
74
74
|
if (result.transportErrors.length > 3) {
|
|
75
75
|
output.warn(` ... and ${result.transportErrors.length - 3} more`);
|
|
@@ -17,7 +17,8 @@ import { loadConfig, ConfigNotFoundError, parseCommandString, } from '../../conf
|
|
|
17
17
|
import { validateConfigForExplore } from '../../config/validator.js';
|
|
18
18
|
import { CostTracker, estimateInterviewCost, estimateInterviewTime, formatCostAndTimeEstimate, suggestOptimizations, formatOptimizationSuggestions, } from '../../cost/index.js';
|
|
19
19
|
import { getMetricsCollector, resetMetricsCollector } from '../../metrics/collector.js';
|
|
20
|
-
import { EXIT_CODES, WORKFLOW, PATHS, REPORT_SCHEMAS } from '../../constants.js';
|
|
20
|
+
import { EXIT_CODES, WORKFLOW, PATHS, REPORT_SCHEMAS, MCP } from '../../constants.js';
|
|
21
|
+
import { getExcludedFeatureNames } from '../../protocol/index.js';
|
|
21
22
|
import { FallbackLLMClient } from '../../llm/fallback.js';
|
|
22
23
|
import { getGlobalCache, resetGlobalCache } from '../../cache/response-cache.js';
|
|
23
24
|
import { InterviewProgressBar, formatExploreBanner } from '../utils/progress.js';
|
|
@@ -159,6 +160,20 @@ export const exploreCommand = new Command('explore')
|
|
|
159
160
|
output.error(' - Ollama: No API key needed (ensure Ollama is running)');
|
|
160
161
|
process.exit(EXIT_CODES.ERROR);
|
|
161
162
|
}
|
|
163
|
+
let pendingExitCode;
|
|
164
|
+
// Handle SIGINT/SIGTERM for graceful shutdown
|
|
165
|
+
const signalCleanup = async () => {
|
|
166
|
+
output.info('\n\nInterrupted. Cleaning up...');
|
|
167
|
+
try {
|
|
168
|
+
await mcpClient.disconnect();
|
|
169
|
+
}
|
|
170
|
+
catch {
|
|
171
|
+
/* ignore cleanup errors */
|
|
172
|
+
}
|
|
173
|
+
process.exit(EXIT_CODES.ERROR);
|
|
174
|
+
};
|
|
175
|
+
process.on('SIGINT', signalCleanup);
|
|
176
|
+
process.on('SIGTERM', signalCleanup);
|
|
162
177
|
try {
|
|
163
178
|
// Connect to MCP server
|
|
164
179
|
output.info('Connecting to MCP server...');
|
|
@@ -183,6 +198,14 @@ export const exploreCommand = new Command('explore')
|
|
|
183
198
|
discoveryParts.push(`${resourceCount} resources`);
|
|
184
199
|
}
|
|
185
200
|
output.info(`Found ${discoveryParts.join(', ')}\n`);
|
|
201
|
+
// Show protocol version context
|
|
202
|
+
if (discovery.protocolVersion !== MCP.PROTOCOL_VERSION) {
|
|
203
|
+
output.info(`Protocol Version: ${discovery.protocolVersion} (bellwether supports up to ${MCP.PROTOCOL_VERSION})`);
|
|
204
|
+
const excluded = getExcludedFeatureNames(discovery.protocolVersion);
|
|
205
|
+
if (excluded.length > 0) {
|
|
206
|
+
output.info(` Version-gated features excluded: ${excluded.join(', ')}`);
|
|
207
|
+
}
|
|
208
|
+
}
|
|
186
209
|
// Update metrics
|
|
187
210
|
metricsCollector.updateInterviewCounters({
|
|
188
211
|
toolsDiscovered: discovery.tools.length,
|
|
@@ -423,7 +446,7 @@ export const exploreCommand = new Command('explore')
|
|
|
423
446
|
if (result.scenarioResults && result.scenarioResults.length > 0) {
|
|
424
447
|
const passed = result.scenarioResults.filter((r) => r.passed).length;
|
|
425
448
|
const failed = result.scenarioResults.length - passed;
|
|
426
|
-
const statusIcon = failed === 0 ? '
|
|
449
|
+
const statusIcon = failed === 0 ? '[PASS]' : '[FAIL]';
|
|
427
450
|
output.info(`\nCustom scenarios: ${passed}/${result.scenarioResults.length} passed ${statusIcon}`);
|
|
428
451
|
if (failed > 0) {
|
|
429
452
|
output.info('\nFailed scenarios:');
|
|
@@ -441,7 +464,7 @@ export const exploreCommand = new Command('explore')
|
|
|
441
464
|
if (result.workflowResults && result.workflowResults.length > 0) {
|
|
442
465
|
const successful = result.workflowResults.filter((wr) => wr.success).length;
|
|
443
466
|
const failed = result.workflowResults.length - successful;
|
|
444
|
-
const statusIcon = failed === 0 ? '
|
|
467
|
+
const statusIcon = failed === 0 ? '[PASS]' : '[FAIL]';
|
|
445
468
|
output.info(`\nWorkflows: ${successful}/${result.workflowResults.length} passed ${statusIcon}`);
|
|
446
469
|
if (failed > 0) {
|
|
447
470
|
output.info('\nFailed workflows:');
|
|
@@ -477,11 +500,21 @@ export const exploreCommand = new Command('explore')
|
|
|
477
500
|
output.error(' - Missing or invalid API key');
|
|
478
501
|
output.error(' - Run "bellwether auth" to configure API keys');
|
|
479
502
|
}
|
|
480
|
-
|
|
503
|
+
pendingExitCode = EXIT_CODES.ERROR;
|
|
481
504
|
}
|
|
482
505
|
finally {
|
|
506
|
+
process.removeListener('SIGINT', signalCleanup);
|
|
507
|
+
process.removeListener('SIGTERM', signalCleanup);
|
|
483
508
|
restoreLogLevel();
|
|
484
|
-
|
|
509
|
+
try {
|
|
510
|
+
await mcpClient.disconnect();
|
|
511
|
+
}
|
|
512
|
+
catch {
|
|
513
|
+
/* ignore cleanup errors */
|
|
514
|
+
}
|
|
515
|
+
if (pendingExitCode !== undefined) {
|
|
516
|
+
process.exit(pendingExitCode);
|
|
517
|
+
}
|
|
485
518
|
}
|
|
486
519
|
});
|
|
487
520
|
//# sourceMappingURL=explore.js.map
|
|
@@ -17,8 +17,7 @@ import { getGoldenStorePath, saveGoldenOutput, createGoldenOutput, listGoldenOut
|
|
|
17
17
|
import * as output from '../output.js';
|
|
18
18
|
import { EXIT_CODES, PATHS } from '../../constants.js';
|
|
19
19
|
import { formatDateISO } from '../../utils/index.js';
|
|
20
|
-
export const goldenCommand = new Command('golden')
|
|
21
|
-
.description('Manage golden outputs for tool validation');
|
|
20
|
+
export const goldenCommand = new Command('golden').description('Manage golden outputs for tool validation');
|
|
22
21
|
// Save command
|
|
23
22
|
goldenCommand
|
|
24
23
|
.command('save')
|
|
@@ -56,12 +55,8 @@ goldenCommand
|
|
|
56
55
|
}
|
|
57
56
|
const argsJson = options.args ?? config.golden.defaultArgs;
|
|
58
57
|
const mode = options.mode ?? config.golden.mode;
|
|
59
|
-
const normalizeTimestamps = options.normalizeTimestamps === false
|
|
60
|
-
|
|
61
|
-
: config.golden.normalizeTimestamps;
|
|
62
|
-
const normalizeUuids = options.normalizeUuids === false
|
|
63
|
-
? false
|
|
64
|
-
: config.golden.normalizeUuids;
|
|
58
|
+
const normalizeTimestamps = options.normalizeTimestamps === false ? false : config.golden.normalizeTimestamps;
|
|
59
|
+
const normalizeUuids = options.normalizeUuids === false ? false : config.golden.normalizeUuids;
|
|
65
60
|
// Parse tool arguments
|
|
66
61
|
let toolArgs;
|
|
67
62
|
try {
|
|
@@ -100,10 +95,10 @@ goldenCommand
|
|
|
100
95
|
await mcpClient.connect(serverCommand, args, config.server.env);
|
|
101
96
|
// Discover tools
|
|
102
97
|
const discovery = await discover(mcpClient, serverCommand, args);
|
|
103
|
-
const tool = discovery.tools.find(t => t.name === options.tool);
|
|
98
|
+
const tool = discovery.tools.find((t) => t.name === options.tool);
|
|
104
99
|
if (!tool) {
|
|
105
100
|
output.error(`Tool not found: ${options.tool}`);
|
|
106
|
-
output.info(`Available tools: ${discovery.tools.map(t => t.name).join(', ')}`);
|
|
101
|
+
output.info(`Available tools: ${discovery.tools.map((t) => t.name).join(', ')}`);
|
|
107
102
|
process.exit(EXIT_CODES.ERROR);
|
|
108
103
|
}
|
|
109
104
|
// Call the tool
|
|
@@ -111,7 +106,7 @@ goldenCommand
|
|
|
111
106
|
const response = await mcpClient.callTool(options.tool, toolArgs);
|
|
112
107
|
if (response.isError) {
|
|
113
108
|
output.error('Tool returned an error:');
|
|
114
|
-
const textContent = response.content.find(c => c.type === 'text');
|
|
109
|
+
const textContent = response.content.find((c) => c.type === 'text');
|
|
115
110
|
if (textContent && 'text' in textContent) {
|
|
116
111
|
output.error(String(textContent.text));
|
|
117
112
|
}
|
|
@@ -192,7 +187,7 @@ goldenCommand
|
|
|
192
187
|
}
|
|
193
188
|
const goldens = listGoldenOutputs(storePath);
|
|
194
189
|
const filteredGoldens = options.tool
|
|
195
|
-
? goldens.filter(g => g.toolName === options.tool)
|
|
190
|
+
? goldens.filter((g) => g.toolName === options.tool)
|
|
196
191
|
: goldens;
|
|
197
192
|
if (filteredGoldens.length === 0) {
|
|
198
193
|
if (options.tool) {
|
|
@@ -222,14 +217,14 @@ goldenCommand
|
|
|
222
217
|
const response = await mcpClient.callTool(golden.toolName, golden.inputArgs);
|
|
223
218
|
const result = compareWithGolden(golden, response);
|
|
224
219
|
results.push(result);
|
|
225
|
-
const icon = result.passed ? '
|
|
220
|
+
const icon = result.passed ? '[PASS]' : '[FAIL]';
|
|
226
221
|
if (result.passed) {
|
|
227
222
|
output.success(` ${icon} ${result.summary}`);
|
|
228
223
|
}
|
|
229
224
|
else {
|
|
230
225
|
output.error(` ${icon} ${result.summary}`);
|
|
231
|
-
if (result.differences.filter(d => !d.allowed).length <= 5) {
|
|
232
|
-
for (const diff of result.differences.filter(d => !d.allowed)) {
|
|
226
|
+
if (result.differences.filter((d) => !d.allowed).length <= 5) {
|
|
227
|
+
for (const diff of result.differences.filter((d) => !d.allowed)) {
|
|
233
228
|
output.warn(` - ${diff.description} at ${diff.path}`);
|
|
234
229
|
}
|
|
235
230
|
}
|
|
@@ -242,22 +237,24 @@ goldenCommand
|
|
|
242
237
|
severity: 'breaking',
|
|
243
238
|
mode: golden.tolerance.mode,
|
|
244
239
|
goldenCapturedAt: golden.capturedAt,
|
|
245
|
-
differences: [
|
|
240
|
+
differences: [
|
|
241
|
+
{
|
|
246
242
|
type: 'changed',
|
|
247
243
|
path: '$',
|
|
248
244
|
expected: 'successful response',
|
|
249
245
|
actual: `error: ${error instanceof Error ? error.message : String(error)}`,
|
|
250
246
|
allowed: false,
|
|
251
247
|
description: 'Tool call failed',
|
|
252
|
-
}
|
|
248
|
+
},
|
|
249
|
+
],
|
|
253
250
|
summary: `Tool call failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
254
251
|
});
|
|
255
|
-
output.error(`
|
|
252
|
+
output.error(` [FAIL] Tool call failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
256
253
|
}
|
|
257
254
|
}
|
|
258
255
|
output.newline();
|
|
259
256
|
// Summary
|
|
260
|
-
const passed = results.filter(r => r.passed).length;
|
|
257
|
+
const passed = results.filter((r) => r.passed).length;
|
|
261
258
|
const failed = results.length - passed;
|
|
262
259
|
if (format === 'json') {
|
|
263
260
|
output.info(JSON.stringify(results, null, 2));
|
|
@@ -372,13 +369,13 @@ function formatResultsMarkdown(results) {
|
|
|
372
369
|
lines.push('| Tool | Status | Mode | Differences |');
|
|
373
370
|
lines.push('|------|--------|------|-------------|');
|
|
374
371
|
for (const result of results) {
|
|
375
|
-
const status = result.passed ? '
|
|
376
|
-
const diffCount = result.differences.filter(d => !d.allowed).length;
|
|
372
|
+
const status = result.passed ? 'Match' : `${result.severity}`;
|
|
373
|
+
const diffCount = result.differences.filter((d) => !d.allowed).length;
|
|
377
374
|
lines.push(`| \`${result.toolName}\` | ${status} | ${result.mode} | ${diffCount} |`);
|
|
378
375
|
}
|
|
379
376
|
lines.push('');
|
|
380
377
|
// Details for failed comparisons
|
|
381
|
-
const failed = results.filter(r => !r.passed);
|
|
378
|
+
const failed = results.filter((r) => !r.passed);
|
|
382
379
|
if (failed.length > 0) {
|
|
383
380
|
lines.push('### Drift Details');
|
|
384
381
|
lines.push('');
|
|
@@ -390,7 +387,7 @@ function formatResultsMarkdown(results) {
|
|
|
390
387
|
lines.push(`**Severity:** ${result.severity}`);
|
|
391
388
|
lines.push('');
|
|
392
389
|
lines.push('**Changes:**');
|
|
393
|
-
for (const diff of result.differences.filter(d => !d.allowed)) {
|
|
390
|
+
for (const diff of result.differences.filter((d) => !d.allowed)) {
|
|
394
391
|
lines.push(`- ${diff.description}`);
|
|
395
392
|
if (diff.expected !== undefined) {
|
|
396
393
|
lines.push(` - Expected: \`${String(diff.expected)}\``);
|
|
@@ -17,12 +17,7 @@ import * as output from '../output.js';
|
|
|
17
17
|
* Returns an array of variable names found.
|
|
18
18
|
*/
|
|
19
19
|
function detectEnvVars(cwd) {
|
|
20
|
-
const envExampleFiles = [
|
|
21
|
-
'.env.example',
|
|
22
|
-
'.env.sample',
|
|
23
|
-
'env.example',
|
|
24
|
-
'env.sample',
|
|
25
|
-
];
|
|
20
|
+
const envExampleFiles = ['.env.example', '.env.sample', 'env.example', 'env.sample'];
|
|
26
21
|
for (const filename of envExampleFiles) {
|
|
27
22
|
const filepath = join(cwd, filename);
|
|
28
23
|
if (existsSync(filepath)) {
|
|
@@ -129,7 +124,15 @@ export const initCommand = new Command('init')
|
|
|
129
124
|
});
|
|
130
125
|
}
|
|
131
126
|
// Write config file
|
|
132
|
-
|
|
127
|
+
try {
|
|
128
|
+
writeFileSync(configPath, content);
|
|
129
|
+
}
|
|
130
|
+
catch (error) {
|
|
131
|
+
output.error(`Failed to write config file: ${error instanceof Error ? error.message : String(error)}`);
|
|
132
|
+
output.error(` Path: ${configPath}`);
|
|
133
|
+
output.error(' Check that the directory exists and you have write permissions.');
|
|
134
|
+
return;
|
|
135
|
+
}
|
|
133
136
|
// Show success message
|
|
134
137
|
output.success(`Created: ${configPath}`);
|
|
135
138
|
output.newline();
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import { Command } from 'commander';
|
|
5
5
|
import chalk from 'chalk';
|
|
6
|
-
import { RegistryClient, generateRunCommand
|
|
6
|
+
import { RegistryClient, generateRunCommand } from '../../registry/index.js';
|
|
7
7
|
import { EXIT_CODES } from '../../constants.js';
|
|
8
8
|
import { loadConfig, ConfigNotFoundError } from '../../config/loader.js';
|
|
9
9
|
import * as output from '../output.js';
|
|
@@ -108,7 +108,7 @@ function isLikelyEnvVar(name) {
|
|
|
108
108
|
/auth/i,
|
|
109
109
|
/^[A-Z][A-Z0-9_]+$/, // ALL_CAPS_PATTERN
|
|
110
110
|
];
|
|
111
|
-
return envPatterns.some(pattern => pattern.test(name));
|
|
111
|
+
return envPatterns.some((pattern) => pattern.test(name));
|
|
112
112
|
}
|
|
113
113
|
/**
|
|
114
114
|
* Extract likely environment variable name from argument.
|
|
@@ -160,36 +160,38 @@ function analyzeServerRequirements(entry) {
|
|
|
160
160
|
// Only look at the actual server name part (after last /) to avoid false matches
|
|
161
161
|
// e.g., "io.github.user/postgres" should match "postgres", not "github"
|
|
162
162
|
const fullName = entry.server.name.toLowerCase();
|
|
163
|
-
const serverNamePart = fullName.includes('/')
|
|
163
|
+
const serverNamePart = fullName.includes('/')
|
|
164
|
+
? fullName.split('/').pop() || fullName
|
|
165
|
+
: fullName;
|
|
164
166
|
const serviceEnvVars = {
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
167
|
+
openai: ['OPENAI_API_KEY'],
|
|
168
|
+
anthropic: ['ANTHROPIC_API_KEY'],
|
|
169
|
+
github: ['GITHUB_TOKEN', 'GITHUB_PERSONAL_ACCESS_TOKEN'],
|
|
170
|
+
gitlab: ['GITLAB_TOKEN', 'GITLAB_PERSONAL_ACCESS_TOKEN'],
|
|
171
|
+
slack: ['SLACK_TOKEN', 'SLACK_BOT_TOKEN'],
|
|
172
|
+
discord: ['DISCORD_TOKEN', 'DISCORD_BOT_TOKEN'],
|
|
173
|
+
postgres: ['DATABASE_URL', 'POSTGRES_CONNECTION_STRING'],
|
|
174
|
+
mysql: ['DATABASE_URL', 'MYSQL_CONNECTION_STRING'],
|
|
175
|
+
redis: ['REDIS_URL'],
|
|
176
|
+
mongodb: ['MONGODB_URI'],
|
|
177
|
+
aws: ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'],
|
|
178
|
+
azure: ['AZURE_SUBSCRIPTION_ID', 'AZURE_CLIENT_ID'],
|
|
179
|
+
gcp: ['GOOGLE_APPLICATION_CREDENTIALS'],
|
|
180
|
+
google: ['GOOGLE_API_KEY'],
|
|
181
|
+
stripe: ['STRIPE_API_KEY'],
|
|
182
|
+
twilio: ['TWILIO_ACCOUNT_SID', 'TWILIO_AUTH_TOKEN'],
|
|
183
|
+
sendgrid: ['SENDGRID_API_KEY'],
|
|
184
|
+
mailgun: ['MAILGUN_API_KEY'],
|
|
185
|
+
firebase: ['FIREBASE_PROJECT_ID'],
|
|
186
|
+
supabase: ['SUPABASE_URL', 'SUPABASE_KEY'],
|
|
187
|
+
notion: ['NOTION_API_KEY'],
|
|
188
|
+
airtable: ['AIRTABLE_API_KEY'],
|
|
189
|
+
letta: ['LETTA_API_KEY'],
|
|
190
|
+
brave: ['BRAVE_API_KEY'],
|
|
191
|
+
puppeteer: [],
|
|
192
|
+
playwright: [],
|
|
193
|
+
filesystem: [],
|
|
194
|
+
everything: [],
|
|
193
195
|
'sequential-thinking': [],
|
|
194
196
|
};
|
|
195
197
|
for (const [service, vars] of Object.entries(serviceEnvVars)) {
|
|
@@ -225,10 +227,10 @@ function displayServer(entry) {
|
|
|
225
227
|
nameLine += chalk.gray(` v${server.version}`);
|
|
226
228
|
}
|
|
227
229
|
if (meta?.status === 'active') {
|
|
228
|
-
nameLine += chalk.green('
|
|
230
|
+
nameLine += chalk.green(' [active]');
|
|
229
231
|
}
|
|
230
232
|
if (requirements.needsSetup) {
|
|
231
|
-
nameLine += chalk.yellow('
|
|
233
|
+
nameLine += chalk.yellow(' [setup required]');
|
|
232
234
|
}
|
|
233
235
|
output.info(nameLine);
|
|
234
236
|
// Description
|
|
@@ -265,12 +267,12 @@ function displayServer(entry) {
|
|
|
265
267
|
if (requirements.envVars.length > 0) {
|
|
266
268
|
output.info(chalk.yellow(' Environment:'));
|
|
267
269
|
for (const envVar of requirements.envVars) {
|
|
268
|
-
const isSet = process.env[envVar] ? chalk.green('
|
|
270
|
+
const isSet = process.env[envVar] ? chalk.green('set') : chalk.red('missing');
|
|
269
271
|
output.info(chalk.yellow(` ${isSet} ${envVar}`));
|
|
270
272
|
}
|
|
271
273
|
}
|
|
272
274
|
// Setup hints (new)
|
|
273
|
-
if (requirements.setupHints.length > 0 && requirements.envVars.some(v => !process.env[v])) {
|
|
275
|
+
if (requirements.setupHints.length > 0 && requirements.envVars.some((v) => !process.env[v])) {
|
|
274
276
|
output.info(chalk.gray(' Setup:'));
|
|
275
277
|
for (const hint of requirements.setupHints) {
|
|
276
278
|
output.info(chalk.gray(` → ${hint}`));
|
|
@@ -99,7 +99,7 @@ export const watchCommand = new Command('watch')
|
|
|
99
99
|
sessionId: remoteSessionId || undefined,
|
|
100
100
|
});
|
|
101
101
|
}
|
|
102
|
-
const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : remoteUrl ?? serverCommand, transport === 'stdio' ? args : []);
|
|
102
|
+
const discovery = await discover(mcpClient, transport === 'stdio' ? serverCommand : (remoteUrl ?? serverCommand), transport === 'stdio' ? args : []);
|
|
103
103
|
output.info(`Found ${discovery.tools.length} tools`);
|
|
104
104
|
if (discovery.tools.length === 0) {
|
|
105
105
|
output.info('No tools found. Skipping.');
|
|
@@ -190,7 +190,7 @@ export const watchCommand = new Command('watch')
|
|
|
190
190
|
walkDir(fullPath);
|
|
191
191
|
}
|
|
192
192
|
else if (entry.isFile()) {
|
|
193
|
-
if (extensions.some(ext => entry.name.endsWith(ext))) {
|
|
193
|
+
if (extensions.some((ext) => entry.name.endsWith(ext))) {
|
|
194
194
|
const stat = statSync(fullPath);
|
|
195
195
|
const modTime = stat.mtimeMs;
|
|
196
196
|
const lastMod = fileModTimes.get(fullPath);
|
|
@@ -250,14 +250,14 @@ export const watchCommand = new Command('watch')
|
|
|
250
250
|
}, interval);
|
|
251
251
|
// Handle exit
|
|
252
252
|
const cleanup = () => {
|
|
253
|
+
// Remove signal handlers first to prevent re-entry
|
|
254
|
+
process.removeListener('SIGINT', cleanup);
|
|
255
|
+
process.removeListener('SIGTERM', cleanup);
|
|
253
256
|
output.info('\n\nExiting watch mode.');
|
|
254
257
|
if (currentInterval) {
|
|
255
258
|
clearInterval(currentInterval);
|
|
256
259
|
currentInterval = null;
|
|
257
260
|
}
|
|
258
|
-
// Remove signal handlers to prevent accumulation
|
|
259
|
-
process.removeListener('SIGINT', cleanup);
|
|
260
|
-
process.removeListener('SIGTERM', cleanup);
|
|
261
261
|
process.exit(EXIT_CODES.CLEAN);
|
|
262
262
|
};
|
|
263
263
|
process.on('SIGINT', cleanup);
|
|
@@ -22,21 +22,21 @@ export function formatToolResultLine(summary) {
|
|
|
22
22
|
}
|
|
23
23
|
export function buildCheckSummary(result) {
|
|
24
24
|
const toolProfiles = result.toolProfiles;
|
|
25
|
-
const skipped = toolProfiles.filter(p => p.skipped).map(p => p.name);
|
|
26
|
-
const mocked = toolProfiles.filter(p => p.mocked).map(p => p.name);
|
|
27
|
-
const issueTools = toolProfiles.filter(profileHasIssues).map(p => p.name);
|
|
28
|
-
const fullyTested = toolProfiles.filter(p => !p.skipped && !p.mocked).length;
|
|
25
|
+
const skipped = toolProfiles.filter((p) => p.skipped).map((p) => p.name);
|
|
26
|
+
const mocked = toolProfiles.filter((p) => p.mocked).map((p) => p.name);
|
|
27
|
+
const issueTools = toolProfiles.filter(profileHasIssues).map((p) => p.name);
|
|
28
|
+
const fullyTested = toolProfiles.filter((p) => !p.skipped && !p.mocked).length;
|
|
29
29
|
const lines = [];
|
|
30
30
|
lines.push('Summary:');
|
|
31
|
-
lines.push(
|
|
31
|
+
lines.push(`[PASS] ${fullyTested} tools fully tested`);
|
|
32
32
|
if (skipped.length > 0) {
|
|
33
|
-
lines.push(
|
|
33
|
+
lines.push(`[WARN] ${skipped.length} tools skipped`);
|
|
34
34
|
}
|
|
35
35
|
if (mocked.length > 0) {
|
|
36
|
-
lines.push(
|
|
36
|
+
lines.push(`[WARN] ${mocked.length} tools mocked`);
|
|
37
37
|
}
|
|
38
38
|
if (issueTools.length > 0) {
|
|
39
|
-
lines.push(
|
|
39
|
+
lines.push(`[FAIL] ${issueTools.length} tools have issues`);
|
|
40
40
|
}
|
|
41
41
|
const nextSteps = [];
|
|
42
42
|
const externalServices = result.metadata.externalServices;
|
|
@@ -64,7 +64,7 @@ export function colorizeConfidence(label, _level) {
|
|
|
64
64
|
return label;
|
|
65
65
|
}
|
|
66
66
|
export function profileHasIssues(profile) {
|
|
67
|
-
return profile.interactions.some(i => !i.mocked && i.outcomeAssessment && !i.outcomeAssessment.correct);
|
|
67
|
+
return profile.interactions.some((i) => !i.mocked && i.outcomeAssessment && !i.outcomeAssessment.correct);
|
|
68
68
|
}
|
|
69
69
|
function resolveStatusSymbol(summary) {
|
|
70
70
|
if (summary.skipped) {
|
package/dist/cli/output.d.ts
CHANGED