@dotsetlabs/bellwether 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +291 -0
- package/LICENSE +21 -0
- package/README.md +739 -0
- package/dist/auth/credentials.d.ts +64 -0
- package/dist/auth/credentials.js +218 -0
- package/dist/auth/index.d.ts +6 -0
- package/dist/auth/index.js +6 -0
- package/dist/auth/keychain.d.ts +64 -0
- package/dist/auth/keychain.js +268 -0
- package/dist/baseline/ab-testing.d.ts +80 -0
- package/dist/baseline/ab-testing.js +236 -0
- package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
- package/dist/baseline/ai-compatibility-scorer.js +606 -0
- package/dist/baseline/calibration.d.ts +77 -0
- package/dist/baseline/calibration.js +136 -0
- package/dist/baseline/category-matching.d.ts +85 -0
- package/dist/baseline/category-matching.js +289 -0
- package/dist/baseline/change-impact-analyzer.d.ts +98 -0
- package/dist/baseline/change-impact-analyzer.js +592 -0
- package/dist/baseline/comparator.d.ts +64 -0
- package/dist/baseline/comparator.js +916 -0
- package/dist/baseline/confidence.d.ts +55 -0
- package/dist/baseline/confidence.js +122 -0
- package/dist/baseline/converter.d.ts +61 -0
- package/dist/baseline/converter.js +585 -0
- package/dist/baseline/dependency-analyzer.d.ts +89 -0
- package/dist/baseline/dependency-analyzer.js +567 -0
- package/dist/baseline/deprecation-tracker.d.ts +133 -0
- package/dist/baseline/deprecation-tracker.js +322 -0
- package/dist/baseline/diff.d.ts +55 -0
- package/dist/baseline/diff.js +1584 -0
- package/dist/baseline/documentation-scorer.d.ts +205 -0
- package/dist/baseline/documentation-scorer.js +466 -0
- package/dist/baseline/embeddings.d.ts +118 -0
- package/dist/baseline/embeddings.js +251 -0
- package/dist/baseline/error-analyzer.d.ts +198 -0
- package/dist/baseline/error-analyzer.js +721 -0
- package/dist/baseline/evaluation/evaluator.d.ts +42 -0
- package/dist/baseline/evaluation/evaluator.js +323 -0
- package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
- package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
- package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
- package/dist/baseline/evaluation/golden-dataset.js +717 -0
- package/dist/baseline/evaluation/index.d.ts +15 -0
- package/dist/baseline/evaluation/index.js +15 -0
- package/dist/baseline/evaluation/types.d.ts +186 -0
- package/dist/baseline/evaluation/types.js +8 -0
- package/dist/baseline/external-dependency-detector.d.ts +181 -0
- package/dist/baseline/external-dependency-detector.js +524 -0
- package/dist/baseline/golden-output.d.ts +162 -0
- package/dist/baseline/golden-output.js +636 -0
- package/dist/baseline/health-scorer.d.ts +174 -0
- package/dist/baseline/health-scorer.js +451 -0
- package/dist/baseline/incremental-checker.d.ts +97 -0
- package/dist/baseline/incremental-checker.js +174 -0
- package/dist/baseline/index.d.ts +31 -0
- package/dist/baseline/index.js +42 -0
- package/dist/baseline/migration-generator.d.ts +137 -0
- package/dist/baseline/migration-generator.js +554 -0
- package/dist/baseline/migrations.d.ts +60 -0
- package/dist/baseline/migrations.js +197 -0
- package/dist/baseline/performance-tracker.d.ts +214 -0
- package/dist/baseline/performance-tracker.js +577 -0
- package/dist/baseline/pr-comment-generator.d.ts +117 -0
- package/dist/baseline/pr-comment-generator.js +546 -0
- package/dist/baseline/response-fingerprint.d.ts +127 -0
- package/dist/baseline/response-fingerprint.js +728 -0
- package/dist/baseline/response-schema-tracker.d.ts +129 -0
- package/dist/baseline/response-schema-tracker.js +420 -0
- package/dist/baseline/risk-scorer.d.ts +54 -0
- package/dist/baseline/risk-scorer.js +434 -0
- package/dist/baseline/saver.d.ts +89 -0
- package/dist/baseline/saver.js +554 -0
- package/dist/baseline/scenario-generator.d.ts +151 -0
- package/dist/baseline/scenario-generator.js +905 -0
- package/dist/baseline/schema-compare.d.ts +86 -0
- package/dist/baseline/schema-compare.js +557 -0
- package/dist/baseline/schema-evolution.d.ts +189 -0
- package/dist/baseline/schema-evolution.js +467 -0
- package/dist/baseline/semantic.d.ts +203 -0
- package/dist/baseline/semantic.js +908 -0
- package/dist/baseline/synonyms.d.ts +60 -0
- package/dist/baseline/synonyms.js +386 -0
- package/dist/baseline/telemetry.d.ts +165 -0
- package/dist/baseline/telemetry.js +294 -0
- package/dist/baseline/test-pruner.d.ts +120 -0
- package/dist/baseline/test-pruner.js +387 -0
- package/dist/baseline/types.d.ts +449 -0
- package/dist/baseline/types.js +5 -0
- package/dist/baseline/version.d.ts +138 -0
- package/dist/baseline/version.js +206 -0
- package/dist/cache/index.d.ts +5 -0
- package/dist/cache/index.js +5 -0
- package/dist/cache/response-cache.d.ts +151 -0
- package/dist/cache/response-cache.js +287 -0
- package/dist/ci/index.d.ts +60 -0
- package/dist/ci/index.js +342 -0
- package/dist/cli/commands/auth.d.ts +12 -0
- package/dist/cli/commands/auth.js +352 -0
- package/dist/cli/commands/badge.d.ts +3 -0
- package/dist/cli/commands/badge.js +74 -0
- package/dist/cli/commands/baseline-accept.d.ts +15 -0
- package/dist/cli/commands/baseline-accept.js +178 -0
- package/dist/cli/commands/baseline-migrate.d.ts +12 -0
- package/dist/cli/commands/baseline-migrate.js +164 -0
- package/dist/cli/commands/baseline.d.ts +14 -0
- package/dist/cli/commands/baseline.js +449 -0
- package/dist/cli/commands/beta.d.ts +10 -0
- package/dist/cli/commands/beta.js +231 -0
- package/dist/cli/commands/check.d.ts +11 -0
- package/dist/cli/commands/check.js +820 -0
- package/dist/cli/commands/cloud/badge.d.ts +3 -0
- package/dist/cli/commands/cloud/badge.js +74 -0
- package/dist/cli/commands/cloud/diff.d.ts +6 -0
- package/dist/cli/commands/cloud/diff.js +79 -0
- package/dist/cli/commands/cloud/history.d.ts +6 -0
- package/dist/cli/commands/cloud/history.js +102 -0
- package/dist/cli/commands/cloud/link.d.ts +9 -0
- package/dist/cli/commands/cloud/link.js +119 -0
- package/dist/cli/commands/cloud/login.d.ts +7 -0
- package/dist/cli/commands/cloud/login.js +499 -0
- package/dist/cli/commands/cloud/projects.d.ts +6 -0
- package/dist/cli/commands/cloud/projects.js +44 -0
- package/dist/cli/commands/cloud/shared.d.ts +7 -0
- package/dist/cli/commands/cloud/shared.js +42 -0
- package/dist/cli/commands/cloud/teams.d.ts +8 -0
- package/dist/cli/commands/cloud/teams.js +169 -0
- package/dist/cli/commands/cloud/upload.d.ts +8 -0
- package/dist/cli/commands/cloud/upload.js +181 -0
- package/dist/cli/commands/contract.d.ts +11 -0
- package/dist/cli/commands/contract.js +280 -0
- package/dist/cli/commands/discover.d.ts +3 -0
- package/dist/cli/commands/discover.js +82 -0
- package/dist/cli/commands/eval.d.ts +9 -0
- package/dist/cli/commands/eval.js +187 -0
- package/dist/cli/commands/explore.d.ts +11 -0
- package/dist/cli/commands/explore.js +437 -0
- package/dist/cli/commands/feedback.d.ts +9 -0
- package/dist/cli/commands/feedback.js +174 -0
- package/dist/cli/commands/golden.d.ts +12 -0
- package/dist/cli/commands/golden.js +407 -0
- package/dist/cli/commands/history.d.ts +10 -0
- package/dist/cli/commands/history.js +202 -0
- package/dist/cli/commands/init.d.ts +9 -0
- package/dist/cli/commands/init.js +219 -0
- package/dist/cli/commands/interview.d.ts +3 -0
- package/dist/cli/commands/interview.js +903 -0
- package/dist/cli/commands/link.d.ts +10 -0
- package/dist/cli/commands/link.js +169 -0
- package/dist/cli/commands/login.d.ts +7 -0
- package/dist/cli/commands/login.js +499 -0
- package/dist/cli/commands/preset.d.ts +33 -0
- package/dist/cli/commands/preset.js +297 -0
- package/dist/cli/commands/profile.d.ts +33 -0
- package/dist/cli/commands/profile.js +286 -0
- package/dist/cli/commands/registry.d.ts +11 -0
- package/dist/cli/commands/registry.js +146 -0
- package/dist/cli/commands/shared.d.ts +79 -0
- package/dist/cli/commands/shared.js +196 -0
- package/dist/cli/commands/teams.d.ts +8 -0
- package/dist/cli/commands/teams.js +169 -0
- package/dist/cli/commands/test.d.ts +9 -0
- package/dist/cli/commands/test.js +500 -0
- package/dist/cli/commands/upload.d.ts +8 -0
- package/dist/cli/commands/upload.js +223 -0
- package/dist/cli/commands/validate-config.d.ts +6 -0
- package/dist/cli/commands/validate-config.js +35 -0
- package/dist/cli/commands/verify.d.ts +11 -0
- package/dist/cli/commands/verify.js +283 -0
- package/dist/cli/commands/watch.d.ts +12 -0
- package/dist/cli/commands/watch.js +253 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.js +178 -0
- package/dist/cli/interactive.d.ts +47 -0
- package/dist/cli/interactive.js +216 -0
- package/dist/cli/output/terminal-reporter.d.ts +19 -0
- package/dist/cli/output/terminal-reporter.js +104 -0
- package/dist/cli/output.d.ts +226 -0
- package/dist/cli/output.js +438 -0
- package/dist/cli/utils/env.d.ts +5 -0
- package/dist/cli/utils/env.js +14 -0
- package/dist/cli/utils/progress.d.ts +59 -0
- package/dist/cli/utils/progress.js +206 -0
- package/dist/cli/utils/server-context.d.ts +10 -0
- package/dist/cli/utils/server-context.js +36 -0
- package/dist/cloud/auth.d.ts +144 -0
- package/dist/cloud/auth.js +374 -0
- package/dist/cloud/client.d.ts +24 -0
- package/dist/cloud/client.js +65 -0
- package/dist/cloud/http-client.d.ts +38 -0
- package/dist/cloud/http-client.js +215 -0
- package/dist/cloud/index.d.ts +23 -0
- package/dist/cloud/index.js +25 -0
- package/dist/cloud/mock-client.d.ts +107 -0
- package/dist/cloud/mock-client.js +545 -0
- package/dist/cloud/types.d.ts +515 -0
- package/dist/cloud/types.js +15 -0
- package/dist/config/defaults.d.ts +160 -0
- package/dist/config/defaults.js +169 -0
- package/dist/config/loader.d.ts +24 -0
- package/dist/config/loader.js +122 -0
- package/dist/config/template.d.ts +42 -0
- package/dist/config/template.js +647 -0
- package/dist/config/validator.d.ts +2112 -0
- package/dist/config/validator.js +658 -0
- package/dist/constants/cloud.d.ts +107 -0
- package/dist/constants/cloud.js +110 -0
- package/dist/constants/core.d.ts +521 -0
- package/dist/constants/core.js +556 -0
- package/dist/constants/testing.d.ts +1283 -0
- package/dist/constants/testing.js +1568 -0
- package/dist/constants.d.ts +10 -0
- package/dist/constants.js +10 -0
- package/dist/contract/index.d.ts +6 -0
- package/dist/contract/index.js +5 -0
- package/dist/contract/validator.d.ts +177 -0
- package/dist/contract/validator.js +574 -0
- package/dist/cost/index.d.ts +6 -0
- package/dist/cost/index.js +5 -0
- package/dist/cost/tracker.d.ts +134 -0
- package/dist/cost/tracker.js +313 -0
- package/dist/discovery/discovery.d.ts +16 -0
- package/dist/discovery/discovery.js +173 -0
- package/dist/discovery/types.d.ts +51 -0
- package/dist/discovery/types.js +2 -0
- package/dist/docs/agents.d.ts +3 -0
- package/dist/docs/agents.js +995 -0
- package/dist/docs/contract.d.ts +51 -0
- package/dist/docs/contract.js +1681 -0
- package/dist/docs/generator.d.ts +4 -0
- package/dist/docs/generator.js +4 -0
- package/dist/docs/html-reporter.d.ts +9 -0
- package/dist/docs/html-reporter.js +757 -0
- package/dist/docs/index.d.ts +10 -0
- package/dist/docs/index.js +11 -0
- package/dist/docs/junit-reporter.d.ts +18 -0
- package/dist/docs/junit-reporter.js +210 -0
- package/dist/docs/report.d.ts +14 -0
- package/dist/docs/report.js +44 -0
- package/dist/docs/sarif-reporter.d.ts +19 -0
- package/dist/docs/sarif-reporter.js +335 -0
- package/dist/docs/shared.d.ts +35 -0
- package/dist/docs/shared.js +162 -0
- package/dist/docs/templates.d.ts +12 -0
- package/dist/docs/templates.js +76 -0
- package/dist/errors/index.d.ts +6 -0
- package/dist/errors/index.js +6 -0
- package/dist/errors/retry.d.ts +92 -0
- package/dist/errors/retry.js +323 -0
- package/dist/errors/types.d.ts +321 -0
- package/dist/errors/types.js +584 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.js +32 -0
- package/dist/interview/dependency-resolver.d.ts +11 -0
- package/dist/interview/dependency-resolver.js +32 -0
- package/dist/interview/interviewer.d.ts +232 -0
- package/dist/interview/interviewer.js +1939 -0
- package/dist/interview/mock-response-generator.d.ts +7 -0
- package/dist/interview/mock-response-generator.js +102 -0
- package/dist/interview/orchestrator.d.ts +237 -0
- package/dist/interview/orchestrator.js +1296 -0
- package/dist/interview/rate-limiter.d.ts +15 -0
- package/dist/interview/rate-limiter.js +55 -0
- package/dist/interview/response-validator.d.ts +10 -0
- package/dist/interview/response-validator.js +132 -0
- package/dist/interview/schema-inferrer.d.ts +8 -0
- package/dist/interview/schema-inferrer.js +71 -0
- package/dist/interview/schema-test-generator.d.ts +71 -0
- package/dist/interview/schema-test-generator.js +834 -0
- package/dist/interview/smart-value-generator.d.ts +155 -0
- package/dist/interview/smart-value-generator.js +554 -0
- package/dist/interview/stateful-test-runner.d.ts +19 -0
- package/dist/interview/stateful-test-runner.js +106 -0
- package/dist/interview/types.d.ts +561 -0
- package/dist/interview/types.js +2 -0
- package/dist/llm/anthropic.d.ts +41 -0
- package/dist/llm/anthropic.js +355 -0
- package/dist/llm/client.d.ts +123 -0
- package/dist/llm/client.js +42 -0
- package/dist/llm/factory.d.ts +38 -0
- package/dist/llm/factory.js +145 -0
- package/dist/llm/fallback.d.ts +140 -0
- package/dist/llm/fallback.js +379 -0
- package/dist/llm/index.d.ts +18 -0
- package/dist/llm/index.js +15 -0
- package/dist/llm/ollama.d.ts +37 -0
- package/dist/llm/ollama.js +330 -0
- package/dist/llm/openai.d.ts +25 -0
- package/dist/llm/openai.js +320 -0
- package/dist/llm/token-budget.d.ts +161 -0
- package/dist/llm/token-budget.js +395 -0
- package/dist/logging/logger.d.ts +70 -0
- package/dist/logging/logger.js +130 -0
- package/dist/metrics/collector.d.ts +106 -0
- package/dist/metrics/collector.js +547 -0
- package/dist/metrics/index.d.ts +7 -0
- package/dist/metrics/index.js +7 -0
- package/dist/metrics/prometheus.d.ts +20 -0
- package/dist/metrics/prometheus.js +241 -0
- package/dist/metrics/types.d.ts +209 -0
- package/dist/metrics/types.js +5 -0
- package/dist/persona/builtins.d.ts +54 -0
- package/dist/persona/builtins.js +219 -0
- package/dist/persona/index.d.ts +8 -0
- package/dist/persona/index.js +8 -0
- package/dist/persona/loader.d.ts +30 -0
- package/dist/persona/loader.js +190 -0
- package/dist/persona/types.d.ts +144 -0
- package/dist/persona/types.js +5 -0
- package/dist/persona/validation.d.ts +94 -0
- package/dist/persona/validation.js +332 -0
- package/dist/prompts/index.d.ts +5 -0
- package/dist/prompts/index.js +5 -0
- package/dist/prompts/templates.d.ts +180 -0
- package/dist/prompts/templates.js +431 -0
- package/dist/registry/client.d.ts +49 -0
- package/dist/registry/client.js +191 -0
- package/dist/registry/index.d.ts +7 -0
- package/dist/registry/index.js +6 -0
- package/dist/registry/types.d.ts +140 -0
- package/dist/registry/types.js +6 -0
- package/dist/scenarios/evaluator.d.ts +43 -0
- package/dist/scenarios/evaluator.js +206 -0
- package/dist/scenarios/index.d.ts +10 -0
- package/dist/scenarios/index.js +9 -0
- package/dist/scenarios/loader.d.ts +20 -0
- package/dist/scenarios/loader.js +285 -0
- package/dist/scenarios/types.d.ts +153 -0
- package/dist/scenarios/types.js +8 -0
- package/dist/security/index.d.ts +17 -0
- package/dist/security/index.js +18 -0
- package/dist/security/payloads.d.ts +61 -0
- package/dist/security/payloads.js +268 -0
- package/dist/security/security-tester.d.ts +42 -0
- package/dist/security/security-tester.js +582 -0
- package/dist/security/types.d.ts +166 -0
- package/dist/security/types.js +8 -0
- package/dist/transport/base-transport.d.ts +59 -0
- package/dist/transport/base-transport.js +38 -0
- package/dist/transport/http-transport.d.ts +67 -0
- package/dist/transport/http-transport.js +238 -0
- package/dist/transport/mcp-client.d.ts +141 -0
- package/dist/transport/mcp-client.js +496 -0
- package/dist/transport/sse-transport.d.ts +88 -0
- package/dist/transport/sse-transport.js +316 -0
- package/dist/transport/stdio-transport.d.ts +43 -0
- package/dist/transport/stdio-transport.js +238 -0
- package/dist/transport/types.d.ts +125 -0
- package/dist/transport/types.js +16 -0
- package/dist/utils/concurrency.d.ts +123 -0
- package/dist/utils/concurrency.js +213 -0
- package/dist/utils/formatters.d.ts +16 -0
- package/dist/utils/formatters.js +37 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/jsonpath.d.ts +87 -0
- package/dist/utils/jsonpath.js +326 -0
- package/dist/utils/markdown.d.ts +113 -0
- package/dist/utils/markdown.js +265 -0
- package/dist/utils/network.d.ts +14 -0
- package/dist/utils/network.js +17 -0
- package/dist/utils/sanitize.d.ts +92 -0
- package/dist/utils/sanitize.js +191 -0
- package/dist/utils/semantic.d.ts +194 -0
- package/dist/utils/semantic.js +1051 -0
- package/dist/utils/smart-truncate.d.ts +94 -0
- package/dist/utils/smart-truncate.js +361 -0
- package/dist/utils/timeout.d.ts +153 -0
- package/dist/utils/timeout.js +205 -0
- package/dist/utils/yaml-parser.d.ts +58 -0
- package/dist/utils/yaml-parser.js +86 -0
- package/dist/validation/index.d.ts +32 -0
- package/dist/validation/index.js +32 -0
- package/dist/validation/semantic-test-generator.d.ts +50 -0
- package/dist/validation/semantic-test-generator.js +176 -0
- package/dist/validation/semantic-types.d.ts +66 -0
- package/dist/validation/semantic-types.js +94 -0
- package/dist/validation/semantic-validator.d.ts +38 -0
- package/dist/validation/semantic-validator.js +340 -0
- package/dist/verification/index.d.ts +6 -0
- package/dist/verification/index.js +5 -0
- package/dist/verification/types.d.ts +133 -0
- package/dist/verification/types.js +5 -0
- package/dist/verification/verifier.d.ts +30 -0
- package/dist/verification/verifier.js +309 -0
- package/dist/version.d.ts +19 -0
- package/dist/version.js +48 -0
- package/dist/workflow/auto-generator.d.ts +27 -0
- package/dist/workflow/auto-generator.js +513 -0
- package/dist/workflow/discovery.d.ts +40 -0
- package/dist/workflow/discovery.js +195 -0
- package/dist/workflow/executor.d.ts +82 -0
- package/dist/workflow/executor.js +611 -0
- package/dist/workflow/index.d.ts +10 -0
- package/dist/workflow/index.js +10 -0
- package/dist/workflow/loader.d.ts +24 -0
- package/dist/workflow/loader.js +194 -0
- package/dist/workflow/state-tracker.d.ts +98 -0
- package/dist/workflow/state-tracker.js +424 -0
- package/dist/workflow/types.d.ts +337 -0
- package/dist/workflow/types.js +5 -0
- package/package.json +94 -0
- package/schemas/bellwether-check.schema.json +651 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import { MCPClient } from '../../transport/mcp-client.js';
|
|
3
|
+
import { discover, summarizeDiscovery } from '../../discovery/discovery.js';
|
|
4
|
+
import { EXIT_CODES } from '../../constants.js';
|
|
5
|
+
import { loadConfig, ConfigNotFoundError } from '../../config/loader.js';
|
|
6
|
+
import * as output from '../output.js';
|
|
7
|
+
/**
 * Action handler for the discover command.
 *
 * Resolves settings (CLI options take precedence over the `discovery` section
 * of the loaded config), validates the transport / URL / command combination,
 * connects to the MCP server, runs discovery, and prints the result either as
 * JSON or as a text summary. Exits the process with EXIT_CODES.ERROR on any
 * validation or discovery failure.
 *
 * @param command - Server command for stdio transport; may be undefined for remote transports.
 * @param args - Arguments passed to the server command.
 * @param options - Parsed CLI options (config, json, timeout, transport, url, sessionId).
 */
async function discoverAction(command, args, options) {
    let config;
    try {
        config = loadConfig(options.config);
    }
    catch (error) {
        if (error instanceof ConfigNotFoundError) {
            output.error(error.message);
            process.exit(EXIT_CODES.ERROR);
        }
        throw error;
    }
    const timeout = parseInt(options.timeout ?? String(config.discovery.timeout), 10);
    // parseInt yields NaN for non-numeric input; reject it instead of handing
    // an unusable timeout to the client.
    if (Number.isNaN(timeout)) {
        output.error('Error: timeout must be a valid number of milliseconds');
        process.exit(EXIT_CODES.ERROR);
    }
    const transportType = (options.transport ?? config.discovery.transport);
    const isRemoteTransport = transportType === 'sse' || transportType === 'streamable-http';
    const outputJson = options.json ? true : config.discovery.json;
    const remoteUrl = options.url ?? config.discovery.url;
    const sessionId = options.sessionId ?? config.discovery.sessionId;
    // Validate transport options
    if (isRemoteTransport && !remoteUrl) {
        output.error(`Error: --url is required when using --transport ${transportType}`);
        process.exit(EXIT_CODES.ERROR);
    }
    if (remoteUrl && !isRemoteTransport) {
        output.error('Error: --url requires --transport sse or --transport streamable-http');
        process.exit(EXIT_CODES.ERROR);
    }
    if (!isRemoteTransport && !command) {
        output.error('Error: Server command is required for stdio transport');
        process.exit(EXIT_CODES.ERROR);
    }
    const serverIdentifier = isRemoteTransport ? remoteUrl : `${command} ${args.join(' ')}`;
    output.info(`Connecting to MCP server: ${serverIdentifier}`);
    const client = new MCPClient({ timeout, transport: transportType });
    // Failures are recorded instead of exiting inside `catch`: process.exit()
    // terminates immediately, which would skip the `finally` cleanup below.
    let failed = false;
    try {
        if (isRemoteTransport) {
            await client.connectRemote(remoteUrl, {
                transport: transportType,
                sessionId,
            });
        }
        else {
            await client.connect(command, args);
        }
        output.info('Discovering capabilities...\n');
        // Use the resolved URL (CLI option or config fallback), not only the
        // raw CLI option, when no server command was given.
        const result = await discover(client, command ?? remoteUrl, args);
        if (outputJson) {
            output.json(result);
        }
        else {
            output.info(summarizeDiscovery(result));
        }
    }
    catch (error) {
        output.error('Discovery failed: ' + (error instanceof Error ? error.message : String(error)));
        failed = true;
    }
    finally {
        try {
            await client.disconnect();
        }
        catch (disconnectError) {
            // After a reported failure, a cleanup error must not mask the
            // error exit below; on the success path it still propagates.
            if (!failed) {
                throw disconnectError;
            }
        }
    }
    if (failed) {
        process.exit(EXIT_CODES.ERROR);
    }
}
|
|
71
|
+
// CLI definition for `discover`: an optional server command plus variadic
// arguments, followed by the options that discoverAction reads.
export const discoverCommand = new Command('discover');
discoverCommand.description('Discover MCP server capabilities (tools, prompts, resources)');
discoverCommand.argument('[command]', 'Command to start the MCP server (not required for remote)');
discoverCommand.argument('[args...]', 'Arguments to pass to the server');
discoverCommand.option('-c, --config <path>', 'Path to config file');
discoverCommand.option('--json', 'Output as JSON');
discoverCommand.option('--timeout <ms>', 'Connection timeout in milliseconds');
discoverCommand.option('--transport <type>', 'Transport type: stdio, sse, streamable-http');
discoverCommand.option('--url <url>', 'URL for remote MCP server (requires --transport sse or streamable-http)');
discoverCommand.option('--session-id <id>', 'Session ID for remote server authentication');
discoverCommand.action(discoverAction);
|
|
82
|
+
//# sourceMappingURL=discover.js.map
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Eval command - evaluate drift detection algorithm accuracy.
|
|
3
|
+
*
|
|
4
|
+
* Runs the semantic comparison algorithm against a golden dataset
|
|
5
|
+
* to measure precision, recall, and calibration metrics.
|
|
6
|
+
*/
|
|
7
|
+
import { Command } from 'commander';
|
|
8
|
+
/**
 * Creates the commander `Command` object for the eval command.
 */
export declare function createEvalCommand(): Command;
|
|
9
|
+
//# sourceMappingURL=eval.d.ts.map
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Eval command - evaluate drift detection algorithm accuracy.
|
|
3
|
+
*
|
|
4
|
+
* Runs the semantic comparison algorithm against a golden dataset
|
|
5
|
+
* to measure precision, recall, and calibration metrics.
|
|
6
|
+
*/
|
|
7
|
+
import { Command } from 'commander';
|
|
8
|
+
import { writeFileSync } from 'fs';
|
|
9
|
+
import chalk from 'chalk';
|
|
10
|
+
import { evaluate, formatEvaluationReport, getDatasetStatistics, createSummary, } from '../../baseline/evaluation/index.js';
|
|
11
|
+
import { updateCalibrationModel, calculateCalibrationError, } from '../../baseline/calibration.js';
|
|
12
|
+
import { checkOllamaEmbeddings } from '../../baseline/embeddings.js';
|
|
13
|
+
/**
 * Build the `eval` command: registers its filtering, output, calibration and
 * embedding-check options and wires the action to handleEval.
 *
 * @returns The configured commander Command.
 */
export function createEvalCommand() {
    const command = new Command('eval');
    command.description('Evaluate drift detection algorithm accuracy');
    command.option('-c, --category <categories...>', 'Filter by category (security, limitation, assertion)');
    command.option('-t, --tags <tags...>', 'Filter by tags');
    command.option('--json', 'Output results as JSON');
    command.option('--verbose', 'Show individual test case results');
    command.option('--failures', 'Only show failed test cases');
    command.option('--stats', 'Show dataset statistics only');
    command.option('--update-calibration', 'Update calibration model from results');
    command.option('--export-calibration <path>', 'Export new calibration model to file');
    command.option('--check-embeddings', 'Check if Ollama embeddings are available');
    // Forward only the parsed options object to the handler.
    command.action(async (cliOptions) => {
        await handleEval(cliOptions);
    });
    return command;
}
|
|
30
|
+
/**
 * Execute the eval command.
 *
 * Modes, checked in this order:
 *  1. `checkEmbeddings` - print Ollama embedding status and return.
 *  2. `stats` - print dataset statistics (JSON or text) and return.
 *  3. Otherwise run the evaluation, then either print the result as JSON and
 *     return, or print the formatted report, optional failure details, a
 *     summary line and optional calibration output.
 *
 * In the non-JSON path the process exits with code 1 when the `CI` environment
 * variable is set and at least one test case failed.
 *
 * @param options - Parsed CLI options for the eval command.
 */
async function handleEval(options) {
    // Check embeddings availability
    if (options.checkEmbeddings) {
        console.log(chalk.gray('Checking Ollama embeddings availability...'));
        const status = await checkOllamaEmbeddings();
        console.log('');
        console.log(chalk.bold('Ollama Embedding Status'));
        console.log('─'.repeat(40));
        console.log(` Ollama available: ${status.available ? chalk.green('Yes') : chalk.red('No')}`);
        console.log(` Model: ${status.modelName}`);
        console.log(` Model available: ${status.hasModel ? chalk.green('Yes') : chalk.yellow('No')}`);
        if (status.error) {
            console.log(` ${chalk.red('Error')}: ${status.error}`);
        }
        if (status.available && !status.hasModel) {
            console.log('');
            console.log(chalk.gray('To enable embeddings, run:'));
            console.log(chalk.cyan(' ollama pull nomic-embed-text'));
        }
        console.log('');
        return;
    }
    // Show dataset statistics only
    if (options.stats) {
        const stats = getDatasetStatistics();
        if (options.json) {
            console.log(JSON.stringify(stats, null, 2));
        }
        else {
            console.log('');
            console.log(chalk.bold('Golden Dataset Statistics'));
            console.log('─'.repeat(40));
            console.log(` Total Cases: ${stats.totalCases}`);
            console.log(` True Positives: ${stats.truePositives}`);
            console.log(` True Negatives: ${stats.trueNegatives}`);
            console.log('');
            console.log(chalk.gray(' By Category:'));
            console.log(` Security: ${stats.byCategory.security}`);
            console.log(` Limitation: ${stats.byCategory.limitation}`);
            console.log(` Assertion: ${stats.byCategory.assertion}`);
            console.log('');
        }
        return;
    }
    // Build evaluation options
    const evalOptions = {
        includeFactors: options.verbose,
    };
    if (options.category) {
        evalOptions.categories = options.category;
    }
    if (options.tags) {
        evalOptions.tags = options.tags;
    }
    // Run evaluation. The progress line is skipped in JSON mode so that
    // stdout contains nothing but the JSON document.
    if (!options.json) {
        console.log(chalk.gray('Running evaluation...'));
    }
    const result = evaluate(evalOptions);
    // Output as JSON
    if (options.json) {
        console.log(JSON.stringify(result, null, 2));
        return;
    }
    // Display formatted report
    console.log(formatEvaluationReport(result));
    // Show failures detail
    if (options.failures || options.verbose) {
        if (result.failures.length > 0) {
            console.log(chalk.bold('\n Failure Details'));
            console.log(' ' + '─'.repeat(58));
            for (const failure of result.failures) {
                const tc = failure.testCase;
                const icon = failure.failureType === 'false_positive' ? '⚠️' : '❌';
                const typeColor = failure.failureType === 'false_positive' ? chalk.yellow : chalk.red;
                console.log('');
                console.log(` ${icon} ${chalk.bold(tc.id)} - ${typeColor(failure.failureType || 'unknown')}`);
                console.log(chalk.gray(` Category: ${tc.category} | Tool: ${tc.toolName}`));
                console.log(chalk.gray(` Expected: ${tc.expectedMatch ? 'match' : 'no match'} | Got: ${failure.actualMatch ? 'match' : 'no match'}`));
                console.log(chalk.gray(` Confidence: ${failure.actualConfidence}%`));
                console.log('');
                console.log(` Text 1: "${truncate(tc.text1, 60)}"`);
                console.log(` Text 2: "${truncate(tc.text2, 60)}"`);
                console.log(chalk.gray(` Reasoning: ${tc.reasoning}`));
                if (options.verbose && failure.confidenceFactors) {
                    console.log(chalk.gray('\n Confidence Factors:'));
                    for (const factor of failure.confidenceFactors) {
                        console.log(chalk.gray(` - ${factor.name}: ${factor.value} (weight: ${factor.weight})`));
                    }
                }
            }
            console.log('');
        }
    }
    // Summary line for CI
    const summary = createSummary(result);
    const passedAll = result.failures.length === 0;
    const statusLine = passedAll
        ? chalk.green(`✓ All ${result.totalCases} test cases passed`)
        : chalk.yellow(`⚠ ${result.failures.length}/${result.totalCases} test cases failed`);
    console.log(statusLine);
    console.log(chalk.gray(` Accuracy: ${summary.accuracy} | Precision: ${summary.precision} | Recall: ${summary.recall}`));
    console.log('');
    // Update calibration model from results
    if (options.updateCalibration || options.exportCalibration) {
        // Convert test results to calibration format
        const calibrationData = result.testResults.map(tr => ({
            predictedConfidence: tr.actualConfidence,
            wasCorrect: tr.actualMatch === tr.testCase.expectedMatch,
        }));
        const newCalibration = updateCalibrationModel(calibrationData);
        const calibrationError = calculateCalibrationError(newCalibration);
        console.log(chalk.bold('Calibration Update'));
        console.log('─'.repeat(40));
        console.log(` Calibration error: ${calibrationError}%`);
        console.log('');
        console.log(' New calibration buckets:');
        for (const bucket of newCalibration) {
            const range = `${bucket.min}-${bucket.max}%`;
            console.log(` ${range.padEnd(10)} → ${bucket.calibratedAccuracy}% accuracy (n=${bucket.sampleCount})`);
        }
        if (options.exportCalibration) {
            const calibrationContent = generateCalibrationCode(newCalibration);
            writeFileSync(options.exportCalibration, calibrationContent);
            console.log('');
            console.log(chalk.green(`✓ Calibration exported to: ${options.exportCalibration}`));
        }
        console.log('');
    }
    // Exit with error code if failures and in CI mode
    if (process.env.CI && result.failures.length > 0) {
        process.exit(1);
    }
}
|
|
162
|
+
/**
 * Render calibration buckets as the source text of a TypeScript module.
 *
 * The text consists of a header comment stamped with the current ISO time, a
 * type-only import of CalibrationBucket, and an exported
 * UPDATED_CALIBRATION_MODEL array with one entry per bucket. It ends with a
 * trailing newline.
 *
 * @param buckets - Iterable of objects with min, max, calibratedAccuracy and sampleCount.
 * @returns The generated module source as a single string.
 */
function generateCalibrationCode(buckets) {
    const generatedAt = new Date().toISOString();
    const header = [
        '/**',
        ' * Updated Calibration Model',
        ` * Generated: ${generatedAt}`,
        ' */',
        '',
        'import type { CalibrationBucket } from \'./calibration.js\';',
        '',
        'export const UPDATED_CALIBRATION_MODEL: CalibrationBucket[] = [',
    ];
    // One array-literal entry per bucket, in input order.
    const rows = [...buckets].map((entry) => ` { min: ${entry.min}, max: ${entry.max}, calibratedAccuracy: ${entry.calibratedAccuracy}, sampleCount: ${entry.sampleCount} },`);
    // The final empty element produces the trailing newline after '];'.
    const footer = ['];', ''];
    return [...header, ...rows, ...footer].join('\n');
}
|
|
182
|
+
/**
 * Shorten a string to at most `maxLen` characters for display.
 *
 * Strings that already fit are returned unchanged. Longer strings are cut and
 * suffixed with '...' so that the result is exactly `maxLen` characters. When
 * `maxLen` is smaller than the ellipsis itself (< 3) the string is cut without
 * an ellipsis, so the result never exceeds `maxLen`.
 *
 * @param str - Text to shorten.
 * @param maxLen - Maximum length of the returned string.
 * @returns The original or shortened text.
 */
function truncate(str, maxLen) {
    if (str.length <= maxLen)
        return str;
    // No room for '...': a negative slice end would count from the end of the
    // string and could return something longer than the limit.
    if (maxLen < 3)
        return str.slice(0, Math.max(0, maxLen));
    return str.slice(0, maxLen - 3) + '...';
}
|
|
187
|
+
//# sourceMappingURL=eval.js.map
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Explore command - LLM-powered behavioral exploration for MCP servers.
|
|
3
|
+
*
|
|
4
|
+
* Purpose: Deep exploration and documentation of MCP server behavior.
|
|
5
|
+
* Output: Documentation + JSON report (filenames configurable via output.files)
|
|
6
|
+
* Baseline: None (use 'bellwether check' for drift detection)
|
|
7
|
+
* LLM: Required (OpenAI, Anthropic, or Ollama)
|
|
8
|
+
*/
|
|
9
|
+
import { Command } from 'commander';
|
|
10
|
+
/** Commander `Command` instance exported for the explore command described in this file's header comment. */
export declare const exploreCommand: Command;
|
|
11
|
+
//# sourceMappingURL=explore.d.ts.map
|