@dotsetlabs/bellwether 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +291 -0
- package/LICENSE +21 -0
- package/README.md +739 -0
- package/dist/auth/credentials.d.ts +64 -0
- package/dist/auth/credentials.js +218 -0
- package/dist/auth/index.d.ts +6 -0
- package/dist/auth/index.js +6 -0
- package/dist/auth/keychain.d.ts +64 -0
- package/dist/auth/keychain.js +268 -0
- package/dist/baseline/ab-testing.d.ts +80 -0
- package/dist/baseline/ab-testing.js +236 -0
- package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
- package/dist/baseline/ai-compatibility-scorer.js +606 -0
- package/dist/baseline/calibration.d.ts +77 -0
- package/dist/baseline/calibration.js +136 -0
- package/dist/baseline/category-matching.d.ts +85 -0
- package/dist/baseline/category-matching.js +289 -0
- package/dist/baseline/change-impact-analyzer.d.ts +98 -0
- package/dist/baseline/change-impact-analyzer.js +592 -0
- package/dist/baseline/comparator.d.ts +64 -0
- package/dist/baseline/comparator.js +916 -0
- package/dist/baseline/confidence.d.ts +55 -0
- package/dist/baseline/confidence.js +122 -0
- package/dist/baseline/converter.d.ts +61 -0
- package/dist/baseline/converter.js +585 -0
- package/dist/baseline/dependency-analyzer.d.ts +89 -0
- package/dist/baseline/dependency-analyzer.js +567 -0
- package/dist/baseline/deprecation-tracker.d.ts +133 -0
- package/dist/baseline/deprecation-tracker.js +322 -0
- package/dist/baseline/diff.d.ts +55 -0
- package/dist/baseline/diff.js +1584 -0
- package/dist/baseline/documentation-scorer.d.ts +205 -0
- package/dist/baseline/documentation-scorer.js +466 -0
- package/dist/baseline/embeddings.d.ts +118 -0
- package/dist/baseline/embeddings.js +251 -0
- package/dist/baseline/error-analyzer.d.ts +198 -0
- package/dist/baseline/error-analyzer.js +721 -0
- package/dist/baseline/evaluation/evaluator.d.ts +42 -0
- package/dist/baseline/evaluation/evaluator.js +323 -0
- package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
- package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
- package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
- package/dist/baseline/evaluation/golden-dataset.js +717 -0
- package/dist/baseline/evaluation/index.d.ts +15 -0
- package/dist/baseline/evaluation/index.js +15 -0
- package/dist/baseline/evaluation/types.d.ts +186 -0
- package/dist/baseline/evaluation/types.js +8 -0
- package/dist/baseline/external-dependency-detector.d.ts +181 -0
- package/dist/baseline/external-dependency-detector.js +524 -0
- package/dist/baseline/golden-output.d.ts +162 -0
- package/dist/baseline/golden-output.js +636 -0
- package/dist/baseline/health-scorer.d.ts +174 -0
- package/dist/baseline/health-scorer.js +451 -0
- package/dist/baseline/incremental-checker.d.ts +97 -0
- package/dist/baseline/incremental-checker.js +174 -0
- package/dist/baseline/index.d.ts +31 -0
- package/dist/baseline/index.js +42 -0
- package/dist/baseline/migration-generator.d.ts +137 -0
- package/dist/baseline/migration-generator.js +554 -0
- package/dist/baseline/migrations.d.ts +60 -0
- package/dist/baseline/migrations.js +197 -0
- package/dist/baseline/performance-tracker.d.ts +214 -0
- package/dist/baseline/performance-tracker.js +577 -0
- package/dist/baseline/pr-comment-generator.d.ts +117 -0
- package/dist/baseline/pr-comment-generator.js +546 -0
- package/dist/baseline/response-fingerprint.d.ts +127 -0
- package/dist/baseline/response-fingerprint.js +728 -0
- package/dist/baseline/response-schema-tracker.d.ts +129 -0
- package/dist/baseline/response-schema-tracker.js +420 -0
- package/dist/baseline/risk-scorer.d.ts +54 -0
- package/dist/baseline/risk-scorer.js +434 -0
- package/dist/baseline/saver.d.ts +89 -0
- package/dist/baseline/saver.js +554 -0
- package/dist/baseline/scenario-generator.d.ts +151 -0
- package/dist/baseline/scenario-generator.js +905 -0
- package/dist/baseline/schema-compare.d.ts +86 -0
- package/dist/baseline/schema-compare.js +557 -0
- package/dist/baseline/schema-evolution.d.ts +189 -0
- package/dist/baseline/schema-evolution.js +467 -0
- package/dist/baseline/semantic.d.ts +203 -0
- package/dist/baseline/semantic.js +908 -0
- package/dist/baseline/synonyms.d.ts +60 -0
- package/dist/baseline/synonyms.js +386 -0
- package/dist/baseline/telemetry.d.ts +165 -0
- package/dist/baseline/telemetry.js +294 -0
- package/dist/baseline/test-pruner.d.ts +120 -0
- package/dist/baseline/test-pruner.js +387 -0
- package/dist/baseline/types.d.ts +449 -0
- package/dist/baseline/types.js +5 -0
- package/dist/baseline/version.d.ts +138 -0
- package/dist/baseline/version.js +206 -0
- package/dist/cache/index.d.ts +5 -0
- package/dist/cache/index.js +5 -0
- package/dist/cache/response-cache.d.ts +151 -0
- package/dist/cache/response-cache.js +287 -0
- package/dist/ci/index.d.ts +60 -0
- package/dist/ci/index.js +342 -0
- package/dist/cli/commands/auth.d.ts +12 -0
- package/dist/cli/commands/auth.js +352 -0
- package/dist/cli/commands/badge.d.ts +3 -0
- package/dist/cli/commands/badge.js +74 -0
- package/dist/cli/commands/baseline-accept.d.ts +15 -0
- package/dist/cli/commands/baseline-accept.js +178 -0
- package/dist/cli/commands/baseline-migrate.d.ts +12 -0
- package/dist/cli/commands/baseline-migrate.js +164 -0
- package/dist/cli/commands/baseline.d.ts +14 -0
- package/dist/cli/commands/baseline.js +449 -0
- package/dist/cli/commands/beta.d.ts +10 -0
- package/dist/cli/commands/beta.js +231 -0
- package/dist/cli/commands/check.d.ts +11 -0
- package/dist/cli/commands/check.js +820 -0
- package/dist/cli/commands/cloud/badge.d.ts +3 -0
- package/dist/cli/commands/cloud/badge.js +74 -0
- package/dist/cli/commands/cloud/diff.d.ts +6 -0
- package/dist/cli/commands/cloud/diff.js +79 -0
- package/dist/cli/commands/cloud/history.d.ts +6 -0
- package/dist/cli/commands/cloud/history.js +102 -0
- package/dist/cli/commands/cloud/link.d.ts +9 -0
- package/dist/cli/commands/cloud/link.js +119 -0
- package/dist/cli/commands/cloud/login.d.ts +7 -0
- package/dist/cli/commands/cloud/login.js +499 -0
- package/dist/cli/commands/cloud/projects.d.ts +6 -0
- package/dist/cli/commands/cloud/projects.js +44 -0
- package/dist/cli/commands/cloud/shared.d.ts +7 -0
- package/dist/cli/commands/cloud/shared.js +42 -0
- package/dist/cli/commands/cloud/teams.d.ts +8 -0
- package/dist/cli/commands/cloud/teams.js +169 -0
- package/dist/cli/commands/cloud/upload.d.ts +8 -0
- package/dist/cli/commands/cloud/upload.js +181 -0
- package/dist/cli/commands/contract.d.ts +11 -0
- package/dist/cli/commands/contract.js +280 -0
- package/dist/cli/commands/discover.d.ts +3 -0
- package/dist/cli/commands/discover.js +82 -0
- package/dist/cli/commands/eval.d.ts +9 -0
- package/dist/cli/commands/eval.js +187 -0
- package/dist/cli/commands/explore.d.ts +11 -0
- package/dist/cli/commands/explore.js +437 -0
- package/dist/cli/commands/feedback.d.ts +9 -0
- package/dist/cli/commands/feedback.js +174 -0
- package/dist/cli/commands/golden.d.ts +12 -0
- package/dist/cli/commands/golden.js +407 -0
- package/dist/cli/commands/history.d.ts +10 -0
- package/dist/cli/commands/history.js +202 -0
- package/dist/cli/commands/init.d.ts +9 -0
- package/dist/cli/commands/init.js +219 -0
- package/dist/cli/commands/interview.d.ts +3 -0
- package/dist/cli/commands/interview.js +903 -0
- package/dist/cli/commands/link.d.ts +10 -0
- package/dist/cli/commands/link.js +169 -0
- package/dist/cli/commands/login.d.ts +7 -0
- package/dist/cli/commands/login.js +499 -0
- package/dist/cli/commands/preset.d.ts +33 -0
- package/dist/cli/commands/preset.js +297 -0
- package/dist/cli/commands/profile.d.ts +33 -0
- package/dist/cli/commands/profile.js +286 -0
- package/dist/cli/commands/registry.d.ts +11 -0
- package/dist/cli/commands/registry.js +146 -0
- package/dist/cli/commands/shared.d.ts +79 -0
- package/dist/cli/commands/shared.js +196 -0
- package/dist/cli/commands/teams.d.ts +8 -0
- package/dist/cli/commands/teams.js +169 -0
- package/dist/cli/commands/test.d.ts +9 -0
- package/dist/cli/commands/test.js +500 -0
- package/dist/cli/commands/upload.d.ts +8 -0
- package/dist/cli/commands/upload.js +223 -0
- package/dist/cli/commands/validate-config.d.ts +6 -0
- package/dist/cli/commands/validate-config.js +35 -0
- package/dist/cli/commands/verify.d.ts +11 -0
- package/dist/cli/commands/verify.js +283 -0
- package/dist/cli/commands/watch.d.ts +12 -0
- package/dist/cli/commands/watch.js +253 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.js +178 -0
- package/dist/cli/interactive.d.ts +47 -0
- package/dist/cli/interactive.js +216 -0
- package/dist/cli/output/terminal-reporter.d.ts +19 -0
- package/dist/cli/output/terminal-reporter.js +104 -0
- package/dist/cli/output.d.ts +226 -0
- package/dist/cli/output.js +438 -0
- package/dist/cli/utils/env.d.ts +5 -0
- package/dist/cli/utils/env.js +14 -0
- package/dist/cli/utils/progress.d.ts +59 -0
- package/dist/cli/utils/progress.js +206 -0
- package/dist/cli/utils/server-context.d.ts +10 -0
- package/dist/cli/utils/server-context.js +36 -0
- package/dist/cloud/auth.d.ts +144 -0
- package/dist/cloud/auth.js +374 -0
- package/dist/cloud/client.d.ts +24 -0
- package/dist/cloud/client.js +65 -0
- package/dist/cloud/http-client.d.ts +38 -0
- package/dist/cloud/http-client.js +215 -0
- package/dist/cloud/index.d.ts +23 -0
- package/dist/cloud/index.js +25 -0
- package/dist/cloud/mock-client.d.ts +107 -0
- package/dist/cloud/mock-client.js +545 -0
- package/dist/cloud/types.d.ts +515 -0
- package/dist/cloud/types.js +15 -0
- package/dist/config/defaults.d.ts +160 -0
- package/dist/config/defaults.js +169 -0
- package/dist/config/loader.d.ts +24 -0
- package/dist/config/loader.js +122 -0
- package/dist/config/template.d.ts +42 -0
- package/dist/config/template.js +647 -0
- package/dist/config/validator.d.ts +2112 -0
- package/dist/config/validator.js +658 -0
- package/dist/constants/cloud.d.ts +107 -0
- package/dist/constants/cloud.js +110 -0
- package/dist/constants/core.d.ts +521 -0
- package/dist/constants/core.js +556 -0
- package/dist/constants/testing.d.ts +1283 -0
- package/dist/constants/testing.js +1568 -0
- package/dist/constants.d.ts +10 -0
- package/dist/constants.js +10 -0
- package/dist/contract/index.d.ts +6 -0
- package/dist/contract/index.js +5 -0
- package/dist/contract/validator.d.ts +177 -0
- package/dist/contract/validator.js +574 -0
- package/dist/cost/index.d.ts +6 -0
- package/dist/cost/index.js +5 -0
- package/dist/cost/tracker.d.ts +134 -0
- package/dist/cost/tracker.js +313 -0
- package/dist/discovery/discovery.d.ts +16 -0
- package/dist/discovery/discovery.js +173 -0
- package/dist/discovery/types.d.ts +51 -0
- package/dist/discovery/types.js +2 -0
- package/dist/docs/agents.d.ts +3 -0
- package/dist/docs/agents.js +995 -0
- package/dist/docs/contract.d.ts +51 -0
- package/dist/docs/contract.js +1681 -0
- package/dist/docs/generator.d.ts +4 -0
- package/dist/docs/generator.js +4 -0
- package/dist/docs/html-reporter.d.ts +9 -0
- package/dist/docs/html-reporter.js +757 -0
- package/dist/docs/index.d.ts +10 -0
- package/dist/docs/index.js +11 -0
- package/dist/docs/junit-reporter.d.ts +18 -0
- package/dist/docs/junit-reporter.js +210 -0
- package/dist/docs/report.d.ts +14 -0
- package/dist/docs/report.js +44 -0
- package/dist/docs/sarif-reporter.d.ts +19 -0
- package/dist/docs/sarif-reporter.js +335 -0
- package/dist/docs/shared.d.ts +35 -0
- package/dist/docs/shared.js +162 -0
- package/dist/docs/templates.d.ts +12 -0
- package/dist/docs/templates.js +76 -0
- package/dist/errors/index.d.ts +6 -0
- package/dist/errors/index.js +6 -0
- package/dist/errors/retry.d.ts +92 -0
- package/dist/errors/retry.js +323 -0
- package/dist/errors/types.d.ts +321 -0
- package/dist/errors/types.js +584 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.js +32 -0
- package/dist/interview/dependency-resolver.d.ts +11 -0
- package/dist/interview/dependency-resolver.js +32 -0
- package/dist/interview/interviewer.d.ts +232 -0
- package/dist/interview/interviewer.js +1939 -0
- package/dist/interview/mock-response-generator.d.ts +7 -0
- package/dist/interview/mock-response-generator.js +102 -0
- package/dist/interview/orchestrator.d.ts +237 -0
- package/dist/interview/orchestrator.js +1296 -0
- package/dist/interview/rate-limiter.d.ts +15 -0
- package/dist/interview/rate-limiter.js +55 -0
- package/dist/interview/response-validator.d.ts +10 -0
- package/dist/interview/response-validator.js +132 -0
- package/dist/interview/schema-inferrer.d.ts +8 -0
- package/dist/interview/schema-inferrer.js +71 -0
- package/dist/interview/schema-test-generator.d.ts +71 -0
- package/dist/interview/schema-test-generator.js +834 -0
- package/dist/interview/smart-value-generator.d.ts +155 -0
- package/dist/interview/smart-value-generator.js +554 -0
- package/dist/interview/stateful-test-runner.d.ts +19 -0
- package/dist/interview/stateful-test-runner.js +106 -0
- package/dist/interview/types.d.ts +561 -0
- package/dist/interview/types.js +2 -0
- package/dist/llm/anthropic.d.ts +41 -0
- package/dist/llm/anthropic.js +355 -0
- package/dist/llm/client.d.ts +123 -0
- package/dist/llm/client.js +42 -0
- package/dist/llm/factory.d.ts +38 -0
- package/dist/llm/factory.js +145 -0
- package/dist/llm/fallback.d.ts +140 -0
- package/dist/llm/fallback.js +379 -0
- package/dist/llm/index.d.ts +18 -0
- package/dist/llm/index.js +15 -0
- package/dist/llm/ollama.d.ts +37 -0
- package/dist/llm/ollama.js +330 -0
- package/dist/llm/openai.d.ts +25 -0
- package/dist/llm/openai.js +320 -0
- package/dist/llm/token-budget.d.ts +161 -0
- package/dist/llm/token-budget.js +395 -0
- package/dist/logging/logger.d.ts +70 -0
- package/dist/logging/logger.js +130 -0
- package/dist/metrics/collector.d.ts +106 -0
- package/dist/metrics/collector.js +547 -0
- package/dist/metrics/index.d.ts +7 -0
- package/dist/metrics/index.js +7 -0
- package/dist/metrics/prometheus.d.ts +20 -0
- package/dist/metrics/prometheus.js +241 -0
- package/dist/metrics/types.d.ts +209 -0
- package/dist/metrics/types.js +5 -0
- package/dist/persona/builtins.d.ts +54 -0
- package/dist/persona/builtins.js +219 -0
- package/dist/persona/index.d.ts +8 -0
- package/dist/persona/index.js +8 -0
- package/dist/persona/loader.d.ts +30 -0
- package/dist/persona/loader.js +190 -0
- package/dist/persona/types.d.ts +144 -0
- package/dist/persona/types.js +5 -0
- package/dist/persona/validation.d.ts +94 -0
- package/dist/persona/validation.js +332 -0
- package/dist/prompts/index.d.ts +5 -0
- package/dist/prompts/index.js +5 -0
- package/dist/prompts/templates.d.ts +180 -0
- package/dist/prompts/templates.js +431 -0
- package/dist/registry/client.d.ts +49 -0
- package/dist/registry/client.js +191 -0
- package/dist/registry/index.d.ts +7 -0
- package/dist/registry/index.js +6 -0
- package/dist/registry/types.d.ts +140 -0
- package/dist/registry/types.js +6 -0
- package/dist/scenarios/evaluator.d.ts +43 -0
- package/dist/scenarios/evaluator.js +206 -0
- package/dist/scenarios/index.d.ts +10 -0
- package/dist/scenarios/index.js +9 -0
- package/dist/scenarios/loader.d.ts +20 -0
- package/dist/scenarios/loader.js +285 -0
- package/dist/scenarios/types.d.ts +153 -0
- package/dist/scenarios/types.js +8 -0
- package/dist/security/index.d.ts +17 -0
- package/dist/security/index.js +18 -0
- package/dist/security/payloads.d.ts +61 -0
- package/dist/security/payloads.js +268 -0
- package/dist/security/security-tester.d.ts +42 -0
- package/dist/security/security-tester.js +582 -0
- package/dist/security/types.d.ts +166 -0
- package/dist/security/types.js +8 -0
- package/dist/transport/base-transport.d.ts +59 -0
- package/dist/transport/base-transport.js +38 -0
- package/dist/transport/http-transport.d.ts +67 -0
- package/dist/transport/http-transport.js +238 -0
- package/dist/transport/mcp-client.d.ts +141 -0
- package/dist/transport/mcp-client.js +496 -0
- package/dist/transport/sse-transport.d.ts +88 -0
- package/dist/transport/sse-transport.js +316 -0
- package/dist/transport/stdio-transport.d.ts +43 -0
- package/dist/transport/stdio-transport.js +238 -0
- package/dist/transport/types.d.ts +125 -0
- package/dist/transport/types.js +16 -0
- package/dist/utils/concurrency.d.ts +123 -0
- package/dist/utils/concurrency.js +213 -0
- package/dist/utils/formatters.d.ts +16 -0
- package/dist/utils/formatters.js +37 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/jsonpath.d.ts +87 -0
- package/dist/utils/jsonpath.js +326 -0
- package/dist/utils/markdown.d.ts +113 -0
- package/dist/utils/markdown.js +265 -0
- package/dist/utils/network.d.ts +14 -0
- package/dist/utils/network.js +17 -0
- package/dist/utils/sanitize.d.ts +92 -0
- package/dist/utils/sanitize.js +191 -0
- package/dist/utils/semantic.d.ts +194 -0
- package/dist/utils/semantic.js +1051 -0
- package/dist/utils/smart-truncate.d.ts +94 -0
- package/dist/utils/smart-truncate.js +361 -0
- package/dist/utils/timeout.d.ts +153 -0
- package/dist/utils/timeout.js +205 -0
- package/dist/utils/yaml-parser.d.ts +58 -0
- package/dist/utils/yaml-parser.js +86 -0
- package/dist/validation/index.d.ts +32 -0
- package/dist/validation/index.js +32 -0
- package/dist/validation/semantic-test-generator.d.ts +50 -0
- package/dist/validation/semantic-test-generator.js +176 -0
- package/dist/validation/semantic-types.d.ts +66 -0
- package/dist/validation/semantic-types.js +94 -0
- package/dist/validation/semantic-validator.d.ts +38 -0
- package/dist/validation/semantic-validator.js +340 -0
- package/dist/verification/index.d.ts +6 -0
- package/dist/verification/index.js +5 -0
- package/dist/verification/types.d.ts +133 -0
- package/dist/verification/types.js +5 -0
- package/dist/verification/verifier.d.ts +30 -0
- package/dist/verification/verifier.js +309 -0
- package/dist/version.d.ts +19 -0
- package/dist/version.js +48 -0
- package/dist/workflow/auto-generator.d.ts +27 -0
- package/dist/workflow/auto-generator.js +513 -0
- package/dist/workflow/discovery.d.ts +40 -0
- package/dist/workflow/discovery.js +195 -0
- package/dist/workflow/executor.d.ts +82 -0
- package/dist/workflow/executor.js +611 -0
- package/dist/workflow/index.d.ts +10 -0
- package/dist/workflow/index.js +10 -0
- package/dist/workflow/loader.d.ts +24 -0
- package/dist/workflow/loader.js +194 -0
- package/dist/workflow/state-tracker.d.ts +98 -0
- package/dist/workflow/state-tracker.js +424 -0
- package/dist/workflow/types.d.ts +337 -0
- package/dist/workflow/types.js +5 -0
- package/package.json +94 -0
- package/schemas/bellwether-check.schema.json +651 -0
|
@@ -0,0 +1,437 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Explore command - LLM-powered behavioral exploration for MCP servers.
|
|
3
|
+
*
|
|
4
|
+
* Purpose: Deep exploration and documentation of MCP server behavior.
|
|
5
|
+
* Output: Documentation + JSON report (filenames configurable via output.files)
|
|
6
|
+
* Baseline: None (use 'bellwether check' for drift detection)
|
|
7
|
+
* LLM: Required (OpenAI, Anthropic, or Ollama)
|
|
8
|
+
*/
|
|
9
|
+
import { Command } from 'commander';
|
|
10
|
+
import { writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
11
|
+
import { join } from 'path';
|
|
12
|
+
import { MCPClient } from '../../transport/mcp-client.js';
|
|
13
|
+
import { discover } from '../../discovery/discovery.js';
|
|
14
|
+
import { Interviewer } from '../../interview/interviewer.js';
|
|
15
|
+
import { generateAgentsMd, generateJsonReport } from '../../docs/generator.js';
|
|
16
|
+
import { loadConfig, ConfigNotFoundError } from '../../config/loader.js';
|
|
17
|
+
import { validateConfigForExplore } from '../../config/validator.js';
|
|
18
|
+
import { CostTracker, estimateInterviewCost, estimateInterviewTime, formatCostAndTimeEstimate, suggestOptimizations, formatOptimizationSuggestions, } from '../../cost/index.js';
|
|
19
|
+
import { getMetricsCollector, resetMetricsCollector } from '../../metrics/collector.js';
|
|
20
|
+
import { EXIT_CODES, WORKFLOW, PATHS } from '../../constants.js';
|
|
21
|
+
import { FallbackLLMClient } from '../../llm/fallback.js';
|
|
22
|
+
import { getGlobalCache, resetGlobalCache } from '../../cache/response-cache.js';
|
|
23
|
+
import { InterviewProgressBar, formatExploreBanner } from '../utils/progress.js';
|
|
24
|
+
import { parsePersonas } from '../../persona/builtins.js';
|
|
25
|
+
import { loadScenariosFromFile, tryLoadDefaultScenarios, DEFAULT_SCENARIOS_FILE } from '../../scenarios/index.js';
|
|
26
|
+
import { loadWorkflowsFromFile, tryLoadDefaultWorkflows, DEFAULT_WORKFLOWS_FILE } from '../../workflow/loader.js';
|
|
27
|
+
import * as output from '../output.js';
|
|
28
|
+
import { StreamingDisplay } from '../output.js';
|
|
29
|
+
import { suppressLogs, restoreLogLevel, configureLogger } from '../../logging/logger.js';
|
|
30
|
+
import { extractServerContextFromArgs } from '../utils/server-context.js';
|
|
31
|
+
import { isCI } from '../utils/env.js';
|
|
32
|
+
/**
 * Resolve persona names through `parsePersonas`, printing a CLI warning
 * for any name that `parsePersonas` reports back as unknown.
 *
 * @param personaList - Persona names to resolve (the caller passes the
 *   `explore.personas` value from the loaded configuration).
 * @returns Whatever `parsePersonas` returns for the given list.
 */
function parsePersonasWithWarning(personaList) {
    // Callback handed to parsePersonas; it receives the unrecognized name
    // together with the list of valid names and surfaces both as a warning.
    const warnAboutUnknownPersona = (unknownName, validNames) => {
        output.warn(`Unknown persona: ${unknownName}. Available: ${validNames.join(', ')}`);
    };
    return parsePersonas(personaList, warnAboutUnknownPersona);
}
|
|
40
|
+
export const exploreCommand = new Command('explore')
|
|
41
|
+
.description('Explore MCP server behavior with LLM-powered testing')
|
|
42
|
+
.argument('[server-command]', 'Server command (overrides config)')
|
|
43
|
+
.argument('[args...]', 'Server arguments')
|
|
44
|
+
.option('-c, --config <path>', 'Path to config file', PATHS.DEFAULT_CONFIG_FILENAME)
|
|
45
|
+
.action(async (serverCommandArg, serverArgs, options) => {
|
|
46
|
+
// Load configuration
|
|
47
|
+
let config;
|
|
48
|
+
try {
|
|
49
|
+
config = loadConfig(options.config);
|
|
50
|
+
}
|
|
51
|
+
catch (error) {
|
|
52
|
+
if (error instanceof ConfigNotFoundError) {
|
|
53
|
+
output.error(error.message);
|
|
54
|
+
process.exit(EXIT_CODES.ERROR);
|
|
55
|
+
}
|
|
56
|
+
throw error;
|
|
57
|
+
}
|
|
58
|
+
// Determine server command (CLI arg overrides config)
|
|
59
|
+
const serverCommand = serverCommandArg || config.server.command;
|
|
60
|
+
const args = serverArgs.length > 0 ? serverArgs : config.server.args;
|
|
61
|
+
// Validate config for explore
|
|
62
|
+
try {
|
|
63
|
+
validateConfigForExplore(config, serverCommand);
|
|
64
|
+
}
|
|
65
|
+
catch (error) {
|
|
66
|
+
output.error(error instanceof Error ? error.message : String(error));
|
|
67
|
+
process.exit(EXIT_CODES.ERROR);
|
|
68
|
+
}
|
|
69
|
+
// Extract settings from config
|
|
70
|
+
const timeout = config.server.timeout;
|
|
71
|
+
const outputDir = config.output.dir;
|
|
72
|
+
const docsDir = config.output.docsDir;
|
|
73
|
+
const cacheEnabled = config.cache.enabled;
|
|
74
|
+
const verbose = config.logging.verbose;
|
|
75
|
+
const logLevel = config.logging.level;
|
|
76
|
+
if (!process.env.BELLWETHER_LOG_OVERRIDE) {
|
|
77
|
+
const effectiveLogLevel = verbose ? logLevel : 'silent';
|
|
78
|
+
configureLogger({ level: effectiveLogLevel });
|
|
79
|
+
}
|
|
80
|
+
// Parse personas from config (using explore section)
|
|
81
|
+
const selectedPersonas = parsePersonasWithWarning(config.explore.personas);
|
|
82
|
+
const maxQuestions = config.explore.maxQuestionsPerTool;
|
|
83
|
+
const parallelPersonas = config.explore.parallelPersonas;
|
|
84
|
+
const personaConcurrency = config.explore.personaConcurrency;
|
|
85
|
+
// Get LLM settings from config
|
|
86
|
+
const provider = config.llm.provider;
|
|
87
|
+
const model = config.llm.model || undefined;
|
|
88
|
+
// Display startup banner
|
|
89
|
+
const banner = formatExploreBanner({
|
|
90
|
+
serverCommand: `${serverCommand} ${args.join(' ')}`,
|
|
91
|
+
provider,
|
|
92
|
+
model: model || 'default',
|
|
93
|
+
personas: selectedPersonas.map((p) => p.name),
|
|
94
|
+
questionsPerTool: maxQuestions,
|
|
95
|
+
});
|
|
96
|
+
output.info(banner);
|
|
97
|
+
output.newline();
|
|
98
|
+
output.info(`Explore: LLM-powered behavioral exploration (using ${provider})`);
|
|
99
|
+
output.newline();
|
|
100
|
+
// Initialize cost tracker
|
|
101
|
+
const costTracker = new CostTracker(model || 'default');
|
|
102
|
+
// Initialize metrics collector
|
|
103
|
+
resetMetricsCollector();
|
|
104
|
+
const metricsCollector = getMetricsCollector();
|
|
105
|
+
metricsCollector.startInterview();
|
|
106
|
+
// Initialize cache
|
|
107
|
+
resetGlobalCache();
|
|
108
|
+
const cache = getGlobalCache({ enabled: cacheEnabled });
|
|
109
|
+
if (cacheEnabled && verbose) {
|
|
110
|
+
output.info('Response caching enabled');
|
|
111
|
+
}
|
|
112
|
+
// Initialize MCP client
|
|
113
|
+
const mcpClient = new MCPClient({
|
|
114
|
+
timeout,
|
|
115
|
+
debug: logLevel === 'debug',
|
|
116
|
+
transport: 'stdio',
|
|
117
|
+
});
|
|
118
|
+
// Initialize LLM client
|
|
119
|
+
let llmClient;
|
|
120
|
+
const onUsageCallback = (inputTokens, outputTokens) => {
|
|
121
|
+
costTracker.addUsage(inputTokens, outputTokens);
|
|
122
|
+
metricsCollector.recordTokenUsage(provider, model || 'default', inputTokens, outputTokens, 'llm_call');
|
|
123
|
+
};
|
|
124
|
+
try {
|
|
125
|
+
llmClient = new FallbackLLMClient({
|
|
126
|
+
providers: [{ provider, model, baseUrl: provider === 'ollama' ? config.llm.ollama.baseUrl : undefined }],
|
|
127
|
+
useOllamaFallback: true,
|
|
128
|
+
onUsage: onUsageCallback,
|
|
129
|
+
});
|
|
130
|
+
}
|
|
131
|
+
catch (error) {
|
|
132
|
+
output.error('Failed to initialize LLM client: ' + (error instanceof Error ? error.message : String(error)));
|
|
133
|
+
output.error(`\nProvider: ${provider}`);
|
|
134
|
+
output.error('Make sure the appropriate API key environment variable is set:');
|
|
135
|
+
output.error(' - OpenAI: OPENAI_API_KEY');
|
|
136
|
+
output.error(' - Anthropic: ANTHROPIC_API_KEY');
|
|
137
|
+
output.error(' - Ollama: No API key needed (ensure Ollama is running)');
|
|
138
|
+
process.exit(EXIT_CODES.ERROR);
|
|
139
|
+
}
|
|
140
|
+
try {
|
|
141
|
+
// Connect to MCP server
|
|
142
|
+
output.info('Connecting to MCP server...');
|
|
143
|
+
await mcpClient.connect(serverCommand, args, config.server.env);
|
|
144
|
+
// Discovery phase
|
|
145
|
+
output.info('Discovering capabilities...');
|
|
146
|
+
const discovery = await discover(mcpClient, serverCommand, args);
|
|
147
|
+
const resourceCount = discovery.resources?.length ?? 0;
|
|
148
|
+
const discoveryParts = [`${discovery.tools.length} tools`, `${discovery.prompts.length} prompts`];
|
|
149
|
+
if (resourceCount > 0) {
|
|
150
|
+
discoveryParts.push(`${resourceCount} resources`);
|
|
151
|
+
}
|
|
152
|
+
output.info(`Found ${discoveryParts.join(', ')}\n`);
|
|
153
|
+
// Update metrics
|
|
154
|
+
metricsCollector.updateInterviewCounters({
|
|
155
|
+
toolsDiscovered: discovery.tools.length,
|
|
156
|
+
personasUsed: selectedPersonas.length,
|
|
157
|
+
});
|
|
158
|
+
if (discovery.tools.length === 0) {
|
|
159
|
+
output.info('No tools found. Nothing to explore.');
|
|
160
|
+
metricsCollector.endInterview();
|
|
161
|
+
await mcpClient.disconnect();
|
|
162
|
+
return;
|
|
163
|
+
}
|
|
164
|
+
// Show cost/time estimate (unless in CI)
|
|
165
|
+
if (!isCI()) {
|
|
166
|
+
const costEstimate = estimateInterviewCost(model || 'default', discovery.tools.length, maxQuestions, selectedPersonas.length);
|
|
167
|
+
const timeEstimate = estimateInterviewTime(discovery.tools.length, maxQuestions, selectedPersonas.length, parallelPersonas, provider, discovery.prompts.length, resourceCount, false);
|
|
168
|
+
output.info(formatCostAndTimeEstimate(costEstimate, timeEstimate));
|
|
169
|
+
// Show optimization suggestions
|
|
170
|
+
const hasScenariosFile = !!(config.scenarios.path || existsSync(join(outputDir, DEFAULT_SCENARIOS_FILE)));
|
|
171
|
+
const suggestions = suggestOptimizations({
|
|
172
|
+
estimatedCost: costEstimate.costUSD,
|
|
173
|
+
toolCount: discovery.tools.length,
|
|
174
|
+
personaCount: selectedPersonas.length,
|
|
175
|
+
isParallelPersonas: parallelPersonas,
|
|
176
|
+
isPremiumModel: false,
|
|
177
|
+
isUsingCiPreset: false,
|
|
178
|
+
hasScenariosFile,
|
|
179
|
+
});
|
|
180
|
+
if (suggestions.length > 0) {
|
|
181
|
+
output.newline();
|
|
182
|
+
output.info(formatOptimizationSuggestions(suggestions));
|
|
183
|
+
}
|
|
184
|
+
output.newline();
|
|
185
|
+
}
|
|
186
|
+
// Load custom scenarios (work in explore mode too)
|
|
187
|
+
let customScenarios;
|
|
188
|
+
if (config.scenarios.path) {
|
|
189
|
+
try {
|
|
190
|
+
customScenarios = loadScenariosFromFile(config.scenarios.path);
|
|
191
|
+
output.info(`Loaded ${customScenarios.toolScenarios.length} tool scenarios from ${config.scenarios.path}`);
|
|
192
|
+
}
|
|
193
|
+
catch (error) {
|
|
194
|
+
output.error(`Failed to load scenarios: ${error instanceof Error ? error.message : error}`);
|
|
195
|
+
process.exit(EXIT_CODES.ERROR);
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
else {
|
|
199
|
+
const defaultScenarios = tryLoadDefaultScenarios(outputDir);
|
|
200
|
+
if (defaultScenarios) {
|
|
201
|
+
customScenarios = defaultScenarios;
|
|
202
|
+
output.info(`Auto-loaded ${customScenarios.toolScenarios.length} scenarios from ${DEFAULT_SCENARIOS_FILE}`);
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
// Build workflow configuration
|
|
206
|
+
let workflowConfig;
|
|
207
|
+
if (config.workflows.path || config.workflows.discover) {
|
|
208
|
+
workflowConfig = {
|
|
209
|
+
discoverWorkflows: config.workflows.discover,
|
|
210
|
+
maxDiscoveredWorkflows: WORKFLOW.MAX_DISCOVERED_WORKFLOWS,
|
|
211
|
+
enableStateTracking: config.workflows.trackState,
|
|
212
|
+
stepTimeout: config.workflows.stepTimeout,
|
|
213
|
+
timeouts: config.workflows.timeouts,
|
|
214
|
+
};
|
|
215
|
+
if (config.workflows.path) {
|
|
216
|
+
try {
|
|
217
|
+
const workflows = loadWorkflowsFromFile(config.workflows.path);
|
|
218
|
+
workflowConfig.workflows = workflows;
|
|
219
|
+
workflowConfig.workflowsFile = config.workflows.path;
|
|
220
|
+
output.info(`Loaded ${workflows.length} workflow(s) from ${config.workflows.path}`);
|
|
221
|
+
}
|
|
222
|
+
catch (error) {
|
|
223
|
+
output.error(`Failed to load workflows: ${error instanceof Error ? error.message : error}`);
|
|
224
|
+
process.exit(EXIT_CODES.ERROR);
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
else {
|
|
229
|
+
const defaultWorkflows = tryLoadDefaultWorkflows(outputDir);
|
|
230
|
+
if (defaultWorkflows && defaultWorkflows.length > 0) {
|
|
231
|
+
workflowConfig = {
|
|
232
|
+
discoverWorkflows: false,
|
|
233
|
+
maxDiscoveredWorkflows: WORKFLOW.MAX_DISCOVERED_WORKFLOWS,
|
|
234
|
+
enableStateTracking: config.workflows.trackState,
|
|
235
|
+
stepTimeout: config.workflows.stepTimeout,
|
|
236
|
+
timeouts: config.workflows.timeouts,
|
|
237
|
+
workflows: defaultWorkflows,
|
|
238
|
+
workflowsFile: `${outputDir}/${DEFAULT_WORKFLOWS_FILE}`,
|
|
239
|
+
};
|
|
240
|
+
output.info(`Auto-loaded ${defaultWorkflows.length} workflow(s) from ${DEFAULT_WORKFLOWS_FILE}`);
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
// Set up streaming display
|
|
244
|
+
let streamingDisplay = null;
|
|
245
|
+
let streamingCallbacks;
|
|
246
|
+
if (!isCI() && logLevel !== 'silent') {
|
|
247
|
+
suppressLogs();
|
|
248
|
+
streamingDisplay = new StreamingDisplay({ style: 'dim', maxWidth: 100 });
|
|
249
|
+
streamingCallbacks = {
|
|
250
|
+
onStart: (operation) => {
|
|
251
|
+
const parts = operation.split(':');
|
|
252
|
+
const opType = parts[0];
|
|
253
|
+
const context = parts[1];
|
|
254
|
+
let prefix = '';
|
|
255
|
+
switch (opType) {
|
|
256
|
+
case 'generate-questions':
|
|
257
|
+
prefix = context ? `\n Generating questions for ${context}... ` : '\n Generating questions... ';
|
|
258
|
+
break;
|
|
259
|
+
case 'analyze':
|
|
260
|
+
prefix = context ? `\n Analyzing ${context}... ` : '\n Analyzing... ';
|
|
261
|
+
break;
|
|
262
|
+
case 'synthesize-tool':
|
|
263
|
+
prefix = context ? `\n Synthesizing profile for ${context}... ` : '\n Synthesizing profile... ';
|
|
264
|
+
break;
|
|
265
|
+
case 'synthesize-overall':
|
|
266
|
+
prefix = '\n Synthesizing overall findings... ';
|
|
267
|
+
break;
|
|
268
|
+
default:
|
|
269
|
+
prefix = '\n Processing... ';
|
|
270
|
+
}
|
|
271
|
+
streamingDisplay?.start(prefix);
|
|
272
|
+
},
|
|
273
|
+
onChunk: (chunk) => {
|
|
274
|
+
streamingDisplay?.write(chunk);
|
|
275
|
+
},
|
|
276
|
+
onComplete: () => {
|
|
277
|
+
streamingDisplay?.finish(' [done]');
|
|
278
|
+
},
|
|
279
|
+
onError: (error) => {
|
|
280
|
+
streamingDisplay?.abort(`[error: ${error.message}]`);
|
|
281
|
+
},
|
|
282
|
+
};
|
|
283
|
+
}
|
|
284
|
+
// Create interviewer for explore mode
|
|
285
|
+
const fullServerCommand = `${serverCommand} ${args.join(' ')}`.trim();
|
|
286
|
+
const interviewer = new Interviewer(llmClient, {
|
|
287
|
+
maxQuestionsPerTool: maxQuestions,
|
|
288
|
+
timeout,
|
|
289
|
+
skipErrorTests: config.explore.skipErrorTests,
|
|
290
|
+
model: model || 'default',
|
|
291
|
+
personas: selectedPersonas,
|
|
292
|
+
customScenarios,
|
|
293
|
+
customScenariosOnly: config.scenarios.only,
|
|
294
|
+
enableStreaming: !!streamingCallbacks,
|
|
295
|
+
streamingCallbacks,
|
|
296
|
+
parallelPersonas,
|
|
297
|
+
personaConcurrency,
|
|
298
|
+
cache,
|
|
299
|
+
workflowConfig,
|
|
300
|
+
checkMode: false, // Full exploration mode with LLM
|
|
301
|
+
serverCommand: fullServerCommand,
|
|
302
|
+
});
|
|
303
|
+
// Extract server context
|
|
304
|
+
const serverContext = extractServerContextFromArgs(serverCommand, args);
|
|
305
|
+
if (serverContext.allowedDirectories && serverContext.allowedDirectories.length > 0) {
|
|
306
|
+
output.info(`Detected allowed directories: ${serverContext.allowedDirectories.join(', ')}`);
|
|
307
|
+
}
|
|
308
|
+
interviewer.setServerContext(serverContext);
|
|
309
|
+
// Set up progress display
|
|
310
|
+
const progressBar = new InterviewProgressBar({ enabled: !verbose && !streamingCallbacks });
|
|
311
|
+
const progressCallback = (progress) => {
|
|
312
|
+
if (verbose) {
|
|
313
|
+
switch (progress.phase) {
|
|
314
|
+
case 'starting':
|
|
315
|
+
output.info('Starting exploration...');
|
|
316
|
+
progressBar.start(progress.totalTools, progress.totalPersonas, progress.totalPrompts ?? 0, progress.totalResources ?? 0);
|
|
317
|
+
break;
|
|
318
|
+
case 'interviewing':
|
|
319
|
+
output.info(`[${progress.currentPersona}] Exploring: ${progress.currentTool} (${progress.toolsCompleted + 1}/${progress.totalTools})`);
|
|
320
|
+
break;
|
|
321
|
+
case 'synthesizing':
|
|
322
|
+
output.info('Synthesizing findings...');
|
|
323
|
+
break;
|
|
324
|
+
case 'complete':
|
|
325
|
+
output.info('Exploration complete!');
|
|
326
|
+
break;
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
else {
|
|
330
|
+
if (progress.phase === 'starting') {
|
|
331
|
+
progressBar.start(progress.totalTools, progress.totalPersonas, progress.totalPrompts ?? 0, progress.totalResources ?? 0);
|
|
332
|
+
}
|
|
333
|
+
else if (['interviewing', 'prompts', 'resources', 'workflows'].includes(progress.phase)) {
|
|
334
|
+
progressBar.update(progress);
|
|
335
|
+
}
|
|
336
|
+
else if (progress.phase === 'complete' || progress.phase === 'synthesizing') {
|
|
337
|
+
progressBar.stop();
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
};
|
|
341
|
+
output.info('Starting exploration...\n');
|
|
342
|
+
const result = await interviewer.interview(mcpClient, discovery, progressCallback);
|
|
343
|
+
progressBar.stop();
|
|
344
|
+
if (!verbose) {
|
|
345
|
+
output.newline();
|
|
346
|
+
}
|
|
347
|
+
// Generate documentation
|
|
348
|
+
output.info('Generating documentation...');
|
|
349
|
+
mkdirSync(outputDir, { recursive: true });
|
|
350
|
+
if (docsDir !== outputDir) {
|
|
351
|
+
mkdirSync(docsDir, { recursive: true });
|
|
352
|
+
}
|
|
353
|
+
const agentsMd = generateAgentsMd(result);
|
|
354
|
+
const agentsMdPath = join(docsDir, config.output.files.agentsDoc);
|
|
355
|
+
writeFileSync(agentsMdPath, agentsMd);
|
|
356
|
+
output.info(`Written: ${agentsMdPath}`);
|
|
357
|
+
// Generate JSON report
|
|
358
|
+
const jsonReport = generateJsonReport(result);
|
|
359
|
+
const jsonPath = join(outputDir, config.output.files.exploreReport);
|
|
360
|
+
writeFileSync(jsonPath, jsonReport);
|
|
361
|
+
output.info(`Written: ${jsonPath}`);
|
|
362
|
+
// End metrics
|
|
363
|
+
metricsCollector.endInterview();
|
|
364
|
+
output.info('\nExploration complete!');
|
|
365
|
+
output.info(`Duration: ${(result.metadata.durationMs / 1000).toFixed(1)}s`);
|
|
366
|
+
output.info(`Tools explored: ${result.toolProfiles.length}`);
|
|
367
|
+
// Display cost summary
|
|
368
|
+
const costEstimate = costTracker.getCost();
|
|
369
|
+
if (costEstimate.costUSD > 0) {
|
|
370
|
+
output.info(`Estimated cost: $${costEstimate.costUSD.toFixed(4)}`);
|
|
371
|
+
}
|
|
372
|
+
// Display scenario results
|
|
373
|
+
if (result.scenarioResults && result.scenarioResults.length > 0) {
|
|
374
|
+
const passed = result.scenarioResults.filter((r) => r.passed).length;
|
|
375
|
+
const failed = result.scenarioResults.length - passed;
|
|
376
|
+
const statusIcon = failed === 0 ? '\u2713' : '\u2717';
|
|
377
|
+
output.info(`\nCustom scenarios: ${passed}/${result.scenarioResults.length} passed ${statusIcon}`);
|
|
378
|
+
if (failed > 0) {
|
|
379
|
+
output.info('\nFailed scenarios:');
|
|
380
|
+
for (const scenarioResult of result.scenarioResults.filter((r) => !r.passed)) {
|
|
381
|
+
const scenario = scenarioResult.scenario;
|
|
382
|
+
const toolOrPrompt = 'tool' in scenario ? scenario.tool : scenario.prompt;
|
|
383
|
+
output.info(` - ${toolOrPrompt}: ${scenario.description}`);
|
|
384
|
+
if (scenarioResult.error) {
|
|
385
|
+
output.info(` Error: ${scenarioResult.error}`);
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
// Display workflow results
|
|
391
|
+
if (result.workflowResults && result.workflowResults.length > 0) {
|
|
392
|
+
const successful = result.workflowResults.filter((wr) => wr.success).length;
|
|
393
|
+
const failed = result.workflowResults.length - successful;
|
|
394
|
+
const statusIcon = failed === 0 ? '\u2713' : '\u2717';
|
|
395
|
+
output.info(`\nWorkflows: ${successful}/${result.workflowResults.length} passed ${statusIcon}`);
|
|
396
|
+
if (failed > 0) {
|
|
397
|
+
output.info('\nFailed workflows:');
|
|
398
|
+
for (const wr of result.workflowResults.filter((w) => !w.success)) {
|
|
399
|
+
output.info(` - ${wr.workflow.name}: ${wr.failureReason ?? 'Unknown error'}`);
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
// Note about baselines
|
|
404
|
+
output.info('\nTip: For drift detection, use "bellwether check" to create and compare baselines.');
|
|
405
|
+
}
|
|
406
|
+
catch (error) {
|
|
407
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
408
|
+
output.error('\n--- Exploration Failed ---');
|
|
409
|
+
output.error(`Error: ${errorMessage}`);
|
|
410
|
+
if (errorMessage.includes('ECONNREFUSED') || errorMessage.includes('Connection refused')) {
|
|
411
|
+
output.error('\nPossible causes:');
|
|
412
|
+
output.error(' - The MCP server is not running');
|
|
413
|
+
output.error(' - The server address/port is incorrect');
|
|
414
|
+
}
|
|
415
|
+
else if (errorMessage.includes('timeout') || errorMessage.includes('Timeout')) {
|
|
416
|
+
output.error('\nPossible causes:');
|
|
417
|
+
output.error(' - The MCP server is taking too long to respond');
|
|
418
|
+
output.error(' - Increase server.timeout in bellwether.yaml');
|
|
419
|
+
}
|
|
420
|
+
else if (errorMessage.includes('ENOENT') || errorMessage.includes('not found')) {
|
|
421
|
+
output.error('\nPossible causes:');
|
|
422
|
+
output.error(' - The server command was not found');
|
|
423
|
+
output.error(' - Check that the command is installed and in PATH');
|
|
424
|
+
}
|
|
425
|
+
else if (errorMessage.includes('API key') || errorMessage.includes('authentication')) {
|
|
426
|
+
output.error('\nPossible causes:');
|
|
427
|
+
output.error(' - Missing or invalid API key');
|
|
428
|
+
output.error(' - Run "bellwether auth" to configure API keys');
|
|
429
|
+
}
|
|
430
|
+
process.exit(EXIT_CODES.ERROR);
|
|
431
|
+
}
|
|
432
|
+
finally {
|
|
433
|
+
restoreLogLevel();
|
|
434
|
+
await mcpClient.disconnect();
|
|
435
|
+
}
|
|
436
|
+
});
|
|
437
|
+
//# sourceMappingURL=explore.js.map
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Feedback command - submit feedback on drift detection decisions.
|
|
3
|
+
*
|
|
4
|
+
* Allows users to report false positives, false negatives, and
|
|
5
|
+
* confidence calibration issues for algorithm improvement.
|
|
6
|
+
*/
|
|
7
|
+
import { Command } from 'commander';
|
|
8
|
+
/**
 * Builds the `feedback` CLI command.
 *
 * The returned command takes an optional `[decision-id]` argument and the
 * options `--type`, `--message`, `--correct`, `--list`, `--stats`,
 * `--analyze`, `--export <path>` and `--clear`.
 *
 * @returns A configured commander `Command` named `feedback`.
 */
export declare function createFeedbackCommand(): Command;
|
|
9
|
+
//# sourceMappingURL=feedback.d.ts.map
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Feedback command - submit feedback on drift detection decisions.
|
|
3
|
+
*
|
|
4
|
+
* Allows users to report false positives, false negatives, and
|
|
5
|
+
* confidence calibration issues for algorithm improvement.
|
|
6
|
+
*/
|
|
7
|
+
import { Command } from 'commander';
|
|
8
|
+
import chalk from 'chalk';
|
|
9
|
+
import { getDecisionLogger, getFeedbackManager, } from '../../baseline/telemetry.js';
|
|
10
|
+
/**
 * Build the `feedback` subcommand.
 *
 * Registers the optional decision-id argument and every supported flag, then
 * wires the action to `handleFeedback`.
 *
 * @returns {Command} The configured commander command.
 */
export function createFeedbackCommand() {
    // Flags are registered in this exact order so the generated help output
    // lists them the same way as before.
    const optionSpecs = [
        ['-t, --type <type>', 'Feedback type: false_positive, false_negative, confidence_wrong'],
        ['-m, --message <message>', 'Comment explaining the issue'],
        ['--correct <answer>', 'What the correct answer should have been (true/false)'],
        ['--list', 'List recent decisions that can receive feedback'],
        ['--stats', 'Show feedback statistics'],
        ['--analyze', 'Analyze all feedback for patterns'],
        ['--export <path>', 'Export decisions to file for analysis'],
        ['--clear', 'Clear all logged decisions and feedback'],
    ];
    const command = new Command('feedback');
    command.description('Submit feedback on drift detection decisions');
    command.argument('[decision-id]', 'ID of the comparison decision to report');
    for (const [flags, help] of optionSpecs) {
        command.option(flags, help);
    }
    command.action(async (decisionId, options) => {
        await handleFeedback(decisionId, options);
    });
    return command;
}
|
|
27
|
+
/** Feedback types accepted by `--type`. */
const VALID_FEEDBACK_TYPES = ['false_positive', 'false_negative', 'confidence_wrong'];

/**
 * Print a usage error (red message plus gray hint) and exit with status 1.
 *
 * @param {string} message - Error text shown in red.
 * @param {string} hint - Follow-up guidance shown in gray.
 */
function reportUsageError(message, hint) {
    console.log(chalk.red(message));
    console.log(chalk.gray(hint));
    process.exit(1);
}

/**
 * Print the 20 most recently logged decisions, newest first.
 *
 * @param logger - Decision logger returned by `getDecisionLogger()`.
 */
function printRecentDecisions(logger) {
    const decisions = logger.loadAllDecisions();
    if (decisions.length === 0) {
        console.log(chalk.gray('No logged decisions found.'));
        console.log(chalk.gray('Decisions are logged when running baseline comparisons.'));
        return;
    }
    console.log('');
    console.log(chalk.bold('Recent Comparison Decisions'));
    console.log('─'.repeat(60));
    // Show last 20 decisions
    const recent = decisions.slice(-20).reverse();
    for (const decision of recent) {
        const time = new Date(decision.timestamp).toLocaleString();
        const match = decision.matchDecision ? chalk.green('match') : chalk.red('no-match');
        console.log('');
        // Only the first 8 characters of the ID are shown; submission accepts
        // such a prefix as long as it identifies a single decision.
        console.log(` ${chalk.cyan(decision.id.slice(0, 8))}`);
        console.log(` Time: ${chalk.gray(time)}`);
        console.log(` Type: ${decision.type} | Tool: ${decision.toolName}`);
        console.log(` Decision: ${match} (confidence: ${decision.rawConfidence}%)`);
        console.log(` Text 1: "${truncate(decision.text1, 50)}"`);
        console.log(` Text 2: "${truncate(decision.text2, 50)}"`);
    }
    console.log('');
    console.log(chalk.gray(`Total decisions: ${decisions.length}`));
    console.log(chalk.gray('Use: bellwether feedback <id> --type false_positive'));
}

/**
 * Print decision telemetry statistics followed by feedback statistics.
 *
 * @param logger - Decision logger returned by `getDecisionLogger()`.
 * @param feedbackManager - Feedback manager returned by `getFeedbackManager()`.
 */
function printStatistics(logger, feedbackManager) {
    const stats = logger.getStatistics();
    const feedbackAnalysis = feedbackManager.analyze();
    console.log('');
    console.log(chalk.bold('Telemetry Statistics'));
    console.log('─'.repeat(40));
    console.log(` Total decisions logged: ${stats.totalDecisions}`);
    console.log(` Average confidence: ${stats.averageConfidence}%`);
    console.log(` Match rate: ${stats.matchRate}%`);
    console.log('');
    console.log(chalk.gray(' By type:'));
    for (const [type, count] of Object.entries(stats.byType)) {
        console.log(` ${type}: ${count}`);
    }
    console.log('');
    console.log(chalk.bold('Feedback Statistics'));
    console.log('─'.repeat(40));
    console.log(` Total feedback reports: ${feedbackAnalysis.totalReports}`);
    console.log(` False positive rate: ${feedbackAnalysis.falsePositiveRate}%`);
    console.log(` False negative rate: ${feedbackAnalysis.falseNegativeRate}%`);
    console.log(` Confidence issues: ${feedbackAnalysis.confidenceIssueRate}%`);
    console.log('');
}

/**
 * Print the feedback pattern analysis (issue breakdown and common patterns).
 *
 * @param feedbackManager - Feedback manager returned by `getFeedbackManager()`.
 */
function printFeedbackAnalysis(feedbackManager) {
    const analysis = feedbackManager.analyze();
    console.log('');
    console.log(chalk.bold('Feedback Pattern Analysis'));
    console.log('─'.repeat(40));
    if (analysis.totalReports === 0) {
        console.log(chalk.gray(' No feedback submitted yet.'));
        console.log(chalk.gray(' Use: bellwether feedback <id> --type <type>'));
        return;
    }
    console.log(` Total reports: ${analysis.totalReports}`);
    console.log('');
    console.log(' Issue breakdown:');
    console.log(` False positives: ${analysis.falsePositiveRate}%`);
    console.log(` False negatives: ${analysis.falseNegativeRate}%`);
    console.log(` Confidence issues: ${analysis.confidenceIssueRate}%`);
    if (analysis.commonPatterns.length > 0) {
        console.log('');
        console.log(' Common patterns in feedback:');
        for (const pattern of analysis.commonPatterns) {
            console.log(` "${pattern.pattern}": ${pattern.count} occurrences (${pattern.feedbackType})`);
        }
    }
    console.log('');
}

/**
 * Entry point for the `feedback` command.
 *
 * Exactly one mode runs per invocation, checked in this order: `--list`,
 * `--stats`, `--analyze`, `--export`, `--clear`, and finally feedback
 * submission (which requires a decision ID and `--type`).
 *
 * Invalid submissions print an error and exit the process with status 1.
 *
 * @param {string | undefined} decisionId - Full decision ID or a unique prefix of one.
 * @param {object} options - Parsed commander options for the command.
 * @returns {Promise<void>}
 */
async function handleFeedback(decisionId, options) {
    const logger = getDecisionLogger();
    const feedbackManager = getFeedbackManager();
    // List recent decisions
    if (options.list) {
        printRecentDecisions(logger);
        return;
    }
    // Show statistics
    if (options.stats) {
        printStatistics(logger, feedbackManager);
        return;
    }
    // Analyze feedback patterns
    if (options.analyze) {
        printFeedbackAnalysis(feedbackManager);
        return;
    }
    // Export decisions
    if (options.export) {
        logger.exportToFile(options.export);
        console.log(chalk.green(`Decisions exported to: ${options.export}`));
        return;
    }
    // Clear all data
    if (options.clear) {
        logger.clear();
        feedbackManager.clear();
        console.log(chalk.green('Cleared all decisions and feedback.'));
        return;
    }
    // Submit feedback. Each failure path returns explicitly so nothing below
    // runs even if process.exit has been stubbed (e.g. in tests).
    if (!decisionId) {
        reportUsageError('Error: Decision ID required', 'Use: bellwether feedback --list to see recent decisions');
        return;
    }
    if (!options.type) {
        reportUsageError('Error: Feedback type required', 'Options: false_positive, false_negative, confidence_wrong');
        return;
    }
    if (!VALID_FEEDBACK_TYPES.includes(options.type)) {
        reportUsageError(`Error: Invalid feedback type "${options.type}"`, `Valid types: ${VALID_FEEDBACK_TYPES.join(', ')}`);
        return;
    }
    // Validate --correct up front: previously anything other than the exact
    // string 'true' (e.g. "True", "yes", a typo) was silently stored as false.
    let correctAnswer;
    if (options.correct !== undefined) {
        const normalized = String(options.correct).trim().toLowerCase();
        if (normalized !== 'true' && normalized !== 'false') {
            reportUsageError(`Error: Invalid value for --correct "${options.correct}"`, 'Valid values: true, false');
            return;
        }
        correctAnswer = normalized === 'true';
    }
    // Find the decision: exact ID first, then fall back to a prefix match.
    let decision = logger.getDecision(decisionId);
    if (!decision) {
        const candidates = logger.loadAllDecisions().filter((d) => d.id.startsWith(decisionId));
        // A prefix shared by several distinct decisions must not be resolved
        // by guessing, or feedback lands on the wrong decision.
        const distinctIds = [...new Set(candidates.map((d) => d.id))];
        if (distinctIds.length > 1) {
            console.log(chalk.red(`Error: Decision ID "${decisionId}" is ambiguous (${distinctIds.length} matches)`));
            for (const id of distinctIds.slice(0, 10)) {
                console.log(chalk.gray(`  ${id}`));
            }
            console.log(chalk.gray('Provide a longer ID prefix that matches exactly one decision'));
            process.exit(1);
            return;
        }
        decision = candidates[0];
    }
    if (!decision) {
        reportUsageError(`Error: Decision not found: ${decisionId}`, 'Use: bellwether feedback --list to see recent decisions');
        return;
    }
    // Submit feedback
    const feedback = {
        decisionId: decision.id,
        feedbackType: options.type,
        userComment: options.message,
    };
    if (correctAnswer !== undefined) {
        feedback.correctAnswer = correctAnswer;
    }
    feedbackManager.submit(feedback);
    console.log('');
    console.log(chalk.green('✓ Feedback submitted'));
    console.log(` Decision: ${decision.id.slice(0, 8)}`);
    console.log(` Type: ${options.type}`);
    if (options.message) {
        console.log(` Comment: ${options.message}`);
    }
    console.log('');
    console.log(chalk.gray('Thank you for helping improve drift detection accuracy!'));
}
|
|
169
|
+
/**
 * Shorten a string for display so the result never exceeds `maxLen` characters.
 *
 * Strings longer than `maxLen` are cut and suffixed with `...`. When `maxLen`
 * is 3 or less there is no room for any text before the ellipsis, so only as
 * many dots as fit are returned. Previously a `maxLen` below 3 produced a
 * negative slice end and output longer than the limit.
 *
 * @param {string | null | undefined} str - Text to shorten; `null`/`undefined` is treated as empty.
 * @param {number} maxLen - Maximum length of the returned string.
 * @returns {string} The original text if it fits, otherwise a truncated form.
 */
function truncate(str, maxLen) {
    // Missing text must not crash the caller's display loop.
    const text = str == null ? '' : String(str);
    if (text.length <= maxLen)
        return text;
    const ellipsis = '...';
    if (maxLen <= ellipsis.length) {
        // Avoid slice(0, negative), which counts from the end of the string.
        return ellipsis.slice(0, Math.max(0, maxLen));
    }
    return text.slice(0, maxLen - ellipsis.length) + ellipsis;
}
|
|
174
|
+
//# sourceMappingURL=feedback.js.map
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Golden command - Manage golden outputs for tool validation.
|
|
3
|
+
*
|
|
4
|
+
* Commands:
|
|
5
|
+
* bellwether golden save --tool <name> Save current output as golden
|
|
6
|
+
* bellwether golden compare Compare against all golden outputs
|
|
7
|
+
* bellwether golden list List all saved golden outputs
|
|
8
|
+
* bellwether golden delete --tool <name> Delete a golden output
|
|
9
|
+
*/
|
|
10
|
+
import { Command } from 'commander';
|
|
11
|
+
/**
 * The `golden` CLI command for managing golden outputs used in tool validation.
 *
 * Per the file header it provides the `save`, `compare`, `list` and `delete`
 * subcommands; the implementation lives in the matching `.js` module.
 */
export declare const goldenCommand: Command;
|
|
12
|
+
//# sourceMappingURL=golden.d.ts.map
|