@dotsetlabs/bellwether 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +291 -0
- package/LICENSE +21 -0
- package/README.md +739 -0
- package/dist/auth/credentials.d.ts +64 -0
- package/dist/auth/credentials.js +218 -0
- package/dist/auth/index.d.ts +6 -0
- package/dist/auth/index.js +6 -0
- package/dist/auth/keychain.d.ts +64 -0
- package/dist/auth/keychain.js +268 -0
- package/dist/baseline/ab-testing.d.ts +80 -0
- package/dist/baseline/ab-testing.js +236 -0
- package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
- package/dist/baseline/ai-compatibility-scorer.js +606 -0
- package/dist/baseline/calibration.d.ts +77 -0
- package/dist/baseline/calibration.js +136 -0
- package/dist/baseline/category-matching.d.ts +85 -0
- package/dist/baseline/category-matching.js +289 -0
- package/dist/baseline/change-impact-analyzer.d.ts +98 -0
- package/dist/baseline/change-impact-analyzer.js +592 -0
- package/dist/baseline/comparator.d.ts +64 -0
- package/dist/baseline/comparator.js +916 -0
- package/dist/baseline/confidence.d.ts +55 -0
- package/dist/baseline/confidence.js +122 -0
- package/dist/baseline/converter.d.ts +61 -0
- package/dist/baseline/converter.js +585 -0
- package/dist/baseline/dependency-analyzer.d.ts +89 -0
- package/dist/baseline/dependency-analyzer.js +567 -0
- package/dist/baseline/deprecation-tracker.d.ts +133 -0
- package/dist/baseline/deprecation-tracker.js +322 -0
- package/dist/baseline/diff.d.ts +55 -0
- package/dist/baseline/diff.js +1584 -0
- package/dist/baseline/documentation-scorer.d.ts +205 -0
- package/dist/baseline/documentation-scorer.js +466 -0
- package/dist/baseline/embeddings.d.ts +118 -0
- package/dist/baseline/embeddings.js +251 -0
- package/dist/baseline/error-analyzer.d.ts +198 -0
- package/dist/baseline/error-analyzer.js +721 -0
- package/dist/baseline/evaluation/evaluator.d.ts +42 -0
- package/dist/baseline/evaluation/evaluator.js +323 -0
- package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
- package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
- package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
- package/dist/baseline/evaluation/golden-dataset.js +717 -0
- package/dist/baseline/evaluation/index.d.ts +15 -0
- package/dist/baseline/evaluation/index.js +15 -0
- package/dist/baseline/evaluation/types.d.ts +186 -0
- package/dist/baseline/evaluation/types.js +8 -0
- package/dist/baseline/external-dependency-detector.d.ts +181 -0
- package/dist/baseline/external-dependency-detector.js +524 -0
- package/dist/baseline/golden-output.d.ts +162 -0
- package/dist/baseline/golden-output.js +636 -0
- package/dist/baseline/health-scorer.d.ts +174 -0
- package/dist/baseline/health-scorer.js +451 -0
- package/dist/baseline/incremental-checker.d.ts +97 -0
- package/dist/baseline/incremental-checker.js +174 -0
- package/dist/baseline/index.d.ts +31 -0
- package/dist/baseline/index.js +42 -0
- package/dist/baseline/migration-generator.d.ts +137 -0
- package/dist/baseline/migration-generator.js +554 -0
- package/dist/baseline/migrations.d.ts +60 -0
- package/dist/baseline/migrations.js +197 -0
- package/dist/baseline/performance-tracker.d.ts +214 -0
- package/dist/baseline/performance-tracker.js +577 -0
- package/dist/baseline/pr-comment-generator.d.ts +117 -0
- package/dist/baseline/pr-comment-generator.js +546 -0
- package/dist/baseline/response-fingerprint.d.ts +127 -0
- package/dist/baseline/response-fingerprint.js +728 -0
- package/dist/baseline/response-schema-tracker.d.ts +129 -0
- package/dist/baseline/response-schema-tracker.js +420 -0
- package/dist/baseline/risk-scorer.d.ts +54 -0
- package/dist/baseline/risk-scorer.js +434 -0
- package/dist/baseline/saver.d.ts +89 -0
- package/dist/baseline/saver.js +554 -0
- package/dist/baseline/scenario-generator.d.ts +151 -0
- package/dist/baseline/scenario-generator.js +905 -0
- package/dist/baseline/schema-compare.d.ts +86 -0
- package/dist/baseline/schema-compare.js +557 -0
- package/dist/baseline/schema-evolution.d.ts +189 -0
- package/dist/baseline/schema-evolution.js +467 -0
- package/dist/baseline/semantic.d.ts +203 -0
- package/dist/baseline/semantic.js +908 -0
- package/dist/baseline/synonyms.d.ts +60 -0
- package/dist/baseline/synonyms.js +386 -0
- package/dist/baseline/telemetry.d.ts +165 -0
- package/dist/baseline/telemetry.js +294 -0
- package/dist/baseline/test-pruner.d.ts +120 -0
- package/dist/baseline/test-pruner.js +387 -0
- package/dist/baseline/types.d.ts +449 -0
- package/dist/baseline/types.js +5 -0
- package/dist/baseline/version.d.ts +138 -0
- package/dist/baseline/version.js +206 -0
- package/dist/cache/index.d.ts +5 -0
- package/dist/cache/index.js +5 -0
- package/dist/cache/response-cache.d.ts +151 -0
- package/dist/cache/response-cache.js +287 -0
- package/dist/ci/index.d.ts +60 -0
- package/dist/ci/index.js +342 -0
- package/dist/cli/commands/auth.d.ts +12 -0
- package/dist/cli/commands/auth.js +352 -0
- package/dist/cli/commands/badge.d.ts +3 -0
- package/dist/cli/commands/badge.js +74 -0
- package/dist/cli/commands/baseline-accept.d.ts +15 -0
- package/dist/cli/commands/baseline-accept.js +178 -0
- package/dist/cli/commands/baseline-migrate.d.ts +12 -0
- package/dist/cli/commands/baseline-migrate.js +164 -0
- package/dist/cli/commands/baseline.d.ts +14 -0
- package/dist/cli/commands/baseline.js +449 -0
- package/dist/cli/commands/beta.d.ts +10 -0
- package/dist/cli/commands/beta.js +231 -0
- package/dist/cli/commands/check.d.ts +11 -0
- package/dist/cli/commands/check.js +820 -0
- package/dist/cli/commands/cloud/badge.d.ts +3 -0
- package/dist/cli/commands/cloud/badge.js +74 -0
- package/dist/cli/commands/cloud/diff.d.ts +6 -0
- package/dist/cli/commands/cloud/diff.js +79 -0
- package/dist/cli/commands/cloud/history.d.ts +6 -0
- package/dist/cli/commands/cloud/history.js +102 -0
- package/dist/cli/commands/cloud/link.d.ts +9 -0
- package/dist/cli/commands/cloud/link.js +119 -0
- package/dist/cli/commands/cloud/login.d.ts +7 -0
- package/dist/cli/commands/cloud/login.js +499 -0
- package/dist/cli/commands/cloud/projects.d.ts +6 -0
- package/dist/cli/commands/cloud/projects.js +44 -0
- package/dist/cli/commands/cloud/shared.d.ts +7 -0
- package/dist/cli/commands/cloud/shared.js +42 -0
- package/dist/cli/commands/cloud/teams.d.ts +8 -0
- package/dist/cli/commands/cloud/teams.js +169 -0
- package/dist/cli/commands/cloud/upload.d.ts +8 -0
- package/dist/cli/commands/cloud/upload.js +181 -0
- package/dist/cli/commands/contract.d.ts +11 -0
- package/dist/cli/commands/contract.js +280 -0
- package/dist/cli/commands/discover.d.ts +3 -0
- package/dist/cli/commands/discover.js +82 -0
- package/dist/cli/commands/eval.d.ts +9 -0
- package/dist/cli/commands/eval.js +187 -0
- package/dist/cli/commands/explore.d.ts +11 -0
- package/dist/cli/commands/explore.js +437 -0
- package/dist/cli/commands/feedback.d.ts +9 -0
- package/dist/cli/commands/feedback.js +174 -0
- package/dist/cli/commands/golden.d.ts +12 -0
- package/dist/cli/commands/golden.js +407 -0
- package/dist/cli/commands/history.d.ts +10 -0
- package/dist/cli/commands/history.js +202 -0
- package/dist/cli/commands/init.d.ts +9 -0
- package/dist/cli/commands/init.js +219 -0
- package/dist/cli/commands/interview.d.ts +3 -0
- package/dist/cli/commands/interview.js +903 -0
- package/dist/cli/commands/link.d.ts +10 -0
- package/dist/cli/commands/link.js +169 -0
- package/dist/cli/commands/login.d.ts +7 -0
- package/dist/cli/commands/login.js +499 -0
- package/dist/cli/commands/preset.d.ts +33 -0
- package/dist/cli/commands/preset.js +297 -0
- package/dist/cli/commands/profile.d.ts +33 -0
- package/dist/cli/commands/profile.js +286 -0
- package/dist/cli/commands/registry.d.ts +11 -0
- package/dist/cli/commands/registry.js +146 -0
- package/dist/cli/commands/shared.d.ts +79 -0
- package/dist/cli/commands/shared.js +196 -0
- package/dist/cli/commands/teams.d.ts +8 -0
- package/dist/cli/commands/teams.js +169 -0
- package/dist/cli/commands/test.d.ts +9 -0
- package/dist/cli/commands/test.js +500 -0
- package/dist/cli/commands/upload.d.ts +8 -0
- package/dist/cli/commands/upload.js +223 -0
- package/dist/cli/commands/validate-config.d.ts +6 -0
- package/dist/cli/commands/validate-config.js +35 -0
- package/dist/cli/commands/verify.d.ts +11 -0
- package/dist/cli/commands/verify.js +283 -0
- package/dist/cli/commands/watch.d.ts +12 -0
- package/dist/cli/commands/watch.js +253 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.js +178 -0
- package/dist/cli/interactive.d.ts +47 -0
- package/dist/cli/interactive.js +216 -0
- package/dist/cli/output/terminal-reporter.d.ts +19 -0
- package/dist/cli/output/terminal-reporter.js +104 -0
- package/dist/cli/output.d.ts +226 -0
- package/dist/cli/output.js +438 -0
- package/dist/cli/utils/env.d.ts +5 -0
- package/dist/cli/utils/env.js +14 -0
- package/dist/cli/utils/progress.d.ts +59 -0
- package/dist/cli/utils/progress.js +206 -0
- package/dist/cli/utils/server-context.d.ts +10 -0
- package/dist/cli/utils/server-context.js +36 -0
- package/dist/cloud/auth.d.ts +144 -0
- package/dist/cloud/auth.js +374 -0
- package/dist/cloud/client.d.ts +24 -0
- package/dist/cloud/client.js +65 -0
- package/dist/cloud/http-client.d.ts +38 -0
- package/dist/cloud/http-client.js +215 -0
- package/dist/cloud/index.d.ts +23 -0
- package/dist/cloud/index.js +25 -0
- package/dist/cloud/mock-client.d.ts +107 -0
- package/dist/cloud/mock-client.js +545 -0
- package/dist/cloud/types.d.ts +515 -0
- package/dist/cloud/types.js +15 -0
- package/dist/config/defaults.d.ts +160 -0
- package/dist/config/defaults.js +169 -0
- package/dist/config/loader.d.ts +24 -0
- package/dist/config/loader.js +122 -0
- package/dist/config/template.d.ts +42 -0
- package/dist/config/template.js +647 -0
- package/dist/config/validator.d.ts +2112 -0
- package/dist/config/validator.js +658 -0
- package/dist/constants/cloud.d.ts +107 -0
- package/dist/constants/cloud.js +110 -0
- package/dist/constants/core.d.ts +521 -0
- package/dist/constants/core.js +556 -0
- package/dist/constants/testing.d.ts +1283 -0
- package/dist/constants/testing.js +1568 -0
- package/dist/constants.d.ts +10 -0
- package/dist/constants.js +10 -0
- package/dist/contract/index.d.ts +6 -0
- package/dist/contract/index.js +5 -0
- package/dist/contract/validator.d.ts +177 -0
- package/dist/contract/validator.js +574 -0
- package/dist/cost/index.d.ts +6 -0
- package/dist/cost/index.js +5 -0
- package/dist/cost/tracker.d.ts +134 -0
- package/dist/cost/tracker.js +313 -0
- package/dist/discovery/discovery.d.ts +16 -0
- package/dist/discovery/discovery.js +173 -0
- package/dist/discovery/types.d.ts +51 -0
- package/dist/discovery/types.js +2 -0
- package/dist/docs/agents.d.ts +3 -0
- package/dist/docs/agents.js +995 -0
- package/dist/docs/contract.d.ts +51 -0
- package/dist/docs/contract.js +1681 -0
- package/dist/docs/generator.d.ts +4 -0
- package/dist/docs/generator.js +4 -0
- package/dist/docs/html-reporter.d.ts +9 -0
- package/dist/docs/html-reporter.js +757 -0
- package/dist/docs/index.d.ts +10 -0
- package/dist/docs/index.js +11 -0
- package/dist/docs/junit-reporter.d.ts +18 -0
- package/dist/docs/junit-reporter.js +210 -0
- package/dist/docs/report.d.ts +14 -0
- package/dist/docs/report.js +44 -0
- package/dist/docs/sarif-reporter.d.ts +19 -0
- package/dist/docs/sarif-reporter.js +335 -0
- package/dist/docs/shared.d.ts +35 -0
- package/dist/docs/shared.js +162 -0
- package/dist/docs/templates.d.ts +12 -0
- package/dist/docs/templates.js +76 -0
- package/dist/errors/index.d.ts +6 -0
- package/dist/errors/index.js +6 -0
- package/dist/errors/retry.d.ts +92 -0
- package/dist/errors/retry.js +323 -0
- package/dist/errors/types.d.ts +321 -0
- package/dist/errors/types.js +584 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.js +32 -0
- package/dist/interview/dependency-resolver.d.ts +11 -0
- package/dist/interview/dependency-resolver.js +32 -0
- package/dist/interview/interviewer.d.ts +232 -0
- package/dist/interview/interviewer.js +1939 -0
- package/dist/interview/mock-response-generator.d.ts +7 -0
- package/dist/interview/mock-response-generator.js +102 -0
- package/dist/interview/orchestrator.d.ts +237 -0
- package/dist/interview/orchestrator.js +1296 -0
- package/dist/interview/rate-limiter.d.ts +15 -0
- package/dist/interview/rate-limiter.js +55 -0
- package/dist/interview/response-validator.d.ts +10 -0
- package/dist/interview/response-validator.js +132 -0
- package/dist/interview/schema-inferrer.d.ts +8 -0
- package/dist/interview/schema-inferrer.js +71 -0
- package/dist/interview/schema-test-generator.d.ts +71 -0
- package/dist/interview/schema-test-generator.js +834 -0
- package/dist/interview/smart-value-generator.d.ts +155 -0
- package/dist/interview/smart-value-generator.js +554 -0
- package/dist/interview/stateful-test-runner.d.ts +19 -0
- package/dist/interview/stateful-test-runner.js +106 -0
- package/dist/interview/types.d.ts +561 -0
- package/dist/interview/types.js +2 -0
- package/dist/llm/anthropic.d.ts +41 -0
- package/dist/llm/anthropic.js +355 -0
- package/dist/llm/client.d.ts +123 -0
- package/dist/llm/client.js +42 -0
- package/dist/llm/factory.d.ts +38 -0
- package/dist/llm/factory.js +145 -0
- package/dist/llm/fallback.d.ts +140 -0
- package/dist/llm/fallback.js +379 -0
- package/dist/llm/index.d.ts +18 -0
- package/dist/llm/index.js +15 -0
- package/dist/llm/ollama.d.ts +37 -0
- package/dist/llm/ollama.js +330 -0
- package/dist/llm/openai.d.ts +25 -0
- package/dist/llm/openai.js +320 -0
- package/dist/llm/token-budget.d.ts +161 -0
- package/dist/llm/token-budget.js +395 -0
- package/dist/logging/logger.d.ts +70 -0
- package/dist/logging/logger.js +130 -0
- package/dist/metrics/collector.d.ts +106 -0
- package/dist/metrics/collector.js +547 -0
- package/dist/metrics/index.d.ts +7 -0
- package/dist/metrics/index.js +7 -0
- package/dist/metrics/prometheus.d.ts +20 -0
- package/dist/metrics/prometheus.js +241 -0
- package/dist/metrics/types.d.ts +209 -0
- package/dist/metrics/types.js +5 -0
- package/dist/persona/builtins.d.ts +54 -0
- package/dist/persona/builtins.js +219 -0
- package/dist/persona/index.d.ts +8 -0
- package/dist/persona/index.js +8 -0
- package/dist/persona/loader.d.ts +30 -0
- package/dist/persona/loader.js +190 -0
- package/dist/persona/types.d.ts +144 -0
- package/dist/persona/types.js +5 -0
- package/dist/persona/validation.d.ts +94 -0
- package/dist/persona/validation.js +332 -0
- package/dist/prompts/index.d.ts +5 -0
- package/dist/prompts/index.js +5 -0
- package/dist/prompts/templates.d.ts +180 -0
- package/dist/prompts/templates.js +431 -0
- package/dist/registry/client.d.ts +49 -0
- package/dist/registry/client.js +191 -0
- package/dist/registry/index.d.ts +7 -0
- package/dist/registry/index.js +6 -0
- package/dist/registry/types.d.ts +140 -0
- package/dist/registry/types.js +6 -0
- package/dist/scenarios/evaluator.d.ts +43 -0
- package/dist/scenarios/evaluator.js +206 -0
- package/dist/scenarios/index.d.ts +10 -0
- package/dist/scenarios/index.js +9 -0
- package/dist/scenarios/loader.d.ts +20 -0
- package/dist/scenarios/loader.js +285 -0
- package/dist/scenarios/types.d.ts +153 -0
- package/dist/scenarios/types.js +8 -0
- package/dist/security/index.d.ts +17 -0
- package/dist/security/index.js +18 -0
- package/dist/security/payloads.d.ts +61 -0
- package/dist/security/payloads.js +268 -0
- package/dist/security/security-tester.d.ts +42 -0
- package/dist/security/security-tester.js +582 -0
- package/dist/security/types.d.ts +166 -0
- package/dist/security/types.js +8 -0
- package/dist/transport/base-transport.d.ts +59 -0
- package/dist/transport/base-transport.js +38 -0
- package/dist/transport/http-transport.d.ts +67 -0
- package/dist/transport/http-transport.js +238 -0
- package/dist/transport/mcp-client.d.ts +141 -0
- package/dist/transport/mcp-client.js +496 -0
- package/dist/transport/sse-transport.d.ts +88 -0
- package/dist/transport/sse-transport.js +316 -0
- package/dist/transport/stdio-transport.d.ts +43 -0
- package/dist/transport/stdio-transport.js +238 -0
- package/dist/transport/types.d.ts +125 -0
- package/dist/transport/types.js +16 -0
- package/dist/utils/concurrency.d.ts +123 -0
- package/dist/utils/concurrency.js +213 -0
- package/dist/utils/formatters.d.ts +16 -0
- package/dist/utils/formatters.js +37 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/jsonpath.d.ts +87 -0
- package/dist/utils/jsonpath.js +326 -0
- package/dist/utils/markdown.d.ts +113 -0
- package/dist/utils/markdown.js +265 -0
- package/dist/utils/network.d.ts +14 -0
- package/dist/utils/network.js +17 -0
- package/dist/utils/sanitize.d.ts +92 -0
- package/dist/utils/sanitize.js +191 -0
- package/dist/utils/semantic.d.ts +194 -0
- package/dist/utils/semantic.js +1051 -0
- package/dist/utils/smart-truncate.d.ts +94 -0
- package/dist/utils/smart-truncate.js +361 -0
- package/dist/utils/timeout.d.ts +153 -0
- package/dist/utils/timeout.js +205 -0
- package/dist/utils/yaml-parser.d.ts +58 -0
- package/dist/utils/yaml-parser.js +86 -0
- package/dist/validation/index.d.ts +32 -0
- package/dist/validation/index.js +32 -0
- package/dist/validation/semantic-test-generator.d.ts +50 -0
- package/dist/validation/semantic-test-generator.js +176 -0
- package/dist/validation/semantic-types.d.ts +66 -0
- package/dist/validation/semantic-types.js +94 -0
- package/dist/validation/semantic-validator.d.ts +38 -0
- package/dist/validation/semantic-validator.js +340 -0
- package/dist/verification/index.d.ts +6 -0
- package/dist/verification/index.js +5 -0
- package/dist/verification/types.d.ts +133 -0
- package/dist/verification/types.js +5 -0
- package/dist/verification/verifier.d.ts +30 -0
- package/dist/verification/verifier.js +309 -0
- package/dist/version.d.ts +19 -0
- package/dist/version.js +48 -0
- package/dist/workflow/auto-generator.d.ts +27 -0
- package/dist/workflow/auto-generator.js +513 -0
- package/dist/workflow/discovery.d.ts +40 -0
- package/dist/workflow/discovery.js +195 -0
- package/dist/workflow/executor.d.ts +82 -0
- package/dist/workflow/executor.js +611 -0
- package/dist/workflow/index.d.ts +10 -0
- package/dist/workflow/index.js +10 -0
- package/dist/workflow/loader.d.ts +24 -0
- package/dist/workflow/loader.js +194 -0
- package/dist/workflow/state-tracker.d.ts +98 -0
- package/dist/workflow/state-tracker.js +424 -0
- package/dist/workflow/types.d.ts +337 -0
- package/dist/workflow/types.js +5 -0
- package/package.json +94 -0
- package/schemas/bellwether-check.schema.json +651 -0
|
@@ -0,0 +1,903 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import { writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
3
|
+
import { join } from 'path';
|
|
4
|
+
import { MCPClient } from '../../transport/mcp-client.js';
|
|
5
|
+
import { discover } from '../../discovery/discovery.js';
|
|
6
|
+
import { createLLMClient, PREMIUM_MODELS } from '../../llm/index.js';
|
|
7
|
+
import { Interviewer } from '../../interview/interviewer.js';
|
|
8
|
+
import { generateAgentsMd, generateJsonReport } from '../../docs/generator.js';
|
|
9
|
+
import { loadConfig } from '../../config/loader.js';
|
|
10
|
+
import { createBaseline, saveBaseline, loadBaseline, compareBaselines, formatDiffText, } from '../../baseline/index.js';
|
|
11
|
+
import { createCloudBaseline } from '../../baseline/converter.js';
|
|
12
|
+
import { CostTracker, estimateInterviewCost, formatCostEstimate, estimateInterviewTime, formatCostAndTimeEstimate, suggestOptimizations, formatOptimizationSuggestions, } from '../../cost/index.js';
|
|
13
|
+
import { COST_THRESHOLDS } from '../../constants.js';
|
|
14
|
+
import { getMetricsCollector, resetMetricsCollector } from '../../metrics/collector.js';
|
|
15
|
+
import { FallbackLLMClient } from '../../llm/fallback.js';
|
|
16
|
+
import { withTokenBudget } from '../../llm/token-budget.js';
|
|
17
|
+
import { getGlobalCache, resetGlobalCache } from '../../cache/response-cache.js';
|
|
18
|
+
import { INTERVIEW } from '../../constants.js';
|
|
19
|
+
import { promptForConfig, displayConfigSummary, } from '../interactive.js';
|
|
20
|
+
import { InterviewProgressBar, formatStartupBanner, } from '../utils/progress.js';
|
|
21
|
+
import { DEFAULT_PERSONA, securityTesterPersona, qaEngineerPersona, noviceUserPersona, } from '../../persona/builtins.js';
|
|
22
|
+
import { loadScenariosFromFile, tryLoadDefaultScenarios, generateSampleScenariosYaml, DEFAULT_SCENARIOS_FILE, } from '../../scenarios/index.js';
|
|
23
|
+
import { loadWorkflowsFromFile, tryLoadDefaultWorkflows, generateSampleWorkflowYaml, DEFAULT_WORKFLOWS_FILE, } from '../../workflow/loader.js';
|
|
24
|
+
import { WORKFLOW } from '../../constants.js';
|
|
25
|
+
import * as output from '../output.js';
|
|
26
|
+
import { StreamingDisplay } from '../output.js';
|
|
27
|
+
import { suppressLogs, restoreLogLevel } from '../../logging/logger.js';
|
|
28
|
+
/**
 * Lookup table from short persona names to the built-in persona objects
 * imported from the persona builtins module.
 *
 * `parsePersonas` resolves each comma-separated name given to `--personas`
 * through this table.
 */
const PERSONA_MAP = {
    // "technical" resolves to the default persona.
    technical: DEFAULT_PERSONA,
    security: securityTesterPersona,
    qa: qaEngineerPersona,
    novice: noviceUserPersona,
};
|
|
37
|
+
/**
 * Named preset configurations, looked up by the lower-cased value of the
 * `--preset` option.
 *
 * Each preset carries:
 *   - personas:     the persona objects the preset selects
 *   - maxQuestions: the per-tool question count the preset selects
 *   - description:  the human-readable summary printed when the preset is
 *                   chosen, and listed when an unknown preset name is given
 */
const PRESETS = {
    // Single default persona, 3 questions per tool.
    docs: {
        personas: [DEFAULT_PERSONA],
        maxQuestions: 3,
        description: 'Documentation-focused: Technical Writer persona, 3 questions/tool (~$0.02, ~2 min)',
    },
    // Default persona plus the security tester persona.
    security: {
        personas: [DEFAULT_PERSONA, securityTesterPersona],
        maxQuestions: 3,
        description: 'Security audit: Technical + Security personas, 3 questions/tool (~$0.05, ~3 min)',
    },
    // All four built-in personas, 5 questions per tool.
    thorough: {
        personas: [DEFAULT_PERSONA, securityTesterPersona, qaEngineerPersona, noviceUserPersona],
        maxQuestions: 5,
        description: 'Comprehensive: All 4 personas, 5 questions/tool (~$0.12, ~8 min)',
    },
    // Preset applied by the --ci / --structural composite flag.
    ci: {
        personas: [DEFAULT_PERSONA],
        maxQuestions: 1,
        description: 'Structural-only: Zero LLM, free, deterministic (for CI/CD)',
    },
};
|
|
59
|
+
/**
 * Detect if running in a CI environment.
 *
 * Looks at the environment variables set by common CI providers. A variable
 * only counts as an indicator when it holds a non-blank value other than
 * "false" or "0" (case-insensitive), so an explicit opt-out such as
 * `CI=false` is not mistaken for a CI run.
 *
 * @returns {boolean} `true` when at least one CI indicator variable is set.
 */
function isCI() {
    const indicatorVariables = [
        'CI',
        'CONTINUOUS_INTEGRATION',
        'GITHUB_ACTIONS',
        'GITLAB_CI',
        'CIRCLECI',
        'JENKINS_URL',
        'TRAVIS',
        'BUILDKITE',
    ];
    return indicatorVariables.some((variableName) => {
        const rawValue = process.env[variableName];
        if (rawValue === undefined) {
            return false;
        }
        // Environment values are always strings, so "false" and "0" would be
        // truthy without this normalisation.
        const normalized = rawValue.trim().toLowerCase();
        return normalized !== '' && normalized !== 'false' && normalized !== '0';
    });
}
|
|
72
|
+
/**
 * Check if running in an interactive terminal.
 *
 * Interactive means both stdout and stdin are TTYs and `isCI()` reports that
 * we are not in a CI environment.
 *
 * `isTTY` is `undefined` on non-TTY streams, so the result is coerced to a
 * strict boolean instead of leaking `undefined` to callers.
 *
 * @returns {boolean} `true` only when both streams are TTYs outside of CI.
 */
function isInteractiveTTY() {
    const hasTerminal = Boolean(process.stdout.isTTY && process.stdin.isTTY);
    return hasTerminal && !isCI();
}
|
|
78
|
+
/**
 * Parse persona list from CLI option.
 *
 * The list is split on commas; each entry is trimmed and lower-cased, and
 * empty entries (for example from a trailing comma) are ignored. If any entry
 * is "all", every persona in PERSONA_MAP is returned. Otherwise each entry is
 * looked up in PERSONA_MAP; unknown names produce a warning and are skipped,
 * and a persona named more than once is only included once.
 *
 * @param {string} personaList - Comma-separated persona names, or "all".
 * @returns {Array} The selected personas in the order given, or
 *   `[DEFAULT_PERSONA]` when no valid persona name was supplied.
 */
function parsePersonas(personaList) {
    const requestedNames = String(personaList ?? '')
        .split(',')
        .map((entry) => entry.trim().toLowerCase())
        .filter((entry) => entry !== '');
    if (requestedNames.includes('all')) {
        return Object.values(PERSONA_MAP);
    }
    const selected = [];
    for (const name of requestedNames) {
        // Own-property check: a plain lookup would also resolve inherited keys
        // such as "constructor" or "toString" to Object.prototype members.
        if (!Object.prototype.hasOwnProperty.call(PERSONA_MAP, name)) {
            output.warn(`Unknown persona: ${name}. Available: ${Object.keys(PERSONA_MAP).join(', ')}, all`);
            continue;
        }
        const persona = PERSONA_MAP[name];
        if (!selected.includes(persona)) {
            selected.push(persona);
        }
    }
    return selected.length > 0 ? selected : [DEFAULT_PERSONA];
}
|
|
98
|
+
/**
 * Extract server context from command and arguments.
 * Looks for common patterns like directory paths that indicate server constraints.
 *
 * The server type is guessed from the lower-cased command line:
 *   - "filesystem" / "file-system": path arguments become allowed
 *     directories, with a matching hint and constraint.
 *   - "postgres" / "mysql" / "sqlite": database hint and constraint only.
 *   - "git" as a standalone token (e.g. "mcp-server-git", but not "github"
 *     or "gitlab"): path arguments become allowed directories, with a hint.
 *   - anything else: path arguments become allowed directories.
 *
 * @param {string} command - Executable used to start the server.
 * @param {string[]} [args=[]] - Arguments passed to the server.
 * @returns {{allowedDirectories: string[], constraints: string[], hints: string[]}}
 *   Context derived from the command line.
 */
function extractServerContextFromArgs(command, args = []) {
    const argList = args ?? [];
    const context = {
        allowedDirectories: [],
        constraints: [],
        hints: [],
    };
    const fullCommand = `${command} ${argList.join(' ')}`.toLowerCase();
    // Absolute paths: POSIX ("/dir"), Windows drive ("C:\dir", "C:/dir") or
    // UNC ("\\host\share"). Option flags ("--x") never match any of these.
    const looksLikeAbsolutePath = (arg) => arg.startsWith('/') ||
        arg.startsWith('\\\\') ||
        /^[a-zA-Z]:[\\/]/.test(arg);
    const pathArgs = argList.filter(looksLikeAbsolutePath);
    // Match "git" only as its own token so that "github", "gitlab" or
    // "digital" are not classified as Git servers.
    const mentionsGit = /(^|[^a-z0-9])git([^a-z0-9]|$)/.test(fullCommand);
    // Filesystem servers - look for directory arguments
    if (fullCommand.includes('filesystem') || fullCommand.includes('file-system')) {
        context.allowedDirectories = pathArgs;
        if (context.allowedDirectories.length > 0) {
            context.hints.push(`Filesystem server with allowed directories: ${context.allowedDirectories.join(', ')}`);
        }
        context.constraints.push('Operations limited to specified directories');
    }
    // Database servers - might have connection strings
    else if (fullCommand.includes('postgres') || fullCommand.includes('mysql') || fullCommand.includes('sqlite')) {
        context.hints.push('Database server - SQL operations expected');
        context.constraints.push('Database operations only');
    }
    // Git servers
    else if (mentionsGit) {
        context.allowedDirectories = pathArgs;
        context.hints.push('Git server - repository operations expected');
    }
    // Generic case - any path arguments are potential allowed directories
    else {
        context.allowedDirectories = pathArgs;
    }
    return context;
}
|
|
136
|
+
export const interviewCommand = new Command('interview')
|
|
137
|
+
.description('Test an MCP server: structural drift detection (--structural) or full LLM interview')
|
|
138
|
+
.argument('[command]', 'Command to start the MCP server')
|
|
139
|
+
.argument('[args...]', 'Arguments to pass to the server')
|
|
140
|
+
.option('-o, --output <dir>', 'Output directory', '.')
|
|
141
|
+
.option('-c, --config <path>', 'Path to config file')
|
|
142
|
+
.option('--model <model>', 'LLM model to use')
|
|
143
|
+
.option('--max-questions <n>', 'Max questions per tool')
|
|
144
|
+
.option('--timeout <ms>', 'Timeout for tool calls in milliseconds', String(INTERVIEW.CLI_TIMEOUT))
|
|
145
|
+
.option('--json', 'Also output JSON report')
|
|
146
|
+
.option('--verbose', 'Verbose output')
|
|
147
|
+
.option('--debug', 'Debug MCP protocol')
|
|
148
|
+
.option('--save-baseline [path]', 'Save baseline for drift detection (default: bellwether-baseline.json)')
|
|
149
|
+
.option('--compare-baseline <path>', 'Compare against existing baseline')
|
|
150
|
+
.option('--fail-on-drift', 'Exit with error if behavioral drift detected')
|
|
151
|
+
.option('--cloud-format', 'Save baseline in cloud-ready format')
|
|
152
|
+
.option('--estimate-cost', 'Estimate cost before running interview')
|
|
153
|
+
.option('--show-cost', 'Show cost summary after interview')
|
|
154
|
+
.option('-i, --interactive', 'Run in interactive mode with prompts')
|
|
155
|
+
.option('-q, --quick', 'Quick mode for CI: 1 question per tool')
|
|
156
|
+
.option('-Q, --quality', 'Use premium LLM models for higher quality output')
|
|
157
|
+
.option('-p, --preset <name>', 'Use a preset configuration: docs, security, thorough, ci')
|
|
158
|
+
.option('--personas <list>', 'Comma-separated persona list: technical,security,qa,novice,all', 'technical')
|
|
159
|
+
.option('--security', 'Include security testing persona (shorthand for --personas technical,security)')
|
|
160
|
+
.option('--transport <type>', 'Transport type: stdio, sse, streamable-http', 'stdio')
|
|
161
|
+
.option('--url <url>', 'URL for remote MCP server (requires --transport sse or streamable-http)')
|
|
162
|
+
.option('--session-id <id>', 'Session ID for remote server authentication')
|
|
163
|
+
.option('--scenarios <path>', 'Path to custom test scenarios YAML file')
|
|
164
|
+
.option('--scenarios-only', 'Only run custom scenarios (skip LLM-generated questions)')
|
|
165
|
+
.option('--init-scenarios', 'Generate a sample bellwether-tests.yaml file and exit')
|
|
166
|
+
.option('--stream', 'Enable streaming output to show LLM responses in real-time')
|
|
167
|
+
.option('--quiet', 'Suppress streaming output (use with --stream to only log final results)')
|
|
168
|
+
.option('--parallel-personas', 'Run persona interviews in parallel for faster execution')
|
|
169
|
+
.option('--persona-concurrency <n>', `Max concurrent persona interviews (default: ${INTERVIEW.DEFAULT_PERSONA_CONCURRENCY}, requires --parallel-personas)`, String(INTERVIEW.DEFAULT_PERSONA_CONCURRENCY))
|
|
170
|
+
.option('--show-metrics', 'Show detailed metrics after interview (token usage, timing, costs)')
|
|
171
|
+
.option('--fallback', 'Enable automatic Ollama fallback if primary LLM provider fails')
|
|
172
|
+
.option('--max-tokens <n>', 'Maximum total tokens to use (prevents runaway costs)')
|
|
173
|
+
.option('--cache', 'Enable response caching to avoid redundant tool calls and LLM analysis (default: enabled)')
|
|
174
|
+
.option('--no-cache', 'Disable response caching')
|
|
175
|
+
.option('--resource-timeout <ms>', `Timeout for resource reads in milliseconds (default: ${INTERVIEW.RESOURCE_TIMEOUT})`, String(INTERVIEW.RESOURCE_TIMEOUT))
|
|
176
|
+
.option('--workflows <path>', 'Path to workflow definitions YAML file')
|
|
177
|
+
.option('--discover-workflows', 'Enable LLM-based workflow discovery')
|
|
178
|
+
.option('--max-workflows <n>', `Maximum workflows to discover (default: ${WORKFLOW.MAX_DISCOVERED_WORKFLOWS})`, String(WORKFLOW.MAX_DISCOVERED_WORKFLOWS))
|
|
179
|
+
.option('--init-workflows', 'Generate a sample bellwether-workflows.yaml file and exit')
|
|
180
|
+
.option('--workflow-state-tracking', 'Enable state tracking during workflow execution')
|
|
181
|
+
.option('--ci, --structural', 'Structural-only mode: zero LLM, free, deterministic drift detection (equivalent to --preset ci --yes --quiet)')
|
|
182
|
+
.option('-y, --yes', 'Skip confirmation prompts')
|
|
183
|
+
.action(async (command, args, options) => {
|
|
184
|
+
// Handle --init-scenarios: generate sample file and exit
|
|
185
|
+
if (options.initScenarios) {
|
|
186
|
+
const outputPath = options.scenarios ?? DEFAULT_SCENARIOS_FILE;
|
|
187
|
+
const content = generateSampleScenariosYaml();
|
|
188
|
+
writeFileSync(outputPath, content);
|
|
189
|
+
output.info(`Generated sample scenarios file: ${outputPath}`);
|
|
190
|
+
output.info('\nEdit this file to add custom test scenarios for your MCP server.');
|
|
191
|
+
output.info('Then run: bellwether interview <command> --scenarios ' + outputPath);
|
|
192
|
+
return;
|
|
193
|
+
}
|
|
194
|
+
// Handle --init-workflows: generate sample file and exit
|
|
195
|
+
if (options.initWorkflows) {
|
|
196
|
+
const outputPath = options.workflows ?? 'bellwether-workflows.yaml';
|
|
197
|
+
const content = generateSampleWorkflowYaml();
|
|
198
|
+
writeFileSync(outputPath, content);
|
|
199
|
+
output.info(`Generated sample workflows file: ${outputPath}`);
|
|
200
|
+
output.info('\nEdit this file to define custom workflow tests for your MCP server.');
|
|
201
|
+
output.info('Then run: bellwether interview <command> --workflows ' + outputPath);
|
|
202
|
+
return;
|
|
203
|
+
}
|
|
204
|
+
// Handle --ci / --structural composite flag: apply ci preset, yes, and quiet options
|
|
205
|
+
const isCiMode = options.ci === true || options.structural === true;
|
|
206
|
+
if (isCiMode) {
|
|
207
|
+
options.preset = 'ci';
|
|
208
|
+
options.yes = true;
|
|
209
|
+
options.quiet = true;
|
|
210
|
+
}
|
|
211
|
+
// Load configuration
|
|
212
|
+
const config = loadConfig(options.config);
|
|
213
|
+
// Handle interactive mode
|
|
214
|
+
let interactiveConfig;
|
|
215
|
+
if (options.interactive || !command) {
|
|
216
|
+
// If no command provided, enter interactive mode
|
|
217
|
+
if (!command && !options.interactive) {
|
|
218
|
+
output.info('No server command provided. Entering interactive mode...\n');
|
|
219
|
+
}
|
|
220
|
+
interactiveConfig = await promptForConfig(config, command, args);
|
|
221
|
+
displayConfigSummary(interactiveConfig);
|
|
222
|
+
// Update command and args from interactive config
|
|
223
|
+
command = interactiveConfig.serverCommand;
|
|
224
|
+
args = interactiveConfig.serverArgs;
|
|
225
|
+
}
|
|
226
|
+
// Ensure we have a command at this point
|
|
227
|
+
if (!command) {
|
|
228
|
+
output.error('Error: Server command is required.');
|
|
229
|
+
output.error('Usage: bellwether interview <command> [args...] or bellwether interview --interactive');
|
|
230
|
+
process.exit(1);
|
|
231
|
+
}
|
|
232
|
+
// Determine model: --quality uses premium models, otherwise defaults (now budget-friendly)
|
|
233
|
+
const isQualityMode = options.quality;
|
|
234
|
+
const model = options.model
|
|
235
|
+
?? (isQualityMode ? PREMIUM_MODELS[config.llm.provider] : undefined)
|
|
236
|
+
?? config.llm.model;
|
|
237
|
+
// Handle preset configurations
|
|
238
|
+
let presetConfig;
|
|
239
|
+
if (options.preset) {
|
|
240
|
+
presetConfig = PRESETS[options.preset.toLowerCase()];
|
|
241
|
+
if (!presetConfig) {
|
|
242
|
+
output.error(`Unknown preset: ${options.preset}`);
|
|
243
|
+
output.error(`Available presets: ${Object.keys(PRESETS).join(', ')}`);
|
|
244
|
+
output.error('\nPreset descriptions:');
|
|
245
|
+
for (const [name, cfg] of Object.entries(PRESETS)) {
|
|
246
|
+
output.error(` ${name}: ${cfg.description}`);
|
|
247
|
+
}
|
|
248
|
+
process.exit(1);
|
|
249
|
+
}
|
|
250
|
+
output.info(`Using preset: ${options.preset} (${presetConfig.description})\n`);
|
|
251
|
+
}
|
|
252
|
+
// Quick mode: 1 question per tool for fast CI runs
|
|
253
|
+
// Preset overrides quick mode if specified
|
|
254
|
+
const maxQuestions = presetConfig?.maxQuestions
|
|
255
|
+
?? (options.quick
|
|
256
|
+
? 1
|
|
257
|
+
: (interactiveConfig?.maxQuestions
|
|
258
|
+
?? (options.maxQuestions ? parseInt(options.maxQuestions, 10) : config.interview.maxQuestionsPerTool)));
|
|
259
|
+
const timeout = options.timeout
|
|
260
|
+
? parseInt(options.timeout, 10)
|
|
261
|
+
: config.interview.timeout;
|
|
262
|
+
const outputDir = interactiveConfig?.outputDir ?? options.output ?? config.output.outputDir ?? '.';
|
|
263
|
+
// Determine personas: preset > --security > --personas > config.yaml > default
|
|
264
|
+
let selectedPersonas;
|
|
265
|
+
if (presetConfig) {
|
|
266
|
+
selectedPersonas = presetConfig.personas;
|
|
267
|
+
}
|
|
268
|
+
else if (options.security) {
|
|
269
|
+
selectedPersonas = parsePersonas('technical,security');
|
|
270
|
+
}
|
|
271
|
+
else if (options.personas) {
|
|
272
|
+
selectedPersonas = parsePersonas(options.personas);
|
|
273
|
+
}
|
|
274
|
+
else if (config.interview.personas) {
|
|
275
|
+
// Support both string ("technical,security") and array (["technical", "security"]) formats
|
|
276
|
+
const personaList = Array.isArray(config.interview.personas)
|
|
277
|
+
? config.interview.personas.join(',')
|
|
278
|
+
: config.interview.personas;
|
|
279
|
+
selectedPersonas = parsePersonas(personaList);
|
|
280
|
+
}
|
|
281
|
+
else {
|
|
282
|
+
selectedPersonas = parsePersonas('technical');
|
|
283
|
+
}
|
|
284
|
+
// Determine output format
|
|
285
|
+
const wantsJson = interactiveConfig
|
|
286
|
+
? (interactiveConfig.outputFormat === 'json' || interactiveConfig.outputFormat === 'both')
|
|
287
|
+
: (options.json || config.output.format === 'json' || config.output.format === 'both');
|
|
288
|
+
// Determine baseline options
|
|
289
|
+
const shouldSaveBaseline = interactiveConfig?.saveBaseline ?? !!options.saveBaseline;
|
|
290
|
+
const baselinePath = interactiveConfig?.baselinePath
|
|
291
|
+
?? (typeof options.saveBaseline === 'string' ? options.saveBaseline : undefined);
|
|
292
|
+
const compareBaselinePath = interactiveConfig?.compareBaseline ?? options.compareBaseline;
|
|
293
|
+
// Display startup banner with all settings
|
|
294
|
+
const serverCommand = `${command} ${args.join(' ')}`;
|
|
295
|
+
const personaNames = selectedPersonas.map((p) => p.name);
|
|
296
|
+
const banner = formatStartupBanner({
|
|
297
|
+
serverCommand,
|
|
298
|
+
provider: config.llm.provider,
|
|
299
|
+
model,
|
|
300
|
+
isQuality: isQualityMode,
|
|
301
|
+
personas: personaNames,
|
|
302
|
+
questionsPerTool: maxQuestions,
|
|
303
|
+
});
|
|
304
|
+
output.info(banner);
|
|
305
|
+
output.newline();
|
|
306
|
+
// Validate transport options
|
|
307
|
+
const transportType = options.transport;
|
|
308
|
+
const isRemoteTransport = transportType === 'sse' || transportType === 'streamable-http';
|
|
309
|
+
if (isRemoteTransport && !options.url) {
|
|
310
|
+
output.error(`Error: --url is required when using --transport ${transportType}`);
|
|
311
|
+
process.exit(1);
|
|
312
|
+
}
|
|
313
|
+
if (options.url && !isRemoteTransport) {
|
|
314
|
+
output.error('Error: --url requires --transport sse or --transport streamable-http');
|
|
315
|
+
process.exit(1);
|
|
316
|
+
}
|
|
317
|
+
// Initialize cost tracker for real usage tracking
|
|
318
|
+
const costTracker = new CostTracker(model);
|
|
319
|
+
// Initialize metrics collector for comprehensive observability
|
|
320
|
+
resetMetricsCollector();
|
|
321
|
+
const metricsCollector = getMetricsCollector();
|
|
322
|
+
metricsCollector.startInterview();
|
|
323
|
+
// Initialize cache for tool responses and LLM analysis
|
|
324
|
+
// Cache is enabled by default unless --no-cache is specified
|
|
325
|
+
const cacheEnabled = options.cache !== false;
|
|
326
|
+
resetGlobalCache();
|
|
327
|
+
const cache = getGlobalCache({ enabled: cacheEnabled });
|
|
328
|
+
if (cacheEnabled) {
|
|
329
|
+
output.info('Response caching enabled');
|
|
330
|
+
}
|
|
331
|
+
// Initialize clients
|
|
332
|
+
const mcpClient = new MCPClient({
|
|
333
|
+
timeout,
|
|
334
|
+
debug: options.debug,
|
|
335
|
+
transport: transportType,
|
|
336
|
+
});
|
|
337
|
+
let llmClient;
|
|
338
|
+
// Create usage callback for cost and metrics tracking
|
|
339
|
+
/**
 * Token-usage hook supplied as `onUsage` when the LLM clients are created.
 *
 * @param inputTokens - input token count reported for a call
 * @param outputTokens - output token count reported for a call
 */
const onUsageCallback = (inputTokens, outputTokens) => {
    // Cost tracker is updated first, exactly as before.
    costTracker.addUsage(inputTokens, outputTokens);
    // The same counts are then recorded in the metrics collector, tagged with
    // the configured provider, the resolved model and the 'llm_call' label.
    const usageArgs = [config.llm.provider, model, inputTokens, outputTokens, 'llm_call'];
    metricsCollector.recordTokenUsage(...usageArgs);
};
|
|
344
|
+
try {
|
|
345
|
+
// Use the LLM factory to create the appropriate provider client
|
|
346
|
+
const baseLLMClient = createLLMClient({
|
|
347
|
+
provider: config.llm.provider,
|
|
348
|
+
model,
|
|
349
|
+
apiKey: config.llm.apiKey,
|
|
350
|
+
apiKeyEnvVar: config.llm.apiKeyEnvVar,
|
|
351
|
+
baseUrl: config.llm.baseUrl,
|
|
352
|
+
onUsage: onUsageCallback,
|
|
353
|
+
});
|
|
354
|
+
// Wrap with fallback client if enabled
|
|
355
|
+
if (options.fallback) {
|
|
356
|
+
output.info('Fallback mode enabled - will use Ollama if primary provider fails');
|
|
357
|
+
llmClient = new FallbackLLMClient({
|
|
358
|
+
providers: [
|
|
359
|
+
{
|
|
360
|
+
provider: config.llm.provider,
|
|
361
|
+
model,
|
|
362
|
+
apiKey: config.llm.apiKey,
|
|
363
|
+
apiKeyEnvVar: config.llm.apiKeyEnvVar,
|
|
364
|
+
baseUrl: config.llm.baseUrl,
|
|
365
|
+
},
|
|
366
|
+
],
|
|
367
|
+
useOllamaFallback: true,
|
|
368
|
+
onUsage: onUsageCallback,
|
|
369
|
+
});
|
|
370
|
+
}
|
|
371
|
+
else {
|
|
372
|
+
llmClient = baseLLMClient;
|
|
373
|
+
}
|
|
374
|
+
// Wrap with token budget enforcement if max-tokens specified
|
|
375
|
+
if (options.maxTokens) {
|
|
376
|
+
const maxTokens = parseInt(options.maxTokens, 10);
|
|
377
|
+
if (isNaN(maxTokens) || maxTokens < 1000) {
|
|
378
|
+
output.error('Invalid --max-tokens value: must be a positive integer >= 1000');
|
|
379
|
+
process.exit(1);
|
|
380
|
+
}
|
|
381
|
+
output.info(`Token budget enabled: ${maxTokens.toLocaleString()} tokens max`);
|
|
382
|
+
llmClient = withTokenBudget(llmClient, {
|
|
383
|
+
maxTotalTokens: maxTokens,
|
|
384
|
+
onBudgetWarning: (used, total, pct) => {
|
|
385
|
+
output.warn(`Token budget warning: ${pct.toFixed(0)}% used (${used.toLocaleString()}/${total.toLocaleString()})`);
|
|
386
|
+
},
|
|
387
|
+
onBudgetExceeded: (used, total) => {
|
|
388
|
+
output.error(`Token budget exceeded: ${used.toLocaleString()}/${total.toLocaleString()} tokens`);
|
|
389
|
+
},
|
|
390
|
+
});
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
catch (error) {
|
|
394
|
+
output.error('Failed to initialize LLM client: ' + (error instanceof Error ? error.message : String(error)));
|
|
395
|
+
output.error(`\nProvider: ${config.llm.provider}`);
|
|
396
|
+
output.error('Make sure the appropriate API key environment variable is set:');
|
|
397
|
+
output.error(' - OpenAI: OPENAI_API_KEY');
|
|
398
|
+
output.error(' - Anthropic: ANTHROPIC_API_KEY');
|
|
399
|
+
output.error(' - Ollama: No API key needed (ensure Ollama is running)');
|
|
400
|
+
process.exit(1);
|
|
401
|
+
}
|
|
402
|
+
// Determine streaming early so we can suppress logs before MCP connection
|
|
403
|
+
const enableStreaming = options.stream && !options.quiet;
|
|
404
|
+
if (enableStreaming) {
|
|
405
|
+
// Suppress JSON logs during streaming to keep output clean
|
|
406
|
+
suppressLogs();
|
|
407
|
+
}
|
|
408
|
+
try {
|
|
409
|
+
// Connect to MCP server
|
|
410
|
+
if (isRemoteTransport) {
|
|
411
|
+
output.info(`Connecting to remote MCP server via ${transportType}...`);
|
|
412
|
+
await mcpClient.connectRemote(options.url, {
|
|
413
|
+
transport: transportType,
|
|
414
|
+
sessionId: options.sessionId,
|
|
415
|
+
});
|
|
416
|
+
}
|
|
417
|
+
else {
|
|
418
|
+
output.info('Connecting to MCP server...');
|
|
419
|
+
await mcpClient.connect(command, args);
|
|
420
|
+
}
|
|
421
|
+
// Discovery phase
|
|
422
|
+
output.info('Discovering capabilities...');
|
|
423
|
+
const discovery = await discover(mcpClient, command, args);
|
|
424
|
+
const resourceCount = discovery.resources?.length ?? 0;
|
|
425
|
+
const discoveryParts = [`${discovery.tools.length} tools`, `${discovery.prompts.length} prompts`];
|
|
426
|
+
if (resourceCount > 0) {
|
|
427
|
+
discoveryParts.push(`${resourceCount} resources`);
|
|
428
|
+
}
|
|
429
|
+
output.info(`Found ${discoveryParts.join(', ')}\n`);
|
|
430
|
+
// Update metrics with discovery counts
|
|
431
|
+
metricsCollector.updateInterviewCounters({
|
|
432
|
+
toolsDiscovered: discovery.tools.length,
|
|
433
|
+
personasUsed: selectedPersonas.length,
|
|
434
|
+
});
|
|
435
|
+
if (discovery.tools.length === 0) {
|
|
436
|
+
output.info('No tools found. Nothing to interview.');
|
|
437
|
+
metricsCollector.endInterview();
|
|
438
|
+
await mcpClient.disconnect();
|
|
439
|
+
return;
|
|
440
|
+
}
|
|
441
|
+
// Check if scenarios file exists for optimization suggestions
|
|
442
|
+
const defaultScenariosPath = join(outputDir, DEFAULT_SCENARIOS_FILE);
|
|
443
|
+
const hasScenariosFile = !!(options.scenarios || existsSync(defaultScenariosPath));
|
|
444
|
+
// Cost and time estimation (shown by default unless --quiet or --ci)
|
|
445
|
+
// In CI mode, cost is near-zero since we skip LLM calls
|
|
446
|
+
const costEstimate = isCiMode
|
|
447
|
+
? {
|
|
448
|
+
usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
|
|
449
|
+
costUSD: 0,
|
|
450
|
+
model,
|
|
451
|
+
breakdown: { inputCost: 0, outputCost: 0 },
|
|
452
|
+
}
|
|
453
|
+
: estimateInterviewCost(model, discovery.tools.length, maxQuestions, selectedPersonas.length);
|
|
454
|
+
const timeEstimate = estimateInterviewTime(discovery.tools.length, maxQuestions, selectedPersonas.length, options.parallelPersonas, config.llm.provider, discovery.prompts.length, resourceCount, isCiMode // structuralOnly in CI mode
|
|
455
|
+
);
|
|
456
|
+
// Show cost/time estimate (unless quiet mode)
|
|
457
|
+
if (!options.quiet) {
|
|
458
|
+
output.info(formatCostAndTimeEstimate(costEstimate, timeEstimate));
|
|
459
|
+
// Show optimization suggestions if cost is significant
|
|
460
|
+
const optimizationContext = {
|
|
461
|
+
estimatedCost: costEstimate.costUSD,
|
|
462
|
+
toolCount: discovery.tools.length,
|
|
463
|
+
personaCount: selectedPersonas.length,
|
|
464
|
+
isParallelPersonas: !!options.parallelPersonas,
|
|
465
|
+
isPremiumModel: !!options.quality,
|
|
466
|
+
isUsingCiPreset: options.preset === 'ci' || isCiMode,
|
|
467
|
+
hasScenariosFile,
|
|
468
|
+
};
|
|
469
|
+
const suggestions = suggestOptimizations(optimizationContext);
|
|
470
|
+
if (suggestions.length > 0) {
|
|
471
|
+
output.newline();
|
|
472
|
+
output.info(formatOptimizationSuggestions(suggestions));
|
|
473
|
+
}
|
|
474
|
+
output.newline();
|
|
475
|
+
}
|
|
476
|
+
// Legacy --estimate-cost flag shows detailed breakdown
|
|
477
|
+
if (options.estimateCost) {
|
|
478
|
+
output.info(formatCostEstimate(costEstimate));
|
|
479
|
+
output.newline();
|
|
480
|
+
}
|
|
481
|
+
// Confirmation prompt for high-cost runs in interactive terminals
|
|
482
|
+
if (costEstimate.costUSD > COST_THRESHOLDS.CONFIRMATION_THRESHOLD &&
|
|
483
|
+
isInteractiveTTY() &&
|
|
484
|
+
!options.yes &&
|
|
485
|
+
!isCiMode) {
|
|
486
|
+
const readline = await import('readline');
|
|
487
|
+
const rl = readline.createInterface({
|
|
488
|
+
input: process.stdin,
|
|
489
|
+
output: process.stdout,
|
|
490
|
+
});
|
|
491
|
+
const answer = await new Promise((resolve) => {
|
|
492
|
+
rl.question(`Estimated cost is ~$${costEstimate.costUSD.toFixed(2)}. Continue? [Y/n] `, resolve);
|
|
493
|
+
});
|
|
494
|
+
rl.close();
|
|
495
|
+
const normalizedAnswer = answer.trim().toLowerCase();
|
|
496
|
+
if (normalizedAnswer === 'n' || normalizedAnswer === 'no') {
|
|
497
|
+
output.info('Interview cancelled.');
|
|
498
|
+
await mcpClient.disconnect();
|
|
499
|
+
return;
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
// Load custom scenarios if provided
|
|
503
|
+
// In --ci mode, skip auto-loading scenarios for fast runs (use --scenarios to include them)
|
|
504
|
+
let customScenarios;
|
|
505
|
+
if (options.scenarios) {
|
|
506
|
+
try {
|
|
507
|
+
customScenarios = loadScenariosFromFile(options.scenarios);
|
|
508
|
+
output.info(`Loaded ${customScenarios.toolScenarios.length} tool scenarios, ${customScenarios.promptScenarios.length} prompt scenarios from ${options.scenarios}`);
|
|
509
|
+
}
|
|
510
|
+
catch (error) {
|
|
511
|
+
output.error(`Failed to load scenarios: ${error instanceof Error ? error.message : error}`);
|
|
512
|
+
process.exit(1);
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
else if (!isCiMode) {
|
|
516
|
+
// Try loading default scenarios file from output directory (skip in CI mode for speed)
|
|
517
|
+
const defaultScenarios = tryLoadDefaultScenarios(outputDir);
|
|
518
|
+
if (defaultScenarios) {
|
|
519
|
+
customScenarios = defaultScenarios;
|
|
520
|
+
output.info(`Auto-loaded ${customScenarios.toolScenarios.length} tool scenarios from ${DEFAULT_SCENARIOS_FILE}`);
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
// Build workflow configuration
|
|
524
|
+
let workflowConfig;
|
|
525
|
+
if (options.workflows || options.discoverWorkflows) {
|
|
526
|
+
workflowConfig = {
|
|
527
|
+
discoverWorkflows: options.discoverWorkflows,
|
|
528
|
+
maxDiscoveredWorkflows: options.maxWorkflows
|
|
529
|
+
? parseInt(options.maxWorkflows, 10)
|
|
530
|
+
: WORKFLOW.MAX_DISCOVERED_WORKFLOWS,
|
|
531
|
+
enableStateTracking: options.workflowStateTracking,
|
|
532
|
+
};
|
|
533
|
+
// Load workflows from file if provided
|
|
534
|
+
if (options.workflows) {
|
|
535
|
+
try {
|
|
536
|
+
const workflows = loadWorkflowsFromFile(options.workflows);
|
|
537
|
+
workflowConfig.workflows = workflows;
|
|
538
|
+
workflowConfig.workflowsFile = options.workflows;
|
|
539
|
+
output.info(`Loaded ${workflows.length} workflow(s) from ${options.workflows}`);
|
|
540
|
+
}
|
|
541
|
+
catch (error) {
|
|
542
|
+
output.error(`Failed to load workflows: ${error instanceof Error ? error.message : error}`);
|
|
543
|
+
process.exit(1);
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
if (options.discoverWorkflows) {
|
|
547
|
+
output.info('Workflow discovery enabled - will analyze tools for workflow patterns');
|
|
548
|
+
}
|
|
549
|
+
}
|
|
550
|
+
else if (!isCiMode) {
|
|
551
|
+
// Try auto-loading default workflows file from output directory (skip in CI mode for speed)
|
|
552
|
+
// Similar to how scenarios are auto-loaded from bellwether-tests.yaml
|
|
553
|
+
const defaultWorkflows = tryLoadDefaultWorkflows(outputDir);
|
|
554
|
+
if (defaultWorkflows && defaultWorkflows.length > 0) {
|
|
555
|
+
workflowConfig = {
|
|
556
|
+
discoverWorkflows: false,
|
|
557
|
+
maxDiscoveredWorkflows: WORKFLOW.MAX_DISCOVERED_WORKFLOWS,
|
|
558
|
+
enableStateTracking: options.workflowStateTracking,
|
|
559
|
+
workflows: defaultWorkflows,
|
|
560
|
+
workflowsFile: `${outputDir}/${DEFAULT_WORKFLOWS_FILE}`,
|
|
561
|
+
};
|
|
562
|
+
output.info(`Auto-loaded ${defaultWorkflows.length} workflow(s) from ${DEFAULT_WORKFLOWS_FILE}`);
|
|
563
|
+
}
|
|
564
|
+
}
|
|
565
|
+
// Set up streaming display if enabled
|
|
566
|
+
let streamingDisplay = null;
|
|
567
|
+
let streamingCallbacks;
|
|
568
|
+
if (enableStreaming) {
|
|
569
|
+
streamingDisplay = new StreamingDisplay({
|
|
570
|
+
style: 'dim',
|
|
571
|
+
maxWidth: 100,
|
|
572
|
+
});
|
|
573
|
+
// Callbacks that forward streaming events to `streamingDisplay`.
// Every call goes through `?.`, so a null display turns each callback into a no-op.
streamingCallbacks = {
    /**
     * Starts a new display line with a human-readable prefix derived from `operation`.
     * `operation` is split on ':'; the first piece selects the wording and the
     * second piece (when truthy) is interpolated as the subject.
     */
    onStart: (operation, _context) => {
        const [opType, subject] = operation.split(':');
        const promptOps = ['generate-prompt-questions', 'analyze-prompt', 'synthesize-prompt'];
        const resourceOps = ['generate-resource-questions', 'analyze-resource', 'synthesize-resource'];
        let banner = '';
        if (opType === 'generate-questions') {
            banner = subject ? `\n Generating questions for ${subject}... ` : '\n Generating questions... ';
        } else if (opType === 'analyze') {
            banner = subject ? `\n Analyzing ${subject}... ` : '\n Analyzing... ';
        } else if (opType === 'synthesize-tool') {
            banner = subject ? `\n Synthesizing profile for ${subject}... ` : '\n Synthesizing profile... ';
        } else if (opType === 'synthesize-overall') {
            // This operation never names a subject.
            banner = '\n Synthesizing overall findings... ';
        } else if (promptOps.includes(opType)) {
            banner = subject ? `\n Processing prompt ${subject}... ` : '\n Processing prompt... ';
        } else if (resourceOps.includes(opType)) {
            banner = subject ? `\n Processing resource ${subject}... ` : '\n Processing resource... ';
        } else {
            // Any unrecognised operation type gets the generic wording.
            banner = '\n Processing... ';
        }
        streamingDisplay?.start(banner);
    },
    /** Writes a streamed chunk to the display. */
    onChunk: (chunk, _operation) => {
        streamingDisplay?.write(chunk);
    },
    /** Finishes the current display line with a ' [done]' suffix. */
    onComplete: (_text, _operation) => {
        streamingDisplay?.finish(' [done]');
    },
    /** Aborts the current display line, showing the error's message. */
    onError: (error, _operation) => {
        streamingDisplay?.abort(`[error: ${error.message}]`);
    },
};
|
|
618
|
+
output.info('Streaming mode enabled - showing LLM output in real-time\n');
|
|
619
|
+
}
|
|
620
|
+
// Parse and validate persona concurrency
|
|
621
|
+
let personaConcurrency;
|
|
622
|
+
if (options.personaConcurrency) {
|
|
623
|
+
personaConcurrency = parseInt(options.personaConcurrency, 10);
|
|
624
|
+
if (isNaN(personaConcurrency) || personaConcurrency < 1) {
|
|
625
|
+
output.error('Invalid --persona-concurrency value: must be a positive integer');
|
|
626
|
+
process.exit(1);
|
|
627
|
+
}
|
|
628
|
+
if (personaConcurrency > INTERVIEW.MAX_PERSONA_CONCURRENCY) {
|
|
629
|
+
output.warn(`High persona concurrency (${personaConcurrency}) may cause rate limiting or memory issues`);
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
// Parse resource timeout option
|
|
633
|
+
const resourceTimeout = options.resourceTimeout
|
|
634
|
+
? parseInt(options.resourceTimeout, 10)
|
|
635
|
+
: undefined;
|
|
636
|
+
// Interview phase
|
|
637
|
+
const interviewer = new Interviewer(llmClient, {
|
|
638
|
+
maxQuestionsPerTool: maxQuestions,
|
|
639
|
+
timeout,
|
|
640
|
+
skipErrorTests: config.interview.skipErrorTests ?? false,
|
|
641
|
+
model,
|
|
642
|
+
personas: selectedPersonas,
|
|
643
|
+
customScenarios,
|
|
644
|
+
customScenariosOnly: options.scenariosOnly,
|
|
645
|
+
enableStreaming,
|
|
646
|
+
streamingCallbacks,
|
|
647
|
+
parallelPersonas: options.parallelPersonas,
|
|
648
|
+
personaConcurrency,
|
|
649
|
+
cache,
|
|
650
|
+
resourceTimeout,
|
|
651
|
+
workflowConfig,
|
|
652
|
+
structuralOnly: isCiMode, // Fast CI mode: skip LLM calls for speed
|
|
653
|
+
});
|
|
654
|
+
// Extract server context from command line arguments
|
|
655
|
+
const serverContext = extractServerContextFromArgs(command, args);
|
|
656
|
+
if (serverContext.allowedDirectories && serverContext.allowedDirectories.length > 0) {
|
|
657
|
+
output.info(`Detected allowed directories: ${serverContext.allowedDirectories.join(', ')}`);
|
|
658
|
+
}
|
|
659
|
+
interviewer.setServerContext(serverContext);
|
|
660
|
+
// Set up progress display - disable progress bar when streaming to avoid display conflicts
|
|
661
|
+
const progressBar = new InterviewProgressBar({ enabled: !options.verbose && !enableStreaming });
|
|
662
|
+
/**
 * Progress handler passed to `interviewer.interview(...)`.
 *
 * With `options.verbose` set, each phase is logged as a text line via `output.info`
 * (the 'starting' phase also calls `progressBar.start`). Otherwise the phases drive
 * the progress bar: start on 'starting', update while work is in progress, stop on
 * 'synthesizing' or 'complete'. Unrecognised phases are ignored in both modes.
 *
 * @param progress - progress snapshot; `phase` selects the behaviour
 */
const progressCallback = (progress) => {
    const isVerbose = options.verbose;
    const phase = progress.phase;
    // Shared by both modes: prompt/resource totals default to 0 when absent.
    const startBar = () => {
        progressBar.start(progress.totalTools, progress.totalPersonas, progress.totalPrompts ?? 0, progress.totalResources ?? 0);
    };
    if (!isVerbose) {
        // Non-verbose mode: the progress bar is the only output.
        switch (phase) {
            case 'starting':
                startBar();
                break;
            case 'interviewing':
            case 'prompts':
            case 'resources':
            case 'workflows':
                progressBar.update(progress);
                break;
            case 'complete':
            case 'synthesizing':
                progressBar.stop();
                break;
        }
        return;
    }
    // Verbose mode: one log line per progress event.
    if (phase === 'starting') {
        output.info('Starting interview...');
        startBar();
    } else if (phase === 'interviewing') {
        output.info(`[${progress.currentPersona}] Interviewing: ${progress.currentTool} (${progress.toolsCompleted + 1}/${progress.totalTools})`);
    } else if (phase === 'prompts') {
        // currentTool carries a 'prompt:' prefix that is stripped for display.
        output.info(`Interviewing prompt: ${progress.currentTool?.replace('prompt:', '') ?? '...'} (${(progress.promptsCompleted ?? 0) + 1}/${progress.totalPrompts ?? 0})`);
    } else if (phase === 'resources') {
        // currentTool carries a 'resource:' prefix that is stripped for display.
        output.info(`Interviewing resource: ${progress.currentTool?.replace('resource:', '') ?? '...'} (${(progress.resourcesCompleted ?? 0) + 1}/${progress.totalResources ?? 0})`);
    } else if (phase === 'workflows') {
        const workflowLine = progress.currentWorkflow
            ? `Executing workflow: ${progress.currentWorkflow} (${(progress.workflowsCompleted ?? 0) + 1}/${progress.totalWorkflows})`
            : 'Executing workflows...';
        output.info(workflowLine);
    } else if (phase === 'synthesizing') {
        output.info('Synthesizing findings...');
    } else if (phase === 'complete') {
        output.info('Interview complete!');
    }
};
|
|
707
|
+
output.info('Starting interview...\n');
|
|
708
|
+
const result = await interviewer.interview(mcpClient, discovery, progressCallback);
|
|
709
|
+
// Ensure progress bar is stopped
|
|
710
|
+
progressBar.stop();
|
|
711
|
+
if (!options.verbose) {
|
|
712
|
+
output.newline();
|
|
713
|
+
}
|
|
714
|
+
// Generate documentation
|
|
715
|
+
output.info('Generating documentation...');
|
|
716
|
+
// Ensure output directory exists
|
|
717
|
+
mkdirSync(outputDir, { recursive: true });
|
|
718
|
+
const agentsMd = generateAgentsMd(result);
|
|
719
|
+
const agentsMdPath = join(outputDir, 'AGENTS.md');
|
|
720
|
+
writeFileSync(agentsMdPath, agentsMd);
|
|
721
|
+
output.info(`Written: ${agentsMdPath}`);
|
|
722
|
+
if (wantsJson) {
|
|
723
|
+
const jsonReport = generateJsonReport(result);
|
|
724
|
+
const jsonPath = join(outputDir, 'bellwether-report.json');
|
|
725
|
+
writeFileSync(jsonPath, jsonReport);
|
|
726
|
+
output.info(`Written: ${jsonPath}`);
|
|
727
|
+
}
|
|
728
|
+
// End metrics tracking
|
|
729
|
+
const interviewMetrics = metricsCollector.endInterview();
|
|
730
|
+
output.info('\nInterview complete!');
|
|
731
|
+
output.info(`Duration: ${(result.metadata.durationMs / 1000).toFixed(1)}s`);
|
|
732
|
+
output.info(`Tool calls: ${result.metadata.toolCallCount} (${result.metadata.errorCount} errors)`);
|
|
733
|
+
// Display scenario results summary if scenarios were run
|
|
734
|
+
if (result.scenarioResults && result.scenarioResults.length > 0) {
|
|
735
|
+
const passed = result.scenarioResults.filter(r => r.passed).length;
|
|
736
|
+
const failed = result.scenarioResults.length - passed;
|
|
737
|
+
const statusIcon = failed === 0 ? '\u2713' : '\u2717';
|
|
738
|
+
output.info(`\nCustom scenarios: ${passed}/${result.scenarioResults.length} passed ${statusIcon}`);
|
|
739
|
+
// Show failed scenarios
|
|
740
|
+
if (failed > 0) {
|
|
741
|
+
output.info('\nFailed scenarios:');
|
|
742
|
+
for (const scenarioResult of result.scenarioResults.filter(r => !r.passed)) {
|
|
743
|
+
const scenario = scenarioResult.scenario;
|
|
744
|
+
const toolOrPrompt = 'tool' in scenario ? scenario.tool : scenario.prompt;
|
|
745
|
+
output.info(` - ${toolOrPrompt}: ${scenario.description}`);
|
|
746
|
+
if (scenarioResult.error) {
|
|
747
|
+
output.info(` Error: ${scenarioResult.error}`);
|
|
748
|
+
}
|
|
749
|
+
for (const assertion of scenarioResult.assertionResults.filter(a => !a.passed)) {
|
|
750
|
+
output.info(` Assertion failed: ${assertion.error}`);
|
|
751
|
+
}
|
|
752
|
+
}
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
// Display workflow results summary if workflows were executed
|
|
756
|
+
if (result.workflowResults && result.workflowResults.length > 0) {
|
|
757
|
+
const successful = result.workflowResults.filter(wr => wr.success).length;
|
|
758
|
+
const failed = result.workflowResults.length - successful;
|
|
759
|
+
const statusIcon = failed === 0 ? '\u2713' : '\u2717';
|
|
760
|
+
output.info(`\nWorkflows: ${successful}/${result.workflowResults.length} passed ${statusIcon}`);
|
|
761
|
+
// Show failed workflows
|
|
762
|
+
if (failed > 0) {
|
|
763
|
+
output.info('\nFailed workflows:');
|
|
764
|
+
for (const wr of result.workflowResults.filter(w => !w.success)) {
|
|
765
|
+
output.info(` - ${wr.workflow.name}: ${wr.failureReason ?? 'Unknown error'}`);
|
|
766
|
+
if (wr.failedStepIndex !== undefined) {
|
|
767
|
+
const failedStep = wr.workflow.steps[wr.failedStepIndex];
|
|
768
|
+
output.info(` Failed at step ${wr.failedStepIndex + 1}: ${failedStep?.tool ?? 'unknown'}`);
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
}
|
|
772
|
+
// Show workflow metadata summary
|
|
773
|
+
if (result.metadata.workflows) {
|
|
774
|
+
const wfMeta = result.metadata.workflows;
|
|
775
|
+
if (wfMeta.discoveredCount > 0) {
|
|
776
|
+
output.info(` Discovered: ${wfMeta.discoveredCount} workflow(s)`);
|
|
777
|
+
}
|
|
778
|
+
if (wfMeta.loadedCount > 0) {
|
|
779
|
+
output.info(` Loaded from file: ${wfMeta.loadedCount} workflow(s)`);
|
|
780
|
+
}
|
|
781
|
+
}
|
|
782
|
+
}
|
|
783
|
+
// Show cost summary if requested (uses real token counts from API responses)
|
|
784
|
+
if (options.showCost || options.estimateCost) {
|
|
785
|
+
output.info('\n' + costTracker.formatSummary());
|
|
786
|
+
}
|
|
787
|
+
// Show detailed metrics if requested
|
|
788
|
+
if (options.showMetrics && interviewMetrics) {
|
|
789
|
+
output.info('\n--- Interview Metrics ---');
|
|
790
|
+
output.info(`Tools discovered: ${interviewMetrics.toolsDiscovered}`);
|
|
791
|
+
output.info(`Personas used: ${interviewMetrics.personasUsed}`);
|
|
792
|
+
output.info(`LLM calls made: ${interviewMetrics.llmCallsMade}`);
|
|
793
|
+
output.info(`Total input tokens: ${interviewMetrics.totalInputTokens.toLocaleString()}`);
|
|
794
|
+
output.info(`Total output tokens: ${interviewMetrics.totalOutputTokens.toLocaleString()}`);
|
|
795
|
+
if (interviewMetrics.totalDurationMs) {
|
|
796
|
+
output.info(`Total duration: ${(interviewMetrics.totalDurationMs / 1000).toFixed(1)}s`);
|
|
797
|
+
}
|
|
798
|
+
if (interviewMetrics.totalCostUSD > 0) {
|
|
799
|
+
output.info(`Estimated cost: $${interviewMetrics.totalCostUSD.toFixed(4)}`);
|
|
800
|
+
}
|
|
801
|
+
}
|
|
802
|
+
// Show cache statistics if caching is enabled
|
|
803
|
+
if (cacheEnabled) {
|
|
804
|
+
const cacheStats = cache.getStats();
|
|
805
|
+
const totalCacheOps = cacheStats.hits + cacheStats.misses;
|
|
806
|
+
if (totalCacheOps > 0) {
|
|
807
|
+
output.info('\n--- Cache Statistics ---');
|
|
808
|
+
output.info(`Cache hits: ${cacheStats.hits}`);
|
|
809
|
+
output.info(`Cache misses: ${cacheStats.misses}`);
|
|
810
|
+
output.info(`Hit rate: ${cacheStats.hitRate.toFixed(1)}%`);
|
|
811
|
+
output.info(`Entries stored: ${cacheStats.entries}`);
|
|
812
|
+
if (cacheStats.hits > 0) {
|
|
813
|
+
output.info(`Estimated savings: ${cacheStats.hits} LLM/tool calls avoided`);
|
|
814
|
+
}
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
// Save baseline if requested
|
|
818
|
+
if (shouldSaveBaseline) {
|
|
819
|
+
const serverCommand = `${command} ${args.join(' ')}`;
|
|
820
|
+
const finalBaselinePath = baselinePath ?? join(outputDir, 'bellwether-baseline.json');
|
|
821
|
+
if (options.cloudFormat) {
|
|
822
|
+
// Save in cloud-ready format
|
|
823
|
+
const cloudBaseline = createCloudBaseline(result, serverCommand);
|
|
824
|
+
writeFileSync(finalBaselinePath, JSON.stringify(cloudBaseline, null, 2));
|
|
825
|
+
output.info(`\nCloud baseline saved: ${finalBaselinePath}`);
|
|
826
|
+
}
|
|
827
|
+
else {
|
|
828
|
+
// Save in local format
|
|
829
|
+
const baselineMode = isCiMode ? 'structural' : 'full';
|
|
830
|
+
const baseline = createBaseline(result, serverCommand, baselineMode);
|
|
831
|
+
saveBaseline(baseline, finalBaselinePath);
|
|
832
|
+
output.info(`\nBaseline saved: ${finalBaselinePath} (mode: ${baselineMode})`);
|
|
833
|
+
}
|
|
834
|
+
}
|
|
835
|
+
// Compare against baseline if requested
|
|
836
|
+
if (compareBaselinePath) {
|
|
837
|
+
if (!existsSync(compareBaselinePath)) {
|
|
838
|
+
output.error(`\nBaseline file not found: ${compareBaselinePath}`);
|
|
839
|
+
process.exit(1);
|
|
840
|
+
}
|
|
841
|
+
const serverCommand = `${command} ${args.join(' ')}`;
|
|
842
|
+
const previousBaseline = loadBaseline(compareBaselinePath);
|
|
843
|
+
const baselineMode = isCiMode ? 'structural' : 'full';
|
|
844
|
+
const currentBaseline = createBaseline(result, serverCommand, baselineMode);
|
|
845
|
+
// Note baseline mode mismatch
|
|
846
|
+
if (previousBaseline.mode && previousBaseline.mode !== baselineMode) {
|
|
847
|
+
output.info(`Note: Baseline mode changed (${previousBaseline.mode} -> ${baselineMode})`);
|
|
848
|
+
}
|
|
849
|
+
const diff = compareBaselines(previousBaseline, currentBaseline, {});
|
|
850
|
+
output.info('\n--- Drift Report ---');
|
|
851
|
+
output.info(formatDiffText(diff));
|
|
852
|
+
const shouldFailOnDrift = options.failOnDrift ?? config.drift?.failOnDrift ?? false;
|
|
853
|
+
if (shouldFailOnDrift) {
|
|
854
|
+
if (diff.severity === 'breaking') {
|
|
855
|
+
output.error('\nBreaking changes detected!');
|
|
856
|
+
process.exit(1);
|
|
857
|
+
}
|
|
858
|
+
else if (diff.severity === 'warning') {
|
|
859
|
+
output.warn('\nWarning-level changes detected.');
|
|
860
|
+
process.exit(1);
|
|
861
|
+
}
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
}
|
|
865
|
+
catch (error) {
|
|
866
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
867
|
+
output.error('\n--- Interview Failed ---');
|
|
868
|
+
output.error(`Error: ${errorMessage}`);
|
|
869
|
+
// Provide helpful context for common errors
|
|
870
|
+
if (errorMessage.includes('ECONNREFUSED') || errorMessage.includes('Connection refused')) {
|
|
871
|
+
output.error('\nPossible causes:');
|
|
872
|
+
output.error(' - The MCP server is not running');
|
|
873
|
+
output.error(' - The server address/port is incorrect');
|
|
874
|
+
output.error(' - A firewall is blocking the connection');
|
|
875
|
+
}
|
|
876
|
+
else if (errorMessage.includes('timeout') || errorMessage.includes('Timeout')) {
|
|
877
|
+
output.error('\nPossible causes:');
|
|
878
|
+
output.error(' - The MCP server is taking too long to respond');
|
|
879
|
+
output.error(' - Try increasing --timeout value');
|
|
880
|
+
output.error(' - The server may be overloaded or stuck');
|
|
881
|
+
}
|
|
882
|
+
else if (errorMessage.includes('ENOENT') || errorMessage.includes('not found')) {
|
|
883
|
+
output.error('\nPossible causes:');
|
|
884
|
+
output.error(' - The server command was not found');
|
|
885
|
+
output.error(' - Check that the command is installed and in PATH');
|
|
886
|
+
output.error(' - Try using an absolute path to the server executable');
|
|
887
|
+
}
|
|
888
|
+
else if (errorMessage.includes('API') || errorMessage.includes('API_KEY')) {
|
|
889
|
+
output.error('\nPossible causes:');
|
|
890
|
+
output.error(' - Missing or invalid API key');
|
|
891
|
+
output.error(' - Set OPENAI_API_KEY or ANTHROPIC_API_KEY environment variable');
|
|
892
|
+
output.error(' - Or configure apiKeyEnvVar in bellwether.yaml');
|
|
893
|
+
}
|
|
894
|
+
process.exit(1);
|
|
895
|
+
}
|
|
896
|
+
finally {
|
|
897
|
+
// Restore log level if it was suppressed for streaming
|
|
898
|
+
// (restoreLogLevel is safe to call even if logs weren't suppressed)
|
|
899
|
+
restoreLogLevel();
|
|
900
|
+
await mcpClient.disconnect();
|
|
901
|
+
}
|
|
902
|
+
});
|
|
903
|
+
//# sourceMappingURL=interview.js.map
|