@dotsetlabs/bellwether 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +291 -0
- package/LICENSE +21 -0
- package/README.md +739 -0
- package/dist/auth/credentials.d.ts +64 -0
- package/dist/auth/credentials.js +218 -0
- package/dist/auth/index.d.ts +6 -0
- package/dist/auth/index.js +6 -0
- package/dist/auth/keychain.d.ts +64 -0
- package/dist/auth/keychain.js +268 -0
- package/dist/baseline/ab-testing.d.ts +80 -0
- package/dist/baseline/ab-testing.js +236 -0
- package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
- package/dist/baseline/ai-compatibility-scorer.js +606 -0
- package/dist/baseline/calibration.d.ts +77 -0
- package/dist/baseline/calibration.js +136 -0
- package/dist/baseline/category-matching.d.ts +85 -0
- package/dist/baseline/category-matching.js +289 -0
- package/dist/baseline/change-impact-analyzer.d.ts +98 -0
- package/dist/baseline/change-impact-analyzer.js +592 -0
- package/dist/baseline/comparator.d.ts +64 -0
- package/dist/baseline/comparator.js +916 -0
- package/dist/baseline/confidence.d.ts +55 -0
- package/dist/baseline/confidence.js +122 -0
- package/dist/baseline/converter.d.ts +61 -0
- package/dist/baseline/converter.js +585 -0
- package/dist/baseline/dependency-analyzer.d.ts +89 -0
- package/dist/baseline/dependency-analyzer.js +567 -0
- package/dist/baseline/deprecation-tracker.d.ts +133 -0
- package/dist/baseline/deprecation-tracker.js +322 -0
- package/dist/baseline/diff.d.ts +55 -0
- package/dist/baseline/diff.js +1584 -0
- package/dist/baseline/documentation-scorer.d.ts +205 -0
- package/dist/baseline/documentation-scorer.js +466 -0
- package/dist/baseline/embeddings.d.ts +118 -0
- package/dist/baseline/embeddings.js +251 -0
- package/dist/baseline/error-analyzer.d.ts +198 -0
- package/dist/baseline/error-analyzer.js +721 -0
- package/dist/baseline/evaluation/evaluator.d.ts +42 -0
- package/dist/baseline/evaluation/evaluator.js +323 -0
- package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
- package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
- package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
- package/dist/baseline/evaluation/golden-dataset.js +717 -0
- package/dist/baseline/evaluation/index.d.ts +15 -0
- package/dist/baseline/evaluation/index.js +15 -0
- package/dist/baseline/evaluation/types.d.ts +186 -0
- package/dist/baseline/evaluation/types.js +8 -0
- package/dist/baseline/external-dependency-detector.d.ts +181 -0
- package/dist/baseline/external-dependency-detector.js +524 -0
- package/dist/baseline/golden-output.d.ts +162 -0
- package/dist/baseline/golden-output.js +636 -0
- package/dist/baseline/health-scorer.d.ts +174 -0
- package/dist/baseline/health-scorer.js +451 -0
- package/dist/baseline/incremental-checker.d.ts +97 -0
- package/dist/baseline/incremental-checker.js +174 -0
- package/dist/baseline/index.d.ts +31 -0
- package/dist/baseline/index.js +42 -0
- package/dist/baseline/migration-generator.d.ts +137 -0
- package/dist/baseline/migration-generator.js +554 -0
- package/dist/baseline/migrations.d.ts +60 -0
- package/dist/baseline/migrations.js +197 -0
- package/dist/baseline/performance-tracker.d.ts +214 -0
- package/dist/baseline/performance-tracker.js +577 -0
- package/dist/baseline/pr-comment-generator.d.ts +117 -0
- package/dist/baseline/pr-comment-generator.js +546 -0
- package/dist/baseline/response-fingerprint.d.ts +127 -0
- package/dist/baseline/response-fingerprint.js +728 -0
- package/dist/baseline/response-schema-tracker.d.ts +129 -0
- package/dist/baseline/response-schema-tracker.js +420 -0
- package/dist/baseline/risk-scorer.d.ts +54 -0
- package/dist/baseline/risk-scorer.js +434 -0
- package/dist/baseline/saver.d.ts +89 -0
- package/dist/baseline/saver.js +554 -0
- package/dist/baseline/scenario-generator.d.ts +151 -0
- package/dist/baseline/scenario-generator.js +905 -0
- package/dist/baseline/schema-compare.d.ts +86 -0
- package/dist/baseline/schema-compare.js +557 -0
- package/dist/baseline/schema-evolution.d.ts +189 -0
- package/dist/baseline/schema-evolution.js +467 -0
- package/dist/baseline/semantic.d.ts +203 -0
- package/dist/baseline/semantic.js +908 -0
- package/dist/baseline/synonyms.d.ts +60 -0
- package/dist/baseline/synonyms.js +386 -0
- package/dist/baseline/telemetry.d.ts +165 -0
- package/dist/baseline/telemetry.js +294 -0
- package/dist/baseline/test-pruner.d.ts +120 -0
- package/dist/baseline/test-pruner.js +387 -0
- package/dist/baseline/types.d.ts +449 -0
- package/dist/baseline/types.js +5 -0
- package/dist/baseline/version.d.ts +138 -0
- package/dist/baseline/version.js +206 -0
- package/dist/cache/index.d.ts +5 -0
- package/dist/cache/index.js +5 -0
- package/dist/cache/response-cache.d.ts +151 -0
- package/dist/cache/response-cache.js +287 -0
- package/dist/ci/index.d.ts +60 -0
- package/dist/ci/index.js +342 -0
- package/dist/cli/commands/auth.d.ts +12 -0
- package/dist/cli/commands/auth.js +352 -0
- package/dist/cli/commands/badge.d.ts +3 -0
- package/dist/cli/commands/badge.js +74 -0
- package/dist/cli/commands/baseline-accept.d.ts +15 -0
- package/dist/cli/commands/baseline-accept.js +178 -0
- package/dist/cli/commands/baseline-migrate.d.ts +12 -0
- package/dist/cli/commands/baseline-migrate.js +164 -0
- package/dist/cli/commands/baseline.d.ts +14 -0
- package/dist/cli/commands/baseline.js +449 -0
- package/dist/cli/commands/beta.d.ts +10 -0
- package/dist/cli/commands/beta.js +231 -0
- package/dist/cli/commands/check.d.ts +11 -0
- package/dist/cli/commands/check.js +820 -0
- package/dist/cli/commands/cloud/badge.d.ts +3 -0
- package/dist/cli/commands/cloud/badge.js +74 -0
- package/dist/cli/commands/cloud/diff.d.ts +6 -0
- package/dist/cli/commands/cloud/diff.js +79 -0
- package/dist/cli/commands/cloud/history.d.ts +6 -0
- package/dist/cli/commands/cloud/history.js +102 -0
- package/dist/cli/commands/cloud/link.d.ts +9 -0
- package/dist/cli/commands/cloud/link.js +119 -0
- package/dist/cli/commands/cloud/login.d.ts +7 -0
- package/dist/cli/commands/cloud/login.js +499 -0
- package/dist/cli/commands/cloud/projects.d.ts +6 -0
- package/dist/cli/commands/cloud/projects.js +44 -0
- package/dist/cli/commands/cloud/shared.d.ts +7 -0
- package/dist/cli/commands/cloud/shared.js +42 -0
- package/dist/cli/commands/cloud/teams.d.ts +8 -0
- package/dist/cli/commands/cloud/teams.js +169 -0
- package/dist/cli/commands/cloud/upload.d.ts +8 -0
- package/dist/cli/commands/cloud/upload.js +181 -0
- package/dist/cli/commands/contract.d.ts +11 -0
- package/dist/cli/commands/contract.js +280 -0
- package/dist/cli/commands/discover.d.ts +3 -0
- package/dist/cli/commands/discover.js +82 -0
- package/dist/cli/commands/eval.d.ts +9 -0
- package/dist/cli/commands/eval.js +187 -0
- package/dist/cli/commands/explore.d.ts +11 -0
- package/dist/cli/commands/explore.js +437 -0
- package/dist/cli/commands/feedback.d.ts +9 -0
- package/dist/cli/commands/feedback.js +174 -0
- package/dist/cli/commands/golden.d.ts +12 -0
- package/dist/cli/commands/golden.js +407 -0
- package/dist/cli/commands/history.d.ts +10 -0
- package/dist/cli/commands/history.js +202 -0
- package/dist/cli/commands/init.d.ts +9 -0
- package/dist/cli/commands/init.js +219 -0
- package/dist/cli/commands/interview.d.ts +3 -0
- package/dist/cli/commands/interview.js +903 -0
- package/dist/cli/commands/link.d.ts +10 -0
- package/dist/cli/commands/link.js +169 -0
- package/dist/cli/commands/login.d.ts +7 -0
- package/dist/cli/commands/login.js +499 -0
- package/dist/cli/commands/preset.d.ts +33 -0
- package/dist/cli/commands/preset.js +297 -0
- package/dist/cli/commands/profile.d.ts +33 -0
- package/dist/cli/commands/profile.js +286 -0
- package/dist/cli/commands/registry.d.ts +11 -0
- package/dist/cli/commands/registry.js +146 -0
- package/dist/cli/commands/shared.d.ts +79 -0
- package/dist/cli/commands/shared.js +196 -0
- package/dist/cli/commands/teams.d.ts +8 -0
- package/dist/cli/commands/teams.js +169 -0
- package/dist/cli/commands/test.d.ts +9 -0
- package/dist/cli/commands/test.js +500 -0
- package/dist/cli/commands/upload.d.ts +8 -0
- package/dist/cli/commands/upload.js +223 -0
- package/dist/cli/commands/validate-config.d.ts +6 -0
- package/dist/cli/commands/validate-config.js +35 -0
- package/dist/cli/commands/verify.d.ts +11 -0
- package/dist/cli/commands/verify.js +283 -0
- package/dist/cli/commands/watch.d.ts +12 -0
- package/dist/cli/commands/watch.js +253 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.js +178 -0
- package/dist/cli/interactive.d.ts +47 -0
- package/dist/cli/interactive.js +216 -0
- package/dist/cli/output/terminal-reporter.d.ts +19 -0
- package/dist/cli/output/terminal-reporter.js +104 -0
- package/dist/cli/output.d.ts +226 -0
- package/dist/cli/output.js +438 -0
- package/dist/cli/utils/env.d.ts +5 -0
- package/dist/cli/utils/env.js +14 -0
- package/dist/cli/utils/progress.d.ts +59 -0
- package/dist/cli/utils/progress.js +206 -0
- package/dist/cli/utils/server-context.d.ts +10 -0
- package/dist/cli/utils/server-context.js +36 -0
- package/dist/cloud/auth.d.ts +144 -0
- package/dist/cloud/auth.js +374 -0
- package/dist/cloud/client.d.ts +24 -0
- package/dist/cloud/client.js +65 -0
- package/dist/cloud/http-client.d.ts +38 -0
- package/dist/cloud/http-client.js +215 -0
- package/dist/cloud/index.d.ts +23 -0
- package/dist/cloud/index.js +25 -0
- package/dist/cloud/mock-client.d.ts +107 -0
- package/dist/cloud/mock-client.js +545 -0
- package/dist/cloud/types.d.ts +515 -0
- package/dist/cloud/types.js +15 -0
- package/dist/config/defaults.d.ts +160 -0
- package/dist/config/defaults.js +169 -0
- package/dist/config/loader.d.ts +24 -0
- package/dist/config/loader.js +122 -0
- package/dist/config/template.d.ts +42 -0
- package/dist/config/template.js +647 -0
- package/dist/config/validator.d.ts +2112 -0
- package/dist/config/validator.js +658 -0
- package/dist/constants/cloud.d.ts +107 -0
- package/dist/constants/cloud.js +110 -0
- package/dist/constants/core.d.ts +521 -0
- package/dist/constants/core.js +556 -0
- package/dist/constants/testing.d.ts +1283 -0
- package/dist/constants/testing.js +1568 -0
- package/dist/constants.d.ts +10 -0
- package/dist/constants.js +10 -0
- package/dist/contract/index.d.ts +6 -0
- package/dist/contract/index.js +5 -0
- package/dist/contract/validator.d.ts +177 -0
- package/dist/contract/validator.js +574 -0
- package/dist/cost/index.d.ts +6 -0
- package/dist/cost/index.js +5 -0
- package/dist/cost/tracker.d.ts +134 -0
- package/dist/cost/tracker.js +313 -0
- package/dist/discovery/discovery.d.ts +16 -0
- package/dist/discovery/discovery.js +173 -0
- package/dist/discovery/types.d.ts +51 -0
- package/dist/discovery/types.js +2 -0
- package/dist/docs/agents.d.ts +3 -0
- package/dist/docs/agents.js +995 -0
- package/dist/docs/contract.d.ts +51 -0
- package/dist/docs/contract.js +1681 -0
- package/dist/docs/generator.d.ts +4 -0
- package/dist/docs/generator.js +4 -0
- package/dist/docs/html-reporter.d.ts +9 -0
- package/dist/docs/html-reporter.js +757 -0
- package/dist/docs/index.d.ts +10 -0
- package/dist/docs/index.js +11 -0
- package/dist/docs/junit-reporter.d.ts +18 -0
- package/dist/docs/junit-reporter.js +210 -0
- package/dist/docs/report.d.ts +14 -0
- package/dist/docs/report.js +44 -0
- package/dist/docs/sarif-reporter.d.ts +19 -0
- package/dist/docs/sarif-reporter.js +335 -0
- package/dist/docs/shared.d.ts +35 -0
- package/dist/docs/shared.js +162 -0
- package/dist/docs/templates.d.ts +12 -0
- package/dist/docs/templates.js +76 -0
- package/dist/errors/index.d.ts +6 -0
- package/dist/errors/index.js +6 -0
- package/dist/errors/retry.d.ts +92 -0
- package/dist/errors/retry.js +323 -0
- package/dist/errors/types.d.ts +321 -0
- package/dist/errors/types.js +584 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.js +32 -0
- package/dist/interview/dependency-resolver.d.ts +11 -0
- package/dist/interview/dependency-resolver.js +32 -0
- package/dist/interview/interviewer.d.ts +232 -0
- package/dist/interview/interviewer.js +1939 -0
- package/dist/interview/mock-response-generator.d.ts +7 -0
- package/dist/interview/mock-response-generator.js +102 -0
- package/dist/interview/orchestrator.d.ts +237 -0
- package/dist/interview/orchestrator.js +1296 -0
- package/dist/interview/rate-limiter.d.ts +15 -0
- package/dist/interview/rate-limiter.js +55 -0
- package/dist/interview/response-validator.d.ts +10 -0
- package/dist/interview/response-validator.js +132 -0
- package/dist/interview/schema-inferrer.d.ts +8 -0
- package/dist/interview/schema-inferrer.js +71 -0
- package/dist/interview/schema-test-generator.d.ts +71 -0
- package/dist/interview/schema-test-generator.js +834 -0
- package/dist/interview/smart-value-generator.d.ts +155 -0
- package/dist/interview/smart-value-generator.js +554 -0
- package/dist/interview/stateful-test-runner.d.ts +19 -0
- package/dist/interview/stateful-test-runner.js +106 -0
- package/dist/interview/types.d.ts +561 -0
- package/dist/interview/types.js +2 -0
- package/dist/llm/anthropic.d.ts +41 -0
- package/dist/llm/anthropic.js +355 -0
- package/dist/llm/client.d.ts +123 -0
- package/dist/llm/client.js +42 -0
- package/dist/llm/factory.d.ts +38 -0
- package/dist/llm/factory.js +145 -0
- package/dist/llm/fallback.d.ts +140 -0
- package/dist/llm/fallback.js +379 -0
- package/dist/llm/index.d.ts +18 -0
- package/dist/llm/index.js +15 -0
- package/dist/llm/ollama.d.ts +37 -0
- package/dist/llm/ollama.js +330 -0
- package/dist/llm/openai.d.ts +25 -0
- package/dist/llm/openai.js +320 -0
- package/dist/llm/token-budget.d.ts +161 -0
- package/dist/llm/token-budget.js +395 -0
- package/dist/logging/logger.d.ts +70 -0
- package/dist/logging/logger.js +130 -0
- package/dist/metrics/collector.d.ts +106 -0
- package/dist/metrics/collector.js +547 -0
- package/dist/metrics/index.d.ts +7 -0
- package/dist/metrics/index.js +7 -0
- package/dist/metrics/prometheus.d.ts +20 -0
- package/dist/metrics/prometheus.js +241 -0
- package/dist/metrics/types.d.ts +209 -0
- package/dist/metrics/types.js +5 -0
- package/dist/persona/builtins.d.ts +54 -0
- package/dist/persona/builtins.js +219 -0
- package/dist/persona/index.d.ts +8 -0
- package/dist/persona/index.js +8 -0
- package/dist/persona/loader.d.ts +30 -0
- package/dist/persona/loader.js +190 -0
- package/dist/persona/types.d.ts +144 -0
- package/dist/persona/types.js +5 -0
- package/dist/persona/validation.d.ts +94 -0
- package/dist/persona/validation.js +332 -0
- package/dist/prompts/index.d.ts +5 -0
- package/dist/prompts/index.js +5 -0
- package/dist/prompts/templates.d.ts +180 -0
- package/dist/prompts/templates.js +431 -0
- package/dist/registry/client.d.ts +49 -0
- package/dist/registry/client.js +191 -0
- package/dist/registry/index.d.ts +7 -0
- package/dist/registry/index.js +6 -0
- package/dist/registry/types.d.ts +140 -0
- package/dist/registry/types.js +6 -0
- package/dist/scenarios/evaluator.d.ts +43 -0
- package/dist/scenarios/evaluator.js +206 -0
- package/dist/scenarios/index.d.ts +10 -0
- package/dist/scenarios/index.js +9 -0
- package/dist/scenarios/loader.d.ts +20 -0
- package/dist/scenarios/loader.js +285 -0
- package/dist/scenarios/types.d.ts +153 -0
- package/dist/scenarios/types.js +8 -0
- package/dist/security/index.d.ts +17 -0
- package/dist/security/index.js +18 -0
- package/dist/security/payloads.d.ts +61 -0
- package/dist/security/payloads.js +268 -0
- package/dist/security/security-tester.d.ts +42 -0
- package/dist/security/security-tester.js +582 -0
- package/dist/security/types.d.ts +166 -0
- package/dist/security/types.js +8 -0
- package/dist/transport/base-transport.d.ts +59 -0
- package/dist/transport/base-transport.js +38 -0
- package/dist/transport/http-transport.d.ts +67 -0
- package/dist/transport/http-transport.js +238 -0
- package/dist/transport/mcp-client.d.ts +141 -0
- package/dist/transport/mcp-client.js +496 -0
- package/dist/transport/sse-transport.d.ts +88 -0
- package/dist/transport/sse-transport.js +316 -0
- package/dist/transport/stdio-transport.d.ts +43 -0
- package/dist/transport/stdio-transport.js +238 -0
- package/dist/transport/types.d.ts +125 -0
- package/dist/transport/types.js +16 -0
- package/dist/utils/concurrency.d.ts +123 -0
- package/dist/utils/concurrency.js +213 -0
- package/dist/utils/formatters.d.ts +16 -0
- package/dist/utils/formatters.js +37 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/jsonpath.d.ts +87 -0
- package/dist/utils/jsonpath.js +326 -0
- package/dist/utils/markdown.d.ts +113 -0
- package/dist/utils/markdown.js +265 -0
- package/dist/utils/network.d.ts +14 -0
- package/dist/utils/network.js +17 -0
- package/dist/utils/sanitize.d.ts +92 -0
- package/dist/utils/sanitize.js +191 -0
- package/dist/utils/semantic.d.ts +194 -0
- package/dist/utils/semantic.js +1051 -0
- package/dist/utils/smart-truncate.d.ts +94 -0
- package/dist/utils/smart-truncate.js +361 -0
- package/dist/utils/timeout.d.ts +153 -0
- package/dist/utils/timeout.js +205 -0
- package/dist/utils/yaml-parser.d.ts +58 -0
- package/dist/utils/yaml-parser.js +86 -0
- package/dist/validation/index.d.ts +32 -0
- package/dist/validation/index.js +32 -0
- package/dist/validation/semantic-test-generator.d.ts +50 -0
- package/dist/validation/semantic-test-generator.js +176 -0
- package/dist/validation/semantic-types.d.ts +66 -0
- package/dist/validation/semantic-types.js +94 -0
- package/dist/validation/semantic-validator.d.ts +38 -0
- package/dist/validation/semantic-validator.js +340 -0
- package/dist/verification/index.d.ts +6 -0
- package/dist/verification/index.js +5 -0
- package/dist/verification/types.d.ts +133 -0
- package/dist/verification/types.js +5 -0
- package/dist/verification/verifier.d.ts +30 -0
- package/dist/verification/verifier.js +309 -0
- package/dist/version.d.ts +19 -0
- package/dist/version.js +48 -0
- package/dist/workflow/auto-generator.d.ts +27 -0
- package/dist/workflow/auto-generator.js +513 -0
- package/dist/workflow/discovery.d.ts +40 -0
- package/dist/workflow/discovery.js +195 -0
- package/dist/workflow/executor.d.ts +82 -0
- package/dist/workflow/executor.js +611 -0
- package/dist/workflow/index.d.ts +10 -0
- package/dist/workflow/index.js +10 -0
- package/dist/workflow/loader.d.ts +24 -0
- package/dist/workflow/loader.js +194 -0
- package/dist/workflow/state-tracker.d.ts +98 -0
- package/dist/workflow/state-tracker.js +424 -0
- package/dist/workflow/types.d.ts +337 -0
- package/dist/workflow/types.js +5 -0
- package/package.json +94 -0
- package/schemas/bellwether-check.schema.json +651 -0
|
@@ -0,0 +1,1296 @@
|
|
|
1
|
+
import { DEFAULT_PERSONA } from '../persona/builtins.js';
|
|
2
|
+
import { DEFAULT_SYSTEM_PROMPT, buildQuestionGenerationPrompt, buildResponseAnalysisPrompt, buildToolProfileSynthesisPrompt, buildOverallSynthesisPrompt, buildPromptQuestionGenerationPrompt, buildPromptResponseAnalysisPrompt, buildPromptProfileSynthesisPrompt, COMPLETION_OPTIONS, } from '../prompts/templates.js';
|
|
3
|
+
import { getLogger } from '../logging/logger.js';
|
|
4
|
+
import { withTimeout, DEFAULT_TIMEOUTS, TimeoutError } from '../utils/timeout.js';
|
|
5
|
+
import { RETRY, DISPLAY_LIMITS, ORCHESTRATOR } from '../constants.js';
|
|
6
|
+
/**
 * Categorize an error from LLM operations.
 *
 * Classification is keyword-based on the lower-cased error message. Rules are
 * evaluated in a fixed order and the first match wins: timeout, refusal,
 * rate limit, auth, network, empty response, invalid format, unknown.
 *
 * HTTP status codes (429, 401, 403) are matched as standalone numbers so that
 * unrelated digits (for example "14290ms" or "id 94013") are not mistaken for
 * a rate-limit or authentication failure.
 *
 * @param {unknown} error - The thrown value; non-Error values are stringified.
 * @returns {{category: string, isRetryable: boolean, message: string}}
 *   The category, whether a retry is worthwhile, and a display message. For
 *   timeouts and unknown errors the original message is passed through.
 */
function categorizeLLMError(error) {
    if (error instanceof TimeoutError) {
        return { category: 'timeout', isRetryable: true, message: error.message };
    }
    const originalMessage = error instanceof Error ? error.message : String(error);
    // String() guards against Error subclasses that carry a non-string message.
    const message = String(originalMessage).toLowerCase();
    // Order matters: earlier rules take precedence over later ones.
    const rules = [
        {
            category: 'refusal',
            isRetryable: false,
            message: 'LLM declined to generate content',
            phrases: ['refused', 'cannot generate', 'unable to create', 'content policy'],
        },
        {
            category: 'rate_limit',
            isRetryable: true,
            message: 'Rate limit exceeded',
            phrases: ['rate limit', 'too many requests'],
            statusCode: /\b429\b/,
        },
        {
            category: 'auth',
            isRetryable: false,
            message: 'Authentication error',
            phrases: ['unauthorized', 'authentication', 'api key'],
            statusCode: /\b40[13]\b/,
        },
        {
            category: 'network',
            isRetryable: true,
            message: 'Network error',
            phrases: ['econnrefused', 'econnreset', 'network', 'fetch failed', 'socket'],
        },
        {
            // Empty response (token exhaustion or model issues) - retryable
            category: 'format_error',
            isRetryable: true,
            message: 'LLM returned empty response (possible token exhaustion)',
            phrases: ['empty or whitespace', 'token exhaustion', 'unexpected end of json'],
        },
        {
            // LLM returned the wrong format - retryable
            category: 'format_error',
            isRetryable: true,
            message: 'LLM returned invalid format',
            phrases: ['invalid question format', 'response was not an array', 'unexpected token', 'not valid json'],
        },
    ];
    const matched = rules.find(({ phrases, statusCode }) => phrases.some((phrase) => message.includes(phrase)) ||
        (statusCode?.test(message) ?? false));
    if (matched) {
        return { category: matched.category, isRetryable: matched.isRetryable, message: matched.message };
    }
    return { category: 'unknown', isRetryable: false, message: originalMessage };
}
|
|
58
|
+
/**
|
|
59
|
+
* Orchestrator uses an LLM to generate interview questions and synthesize findings.
|
|
60
|
+
* Optionally accepts a Persona to customize the interview style.
|
|
61
|
+
* Supports streaming output for real-time feedback during LLM operations.
|
|
62
|
+
*/
|
|
63
|
+
export class Orchestrator {
|
|
64
|
+
llm;
|
|
65
|
+
persona;
|
|
66
|
+
serverContext;
|
|
67
|
+
logger = getLogger('orchestrator');
|
|
68
|
+
streamingCallbacks;
|
|
69
|
+
useStreaming = false;
|
|
70
|
+
cache;
|
|
71
|
+
constructor(llm, persona, serverContext, cache) {
|
|
72
|
+
this.llm = llm;
|
|
73
|
+
this.persona = persona ?? DEFAULT_PERSONA;
|
|
74
|
+
this.serverContext = serverContext;
|
|
75
|
+
this.cache = cache;
|
|
76
|
+
}
|
|
77
|
+
/**
|
|
78
|
+
* Enable streaming with callbacks.
|
|
79
|
+
*/
|
|
80
|
+
enableStreaming(callbacks) {
|
|
81
|
+
this.useStreaming = true;
|
|
82
|
+
this.streamingCallbacks = callbacks;
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Disable streaming.
|
|
86
|
+
*/
|
|
87
|
+
disableStreaming() {
|
|
88
|
+
this.useStreaming = false;
|
|
89
|
+
this.streamingCallbacks = undefined;
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Check if streaming is enabled.
|
|
93
|
+
*/
|
|
94
|
+
isStreamingEnabled() {
|
|
95
|
+
return this.useStreaming && this.llm.getProviderInfo().supportsStreaming;
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Create streaming options for an LLM call.
|
|
99
|
+
*/
|
|
100
|
+
createStreamingOptions(operation) {
|
|
101
|
+
if (!this.useStreaming || !this.streamingCallbacks) {
|
|
102
|
+
return {};
|
|
103
|
+
}
|
|
104
|
+
return {
|
|
105
|
+
onChunk: (chunk) => this.streamingCallbacks?.onChunk?.(chunk, operation),
|
|
106
|
+
onComplete: (text) => this.streamingCallbacks?.onComplete?.(text, operation),
|
|
107
|
+
onError: (error) => this.streamingCallbacks?.onError?.(error, operation),
|
|
108
|
+
};
|
|
109
|
+
}
|
|
110
|
+
/**
|
|
111
|
+
* Complete an LLM call, using streaming if enabled.
|
|
112
|
+
* Falls back to non-streaming if streaming returns empty content.
|
|
113
|
+
*/
|
|
114
|
+
async completeWithStreaming(prompt, options, operation) {
|
|
115
|
+
if (this.isStreamingEnabled()) {
|
|
116
|
+
this.streamingCallbacks?.onStart?.(operation);
|
|
117
|
+
const streamingOpts = this.createStreamingOptions(operation);
|
|
118
|
+
try {
|
|
119
|
+
const result = await this.llm.stream(prompt, { ...options, ...streamingOpts });
|
|
120
|
+
// If streaming returned empty/incomplete, fall back to non-streaming
|
|
121
|
+
if (!result.completed || !result.text) {
|
|
122
|
+
this.logger.warn({ operation }, 'Streaming returned empty, falling back to non-streaming');
|
|
123
|
+
// Notify callbacks about the fallback
|
|
124
|
+
this.streamingCallbacks?.onError?.(new Error('Streaming returned empty content'), operation);
|
|
125
|
+
return this.llm.complete(prompt, options);
|
|
126
|
+
}
|
|
127
|
+
return result.text;
|
|
128
|
+
}
|
|
129
|
+
catch (error) {
|
|
130
|
+
// On streaming error, fall back to non-streaming
|
|
131
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
132
|
+
this.logger.warn({ operation, error: errorMessage }, 'Streaming failed, falling back to non-streaming');
|
|
133
|
+
// Notify callbacks about the error
|
|
134
|
+
this.streamingCallbacks?.onError?.(error instanceof Error ? error : new Error(errorMessage), operation);
|
|
135
|
+
return this.llm.complete(prompt, options);
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
return this.llm.complete(prompt, options);
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Set server context for contextually appropriate question generation.
|
|
142
|
+
*/
|
|
143
|
+
setServerContext(context) {
|
|
144
|
+
this.serverContext = context;
|
|
145
|
+
}
|
|
146
|
+
/**
|
|
147
|
+
* Get the current server context.
|
|
148
|
+
*/
|
|
149
|
+
getServerContext() {
|
|
150
|
+
return this.serverContext;
|
|
151
|
+
}
|
|
152
|
+
/**
|
|
153
|
+
* Get the current persona.
|
|
154
|
+
*/
|
|
155
|
+
getPersona() {
|
|
156
|
+
return this.persona;
|
|
157
|
+
}
|
|
158
|
+
/**
|
|
159
|
+
* Set a new persona for subsequent operations.
|
|
160
|
+
*/
|
|
161
|
+
setPersona(persona) {
|
|
162
|
+
this.persona = persona;
|
|
163
|
+
}
|
|
164
|
+
/**
|
|
165
|
+
* Get the system prompt, combining persona prompt with additional context.
|
|
166
|
+
*/
|
|
167
|
+
getSystemPrompt() {
|
|
168
|
+
let prompt = this.persona.systemPrompt || DEFAULT_SYSTEM_PROMPT;
|
|
169
|
+
if (this.persona.additionalContext) {
|
|
170
|
+
prompt += `\n\n${this.persona.additionalContext}`;
|
|
171
|
+
}
|
|
172
|
+
return prompt;
|
|
173
|
+
}
|
|
174
|
+
/**
|
|
175
|
+
* Get categories to focus on based on persona bias.
|
|
176
|
+
*/
|
|
177
|
+
getCategoryDistribution(maxQuestions) {
|
|
178
|
+
const bias = this.persona.questionBias;
|
|
179
|
+
const categories = [];
|
|
180
|
+
// Build weighted distribution
|
|
181
|
+
const weights = [
|
|
182
|
+
['happy_path', bias.happyPath],
|
|
183
|
+
['edge_case', bias.edgeCase],
|
|
184
|
+
['error_handling', bias.errorHandling],
|
|
185
|
+
['boundary', bias.boundary],
|
|
186
|
+
];
|
|
187
|
+
if (bias.security && bias.security > 0) {
|
|
188
|
+
weights.push(['security', bias.security]);
|
|
189
|
+
}
|
|
190
|
+
// Normalize weights
|
|
191
|
+
const totalWeight = weights.reduce((sum, [, w]) => sum + w, 0);
|
|
192
|
+
// Distribute questions based on weights
|
|
193
|
+
for (let i = 0; i < maxQuestions; i++) {
|
|
194
|
+
let random = Math.random() * totalWeight;
|
|
195
|
+
for (const [category, weight] of weights) {
|
|
196
|
+
random -= weight;
|
|
197
|
+
if (random <= 0) {
|
|
198
|
+
categories.push(category);
|
|
199
|
+
break;
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
// Fallback if rounding issues
|
|
203
|
+
if (categories.length <= i) {
|
|
204
|
+
categories.push('happy_path');
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
return categories;
|
|
208
|
+
}
|
|
209
|
+
/**
|
|
210
|
+
* Generate interview questions for a tool.
|
|
211
|
+
* Optionally accepts previous errors to learn from and avoid.
|
|
212
|
+
*
|
|
213
|
+
* Error handling strategy:
|
|
214
|
+
* - Refusals: Use fallback questions (no retry)
|
|
215
|
+
* - Rate limits: Retry with backoff
|
|
216
|
+
* - Timeouts: Retry once, then fallback
|
|
217
|
+
* - Auth errors: Log and use fallback (no retry)
|
|
218
|
+
* - Network errors: Retry with backoff
|
|
219
|
+
* - Unknown errors: Log and use fallback
|
|
220
|
+
*/
|
|
221
|
+
async generateQuestions(tool, maxQuestions = 3, skipErrorTests = false, previousErrors) {
|
|
222
|
+
// Get category distribution based on persona bias
|
|
223
|
+
const targetCategories = this.getCategoryDistribution(maxQuestions);
|
|
224
|
+
const categoryCounts = targetCategories.reduce((acc, cat) => {
|
|
225
|
+
acc[cat] = (acc[cat] || 0) + 1;
|
|
226
|
+
return acc;
|
|
227
|
+
}, {});
|
|
228
|
+
const categoryGuidance = Object.entries(categoryCounts)
|
|
229
|
+
.map(([cat, count]) => `${count} ${cat.replace('_', ' ')} example(s)`)
|
|
230
|
+
.join(', ');
|
|
231
|
+
// Build category list including security if persona uses it
|
|
232
|
+
let categoryList = '"happy_path" (normal usage), "edge_case" (boundary values), "error_handling" (incomplete inputs), "boundary" (limits)';
|
|
233
|
+
if (this.persona.questionBias.security && this.persona.questionBias.security > 0) {
|
|
234
|
+
categoryList += ', "security" (security testing)';
|
|
235
|
+
}
|
|
236
|
+
const prompt = buildQuestionGenerationPrompt({
|
|
237
|
+
tool,
|
|
238
|
+
maxQuestions,
|
|
239
|
+
categoryGuidance,
|
|
240
|
+
categoryList,
|
|
241
|
+
skipErrorTests,
|
|
242
|
+
serverContext: this.serverContext,
|
|
243
|
+
previousErrors,
|
|
244
|
+
});
|
|
245
|
+
// Retry logic for transient errors
|
|
246
|
+
const maxRetries = 2;
|
|
247
|
+
let lastError;
|
|
248
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
249
|
+
let rawResponse;
|
|
250
|
+
try {
|
|
251
|
+
// Apply timeout to LLM call - use streaming if enabled
|
|
252
|
+
const response = await withTimeout(this.completeWithStreaming(prompt, {
|
|
253
|
+
...COMPLETION_OPTIONS.questionGeneration,
|
|
254
|
+
systemPrompt: this.getSystemPrompt(),
|
|
255
|
+
}, `generate-questions:${tool.name}`), DEFAULT_TIMEOUTS.questionGeneration, `Question generation for ${tool.name}`);
|
|
256
|
+
rawResponse = response;
|
|
257
|
+
// Check for empty/whitespace-only responses (common with token exhaustion)
|
|
258
|
+
const trimmed = response.trim();
|
|
259
|
+
if (!trimmed || /^[\s\t\n]+$/.test(response)) {
|
|
260
|
+
throw new Error('LLM returned empty or whitespace-only response (possible token exhaustion)');
|
|
261
|
+
}
|
|
262
|
+
const parsed = this.llm.parseJSON(response);
|
|
263
|
+
// Handle different response formats
|
|
264
|
+
if (Array.isArray(parsed)) {
|
|
265
|
+
// Ideal case: LLM returned an array
|
|
266
|
+
return parsed.slice(0, maxQuestions);
|
|
267
|
+
}
|
|
268
|
+
// Check if it's a single valid question object (LLM sometimes returns single objects)
|
|
269
|
+
const obj = parsed;
|
|
270
|
+
if (obj.description && obj.category && obj.args !== undefined) {
|
|
271
|
+
// It's a valid question object, wrap it in an array
|
|
272
|
+
this.logger.debug({ tool: tool.name }, 'LLM returned single question object, wrapping in array');
|
|
273
|
+
return [parsed];
|
|
274
|
+
}
|
|
275
|
+
// Check for wrapped array format (e.g., {"test_cases": [...]} or {"questions": [...]})
|
|
276
|
+
// Some models wrap the array in an object instead of returning bare array
|
|
277
|
+
const wrapperKeys = ['test_cases', 'questions', 'tests', 'items', 'data'];
|
|
278
|
+
for (const key of wrapperKeys) {
|
|
279
|
+
if (Array.isArray(obj[key])) {
|
|
280
|
+
this.logger.debug({ tool: tool.name, wrapperKey: key }, 'LLM wrapped array in object, unwrapping');
|
|
281
|
+
return obj[key].slice(0, maxQuestions);
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
// It's an error object or invalid format
|
|
285
|
+
const errorMsg = parsed?.error ?? 'Response was not a valid question format';
|
|
286
|
+
throw new Error(`Invalid question format: ${errorMsg}`);
|
|
287
|
+
}
|
|
288
|
+
catch (error) {
|
|
289
|
+
const categorized = categorizeLLMError(error);
|
|
290
|
+
lastError = { category: categorized.category, message: categorized.message };
|
|
291
|
+
this.logger.warn({
|
|
292
|
+
tool: tool.name,
|
|
293
|
+
attempt: attempt + 1,
|
|
294
|
+
maxRetries: maxRetries + 1,
|
|
295
|
+
errorCategory: categorized.category,
|
|
296
|
+
errorMessage: categorized.message,
|
|
297
|
+
isRetryable: categorized.isRetryable,
|
|
298
|
+
rawResponse: rawResponse?.substring(0, 1000),
|
|
299
|
+
}, 'Question generation failed');
|
|
300
|
+
// Don't retry non-retryable errors
|
|
301
|
+
if (!categorized.isRetryable) {
|
|
302
|
+
break;
|
|
303
|
+
}
|
|
304
|
+
// Wait before retry with exponential backoff
|
|
305
|
+
if (attempt < maxRetries) {
|
|
306
|
+
const delay = Math.min(RETRY.INITIAL_DELAY * Math.pow(2, attempt), RETRY.MAX_DELAY);
|
|
307
|
+
await new Promise(resolve => setTimeout(resolve, delay));
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
// Log the final failure reason clearly
|
|
312
|
+
this.logger.info({
|
|
313
|
+
tool: tool.name,
|
|
314
|
+
reason: lastError?.category ?? 'unknown',
|
|
315
|
+
message: lastError?.message ?? 'No error details',
|
|
316
|
+
}, 'Using fallback questions after LLM failure');
|
|
317
|
+
// Slice fallback questions to respect maxQuestions limit
|
|
318
|
+
return this.generateStructuralTestCases(tool, skipErrorTests).slice(0, maxQuestions);
|
|
319
|
+
}
|
|
320
|
+
/**
|
|
321
|
+
* Analyze a tool response and generate behavioral notes.
|
|
322
|
+
* Uses cache to avoid redundant LLM calls for identical tool responses.
|
|
323
|
+
*/
|
|
324
|
+
async analyzeResponse(tool, question, response, error) {
|
|
325
|
+
// Check cache first
|
|
326
|
+
if (this.cache && response) {
|
|
327
|
+
const responseHash = this.cache.hashResponse(response);
|
|
328
|
+
const cachedAnalysis = this.cache.getAnalysis(tool.name, question.args, responseHash);
|
|
329
|
+
if (cachedAnalysis) {
|
|
330
|
+
this.logger.debug({ tool: tool.name, args: question.args }, 'LLM analysis served from cache');
|
|
331
|
+
return cachedAnalysis;
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
const prompt = buildResponseAnalysisPrompt({
|
|
335
|
+
tool,
|
|
336
|
+
question,
|
|
337
|
+
response,
|
|
338
|
+
error,
|
|
339
|
+
persona: this.persona,
|
|
340
|
+
});
|
|
341
|
+
try {
|
|
342
|
+
const analysis = await this.completeWithStreaming(prompt, {
|
|
343
|
+
...COMPLETION_OPTIONS.responseAnalysis,
|
|
344
|
+
systemPrompt: this.getSystemPrompt(),
|
|
345
|
+
}, `analyze:${tool.name}`);
|
|
346
|
+
// Cache successful analysis
|
|
347
|
+
if (this.cache && response && analysis) {
|
|
348
|
+
const responseHash = this.cache.hashResponse(response);
|
|
349
|
+
this.cache.setAnalysis(tool.name, question.args, responseHash, analysis);
|
|
350
|
+
this.logger.debug({ tool: tool.name, args: question.args }, 'LLM analysis cached');
|
|
351
|
+
}
|
|
352
|
+
return analysis;
|
|
353
|
+
}
|
|
354
|
+
catch (llmError) {
|
|
355
|
+
// Graceful fallback if LLM refuses or fails
|
|
356
|
+
this.logger.debug({
|
|
357
|
+
tool: tool.name,
|
|
358
|
+
error: llmError instanceof Error ? llmError.message : String(llmError),
|
|
359
|
+
}, 'LLM analysis failed, using fallback');
|
|
360
|
+
if (error) {
|
|
361
|
+
return `Tool returned an error: ${error}`;
|
|
362
|
+
}
|
|
363
|
+
if (response?.content) {
|
|
364
|
+
const textContent = response.content.find(c => c.type === 'text');
|
|
365
|
+
if (textContent && 'text' in textContent) {
|
|
366
|
+
return `Tool returned: ${String(textContent.text).substring(0, DISPLAY_LIMITS.TOOL_RESPONSE_PREVIEW)}`;
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
return 'Tool executed successfully.';
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
/**
|
|
373
|
+
* Synthesize findings for a single tool into a profile.
|
|
374
|
+
*/
|
|
375
|
+
async synthesizeToolProfile(tool, interactions) {
|
|
376
|
+
const prompt = buildToolProfileSynthesisPrompt({ tool, interactions });
|
|
377
|
+
try {
|
|
378
|
+
const response = await this.completeWithStreaming(prompt, {
|
|
379
|
+
...COMPLETION_OPTIONS.profileSynthesis,
|
|
380
|
+
systemPrompt: this.getSystemPrompt(),
|
|
381
|
+
}, `synthesize-tool:${tool.name}`);
|
|
382
|
+
const result = this.llm.parseJSON(response);
|
|
383
|
+
return {
|
|
384
|
+
name: tool.name,
|
|
385
|
+
description: tool.description ?? 'No description provided',
|
|
386
|
+
behavioralNotes: result.behavioralNotes ?? [],
|
|
387
|
+
limitations: result.limitations ?? [],
|
|
388
|
+
securityNotes: result.securityNotes ?? [],
|
|
389
|
+
};
|
|
390
|
+
}
|
|
391
|
+
catch (error) {
|
|
392
|
+
// Graceful fallback if LLM fails or refuses
|
|
393
|
+
const reason = error instanceof Error ? error.message : '';
|
|
394
|
+
if (reason.includes('refused')) {
|
|
395
|
+
this.logger.info({ tool: tool.name }, 'Using basic profile (LLM declined)');
|
|
396
|
+
}
|
|
397
|
+
return {
|
|
398
|
+
name: tool.name,
|
|
399
|
+
description: tool.description ?? 'No description provided',
|
|
400
|
+
behavioralNotes: interactions.map(i => i.analysis).filter(a => a),
|
|
401
|
+
limitations: [],
|
|
402
|
+
securityNotes: [],
|
|
403
|
+
};
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
/**
|
|
407
|
+
* Generate overall summary for the interview result.
|
|
408
|
+
*/
|
|
409
|
+
async synthesizeOverall(discovery, toolProfiles) {
|
|
410
|
+
const prompt = buildOverallSynthesisPrompt({ discovery, toolProfiles });
|
|
411
|
+
try {
|
|
412
|
+
const response = await this.completeWithStreaming(prompt, {
|
|
413
|
+
...COMPLETION_OPTIONS.overallSynthesis,
|
|
414
|
+
systemPrompt: this.getSystemPrompt(),
|
|
415
|
+
}, 'synthesize-overall');
|
|
416
|
+
return this.llm.parseJSON(response);
|
|
417
|
+
}
|
|
418
|
+
catch (error) {
|
|
419
|
+
// Graceful fallback if LLM fails or refuses
|
|
420
|
+
const reason = error instanceof Error ? error.message : '';
|
|
421
|
+
if (reason.includes('refused')) {
|
|
422
|
+
this.logger.info({}, 'Using basic summary (LLM declined)');
|
|
423
|
+
}
|
|
424
|
+
return {
|
|
425
|
+
summary: `${discovery.serverInfo.name} provides ${discovery.tools.length} tools for MCP integration.`,
|
|
426
|
+
limitations: [],
|
|
427
|
+
recommendations: [],
|
|
428
|
+
};
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
/**
|
|
432
|
+
* Get fallback questions without LLM call (for fast CI mode).
|
|
433
|
+
* Enhanced to generate comprehensive test cases from schema analysis.
|
|
434
|
+
*/
|
|
435
|
+
getFallbackQuestions(tool, skipErrorTests) {
|
|
436
|
+
return this.generateStructuralTestCases(tool, skipErrorTests);
|
|
437
|
+
}
|
|
438
|
+
/**
|
|
439
|
+
* Generate comprehensive test cases for check mode.
|
|
440
|
+
* Analyzes schema to create meaningful tests without LLM.
|
|
441
|
+
*/
|
|
442
|
+
generateStructuralTestCases(tool, skipErrorTests) {
|
|
443
|
+
const happyPathTests = [];
|
|
444
|
+
const edgeCaseTests = [];
|
|
445
|
+
const errorTests = [];
|
|
446
|
+
const schema = tool.inputSchema;
|
|
447
|
+
const seenArgsHashes = new Set();
|
|
448
|
+
// Helper to avoid duplicate test cases
|
|
449
|
+
const addQuestion = (q, list) => {
|
|
450
|
+
const hash = JSON.stringify(q.args);
|
|
451
|
+
if (seenArgsHashes.has(hash))
|
|
452
|
+
return false;
|
|
453
|
+
seenArgsHashes.add(hash);
|
|
454
|
+
list.push(q);
|
|
455
|
+
return true;
|
|
456
|
+
};
|
|
457
|
+
// Schema-level examples take highest priority (author-provided)
|
|
458
|
+
if (schema?.examples && Array.isArray(schema.examples)) {
|
|
459
|
+
for (const example of schema.examples.slice(0, ORCHESTRATOR.MAX_SCHEMA_EXAMPLES)) {
|
|
460
|
+
if (example && typeof example === 'object') {
|
|
461
|
+
addQuestion({
|
|
462
|
+
description: 'Test with schema-provided example',
|
|
463
|
+
category: 'happy_path',
|
|
464
|
+
args: example,
|
|
465
|
+
}, happyPathTests);
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
if (schema?.default && typeof schema.default === 'object') {
|
|
470
|
+
addQuestion({
|
|
471
|
+
description: 'Test with schema default values',
|
|
472
|
+
category: 'happy_path',
|
|
473
|
+
args: schema.default,
|
|
474
|
+
}, happyPathTests);
|
|
475
|
+
}
|
|
476
|
+
const defaultArgs = this.buildArgsFromDefaults(schema);
|
|
477
|
+
if (Object.keys(defaultArgs).length > 0) {
|
|
478
|
+
addQuestion({
|
|
479
|
+
description: 'Test with property default values',
|
|
480
|
+
category: 'happy_path',
|
|
481
|
+
args: defaultArgs,
|
|
482
|
+
}, happyPathTests);
|
|
483
|
+
}
|
|
484
|
+
const exampleArgs = this.buildArgsFromExamples(schema);
|
|
485
|
+
if (Object.keys(exampleArgs).length > 0) {
|
|
486
|
+
addQuestion({
|
|
487
|
+
description: 'Test with property example values',
|
|
488
|
+
category: 'happy_path',
|
|
489
|
+
args: exampleArgs,
|
|
490
|
+
}, happyPathTests);
|
|
491
|
+
}
|
|
492
|
+
const smartArgs = this.buildSmartDefaultArgs(schema);
|
|
493
|
+
addQuestion({
|
|
494
|
+
description: 'Basic functionality test with required parameters',
|
|
495
|
+
category: 'happy_path',
|
|
496
|
+
args: smartArgs,
|
|
497
|
+
}, happyPathTests);
|
|
498
|
+
const enumTests = this.generateEnumTests(schema, smartArgs);
|
|
499
|
+
for (const test of enumTests.slice(0, ORCHESTRATOR.MAX_ENUM_TESTS)) {
|
|
500
|
+
addQuestion(test, happyPathTests);
|
|
501
|
+
}
|
|
502
|
+
const boundaryTests = this.generateBoundaryTests(schema, smartArgs);
|
|
503
|
+
for (const test of boundaryTests.slice(0, ORCHESTRATOR.MAX_BOUNDARY_TESTS)) {
|
|
504
|
+
addQuestion(test, edgeCaseTests);
|
|
505
|
+
}
|
|
506
|
+
const optionalTests = this.generateOptionalParamTests(schema);
|
|
507
|
+
for (const test of optionalTests.slice(0, ORCHESTRATOR.MAX_OPTIONAL_TESTS)) {
|
|
508
|
+
addQuestion(test, happyPathTests);
|
|
509
|
+
}
|
|
510
|
+
if (!skipErrorTests) {
|
|
511
|
+
// Empty args (missing required)
|
|
512
|
+
addQuestion({
|
|
513
|
+
description: 'Test with empty/missing parameters',
|
|
514
|
+
category: 'error_handling',
|
|
515
|
+
args: {},
|
|
516
|
+
}, errorTests);
|
|
517
|
+
// Invalid type tests
|
|
518
|
+
const invalidTests = this.generateInvalidTypeTests(schema);
|
|
519
|
+
for (const test of invalidTests.slice(0, ORCHESTRATOR.MAX_INVALID_TYPE_TESTS)) {
|
|
520
|
+
addQuestion(test, errorTests);
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
// Combine tests, ensuring a balanced mix:
|
|
524
|
+
// - At least 1 error handling test if not skipped (put early to ensure inclusion when sliced)
|
|
525
|
+
// - Then happy path tests
|
|
526
|
+
// - Then edge case tests
|
|
527
|
+
// - Then remaining error tests
|
|
528
|
+
const questions = [];
|
|
529
|
+
// Add first happy path test (if any)
|
|
530
|
+
if (happyPathTests.length > 0) {
|
|
531
|
+
questions.push(happyPathTests[0]);
|
|
532
|
+
}
|
|
533
|
+
// Add first error handling test (if any and not skipped)
|
|
534
|
+
if (errorTests.length > 0) {
|
|
535
|
+
questions.push(errorTests[0]);
|
|
536
|
+
}
|
|
537
|
+
// Add remaining happy path tests
|
|
538
|
+
for (let i = 1; i < happyPathTests.length; i++) {
|
|
539
|
+
questions.push(happyPathTests[i]);
|
|
540
|
+
}
|
|
541
|
+
// Add edge case tests
|
|
542
|
+
for (const test of edgeCaseTests) {
|
|
543
|
+
questions.push(test);
|
|
544
|
+
}
|
|
545
|
+
// Add remaining error tests
|
|
546
|
+
for (let i = 1; i < errorTests.length; i++) {
|
|
547
|
+
questions.push(errorTests[i]);
|
|
548
|
+
}
|
|
549
|
+
return questions;
|
|
550
|
+
}
|
|
551
|
+
/**
|
|
552
|
+
* Build args from property-level default values.
|
|
553
|
+
* Also includes required parameters with smart defaults.
|
|
554
|
+
*/
|
|
555
|
+
buildArgsFromDefaults(schema) {
|
|
556
|
+
const args = {};
|
|
557
|
+
if (!schema?.properties)
|
|
558
|
+
return args;
|
|
559
|
+
const required = schema.required ?? [];
|
|
560
|
+
let hasDefaults = false;
|
|
561
|
+
for (const [name, prop] of Object.entries(schema.properties)) {
|
|
562
|
+
if (prop.default !== undefined) {
|
|
563
|
+
args[name] = prop.default;
|
|
564
|
+
hasDefaults = true;
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
if (hasDefaults) {
|
|
568
|
+
for (const param of required) {
|
|
569
|
+
if (args[param] === undefined && schema.properties[param]) {
|
|
570
|
+
args[param] = this.generateSmartValue(param, schema.properties[param]);
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
return args;
|
|
575
|
+
}
|
|
576
|
+
/**
|
|
577
|
+
* Build args from property-level example values.
|
|
578
|
+
* Also includes required parameters with smart defaults.
|
|
579
|
+
*/
|
|
580
|
+
buildArgsFromExamples(schema) {
|
|
581
|
+
const args = {};
|
|
582
|
+
if (!schema?.properties)
|
|
583
|
+
return args;
|
|
584
|
+
const required = schema.required ?? [];
|
|
585
|
+
let hasExamples = false;
|
|
586
|
+
for (const [name, prop] of Object.entries(schema.properties)) {
|
|
587
|
+
if (prop.examples && prop.examples.length > 0) {
|
|
588
|
+
args[name] = prop.examples[0];
|
|
589
|
+
hasExamples = true;
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
if (hasExamples) {
|
|
593
|
+
for (const param of required) {
|
|
594
|
+
if (args[param] === undefined && schema.properties[param]) {
|
|
595
|
+
args[param] = this.generateSmartValue(param, schema.properties[param]);
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
return args;
|
|
600
|
+
}
|
|
601
|
+
/**
|
|
602
|
+
* Build smart default args based on parameter analysis.
|
|
603
|
+
*/
|
|
604
|
+
buildSmartDefaultArgs(schema) {
|
|
605
|
+
const args = {};
|
|
606
|
+
if (!schema?.properties)
|
|
607
|
+
return args;
|
|
608
|
+
const required = schema.required ?? [];
|
|
609
|
+
for (const param of required) {
|
|
610
|
+
const prop = schema.properties[param];
|
|
611
|
+
if (prop) {
|
|
612
|
+
args[param] = this.generateSmartValue(param, prop);
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
return args;
|
|
616
|
+
}
|
|
617
|
+
/**
|
|
618
|
+
* Generate a smart value for a parameter based on comprehensive schema analysis.
|
|
619
|
+
* @param depth - Current recursion depth for circular schema protection
|
|
620
|
+
*/
|
|
621
|
+
generateSmartValue(paramName, schema, depth = 0) {
|
|
622
|
+
// Prevent infinite recursion with circular schemas
|
|
623
|
+
if (depth > ORCHESTRATOR.MAX_SCHEMA_RECURSION_DEPTH) {
|
|
624
|
+
this.logger.debug({ paramName, depth }, 'Max schema recursion depth reached');
|
|
625
|
+
return null;
|
|
626
|
+
}
|
|
627
|
+
// Priority: const > default > examples > enum > oneOf/anyOf > type-based generation
|
|
628
|
+
if (schema.const !== undefined)
|
|
629
|
+
return schema.const;
|
|
630
|
+
if (schema.default !== undefined)
|
|
631
|
+
return schema.default;
|
|
632
|
+
if (schema.examples && schema.examples.length > 0)
|
|
633
|
+
return schema.examples[0];
|
|
634
|
+
if (schema.enum && schema.enum.length > 0)
|
|
635
|
+
return schema.enum[0];
|
|
636
|
+
if (schema.oneOf && schema.oneOf.length > 0) {
|
|
637
|
+
return this.generateSmartValue(paramName, schema.oneOf[0], depth + 1);
|
|
638
|
+
}
|
|
639
|
+
if (schema.anyOf && schema.anyOf.length > 0) {
|
|
640
|
+
return this.generateSmartValue(paramName, schema.anyOf[0], depth + 1);
|
|
641
|
+
}
|
|
642
|
+
const type = this.getSchemaType(schema.type);
|
|
643
|
+
switch (type) {
|
|
644
|
+
case 'string':
|
|
645
|
+
return this.generateSmartString(paramName, schema);
|
|
646
|
+
case 'number':
|
|
647
|
+
return this.generateSmartNumber(schema, false);
|
|
648
|
+
case 'integer':
|
|
649
|
+
return this.generateSmartNumber(schema, true);
|
|
650
|
+
case 'boolean':
|
|
651
|
+
return true;
|
|
652
|
+
case 'array':
|
|
653
|
+
return this.generateSmartArray(paramName, schema, depth + 1);
|
|
654
|
+
case 'object':
|
|
655
|
+
return this.generateSmartObject(schema, depth + 1);
|
|
656
|
+
case 'null':
|
|
657
|
+
return null;
|
|
658
|
+
default:
|
|
659
|
+
// Infer from parameter name
|
|
660
|
+
return this.inferValueFromName(paramName);
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
/**
|
|
664
|
+
* Generate a smart string value based on format, pattern, and name hints.
|
|
665
|
+
* Works with or without a schema - when schema is absent, uses name-based inference only.
|
|
666
|
+
*/
|
|
667
|
+
generateSmartString(paramName, schema) {
|
|
668
|
+
const lowerName = paramName.toLowerCase();
|
|
669
|
+
const description = (schema?.description ?? '').toLowerCase();
|
|
670
|
+
// Check format first (if schema provided)
|
|
671
|
+
if (schema?.format) {
|
|
672
|
+
switch (schema.format) {
|
|
673
|
+
case 'date':
|
|
674
|
+
return '2024-01-15';
|
|
675
|
+
case 'date-time':
|
|
676
|
+
return '2024-01-15T10:30:00Z';
|
|
677
|
+
case 'time':
|
|
678
|
+
return '10:30:00';
|
|
679
|
+
case 'email':
|
|
680
|
+
return 'test@example.com';
|
|
681
|
+
case 'uri':
|
|
682
|
+
case 'url':
|
|
683
|
+
return this.serverContext?.allowedHosts?.[0] ?? 'https://example.com';
|
|
684
|
+
case 'uuid':
|
|
685
|
+
return '550e8400-e29b-41d4-a716-446655440000';
|
|
686
|
+
case 'hostname':
|
|
687
|
+
return 'example.com';
|
|
688
|
+
case 'ipv4':
|
|
689
|
+
return '127.0.0.1';
|
|
690
|
+
case 'ipv6':
|
|
691
|
+
return '::1';
|
|
692
|
+
}
|
|
693
|
+
}
|
|
694
|
+
// Check name-based hints
|
|
695
|
+
if (lowerName.includes('path') || lowerName.includes('file')) {
|
|
696
|
+
const baseDir = this.serverContext?.allowedDirectories?.[0] ?? '/tmp';
|
|
697
|
+
if (lowerName.includes('dir') || lowerName.includes('directory') || lowerName.includes('folder')) {
|
|
698
|
+
return baseDir;
|
|
699
|
+
}
|
|
700
|
+
return `${baseDir}/test.txt`;
|
|
701
|
+
}
|
|
702
|
+
if (lowerName.includes('url') || lowerName.includes('uri') || lowerName.includes('endpoint')) {
|
|
703
|
+
return this.serverContext?.allowedHosts?.[0] ?? 'https://example.com/api';
|
|
704
|
+
}
|
|
705
|
+
if (lowerName.includes('email')) {
|
|
706
|
+
return 'test@example.com';
|
|
707
|
+
}
|
|
708
|
+
if (lowerName.includes('phone') || lowerName.includes('tel')) {
|
|
709
|
+
return '+1-555-123-4567';
|
|
710
|
+
}
|
|
711
|
+
if (lowerName.includes('id') || lowerName.includes('key') || lowerName.includes('token')) {
|
|
712
|
+
return 'test-id-12345';
|
|
713
|
+
}
|
|
714
|
+
if (lowerName.includes('name')) {
|
|
715
|
+
if (lowerName.includes('user') || lowerName.includes('author')) {
|
|
716
|
+
return 'Test User';
|
|
717
|
+
}
|
|
718
|
+
return 'test-name';
|
|
719
|
+
}
|
|
720
|
+
if (lowerName.includes('query') || lowerName.includes('search') || lowerName.includes('filter')) {
|
|
721
|
+
// Use a more realistic search term based on description
|
|
722
|
+
if (description.includes('movie') || description.includes('film')) {
|
|
723
|
+
return 'The Matrix';
|
|
724
|
+
}
|
|
725
|
+
if (description.includes('music') || description.includes('song') || description.includes('artist')) {
|
|
726
|
+
return 'Beatles';
|
|
727
|
+
}
|
|
728
|
+
if (description.includes('book') || description.includes('author')) {
|
|
729
|
+
return 'Tolkien';
|
|
730
|
+
}
|
|
731
|
+
return 'example query';
|
|
732
|
+
}
|
|
733
|
+
if (lowerName.includes('title')) {
|
|
734
|
+
return 'Test Title';
|
|
735
|
+
}
|
|
736
|
+
if (lowerName.includes('description') || lowerName.includes('summary') || lowerName.includes('text')) {
|
|
737
|
+
return 'This is a test description for validation purposes.';
|
|
738
|
+
}
|
|
739
|
+
if (lowerName.includes('content') || lowerName.includes('body') || lowerName.includes('message')) {
|
|
740
|
+
return 'Test content for the operation.';
|
|
741
|
+
}
|
|
742
|
+
if (lowerName.includes('comment')) {
|
|
743
|
+
return 'This is a test comment.';
|
|
744
|
+
}
|
|
745
|
+
if (lowerName.includes('code') || lowerName.includes('snippet')) {
|
|
746
|
+
return 'function example() { return "Hello"; }';
|
|
747
|
+
}
|
|
748
|
+
if (lowerName.includes('pattern') || lowerName.includes('glob') || lowerName.includes('regex')) {
|
|
749
|
+
return '*.txt';
|
|
750
|
+
}
|
|
751
|
+
if (lowerName.includes('format') || lowerName.includes('type')) {
|
|
752
|
+
return 'json';
|
|
753
|
+
}
|
|
754
|
+
if (lowerName.includes('lang') || lowerName.includes('locale')) {
|
|
755
|
+
return 'en-US';
|
|
756
|
+
}
|
|
757
|
+
if (lowerName.includes('date')) {
|
|
758
|
+
return '2024-01-15';
|
|
759
|
+
}
|
|
760
|
+
if (lowerName.includes('time')) {
|
|
761
|
+
return '10:30:00';
|
|
762
|
+
}
|
|
763
|
+
// Respect minLength/maxLength if specified in schema
|
|
764
|
+
let value = 'test-value';
|
|
765
|
+
if (schema?.minLength && value.length < schema.minLength) {
|
|
766
|
+
value = value.padEnd(schema.minLength, '-');
|
|
767
|
+
}
|
|
768
|
+
if (schema?.maxLength && value.length > schema.maxLength) {
|
|
769
|
+
value = value.slice(0, schema.maxLength);
|
|
770
|
+
}
|
|
771
|
+
return value;
|
|
772
|
+
}
|
|
773
|
+
/**
|
|
774
|
+
* Generate a smart number value respecting constraints.
|
|
775
|
+
*/
|
|
776
|
+
generateSmartNumber(schema, isInteger) {
|
|
777
|
+
let min = schema.minimum ?? schema.exclusiveMinimum ?? ORCHESTRATOR.DEFAULT_NUMBER_MIN;
|
|
778
|
+
let max = schema.maximum ?? schema.exclusiveMaximum ?? ORCHESTRATOR.DEFAULT_NUMBER_MAX;
|
|
779
|
+
// Adjust for exclusive bounds
|
|
780
|
+
if (schema.exclusiveMinimum !== undefined) {
|
|
781
|
+
min = isInteger ? Math.floor(min) + 1 : min + 0.1;
|
|
782
|
+
}
|
|
783
|
+
if (schema.exclusiveMaximum !== undefined) {
|
|
784
|
+
max = isInteger ? Math.ceil(max) - 1 : max - 0.1;
|
|
785
|
+
}
|
|
786
|
+
// Pick a sensible middle value
|
|
787
|
+
const value = (min + max) / 2;
|
|
788
|
+
return isInteger ? Math.round(value) : value;
|
|
789
|
+
}
|
|
790
|
+
/**
|
|
791
|
+
* Generate a smart array value.
|
|
792
|
+
* @param depth - Current recursion depth for circular schema protection
|
|
793
|
+
*/
|
|
794
|
+
generateSmartArray(paramName, schema, depth = 0) {
|
|
795
|
+
const lowerName = paramName.toLowerCase();
|
|
796
|
+
// Handle path arrays
|
|
797
|
+
if (lowerName.includes('path')) {
|
|
798
|
+
const baseDir = this.serverContext?.allowedDirectories?.[0] ?? '/tmp';
|
|
799
|
+
return [`${baseDir}/file1.txt`];
|
|
800
|
+
}
|
|
801
|
+
// Generate items based on items schema
|
|
802
|
+
if (schema.items) {
|
|
803
|
+
const item = this.generateSmartValue('item', schema.items, depth);
|
|
804
|
+
return [item];
|
|
805
|
+
}
|
|
806
|
+
return ['sample-item'];
|
|
807
|
+
}
|
|
808
|
+
/**
|
|
809
|
+
* Generate a smart object value.
|
|
810
|
+
* @param depth - Current recursion depth for circular schema protection
|
|
811
|
+
*/
|
|
812
|
+
generateSmartObject(schema, depth = 0) {
|
|
813
|
+
const obj = {};
|
|
814
|
+
if (schema.properties) {
|
|
815
|
+
const required = schema.required ?? [];
|
|
816
|
+
// Fill required properties
|
|
817
|
+
for (const prop of required) {
|
|
818
|
+
if (schema.properties[prop]) {
|
|
819
|
+
obj[prop] = this.generateSmartValue(prop, schema.properties[prop], depth);
|
|
820
|
+
}
|
|
821
|
+
}
|
|
822
|
+
}
|
|
823
|
+
return obj;
|
|
824
|
+
}
|
|
825
|
+
/**
|
|
826
|
+
* Infer value from parameter name when no type info available.
|
|
827
|
+
*/
|
|
828
|
+
inferValueFromName(paramName) {
|
|
829
|
+
const lowerName = paramName.toLowerCase();
|
|
830
|
+
if (lowerName.includes('count') || lowerName.includes('limit') || lowerName.includes('num')) {
|
|
831
|
+
return 10;
|
|
832
|
+
}
|
|
833
|
+
if (lowerName.includes('enabled') || lowerName.includes('active') || lowerName.includes('flag')) {
|
|
834
|
+
return true;
|
|
835
|
+
}
|
|
836
|
+
if (lowerName.includes('list') || lowerName.includes('items') || lowerName.includes('array')) {
|
|
837
|
+
return [];
|
|
838
|
+
}
|
|
839
|
+
if (lowerName.includes('config') || lowerName.includes('options') || lowerName.includes('settings')) {
|
|
840
|
+
return {};
|
|
841
|
+
}
|
|
842
|
+
return 'test';
|
|
843
|
+
}
|
|
844
|
+
/**
|
|
845
|
+
* Get the primary type from a schema type definition.
|
|
846
|
+
* Handles both single type strings and type arrays (e.g., ['string', 'null']).
|
|
847
|
+
*/
|
|
848
|
+
getSchemaType(typeValue) {
|
|
849
|
+
if (!typeValue)
|
|
850
|
+
return undefined;
|
|
851
|
+
return Array.isArray(typeValue) ? typeValue[0] : typeValue;
|
|
852
|
+
}
|
|
853
|
+
/**
|
|
854
|
+
* Generate test cases for enum parameters.
|
|
855
|
+
*/
|
|
856
|
+
generateEnumTests(schema, baseArgs) {
|
|
857
|
+
const tests = [];
|
|
858
|
+
if (!schema?.properties)
|
|
859
|
+
return tests;
|
|
860
|
+
for (const [name, prop] of Object.entries(schema.properties)) {
|
|
861
|
+
if (prop.enum && prop.enum.length > 1) {
|
|
862
|
+
// Test with different enum values (skip first which is already in baseArgs)
|
|
863
|
+
for (const enumValue of prop.enum.slice(1, 4)) {
|
|
864
|
+
tests.push({
|
|
865
|
+
description: `Test ${name} with enum value: ${JSON.stringify(enumValue)}`,
|
|
866
|
+
category: 'happy_path',
|
|
867
|
+
args: { ...baseArgs, [name]: enumValue },
|
|
868
|
+
});
|
|
869
|
+
}
|
|
870
|
+
}
|
|
871
|
+
}
|
|
872
|
+
return tests;
|
|
873
|
+
}
|
|
874
|
+
/**
|
|
875
|
+
* Generate boundary tests for numeric parameters.
|
|
876
|
+
*/
|
|
877
|
+
generateBoundaryTests(schema, baseArgs) {
|
|
878
|
+
const tests = [];
|
|
879
|
+
if (!schema?.properties)
|
|
880
|
+
return tests;
|
|
881
|
+
for (const [name, prop] of Object.entries(schema.properties)) {
|
|
882
|
+
const type = this.getSchemaType(prop.type);
|
|
883
|
+
if (type === 'number' || type === 'integer') {
|
|
884
|
+
// Test minimum
|
|
885
|
+
if (prop.minimum !== undefined) {
|
|
886
|
+
tests.push({
|
|
887
|
+
description: `Test ${name} at minimum value (${prop.minimum})`,
|
|
888
|
+
category: 'edge_case',
|
|
889
|
+
args: { ...baseArgs, [name]: prop.minimum },
|
|
890
|
+
});
|
|
891
|
+
}
|
|
892
|
+
// Test maximum
|
|
893
|
+
if (prop.maximum !== undefined) {
|
|
894
|
+
tests.push({
|
|
895
|
+
description: `Test ${name} at maximum value (${prop.maximum})`,
|
|
896
|
+
category: 'edge_case',
|
|
897
|
+
args: { ...baseArgs, [name]: prop.maximum },
|
|
898
|
+
});
|
|
899
|
+
}
|
|
900
|
+
// Test zero if in valid range
|
|
901
|
+
const min = prop.minimum ?? Number.NEGATIVE_INFINITY;
|
|
902
|
+
const max = prop.maximum ?? Number.POSITIVE_INFINITY;
|
|
903
|
+
if (min <= 0 && max >= 0) {
|
|
904
|
+
tests.push({
|
|
905
|
+
description: `Test ${name} with zero`,
|
|
906
|
+
category: 'edge_case',
|
|
907
|
+
args: { ...baseArgs, [name]: 0 },
|
|
908
|
+
});
|
|
909
|
+
}
|
|
910
|
+
}
|
|
911
|
+
}
|
|
912
|
+
return tests;
|
|
913
|
+
}
|
|
914
|
+
/**
|
|
915
|
+
* Generate tests for optional parameters.
|
|
916
|
+
*/
|
|
917
|
+
generateOptionalParamTests(schema) {
|
|
918
|
+
const tests = [];
|
|
919
|
+
if (!schema?.properties)
|
|
920
|
+
return tests;
|
|
921
|
+
const required = new Set(schema.required ?? []);
|
|
922
|
+
const optionalParams = Object.entries(schema.properties)
|
|
923
|
+
.filter(([name]) => !required.has(name));
|
|
924
|
+
if (optionalParams.length === 0)
|
|
925
|
+
return tests;
|
|
926
|
+
const allArgs = {};
|
|
927
|
+
for (const param of required) {
|
|
928
|
+
const prop = schema.properties[param];
|
|
929
|
+
if (prop) {
|
|
930
|
+
allArgs[param] = this.generateSmartValue(param, prop);
|
|
931
|
+
}
|
|
932
|
+
}
|
|
933
|
+
for (const [name, prop] of optionalParams) {
|
|
934
|
+
allArgs[name] = this.generateSmartValue(name, prop);
|
|
935
|
+
}
|
|
936
|
+
tests.push({
|
|
937
|
+
description: 'Test with all optional parameters included',
|
|
938
|
+
category: 'happy_path',
|
|
939
|
+
args: allArgs,
|
|
940
|
+
});
|
|
941
|
+
return tests;
|
|
942
|
+
}
|
|
943
|
+
/**
|
|
944
|
+
* Generate tests with invalid types to check error handling.
|
|
945
|
+
*/
|
|
946
|
+
generateInvalidTypeTests(schema) {
|
|
947
|
+
const tests = [];
|
|
948
|
+
if (!schema?.properties)
|
|
949
|
+
return tests;
|
|
950
|
+
const required = schema.required ?? [];
|
|
951
|
+
if (required.length === 0)
|
|
952
|
+
return tests;
|
|
953
|
+
// Pick a required parameter and give it wrong type
|
|
954
|
+
const param = required[0];
|
|
955
|
+
const prop = schema.properties[param];
|
|
956
|
+
if (!prop)
|
|
957
|
+
return tests;
|
|
958
|
+
const type = this.getSchemaType(prop.type);
|
|
959
|
+
let invalidValue;
|
|
960
|
+
switch (type) {
|
|
961
|
+
case 'string':
|
|
962
|
+
invalidValue = 12345; // Number instead of string
|
|
963
|
+
break;
|
|
964
|
+
case 'number':
|
|
965
|
+
case 'integer':
|
|
966
|
+
invalidValue = 'not-a-number';
|
|
967
|
+
break;
|
|
968
|
+
case 'boolean':
|
|
969
|
+
invalidValue = 'not-a-boolean';
|
|
970
|
+
break;
|
|
971
|
+
case 'array':
|
|
972
|
+
invalidValue = 'not-an-array';
|
|
973
|
+
break;
|
|
974
|
+
case 'object':
|
|
975
|
+
invalidValue = 'not-an-object';
|
|
976
|
+
break;
|
|
977
|
+
default:
|
|
978
|
+
return tests;
|
|
979
|
+
}
|
|
980
|
+
tests.push({
|
|
981
|
+
description: `Test ${param} with invalid type (${typeof invalidValue} instead of ${type})`,
|
|
982
|
+
category: 'error_handling',
|
|
983
|
+
args: { [param]: invalidValue },
|
|
984
|
+
});
|
|
985
|
+
return tests;
|
|
986
|
+
}
|
|
987
|
+
/**
|
|
988
|
+
* Generate interview questions for an MCP prompt.
|
|
989
|
+
*/
|
|
990
|
+
async generatePromptQuestions(prompt, maxQuestions = 2) {
|
|
991
|
+
const promptText = buildPromptQuestionGenerationPrompt({
|
|
992
|
+
prompt,
|
|
993
|
+
maxQuestions,
|
|
994
|
+
});
|
|
995
|
+
try {
|
|
996
|
+
const response = await this.completeWithStreaming(promptText, {
|
|
997
|
+
...COMPLETION_OPTIONS.promptQuestionGeneration,
|
|
998
|
+
systemPrompt: this.getSystemPrompt(),
|
|
999
|
+
}, `generate-prompt-questions:${prompt.name}`);
|
|
1000
|
+
const questions = this.llm.parseJSON(response);
|
|
1001
|
+
return questions.slice(0, maxQuestions);
|
|
1002
|
+
}
|
|
1003
|
+
catch (llmError) {
|
|
1004
|
+
this.logger.debug({
|
|
1005
|
+
prompt: prompt.name,
|
|
1006
|
+
error: llmError instanceof Error ? llmError.message : String(llmError),
|
|
1007
|
+
}, 'LLM prompt question generation failed, using fallback');
|
|
1008
|
+
// Fallback to basic questions (slice to respect maxQuestions limit)
|
|
1009
|
+
return this.generateFallbackPromptQuestions(prompt).slice(0, maxQuestions);
|
|
1010
|
+
}
|
|
1011
|
+
}
|
|
1012
|
+
/**
|
|
1013
|
+
* Analyze a prompt response.
|
|
1014
|
+
*/
|
|
1015
|
+
async analyzePromptResponse(prompt, question, response, error) {
|
|
1016
|
+
const promptText = buildPromptResponseAnalysisPrompt({
|
|
1017
|
+
prompt,
|
|
1018
|
+
question,
|
|
1019
|
+
response,
|
|
1020
|
+
error,
|
|
1021
|
+
});
|
|
1022
|
+
try {
|
|
1023
|
+
return await this.completeWithStreaming(promptText, {
|
|
1024
|
+
...COMPLETION_OPTIONS.promptResponseAnalysis,
|
|
1025
|
+
systemPrompt: this.getSystemPrompt(),
|
|
1026
|
+
}, `analyze-prompt:${prompt.name}`);
|
|
1027
|
+
}
|
|
1028
|
+
catch (llmError) {
|
|
1029
|
+
// Graceful fallback
|
|
1030
|
+
this.logger.debug({
|
|
1031
|
+
prompt: prompt.name,
|
|
1032
|
+
error: llmError instanceof Error ? llmError.message : String(llmError),
|
|
1033
|
+
}, 'LLM prompt analysis failed, using fallback');
|
|
1034
|
+
if (error) {
|
|
1035
|
+
return `Prompt returned an error: ${error}`;
|
|
1036
|
+
}
|
|
1037
|
+
if (response?.messages?.length) {
|
|
1038
|
+
return `Prompt generated ${response.messages.length} message(s).`;
|
|
1039
|
+
}
|
|
1040
|
+
return 'Prompt executed successfully.';
|
|
1041
|
+
}
|
|
1042
|
+
}
|
|
1043
|
+
/**
|
|
1044
|
+
* Synthesize findings for a prompt into a profile.
|
|
1045
|
+
*/
|
|
1046
|
+
async synthesizePromptProfile(prompt, interactions) {
|
|
1047
|
+
const promptText = buildPromptProfileSynthesisPrompt({ prompt, interactions });
|
|
1048
|
+
try {
|
|
1049
|
+
const response = await this.completeWithStreaming(promptText, {
|
|
1050
|
+
...COMPLETION_OPTIONS.promptProfileSynthesis,
|
|
1051
|
+
systemPrompt: this.getSystemPrompt(),
|
|
1052
|
+
}, `synthesize-prompt:${prompt.name}`);
|
|
1053
|
+
const result = this.llm.parseJSON(response);
|
|
1054
|
+
// Extract example output from first successful interaction
|
|
1055
|
+
let exampleOutput;
|
|
1056
|
+
const successful = interactions.find(i => !i.error && i.response?.messages?.length);
|
|
1057
|
+
if (successful?.response) {
|
|
1058
|
+
const firstMsg = successful.response.messages[0];
|
|
1059
|
+
if (firstMsg?.content?.type === 'text' && firstMsg.content.text) {
|
|
1060
|
+
exampleOutput = firstMsg.content.text.substring(0, DISPLAY_LIMITS.EXAMPLE_OUTPUT_LENGTH);
|
|
1061
|
+
}
|
|
1062
|
+
}
|
|
1063
|
+
return {
|
|
1064
|
+
name: prompt.name,
|
|
1065
|
+
description: prompt.description ?? 'No description provided',
|
|
1066
|
+
arguments: prompt.arguments ?? [],
|
|
1067
|
+
behavioralNotes: result.behavioralNotes ?? [],
|
|
1068
|
+
limitations: result.limitations ?? [],
|
|
1069
|
+
exampleOutput,
|
|
1070
|
+
};
|
|
1071
|
+
}
|
|
1072
|
+
catch (llmError) {
|
|
1073
|
+
this.logger.debug({
|
|
1074
|
+
prompt: prompt.name,
|
|
1075
|
+
error: llmError instanceof Error ? llmError.message : String(llmError),
|
|
1076
|
+
}, 'LLM prompt profile synthesis failed, using fallback');
|
|
1077
|
+
return {
|
|
1078
|
+
name: prompt.name,
|
|
1079
|
+
description: prompt.description ?? 'No description provided',
|
|
1080
|
+
arguments: prompt.arguments ?? [],
|
|
1081
|
+
behavioralNotes: interactions.map(i => i.analysis).filter(a => a),
|
|
1082
|
+
limitations: [],
|
|
1083
|
+
};
|
|
1084
|
+
}
|
|
1085
|
+
}
|
|
1086
|
+
/**
|
|
1087
|
+
* Fallback questions when LLM fails for prompts.
|
|
1088
|
+
*/
|
|
1089
|
+
generateFallbackPromptQuestions(prompt) {
|
|
1090
|
+
const questions = [];
|
|
1091
|
+
const args = {};
|
|
1092
|
+
// Build args with required parameters
|
|
1093
|
+
if (prompt.arguments) {
|
|
1094
|
+
for (const arg of prompt.arguments) {
|
|
1095
|
+
if (arg.required) {
|
|
1096
|
+
args[arg.name] = this.generateSmartString(arg.name);
|
|
1097
|
+
}
|
|
1098
|
+
}
|
|
1099
|
+
}
|
|
1100
|
+
questions.push({
|
|
1101
|
+
description: 'Basic usage with required arguments',
|
|
1102
|
+
args,
|
|
1103
|
+
});
|
|
1104
|
+
// If there are optional args, add a test with all args
|
|
1105
|
+
const optionalArgs = prompt.arguments?.filter(a => !a.required) ?? [];
|
|
1106
|
+
if (optionalArgs.length > 0) {
|
|
1107
|
+
const allArgs = { ...args };
|
|
1108
|
+
for (const arg of optionalArgs) {
|
|
1109
|
+
allArgs[arg.name] = this.generateSmartString(arg.name);
|
|
1110
|
+
}
|
|
1111
|
+
questions.push({
|
|
1112
|
+
description: 'Usage with all arguments',
|
|
1113
|
+
args: allArgs,
|
|
1114
|
+
});
|
|
1115
|
+
}
|
|
1116
|
+
return questions;
|
|
1117
|
+
}
|
|
1118
|
+
/**
|
|
1119
|
+
* Generate interview questions for an MCP resource.
|
|
1120
|
+
*/
|
|
1121
|
+
async generateResourceQuestions(resource, maxQuestions = 2) {
|
|
1122
|
+
const prompt = `You are analyzing an MCP resource to generate test questions.
|
|
1123
|
+
|
|
1124
|
+
Resource:
|
|
1125
|
+
- Name: ${resource.name}
|
|
1126
|
+
- URI: ${resource.uri}
|
|
1127
|
+
- Description: ${resource.description ?? 'No description provided'}
|
|
1128
|
+
- MIME Type: ${resource.mimeType ?? 'Not specified'}
|
|
1129
|
+
|
|
1130
|
+
Generate ${maxQuestions} test scenarios for reading this resource. Focus on:
|
|
1131
|
+
1. Basic read access
|
|
1132
|
+
2. Content validation (is the returned content appropriate for the MIME type?)
|
|
1133
|
+
3. Error handling if applicable
|
|
1134
|
+
|
|
1135
|
+
Return JSON array:
|
|
1136
|
+
[
|
|
1137
|
+
{
|
|
1138
|
+
"description": "What this test evaluates",
|
|
1139
|
+
"category": "happy_path" | "edge_case" | "error_handling"
|
|
1140
|
+
}
|
|
1141
|
+
]
|
|
1142
|
+
|
|
1143
|
+
Return ONLY valid JSON, no explanation.`;
|
|
1144
|
+
try {
|
|
1145
|
+
const response = await this.completeWithStreaming(prompt, {
|
|
1146
|
+
...COMPLETION_OPTIONS.questionGeneration,
|
|
1147
|
+
systemPrompt: this.getSystemPrompt(),
|
|
1148
|
+
}, `generate-resource-questions:${resource.name}`);
|
|
1149
|
+
const questions = this.llm.parseJSON(response);
|
|
1150
|
+
return questions.slice(0, maxQuestions);
|
|
1151
|
+
}
|
|
1152
|
+
catch (llmError) {
|
|
1153
|
+
this.logger.debug({
|
|
1154
|
+
resource: resource.name,
|
|
1155
|
+
error: llmError instanceof Error ? llmError.message : String(llmError),
|
|
1156
|
+
}, 'LLM resource question generation failed, using fallback');
|
|
1157
|
+
// Fallback to basic questions (slice to respect maxQuestions limit)
|
|
1158
|
+
return this.generateFallbackResourceQuestions(resource).slice(0, maxQuestions);
|
|
1159
|
+
}
|
|
1160
|
+
}
|
|
1161
|
+
/**
|
|
1162
|
+
* Analyze a resource read response.
|
|
1163
|
+
*/
|
|
1164
|
+
async analyzeResourceResponse(resource, question, response, error) {
|
|
1165
|
+
const contentSummary = this.summarizeResourceContent(response);
|
|
1166
|
+
const prompt = `Analyze this resource read result.
|
|
1167
|
+
|
|
1168
|
+
Resource: ${resource.name} (${resource.uri})
|
|
1169
|
+
Expected MIME type: ${resource.mimeType ?? 'Not specified'}
|
|
1170
|
+
|
|
1171
|
+
Test case: ${question.description}
|
|
1172
|
+
|
|
1173
|
+
Result:
|
|
1174
|
+
${error ? `Error: ${error}` : `Content: ${contentSummary}`}
|
|
1175
|
+
|
|
1176
|
+
Provide a brief analysis (1-2 sentences) of:
|
|
1177
|
+
- Whether the content matches expectations
|
|
1178
|
+
- Any notable characteristics or issues
|
|
1179
|
+
- Relevance of content to the resource description`;
|
|
1180
|
+
try {
|
|
1181
|
+
return await this.completeWithStreaming(prompt, {
|
|
1182
|
+
...COMPLETION_OPTIONS.responseAnalysis,
|
|
1183
|
+
systemPrompt: this.getSystemPrompt(),
|
|
1184
|
+
}, `analyze-resource:${resource.name}`);
|
|
1185
|
+
}
|
|
1186
|
+
catch (llmError) {
|
|
1187
|
+
// Graceful fallback
|
|
1188
|
+
this.logger.debug({
|
|
1189
|
+
resource: resource.name,
|
|
1190
|
+
error: llmError instanceof Error ? llmError.message : String(llmError),
|
|
1191
|
+
}, 'LLM resource analysis failed, using fallback');
|
|
1192
|
+
if (error) {
|
|
1193
|
+
return `Resource read failed: ${error}`;
|
|
1194
|
+
}
|
|
1195
|
+
if (response?.contents?.length) {
|
|
1196
|
+
return `Resource returned ${response.contents.length} content block(s).`;
|
|
1197
|
+
}
|
|
1198
|
+
return 'Resource read completed.';
|
|
1199
|
+
}
|
|
1200
|
+
}
|
|
1201
|
+
/**
|
|
1202
|
+
* Synthesize findings for a resource into a profile.
|
|
1203
|
+
*/
|
|
1204
|
+
async synthesizeResourceProfile(resource, interactions) {
|
|
1205
|
+
const prompt = `Synthesize findings for this MCP resource.
|
|
1206
|
+
|
|
1207
|
+
Resource: ${resource.name}
|
|
1208
|
+
URI: ${resource.uri}
|
|
1209
|
+
Description: ${resource.description ?? 'No description'}
|
|
1210
|
+
MIME Type: ${resource.mimeType ?? 'Not specified'}
|
|
1211
|
+
|
|
1212
|
+
Test interactions:
|
|
1213
|
+
${interactions.map((i, idx) => `
|
|
1214
|
+
${idx + 1}. ${i.question.description}
|
|
1215
|
+
${i.error ? `Error: ${i.error}` : `Analysis: ${i.analysis}`}
|
|
1216
|
+
`).join('')}
|
|
1217
|
+
|
|
1218
|
+
Generate a JSON object with:
|
|
1219
|
+
{
|
|
1220
|
+
"behavioralNotes": ["List of observed behaviors"],
|
|
1221
|
+
"limitations": ["List of limitations or issues discovered"]
|
|
1222
|
+
}
|
|
1223
|
+
|
|
1224
|
+
Return ONLY valid JSON, no explanation.`;
|
|
1225
|
+
try {
|
|
1226
|
+
const response = await this.completeWithStreaming(prompt, {
|
|
1227
|
+
...COMPLETION_OPTIONS.profileSynthesis,
|
|
1228
|
+
systemPrompt: this.getSystemPrompt(),
|
|
1229
|
+
}, `synthesize-resource:${resource.name}`);
|
|
1230
|
+
const result = this.llm.parseJSON(response);
|
|
1231
|
+
return {
|
|
1232
|
+
uri: resource.uri,
|
|
1233
|
+
name: resource.name,
|
|
1234
|
+
description: resource.description ?? 'No description provided',
|
|
1235
|
+
mimeType: resource.mimeType,
|
|
1236
|
+
behavioralNotes: result.behavioralNotes ?? [],
|
|
1237
|
+
limitations: result.limitations ?? [],
|
|
1238
|
+
};
|
|
1239
|
+
}
|
|
1240
|
+
catch (llmError) {
|
|
1241
|
+
this.logger.debug({
|
|
1242
|
+
resource: resource.name,
|
|
1243
|
+
error: llmError instanceof Error ? llmError.message : String(llmError),
|
|
1244
|
+
}, 'LLM resource profile synthesis failed, using fallback');
|
|
1245
|
+
return {
|
|
1246
|
+
uri: resource.uri,
|
|
1247
|
+
name: resource.name,
|
|
1248
|
+
description: resource.description ?? 'No description provided',
|
|
1249
|
+
mimeType: resource.mimeType,
|
|
1250
|
+
behavioralNotes: interactions.map(i => i.analysis).filter(a => a),
|
|
1251
|
+
limitations: [],
|
|
1252
|
+
};
|
|
1253
|
+
}
|
|
1254
|
+
}
|
|
1255
|
+
/**
|
|
1256
|
+
* Fallback questions when LLM fails for resources.
|
|
1257
|
+
*/
|
|
1258
|
+
generateFallbackResourceQuestions(resource) {
|
|
1259
|
+
const questions = [
|
|
1260
|
+
{
|
|
1261
|
+
description: `Basic read access for ${resource.name}`,
|
|
1262
|
+
category: 'happy_path',
|
|
1263
|
+
},
|
|
1264
|
+
];
|
|
1265
|
+
// Add MIME type validation if specified
|
|
1266
|
+
if (resource.mimeType) {
|
|
1267
|
+
questions.push({
|
|
1268
|
+
description: `Verify content matches expected MIME type (${resource.mimeType})`,
|
|
1269
|
+
category: 'happy_path',
|
|
1270
|
+
});
|
|
1271
|
+
}
|
|
1272
|
+
return questions;
|
|
1273
|
+
}
|
|
1274
|
+
/**
|
|
1275
|
+
* Summarize resource content for analysis prompts.
|
|
1276
|
+
*/
|
|
1277
|
+
summarizeResourceContent(response) {
|
|
1278
|
+
if (!response?.contents?.length) {
|
|
1279
|
+
return 'No content returned';
|
|
1280
|
+
}
|
|
1281
|
+
const summaries = [];
|
|
1282
|
+
for (const content of response.contents) {
|
|
1283
|
+
if (content.text) {
|
|
1284
|
+
const preview = content.text.length > DISPLAY_LIMITS.CONTENT_PREVIEW_LENGTH
|
|
1285
|
+
? content.text.substring(0, DISPLAY_LIMITS.CONTENT_PREVIEW_LENGTH) + '...'
|
|
1286
|
+
: content.text;
|
|
1287
|
+
summaries.push(`Text (${content.mimeType ?? 'unknown'}): ${preview}`);
|
|
1288
|
+
}
|
|
1289
|
+
else if (content.blob) {
|
|
1290
|
+
summaries.push(`Binary data (${content.mimeType ?? 'unknown'}): ${content.blob.length} bytes base64`);
|
|
1291
|
+
}
|
|
1292
|
+
}
|
|
1293
|
+
return summaries.join('\n');
|
|
1294
|
+
}
|
|
1295
|
+
}
|
|
1296
|
+
//# sourceMappingURL=orchestrator.js.map
|