@dotsetlabs/bellwether 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +291 -0
- package/LICENSE +21 -0
- package/README.md +739 -0
- package/dist/auth/credentials.d.ts +64 -0
- package/dist/auth/credentials.js +218 -0
- package/dist/auth/index.d.ts +6 -0
- package/dist/auth/index.js +6 -0
- package/dist/auth/keychain.d.ts +64 -0
- package/dist/auth/keychain.js +268 -0
- package/dist/baseline/ab-testing.d.ts +80 -0
- package/dist/baseline/ab-testing.js +236 -0
- package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
- package/dist/baseline/ai-compatibility-scorer.js +606 -0
- package/dist/baseline/calibration.d.ts +77 -0
- package/dist/baseline/calibration.js +136 -0
- package/dist/baseline/category-matching.d.ts +85 -0
- package/dist/baseline/category-matching.js +289 -0
- package/dist/baseline/change-impact-analyzer.d.ts +98 -0
- package/dist/baseline/change-impact-analyzer.js +592 -0
- package/dist/baseline/comparator.d.ts +64 -0
- package/dist/baseline/comparator.js +916 -0
- package/dist/baseline/confidence.d.ts +55 -0
- package/dist/baseline/confidence.js +122 -0
- package/dist/baseline/converter.d.ts +61 -0
- package/dist/baseline/converter.js +585 -0
- package/dist/baseline/dependency-analyzer.d.ts +89 -0
- package/dist/baseline/dependency-analyzer.js +567 -0
- package/dist/baseline/deprecation-tracker.d.ts +133 -0
- package/dist/baseline/deprecation-tracker.js +322 -0
- package/dist/baseline/diff.d.ts +55 -0
- package/dist/baseline/diff.js +1584 -0
- package/dist/baseline/documentation-scorer.d.ts +205 -0
- package/dist/baseline/documentation-scorer.js +466 -0
- package/dist/baseline/embeddings.d.ts +118 -0
- package/dist/baseline/embeddings.js +251 -0
- package/dist/baseline/error-analyzer.d.ts +198 -0
- package/dist/baseline/error-analyzer.js +721 -0
- package/dist/baseline/evaluation/evaluator.d.ts +42 -0
- package/dist/baseline/evaluation/evaluator.js +323 -0
- package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
- package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
- package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
- package/dist/baseline/evaluation/golden-dataset.js +717 -0
- package/dist/baseline/evaluation/index.d.ts +15 -0
- package/dist/baseline/evaluation/index.js +15 -0
- package/dist/baseline/evaluation/types.d.ts +186 -0
- package/dist/baseline/evaluation/types.js +8 -0
- package/dist/baseline/external-dependency-detector.d.ts +181 -0
- package/dist/baseline/external-dependency-detector.js +524 -0
- package/dist/baseline/golden-output.d.ts +162 -0
- package/dist/baseline/golden-output.js +636 -0
- package/dist/baseline/health-scorer.d.ts +174 -0
- package/dist/baseline/health-scorer.js +451 -0
- package/dist/baseline/incremental-checker.d.ts +97 -0
- package/dist/baseline/incremental-checker.js +174 -0
- package/dist/baseline/index.d.ts +31 -0
- package/dist/baseline/index.js +42 -0
- package/dist/baseline/migration-generator.d.ts +137 -0
- package/dist/baseline/migration-generator.js +554 -0
- package/dist/baseline/migrations.d.ts +60 -0
- package/dist/baseline/migrations.js +197 -0
- package/dist/baseline/performance-tracker.d.ts +214 -0
- package/dist/baseline/performance-tracker.js +577 -0
- package/dist/baseline/pr-comment-generator.d.ts +117 -0
- package/dist/baseline/pr-comment-generator.js +546 -0
- package/dist/baseline/response-fingerprint.d.ts +127 -0
- package/dist/baseline/response-fingerprint.js +728 -0
- package/dist/baseline/response-schema-tracker.d.ts +129 -0
- package/dist/baseline/response-schema-tracker.js +420 -0
- package/dist/baseline/risk-scorer.d.ts +54 -0
- package/dist/baseline/risk-scorer.js +434 -0
- package/dist/baseline/saver.d.ts +89 -0
- package/dist/baseline/saver.js +554 -0
- package/dist/baseline/scenario-generator.d.ts +151 -0
- package/dist/baseline/scenario-generator.js +905 -0
- package/dist/baseline/schema-compare.d.ts +86 -0
- package/dist/baseline/schema-compare.js +557 -0
- package/dist/baseline/schema-evolution.d.ts +189 -0
- package/dist/baseline/schema-evolution.js +467 -0
- package/dist/baseline/semantic.d.ts +203 -0
- package/dist/baseline/semantic.js +908 -0
- package/dist/baseline/synonyms.d.ts +60 -0
- package/dist/baseline/synonyms.js +386 -0
- package/dist/baseline/telemetry.d.ts +165 -0
- package/dist/baseline/telemetry.js +294 -0
- package/dist/baseline/test-pruner.d.ts +120 -0
- package/dist/baseline/test-pruner.js +387 -0
- package/dist/baseline/types.d.ts +449 -0
- package/dist/baseline/types.js +5 -0
- package/dist/baseline/version.d.ts +138 -0
- package/dist/baseline/version.js +206 -0
- package/dist/cache/index.d.ts +5 -0
- package/dist/cache/index.js +5 -0
- package/dist/cache/response-cache.d.ts +151 -0
- package/dist/cache/response-cache.js +287 -0
- package/dist/ci/index.d.ts +60 -0
- package/dist/ci/index.js +342 -0
- package/dist/cli/commands/auth.d.ts +12 -0
- package/dist/cli/commands/auth.js +352 -0
- package/dist/cli/commands/badge.d.ts +3 -0
- package/dist/cli/commands/badge.js +74 -0
- package/dist/cli/commands/baseline-accept.d.ts +15 -0
- package/dist/cli/commands/baseline-accept.js +178 -0
- package/dist/cli/commands/baseline-migrate.d.ts +12 -0
- package/dist/cli/commands/baseline-migrate.js +164 -0
- package/dist/cli/commands/baseline.d.ts +14 -0
- package/dist/cli/commands/baseline.js +449 -0
- package/dist/cli/commands/beta.d.ts +10 -0
- package/dist/cli/commands/beta.js +231 -0
- package/dist/cli/commands/check.d.ts +11 -0
- package/dist/cli/commands/check.js +820 -0
- package/dist/cli/commands/cloud/badge.d.ts +3 -0
- package/dist/cli/commands/cloud/badge.js +74 -0
- package/dist/cli/commands/cloud/diff.d.ts +6 -0
- package/dist/cli/commands/cloud/diff.js +79 -0
- package/dist/cli/commands/cloud/history.d.ts +6 -0
- package/dist/cli/commands/cloud/history.js +102 -0
- package/dist/cli/commands/cloud/link.d.ts +9 -0
- package/dist/cli/commands/cloud/link.js +119 -0
- package/dist/cli/commands/cloud/login.d.ts +7 -0
- package/dist/cli/commands/cloud/login.js +499 -0
- package/dist/cli/commands/cloud/projects.d.ts +6 -0
- package/dist/cli/commands/cloud/projects.js +44 -0
- package/dist/cli/commands/cloud/shared.d.ts +7 -0
- package/dist/cli/commands/cloud/shared.js +42 -0
- package/dist/cli/commands/cloud/teams.d.ts +8 -0
- package/dist/cli/commands/cloud/teams.js +169 -0
- package/dist/cli/commands/cloud/upload.d.ts +8 -0
- package/dist/cli/commands/cloud/upload.js +181 -0
- package/dist/cli/commands/contract.d.ts +11 -0
- package/dist/cli/commands/contract.js +280 -0
- package/dist/cli/commands/discover.d.ts +3 -0
- package/dist/cli/commands/discover.js +82 -0
- package/dist/cli/commands/eval.d.ts +9 -0
- package/dist/cli/commands/eval.js +187 -0
- package/dist/cli/commands/explore.d.ts +11 -0
- package/dist/cli/commands/explore.js +437 -0
- package/dist/cli/commands/feedback.d.ts +9 -0
- package/dist/cli/commands/feedback.js +174 -0
- package/dist/cli/commands/golden.d.ts +12 -0
- package/dist/cli/commands/golden.js +407 -0
- package/dist/cli/commands/history.d.ts +10 -0
- package/dist/cli/commands/history.js +202 -0
- package/dist/cli/commands/init.d.ts +9 -0
- package/dist/cli/commands/init.js +219 -0
- package/dist/cli/commands/interview.d.ts +3 -0
- package/dist/cli/commands/interview.js +903 -0
- package/dist/cli/commands/link.d.ts +10 -0
- package/dist/cli/commands/link.js +169 -0
- package/dist/cli/commands/login.d.ts +7 -0
- package/dist/cli/commands/login.js +499 -0
- package/dist/cli/commands/preset.d.ts +33 -0
- package/dist/cli/commands/preset.js +297 -0
- package/dist/cli/commands/profile.d.ts +33 -0
- package/dist/cli/commands/profile.js +286 -0
- package/dist/cli/commands/registry.d.ts +11 -0
- package/dist/cli/commands/registry.js +146 -0
- package/dist/cli/commands/shared.d.ts +79 -0
- package/dist/cli/commands/shared.js +196 -0
- package/dist/cli/commands/teams.d.ts +8 -0
- package/dist/cli/commands/teams.js +169 -0
- package/dist/cli/commands/test.d.ts +9 -0
- package/dist/cli/commands/test.js +500 -0
- package/dist/cli/commands/upload.d.ts +8 -0
- package/dist/cli/commands/upload.js +223 -0
- package/dist/cli/commands/validate-config.d.ts +6 -0
- package/dist/cli/commands/validate-config.js +35 -0
- package/dist/cli/commands/verify.d.ts +11 -0
- package/dist/cli/commands/verify.js +283 -0
- package/dist/cli/commands/watch.d.ts +12 -0
- package/dist/cli/commands/watch.js +253 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.js +178 -0
- package/dist/cli/interactive.d.ts +47 -0
- package/dist/cli/interactive.js +216 -0
- package/dist/cli/output/terminal-reporter.d.ts +19 -0
- package/dist/cli/output/terminal-reporter.js +104 -0
- package/dist/cli/output.d.ts +226 -0
- package/dist/cli/output.js +438 -0
- package/dist/cli/utils/env.d.ts +5 -0
- package/dist/cli/utils/env.js +14 -0
- package/dist/cli/utils/progress.d.ts +59 -0
- package/dist/cli/utils/progress.js +206 -0
- package/dist/cli/utils/server-context.d.ts +10 -0
- package/dist/cli/utils/server-context.js +36 -0
- package/dist/cloud/auth.d.ts +144 -0
- package/dist/cloud/auth.js +374 -0
- package/dist/cloud/client.d.ts +24 -0
- package/dist/cloud/client.js +65 -0
- package/dist/cloud/http-client.d.ts +38 -0
- package/dist/cloud/http-client.js +215 -0
- package/dist/cloud/index.d.ts +23 -0
- package/dist/cloud/index.js +25 -0
- package/dist/cloud/mock-client.d.ts +107 -0
- package/dist/cloud/mock-client.js +545 -0
- package/dist/cloud/types.d.ts +515 -0
- package/dist/cloud/types.js +15 -0
- package/dist/config/defaults.d.ts +160 -0
- package/dist/config/defaults.js +169 -0
- package/dist/config/loader.d.ts +24 -0
- package/dist/config/loader.js +122 -0
- package/dist/config/template.d.ts +42 -0
- package/dist/config/template.js +647 -0
- package/dist/config/validator.d.ts +2112 -0
- package/dist/config/validator.js +658 -0
- package/dist/constants/cloud.d.ts +107 -0
- package/dist/constants/cloud.js +110 -0
- package/dist/constants/core.d.ts +521 -0
- package/dist/constants/core.js +556 -0
- package/dist/constants/testing.d.ts +1283 -0
- package/dist/constants/testing.js +1568 -0
- package/dist/constants.d.ts +10 -0
- package/dist/constants.js +10 -0
- package/dist/contract/index.d.ts +6 -0
- package/dist/contract/index.js +5 -0
- package/dist/contract/validator.d.ts +177 -0
- package/dist/contract/validator.js +574 -0
- package/dist/cost/index.d.ts +6 -0
- package/dist/cost/index.js +5 -0
- package/dist/cost/tracker.d.ts +134 -0
- package/dist/cost/tracker.js +313 -0
- package/dist/discovery/discovery.d.ts +16 -0
- package/dist/discovery/discovery.js +173 -0
- package/dist/discovery/types.d.ts +51 -0
- package/dist/discovery/types.js +2 -0
- package/dist/docs/agents.d.ts +3 -0
- package/dist/docs/agents.js +995 -0
- package/dist/docs/contract.d.ts +51 -0
- package/dist/docs/contract.js +1681 -0
- package/dist/docs/generator.d.ts +4 -0
- package/dist/docs/generator.js +4 -0
- package/dist/docs/html-reporter.d.ts +9 -0
- package/dist/docs/html-reporter.js +757 -0
- package/dist/docs/index.d.ts +10 -0
- package/dist/docs/index.js +11 -0
- package/dist/docs/junit-reporter.d.ts +18 -0
- package/dist/docs/junit-reporter.js +210 -0
- package/dist/docs/report.d.ts +14 -0
- package/dist/docs/report.js +44 -0
- package/dist/docs/sarif-reporter.d.ts +19 -0
- package/dist/docs/sarif-reporter.js +335 -0
- package/dist/docs/shared.d.ts +35 -0
- package/dist/docs/shared.js +162 -0
- package/dist/docs/templates.d.ts +12 -0
- package/dist/docs/templates.js +76 -0
- package/dist/errors/index.d.ts +6 -0
- package/dist/errors/index.js +6 -0
- package/dist/errors/retry.d.ts +92 -0
- package/dist/errors/retry.js +323 -0
- package/dist/errors/types.d.ts +321 -0
- package/dist/errors/types.js +584 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.js +32 -0
- package/dist/interview/dependency-resolver.d.ts +11 -0
- package/dist/interview/dependency-resolver.js +32 -0
- package/dist/interview/interviewer.d.ts +232 -0
- package/dist/interview/interviewer.js +1939 -0
- package/dist/interview/mock-response-generator.d.ts +7 -0
- package/dist/interview/mock-response-generator.js +102 -0
- package/dist/interview/orchestrator.d.ts +237 -0
- package/dist/interview/orchestrator.js +1296 -0
- package/dist/interview/rate-limiter.d.ts +15 -0
- package/dist/interview/rate-limiter.js +55 -0
- package/dist/interview/response-validator.d.ts +10 -0
- package/dist/interview/response-validator.js +132 -0
- package/dist/interview/schema-inferrer.d.ts +8 -0
- package/dist/interview/schema-inferrer.js +71 -0
- package/dist/interview/schema-test-generator.d.ts +71 -0
- package/dist/interview/schema-test-generator.js +834 -0
- package/dist/interview/smart-value-generator.d.ts +155 -0
- package/dist/interview/smart-value-generator.js +554 -0
- package/dist/interview/stateful-test-runner.d.ts +19 -0
- package/dist/interview/stateful-test-runner.js +106 -0
- package/dist/interview/types.d.ts +561 -0
- package/dist/interview/types.js +2 -0
- package/dist/llm/anthropic.d.ts +41 -0
- package/dist/llm/anthropic.js +355 -0
- package/dist/llm/client.d.ts +123 -0
- package/dist/llm/client.js +42 -0
- package/dist/llm/factory.d.ts +38 -0
- package/dist/llm/factory.js +145 -0
- package/dist/llm/fallback.d.ts +140 -0
- package/dist/llm/fallback.js +379 -0
- package/dist/llm/index.d.ts +18 -0
- package/dist/llm/index.js +15 -0
- package/dist/llm/ollama.d.ts +37 -0
- package/dist/llm/ollama.js +330 -0
- package/dist/llm/openai.d.ts +25 -0
- package/dist/llm/openai.js +320 -0
- package/dist/llm/token-budget.d.ts +161 -0
- package/dist/llm/token-budget.js +395 -0
- package/dist/logging/logger.d.ts +70 -0
- package/dist/logging/logger.js +130 -0
- package/dist/metrics/collector.d.ts +106 -0
- package/dist/metrics/collector.js +547 -0
- package/dist/metrics/index.d.ts +7 -0
- package/dist/metrics/index.js +7 -0
- package/dist/metrics/prometheus.d.ts +20 -0
- package/dist/metrics/prometheus.js +241 -0
- package/dist/metrics/types.d.ts +209 -0
- package/dist/metrics/types.js +5 -0
- package/dist/persona/builtins.d.ts +54 -0
- package/dist/persona/builtins.js +219 -0
- package/dist/persona/index.d.ts +8 -0
- package/dist/persona/index.js +8 -0
- package/dist/persona/loader.d.ts +30 -0
- package/dist/persona/loader.js +190 -0
- package/dist/persona/types.d.ts +144 -0
- package/dist/persona/types.js +5 -0
- package/dist/persona/validation.d.ts +94 -0
- package/dist/persona/validation.js +332 -0
- package/dist/prompts/index.d.ts +5 -0
- package/dist/prompts/index.js +5 -0
- package/dist/prompts/templates.d.ts +180 -0
- package/dist/prompts/templates.js +431 -0
- package/dist/registry/client.d.ts +49 -0
- package/dist/registry/client.js +191 -0
- package/dist/registry/index.d.ts +7 -0
- package/dist/registry/index.js +6 -0
- package/dist/registry/types.d.ts +140 -0
- package/dist/registry/types.js +6 -0
- package/dist/scenarios/evaluator.d.ts +43 -0
- package/dist/scenarios/evaluator.js +206 -0
- package/dist/scenarios/index.d.ts +10 -0
- package/dist/scenarios/index.js +9 -0
- package/dist/scenarios/loader.d.ts +20 -0
- package/dist/scenarios/loader.js +285 -0
- package/dist/scenarios/types.d.ts +153 -0
- package/dist/scenarios/types.js +8 -0
- package/dist/security/index.d.ts +17 -0
- package/dist/security/index.js +18 -0
- package/dist/security/payloads.d.ts +61 -0
- package/dist/security/payloads.js +268 -0
- package/dist/security/security-tester.d.ts +42 -0
- package/dist/security/security-tester.js +582 -0
- package/dist/security/types.d.ts +166 -0
- package/dist/security/types.js +8 -0
- package/dist/transport/base-transport.d.ts +59 -0
- package/dist/transport/base-transport.js +38 -0
- package/dist/transport/http-transport.d.ts +67 -0
- package/dist/transport/http-transport.js +238 -0
- package/dist/transport/mcp-client.d.ts +141 -0
- package/dist/transport/mcp-client.js +496 -0
- package/dist/transport/sse-transport.d.ts +88 -0
- package/dist/transport/sse-transport.js +316 -0
- package/dist/transport/stdio-transport.d.ts +43 -0
- package/dist/transport/stdio-transport.js +238 -0
- package/dist/transport/types.d.ts +125 -0
- package/dist/transport/types.js +16 -0
- package/dist/utils/concurrency.d.ts +123 -0
- package/dist/utils/concurrency.js +213 -0
- package/dist/utils/formatters.d.ts +16 -0
- package/dist/utils/formatters.js +37 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/jsonpath.d.ts +87 -0
- package/dist/utils/jsonpath.js +326 -0
- package/dist/utils/markdown.d.ts +113 -0
- package/dist/utils/markdown.js +265 -0
- package/dist/utils/network.d.ts +14 -0
- package/dist/utils/network.js +17 -0
- package/dist/utils/sanitize.d.ts +92 -0
- package/dist/utils/sanitize.js +191 -0
- package/dist/utils/semantic.d.ts +194 -0
- package/dist/utils/semantic.js +1051 -0
- package/dist/utils/smart-truncate.d.ts +94 -0
- package/dist/utils/smart-truncate.js +361 -0
- package/dist/utils/timeout.d.ts +153 -0
- package/dist/utils/timeout.js +205 -0
- package/dist/utils/yaml-parser.d.ts +58 -0
- package/dist/utils/yaml-parser.js +86 -0
- package/dist/validation/index.d.ts +32 -0
- package/dist/validation/index.js +32 -0
- package/dist/validation/semantic-test-generator.d.ts +50 -0
- package/dist/validation/semantic-test-generator.js +176 -0
- package/dist/validation/semantic-types.d.ts +66 -0
- package/dist/validation/semantic-types.js +94 -0
- package/dist/validation/semantic-validator.d.ts +38 -0
- package/dist/validation/semantic-validator.js +340 -0
- package/dist/verification/index.d.ts +6 -0
- package/dist/verification/index.js +5 -0
- package/dist/verification/types.d.ts +133 -0
- package/dist/verification/types.js +5 -0
- package/dist/verification/verifier.d.ts +30 -0
- package/dist/verification/verifier.js +309 -0
- package/dist/version.d.ts +19 -0
- package/dist/version.js +48 -0
- package/dist/workflow/auto-generator.d.ts +27 -0
- package/dist/workflow/auto-generator.js +513 -0
- package/dist/workflow/discovery.d.ts +40 -0
- package/dist/workflow/discovery.js +195 -0
- package/dist/workflow/executor.d.ts +82 -0
- package/dist/workflow/executor.js +611 -0
- package/dist/workflow/index.d.ts +10 -0
- package/dist/workflow/index.js +10 -0
- package/dist/workflow/loader.d.ts +24 -0
- package/dist/workflow/loader.js +194 -0
- package/dist/workflow/state-tracker.d.ts +98 -0
- package/dist/workflow/state-tracker.js +424 -0
- package/dist/workflow/types.d.ts +337 -0
- package/dist/workflow/types.js +5 -0
- package/package.json +94 -0
- package/schemas/bellwether-check.schema.json +651 -0
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Intelligent Test Pruning.
|
|
3
|
+
*
|
|
4
|
+
* Determines which test categories to run or skip based on tool characteristics
|
|
5
|
+
* and testing history. Reduces test time while maintaining coverage where it matters.
|
|
6
|
+
*/
|
|
7
|
+
import { TEST_PRUNING } from '../constants.js';
|
|
8
|
+
/**
 * Compute a pruning decision for every tool in a batch.
 *
 * @param {Array<object>} inputs - One pruning input per tool; each entry is
 *   passed unchanged to `calculateToolPruning`.
 * @returns {Array<object>} One decision per input, in the same order.
 */
export function calculatePruningDecisions(inputs) {
    const decideForTool = (toolInput) => calculateToolPruning(toolInput);
    return inputs.map(decideForTool);
}
|
|
14
|
+
/**
 * Calculate the pruning decision for a single tool.
 *
 * Steps:
 *  1. Derive tool characteristics and an overall tool priority.
 *  2. Ask `decideCategoryPruning` for a verdict on every available category.
 *  3. Force every category listed in `TEST_PRUNING.ALWAYS_RUN` to run.
 *  4. If more categories are skipped than
 *     `TEST_PRUNING.MAX_SKIPPED_CATEGORIES_PER_TOOL` allows, re-enable the
 *     highest-priority skipped categories until the limit is respected.
 *
 * @param {object} input
 * @param {object} input.tool - Tool definition; `name` is copied to the result.
 * @param {object} [input.fingerprint] - Previous test fingerprint, if any.
 * @param {Array<object>} [input.errorPatterns] - Previously observed error patterns.
 * @param {Array<string>} input.availableCategories - Test categories to decide on.
 * @returns {{toolName: string, priority: number, categories: Array<object>,
 *   categoriesToRun: Array<string>, categoriesToSkip: Array<string>,
 *   reductionPercent: number}} The decision for this tool.
 */
export function calculateToolPruning(input) {
    const { tool, fingerprint, errorPatterns, availableCategories } = input;
    // Analyze tool characteristics
    const characteristics = analyzeToolCharacteristics(tool, fingerprint, errorPatterns);
    // Calculate tool priority
    const priority = calculateToolPriority(characteristics, fingerprint);
    // Make decisions for each category
    const categories = availableCategories.map(category => decideCategoryPruning(category, characteristics, priority));
    // Ensure always-run categories are included
    for (const decision of categories) {
        if (TEST_PRUNING.ALWAYS_RUN.includes(decision.category)) {
            decision.shouldRun = true;
            decision.reason = 'Required category';
        }
    }
    // Respect max skip limit
    const skipped = categories.filter(c => !c.shouldRun);
    const excess = skipped.length - TEST_PRUNING.MAX_SKIPPED_CATEGORIES_PER_TOOL;
    if (excess > 0) {
        // Re-enable the HIGHEST-priority skipped categories first (descending sort).
        // The decision objects are mutated directly rather than looked up again by
        // category name, so duplicate category names cannot cause the same entry
        // to be re-enabled twice while another stays skipped.
        const toReEnable = [...skipped]
            .sort((a, b) => b.priority - a.priority)
            .slice(0, excess);
        for (const decision of toReEnable) {
            decision.shouldRun = true;
            decision.reason = 'Re-enabled due to skip limit';
        }
    }
    const categoriesToRun = categories.filter(c => c.shouldRun).map(c => c.category);
    const categoriesToSkip = categories.filter(c => !c.shouldRun).map(c => c.category);
    // Calculate reduction percentage (guarding against division by zero)
    const reductionPercent = availableCategories.length > 0
        ? Math.round((categoriesToSkip.length / availableCategories.length) * 100)
        : 0;
    return {
        toolName: tool.name,
        priority,
        categories,
        categoriesToRun,
        categoriesToSkip,
        reductionPercent,
    };
}
|
|
63
|
+
/**
 * Analyze characteristics of a tool for pruning decisions.
 *
 * @param {object} tool - Tool definition; `inputSchema.properties` and
 *   `inputSchema.required` are inspected when present.
 * @param {object} [fingerprint] - Previous test fingerprint; only
 *   `lastTestedAt` is read here.
 * @param {Array<object>} [errorPatterns] - Observed error patterns; each entry's
 *   `count` (defaulting to 1 when falsy) contributes to the error estimate.
 * @returns {object} Characteristics: parameter counts, parameter-kind flags,
 *   `maxNestingDepth`, `hasExternalDependency`, `errorRate`,
 *   `hoursSinceLastTest` (null when unknown) and `consecutiveSuccesses`.
 */
function analyzeToolCharacteristics(tool, fingerprint, errorPatterns) {
    const schema = tool.inputSchema;
    const properties = schema?.properties || {};
    const required = schema?.required || [];
    const paramNames = Object.keys(properties);
    let hasNumericParams = false;
    let hasEnumParams = false;
    const hasOptionalParams = required.length < paramNames.length;
    let hasStringParams = false;
    let maxNestingDepth = 0;
    // Analyze each parameter
    for (const paramName of paramNames) {
        const param = properties[paramName];
        // JSON Schema allows `type` to be a single name or an array of names
        // (e.g. ['string', 'null']); normalise both forms to an array. The
        // optional chaining tolerates a null property schema.
        const declaredTypes = [].concat(param?.type ?? []);
        if (declaredTypes.includes('number') || declaredTypes.includes('integer')) {
            hasNumericParams = true;
        }
        if (declaredTypes.includes('string')) {
            hasStringParams = true;
        }
        if (Array.isArray(param?.enum)) {
            hasEnumParams = true;
        }
        // Calculate nesting depth
        const depth = calculateNestingDepth(param);
        maxNestingDepth = Math.max(maxNestingDepth, depth);
    }
    // Calculate error rate from patterns
    let errorRate = 0;
    if (errorPatterns && errorPatterns.length > 0) {
        const errorCount = errorPatterns.reduce((sum, p) => sum + (p.count || 1), 0);
        // Total call volume is not tracked, so assume at least 10 calls and at
        // least twice as many calls as errors; this caps the rate at 0.5.
        const estimatedCalls = Math.max(errorCount * 2, 10);
        errorRate = errorCount / estimatedCalls;
    }
    // Calculate hours since last test
    let hoursSinceLastTest = null;
    if (fingerprint?.lastTestedAt) {
        const lastTested = new Date(fingerprint.lastTestedAt).getTime();
        // An unparseable timestamp yields NaN; report it as "unknown" (null)
        // instead of leaking NaN into the result.
        if (Number.isFinite(lastTested)) {
            hoursSinceLastTest = (Date.now() - lastTested) / (1000 * 60 * 60);
        }
    }
    // Check for external dependencies
    const hasExternalDependency = checkExternalDependency(tool, errorPatterns);
    // Count consecutive successes (simplified - would need history tracking):
    // a tool with no recorded errors is credited with 5 successes.
    const consecutiveSuccesses = errorRate === 0 ? 5 : 0;
    return {
        parameterCount: paramNames.length,
        requiredParamCount: required.length,
        hasNumericParams,
        hasEnumParams,
        hasOptionalParams,
        hasStringParams,
        maxNestingDepth,
        hasExternalDependency,
        errorRate,
        hoursSinceLastTest,
        consecutiveSuccesses,
    };
}
|
|
126
|
+
/**
 * Measure how deeply a schema nests objects and arrays.
 *
 * An object schema with `properties` counts as one level plus the deepest of
 * its properties; an array schema with `items` counts as one level plus its
 * item schema. Anything else contributes no further depth.
 *
 * @param {*} schema - Schema node to inspect; non-object values are leaves.
 * @param {number} [currentDepth=0] - Depth already accumulated by the caller.
 * @returns {number} Deepest level reached at or below this node.
 */
function calculateNestingDepth(schema, currentDepth = 0) {
    const isSchemaNode = Boolean(schema) && typeof schema === 'object';
    // Stop descending once the depth exceeds 10.
    if (!isSchemaNode || currentDepth > 10) {
        return currentDepth;
    }
    const childDepth = currentDepth + 1;
    if (schema.type === 'object' && schema.properties) {
        return Object.values(schema.properties).reduce(
            (deepest, child) => Math.max(deepest, calculateNestingDepth(child, childDepth)),
            childDepth,
        );
    }
    if (schema.type === 'array' && schema.items) {
        return calculateNestingDepth(schema.items, childDepth);
    }
    return currentDepth;
}
|
|
147
|
+
/**
 * Heuristically decide whether a tool relies on an external system.
 *
 * The tool counts as externally dependent when its lower-cased name or
 * description contains any known indicator substring, or when any recorded
 * error pattern has category 'timeout' or 'internal'.
 *
 * @param {{name: string, description?: string}} tool - Tool definition.
 * @param {Array<{category?: string}>} [errorPatterns] - Observed error patterns.
 * @returns {boolean} True when an external dependency is suspected.
 */
function checkExternalDependency(tool, errorPatterns) {
    const searchableText = [
        (tool.description || '').toLowerCase(),
        tool.name.toLowerCase(),
    ];
    // Substring indicators of common external services.
    const keywords = [
        'api', 'external', 'service', 'cloud', 'remote',
        'plaid', 'stripe', 'aws', 's3', 'openai', 'anthropic',
        'database', 'db', 'postgres', 'mysql', 'redis',
    ];
    const mentionsExternalService = keywords.some(
        (keyword) => searchableText.some((text) => text.includes(keyword)),
    );
    if (mentionsExternalService) {
        return true;
    }
    if (!errorPatterns) {
        return false;
    }
    // Timeouts and internal errors are treated as signs of external calls.
    const externalCategories = new Set(['timeout', 'internal']);
    const externalErrors = errorPatterns.filter(({ category }) => externalCategories.has(category));
    return externalErrors.length > 0;
}
|
|
171
|
+
/**
 * Score how important it is to test a tool, as an integer from 0 to 100.
 *
 * Starts from a base of 50 and adjusts it with weighted factors read from
 * `TEST_PRUNING`: error history, external dependencies, schema complexity and
 * staleness raise the score; a stable success history lowers it by 20.
 *
 * @param {object} characteristics - Result of `analyzeToolCharacteristics`.
 * @param {object} [fingerprint] - Previous test fingerprint (currently only
 *   inspected by the placeholder schema-change factor, which has no effect).
 * @returns {number} Rounded priority clamped to the range 0-100.
 */
function calculateToolPriority(characteristics, fingerprint) {
    const weights = TEST_PRUNING.PRIORITY_WEIGHTS;
    const {
        errorRate,
        hasExternalDependency,
        parameterCount,
        maxNestingDepth,
        hoursSinceLastTest,
        consecutiveSuccesses,
    } = characteristics;
    let score = 50; // Base priority
    // Error history raises priority.
    if (errorRate > 0) {
        score += Math.round(errorRate * 100 * weights.errorHistory);
    }
    // External dependencies raise priority.
    if (hasExternalDependency) {
        score += 25 * weights.externalDependency;
    }
    // Schema complexity raises priority (many parameters and/or deep nesting).
    if (parameterCount >= TEST_PRUNING.SCHEMA_COMPLEXITY.HIGH_PARAM_COUNT) {
        score += 20 * weights.schemaComplexity;
    }
    if (maxNestingDepth >= TEST_PRUNING.SCHEMA_COMPLEXITY.HIGH_NESTING_DEPTH) {
        score += 15 * weights.schemaComplexity;
    }
    // Staleness raises priority; the very-stale bonus replaces the stale bonus.
    if (hoursSinceLastTest !== null) {
        const thresholds = TEST_PRUNING.TIME_THRESHOLDS;
        if (hoursSinceLastTest >= thresholds.VERY_STALE_HOURS) {
            score += 30 * weights.timeSinceLastTest;
        }
        else if (hoursSinceLastTest >= thresholds.STALE_HOURS) {
            score += 15 * weights.timeSinceLastTest;
        }
    }
    // A stable run of successes lowers priority.
    if (consecutiveSuccesses >= TEST_PRUNING.SUCCESS_HISTORY.STABLE_RUN_COUNT) {
        score -= 20;
    }
    // Schema-change factor: placeholder only. Comparing against the recorded
    // hash would need the current schema hash, which is not available here,
    // so this branch intentionally does nothing.
    if (fingerprint?.inputSchemaHashAtTest) {
        // no-op
    }
    const rounded = Math.round(score);
    return Math.min(100, Math.max(0, rounded));
}
|
|
211
|
+
/**
 * Decide whether to run a test category for a tool.
 *
 * Categories listed in `TEST_PRUNING.ALWAYS_RUN` always run. Each known
 * category is skipped outright when the tool lacks the kind of parameter it
 * exercises; otherwise it runs only when the tool priority meets the
 * category's threshold. Unknown categories run by default.
 *
 * The returned `reason` always describes the actual decision: a category that
 * is applicable but skipped because of low tool priority is reported as low
 * priority, not as "has ... parameters".
 *
 * @param {string} category - Test category name.
 * @param {object} characteristics - Result of `analyzeToolCharacteristics`.
 * @param {number} toolPriority - Overall tool priority.
 * @returns {{category: string, shouldRun: boolean, reason: string,
 *   priority: number}} Decision for this category.
 */
function decideCategoryPruning(category, characteristics, toolPriority) {
    // Always-run categories
    if (TEST_PRUNING.ALWAYS_RUN.includes(category)) {
        return {
            category,
            shouldRun: true,
            reason: 'Required category',
            priority: 100,
        };
    }
    switch (category) {
        case 'boundary': {
            if (!characteristics.hasNumericParams) {
                return {
                    category,
                    shouldRun: false,
                    reason: 'No numeric parameters',
                    priority: 0,
                };
            }
            const shouldRun = toolPriority >= 40;
            return {
                category,
                shouldRun,
                reason: shouldRun ? 'Has numeric parameters' : 'Low priority tool',
                priority: toolPriority,
            };
        }
        case 'enum':
            if (!characteristics.hasEnumParams) {
                return {
                    category,
                    shouldRun: false,
                    reason: 'No enum parameters',
                    priority: 0,
                };
            }
            return {
                category,
                shouldRun: true,
                reason: 'Has enum parameters',
                priority: 70,
            };
        case 'optional_combinations': {
            if (!characteristics.hasOptionalParams) {
                return {
                    category,
                    shouldRun: false,
                    reason: 'No optional parameters',
                    priority: 0,
                };
            }
            // Only run for high-priority tools with optional params
            const shouldRun = toolPriority >= 60;
            return {
                category,
                shouldRun,
                reason: shouldRun ? 'Has optional parameters' : 'Low priority',
                priority: toolPriority - 10,
            };
        }
        case 'security': {
            if (!characteristics.hasStringParams) {
                return {
                    category,
                    shouldRun: false,
                    reason: 'No string parameters',
                    priority: 0,
                };
            }
            // Security tests are important - run for medium+ priority, and
            // always when the tool has an external dependency.
            const meetsPriority = toolPriority >= 30;
            let reason = 'Low priority tool';
            if (characteristics.hasExternalDependency) {
                reason = 'External dependency';
            }
            else if (meetsPriority) {
                reason = 'Has string inputs';
            }
            return {
                category,
                shouldRun: meetsPriority || characteristics.hasExternalDependency,
                reason,
                priority: toolPriority + 10,
            };
        }
        case 'semantic':
            // Semantic tests run only for tools at or above the base priority.
            return {
                category,
                shouldRun: toolPriority >= 50,
                reason: toolPriority >= 50 ? 'Standard priority' : 'Low priority tool',
                priority: toolPriority,
            };
        default:
            return {
                category,
                shouldRun: true,
                reason: 'Unknown category - running by default',
                priority: 50,
            };
    }
}
|
|
304
|
+
/**
 * Order tool decisions for testing, highest priority first.
 *
 * @param {Iterable<{priority: number}>} decisions - Decisions to order.
 * @returns {Array<object>} A new array sorted by descending `priority`; the
 *   input is not modified.
 */
export function prioritizeTools(decisions) {
    const byDescendingPriority = (left, right) => right.priority - left.priority;
    const ordered = [...decisions];
    ordered.sort(byDescendingPriority);
    return ordered;
}
|
|
310
|
+
/**
 * Summarize pruning decisions across all tools.
 *
 * @param {Array<object>} decisions - Per-tool decisions; `categories`,
 *   `categoriesToRun`, `priority`, `reductionPercent` and `toolName` are read.
 * @returns {{totalTools: number, totalCategoriesWithoutPruning: number,
 *   totalCategoriesWithPruning: number, overallReduction: number,
 *   highPriorityTools: Array<string>, mostPrunedTools: Array<string>}}
 *   Totals, the overall percentage reduction, the five highest-priority tool
 *   names and up to five tool names with the largest non-zero reduction.
 */
export function generatePruningSummary(decisions) {
    const descendingBy = (field) => (left, right) => right[field] - left[field];
    const nameOf = (decision) => decision.toolName;
    let categoriesBefore = 0;
    let categoriesAfter = 0;
    for (const { categories, categoriesToRun } of decisions) {
        categoriesBefore += categories.length;
        categoriesAfter += categoriesToRun.length;
    }
    // Guard against division by zero when no categories exist at all.
    let overallReduction = 0;
    if (categoriesBefore > 0) {
        overallReduction = Math.round(((categoriesBefore - categoriesAfter) / categoriesBefore) * 100);
    }
    const rankedByPriority = [...decisions];
    rankedByPriority.sort(descendingBy('priority'));
    const rankedByReduction = [...decisions];
    rankedByReduction.sort(descendingBy('reductionPercent'));
    return {
        totalTools: decisions.length,
        totalCategoriesWithoutPruning: categoriesBefore,
        totalCategoriesWithPruning: categoriesAfter,
        overallReduction,
        highPriorityTools: rankedByPriority.slice(0, 5).map(nameOf),
        mostPrunedTools: rankedByReduction
            .filter((decision) => decision.reductionPercent > 0)
            .slice(0, 5)
            .map(nameOf),
    };
}
|
|
340
|
+
/**
 * Render pruning decisions and their summary as a markdown report.
 *
 * The report has a headline reduction figure, a metrics table, an optional
 * "High Priority Tools" list and a per-tool decision table limited to the 15
 * highest-priority tools (with a trailer row when more exist).
 *
 * @param {Array<object>} decisions - Per-tool pruning decisions.
 * @param {object} summary - Summary in the shape returned by
 *   `generatePruningSummary`.
 * @returns {string} Markdown text with lines joined by '\n'.
 */
export function generatePruningMarkdown(decisions, summary) {
    const maxRows = 15;
    const {
        overallReduction,
        totalTools,
        totalCategoriesWithPruning: keptCount,
        totalCategoriesWithoutPruning: fullCount,
        highPriorityTools,
    } = summary;
    // Headline and summary stats
    const report = [
        '## Test Pruning Analysis',
        '',
        `**Test Reduction: ${overallReduction}%** (${keptCount}/${fullCount} categories)`,
        '',
        '| Metric | Value |',
        '|--------|-------|',
        `| Tools Analyzed | ${totalTools} |`,
        `| Categories Without Pruning | ${fullCount} |`,
        `| Categories With Pruning | ${keptCount} |`,
        `| Reduction | ${overallReduction}% |`,
        '',
    ];
    // High priority tools (names without a matching decision are omitted)
    if (highPriorityTools.length > 0) {
        report.push(
            '### High Priority Tools',
            '',
            'These tools have elevated testing priority:',
            '',
        );
        for (const toolName of highPriorityTools) {
            const match = decisions.find((candidate) => candidate.toolName === toolName);
            if (match) {
                report.push(`- \`${toolName}\` (priority: ${match.priority})`);
            }
        }
        report.push('');
    }
    // Pruning details for the top tools
    report.push(
        '### Pruning Decisions',
        '',
        '| Tool | Priority | Run | Skip | Reduction |',
        '|------|----------|-----|------|-----------|',
    );
    for (const row of prioritizeTools(decisions).slice(0, maxRows)) {
        report.push(`| \`${row.toolName}\` | ${row.priority} | ${row.categoriesToRun.length} | ${row.categoriesToSkip.length} | ${row.reductionPercent}% |`);
    }
    const hiddenCount = decisions.length - maxRows;
    if (hiddenCount > 0) {
        report.push(`| ... | ... | ... | ... | ... |`);
        report.push(`| *${hiddenCount} more tools* | | | | |`);
    }
    report.push('');
    return report.join('\n');
}
|
|
387
|
+
//# sourceMappingURL=test-pruner.js.map
|