@dotsetlabs/bellwether 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +291 -0
- package/LICENSE +21 -0
- package/README.md +739 -0
- package/dist/auth/credentials.d.ts +64 -0
- package/dist/auth/credentials.js +218 -0
- package/dist/auth/index.d.ts +6 -0
- package/dist/auth/index.js +6 -0
- package/dist/auth/keychain.d.ts +64 -0
- package/dist/auth/keychain.js +268 -0
- package/dist/baseline/ab-testing.d.ts +80 -0
- package/dist/baseline/ab-testing.js +236 -0
- package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
- package/dist/baseline/ai-compatibility-scorer.js +606 -0
- package/dist/baseline/calibration.d.ts +77 -0
- package/dist/baseline/calibration.js +136 -0
- package/dist/baseline/category-matching.d.ts +85 -0
- package/dist/baseline/category-matching.js +289 -0
- package/dist/baseline/change-impact-analyzer.d.ts +98 -0
- package/dist/baseline/change-impact-analyzer.js +592 -0
- package/dist/baseline/comparator.d.ts +64 -0
- package/dist/baseline/comparator.js +916 -0
- package/dist/baseline/confidence.d.ts +55 -0
- package/dist/baseline/confidence.js +122 -0
- package/dist/baseline/converter.d.ts +61 -0
- package/dist/baseline/converter.js +585 -0
- package/dist/baseline/dependency-analyzer.d.ts +89 -0
- package/dist/baseline/dependency-analyzer.js +567 -0
- package/dist/baseline/deprecation-tracker.d.ts +133 -0
- package/dist/baseline/deprecation-tracker.js +322 -0
- package/dist/baseline/diff.d.ts +55 -0
- package/dist/baseline/diff.js +1584 -0
- package/dist/baseline/documentation-scorer.d.ts +205 -0
- package/dist/baseline/documentation-scorer.js +466 -0
- package/dist/baseline/embeddings.d.ts +118 -0
- package/dist/baseline/embeddings.js +251 -0
- package/dist/baseline/error-analyzer.d.ts +198 -0
- package/dist/baseline/error-analyzer.js +721 -0
- package/dist/baseline/evaluation/evaluator.d.ts +42 -0
- package/dist/baseline/evaluation/evaluator.js +323 -0
- package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
- package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
- package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
- package/dist/baseline/evaluation/golden-dataset.js +717 -0
- package/dist/baseline/evaluation/index.d.ts +15 -0
- package/dist/baseline/evaluation/index.js +15 -0
- package/dist/baseline/evaluation/types.d.ts +186 -0
- package/dist/baseline/evaluation/types.js +8 -0
- package/dist/baseline/external-dependency-detector.d.ts +181 -0
- package/dist/baseline/external-dependency-detector.js +524 -0
- package/dist/baseline/golden-output.d.ts +162 -0
- package/dist/baseline/golden-output.js +636 -0
- package/dist/baseline/health-scorer.d.ts +174 -0
- package/dist/baseline/health-scorer.js +451 -0
- package/dist/baseline/incremental-checker.d.ts +97 -0
- package/dist/baseline/incremental-checker.js +174 -0
- package/dist/baseline/index.d.ts +31 -0
- package/dist/baseline/index.js +42 -0
- package/dist/baseline/migration-generator.d.ts +137 -0
- package/dist/baseline/migration-generator.js +554 -0
- package/dist/baseline/migrations.d.ts +60 -0
- package/dist/baseline/migrations.js +197 -0
- package/dist/baseline/performance-tracker.d.ts +214 -0
- package/dist/baseline/performance-tracker.js +577 -0
- package/dist/baseline/pr-comment-generator.d.ts +117 -0
- package/dist/baseline/pr-comment-generator.js +546 -0
- package/dist/baseline/response-fingerprint.d.ts +127 -0
- package/dist/baseline/response-fingerprint.js +728 -0
- package/dist/baseline/response-schema-tracker.d.ts +129 -0
- package/dist/baseline/response-schema-tracker.js +420 -0
- package/dist/baseline/risk-scorer.d.ts +54 -0
- package/dist/baseline/risk-scorer.js +434 -0
- package/dist/baseline/saver.d.ts +89 -0
- package/dist/baseline/saver.js +554 -0
- package/dist/baseline/scenario-generator.d.ts +151 -0
- package/dist/baseline/scenario-generator.js +905 -0
- package/dist/baseline/schema-compare.d.ts +86 -0
- package/dist/baseline/schema-compare.js +557 -0
- package/dist/baseline/schema-evolution.d.ts +189 -0
- package/dist/baseline/schema-evolution.js +467 -0
- package/dist/baseline/semantic.d.ts +203 -0
- package/dist/baseline/semantic.js +908 -0
- package/dist/baseline/synonyms.d.ts +60 -0
- package/dist/baseline/synonyms.js +386 -0
- package/dist/baseline/telemetry.d.ts +165 -0
- package/dist/baseline/telemetry.js +294 -0
- package/dist/baseline/test-pruner.d.ts +120 -0
- package/dist/baseline/test-pruner.js +387 -0
- package/dist/baseline/types.d.ts +449 -0
- package/dist/baseline/types.js +5 -0
- package/dist/baseline/version.d.ts +138 -0
- package/dist/baseline/version.js +206 -0
- package/dist/cache/index.d.ts +5 -0
- package/dist/cache/index.js +5 -0
- package/dist/cache/response-cache.d.ts +151 -0
- package/dist/cache/response-cache.js +287 -0
- package/dist/ci/index.d.ts +60 -0
- package/dist/ci/index.js +342 -0
- package/dist/cli/commands/auth.d.ts +12 -0
- package/dist/cli/commands/auth.js +352 -0
- package/dist/cli/commands/badge.d.ts +3 -0
- package/dist/cli/commands/badge.js +74 -0
- package/dist/cli/commands/baseline-accept.d.ts +15 -0
- package/dist/cli/commands/baseline-accept.js +178 -0
- package/dist/cli/commands/baseline-migrate.d.ts +12 -0
- package/dist/cli/commands/baseline-migrate.js +164 -0
- package/dist/cli/commands/baseline.d.ts +14 -0
- package/dist/cli/commands/baseline.js +449 -0
- package/dist/cli/commands/beta.d.ts +10 -0
- package/dist/cli/commands/beta.js +231 -0
- package/dist/cli/commands/check.d.ts +11 -0
- package/dist/cli/commands/check.js +820 -0
- package/dist/cli/commands/cloud/badge.d.ts +3 -0
- package/dist/cli/commands/cloud/badge.js +74 -0
- package/dist/cli/commands/cloud/diff.d.ts +6 -0
- package/dist/cli/commands/cloud/diff.js +79 -0
- package/dist/cli/commands/cloud/history.d.ts +6 -0
- package/dist/cli/commands/cloud/history.js +102 -0
- package/dist/cli/commands/cloud/link.d.ts +9 -0
- package/dist/cli/commands/cloud/link.js +119 -0
- package/dist/cli/commands/cloud/login.d.ts +7 -0
- package/dist/cli/commands/cloud/login.js +499 -0
- package/dist/cli/commands/cloud/projects.d.ts +6 -0
- package/dist/cli/commands/cloud/projects.js +44 -0
- package/dist/cli/commands/cloud/shared.d.ts +7 -0
- package/dist/cli/commands/cloud/shared.js +42 -0
- package/dist/cli/commands/cloud/teams.d.ts +8 -0
- package/dist/cli/commands/cloud/teams.js +169 -0
- package/dist/cli/commands/cloud/upload.d.ts +8 -0
- package/dist/cli/commands/cloud/upload.js +181 -0
- package/dist/cli/commands/contract.d.ts +11 -0
- package/dist/cli/commands/contract.js +280 -0
- package/dist/cli/commands/discover.d.ts +3 -0
- package/dist/cli/commands/discover.js +82 -0
- package/dist/cli/commands/eval.d.ts +9 -0
- package/dist/cli/commands/eval.js +187 -0
- package/dist/cli/commands/explore.d.ts +11 -0
- package/dist/cli/commands/explore.js +437 -0
- package/dist/cli/commands/feedback.d.ts +9 -0
- package/dist/cli/commands/feedback.js +174 -0
- package/dist/cli/commands/golden.d.ts +12 -0
- package/dist/cli/commands/golden.js +407 -0
- package/dist/cli/commands/history.d.ts +10 -0
- package/dist/cli/commands/history.js +202 -0
- package/dist/cli/commands/init.d.ts +9 -0
- package/dist/cli/commands/init.js +219 -0
- package/dist/cli/commands/interview.d.ts +3 -0
- package/dist/cli/commands/interview.js +903 -0
- package/dist/cli/commands/link.d.ts +10 -0
- package/dist/cli/commands/link.js +169 -0
- package/dist/cli/commands/login.d.ts +7 -0
- package/dist/cli/commands/login.js +499 -0
- package/dist/cli/commands/preset.d.ts +33 -0
- package/dist/cli/commands/preset.js +297 -0
- package/dist/cli/commands/profile.d.ts +33 -0
- package/dist/cli/commands/profile.js +286 -0
- package/dist/cli/commands/registry.d.ts +11 -0
- package/dist/cli/commands/registry.js +146 -0
- package/dist/cli/commands/shared.d.ts +79 -0
- package/dist/cli/commands/shared.js +196 -0
- package/dist/cli/commands/teams.d.ts +8 -0
- package/dist/cli/commands/teams.js +169 -0
- package/dist/cli/commands/test.d.ts +9 -0
- package/dist/cli/commands/test.js +500 -0
- package/dist/cli/commands/upload.d.ts +8 -0
- package/dist/cli/commands/upload.js +223 -0
- package/dist/cli/commands/validate-config.d.ts +6 -0
- package/dist/cli/commands/validate-config.js +35 -0
- package/dist/cli/commands/verify.d.ts +11 -0
- package/dist/cli/commands/verify.js +283 -0
- package/dist/cli/commands/watch.d.ts +12 -0
- package/dist/cli/commands/watch.js +253 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.js +178 -0
- package/dist/cli/interactive.d.ts +47 -0
- package/dist/cli/interactive.js +216 -0
- package/dist/cli/output/terminal-reporter.d.ts +19 -0
- package/dist/cli/output/terminal-reporter.js +104 -0
- package/dist/cli/output.d.ts +226 -0
- package/dist/cli/output.js +438 -0
- package/dist/cli/utils/env.d.ts +5 -0
- package/dist/cli/utils/env.js +14 -0
- package/dist/cli/utils/progress.d.ts +59 -0
- package/dist/cli/utils/progress.js +206 -0
- package/dist/cli/utils/server-context.d.ts +10 -0
- package/dist/cli/utils/server-context.js +36 -0
- package/dist/cloud/auth.d.ts +144 -0
- package/dist/cloud/auth.js +374 -0
- package/dist/cloud/client.d.ts +24 -0
- package/dist/cloud/client.js +65 -0
- package/dist/cloud/http-client.d.ts +38 -0
- package/dist/cloud/http-client.js +215 -0
- package/dist/cloud/index.d.ts +23 -0
- package/dist/cloud/index.js +25 -0
- package/dist/cloud/mock-client.d.ts +107 -0
- package/dist/cloud/mock-client.js +545 -0
- package/dist/cloud/types.d.ts +515 -0
- package/dist/cloud/types.js +15 -0
- package/dist/config/defaults.d.ts +160 -0
- package/dist/config/defaults.js +169 -0
- package/dist/config/loader.d.ts +24 -0
- package/dist/config/loader.js +122 -0
- package/dist/config/template.d.ts +42 -0
- package/dist/config/template.js +647 -0
- package/dist/config/validator.d.ts +2112 -0
- package/dist/config/validator.js +658 -0
- package/dist/constants/cloud.d.ts +107 -0
- package/dist/constants/cloud.js +110 -0
- package/dist/constants/core.d.ts +521 -0
- package/dist/constants/core.js +556 -0
- package/dist/constants/testing.d.ts +1283 -0
- package/dist/constants/testing.js +1568 -0
- package/dist/constants.d.ts +10 -0
- package/dist/constants.js +10 -0
- package/dist/contract/index.d.ts +6 -0
- package/dist/contract/index.js +5 -0
- package/dist/contract/validator.d.ts +177 -0
- package/dist/contract/validator.js +574 -0
- package/dist/cost/index.d.ts +6 -0
- package/dist/cost/index.js +5 -0
- package/dist/cost/tracker.d.ts +134 -0
- package/dist/cost/tracker.js +313 -0
- package/dist/discovery/discovery.d.ts +16 -0
- package/dist/discovery/discovery.js +173 -0
- package/dist/discovery/types.d.ts +51 -0
- package/dist/discovery/types.js +2 -0
- package/dist/docs/agents.d.ts +3 -0
- package/dist/docs/agents.js +995 -0
- package/dist/docs/contract.d.ts +51 -0
- package/dist/docs/contract.js +1681 -0
- package/dist/docs/generator.d.ts +4 -0
- package/dist/docs/generator.js +4 -0
- package/dist/docs/html-reporter.d.ts +9 -0
- package/dist/docs/html-reporter.js +757 -0
- package/dist/docs/index.d.ts +10 -0
- package/dist/docs/index.js +11 -0
- package/dist/docs/junit-reporter.d.ts +18 -0
- package/dist/docs/junit-reporter.js +210 -0
- package/dist/docs/report.d.ts +14 -0
- package/dist/docs/report.js +44 -0
- package/dist/docs/sarif-reporter.d.ts +19 -0
- package/dist/docs/sarif-reporter.js +335 -0
- package/dist/docs/shared.d.ts +35 -0
- package/dist/docs/shared.js +162 -0
- package/dist/docs/templates.d.ts +12 -0
- package/dist/docs/templates.js +76 -0
- package/dist/errors/index.d.ts +6 -0
- package/dist/errors/index.js +6 -0
- package/dist/errors/retry.d.ts +92 -0
- package/dist/errors/retry.js +323 -0
- package/dist/errors/types.d.ts +321 -0
- package/dist/errors/types.js +584 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.js +32 -0
- package/dist/interview/dependency-resolver.d.ts +11 -0
- package/dist/interview/dependency-resolver.js +32 -0
- package/dist/interview/interviewer.d.ts +232 -0
- package/dist/interview/interviewer.js +1939 -0
- package/dist/interview/mock-response-generator.d.ts +7 -0
- package/dist/interview/mock-response-generator.js +102 -0
- package/dist/interview/orchestrator.d.ts +237 -0
- package/dist/interview/orchestrator.js +1296 -0
- package/dist/interview/rate-limiter.d.ts +15 -0
- package/dist/interview/rate-limiter.js +55 -0
- package/dist/interview/response-validator.d.ts +10 -0
- package/dist/interview/response-validator.js +132 -0
- package/dist/interview/schema-inferrer.d.ts +8 -0
- package/dist/interview/schema-inferrer.js +71 -0
- package/dist/interview/schema-test-generator.d.ts +71 -0
- package/dist/interview/schema-test-generator.js +834 -0
- package/dist/interview/smart-value-generator.d.ts +155 -0
- package/dist/interview/smart-value-generator.js +554 -0
- package/dist/interview/stateful-test-runner.d.ts +19 -0
- package/dist/interview/stateful-test-runner.js +106 -0
- package/dist/interview/types.d.ts +561 -0
- package/dist/interview/types.js +2 -0
- package/dist/llm/anthropic.d.ts +41 -0
- package/dist/llm/anthropic.js +355 -0
- package/dist/llm/client.d.ts +123 -0
- package/dist/llm/client.js +42 -0
- package/dist/llm/factory.d.ts +38 -0
- package/dist/llm/factory.js +145 -0
- package/dist/llm/fallback.d.ts +140 -0
- package/dist/llm/fallback.js +379 -0
- package/dist/llm/index.d.ts +18 -0
- package/dist/llm/index.js +15 -0
- package/dist/llm/ollama.d.ts +37 -0
- package/dist/llm/ollama.js +330 -0
- package/dist/llm/openai.d.ts +25 -0
- package/dist/llm/openai.js +320 -0
- package/dist/llm/token-budget.d.ts +161 -0
- package/dist/llm/token-budget.js +395 -0
- package/dist/logging/logger.d.ts +70 -0
- package/dist/logging/logger.js +130 -0
- package/dist/metrics/collector.d.ts +106 -0
- package/dist/metrics/collector.js +547 -0
- package/dist/metrics/index.d.ts +7 -0
- package/dist/metrics/index.js +7 -0
- package/dist/metrics/prometheus.d.ts +20 -0
- package/dist/metrics/prometheus.js +241 -0
- package/dist/metrics/types.d.ts +209 -0
- package/dist/metrics/types.js +5 -0
- package/dist/persona/builtins.d.ts +54 -0
- package/dist/persona/builtins.js +219 -0
- package/dist/persona/index.d.ts +8 -0
- package/dist/persona/index.js +8 -0
- package/dist/persona/loader.d.ts +30 -0
- package/dist/persona/loader.js +190 -0
- package/dist/persona/types.d.ts +144 -0
- package/dist/persona/types.js +5 -0
- package/dist/persona/validation.d.ts +94 -0
- package/dist/persona/validation.js +332 -0
- package/dist/prompts/index.d.ts +5 -0
- package/dist/prompts/index.js +5 -0
- package/dist/prompts/templates.d.ts +180 -0
- package/dist/prompts/templates.js +431 -0
- package/dist/registry/client.d.ts +49 -0
- package/dist/registry/client.js +191 -0
- package/dist/registry/index.d.ts +7 -0
- package/dist/registry/index.js +6 -0
- package/dist/registry/types.d.ts +140 -0
- package/dist/registry/types.js +6 -0
- package/dist/scenarios/evaluator.d.ts +43 -0
- package/dist/scenarios/evaluator.js +206 -0
- package/dist/scenarios/index.d.ts +10 -0
- package/dist/scenarios/index.js +9 -0
- package/dist/scenarios/loader.d.ts +20 -0
- package/dist/scenarios/loader.js +285 -0
- package/dist/scenarios/types.d.ts +153 -0
- package/dist/scenarios/types.js +8 -0
- package/dist/security/index.d.ts +17 -0
- package/dist/security/index.js +18 -0
- package/dist/security/payloads.d.ts +61 -0
- package/dist/security/payloads.js +268 -0
- package/dist/security/security-tester.d.ts +42 -0
- package/dist/security/security-tester.js +582 -0
- package/dist/security/types.d.ts +166 -0
- package/dist/security/types.js +8 -0
- package/dist/transport/base-transport.d.ts +59 -0
- package/dist/transport/base-transport.js +38 -0
- package/dist/transport/http-transport.d.ts +67 -0
- package/dist/transport/http-transport.js +238 -0
- package/dist/transport/mcp-client.d.ts +141 -0
- package/dist/transport/mcp-client.js +496 -0
- package/dist/transport/sse-transport.d.ts +88 -0
- package/dist/transport/sse-transport.js +316 -0
- package/dist/transport/stdio-transport.d.ts +43 -0
- package/dist/transport/stdio-transport.js +238 -0
- package/dist/transport/types.d.ts +125 -0
- package/dist/transport/types.js +16 -0
- package/dist/utils/concurrency.d.ts +123 -0
- package/dist/utils/concurrency.js +213 -0
- package/dist/utils/formatters.d.ts +16 -0
- package/dist/utils/formatters.js +37 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/jsonpath.d.ts +87 -0
- package/dist/utils/jsonpath.js +326 -0
- package/dist/utils/markdown.d.ts +113 -0
- package/dist/utils/markdown.js +265 -0
- package/dist/utils/network.d.ts +14 -0
- package/dist/utils/network.js +17 -0
- package/dist/utils/sanitize.d.ts +92 -0
- package/dist/utils/sanitize.js +191 -0
- package/dist/utils/semantic.d.ts +194 -0
- package/dist/utils/semantic.js +1051 -0
- package/dist/utils/smart-truncate.d.ts +94 -0
- package/dist/utils/smart-truncate.js +361 -0
- package/dist/utils/timeout.d.ts +153 -0
- package/dist/utils/timeout.js +205 -0
- package/dist/utils/yaml-parser.d.ts +58 -0
- package/dist/utils/yaml-parser.js +86 -0
- package/dist/validation/index.d.ts +32 -0
- package/dist/validation/index.js +32 -0
- package/dist/validation/semantic-test-generator.d.ts +50 -0
- package/dist/validation/semantic-test-generator.js +176 -0
- package/dist/validation/semantic-types.d.ts +66 -0
- package/dist/validation/semantic-types.js +94 -0
- package/dist/validation/semantic-validator.d.ts +38 -0
- package/dist/validation/semantic-validator.js +340 -0
- package/dist/verification/index.d.ts +6 -0
- package/dist/verification/index.js +5 -0
- package/dist/verification/types.d.ts +133 -0
- package/dist/verification/types.js +5 -0
- package/dist/verification/verifier.d.ts +30 -0
- package/dist/verification/verifier.js +309 -0
- package/dist/version.d.ts +19 -0
- package/dist/version.js +48 -0
- package/dist/workflow/auto-generator.d.ts +27 -0
- package/dist/workflow/auto-generator.js +513 -0
- package/dist/workflow/discovery.d.ts +40 -0
- package/dist/workflow/discovery.js +195 -0
- package/dist/workflow/executor.d.ts +82 -0
- package/dist/workflow/executor.js +611 -0
- package/dist/workflow/index.d.ts +10 -0
- package/dist/workflow/index.js +10 -0
- package/dist/workflow/loader.d.ts +24 -0
- package/dist/workflow/loader.js +194 -0
- package/dist/workflow/state-tracker.d.ts +98 -0
- package/dist/workflow/state-tracker.js +424 -0
- package/dist/workflow/types.d.ts +337 -0
- package/dist/workflow/types.js +5 -0
- package/package.json +94 -0
- package/schemas/bellwether-check.schema.json +651 -0
|
@@ -0,0 +1,577 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Performance Regression Detection
|
|
3
|
+
*
|
|
4
|
+
* Tracks response times across tool executions and detects performance regressions.
|
|
5
|
+
* Provides percentile-based metrics (p50, p95, p99) for comprehensive latency analysis.
|
|
6
|
+
*/
|
|
7
|
+
import { PERFORMANCE_TRACKING, PERFORMANCE_CONFIDENCE } from '../constants.js';
|
|
8
|
+
// Re-export centralized constant for backwards compatibility
|
|
9
|
+
export { PERFORMANCE_TRACKING as PERFORMANCE } from '../constants.js';
|
|
10
|
+
/**
 * Calculate statistical confidence for performance metrics.
 *
 * Confidence is determined by:
 *   1. Sample count - more samples = higher confidence
 *   2. Coefficient of variation (CV) - lower variability = higher confidence
 *
 * Only happy path samples (expectedOutcome 'success' or undefined) that
 * succeeded contribute to the confidence calculation. Validation samples
 * (expectedOutcome 'error') are counted separately, because a failure there
 * is the expected result and says nothing about tool problems.
 *
 * When `excludeWarmup` is enabled and more than one duration is available,
 * the first duration is left out of the variance calculation; it is treated
 * as cold-start overhead that would otherwise inflate the CV.
 *
 * `validationSamples` always reports the number of validation samples that
 * were correctly rejected (success === false and outcomeCorrect not false),
 * including in the "no successful happy path samples" result.
 *
 * @param samples - The latency samples to analyze
 * @param options - Optional configuration
 * @param options.excludeWarmup - Drop the first duration from the variance calculation (default: true)
 * @returns Performance confidence metrics
 */
export function calculatePerformanceConfidence(samples, options = {}) {
    const { excludeWarmup = true } = options;
    const totalTests = samples.length;
    // Handle no samples case
    if (totalTests === 0) {
        return {
            sampleCount: 0,
            successfulSamples: 0,
            validationSamples: 0,
            totalTests: 0,
            standardDeviation: 0,
            coefficientOfVariation: 0,
            confidenceLevel: 'low',
            recommendation: PERFORMANCE_CONFIDENCE.RECOMMENDATIONS.NO_SAMPLES,
        };
    }
    // Happy path tests: expectedOutcome === 'success' or undefined (backward compat)
    const happyPathSamples = samples.filter(s => s.expectedOutcome === 'success' || s.expectedOutcome === undefined);
    // Validation tests: expectedOutcome === 'error'
    const validationTestSamples = samples.filter(s => s.expectedOutcome === 'error');
    // A validation sample counts when the call failed as expected and was not
    // explicitly flagged as an incorrect outcome.
    const validationSuccesses = validationTestSamples.filter(s => !s.success && (s.outcomeCorrect === undefined || s.outcomeCorrect === true)).length;
    // For confidence, only use happy path samples that succeeded
    const allDurations = happyPathSamples.filter(s => s.success).map(s => s.durationMs);
    // Handle all failures case
    if (allDurations.length === 0) {
        return {
            sampleCount: 0,
            successfulSamples: 0,
            // Same meaning as in the normal result below: correctly rejected validation tests
            validationSamples: validationSuccesses,
            totalTests,
            standardDeviation: 0,
            coefficientOfVariation: 0,
            confidenceLevel: 'low',
            recommendation: 'No successful happy path samples; cannot calculate performance confidence',
        };
    }
    // Exclude the first sample (warmup) from the variance calculation when possible
    const durationsForVariance = excludeWarmup && allDurations.length > 1
        ? allDurations.slice(1)
        : allDurations;
    // Population variance over the post-warmup durations
    const meanForVariance = durationsForVariance.reduce((sum, d) => sum + d, 0) / durationsForVariance.length;
    const squaredDiffs = durationsForVariance.map(d => Math.pow(d - meanForVariance, 2));
    const variance = squaredDiffs.reduce((sum, d) => sum + d, 0) / durationsForVariance.length;
    const standardDeviation = Math.sqrt(variance);
    // CV = stdDev / mean, using the same post-warmup mean; a zero mean yields CV 0
    const coefficientOfVariation = meanForVariance > 0 ? standardDeviation / meanForVariance : 0;
    // The confidence tier is based on the successful happy path count, so
    // validation tests never lower it.
    const { confidenceLevel, recommendation } = determineConfidenceLevel(allDurations.length, coefficientOfVariation);
    return {
        sampleCount: allDurations.length, // Happy path successful samples only
        successfulSamples: allDurations.length,
        validationSamples: validationSuccesses, // Correctly rejected validation tests
        totalTests,
        standardDeviation,
        coefficientOfVariation,
        confidenceLevel,
        recommendation,
    };
}
|
|
97
|
+
/**
 * Map a sample count and coefficient of variation onto a confidence tier.
 *
 * A tier is met when the sample count reaches the tier's MIN_SAMPLES and the
 * CV does not exceed its MAX_CV. HIGH is checked first, then MEDIUM; anything
 * else is 'low' and carries a recommendation: a low-sample message when the
 * count is below the HIGH minimum, otherwise the high-variability message.
 *
 * @param sampleCount - Number of samples backing the measurement
 * @param coefficientOfVariation - stdDev / mean of those samples
 * @returns An object with `confidenceLevel` and, for 'low' only, `recommendation`
 */
function determineConfidenceLevel(sampleCount, coefficientOfVariation) {
    const { HIGH, MEDIUM, RECOMMENDATIONS } = PERFORMANCE_CONFIDENCE;
    const meetsTier = (tier) => sampleCount >= tier.MIN_SAMPLES && coefficientOfVariation <= tier.MAX_CV;
    if (meetsTier(HIGH)) {
        return { confidenceLevel: 'high' };
    }
    if (meetsTier(MEDIUM)) {
        return { confidenceLevel: 'medium' };
    }
    // Low confidence: explain whether sample count or variability is the cause
    const recommendation = sampleCount < HIGH.MIN_SAMPLES
        ? RECOMMENDATIONS.LOW_SAMPLES(sampleCount, HIGH.MIN_SAMPLES)
        : RECOMMENDATIONS.HIGH_VARIABILITY;
    return { confidenceLevel: 'low', recommendation };
}
|
|
121
|
+
/**
 * Derive performance confidence from already-aggregated metrics.
 *
 * Reads `sampleCount`, `avgMs` and `stdDevMs` from `metrics`. All samples are
 * reported as successful, i.e. the metrics are assumed to come from happy
 * path tests only. Use calculatePerformanceConfidence with raw samples when
 * validation and success outcomes need to be separated.
 *
 * @param metrics - Aggregated metrics providing sampleCount, avgMs and stdDevMs
 * @param options - Optional `validationSamples` (default 0) and `totalTests`
 *   (default: metrics.sampleCount) copied into the result
 * @returns Performance confidence metrics
 */
export function calculateConfidenceFromMetrics(metrics, options) {
    const { sampleCount, avgMs, stdDevMs } = metrics;
    const extras = options ?? {};
    const validationSamples = extras.validationSamples ?? 0;
    const totalTests = extras.totalTests ?? sampleCount;
    // Nothing measured: report low confidence with the "no samples" recommendation
    if (sampleCount === 0) {
        return {
            sampleCount: 0,
            successfulSamples: 0,
            validationSamples,
            totalTests,
            standardDeviation: 0,
            coefficientOfVariation: 0,
            confidenceLevel: 'low',
            recommendation: PERFORMANCE_CONFIDENCE.RECOMMENDATIONS.NO_SAMPLES,
        };
    }
    // CV = stdDev / mean; a non-positive mean yields CV 0
    let coefficientOfVariation = 0;
    if (avgMs > 0) {
        coefficientOfVariation = stdDevMs / avgMs;
    }
    const verdict = determineConfidenceLevel(sampleCount, coefficientOfVariation);
    return {
        sampleCount,
        successfulSamples: sampleCount, // Every sample is treated as a successful happy path sample
        validationSamples,
        totalTests,
        standardDeviation: stdDevMs,
        coefficientOfVariation,
        confidenceLevel: verdict.confidenceLevel,
        recommendation: verdict.recommendation,
    };
}
|
|
160
|
+
/**
 * Format a confidence result for display.
 *
 * Produces `<indicator> <label> (n=<sampleCount>)`, followed by
 * ` - <recommendation>` when the confidence carries one. The indicator and
 * its separating space are emitted only when an indicator is requested and
 * available, so the result never starts with a stray space.
 *
 * @param confidence - Confidence metrics (confidenceLevel, sampleCount, optional recommendation)
 * @param includeIndicator - Whether to prefix the label with the level's indicator (default: true)
 * @returns The formatted, single-line description
 */
export function formatConfidenceLevel(confidence, includeIndicator = true) {
    const level = confidence.confidenceLevel;
    const label = PERFORMANCE_CONFIDENCE.LABELS[level];
    const indicator = includeIndicator ? PERFORMANCE_CONFIDENCE.INDICATORS[level] : '';
    // Only add the separator when there is something to separate
    const prefix = indicator ? `${indicator} ` : '';
    const summary = `${prefix}${label} (n=${confidence.sampleCount})`;
    return confidence.recommendation
        ? `${summary} - ${confidence.recommendation}`
        : summary;
}
|
|
174
|
+
/**
 * Tell whether a confidence result is strong enough for reliable comparisons.
 *
 * @param confidence - Confidence metrics carrying a `confidenceLevel`
 * @returns true for 'high' or 'medium' confidence, false otherwise
 */
export function hasReliableConfidence(confidence) {
    const reliableLevels = ['high', 'medium'];
    return reliableLevels.includes(confidence.confidenceLevel);
}
|
|
180
|
+
/**
 * Aggregate raw latency samples into performance metrics.
 *
 * The tool name is taken from the first sample. Latency statistics
 * (percentiles, average, min, max, population standard deviation) are
 * computed over successful samples only; `sampleCount` and the confidence
 * calculation use the full sample list. When no sample succeeded, all
 * latency statistics and the success rate are reported as 0.
 *
 * @param samples - Latency samples; each is read for toolName, success and durationMs
 * @returns The aggregated metrics, or null when `samples` is empty
 */
export function calculateMetrics(samples) {
    if (samples.length === 0) {
        return null;
    }
    const { toolName } = samples[0];
    const succeeded = samples.filter(s => s.success);
    // Ascending order is required by the percentile and min/max lookups below
    const durations = succeeded.map(s => s.durationMs).sort((a, b) => a - b);
    // Confidence is always derived from the complete sample list
    const confidence = calculatePerformanceConfidence(samples);
    const measured = durations.length;
    if (measured === 0) {
        // All calls failed
        return {
            toolName,
            p50Ms: 0,
            p95Ms: 0,
            p99Ms: 0,
            successRate: 0,
            sampleCount: samples.length,
            avgMs: 0,
            minMs: 0,
            maxMs: 0,
            stdDevMs: 0,
            collectedAt: new Date(),
            confidence,
        };
    }
    const avgMs = durations.reduce((sum, d) => sum + d, 0) / measured;
    // Population standard deviation of the successful durations
    const sumOfSquares = durations.reduce((sum, d) => sum + Math.pow(d - avgMs, 2), 0);
    const stdDevMs = Math.sqrt(sumOfSquares / measured);
    return {
        toolName,
        p50Ms: calculatePercentile(durations, 50),
        p95Ms: calculatePercentile(durations, 95),
        p99Ms: calculatePercentile(durations, 99),
        successRate: succeeded.length / samples.length,
        sampleCount: samples.length,
        avgMs,
        minMs: durations[0],
        maxMs: durations[measured - 1],
        stdDevMs,
        collectedAt: new Date(),
        confidence,
    };
}
|
|
235
|
+
/**
 * Compute a percentile from values that are already sorted ascending.
 *
 * The percentile is mapped to a fractional rank in [0, length - 1]; when the
 * rank falls between two positions the result is linearly interpolated
 * between the neighbouring values.
 *
 * @param sortedValues - Values in ascending order
 * @param percentile - Percentile to compute, on a 0-100 scale
 * @returns 0 for an empty list, the sole value for a single-element list,
 *   otherwise the (possibly interpolated) percentile value
 */
function calculatePercentile(sortedValues, percentile) {
    const count = sortedValues.length;
    if (count === 0) {
        return 0;
    }
    if (count === 1) {
        return sortedValues[0];
    }
    const rank = (percentile / 100) * (count - 1);
    const below = Math.floor(rank);
    const above = Math.ceil(rank);
    const lowValue = sortedValues[below];
    // The rank lands exactly on an element: no interpolation needed
    if (below === above) {
        return lowValue;
    }
    const weight = rank - below;
    return lowValue + weight * (sortedValues[above] - lowValue);
}
|
|
255
|
+
/**
 * Build a performance baseline from calculated metrics.
 *
 * Copies the tool name, p50/p95/p99 latencies and success rate from
 * `metrics` and stamps the baseline with the current time.
 *
 * @param metrics - Metrics providing toolName, p50Ms, p95Ms, p99Ms and successRate
 * @param maxAllowedRegression - Regression threshold stored on the baseline
 *   (default: PERFORMANCE_TRACKING.DEFAULT_REGRESSION_THRESHOLD)
 * @returns The new performance baseline
 */
export function createPerformanceBaseline(metrics, maxAllowedRegression = PERFORMANCE_TRACKING.DEFAULT_REGRESSION_THRESHOLD) {
    const { toolName, p50Ms, p95Ms, p99Ms, successRate } = metrics;
    const establishedAt = new Date();
    return {
        toolName,
        baselineP50: p50Ms,
        baselineP95: p95Ms,
        baselineP99: p99Ms,
        baselineSuccessRate: successRate,
        maxAllowedRegression,
        establishedAt,
    };
}
|
|
269
|
+
/**
 * Extract performance baselines from a behavioral baseline.
 *
 * Iterates `baseline.tools` and creates an entry for every tool that has both
 * `baselineP50Ms` and `baselineP95Ms`; tools without that data are skipped.
 * A stored p99 is used when present, otherwise p99 is estimated as
 * p95 * 1.2. A missing success rate defaults to 1.0.
 *
 * @param baseline - Behavioral baseline providing `tools` and `createdAt`
 * @param regressionThreshold - Regression threshold stored on every entry
 *   (default: PERFORMANCE_TRACKING.DEFAULT_REGRESSION_THRESHOLD)
 * @returns Map from tool name to its performance baseline
 */
export function extractPerformanceBaselines(baseline, regressionThreshold = PERFORMANCE_TRACKING.DEFAULT_REGRESSION_THRESHOLD) {
    // Multiplier used to approximate p99 from p95 when no p99 was recorded
    const P99_FROM_P95_FACTOR = 1.2;
    const baselines = new Map();
    for (const tool of baseline.tools) {
        // Only create baseline if performance data exists
        if (tool.baselineP50Ms === undefined || tool.baselineP95Ms === undefined) {
            continue;
        }
        baselines.set(tool.name, {
            toolName: tool.name,
            baselineP50: tool.baselineP50Ms,
            baselineP95: tool.baselineP95Ms,
            // Prefer a stored p99; estimate from p95 only if it is not stored.
            // NOTE(review): `baselineP99Ms` is assumed to follow the p50/p95
            // field naming - confirm against the tool fingerprint type.
            baselineP99: tool.baselineP99Ms ?? tool.baselineP95Ms * P99_FROM_P95_FACTOR,
            baselineSuccessRate: tool.baselineSuccessRate ?? 1.0,
            maxAllowedRegression: regressionThreshold,
            establishedAt: baseline.createdAt,
        });
    }
    return baselines;
}
|
|
291
|
+
/**
 * Compare a tool's current metrics against its stored baseline.
 *
 * Confidence is taken from `current.confidence` when present, otherwise it
 * is derived via `calculateConfidenceFromMetrics`. Without a baseline the
 * result is a neutral "stable / no regression" record. With a baseline the
 * p50/p95/p99 regressions are computed, the trend follows p50, and a
 * regression is flagged when p50 or p95 exceeds the allowed threshold.
 *
 * @param current - Current metrics for the tool.
 * @param baseline - Baseline to compare against, or a falsy value if none.
 * @param regressionThreshold - Fallback threshold used only when the
 *   baseline does not carry its own `maxAllowedRegression`.
 * @returns Comparison record including trend, regression ratios, severity,
 *   summary text and confidence information.
 */
export function comparePerformance(current, baseline, regressionThreshold = PERFORMANCE_TRACKING.DEFAULT_REGRESSION_THRESHOLD) {
    // Reuse the confidence attached to the metrics, or derive one
    const confidence = current.confidence ?? calculateConfidenceFromMetrics(current);
    const isReliable = hasReliableConfidence(confidence);
    if (!baseline) {
        // Nothing to compare against: report a neutral result
        return {
            toolName: current.toolName,
            current,
            baseline: undefined,
            trend: 'stable',
            p50RegressionPercent: null,
            p95RegressionPercent: null,
            p99RegressionPercent: null,
            hasRegression: false,
            severity: 'none',
            summary: `No baseline for "${current.toolName}" - metrics recorded for future comparison.`,
            confidence,
            isReliable,
        };
    }
    // Relative change per percentile (null when it cannot be computed)
    const p50Delta = calculateRegression(baseline.baselineP50, current.p50Ms);
    const p95Delta = calculateRegression(baseline.baselineP95, current.p95Ms);
    const p99Delta = calculateRegression(baseline.baselineP99, current.p99Ms);
    // p50 is the primary metric for the trend
    const trend = determineTrend(p50Delta);
    // The baseline's own allowance takes precedence over the argument
    const allowed = baseline.maxAllowedRegression ?? regressionThreshold;
    const exceedsAllowed = (delta) => delta !== null && delta > allowed;
    const hasRegression = exceedsAllowed(p50Delta) || exceedsAllowed(p95Delta);
    const severity = determinePerformanceSeverity(p50Delta, p95Delta, allowed);
    const baseSummary = generateComparisonSummary(current.toolName, trend, p50Delta, p95Delta, hasRegression, allowed);
    // Append the confidence recommendation when the measurement is unreliable
    const summary = !isReliable && confidence.recommendation
        ? baseSummary + ` (Low confidence: ${confidence.recommendation})`
        : baseSummary;
    return {
        toolName: current.toolName,
        current,
        baseline,
        trend,
        p50RegressionPercent: p50Delta,
        p95RegressionPercent: p95Delta,
        p99RegressionPercent: p99Delta,
        hasRegression,
        severity,
        summary,
        confidence,
        isReliable,
    };
}
|
|
348
|
+
/**
 * Calculate regression percentage.
 * Returns positive for slower (regression), negative for faster (improvement).
 *
 * The value is a ratio relative to the baseline (0.5 means 50% slower).
 *
 * @param baseline - Baseline latency.
 * @param current - Current latency.
 * @returns `(current - baseline) / baseline`, or `null` when no meaningful
 *   ratio exists: the baseline is zero, negative, missing or non-finite, or
 *   the current value is missing or non-finite.
 */
function calculateRegression(baseline, current) {
    // Guard every input that would otherwise yield NaN, Infinity or a
    // sign-inverted ratio; callers already treat null as "no data".
    if (!Number.isFinite(baseline) || !Number.isFinite(current) || baseline <= 0) {
        return null;
    }
    return (current - baseline) / baseline;
}
|
|
358
|
+
/**
|
|
359
|
+
* Determine latency trend from regression percentage.
|
|
360
|
+
*/
|
|
361
|
+
function determineTrend(regression) {
|
|
362
|
+
if (regression === null) {
|
|
363
|
+
return 'stable';
|
|
364
|
+
}
|
|
365
|
+
if (regression <= PERFORMANCE_TRACKING.TREND_THRESHOLDS.improving) {
|
|
366
|
+
return 'improving';
|
|
367
|
+
}
|
|
368
|
+
if (regression >= PERFORMANCE_TRACKING.TREND_THRESHOLDS.degrading) {
|
|
369
|
+
return 'degrading';
|
|
370
|
+
}
|
|
371
|
+
return 'stable';
|
|
372
|
+
}
|
|
373
|
+
/**
|
|
374
|
+
* Determine severity based on regression percentages.
|
|
375
|
+
*/
|
|
376
|
+
function determinePerformanceSeverity(p50Regression, p95Regression, threshold) {
|
|
377
|
+
// Check if any significant regression
|
|
378
|
+
const maxRegression = Math.max(p50Regression ?? 0, p95Regression ?? 0);
|
|
379
|
+
if (maxRegression > threshold) {
|
|
380
|
+
return 'breaking';
|
|
381
|
+
}
|
|
382
|
+
if (maxRegression > PERFORMANCE_TRACKING.WARNING_THRESHOLD) {
|
|
383
|
+
return 'warning';
|
|
384
|
+
}
|
|
385
|
+
if (maxRegression > 0) {
|
|
386
|
+
return 'info';
|
|
387
|
+
}
|
|
388
|
+
return 'none';
|
|
389
|
+
}
|
|
390
|
+
/**
 * Generate human-readable comparison summary.
 *
 * @param toolName - Tool the summary is about.
 * @param trend - `'improving'`, `'degrading'` or anything else (treated as stable).
 * @param p50Regression - p50 regression ratio, or `null` when unavailable.
 * @param p95Regression - p95 regression ratio, or `null` when unavailable.
 * @param hasRegression - Whether the caller flagged a regression.
 * @param threshold - Allowed regression ratio, shown as a whole percent.
 * @returns One-line summary text.
 */
function generateComparisonSummary(toolName, trend, p50Regression, p95Regression, hasRegression, threshold) {
    // Ratio -> percentage text with one decimal, or 'N/A' when unavailable
    const asPercent = (ratio) => (ratio !== null ? (ratio * 100).toFixed(1) : 'N/A');
    // A regression detected on p95 alone must still be reported even when
    // p50 cannot be computed; previously it was masked by the "no data" text.
    const regressionOnP95Only = p50Regression === null && hasRegression && p95Regression !== null;
    if (p50Regression === null && !regressionOnP95Only) {
        return `No baseline performance data for "${toolName}".`;
    }
    const p50Percent = asPercent(p50Regression);
    const p95Percent = asPercent(p95Regression);
    const thresholdPercent = (threshold * 100).toFixed(0);
    if (hasRegression) {
        return `"${toolName}" performance REGRESSION: p50 ${p50Percent}% slower, p95 ${p95Percent}% slower (threshold: ${thresholdPercent}%)`;
    }
    if (trend === 'improving') {
        return `"${toolName}" performance improved: p50 ${Math.abs(parseFloat(p50Percent))}% faster`;
    }
    if (trend === 'degrading') {
        return `"${toolName}" performance slightly degraded: p50 ${p50Percent}% slower (within threshold)`;
    }
    return `"${toolName}" performance stable: p50 ${p50Percent}% change`;
}
|
|
411
|
+
/**
|
|
412
|
+
* Generate a complete performance report comparing current and baseline.
|
|
413
|
+
*/
|
|
414
|
+
export function generatePerformanceReport(currentMetrics, baselines, regressionThreshold = PERFORMANCE_TRACKING.DEFAULT_REGRESSION_THRESHOLD) {
|
|
415
|
+
const comparisons = [];
|
|
416
|
+
let regressionCount = 0;
|
|
417
|
+
let improvementCount = 0;
|
|
418
|
+
let stableCount = 0;
|
|
419
|
+
let lowConfidenceCount = 0;
|
|
420
|
+
let reliableRegressionCount = 0;
|
|
421
|
+
const lowConfidenceTools = [];
|
|
422
|
+
// Compare each tool
|
|
423
|
+
for (const [toolName, metrics] of currentMetrics) {
|
|
424
|
+
const baseline = baselines.get(toolName);
|
|
425
|
+
const comparison = comparePerformance(metrics, baseline, regressionThreshold);
|
|
426
|
+
comparisons.push(comparison);
|
|
427
|
+
if (comparison.hasRegression) {
|
|
428
|
+
regressionCount++;
|
|
429
|
+
if (comparison.isReliable) {
|
|
430
|
+
reliableRegressionCount++;
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
else if (comparison.trend === 'improving') {
|
|
434
|
+
improvementCount++;
|
|
435
|
+
}
|
|
436
|
+
else {
|
|
437
|
+
stableCount++;
|
|
438
|
+
}
|
|
439
|
+
// Track low confidence tools
|
|
440
|
+
if (!comparison.isReliable) {
|
|
441
|
+
lowConfidenceCount++;
|
|
442
|
+
lowConfidenceTools.push(toolName);
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
// Determine overall trend
|
|
446
|
+
let overallTrend = 'stable';
|
|
447
|
+
if (regressionCount > 0) {
|
|
448
|
+
overallTrend = 'degrading';
|
|
449
|
+
}
|
|
450
|
+
else if (improvementCount > stableCount) {
|
|
451
|
+
overallTrend = 'improving';
|
|
452
|
+
}
|
|
453
|
+
// Determine overall severity
|
|
454
|
+
const overallSeverity = comparisons.reduce((max, c) => {
|
|
455
|
+
const severityOrder = ['none', 'info', 'warning', 'breaking'];
|
|
456
|
+
return severityOrder.indexOf(c.severity) > severityOrder.indexOf(max) ? c.severity : max;
|
|
457
|
+
}, 'none');
|
|
458
|
+
// Generate summary
|
|
459
|
+
let summary = generateReportSummary(regressionCount, improvementCount, stableCount, comparisons.length);
|
|
460
|
+
// Add confidence summary if there are low confidence tools
|
|
461
|
+
if (lowConfidenceCount > 0) {
|
|
462
|
+
summary += ` (${lowConfidenceCount} tool(s) with low confidence)`;
|
|
463
|
+
}
|
|
464
|
+
return {
|
|
465
|
+
toolComparisons: comparisons,
|
|
466
|
+
regressionCount,
|
|
467
|
+
improvementCount,
|
|
468
|
+
stableCount,
|
|
469
|
+
overallTrend,
|
|
470
|
+
overallSeverity,
|
|
471
|
+
summary,
|
|
472
|
+
lowConfidenceCount,
|
|
473
|
+
lowConfidenceTools,
|
|
474
|
+
reliableRegressionCount,
|
|
475
|
+
};
|
|
476
|
+
}
|
|
477
|
+
/**
 * Build the one-line report summary from the tally counters.
 *
 * Only categories with a count above zero are mentioned, in the order
 * regressions, improvements, stable.
 *
 * @param regressions - Number of tools with a regression.
 * @param improvements - Number of tools with improved performance.
 * @param stable - Number of tools with stable performance.
 * @param total - Total tool count; used only in the "no data" message.
 * @returns Comma-separated summary ending with a period.
 */
function generateReportSummary(regressions, improvements, stable, total) {
    const parts = [
        [regressions, 'with performance regression'],
        [improvements, 'with improved performance'],
        [stable, 'with stable performance'],
    ]
        .filter(([count]) => count > 0)
        .map(([count, label]) => `${count} tool(s) ${label}`);
    return parts.length === 0
        ? `No performance data for ${total} tool(s).`
        : parts.join(', ') + '.';
}
|
|
496
|
+
/**
|
|
497
|
+
* Format performance metrics for display.
|
|
498
|
+
*/
|
|
499
|
+
export function formatMetrics(metrics) {
|
|
500
|
+
const lines = [
|
|
501
|
+
`Tool: ${metrics.toolName}`,
|
|
502
|
+
` p50: ${metrics.p50Ms.toFixed(1)}ms`,
|
|
503
|
+
` p95: ${metrics.p95Ms.toFixed(1)}ms`,
|
|
504
|
+
` p99: ${metrics.p99Ms.toFixed(1)}ms`,
|
|
505
|
+
` avg: ${metrics.avgMs.toFixed(1)}ms`,
|
|
506
|
+
` success: ${(metrics.successRate * 100).toFixed(1)}%`,
|
|
507
|
+
` samples: ${metrics.sampleCount}`,
|
|
508
|
+
];
|
|
509
|
+
// Add confidence if available
|
|
510
|
+
if (metrics.confidence) {
|
|
511
|
+
const confidenceLabel = formatConfidenceLevel(metrics.confidence, true);
|
|
512
|
+
lines.push(` confidence: ${confidenceLabel}`);
|
|
513
|
+
}
|
|
514
|
+
return lines.join('\n');
|
|
515
|
+
}
|
|
516
|
+
/**
|
|
517
|
+
* Format performance comparison for display.
|
|
518
|
+
*/
|
|
519
|
+
export function formatComparison(comparison) {
|
|
520
|
+
const lines = [
|
|
521
|
+
`Tool: ${comparison.toolName}`,
|
|
522
|
+
` Trend: ${comparison.trend.toUpperCase()}`,
|
|
523
|
+
];
|
|
524
|
+
if (comparison.p50RegressionPercent !== null) {
|
|
525
|
+
const sign = comparison.p50RegressionPercent >= 0 ? '+' : '';
|
|
526
|
+
lines.push(` p50 change: ${sign}${(comparison.p50RegressionPercent * 100).toFixed(1)}%`);
|
|
527
|
+
}
|
|
528
|
+
if (comparison.p95RegressionPercent !== null) {
|
|
529
|
+
const sign = comparison.p95RegressionPercent >= 0 ? '+' : '';
|
|
530
|
+
lines.push(` p95 change: ${sign}${(comparison.p95RegressionPercent * 100).toFixed(1)}%`);
|
|
531
|
+
}
|
|
532
|
+
// Add confidence information
|
|
533
|
+
if (comparison.confidence) {
|
|
534
|
+
const confidenceLabel = formatConfidenceLevel(comparison.confidence, true);
|
|
535
|
+
lines.push(` Confidence: ${confidenceLabel}`);
|
|
536
|
+
}
|
|
537
|
+
if (comparison.hasRegression) {
|
|
538
|
+
if (!comparison.isReliable) {
|
|
539
|
+
lines.push(` ⚠️ REGRESSION DETECTED (low confidence - may not be reliable)`);
|
|
540
|
+
}
|
|
541
|
+
else {
|
|
542
|
+
lines.push(` ⚠️ REGRESSION DETECTED`);
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
return lines.join('\n');
|
|
546
|
+
}
|
|
547
|
+
/**
 * Decide whether a comparison result is acceptable.
 *
 * @param comparison - Comparison record; only `hasRegression` is read, and
 *   only when `failOnRegression` is truthy.
 * @param failOnRegression - When falsy (the default) every result is
 *   acceptable; when truthy a regression makes the result unacceptable.
 * @returns `true` if the performance is acceptable.
 */
export function isPerformanceAcceptable(comparison, failOnRegression = false) {
    // Short-circuits so `comparison` is never touched unless regressions matter
    return !failOnRegression || !comparison.hasRegression;
}
|
|
556
|
+
/**
|
|
557
|
+
* Aggregate multiple samples into metrics grouped by tool.
|
|
558
|
+
*/
|
|
559
|
+
export function aggregateSamplesByTool(samples) {
|
|
560
|
+
const metrics = new Map();
|
|
561
|
+
// Group samples by tool
|
|
562
|
+
const groupedSamples = new Map();
|
|
563
|
+
for (const sample of samples) {
|
|
564
|
+
const existing = groupedSamples.get(sample.toolName) || [];
|
|
565
|
+
existing.push(sample);
|
|
566
|
+
groupedSamples.set(sample.toolName, existing);
|
|
567
|
+
}
|
|
568
|
+
// Calculate metrics for each tool
|
|
569
|
+
for (const [toolName, toolSamples] of groupedSamples) {
|
|
570
|
+
const toolMetrics = calculateMetrics(toolSamples);
|
|
571
|
+
if (toolMetrics) {
|
|
572
|
+
metrics.set(toolName, toolMetrics);
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
return metrics;
|
|
576
|
+
}
|
|
577
|
+
//# sourceMappingURL=performance-tracker.js.map
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enhanced GitHub PR comment generation for schema changes.
|
|
3
|
+
*
|
|
4
|
+
* This module generates detailed, actionable PR comments that help reviewers
|
|
5
|
+
* understand the impact of schema changes on downstream consumers.
|
|
6
|
+
*/
|
|
7
|
+
import type { BehavioralDiff, ToolDiff, ChangeSeverity } from './types.js';
|
|
8
|
+
import type { MigrationGuide } from './migration-generator.js';
|
|
9
|
+
/**
 * Color names that a severity badge may use.
 */
export type BadgeColor =
    | 'red'
    | 'orange'
    | 'blue'
    | 'green';
|
|
13
|
+
/**
 * One titled section of a PR comment.
 */
export interface CommentSection {
    /** Section heading. */
    title: string;
    /** Section body text. */
    content: string;
    /** Numeric priority of the section (presumably used for ordering - confirm against the generator). */
    priority: number;
    /** Optional collapsed flag (presumably renders the section folded - confirm against the generator). */
    collapsed?: boolean;
}
|
|
22
|
+
/**
 * Describes a workflow affected by a change, for use in PR comments.
 */
export interface AffectedWorkflow {
    /** Workflow name. */
    name: string;
    /** Workflow description. */
    description: string;
    /** Names of the tools involved in this workflow that are affected. */
    affectedTools: Array<string>;
    /** Severity associated with this workflow. */
    severity: ChangeSeverity;
}
|
|
31
|
+
/**
 * The full structure of a generated PR comment: its individual parts plus
 * the rendered markdown.
 */
export interface PRComment {
    /** Main title/header */
    title: string;
    /** Summary of changes */
    summary: string;
    /** Severity badge (label, color and message text) */
    badge: { label: string; color: BadgeColor; message: string };
    /** Detailed sections */
    sections: Array<CommentSection>;
    /** Quick action items */
    actionItems: Array<string>;
    /** Footer with metadata */
    footer: string;
    /** Full rendered markdown */
    markdown: string;
}
|
|
54
|
+
/**
 * Options controlling PR comment generation. Every option is optional.
 */
export interface PRCommentConfig {
    // Size limits

    /** Maximum tools to show in detail */
    maxDetailedTools?: number;
    /** Maximum changes per tool */
    maxChangesPerTool?: number;
    /** Maximum affected workflows to show */
    maxAffectedWorkflows?: number;

    // Migration examples

    /** Include migration examples */
    includeMigrationExamples?: boolean;
    /** Maximum migration examples */
    maxMigrationExamples?: number;

    // Rendering

    /** Include collapsible sections */
    useCollapsibleSections?: boolean;

    // Repository context

    /** Repository URL for linking */
    repositoryUrl?: string;
    /** Base branch name */
    baseBranch?: string;
    /** Head branch name */
    headBranch?: string;
}
|
|
77
|
+
/**
 * Map a severity level to its badge color.
 *
 * @param severity - Severity level to look up.
 * @returns The badge color for that severity.
 */
export declare function getBadgeColor(
    severity: ChangeSeverity,
): BadgeColor;
|
|
81
|
+
/**
 * Build a shields.io badge URL.
 *
 * @param label - Badge label text.
 * @param message - Badge message text.
 * @param color - Badge color.
 * @returns The badge URL.
 */
export declare function generateBadgeUrl(
    label: string,
    message: string,
    color: BadgeColor,
): string;
|
|
85
|
+
/**
 * Build the markdown for a badge.
 *
 * @param label - Badge label text.
 * @param message - Badge message text.
 * @param color - Badge color.
 * @returns Markdown string for the badge.
 */
export declare function generateBadgeMarkdown(
    label: string,
    message: string,
    color: BadgeColor,
): string;
|
|
89
|
+
/**
 * Produce the complete PR comment for a behavioral diff.
 *
 * @param diff - Behavioral diff to describe.
 * @param config - Optional generation settings.
 * @param migrationGuide - Optional migration guide to draw on.
 * @param affectedWorkflows - Optional list of affected workflows to include.
 * @returns The structured PR comment, including its rendered markdown.
 */
export declare function generatePRComment(
    diff: BehavioralDiff,
    config?: PRCommentConfig,
    migrationGuide?: MigrationGuide,
    affectedWorkflows?: AffectedWorkflow[],
): PRComment;
|
|
93
|
+
/**
 * Produce a compact PR comment, intended for simple diffs.
 *
 * @param diff - Behavioral diff to describe.
 * @returns The comment text.
 */
export declare function generateCompactPRComment(
    diff: BehavioralDiff,
): string;
|
|
97
|
+
/**
 * Produce a status-check summary for CI from a behavioral diff.
 *
 * @param diff - Behavioral diff to summarize.
 * @returns Object with the check `conclusion` (`'success'`, `'failure'` or
 *   `'neutral'`), a `title` and a `summary` text.
 */
export declare function generateCIStatusSummary(
    diff: BehavioralDiff,
): {
    conclusion:
        | 'success'
        | 'failure'
        | 'neutral';
    title: string;
    summary: string;
};
|
|
105
|
+
/**
 * Render tool diffs as a table.
 *
 * @param toolDiffs - Tool diffs to visualize.
 * @returns The table as a string.
 */
export declare function generateDiffTable(
    toolDiffs: Array<ToolDiff>,
): string;
|
|
109
|
+
/**
 * Decide whether the given diff should block a merge.
 *
 * @param diff - Behavioral diff to evaluate.
 * @param strictMode - Optional strict-mode flag (exact effect is defined by
 *   the implementation, which is not part of this declaration file).
 * @returns `true` when the merge should be blocked.
 */
export declare function shouldBlockMerge(
    diff: BehavioralDiff,
    strictMode?: boolean,
): boolean;
|
|
113
|
+
/**
 * Look up the emoji used for a severity level.
 *
 * @param severity - Severity level to look up.
 * @returns The emoji string for that severity.
 */
export declare function getSeverityEmoji(
    severity: ChangeSeverity,
): string;
|
|
117
|
+
//# sourceMappingURL=pr-comment-generator.d.ts.map
|