npm - @dotsetlabs/bellwether - Versions diffs - 0.10.0 - Mend

@dotsetlabs/bellwether 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (403) hide show

package/CHANGELOG.md +291 -0
package/LICENSE +21 -0
package/README.md +739 -0
package/dist/auth/credentials.d.ts +64 -0
package/dist/auth/credentials.js +218 -0
package/dist/auth/index.d.ts +6 -0
package/dist/auth/index.js +6 -0
package/dist/auth/keychain.d.ts +64 -0
package/dist/auth/keychain.js +268 -0
package/dist/baseline/ab-testing.d.ts +80 -0
package/dist/baseline/ab-testing.js +236 -0
package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
package/dist/baseline/ai-compatibility-scorer.js +606 -0
package/dist/baseline/calibration.d.ts +77 -0
package/dist/baseline/calibration.js +136 -0
package/dist/baseline/category-matching.d.ts +85 -0
package/dist/baseline/category-matching.js +289 -0
package/dist/baseline/change-impact-analyzer.d.ts +98 -0
package/dist/baseline/change-impact-analyzer.js +592 -0
package/dist/baseline/comparator.d.ts +64 -0
package/dist/baseline/comparator.js +916 -0
package/dist/baseline/confidence.d.ts +55 -0
package/dist/baseline/confidence.js +122 -0
package/dist/baseline/converter.d.ts +61 -0
package/dist/baseline/converter.js +585 -0
package/dist/baseline/dependency-analyzer.d.ts +89 -0
package/dist/baseline/dependency-analyzer.js +567 -0
package/dist/baseline/deprecation-tracker.d.ts +133 -0
package/dist/baseline/deprecation-tracker.js +322 -0
package/dist/baseline/diff.d.ts +55 -0
package/dist/baseline/diff.js +1584 -0
package/dist/baseline/documentation-scorer.d.ts +205 -0
package/dist/baseline/documentation-scorer.js +466 -0
package/dist/baseline/embeddings.d.ts +118 -0
package/dist/baseline/embeddings.js +251 -0
package/dist/baseline/error-analyzer.d.ts +198 -0
package/dist/baseline/error-analyzer.js +721 -0
package/dist/baseline/evaluation/evaluator.d.ts +42 -0
package/dist/baseline/evaluation/evaluator.js +323 -0
package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
package/dist/baseline/evaluation/golden-dataset.js +717 -0
package/dist/baseline/evaluation/index.d.ts +15 -0
package/dist/baseline/evaluation/index.js +15 -0
package/dist/baseline/evaluation/types.d.ts +186 -0
package/dist/baseline/evaluation/types.js +8 -0
package/dist/baseline/external-dependency-detector.d.ts +181 -0
package/dist/baseline/external-dependency-detector.js +524 -0
package/dist/baseline/golden-output.d.ts +162 -0
package/dist/baseline/golden-output.js +636 -0
package/dist/baseline/health-scorer.d.ts +174 -0
package/dist/baseline/health-scorer.js +451 -0
package/dist/baseline/incremental-checker.d.ts +97 -0
package/dist/baseline/incremental-checker.js +174 -0
package/dist/baseline/index.d.ts +31 -0
package/dist/baseline/index.js +42 -0
package/dist/baseline/migration-generator.d.ts +137 -0
package/dist/baseline/migration-generator.js +554 -0
package/dist/baseline/migrations.d.ts +60 -0
package/dist/baseline/migrations.js +197 -0
package/dist/baseline/performance-tracker.d.ts +214 -0
package/dist/baseline/performance-tracker.js +577 -0
package/dist/baseline/pr-comment-generator.d.ts +117 -0
package/dist/baseline/pr-comment-generator.js +546 -0
package/dist/baseline/response-fingerprint.d.ts +127 -0
package/dist/baseline/response-fingerprint.js +728 -0
package/dist/baseline/response-schema-tracker.d.ts +129 -0
package/dist/baseline/response-schema-tracker.js +420 -0
package/dist/baseline/risk-scorer.d.ts +54 -0
package/dist/baseline/risk-scorer.js +434 -0
package/dist/baseline/saver.d.ts +89 -0
package/dist/baseline/saver.js +554 -0
package/dist/baseline/scenario-generator.d.ts +151 -0
package/dist/baseline/scenario-generator.js +905 -0
package/dist/baseline/schema-compare.d.ts +86 -0
package/dist/baseline/schema-compare.js +557 -0
package/dist/baseline/schema-evolution.d.ts +189 -0
package/dist/baseline/schema-evolution.js +467 -0
package/dist/baseline/semantic.d.ts +203 -0
package/dist/baseline/semantic.js +908 -0
package/dist/baseline/synonyms.d.ts +60 -0
package/dist/baseline/synonyms.js +386 -0
package/dist/baseline/telemetry.d.ts +165 -0
package/dist/baseline/telemetry.js +294 -0
package/dist/baseline/test-pruner.d.ts +120 -0
package/dist/baseline/test-pruner.js +387 -0
package/dist/baseline/types.d.ts +449 -0
package/dist/baseline/types.js +5 -0
package/dist/baseline/version.d.ts +138 -0
package/dist/baseline/version.js +206 -0
package/dist/cache/index.d.ts +5 -0
package/dist/cache/index.js +5 -0
package/dist/cache/response-cache.d.ts +151 -0
package/dist/cache/response-cache.js +287 -0
package/dist/ci/index.d.ts +60 -0
package/dist/ci/index.js +342 -0
package/dist/cli/commands/auth.d.ts +12 -0
package/dist/cli/commands/auth.js +352 -0
package/dist/cli/commands/badge.d.ts +3 -0
package/dist/cli/commands/badge.js +74 -0
package/dist/cli/commands/baseline-accept.d.ts +15 -0
package/dist/cli/commands/baseline-accept.js +178 -0
package/dist/cli/commands/baseline-migrate.d.ts +12 -0
package/dist/cli/commands/baseline-migrate.js +164 -0
package/dist/cli/commands/baseline.d.ts +14 -0
package/dist/cli/commands/baseline.js +449 -0
package/dist/cli/commands/beta.d.ts +10 -0
package/dist/cli/commands/beta.js +231 -0
package/dist/cli/commands/check.d.ts +11 -0
package/dist/cli/commands/check.js +820 -0
package/dist/cli/commands/cloud/badge.d.ts +3 -0
package/dist/cli/commands/cloud/badge.js +74 -0
package/dist/cli/commands/cloud/diff.d.ts +6 -0
package/dist/cli/commands/cloud/diff.js +79 -0
package/dist/cli/commands/cloud/history.d.ts +6 -0
package/dist/cli/commands/cloud/history.js +102 -0
package/dist/cli/commands/cloud/link.d.ts +9 -0
package/dist/cli/commands/cloud/link.js +119 -0
package/dist/cli/commands/cloud/login.d.ts +7 -0
package/dist/cli/commands/cloud/login.js +499 -0
package/dist/cli/commands/cloud/projects.d.ts +6 -0
package/dist/cli/commands/cloud/projects.js +44 -0
package/dist/cli/commands/cloud/shared.d.ts +7 -0
package/dist/cli/commands/cloud/shared.js +42 -0
package/dist/cli/commands/cloud/teams.d.ts +8 -0
package/dist/cli/commands/cloud/teams.js +169 -0
package/dist/cli/commands/cloud/upload.d.ts +8 -0
package/dist/cli/commands/cloud/upload.js +181 -0
package/dist/cli/commands/contract.d.ts +11 -0
package/dist/cli/commands/contract.js +280 -0
package/dist/cli/commands/discover.d.ts +3 -0
package/dist/cli/commands/discover.js +82 -0
package/dist/cli/commands/eval.d.ts +9 -0
package/dist/cli/commands/eval.js +187 -0
package/dist/cli/commands/explore.d.ts +11 -0
package/dist/cli/commands/explore.js +437 -0
package/dist/cli/commands/feedback.d.ts +9 -0
package/dist/cli/commands/feedback.js +174 -0
package/dist/cli/commands/golden.d.ts +12 -0
package/dist/cli/commands/golden.js +407 -0
package/dist/cli/commands/history.d.ts +10 -0
package/dist/cli/commands/history.js +202 -0
package/dist/cli/commands/init.d.ts +9 -0
package/dist/cli/commands/init.js +219 -0
package/dist/cli/commands/interview.d.ts +3 -0
package/dist/cli/commands/interview.js +903 -0
package/dist/cli/commands/link.d.ts +10 -0
package/dist/cli/commands/link.js +169 -0
package/dist/cli/commands/login.d.ts +7 -0
package/dist/cli/commands/login.js +499 -0
package/dist/cli/commands/preset.d.ts +33 -0
package/dist/cli/commands/preset.js +297 -0
package/dist/cli/commands/profile.d.ts +33 -0
package/dist/cli/commands/profile.js +286 -0
package/dist/cli/commands/registry.d.ts +11 -0
package/dist/cli/commands/registry.js +146 -0
package/dist/cli/commands/shared.d.ts +79 -0
package/dist/cli/commands/shared.js +196 -0
package/dist/cli/commands/teams.d.ts +8 -0
package/dist/cli/commands/teams.js +169 -0
package/dist/cli/commands/test.d.ts +9 -0
package/dist/cli/commands/test.js +500 -0
package/dist/cli/commands/upload.d.ts +8 -0
package/dist/cli/commands/upload.js +223 -0
package/dist/cli/commands/validate-config.d.ts +6 -0
package/dist/cli/commands/validate-config.js +35 -0
package/dist/cli/commands/verify.d.ts +11 -0
package/dist/cli/commands/verify.js +283 -0
package/dist/cli/commands/watch.d.ts +12 -0
package/dist/cli/commands/watch.js +253 -0
package/dist/cli/index.d.ts +3 -0
package/dist/cli/index.js +178 -0
package/dist/cli/interactive.d.ts +47 -0
package/dist/cli/interactive.js +216 -0
package/dist/cli/output/terminal-reporter.d.ts +19 -0
package/dist/cli/output/terminal-reporter.js +104 -0
package/dist/cli/output.d.ts +226 -0
package/dist/cli/output.js +438 -0
package/dist/cli/utils/env.d.ts +5 -0
package/dist/cli/utils/env.js +14 -0
package/dist/cli/utils/progress.d.ts +59 -0
package/dist/cli/utils/progress.js +206 -0
package/dist/cli/utils/server-context.d.ts +10 -0
package/dist/cli/utils/server-context.js +36 -0
package/dist/cloud/auth.d.ts +144 -0
package/dist/cloud/auth.js +374 -0
package/dist/cloud/client.d.ts +24 -0
package/dist/cloud/client.js +65 -0
package/dist/cloud/http-client.d.ts +38 -0
package/dist/cloud/http-client.js +215 -0
package/dist/cloud/index.d.ts +23 -0
package/dist/cloud/index.js +25 -0
package/dist/cloud/mock-client.d.ts +107 -0
package/dist/cloud/mock-client.js +545 -0
package/dist/cloud/types.d.ts +515 -0
package/dist/cloud/types.js +15 -0
package/dist/config/defaults.d.ts +160 -0
package/dist/config/defaults.js +169 -0
package/dist/config/loader.d.ts +24 -0
package/dist/config/loader.js +122 -0
package/dist/config/template.d.ts +42 -0
package/dist/config/template.js +647 -0
package/dist/config/validator.d.ts +2112 -0
package/dist/config/validator.js +658 -0
package/dist/constants/cloud.d.ts +107 -0
package/dist/constants/cloud.js +110 -0
package/dist/constants/core.d.ts +521 -0
package/dist/constants/core.js +556 -0
package/dist/constants/testing.d.ts +1283 -0
package/dist/constants/testing.js +1568 -0
package/dist/constants.d.ts +10 -0
package/dist/constants.js +10 -0
package/dist/contract/index.d.ts +6 -0
package/dist/contract/index.js +5 -0
package/dist/contract/validator.d.ts +177 -0
package/dist/contract/validator.js +574 -0
package/dist/cost/index.d.ts +6 -0
package/dist/cost/index.js +5 -0
package/dist/cost/tracker.d.ts +134 -0
package/dist/cost/tracker.js +313 -0
package/dist/discovery/discovery.d.ts +16 -0
package/dist/discovery/discovery.js +173 -0
package/dist/discovery/types.d.ts +51 -0
package/dist/discovery/types.js +2 -0
package/dist/docs/agents.d.ts +3 -0
package/dist/docs/agents.js +995 -0
package/dist/docs/contract.d.ts +51 -0
package/dist/docs/contract.js +1681 -0
package/dist/docs/generator.d.ts +4 -0
package/dist/docs/generator.js +4 -0
package/dist/docs/html-reporter.d.ts +9 -0
package/dist/docs/html-reporter.js +757 -0
package/dist/docs/index.d.ts +10 -0
package/dist/docs/index.js +11 -0
package/dist/docs/junit-reporter.d.ts +18 -0
package/dist/docs/junit-reporter.js +210 -0
package/dist/docs/report.d.ts +14 -0
package/dist/docs/report.js +44 -0
package/dist/docs/sarif-reporter.d.ts +19 -0
package/dist/docs/sarif-reporter.js +335 -0
package/dist/docs/shared.d.ts +35 -0
package/dist/docs/shared.js +162 -0
package/dist/docs/templates.d.ts +12 -0
package/dist/docs/templates.js +76 -0
package/dist/errors/index.d.ts +6 -0
package/dist/errors/index.js +6 -0
package/dist/errors/retry.d.ts +92 -0
package/dist/errors/retry.js +323 -0
package/dist/errors/types.d.ts +321 -0
package/dist/errors/types.js +584 -0
package/dist/index.d.ts +32 -0
package/dist/index.js +32 -0
package/dist/interview/dependency-resolver.d.ts +11 -0
package/dist/interview/dependency-resolver.js +32 -0
package/dist/interview/interviewer.d.ts +232 -0
package/dist/interview/interviewer.js +1939 -0
package/dist/interview/mock-response-generator.d.ts +7 -0
package/dist/interview/mock-response-generator.js +102 -0
package/dist/interview/orchestrator.d.ts +237 -0
package/dist/interview/orchestrator.js +1296 -0
package/dist/interview/rate-limiter.d.ts +15 -0
package/dist/interview/rate-limiter.js +55 -0
package/dist/interview/response-validator.d.ts +10 -0
package/dist/interview/response-validator.js +132 -0
package/dist/interview/schema-inferrer.d.ts +8 -0
package/dist/interview/schema-inferrer.js +71 -0
package/dist/interview/schema-test-generator.d.ts +71 -0
package/dist/interview/schema-test-generator.js +834 -0
package/dist/interview/smart-value-generator.d.ts +155 -0
package/dist/interview/smart-value-generator.js +554 -0
package/dist/interview/stateful-test-runner.d.ts +19 -0
package/dist/interview/stateful-test-runner.js +106 -0
package/dist/interview/types.d.ts +561 -0
package/dist/interview/types.js +2 -0
package/dist/llm/anthropic.d.ts +41 -0
package/dist/llm/anthropic.js +355 -0
package/dist/llm/client.d.ts +123 -0
package/dist/llm/client.js +42 -0
package/dist/llm/factory.d.ts +38 -0
package/dist/llm/factory.js +145 -0
package/dist/llm/fallback.d.ts +140 -0
package/dist/llm/fallback.js +379 -0
package/dist/llm/index.d.ts +18 -0
package/dist/llm/index.js +15 -0
package/dist/llm/ollama.d.ts +37 -0
package/dist/llm/ollama.js +330 -0
package/dist/llm/openai.d.ts +25 -0
package/dist/llm/openai.js +320 -0
package/dist/llm/token-budget.d.ts +161 -0
package/dist/llm/token-budget.js +395 -0
package/dist/logging/logger.d.ts +70 -0
package/dist/logging/logger.js +130 -0
package/dist/metrics/collector.d.ts +106 -0
package/dist/metrics/collector.js +547 -0
package/dist/metrics/index.d.ts +7 -0
package/dist/metrics/index.js +7 -0
package/dist/metrics/prometheus.d.ts +20 -0
package/dist/metrics/prometheus.js +241 -0
package/dist/metrics/types.d.ts +209 -0
package/dist/metrics/types.js +5 -0
package/dist/persona/builtins.d.ts +54 -0
package/dist/persona/builtins.js +219 -0
package/dist/persona/index.d.ts +8 -0
package/dist/persona/index.js +8 -0
package/dist/persona/loader.d.ts +30 -0
package/dist/persona/loader.js +190 -0
package/dist/persona/types.d.ts +144 -0
package/dist/persona/types.js +5 -0
package/dist/persona/validation.d.ts +94 -0
package/dist/persona/validation.js +332 -0
package/dist/prompts/index.d.ts +5 -0
package/dist/prompts/index.js +5 -0
package/dist/prompts/templates.d.ts +180 -0
package/dist/prompts/templates.js +431 -0
package/dist/registry/client.d.ts +49 -0
package/dist/registry/client.js +191 -0
package/dist/registry/index.d.ts +7 -0
package/dist/registry/index.js +6 -0
package/dist/registry/types.d.ts +140 -0
package/dist/registry/types.js +6 -0
package/dist/scenarios/evaluator.d.ts +43 -0
package/dist/scenarios/evaluator.js +206 -0
package/dist/scenarios/index.d.ts +10 -0
package/dist/scenarios/index.js +9 -0
package/dist/scenarios/loader.d.ts +20 -0
package/dist/scenarios/loader.js +285 -0
package/dist/scenarios/types.d.ts +153 -0
package/dist/scenarios/types.js +8 -0
package/dist/security/index.d.ts +17 -0
package/dist/security/index.js +18 -0
package/dist/security/payloads.d.ts +61 -0
package/dist/security/payloads.js +268 -0
package/dist/security/security-tester.d.ts +42 -0
package/dist/security/security-tester.js +582 -0
package/dist/security/types.d.ts +166 -0
package/dist/security/types.js +8 -0
package/dist/transport/base-transport.d.ts +59 -0
package/dist/transport/base-transport.js +38 -0
package/dist/transport/http-transport.d.ts +67 -0
package/dist/transport/http-transport.js +238 -0
package/dist/transport/mcp-client.d.ts +141 -0
package/dist/transport/mcp-client.js +496 -0
package/dist/transport/sse-transport.d.ts +88 -0
package/dist/transport/sse-transport.js +316 -0
package/dist/transport/stdio-transport.d.ts +43 -0
package/dist/transport/stdio-transport.js +238 -0
package/dist/transport/types.d.ts +125 -0
package/dist/transport/types.js +16 -0
package/dist/utils/concurrency.d.ts +123 -0
package/dist/utils/concurrency.js +213 -0
package/dist/utils/formatters.d.ts +16 -0
package/dist/utils/formatters.js +37 -0
package/dist/utils/index.d.ts +8 -0
package/dist/utils/index.js +8 -0
package/dist/utils/jsonpath.d.ts +87 -0
package/dist/utils/jsonpath.js +326 -0
package/dist/utils/markdown.d.ts +113 -0
package/dist/utils/markdown.js +265 -0
package/dist/utils/network.d.ts +14 -0
package/dist/utils/network.js +17 -0
package/dist/utils/sanitize.d.ts +92 -0
package/dist/utils/sanitize.js +191 -0
package/dist/utils/semantic.d.ts +194 -0
package/dist/utils/semantic.js +1051 -0
package/dist/utils/smart-truncate.d.ts +94 -0
package/dist/utils/smart-truncate.js +361 -0
package/dist/utils/timeout.d.ts +153 -0
package/dist/utils/timeout.js +205 -0
package/dist/utils/yaml-parser.d.ts +58 -0
package/dist/utils/yaml-parser.js +86 -0
package/dist/validation/index.d.ts +32 -0
package/dist/validation/index.js +32 -0
package/dist/validation/semantic-test-generator.d.ts +50 -0
package/dist/validation/semantic-test-generator.js +176 -0
package/dist/validation/semantic-types.d.ts +66 -0
package/dist/validation/semantic-types.js +94 -0
package/dist/validation/semantic-validator.d.ts +38 -0
package/dist/validation/semantic-validator.js +340 -0
package/dist/verification/index.d.ts +6 -0
package/dist/verification/index.js +5 -0
package/dist/verification/types.d.ts +133 -0
package/dist/verification/types.js +5 -0
package/dist/verification/verifier.d.ts +30 -0
package/dist/verification/verifier.js +309 -0
package/dist/version.d.ts +19 -0
package/dist/version.js +48 -0
package/dist/workflow/auto-generator.d.ts +27 -0
package/dist/workflow/auto-generator.js +513 -0
package/dist/workflow/discovery.d.ts +40 -0
package/dist/workflow/discovery.js +195 -0
package/dist/workflow/executor.d.ts +82 -0
package/dist/workflow/executor.js +611 -0
package/dist/workflow/index.d.ts +10 -0
package/dist/workflow/index.js +10 -0
package/dist/workflow/loader.d.ts +24 -0
package/dist/workflow/loader.js +194 -0
package/dist/workflow/state-tracker.d.ts +98 -0
package/dist/workflow/state-tracker.js +424 -0
package/dist/workflow/types.d.ts +337 -0
package/dist/workflow/types.js +5 -0
package/package.json +94 -0
package/schemas/bellwether-check.schema.json +651 -0

package/dist/baseline/calibration.js ADDED Viewed

@@ -0,0 +1,136 @@
+/**
+ * Confidence Calibration for Drift Detection
+ *
+ * Calibrates raw confidence scores to match actual accuracy.
+ * A calibrated confidence of 80% means the algorithm is correct ~80% of the time
+ * when it reports that confidence level.
+ *
+ * Calibration is based on evaluation against the golden dataset.
+ */
+/**
+ * Default calibration model based on golden dataset evaluation.
+ *
+ * calibrateConfidence treats each entry as the half-open raw-score range [min, max). These values should be updated as the algorithm improves.
+ * Current baseline: v1.0.1 (50 test cases). NOTE(review): the sampleCount values below sum to 60 - confirm which figure is current.
+ */
+export const DEFAULT_CALIBRATION_MODEL = [
+    // High confidence predictions (max of 101 lets a raw score of 100 fall inside the half-open top bucket)
+    { min: 90, max: 101, calibratedAccuracy: 85, sampleCount: 12 },
+    { min: 80, max: 90, calibratedAccuracy: 75, sampleCount: 15 },
+    // Medium confidence predictions
+    { min: 70, max: 80, calibratedAccuracy: 65, sampleCount: 10 },
+    { min: 60, max: 70, calibratedAccuracy: 55, sampleCount: 8 },
+    // Low confidence predictions
+    { min: 50, max: 60, calibratedAccuracy: 45, sampleCount: 5 },
+    { min: 0, max: 50, calibratedAccuracy: 35, sampleCount: 10 },
+];
+/**
+ * Calibrate a raw confidence score by returning the calibratedAccuracy of the first bucket that contains it.
+ *
+ * @param rawScore - Raw confidence score (0-100)
+ * @param model - Calibration model to use (defaults to DEFAULT_CALIBRATION_MODEL)
+ * @returns The matching bucket's calibratedAccuracy, or rawScore unchanged when no bucket contains it
+ */
+export function calibrateConfidence(rawScore, model = DEFAULT_CALIBRATION_MODEL) {
+    // First bucket in model order with min <= rawScore < max (max is exclusive)
+    const bucket = model.find(b => rawScore >= b.min && rawScore < b.max);
+    if (!bucket) {
+        // Score outside all buckets (for example below every min, at or above every max, or NaN), return as-is
+        return rawScore;
+    }
+    return bucket.calibratedAccuracy;
+}
+/**
+ * Format a raw confidence score as its calibrated percentage string (always uses the default calibration model).
+ *
+ * @param rawScore - Raw confidence score
+ * @param showRaw - Whether to show raw score alongside calibrated (only appended when the two values differ)
+ * @returns Formatted string, for example 75% or, with showRaw, 75% (raw: 85%)
+ */
+export function formatCalibratedConfidence(rawScore, showRaw = false) {
+    const calibrated = calibrateConfidence(rawScore);
+    if (showRaw && calibrated !== rawScore) {
+        return `${calibrated}% (raw: ${rawScore}%)`;
+    }
+    return `${calibrated}%`;
+}
+/**
+ * Get confidence label from the calibrated score (default model): high at 75 or above, medium at 55 or above, low at 40 or above, otherwise very-low.
+ */
+export function getCalibratedConfidenceLabel(rawScore) {
+    const calibrated = calibrateConfidence(rawScore);
+    if (calibrated >= 75)
+        return 'high';
+    if (calibrated >= 55)
+        return 'medium';
+    if (calibrated >= 40)
+        return 'low';
+    return 'very-low';
+}
+/**
+ * Check if the calibrated confidence for a raw score meets a threshold (calibrated with the default model).
+ *
+ * @param rawScore - Raw confidence score
+ * @param threshold - Minimum required calibrated confidence (inclusive)
+ * @returns True if calibrated confidence is greater than or equal to threshold
+ */
+export function meetsCalibratedThreshold(rawScore, threshold) {
+    return calibrateConfidence(rawScore) >= threshold;
+}
+/**
+ * Build a new calibration model from evaluation results; no existing model is read or modified.
+ * This recalculates accuracy for each fixed bucket from test results; a bucket with no results falls back to its range midpoint with sampleCount 0.
+ *
+ * @param results - Array of {predictedConfidence, wasCorrect} pairs; wasCorrect is counted by truthiness, and confidences outside 0 to below 101 land in no bucket
+ * @returns New calibration model with one entry per bucket range, accuracy rounded to a whole percent
+ */
+export function updateCalibrationModel(results) {
+    const bucketRanges = [
+        { min: 90, max: 101 },
+        { min: 80, max: 90 },
+        { min: 70, max: 80 },
+        { min: 60, max: 70 },
+        { min: 50, max: 60 },
+        { min: 0, max: 50 },
+    ];
+    return bucketRanges.map(range => {
+        const bucketResults = results.filter(r => r.predictedConfidence >= range.min && r.predictedConfidence < range.max);
+        if (bucketResults.length === 0) {
+            return {
+                ...range,
+                calibratedAccuracy: (range.min + range.max) / 2,
+                sampleCount: 0,
+            };
+        }
+        const correctCount = bucketResults.filter(r => r.wasCorrect).length;
+        const accuracy = Math.round((correctCount / bucketResults.length) * 100);
+        return {
+            ...range,
+            calibratedAccuracy: accuracy,
+            sampleCount: bucketResults.length,
+        };
+    });
+}
+/**
+ * Calculate calibration error (ECE - Expected Calibration Error), using each bucket's range midpoint as its expected confidence.
+ * Per-bucket errors are weighted by sampleCount. Lower is better. 0 = perfectly calibrated.
+ *
+ * @param model - Calibration model (buckets with sampleCount 0 are ignored)
+ * @returns ECE as a percentage (0-100), rounded to a whole number; 0 when the model has no samples
+ */
+export function calculateCalibrationError(model) {
+    let totalError = 0;
+    let totalSamples = 0;
+    for (const bucket of model) {
+        if (bucket.sampleCount > 0) {
+            const midpoint = (bucket.min + bucket.max) / 2;
+            const error = Math.abs(midpoint - bucket.calibratedAccuracy);
+            totalError += error * bucket.sampleCount;
+            totalSamples += bucket.sampleCount;
+        }
+    }
+    if (totalSamples === 0)
+        return 0;
+    return Math.round(totalError / totalSamples);
+}
+//# sourceMappingURL=calibration.js.map

package/dist/baseline/category-matching.d.ts ADDED Viewed

@@ -0,0 +1,85 @@
+/**
+ * Category Matching and Relationship Scoring
+ *
+ * Provides multi-category detection and relationship scoring for improved
+ * recall in semantic matching. Instead of first-match-wins, this module
+ * extracts ALL matching categories and scores relationships between them.
+ *
+ * Key improvements over single-category matching:
+ * 1. Multi-category detection - finds all matching categories
+ * 2. Relationship scoring - related categories get partial credit
+ * 3. Best-match selection - finds highest-confidence category pair
+ */
+/**
+ * Result of category extraction with confidence: the category name, a numeric confidence, and the keywords reported as matched.
+ */
+export interface CategoryMatch {
+    category: string;
+    confidence: number;
+    matchedKeywords: string[];
+}
+/**
+ * Security category relationship groups, declared as a map from group name to the category names in that group.
+ * Categories in the same group are related and should get partial credit.
+ *
+ * IMPORTANT (v1.3.0): Groups are now more conservative to prevent false positives.
+ * - Authentication and Authorization are NOT grouped (they're different concerns)
+ * - Injection types are more narrowly grouped
+ * - Only truly similar vulnerabilities are grouped
+ */
+export declare const SECURITY_CATEGORY_GROUPS: Record<string, string[]>;
+/**
+ * Limitation category relationship groups, declared as a map from group name to the category names in that group.
+ */
+export declare const LIMITATION_CATEGORY_GROUPS: Record<string, string[]>;
+/**
+ * Extract ALL matching security categories from text with confidence scores.
+ * Unlike single-category extraction, this returns every match as a CategoryMatch, ranked by confidence.
+ */
+export declare function extractSecurityCategories(text: string): CategoryMatch[];
+/**
+ * Extract ALL matching limitation categories from text with confidence scores, returned as CategoryMatch entries.
+ */
+export declare function extractLimitationCategories(text: string): CategoryMatch[];
+/**
+ * Calculate relationship score between two security categories.
+ * Returns 0-100 where:
+ * - 100: Same category
+ * - 70-90: Categories in same group
+ * - 40-60: Related categories
+ * - 0-30: Unrelated categories (NOTE(review): the listed bands leave gaps such as 31-39 and 61-69 - confirm exact values against the implementation)
+ */
+export declare function calculateSecurityCategoryRelationship(cat1: string, cat2: string): number;
+/**
+ * Calculate relationship score between two limitation categories (presumably on the same 0-100 scale as the security variant - TODO confirm).
+ */
+export declare function calculateLimitationCategoryRelationship(cat1: string, cat2: string): number;
+/**
+ * Find the best security category match between two lists of extracted categories (for example, one list per text).
+ * Returns the chosen category pair with its relationship score and combined confidence, or null (presumably when no pair can be formed - TODO confirm).
+ */
+export declare function findBestSecurityCategoryMatch(categories1: CategoryMatch[], categories2: CategoryMatch[]): {
+    cat1: string;
+    cat2: string;
+    relationshipScore: number;
+    combinedConfidence: number;
+} | null;
+/**
+ * Find the best limitation category match between two lists of extracted categories; returns the chosen pair with its scores, or null.
+ */
+export declare function findBestLimitationCategoryMatch(categories1: CategoryMatch[], categories2: CategoryMatch[]): {
+    cat1: string;
+    cat2: string;
+    relationshipScore: number;
+    combinedConfidence: number;
+} | null;
+/**
+ * Check if two security categories are considered matching.
+ * Uses relationship scoring for partial credit (the score threshold is not visible in this declaration).
+ */
+export declare function securityCategoriesMatch(cat1: string, cat2: string): boolean;
+/**
+ * Check if two limitation categories are considered matching (the matching criteria are not visible in this declaration).
+ */
+export declare function limitationCategoriesMatch(cat1: string, cat2: string): boolean;
+//# sourceMappingURL=category-matching.d.ts.map

package/dist/baseline/category-matching.js ADDED Viewed

@@ -0,0 +1,289 @@
+/**
+ * Category Matching and Relationship Scoring
+ *
+ * Provides multi-category detection and relationship scoring for improved
+ * recall in semantic matching. Instead of first-match-wins, this module
+ * extracts ALL matching categories and scores relationships between them.
+ *
+ * Key improvements over single-category matching:
+ * 1. Multi-category detection - finds all matching categories
+ * 2. Relationship scoring - related categories get partial credit
+ * 3. Best-match selection - finds highest-confidence category pair
+ */
+import { EXTENDED_SECURITY_KEYWORDS } from '../utils/semantic.js';
+/**
+ * Category relationship groups.
+ * Categories in the same group are related and should get partial credit.
+ *
+ * IMPORTANT (v1.3.0): Groups are now more conservative to prevent false positives.
+ * - Authentication and Authorization are NOT grouped (they're different concerns)
+ * - Injection types are more narrowly grouped
+ * - Only truly similar vulnerabilities are grouped
+ *
+ * Used by calculateSecurityCategoryRelationship: two different categories listed in
+ * the same array score 70, unless CATEGORY_SIMILARITY has an entry for the pair
+ * (that table is consulted first). Single-member groups therefore never relate
+ * two different categories.
+ */
+export const SECURITY_CATEGORY_GROUPS = {
+    // File/path related vulnerabilities (narrowed)
+    file_access: ['path_traversal', 'file_upload'],
+    // SQL-specific injection (narrowed - SQL and command injection are different)
+    sql_issues: ['sql_injection'],
+    // Command execution issues
+    command_issues: ['command_injection'],
+    // XSS-specific
+    xss_issues: ['xss', 'output_encoding'],
+    // Access control (without auth/authz - they're different)
+    access: ['access_control'],
+    // Data handling issues (narrowed)
+    data: ['information_disclosure', 'cryptography'],
+    // Input handling
+    input_handling: ['input_validation'],
+    // Server-side issues (narrowed)
+    server_side: ['ssrf'],
+    // Deserialization standalone
+    deserialization_issues: ['deserialization'],
+    // XXE standalone (different from deserialization despite both being data handling)
+    xxe_issues: ['xxe'],
+};
+/**
+ * Limitation category relationship groups.
+ *
+ * Two different categories that appear in the same array score 70 in
+ * calculateLimitationCategoryRelationship. 'rate_limit' is listed in both
+ * `resource` and `temporal`, so it is related to the members of either group.
+ */
+export const LIMITATION_CATEGORY_GROUPS = {
+    // Resource constraints
+    resource: ['size_limit', 'memory', 'rate_limit'],
+    // Time-related
+    temporal: ['timeout', 'rate_limit'],
+    // Access-related
+    access: ['permission', 'platform'],
+    // Format-related
+    format: ['encoding', 'format'],
+};
+/**
+ * Direct category similarity scores (0-100).
+ * Used for categories that are similar but not in the same group.
+ *
+ * IMPORTANT (v1.3.0): Reduced similarity scores to prevent false positives.
+ * Authentication vs Authorization are now considered DIFFERENT (not related).
+ * Only truly similar categories get partial credit.
+ *
+ * Lookups in calculateSecurityCategoryRelationship try both key orders
+ * (cat1 -> cat2, then cat2 -> cat1), so each pair only needs to be listed once.
+ * An entry here takes precedence over the same-group score of 70.
+ */
+const CATEGORY_SIMILARITY = {
+    path_traversal: {
+        file_upload: 40, // Reduced - only somewhat related
+        information_disclosure: 30, // Reduced
+    },
+    sql_injection: {
+        input_validation: 40, // Reduced - input validation is generic
+    },
+    xss: {
+        output_encoding: 50, // Reduced
+        input_validation: 30, // Reduced
+    },
+    // IMPORTANT: authentication and authorization are now DIFFERENT
+    // They are distinct security concerns and should not match
+    authentication: {
+        session_management: 40, // Only session mgmt is somewhat related
+    },
+    authorization: {
+        access_control: 50, // Access control is related but distinct
+    },
+    input_validation: {
+        output_encoding: 40,
+    },
+};
+/**
+ * Extract ALL matching security categories from text with confidence scores.
+ * Unlike single-category extraction, this returns all matches ranked by confidence.
+ *
+ * @param text - Text to scan; lower-cased before the keyword lookup.
+ * @returns One `{ category, confidence, matchedKeywords }` entry per category in
+ *   EXTENDED_SECURITY_KEYWORDS with at least one keyword hit, highest confidence
+ *   first. Empty when nothing matches.
+ *
+ * NOTE(review): matching is a plain substring test against the lower-cased text,
+ * which assumes the keywords themselves are stored lower-case - confirm in
+ * ../utils/semantic.js.
+ */
+export function extractSecurityCategories(text) {
+    const matches = [];
+    const lowerText = text.toLowerCase();
+    for (const [category, keywords] of Object.entries(EXTENDED_SECURITY_KEYWORDS)) {
+        const matchedKeywords = keywords.filter(keyword => lowerText.includes(keyword));
+        if (matchedKeywords.length > 0) {
+            // Calculate confidence based on keyword match quality
+            const confidence = calculateCategoryConfidence(text, matchedKeywords, keywords);
+            matches.push({
+                category,
+                confidence,
+                matchedKeywords,
+            });
+        }
+    }
+    // Sort by confidence (highest first)
+    return matches.sort((a, b) => b.confidence - a.confidence);
+}
+/**
+ * Limitation category keywords for multi-category extraction.
+ *
+ * Keys are the limitation category names; values are the lower-case keywords
+ * that signal the category.
+ *
+ * NOTE(review): extractLimitationCategories matches these with a plain substring
+ * test, so short entries such as 'mb', 'kb', 'ram' or 'type' also hit inside longer
+ * words (e.g. 'mb' in "number"). Consider whether that is intended.
+ */
+const LIMITATION_KEYWORDS = {
+    size_limit: ['size limit', 'max size', 'file size', 'mb', 'gb', 'kb', 'bytes', 'too large', 'megabytes', 'gigabytes', 'kilobytes', 'maximum'],
+    rate_limit: ['rate limit', 'throttle', 'requests per', 'quota', 'too many requests', 'rate limiting'],
+    timeout: ['timeout', 'time out', 'time limit', 'seconds', 'timed out', 'deadline', 'expires'],
+    encoding: ['encoding', 'utf-8', 'ascii', 'binary', 'charset', 'unicode'],
+    format: ['format', 'json', 'xml', 'csv', 'type', 'mime', 'content-type'],
+    permission: ['permission', 'access', 'denied', 'forbidden', 'read-only', 'write', 'privileges'],
+    platform: ['platform', 'windows', 'linux', 'macos', 'os-specific', 'operating system'],
+    dependency: ['dependency', 'requires', 'prerequisite', 'library', 'package', 'module'],
+    concurrency: ['concurrent', 'parallel', 'thread', 'lock', 'race condition', 'simultaneous'],
+    memory: ['memory', 'ram', 'heap', 'out of memory', 'memory limit'],
+    network: ['network', 'connection', 'offline', 'unreachable', 'connectivity'],
+};
+/**
+ * Extract ALL matching limitation categories from text with confidence scores.
+ *
+ * @param text - Text to scan; lower-cased before the keyword lookup.
+ * @returns One `{ category, confidence, matchedKeywords }` entry per category in
+ *   LIMITATION_KEYWORDS with at least one keyword found as a substring of the
+ *   text, sorted by confidence (highest first). Empty when nothing matches.
+ */
+export function extractLimitationCategories(text) {
+    const matches = [];
+    const lowerText = text.toLowerCase();
+    for (const [category, keywords] of Object.entries(LIMITATION_KEYWORDS)) {
+        const matchedKeywords = keywords.filter(keyword => lowerText.includes(keyword));
+        if (matchedKeywords.length > 0) {
+            const confidence = calculateCategoryConfidence(text, matchedKeywords, keywords);
+            matches.push({
+                category,
+                confidence,
+                matchedKeywords,
+            });
+        }
+    }
+    return matches.sort((a, b) => b.confidence - a.confidence); // highest confidence first
+}
+/**
+ * Calculate confidence for a category match based on keyword quality.
+ *
+ * Scoring, applied in order:
+ * - base: matched count / min(total keyword count, 5), scaled to at most 60
+ * - +20 if the average matched keyword is longer than 10 characters, +10 if longer than 5
+ * - +15 for three or more matched keywords, +10 for exactly two
+ * - -10 when the text is longer than 200 characters and only one keyword matched
+ *
+ * @param text - Original text; only its length is used.
+ * @param matchedKeywords - Keywords of the category that were found in the text.
+ * @param allKeywords - Full keyword list of the category.
+ * @returns 0 when `matchedKeywords` is empty, otherwise a value clamped to 10-100.
+ */
+function calculateCategoryConfidence(text, matchedKeywords, allKeywords) {
+    if (matchedKeywords.length === 0)
+        return 0;
+    // Base confidence from keyword count (the denominator is capped at 5)
+    const keywordRatio = matchedKeywords.length / Math.min(allKeywords.length, 5);
+    let confidence = Math.min(keywordRatio * 60, 60); // Max 60 from keyword count
+    // Bonus for longer/more specific keywords
+    const avgKeywordLength = matchedKeywords.reduce((sum, k) => sum + k.length, 0) / matchedKeywords.length;
+    if (avgKeywordLength > 10)
+        confidence += 20;
+    else if (avgKeywordLength > 5)
+        confidence += 10;
+    // Bonus for multiple distinct keywords
+    if (matchedKeywords.length >= 3)
+        confidence += 15;
+    else if (matchedKeywords.length >= 2)
+        confidence += 10;
+    // Penalty if text is very long but few keywords matched
+    const textLength = text.length;
+    if (textLength > 200 && matchedKeywords.length === 1) {
+        confidence -= 10;
+    }
+    return Math.min(Math.max(confidence, 10), 100); // clamp to the 10-100 range
+}
+/**
+ * Calculate relationship score between two security categories.
+ * Returns 0-100 where:
+ * - 100: Same category
+ * - table value (currently 30-50): Pair listed in CATEGORY_SIMILARITY
+ * - 70: Categories in same group of SECURITY_CATEGORY_GROUPS
+ * - 0: Unrelated categories
+ *
+ * The checks run in that order, so a CATEGORY_SIMILARITY entry wins over the
+ * same-group score.
+ *
+ * @param cat1 - First security category name.
+ * @param cat2 - Second security category name.
+ */
+export function calculateSecurityCategoryRelationship(cat1, cat2) {
+    if (cat1 === cat2)
+        return 100;
+    // Check direct similarity scores (the table is consulted in both key orders)
+    const directScore = CATEGORY_SIMILARITY[cat1]?.[cat2] ?? CATEGORY_SIMILARITY[cat2]?.[cat1];
+    if (directScore !== undefined)
+        return directScore;
+    // Check if in same group
+    for (const groupCategories of Object.values(SECURITY_CATEGORY_GROUPS)) {
+        if (groupCategories.includes(cat1) && groupCategories.includes(cat2)) {
+            return 70; // Same group gets 70%
+        }
+    }
+    // Unrelated
+    return 0;
+}
+/**
+ * Calculate relationship score between two limitation categories.
+ *
+ * @param cat1 - First limitation category name.
+ * @param cat2 - Second limitation category name.
+ * @returns 100 when the names are identical, 70 when some group in
+ *   LIMITATION_CATEGORY_GROUPS contains both, otherwise 0.
+ */
+export function calculateLimitationCategoryRelationship(cat1, cat2) {
+    if (cat1 === cat2)
+        return 100;
+    // Check if in same group
+    for (const groupCategories of Object.values(LIMITATION_CATEGORY_GROUPS)) {
+        if (groupCategories.includes(cat1) && groupCategories.includes(cat2)) {
+            return 70;
+        }
+    }
+    return 0; // no shared group: unrelated
+}
+/**
+ * Find the best match between the security categories extracted from two texts.
+ * Returns the highest-scoring category pair and their relationship score.
+ *
+ * Every pair (c1, c2) with a relationship score above 0 is a candidate. The
+ * first candidate with the highest ranking key wins; later ties do not replace it.
+ *
+ * @param categories1 - Category matches from the first text (`category`, `confidence`).
+ * @param categories2 - Category matches from the second text.
+ * @returns `{ cat1, cat2, relationshipScore, combinedConfidence }` for the best
+ *   pair, or `null` when either list is empty or no pair is related.
+ */
+export function findBestSecurityCategoryMatch(categories1, categories2) {
+    if (categories1.length === 0 || categories2.length === 0) {
+        return null;
+    }
+    let bestMatch = null;
+    let bestScore = 0; // ranking key of the best pair seen so far
+    for (const c1 of categories1) {
+        for (const c2 of categories2) {
+            const relationshipScore = calculateSecurityCategoryRelationship(c1.category, c2.category);
+            if (relationshipScore > 0) {
+                // Combined score considers both category confidence and relationship
+                const combinedConfidence = Math.round((c1.confidence * 0.4 + c2.confidence * 0.4 + relationshipScore * 0.2));
+                // Prefer higher relationship scores, then higher combined confidence
+                const totalScore = relationshipScore * 100 + combinedConfidence;
+                if (totalScore > bestScore) {
+                    bestScore = totalScore;
+                    bestMatch = {
+                        cat1: c1.category,
+                        cat2: c2.category,
+                        relationshipScore,
+                        combinedConfidence,
+                    };
+                }
+            }
+        }
+    }
+    return bestMatch;
+}
+/**
+ * Find the best match between the limitation categories extracted from two texts.
+ *
+ * Every pair (c1, c2) with a relationship score above 0 is a candidate. The
+ * first candidate with the highest ranking key wins; later ties do not replace it.
+ *
+ * @param categories1 - Category matches from the first text (`category`, `confidence`).
+ * @param categories2 - Category matches from the second text.
+ * @returns `{ cat1, cat2, relationshipScore, combinedConfidence }` for the best
+ *   pair, or `null` when either list is empty or no pair is related.
+ */
+export function findBestLimitationCategoryMatch(categories1, categories2) {
+    if (categories1.length === 0 || categories2.length === 0) {
+        return null;
+    }
+    let bestMatch = null;
+    let bestScore = 0; // ranking key of the best pair seen so far
+    for (const c1 of categories1) {
+        for (const c2 of categories2) {
+            const relationshipScore = calculateLimitationCategoryRelationship(c1.category, c2.category);
+            if (relationshipScore > 0) {
+                const combinedConfidence = Math.round((c1.confidence * 0.4 + c2.confidence * 0.4 + relationshipScore * 0.2)); // 40% + 40% confidences, 20% relationship
+                const totalScore = relationshipScore * 100 + combinedConfidence; // relationship dominates, confidence breaks ties
+                if (totalScore > bestScore) {
+                    bestScore = totalScore;
+                    bestMatch = {
+                        cat1: c1.category,
+                        cat2: c2.category,
+                        relationshipScore,
+                        combinedConfidence,
+                    };
+                }
+            }
+        }
+    }
+    return bestMatch;
+}
+/**
+ * Check if two security categories are considered matching.
+ * Uses relationship scoring for partial credit.
+ *
+ * @param cat1 - First security category name.
+ * @param cat2 - Second security category name.
+ * @returns `true` when calculateSecurityCategoryRelationship gives 50 or more.
+ */
+export function securityCategoriesMatch(cat1, cat2) {
+    const relationshipScore = calculateSecurityCategoryRelationship(cat1, cat2);
+    // Consider matching if relationship score is 50 or higher
+    return relationshipScore >= 50;
+}
+/**
+ * Check if two limitation categories are considered matching.
+ *
+ * @param cat1 - First limitation category name.
+ * @param cat2 - Second limitation category name.
+ * @returns `true` when calculateLimitationCategoryRelationship gives 50 or more.
+ */
+export function limitationCategoriesMatch(cat1, cat2) {
+    const relationshipScore = calculateLimitationCategoryRelationship(cat1, cat2);
+    return relationshipScore >= 50; // same threshold as securityCategoriesMatch
+}
+//# sourceMappingURL=category-matching.js.map

package/dist/baseline/change-impact-analyzer.d.ts ADDED Viewed

@@ -0,0 +1,98 @@
+/**
+ * Change Impact Analyzer
+ *
+ * Provides semantic understanding of schema changes and their impact.
+ * Goes beyond simple hash comparison to understand what actually breaks.
+ */
+import type { BehaviorChange, ChangeSeverity, ToolFingerprint, BehavioralBaseline, BehavioralDiff, WorkflowSignature } from './types.js';
+/**
+ * Type of schema change detected.
+ *
+ * The 14 literals cover parameter changes (`parameter_*`), enum value changes
+ * (`enum_value_*`), constraint changes (`constraint_*`), and changes to a
+ * description, default or format.
+ */
+export type SchemaChangeType = 'parameter_removed' | 'parameter_added' | 'parameter_type_changed' | 'parameter_required_added' | 'parameter_required_removed' | 'enum_value_removed' | 'enum_value_added' | 'constraint_added' | 'constraint_removed' | 'constraint_tightened' | 'constraint_relaxed' | 'description_changed' | 'default_changed' | 'format_changed';
+/**
+ * Detailed information about a single schema change.
+ *
+ * Element type of the array returned by analyzeSchemaChanges and of
+ * ChangeImpact.schemaChanges. `before` and `after` are typed `unknown`, so
+ * callers must narrow them before use.
+ */
+export interface SchemaChangeDetail {
+    type: SchemaChangeType;
+    parameterPath: string;
+    breaking: boolean;
+    before: unknown;
+    after: unknown;
+    description: string;
+}
+/**
+ * Migration complexity levels.
+ *
+ * Four string literals; presumably listed from least to most effort - confirm
+ * against the implementation before relying on the order.
+ */
+export type MigrationComplexity = 'trivial' | 'simple' | 'moderate' | 'complex';
+/**
+ * Comprehensive impact analysis for a change.
+ *
+ * Return type of analyzeToolChangeImpact and the value type of
+ * DiffImpactAnalysis.toolImpacts.
+ */
+export interface ChangeImpact {
+    /** Overall severity of the change */
+    severity: ChangeSeverity;
+    /** List of affected workflow IDs */
+    affectedWorkflows: string[];
+    /** List of affected parameter paths */
+    affectedParameters: string[];
+    /** Estimated complexity to migrate */
+    migrationComplexity: MigrationComplexity;
+    /** Suggested migration approach */
+    suggestedMigration: string;
+    /** Detailed breakdown of schema changes */
+    schemaChanges: SchemaChangeDetail[];
+    /** Whether this change is backwards compatible */
+    backwardsCompatible: boolean;
+    /** Risk score (0-100) */
+    riskScore: number;
+}
+/**
+ * Impact analysis results for the entire diff.
+ *
+ * Return type of analyzeDiffImpact and the input of getBreakingChangeSummary.
+ *
+ * NOTE: `toolImpacts` is a `Map`; `JSON.stringify` does not serialize Map
+ * entries, so convert it (e.g. with `Object.fromEntries`) before writing JSON.
+ */
+export interface DiffImpactAnalysis {
+    /** Overall severity of all changes */
+    overallSeverity: ChangeSeverity;
+    /** Total number of breaking changes */
+    breakingChangesCount: number;
+    /** Per-tool impact analysis (string keys; presumably the tool name - confirm) */
+    toolImpacts: Map<string, ChangeImpact>;
+    /** Workflows that will fail due to changes */
+    brokenWorkflows: string[];
+    /** Overall migration complexity */
+    overallMigrationComplexity: MigrationComplexity;
+    /** Summary of all changes */
+    summary: string;
+    /** Action items for addressing the changes */
+    actionItems: ActionItem[];
+}
+/**
+ * Action item for addressing a change.
+ *
+ * Element type of DiffImpactAnalysis.actionItems. `priority` is restricted to
+ * the four literals below; the other fields are free-form strings.
+ */
+export interface ActionItem {
+    priority: 'critical' | 'high' | 'medium' | 'low';
+    tool: string;
+    description: string;
+    suggestedAction: string;
+}
+export { CHANGE_IMPACT } from '../constants.js';
+/**
+ * Analyze the impact of changes between two tool fingerprints.
+ *
+ * @param oldTool - Fingerprint of the tool before the change.
+ * @param newTool - Fingerprint of the tool after the change.
+ * @param workflows - Optional workflow signatures; presumably used to fill
+ *   `affectedWorkflows` - confirm against the implementation.
+ * @returns The impact analysis for this tool.
+ */
+export declare function analyzeToolChangeImpact(oldTool: ToolFingerprint, newTool: ToolFingerprint, workflows?: WorkflowSignature[]): ChangeImpact;
+/**
+ * Analyze a complete diff and provide comprehensive impact analysis.
+ *
+ * @param diff - Behavioral diff to analyze.
+ * @param oldBaseline - Baseline the diff starts from.
+ * @param newBaseline - Baseline the diff leads to.
+ * @returns Aggregated analysis, including per-tool impacts and action items.
+ */
+export declare function analyzeDiffImpact(diff: BehavioralDiff, oldBaseline: BehavioralBaseline, newBaseline: BehavioralBaseline): DiffImpactAnalysis;
+/**
+ * Analyze changes between two schemas and return detailed change information.
+ *
+ * @param oldSchema - Schema before the change; may be `undefined`.
+ * @param newSchema - Schema after the change; may be `undefined`.
+ * @returns One SchemaChangeDetail per detected change. How an `undefined`
+ *   schema is treated is not visible from this declaration.
+ */
+export declare function analyzeSchemaChanges(oldSchema: Record<string, unknown> | undefined, newSchema: Record<string, unknown> | undefined): SchemaChangeDetail[];
+/**
+ * Check if a behavior change is actually breaking based on semantic analysis.
+ * This enhances the simple hash-based comparison with semantic understanding.
+ *
+ * @param change - The behavior change to classify.
+ * @returns `true` when the change is considered breaking.
+ */
+export declare function isBreakingChange(change: BehaviorChange): boolean;
+/**
+ * Get a quick summary of breaking changes for CI output.
+ *
+ * @param analysis - Result of analyzeDiffImpact.
+ * @returns The summary as a single string.
+ */
+export declare function getBreakingChangeSummary(analysis: DiffImpactAnalysis): string;
+//# sourceMappingURL=change-impact-analyzer.d.ts.map