observability-toolkit 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +163 -398
- package/dist/__tests__/find-constant-dedup.test.d.ts +11 -0
- package/dist/__tests__/find-constant-dedup.test.d.ts.map +1 -0
- package/dist/__tests__/find-constant-dedup.test.js +132 -0
- package/dist/__tests__/find-constant-dedup.test.js.map +1 -0
- package/dist/backends/backend-schemas.d.ts +309 -0
- package/dist/backends/backend-schemas.d.ts.map +1 -0
- package/dist/backends/backend-schemas.js +215 -0
- package/dist/backends/backend-schemas.js.map +1 -0
- package/dist/backends/cloud.d.ts +46 -0
- package/dist/backends/cloud.d.ts.map +1 -0
- package/dist/backends/cloud.js +520 -0
- package/dist/backends/cloud.js.map +1 -0
- package/dist/backends/cloud.test.d.ts +2 -0
- package/dist/backends/cloud.test.d.ts.map +1 -0
- package/dist/backends/cloud.test.js +436 -0
- package/dist/backends/cloud.test.js.map +1 -0
- package/dist/backends/index.d.ts +659 -386
- package/dist/backends/index.d.ts.map +1 -1
- package/dist/backends/index.js +318 -41
- package/dist/backends/index.js.map +1 -1
- package/dist/backends/index.test.js +578 -57
- package/dist/backends/index.test.js.map +1 -1
- package/dist/backends/local-jsonl-boolean-search.test.js +8 -7
- package/dist/backends/local-jsonl-boolean-search.test.js.map +1 -1
- package/dist/backends/local-jsonl-cache.test.js +33 -31
- package/dist/backends/local-jsonl-cache.test.js.map +1 -1
- package/dist/backends/local-jsonl-circuit-breaker.test.js +9 -7
- package/dist/backends/local-jsonl-circuit-breaker.test.js.map +1 -1
- package/dist/backends/local-jsonl-export.test.js +73 -58
- package/dist/backends/local-jsonl-export.test.js.map +1 -1
- package/dist/backends/local-jsonl-index.test.js +52 -50
- package/dist/backends/local-jsonl-index.test.js.map +1 -1
- package/dist/backends/local-jsonl-logs.test.js +47 -31
- package/dist/backends/local-jsonl-logs.test.js.map +1 -1
- package/dist/backends/local-jsonl-metrics.test.js +85 -82
- package/dist/backends/local-jsonl-metrics.test.js.map +1 -1
- package/dist/backends/local-jsonl-otlp-unwrap.test.d.ts +2 -0
- package/dist/backends/local-jsonl-otlp-unwrap.test.d.ts.map +1 -0
- package/dist/backends/local-jsonl-otlp-unwrap.test.js +602 -0
- package/dist/backends/local-jsonl-otlp-unwrap.test.js.map +1 -0
- package/dist/backends/local-jsonl-traces.test.js +161 -147
- package/dist/backends/local-jsonl-traces.test.js.map +1 -1
- package/dist/backends/local-jsonl.d.ts +37 -8
- package/dist/backends/local-jsonl.d.ts.map +1 -1
- package/dist/backends/local-jsonl.js +1088 -241
- package/dist/backends/local-jsonl.js.map +1 -1
- package/dist/backends/shared.d.ts +9 -0
- package/dist/backends/shared.d.ts.map +1 -0
- package/dist/backends/shared.js +9 -0
- package/dist/backends/shared.js.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/logs/v1/logs_service_pb.d.ts +40 -0
- package/dist/generated/opentelemetry/proto/collector/logs/v1/logs_service_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/logs/v1/logs_service_pb.js +27 -0
- package/dist/generated/opentelemetry/proto/collector/logs/v1/logs_service_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/metrics/v1/metrics_service_pb.d.ts +106 -0
- package/dist/generated/opentelemetry/proto/collector/metrics/v1/metrics_service_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/metrics/v1/metrics_service_pb.js +43 -0
- package/dist/generated/opentelemetry/proto/collector/metrics/v1/metrics_service_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/profiles/v1development/profiles_service_pb.d.ts +111 -0
- package/dist/generated/opentelemetry/proto/collector/profiles/v1development/profiles_service_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/profiles/v1development/profiles_service_pb.js +42 -0
- package/dist/generated/opentelemetry/proto/collector/profiles/v1development/profiles_service_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/trace/v1/trace_service_pb.d.ts +106 -0
- package/dist/generated/opentelemetry/proto/collector/trace/v1/trace_service_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/trace/v1/trace_service_pb.js +43 -0
- package/dist/generated/opentelemetry/proto/collector/trace/v1/trace_service_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/common/v1/common_pb.d.ts +243 -0
- package/dist/generated/opentelemetry/proto/common/v1/common_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/common/v1/common_pb.js +49 -0
- package/dist/generated/opentelemetry/proto/common/v1/common_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/logs/v1/logs_pb.d.ts +90 -0
- package/dist/generated/opentelemetry/proto/logs/v1/logs_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/logs/v1/logs_pb.js +66 -0
- package/dist/generated/opentelemetry/proto/logs/v1/logs_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/metrics/v1/metrics_pb.d.ts +1134 -0
- package/dist/generated/opentelemetry/proto/metrics/v1/metrics_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/metrics/v1/metrics_pb.js +223 -0
- package/dist/generated/opentelemetry/proto/metrics/v1/metrics_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/profiles/v1development/profiles_pb.d.ts +678 -0
- package/dist/generated/opentelemetry/proto/profiles/v1development/profiles_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/profiles/v1development/profiles_pb.js +107 -0
- package/dist/generated/opentelemetry/proto/profiles/v1development/profiles_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/resource/v1/resource_pb.d.ts +46 -0
- package/dist/generated/opentelemetry/proto/resource/v1/resource_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/resource/v1/resource_pb.js +25 -0
- package/dist/generated/opentelemetry/proto/resource/v1/resource_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/trace/v1/trace_pb.d.ts +569 -0
- package/dist/generated/opentelemetry/proto/trace/v1/trace_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/trace/v1/trace_pb.js +195 -0
- package/dist/generated/opentelemetry/proto/trace/v1/trace_pb.js.map +1 -0
- package/dist/lib/agent-judge/agent-as-judge.d.ts +157 -0
- package/dist/lib/agent-judge/agent-as-judge.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-as-judge.js +137 -0
- package/dist/lib/agent-judge/agent-as-judge.js.map +1 -0
- package/dist/lib/agent-judge/agent-as-judge.test.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-as-judge.test.js +839 -0
- package/dist/lib/agent-judge/agent-as-judge.test.js.map +1 -0
- package/dist/lib/agent-judge/agent-eval-metrics.d.ts +293 -0
- package/dist/lib/agent-judge/agent-eval-metrics.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-eval-metrics.js +715 -0
- package/dist/lib/agent-judge/agent-eval-metrics.js.map +1 -0
- package/dist/lib/agent-judge/agent-eval-metrics.test.d.ts +5 -0
- package/dist/lib/agent-judge/agent-eval-metrics.test.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-eval-metrics.test.js +676 -0
- package/dist/lib/agent-judge/agent-eval-metrics.test.js.map +1 -0
- package/dist/lib/agent-judge/agent-judge-classes.d.ts +95 -0
- package/dist/lib/agent-judge/agent-judge-classes.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-judge-classes.js +222 -0
- package/dist/lib/agent-judge/agent-judge-classes.js.map +1 -0
- package/dist/lib/agent-judge/agent-judge-classes.test.d.ts +6 -0
- package/dist/lib/agent-judge/agent-judge-classes.test.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-judge-classes.test.js +271 -0
- package/dist/lib/agent-judge/agent-judge-classes.test.js.map +1 -0
- package/dist/lib/agent-judge/agent-judge-consensus.d.ts +58 -0
- package/dist/lib/agent-judge/agent-judge-consensus.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-judge-consensus.js +149 -0
- package/dist/lib/agent-judge/agent-judge-consensus.js.map +1 -0
- package/dist/lib/agent-judge/agent-judge-consensus.test.d.ts +2 -0
- package/dist/lib/agent-judge/agent-judge-consensus.test.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-judge-consensus.test.js +170 -0
- package/dist/lib/agent-judge/agent-judge-consensus.test.js.map +1 -0
- package/dist/lib/agent-judge/agent-judge-verification.d.ts +89 -0
- package/dist/lib/agent-judge/agent-judge-verification.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-judge-verification.js +235 -0
- package/dist/lib/agent-judge/agent-judge-verification.js.map +1 -0
- package/dist/lib/agent-judge/agent-judge-verification.test.d.ts +5 -0
- package/dist/lib/agent-judge/agent-judge-verification.test.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-judge-verification.test.js +399 -0
- package/dist/lib/agent-judge/agent-judge-verification.test.js.map +1 -0
- package/dist/lib/audit/agent-auditor-scoring.d.ts +167 -0
- package/dist/lib/audit/agent-auditor-scoring.d.ts.map +1 -0
- package/dist/lib/audit/agent-auditor-scoring.js +338 -0
- package/dist/lib/audit/agent-auditor-scoring.js.map +1 -0
- package/dist/lib/audit/agent-auditor-scoring.test.d.ts +2 -0
- package/dist/lib/audit/agent-auditor-scoring.test.d.ts.map +1 -0
- package/dist/lib/audit/agent-auditor-scoring.test.js +576 -0
- package/dist/lib/audit/agent-auditor-scoring.test.js.map +1 -0
- package/dist/lib/audit/audit-record.d.ts +139 -0
- package/dist/lib/audit/audit-record.d.ts.map +1 -0
- package/dist/lib/audit/audit-record.js +288 -0
- package/dist/lib/audit/audit-record.js.map +1 -0
- package/dist/lib/audit/audit-record.test.d.ts +5 -0
- package/dist/lib/audit/audit-record.test.d.ts.map +1 -0
- package/dist/lib/audit/audit-record.test.js +258 -0
- package/dist/lib/audit/audit-record.test.js.map +1 -0
- package/dist/lib/audit/audit-scoring-constants.d.ts +57 -0
- package/dist/lib/audit/audit-scoring-constants.d.ts.map +1 -0
- package/dist/lib/audit/audit-scoring-constants.js +59 -0
- package/dist/lib/audit/audit-scoring-constants.js.map +1 -0
- package/dist/lib/audit/compliance-report.d.ts +125 -0
- package/dist/lib/audit/compliance-report.d.ts.map +1 -0
- package/dist/lib/audit/compliance-report.js +205 -0
- package/dist/lib/audit/compliance-report.js.map +1 -0
- package/dist/lib/audit/compliance-report.test.d.ts +5 -0
- package/dist/lib/audit/compliance-report.test.d.ts.map +1 -0
- package/dist/lib/audit/compliance-report.test.js +290 -0
- package/dist/lib/audit/compliance-report.test.js.map +1 -0
- package/dist/lib/audit/retention-guard.d.ts +41 -0
- package/dist/lib/audit/retention-guard.d.ts.map +1 -0
- package/dist/lib/audit/retention-guard.js +103 -0
- package/dist/lib/audit/retention-guard.js.map +1 -0
- package/dist/lib/audit/retention-guard.test.d.ts +5 -0
- package/dist/lib/audit/retention-guard.test.d.ts.map +1 -0
- package/dist/lib/audit/retention-guard.test.js +109 -0
- package/dist/lib/audit/retention-guard.test.js.map +1 -0
- package/dist/lib/audit/skill-auditor-scoring.d.ts +69 -0
- package/dist/lib/audit/skill-auditor-scoring.d.ts.map +1 -0
- package/dist/lib/audit/skill-auditor-scoring.js +149 -0
- package/dist/lib/audit/skill-auditor-scoring.js.map +1 -0
- package/dist/lib/audit/skill-auditor-scoring.test.d.ts +2 -0
- package/dist/lib/audit/skill-auditor-scoring.test.d.ts.map +1 -0
- package/dist/lib/audit/skill-auditor-scoring.test.js +369 -0
- package/dist/lib/audit/skill-auditor-scoring.test.js.map +1 -0
- package/dist/lib/audit/verification-events.d.ts +119 -0
- package/dist/lib/audit/verification-events.d.ts.map +1 -0
- package/dist/lib/audit/verification-events.js +175 -0
- package/dist/lib/audit/verification-events.js.map +1 -0
- package/dist/lib/audit/verification-events.test.d.ts.map +1 -0
- package/dist/lib/audit/verification-events.test.js +197 -0
- package/dist/lib/audit/verification-events.test.js.map +1 -0
- package/dist/lib/core/constants-models.d.ts +90 -0
- package/dist/lib/core/constants-models.d.ts.map +1 -0
- package/dist/lib/core/constants-models.js +208 -0
- package/dist/lib/core/constants-models.js.map +1 -0
- package/dist/lib/core/constants-otel.d.ts +68 -0
- package/dist/lib/core/constants-otel.d.ts.map +1 -0
- package/dist/lib/core/constants-otel.js +128 -0
- package/dist/lib/core/constants-otel.js.map +1 -0
- package/dist/lib/core/constants-symlink.test.d.ts.map +1 -0
- package/dist/lib/core/constants-symlink.test.js +358 -0
- package/dist/lib/core/constants-symlink.test.js.map +1 -0
- package/dist/lib/core/constants-telemetry.d.ts +21 -0
- package/dist/lib/core/constants-telemetry.d.ts.map +1 -0
- package/dist/lib/core/constants-telemetry.js +162 -0
- package/dist/lib/core/constants-telemetry.js.map +1 -0
- package/dist/lib/core/constants.d.ts +152 -0
- package/dist/lib/core/constants.d.ts.map +1 -0
- package/dist/lib/core/constants.js +223 -0
- package/dist/lib/core/constants.js.map +1 -0
- package/dist/lib/core/constants.test.d.ts.map +1 -0
- package/dist/lib/core/constants.test.js +833 -0
- package/dist/lib/core/constants.test.js.map +1 -0
- package/dist/lib/core/doc-sync.test.d.ts +9 -0
- package/dist/lib/core/doc-sync.test.d.ts.map +1 -0
- package/dist/lib/core/doc-sync.test.js +159 -0
- package/dist/lib/core/doc-sync.test.js.map +1 -0
- package/dist/lib/core/edge-cases.test.d.ts.map +1 -0
- package/dist/lib/core/edge-cases.test.js +637 -0
- package/dist/lib/core/edge-cases.test.js.map +1 -0
- package/dist/lib/core/file-utils.d.ts +360 -0
- package/dist/lib/core/file-utils.d.ts.map +1 -0
- package/dist/lib/core/file-utils.js +890 -0
- package/dist/lib/core/file-utils.js.map +1 -0
- package/dist/lib/core/file-utils.test-constants.d.ts +38 -0
- package/dist/lib/core/file-utils.test-constants.d.ts.map +1 -0
- package/dist/lib/core/file-utils.test-constants.js +40 -0
- package/dist/lib/core/file-utils.test-constants.js.map +1 -0
- package/dist/lib/core/file-utils.test.d.ts.map +1 -0
- package/dist/lib/core/file-utils.test.js +1329 -0
- package/dist/lib/core/file-utils.test.js.map +1 -0
- package/dist/lib/core/input-validator.d.ts +125 -0
- package/dist/lib/core/input-validator.d.ts.map +1 -0
- package/dist/lib/core/input-validator.fuzz.test.d.ts.map +1 -0
- package/dist/lib/core/input-validator.fuzz.test.js +302 -0
- package/dist/lib/core/input-validator.fuzz.test.js.map +1 -0
- package/dist/lib/core/input-validator.js +348 -0
- package/dist/lib/core/input-validator.js.map +1 -0
- package/dist/lib/core/input-validator.test.d.ts.map +1 -0
- package/dist/lib/core/input-validator.test.js +465 -0
- package/dist/lib/core/input-validator.test.js.map +1 -0
- package/dist/lib/core/logger.d.ts +32 -0
- package/dist/lib/core/logger.d.ts.map +1 -0
- package/dist/lib/core/logger.js +104 -0
- package/dist/lib/core/logger.js.map +1 -0
- package/dist/lib/core/logger.test.d.ts.map +1 -0
- package/dist/lib/core/logger.test.js.map +1 -0
- package/dist/lib/core/schema-types.d.ts +37 -0
- package/dist/lib/core/schema-types.d.ts.map +1 -0
- package/dist/lib/core/schema-types.js +29 -0
- package/dist/lib/core/schema-types.js.map +1 -0
- package/dist/lib/core/server-utils.d.ts +98 -0
- package/dist/lib/core/server-utils.d.ts.map +1 -0
- package/dist/lib/core/server-utils.js +193 -0
- package/dist/lib/core/server-utils.js.map +1 -0
- package/dist/lib/core/shared-schemas.d.ts +301 -0
- package/dist/lib/core/shared-schemas.d.ts.map +1 -0
- package/dist/lib/core/shared-schemas.js +222 -0
- package/dist/lib/core/shared-schemas.js.map +1 -0
- package/dist/lib/core/shared-schemas.test.d.ts.map +1 -0
- package/dist/lib/core/shared-schemas.test.js +136 -0
- package/dist/lib/core/shared-schemas.test.js.map +1 -0
- package/dist/lib/core/units.d.ts +67 -0
- package/dist/lib/core/units.d.ts.map +1 -0
- package/dist/lib/core/units.js +88 -0
- package/dist/lib/core/units.js.map +1 -0
- package/dist/lib/cost/cost-estimation.d.ts +264 -0
- package/dist/lib/cost/cost-estimation.d.ts.map +1 -0
- package/dist/lib/cost/cost-estimation.js +541 -0
- package/dist/lib/cost/cost-estimation.js.map +1 -0
- package/dist/lib/cost/cost-estimation.test.d.ts +5 -0
- package/dist/lib/cost/cost-estimation.test.d.ts.map +1 -0
- package/dist/lib/cost/cost-estimation.test.js +701 -0
- package/dist/lib/cost/cost-estimation.test.js.map +1 -0
- package/dist/lib/cost/pricing-cache.d.ts +59 -0
- package/dist/lib/cost/pricing-cache.d.ts.map +1 -0
- package/dist/lib/cost/pricing-cache.js +120 -0
- package/dist/lib/cost/pricing-cache.js.map +1 -0
- package/dist/lib/cost/pricing-cache.test.d.ts +5 -0
- package/dist/lib/cost/pricing-cache.test.d.ts.map +1 -0
- package/dist/lib/cost/pricing-cache.test.js +176 -0
- package/dist/lib/cost/pricing-cache.test.js.map +1 -0
- package/dist/lib/dashboard-file-utils.d.ts +35 -0
- package/dist/lib/dashboard-file-utils.d.ts.map +1 -0
- package/dist/lib/dashboard-file-utils.js +94 -0
- package/dist/lib/dashboard-file-utils.js.map +1 -0
- package/dist/lib/errors/error-sanitizer.d.ts +62 -0
- package/dist/lib/errors/error-sanitizer.d.ts.map +1 -0
- package/dist/lib/errors/error-sanitizer.js +235 -0
- package/dist/lib/errors/error-sanitizer.js.map +1 -0
- package/dist/lib/errors/error-sanitizer.test.d.ts.map +1 -0
- package/dist/lib/errors/error-sanitizer.test.js +534 -0
- package/dist/lib/errors/error-sanitizer.test.js.map +1 -0
- package/dist/lib/errors/error-types.d.ts +59 -0
- package/dist/lib/errors/error-types.d.ts.map +1 -0
- package/dist/lib/errors/error-types.js +187 -0
- package/dist/lib/errors/error-types.js.map +1 -0
- package/dist/lib/errors/error-types.test.d.ts.map +1 -0
- package/dist/lib/errors/error-types.test.js +246 -0
- package/dist/lib/errors/error-types.test.js.map +1 -0
- package/dist/lib/errors/query-sanitizer.d.ts.map +1 -0
- package/dist/lib/errors/query-sanitizer.js +269 -0
- package/dist/lib/errors/query-sanitizer.js.map +1 -0
- package/dist/lib/errors/query-sanitizer.test.d.ts.map +1 -0
- package/dist/lib/errors/query-sanitizer.test.js +403 -0
- package/dist/lib/errors/query-sanitizer.test.js.map +1 -0
- package/dist/lib/exports/confident-export.d.ts +105 -0
- package/dist/lib/exports/confident-export.d.ts.map +1 -0
- package/dist/lib/exports/confident-export.js +385 -0
- package/dist/lib/exports/confident-export.js.map +1 -0
- package/dist/lib/exports/confident-export.test.d.ts.map +1 -0
- package/dist/lib/exports/confident-export.test.js +848 -0
- package/dist/lib/exports/confident-export.test.js.map +1 -0
- package/dist/lib/exports/datadog-export.d.ts +200 -0
- package/dist/lib/exports/datadog-export.d.ts.map +1 -0
- package/dist/lib/exports/datadog-export.js +488 -0
- package/dist/lib/exports/datadog-export.js.map +1 -0
- package/dist/lib/exports/datadog-export.test.d.ts +2 -0
- package/dist/lib/exports/datadog-export.test.d.ts.map +1 -0
- package/dist/lib/exports/datadog-export.test.js +890 -0
- package/dist/lib/exports/datadog-export.test.js.map +1 -0
- package/dist/lib/exports/export-config-schemas.d.ts +67 -0
- package/dist/lib/exports/export-config-schemas.d.ts.map +1 -0
- package/dist/lib/exports/export-config-schemas.js +120 -0
- package/dist/lib/exports/export-config-schemas.js.map +1 -0
- package/dist/lib/exports/export-config-schemas.test.d.ts +8 -0
- package/dist/lib/exports/export-config-schemas.test.d.ts.map +1 -0
- package/dist/lib/exports/export-config-schemas.test.js +503 -0
- package/dist/lib/exports/export-config-schemas.test.js.map +1 -0
- package/dist/lib/exports/export-utils.d.ts +127 -0
- package/dist/lib/exports/export-utils.d.ts.map +1 -0
- package/dist/lib/exports/export-utils.js +303 -0
- package/dist/lib/exports/export-utils.js.map +1 -0
- package/dist/lib/exports/export-utils.test.d.ts.map +1 -0
- package/dist/lib/exports/export-utils.test.js +344 -0
- package/dist/lib/exports/export-utils.test.js.map +1 -0
- package/dist/lib/exports/langfuse-export.d.ts +129 -0
- package/dist/lib/exports/langfuse-export.d.ts.map +1 -0
- package/dist/lib/exports/langfuse-export.js +370 -0
- package/dist/lib/exports/langfuse-export.js.map +1 -0
- package/dist/lib/exports/langfuse-export.test.d.ts.map +1 -0
- package/dist/lib/exports/langfuse-export.test.js +1020 -0
- package/dist/lib/exports/langfuse-export.test.js.map +1 -0
- package/dist/lib/exports/otlp-export.d.ts +179 -0
- package/dist/lib/exports/otlp-export.d.ts.map +1 -0
- package/dist/lib/exports/otlp-export.js +397 -0
- package/dist/lib/exports/otlp-export.js.map +1 -0
- package/dist/lib/exports/otlp-format-converter.d.ts +70 -0
- package/dist/lib/exports/otlp-format-converter.d.ts.map +1 -0
- package/dist/lib/exports/otlp-format-converter.js +401 -0
- package/dist/lib/exports/otlp-format-converter.js.map +1 -0
- package/dist/lib/exports/otlp-proto-encode.d.ts +53 -0
- package/dist/lib/exports/otlp-proto-encode.d.ts.map +1 -0
- package/dist/lib/exports/otlp-proto-encode.js +165 -0
- package/dist/lib/exports/otlp-proto-encode.js.map +1 -0
- package/dist/lib/exports/otlp-proto-encode.test.d.ts +7 -0
- package/dist/lib/exports/otlp-proto-encode.test.d.ts.map +1 -0
- package/dist/lib/exports/otlp-proto-encode.test.js +997 -0
- package/dist/lib/exports/otlp-proto-encode.test.js.map +1 -0
- package/dist/lib/exports/phoenix-export.d.ts +119 -0
- package/dist/lib/exports/phoenix-export.d.ts.map +1 -0
- package/dist/lib/exports/phoenix-export.js +448 -0
- package/dist/lib/exports/phoenix-export.js.map +1 -0
- package/dist/lib/exports/phoenix-export.test.d.ts.map +1 -0
- package/dist/lib/exports/phoenix-export.test.js +816 -0
- package/dist/lib/exports/phoenix-export.test.js.map +1 -0
- package/dist/lib/index.d.ts +16 -0
- package/dist/lib/index.d.ts.map +1 -0
- package/dist/lib/index.js +31 -0
- package/dist/lib/index.js.map +1 -0
- package/dist/lib/judge/evaluation-hooks-schemas.d.ts +186 -0
- package/dist/lib/judge/evaluation-hooks-schemas.d.ts.map +1 -0
- package/dist/lib/judge/evaluation-hooks-schemas.js +125 -0
- package/dist/lib/judge/evaluation-hooks-schemas.js.map +1 -0
- package/dist/lib/judge/evaluation-hooks.d.ts +88 -0
- package/dist/lib/judge/evaluation-hooks.d.ts.map +1 -0
- package/dist/lib/judge/evaluation-hooks.js +658 -0
- package/dist/lib/judge/evaluation-hooks.js.map +1 -0
- package/dist/lib/judge/evaluation-hooks.test.d.ts.map +1 -0
- package/dist/lib/judge/evaluation-hooks.test.js +934 -0
- package/dist/lib/judge/evaluation-hooks.test.js.map +1 -0
- package/dist/lib/judge/llm-as-judge.d.ts +138 -0
- package/dist/lib/judge/llm-as-judge.d.ts.map +1 -0
- package/dist/lib/judge/llm-as-judge.js +103 -0
- package/dist/lib/judge/llm-as-judge.js.map +1 -0
- package/dist/lib/judge/llm-as-judge.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-as-judge.test.js +2179 -0
- package/dist/lib/judge/llm-as-judge.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-bias.d.ts +44 -0
- package/dist/lib/judge/llm-judge-bias.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-bias.js +130 -0
- package/dist/lib/judge/llm-judge-bias.js.map +1 -0
- package/dist/lib/judge/llm-judge-bias.test.d.ts +2 -0
- package/dist/lib/judge/llm-judge-bias.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-bias.test.js +380 -0
- package/dist/lib/judge/llm-judge-bias.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-code.d.ts +99 -0
- package/dist/lib/judge/llm-judge-code.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-code.js +261 -0
- package/dist/lib/judge/llm-judge-code.js.map +1 -0
- package/dist/lib/judge/llm-judge-code.test.d.ts +2 -0
- package/dist/lib/judge/llm-judge-code.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-code.test.js +981 -0
- package/dist/lib/judge/llm-judge-code.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-config.d.ts +241 -0
- package/dist/lib/judge/llm-judge-config.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-config.js +390 -0
- package/dist/lib/judge/llm-judge-config.js.map +1 -0
- package/dist/lib/judge/llm-judge-config.test.d.ts +5 -0
- package/dist/lib/judge/llm-judge-config.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-config.test.js +392 -0
- package/dist/lib/judge/llm-judge-config.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-constants.d.ts +111 -0
- package/dist/lib/judge/llm-judge-constants.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-constants.js +150 -0
- package/dist/lib/judge/llm-judge-constants.js.map +1 -0
- package/dist/lib/judge/llm-judge-dag.d.ts +57 -0
- package/dist/lib/judge/llm-judge-dag.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-dag.js +217 -0
- package/dist/lib/judge/llm-judge-dag.js.map +1 -0
- package/dist/lib/judge/llm-judge-dag.test.d.ts +8 -0
- package/dist/lib/judge/llm-judge-dag.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-dag.test.js +973 -0
- package/dist/lib/judge/llm-judge-dag.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-domain.d.ts +42 -0
- package/dist/lib/judge/llm-judge-domain.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-domain.js +167 -0
- package/dist/lib/judge/llm-judge-domain.js.map +1 -0
- package/dist/lib/judge/llm-judge-domain.test.d.ts +6 -0
- package/dist/lib/judge/llm-judge-domain.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-domain.test.js +337 -0
- package/dist/lib/judge/llm-judge-domain.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-geval.d.ts +42 -0
- package/dist/lib/judge/llm-judge-geval.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-geval.js +213 -0
- package/dist/lib/judge/llm-judge-geval.js.map +1 -0
- package/dist/lib/judge/llm-judge-geval.test.d.ts +2 -0
- package/dist/lib/judge/llm-judge-geval.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-geval.test.js +556 -0
- package/dist/lib/judge/llm-judge-geval.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-otel.test.d.ts +9 -0
- package/dist/lib/judge/llm-judge-otel.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-otel.test.js +91 -0
- package/dist/lib/judge/llm-judge-otel.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-qag.d.ts +38 -0
- package/dist/lib/judge/llm-judge-qag.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-qag.js +205 -0
- package/dist/lib/judge/llm-judge-qag.js.map +1 -0
- package/dist/lib/judge/llm-judge-qag.test.d.ts +2 -0
- package/dist/lib/judge/llm-judge-qag.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-qag.test.js +386 -0
- package/dist/lib/judge/llm-judge-qag.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-resilience.d.ts +74 -0
- package/dist/lib/judge/llm-judge-resilience.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-resilience.js +146 -0
- package/dist/lib/judge/llm-judge-resilience.js.map +1 -0
- package/dist/lib/judge/llm-judge-resilience.test.d.ts +2 -0
- package/dist/lib/judge/llm-judge-resilience.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-resilience.test.js +353 -0
- package/dist/lib/judge/llm-judge-resilience.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-security.d.ts +106 -0
- package/dist/lib/judge/llm-judge-security.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-security.js +314 -0
- package/dist/lib/judge/llm-judge-security.js.map +1 -0
- package/dist/lib/judge/llm-judge-security.test.d.ts +2 -0
- package/dist/lib/judge/llm-judge-security.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-security.test.js +1011 -0
- package/dist/lib/judge/llm-judge-security.test.js.map +1 -0
- package/dist/lib/observability/context-accumulator.d.ts +32 -0
- package/dist/lib/observability/context-accumulator.d.ts.map +1 -0
- package/dist/lib/observability/context-accumulator.js +87 -0
- package/dist/lib/observability/context-accumulator.js.map +1 -0
- package/dist/lib/observability/evaluation-events.d.ts +35 -0
- package/dist/lib/observability/evaluation-events.d.ts.map +1 -0
- package/dist/lib/observability/evaluation-events.js +90 -0
- package/dist/lib/observability/evaluation-events.js.map +1 -0
- package/dist/lib/observability/file-span-exporter.d.ts +17 -0
- package/dist/lib/observability/file-span-exporter.d.ts.map +1 -0
- package/dist/lib/observability/file-span-exporter.js +49 -0
- package/dist/lib/observability/file-span-exporter.js.map +1 -0
- package/dist/lib/observability/histogram-bucket-constants.d.ts +25 -0
- package/dist/lib/observability/histogram-bucket-constants.d.ts.map +1 -0
- package/dist/lib/observability/histogram-bucket-constants.js +60 -0
- package/dist/lib/observability/histogram-bucket-constants.js.map +1 -0
- package/dist/lib/observability/histogram.d.ts +112 -0
- package/dist/lib/observability/histogram.d.ts.map +1 -0
- package/dist/lib/observability/histogram.js +170 -0
- package/dist/lib/observability/histogram.js.map +1 -0
- package/dist/lib/observability/histogram.test.d.ts.map +1 -0
- package/dist/lib/observability/histogram.test.js +385 -0
- package/dist/lib/observability/histogram.test.js.map +1 -0
- package/dist/lib/observability/indexer.d.ts +114 -0
- package/dist/lib/observability/indexer.d.ts.map +1 -0
- package/dist/lib/observability/indexer.js +402 -0
- package/dist/lib/observability/indexer.js.map +1 -0
- package/dist/lib/observability/indexer.test.d.ts.map +1 -0
- package/dist/lib/observability/indexer.test.js +713 -0
- package/dist/lib/observability/indexer.test.js.map +1 -0
- package/dist/lib/observability/instrumentation-eval.test.d.ts +5 -0
- package/dist/lib/observability/instrumentation-eval.test.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation-eval.test.js +63 -0
- package/dist/lib/observability/instrumentation-eval.test.js.map +1 -0
- package/dist/lib/observability/instrumentation-init-errors.test.d.ts +13 -0
- package/dist/lib/observability/instrumentation-init-errors.test.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation-init-errors.test.js +194 -0
- package/dist/lib/observability/instrumentation-init-errors.test.js.map +1 -0
- package/dist/lib/observability/instrumentation-retry-timeout.test.d.ts +15 -0
- package/dist/lib/observability/instrumentation-retry-timeout.test.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation-retry-timeout.test.js +188 -0
- package/dist/lib/observability/instrumentation-retry-timeout.test.js.map +1 -0
- package/dist/lib/observability/instrumentation-set-otel.test.d.ts +5 -0
- package/dist/lib/observability/instrumentation-set-otel.test.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation-set-otel.test.js +59 -0
- package/dist/lib/observability/instrumentation-set-otel.test.js.map +1 -0
- package/dist/lib/observability/instrumentation.d.ts +158 -0
- package/dist/lib/observability/instrumentation.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation.integration.test.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation.integration.test.js +590 -0
- package/dist/lib/observability/instrumentation.integration.test.js.map +1 -0
- package/dist/lib/observability/instrumentation.js +512 -0
- package/dist/lib/observability/instrumentation.js.map +1 -0
- package/dist/lib/observability/instrumentation.test.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation.test.js +822 -0
- package/dist/lib/observability/instrumentation.test.js.map +1 -0
- package/dist/lib/observability/mcp-semconv-constants.d.ts +98 -0
- package/dist/lib/observability/mcp-semconv-constants.d.ts.map +1 -0
- package/dist/lib/observability/mcp-semconv-constants.js +102 -0
- package/dist/lib/observability/mcp-semconv-constants.js.map +1 -0
- package/dist/lib/observability/mcp-semconv.d.ts +37 -0
- package/dist/lib/observability/mcp-semconv.d.ts.map +1 -0
- package/dist/lib/observability/mcp-semconv.js +87 -0
- package/dist/lib/observability/mcp-semconv.js.map +1 -0
- package/dist/lib/observability/mcp-semconv.test.d.ts +2 -0
- package/dist/lib/observability/mcp-semconv.test.d.ts.map +1 -0
- package/dist/lib/observability/mcp-semconv.test.js +168 -0
- package/dist/lib/observability/mcp-semconv.test.js.map +1 -0
- package/dist/lib/observability/metrics.d.ts +100 -0
- package/dist/lib/observability/metrics.d.ts.map +1 -0
- package/dist/lib/observability/metrics.js +429 -0
- package/dist/lib/observability/metrics.js.map +1 -0
- package/dist/lib/observability/metrics.test.d.ts.map +1 -0
- package/dist/lib/observability/metrics.test.js +191 -0
- package/dist/lib/observability/metrics.test.js.map +1 -0
- package/dist/lib/observability/observability-test-constants.d.ts +34 -0
- package/dist/lib/observability/observability-test-constants.d.ts.map +1 -0
- package/dist/lib/observability/observability-test-constants.js +55 -0
- package/dist/lib/observability/observability-test-constants.js.map +1 -0
- package/dist/lib/observability/opentelemetry-resources.test.d.ts +2 -0
- package/dist/lib/observability/opentelemetry-resources.test.d.ts.map +1 -0
- package/dist/lib/observability/opentelemetry-resources.test.js +19 -0
- package/dist/lib/observability/opentelemetry-resources.test.js.map +1 -0
- package/dist/lib/observability/parse-stats.d.ts.map +1 -0
- package/dist/lib/observability/parse-stats.js +207 -0
- package/dist/lib/observability/parse-stats.js.map +1 -0
- package/dist/lib/observability/parse-stats.test.d.ts.map +1 -0
- package/dist/lib/observability/parse-stats.test.js +287 -0
- package/dist/lib/observability/parse-stats.test.js.map +1 -0
- package/dist/lib/observability/render-trace-tree.d.ts +31 -0
- package/dist/lib/observability/render-trace-tree.d.ts.map +1 -0
- package/dist/lib/observability/render-trace-tree.js +95 -0
- package/dist/lib/observability/render-trace-tree.js.map +1 -0
- package/dist/lib/observability/render-trace-tree.test.d.ts +5 -0
- package/dist/lib/observability/render-trace-tree.test.d.ts.map +1 -0
- package/dist/lib/observability/render-trace-tree.test.js +97 -0
- package/dist/lib/observability/render-trace-tree.test.js.map +1 -0
- package/dist/lib/observability/span-attributes.d.ts +27 -0
- package/dist/lib/observability/span-attributes.d.ts.map +1 -0
- package/dist/lib/observability/span-attributes.js +85 -0
- package/dist/lib/observability/span-attributes.js.map +1 -0
- package/dist/lib/observability/trace-anomaly-detector.d.ts +23 -0
- package/dist/lib/observability/trace-anomaly-detector.d.ts.map +1 -0
- package/dist/lib/observability/trace-anomaly-detector.js +211 -0
- package/dist/lib/observability/trace-anomaly-detector.js.map +1 -0
- package/dist/lib/observability/trace-anomaly-detector.test.d.ts +5 -0
- package/dist/lib/observability/trace-anomaly-detector.test.d.ts.map +1 -0
- package/dist/lib/observability/trace-anomaly-detector.test.js +224 -0
- package/dist/lib/observability/trace-anomaly-detector.test.js.map +1 -0
- package/dist/lib/observability/trace-anomaly-schemas.d.ts +189 -0
- package/dist/lib/observability/trace-anomaly-schemas.d.ts.map +1 -0
- package/dist/lib/observability/trace-anomaly-schemas.js +167 -0
- package/dist/lib/observability/trace-anomaly-schemas.js.map +1 -0
- package/dist/lib/privacy/content-redaction.d.ts +141 -0
- package/dist/lib/privacy/content-redaction.d.ts.map +1 -0
- package/dist/lib/privacy/content-redaction.js +210 -0
- package/dist/lib/privacy/content-redaction.js.map +1 -0
- package/dist/lib/privacy/content-redaction.test.d.ts +2 -0
- package/dist/lib/privacy/content-redaction.test.d.ts.map +1 -0
- package/dist/lib/privacy/content-redaction.test.js +302 -0
- package/dist/lib/privacy/content-redaction.test.js.map +1 -0
- package/dist/lib/quality/bucket-utils.d.ts +17 -0
- package/dist/lib/quality/bucket-utils.d.ts.map +1 -0
- package/dist/lib/quality/bucket-utils.js +31 -0
- package/dist/lib/quality/bucket-utils.js.map +1 -0
- package/dist/lib/quality/bucket-utils.test.d.ts +2 -0
- package/dist/lib/quality/bucket-utils.test.d.ts.map +1 -0
- package/dist/lib/quality/bucket-utils.test.js +42 -0
- package/dist/lib/quality/bucket-utils.test.js.map +1 -0
- package/dist/lib/quality/qfe-backtest-detail.test.d.ts +5 -0
- package/dist/lib/quality/qfe-backtest-detail.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-backtest-detail.test.js +179 -0
- package/dist/lib/quality/qfe-backtest-detail.test.js.map +1 -0
- package/dist/lib/quality/qfe-calibration-paths.test.d.ts +5 -0
- package/dist/lib/quality/qfe-calibration-paths.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-calibration-paths.test.js +203 -0
- package/dist/lib/quality/qfe-calibration-paths.test.js.map +1 -0
- package/dist/lib/quality/qfe-correlation-helpers.test.d.ts +6 -0
- package/dist/lib/quality/qfe-correlation-helpers.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-correlation-helpers.test.js +143 -0
- package/dist/lib/quality/qfe-correlation-helpers.test.js.map +1 -0
- package/dist/lib/quality/qfe-cqi-paths.test.d.ts +6 -0
- package/dist/lib/quality/qfe-cqi-paths.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-cqi-paths.test.js +231 -0
- package/dist/lib/quality/qfe-cqi-paths.test.js.map +1 -0
- package/dist/lib/quality/qfe-critic-internals.test.d.ts +6 -0
- package/dist/lib/quality/qfe-critic-internals.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-critic-internals.test.js +191 -0
- package/dist/lib/quality/qfe-critic-internals.test.js.map +1 -0
- package/dist/lib/quality/qfe-derived-paths.test.d.ts +2 -0
- package/dist/lib/quality/qfe-derived-paths.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-derived-paths.test.js +372 -0
- package/dist/lib/quality/qfe-derived-paths.test.js.map +1 -0
- package/dist/lib/quality/qfe-dynamics-paths.test.d.ts +8 -0
- package/dist/lib/quality/qfe-dynamics-paths.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-dynamics-paths.test.js +223 -0
- package/dist/lib/quality/qfe-dynamics-paths.test.js.map +1 -0
- package/dist/lib/quality/qfe-granger-internals.test.d.ts +6 -0
- package/dist/lib/quality/qfe-granger-internals.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-granger-internals.test.js +158 -0
- package/dist/lib/quality/qfe-granger-internals.test.js.map +1 -0
- package/dist/lib/quality/qfe-label-normalize.test.d.ts +7 -0
- package/dist/lib/quality/qfe-label-normalize.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-label-normalize.test.js +332 -0
- package/dist/lib/quality/qfe-label-normalize.test.js.map +1 -0
- package/dist/lib/quality/qfe-ordinal-edge.test.d.ts +6 -0
- package/dist/lib/quality/qfe-ordinal-edge.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-ordinal-edge.test.js +98 -0
- package/dist/lib/quality/qfe-ordinal-edge.test.js.map +1 -0
- package/dist/lib/quality/qfe-roles-detail.test.d.ts +5 -0
- package/dist/lib/quality/qfe-roles-detail.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-roles-detail.test.js +115 -0
- package/dist/lib/quality/qfe-roles-detail.test.js.map +1 -0
- package/dist/lib/quality/qfe-rolling-detail.test.d.ts +7 -0
- package/dist/lib/quality/qfe-rolling-detail.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-rolling-detail.test.js +249 -0
- package/dist/lib/quality/qfe-rolling-detail.test.js.map +1 -0
- package/dist/lib/quality/qfe-stats-internals.test.d.ts +7 -0
- package/dist/lib/quality/qfe-stats-internals.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-stats-internals.test.js +143 -0
- package/dist/lib/quality/qfe-stats-internals.test.js.map +1 -0
- package/dist/lib/quality/qfe-streaming.test.d.ts +5 -0
- package/dist/lib/quality/qfe-streaming.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-streaming.test.js +239 -0
- package/dist/lib/quality/qfe-streaming.test.js.map +1 -0
- package/dist/lib/quality/qfe-sweep-detail.test.d.ts +6 -0
- package/dist/lib/quality/qfe-sweep-detail.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-sweep-detail.test.js +291 -0
- package/dist/lib/quality/qfe-sweep-detail.test.js.map +1 -0
- package/dist/lib/quality/quality-alerts.d.ts +23 -0
- package/dist/lib/quality/quality-alerts.d.ts.map +1 -0
- package/dist/lib/quality/quality-alerts.js +89 -0
- package/dist/lib/quality/quality-alerts.js.map +1 -0
- package/dist/lib/quality/quality-alerts.test.d.ts +2 -0
- package/dist/lib/quality/quality-alerts.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-alerts.test.js +86 -0
- package/dist/lib/quality/quality-alerts.test.js.map +1 -0
- package/dist/lib/quality/quality-constants.d.ts +294 -0
- package/dist/lib/quality/quality-constants.d.ts.map +1 -0
- package/dist/lib/quality/quality-constants.js +335 -0
- package/dist/lib/quality/quality-constants.js.map +1 -0
- package/dist/lib/quality/quality-feature-engineering.d.ts +1071 -0
- package/dist/lib/quality/quality-feature-engineering.d.ts.map +1 -0
- package/dist/lib/quality/quality-feature-engineering.js +2076 -0
- package/dist/lib/quality/quality-feature-engineering.js.map +1 -0
- package/dist/lib/quality/quality-feature-engineering.test.d.ts +5 -0
- package/dist/lib/quality/quality-feature-engineering.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-feature-engineering.test.js +2908 -0
- package/dist/lib/quality/quality-feature-engineering.test.js.map +1 -0
- package/dist/lib/quality/quality-metrics.d.ts +943 -0
- package/dist/lib/quality/quality-metrics.d.ts.map +1 -0
- package/dist/lib/quality/quality-metrics.js +1151 -0
- package/dist/lib/quality/quality-metrics.js.map +1 -0
- package/dist/lib/quality/quality-metrics.test.d.ts +5 -0
- package/dist/lib/quality/quality-metrics.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-metrics.test.js +2766 -0
- package/dist/lib/quality/quality-metrics.test.js.map +1 -0
- package/dist/lib/quality/quality-multi-agent.d.ts +106 -0
- package/dist/lib/quality/quality-multi-agent.d.ts.map +1 -0
- package/dist/lib/quality/quality-multi-agent.js +124 -0
- package/dist/lib/quality/quality-multi-agent.js.map +1 -0
- package/dist/lib/quality/quality-multi-agent.test.d.ts +6 -0
- package/dist/lib/quality/quality-multi-agent.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-multi-agent.test.js +163 -0
- package/dist/lib/quality/quality-multi-agent.test.js.map +1 -0
- package/dist/lib/quality/quality-sla.d.ts +35 -0
- package/dist/lib/quality/quality-sla.d.ts.map +1 -0
- package/dist/lib/quality/quality-sla.js +62 -0
- package/dist/lib/quality/quality-sla.js.map +1 -0
- package/dist/lib/quality/quality-sla.test.d.ts +5 -0
- package/dist/lib/quality/quality-sla.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-sla.test.js +144 -0
- package/dist/lib/quality/quality-sla.test.js.map +1 -0
- package/dist/lib/quality/quality-test-constants.d.ts +23 -0
- package/dist/lib/quality/quality-test-constants.d.ts.map +1 -0
- package/dist/lib/quality/quality-test-constants.js +25 -0
- package/dist/lib/quality/quality-test-constants.js.map +1 -0
- package/dist/lib/quality/quality-trends.d.ts +101 -0
- package/dist/lib/quality/quality-trends.d.ts.map +1 -0
- package/dist/lib/quality/quality-trends.js +299 -0
- package/dist/lib/quality/quality-trends.js.map +1 -0
- package/dist/lib/quality/quality-trends.test.d.ts +6 -0
- package/dist/lib/quality/quality-trends.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-trends.test.js +377 -0
- package/dist/lib/quality/quality-trends.test.js.map +1 -0
- package/dist/lib/quality/quality-views.d.ts +966 -0
- package/dist/lib/quality/quality-views.d.ts.map +1 -0
- package/dist/lib/quality/quality-views.js +367 -0
- package/dist/lib/quality/quality-views.js.map +1 -0
- package/dist/lib/quality/quality-views.test.d.ts +6 -0
- package/dist/lib/quality/quality-views.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-views.test.js +262 -0
- package/dist/lib/quality/quality-views.test.js.map +1 -0
- package/dist/lib/quality/quality-visualization.d.ts +112 -0
- package/dist/lib/quality/quality-visualization.d.ts.map +1 -0
- package/dist/lib/quality/quality-visualization.js +136 -0
- package/dist/lib/quality/quality-visualization.js.map +1 -0
- package/dist/lib/quality/quality-visualization.test.d.ts +5 -0
- package/dist/lib/quality/quality-visualization.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-visualization.test.js +189 -0
- package/dist/lib/quality/quality-visualization.test.js.map +1 -0
- package/dist/lib/resilience/cache.d.ts +56 -0
- package/dist/lib/resilience/cache.d.ts.map +1 -0
- package/dist/lib/resilience/cache.js +96 -0
- package/dist/lib/resilience/cache.js.map +1 -0
- package/dist/lib/resilience/cache.test.d.ts.map +1 -0
- package/dist/lib/resilience/cache.test.js +106 -0
- package/dist/lib/resilience/cache.test.js.map +1 -0
- package/dist/lib/resilience/circuit-breaker.d.ts +147 -0
- package/dist/lib/resilience/circuit-breaker.d.ts.map +1 -0
- package/dist/lib/resilience/circuit-breaker.js +251 -0
- package/dist/lib/resilience/circuit-breaker.js.map +1 -0
- package/dist/lib/resilience/circuit-breaker.test.d.ts.map +1 -0
- package/dist/lib/resilience/circuit-breaker.test.js +266 -0
- package/dist/lib/resilience/circuit-breaker.test.js.map +1 -0
- package/dist/lib/resilience/toon-encoder.d.ts +31 -0
- package/dist/lib/resilience/toon-encoder.d.ts.map +1 -0
- package/dist/lib/resilience/toon-encoder.js +66 -0
- package/dist/lib/resilience/toon-encoder.js.map +1 -0
- package/dist/lib/resilience/toon-encoder.test.d.ts.map +1 -0
- package/dist/lib/resilience/toon-encoder.test.js +86 -0
- package/dist/lib/resilience/toon-encoder.test.js.map +1 -0
- package/dist/lib/testing/mock-llm-builder.d.ts +139 -0
- package/dist/lib/testing/mock-llm-builder.d.ts.map +1 -0
- package/dist/lib/testing/mock-llm-builder.js +254 -0
- package/dist/lib/testing/mock-llm-builder.js.map +1 -0
- package/dist/lib/testing/mock-llm-builder.test.d.ts +5 -0
- package/dist/lib/testing/mock-llm-builder.test.d.ts.map +1 -0
- package/dist/lib/testing/mock-llm-builder.test.js +304 -0
- package/dist/lib/testing/mock-llm-builder.test.js.map +1 -0
- package/dist/lib/validation/api-schemas.d.ts +705 -0
- package/dist/lib/validation/api-schemas.d.ts.map +1 -0
- package/dist/lib/validation/api-schemas.js +351 -0
- package/dist/lib/validation/api-schemas.js.map +1 -0
- package/dist/lib/validation/api-schemas.test.d.ts +5 -0
- package/dist/lib/validation/api-schemas.test.d.ts.map +1 -0
- package/dist/lib/validation/api-schemas.test.js +427 -0
- package/dist/lib/validation/api-schemas.test.js.map +1 -0
- package/dist/lib/validation/dashboard-schemas.d.ts +203 -0
- package/dist/lib/validation/dashboard-schemas.d.ts.map +1 -0
- package/dist/lib/validation/dashboard-schemas.js +186 -0
- package/dist/lib/validation/dashboard-schemas.js.map +1 -0
- package/dist/lib/validation/dashboard-schemas.test.d.ts +5 -0
- package/dist/lib/validation/dashboard-schemas.test.d.ts.map +1 -0
- package/dist/lib/validation/dashboard-schemas.test.js +353 -0
- package/dist/lib/validation/dashboard-schemas.test.js.map +1 -0
- package/dist/server.d.ts +2 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +158 -144
- package/dist/server.js.map +1 -1
- package/dist/server.test.js +102 -95
- package/dist/server.test.js.map +1 -1
- package/dist/test-helpers/assertions.d.ts +6 -0
- package/dist/test-helpers/assertions.d.ts.map +1 -0
- package/dist/test-helpers/assertions.js +11 -0
- package/dist/test-helpers/assertions.js.map +1 -0
- package/dist/test-helpers/env-utils.d.ts +0 -64
- package/dist/test-helpers/env-utils.d.ts.map +1 -1
- package/dist/test-helpers/env-utils.js +0 -100
- package/dist/test-helpers/env-utils.js.map +1 -1
- package/dist/test-helpers/fuzz-generators.d.ts.map +1 -1
- package/dist/test-helpers/fuzz-generators.js +62 -22
- package/dist/test-helpers/fuzz-generators.js.map +1 -1
- package/dist/test-helpers/index.d.ts +3 -2
- package/dist/test-helpers/index.d.ts.map +1 -1
- package/dist/test-helpers/index.js +4 -2
- package/dist/test-helpers/index.js.map +1 -1
- package/dist/test-helpers/memfs-utils.test.js +81 -76
- package/dist/test-helpers/memfs-utils.test.js.map +1 -1
- package/dist/test-helpers/mock-backends.d.ts +19 -17
- package/dist/test-helpers/mock-backends.d.ts.map +1 -1
- package/dist/test-helpers/mock-backends.js +16 -4
- package/dist/test-helpers/mock-backends.js.map +1 -1
- package/dist/test-helpers/mock-backends.test.js +43 -112
- package/dist/test-helpers/mock-backends.test.js.map +1 -1
- package/dist/test-helpers/race-condition-helpers.d.ts.map +1 -1
- package/dist/test-helpers/race-condition-helpers.js +3 -2
- package/dist/test-helpers/race-condition-helpers.js.map +1 -1
- package/dist/test-helpers/schema-validators.d.ts +2 -2
- package/dist/test-helpers/schema-validators.d.ts.map +1 -1
- package/dist/test-helpers/schema-validators.js +35 -31
- package/dist/test-helpers/schema-validators.js.map +1 -1
- package/dist/test-helpers/test-constants.d.ts +74 -0
- package/dist/test-helpers/test-constants.d.ts.map +1 -0
- package/dist/test-helpers/test-constants.js +78 -0
- package/dist/test-helpers/test-constants.js.map +1 -0
- package/dist/test-helpers/test-data-builders.d.ts +25 -7
- package/dist/test-helpers/test-data-builders.d.ts.map +1 -1
- package/dist/test-helpers/test-data-builders.js +32 -9
- package/dist/test-helpers/test-data-builders.js.map +1 -1
- package/dist/test-helpers/test-data-builders.test.js +116 -107
- package/dist/test-helpers/test-data-builders.test.js.map +1 -1
- package/dist/test-helpers/tool-validators.d.ts +1 -1
- package/dist/test-helpers/tool-validators.d.ts.map +1 -1
- package/dist/test-helpers/tool-validators.js +10 -10
- package/dist/test-helpers/tool-validators.js.map +1 -1
- package/dist/tools/audit-trail.d.ts +170 -0
- package/dist/tools/audit-trail.d.ts.map +1 -0
- package/dist/tools/audit-trail.js +109 -0
- package/dist/tools/audit-trail.js.map +1 -0
- package/dist/tools/audit-trail.test.d.ts +5 -0
- package/dist/tools/audit-trail.test.d.ts.map +1 -0
- package/dist/tools/audit-trail.test.js +122 -0
- package/dist/tools/audit-trail.test.js.map +1 -0
- package/dist/tools/context-stats.d.ts +6 -20
- package/dist/tools/context-stats.d.ts.map +1 -1
- package/dist/tools/context-stats.js +106 -88
- package/dist/tools/context-stats.js.map +1 -1
- package/dist/tools/context-stats.test.js +109 -60
- package/dist/tools/context-stats.test.js.map +1 -1
- package/dist/tools/detect-trace-anomalies.d.ts +123 -0
- package/dist/tools/detect-trace-anomalies.d.ts.map +1 -0
- package/dist/tools/detect-trace-anomalies.js +66 -0
- package/dist/tools/detect-trace-anomalies.js.map +1 -0
- package/dist/tools/estimate-cost.d.ts +77 -0
- package/dist/tools/estimate-cost.d.ts.map +1 -0
- package/dist/tools/estimate-cost.js +104 -0
- package/dist/tools/estimate-cost.js.map +1 -0
- package/dist/tools/estimate-cost.test.d.ts +5 -0
- package/dist/tools/estimate-cost.test.d.ts.map +1 -0
- package/dist/tools/estimate-cost.test.js +343 -0
- package/dist/tools/estimate-cost.test.js.map +1 -0
- package/dist/tools/export-base.d.ts +77 -0
- package/dist/tools/export-base.d.ts.map +1 -0
- package/dist/tools/export-base.js +150 -0
- package/dist/tools/export-base.js.map +1 -0
- package/dist/tools/export-base.test.d.ts +18 -0
- package/dist/tools/export-base.test.d.ts.map +1 -0
- package/dist/tools/export-base.test.js +220 -0
- package/dist/tools/export-base.test.js.map +1 -0
- package/dist/tools/export-confident.d.ts +94 -90
- package/dist/tools/export-confident.d.ts.map +1 -1
- package/dist/tools/export-confident.js +17 -115
- package/dist/tools/export-confident.js.map +1 -1
- package/dist/tools/export-confident.test.js +79 -75
- package/dist/tools/export-confident.test.js.map +1 -1
- package/dist/tools/export-datadog.d.ts +77 -116
- package/dist/tools/export-datadog.d.ts.map +1 -1
- package/dist/tools/export-datadog.js +38 -40
- package/dist/tools/export-datadog.js.map +1 -1
- package/dist/tools/export-datadog.test.js +122 -165
- package/dist/tools/export-datadog.test.js.map +1 -1
- package/dist/tools/export-jaeger.d.ts +100 -0
- package/dist/tools/export-jaeger.d.ts.map +1 -0
- package/dist/tools/export-jaeger.js +154 -0
- package/dist/tools/export-jaeger.js.map +1 -0
- package/dist/tools/export-jaeger.test.d.ts +2 -0
- package/dist/tools/export-jaeger.test.d.ts.map +1 -0
- package/dist/tools/export-jaeger.test.js +113 -0
- package/dist/tools/export-jaeger.test.js.map +1 -0
- package/dist/tools/export-langfuse.d.ts +78 -80
- package/dist/tools/export-langfuse.d.ts.map +1 -1
- package/dist/tools/export-langfuse.js +15 -113
- package/dist/tools/export-langfuse.js.map +1 -1
- package/dist/tools/export-langfuse.test.js +70 -81
- package/dist/tools/export-langfuse.test.js.map +1 -1
- package/dist/tools/export-phoenix.d.ts +115 -90
- package/dist/tools/export-phoenix.d.ts.map +1 -1
- package/dist/tools/export-phoenix.js +29 -117
- package/dist/tools/export-phoenix.js.map +1 -1
- package/dist/tools/export-phoenix.test.js +95 -94
- package/dist/tools/export-phoenix.test.js.map +1 -1
- package/dist/tools/get-trace-url.d.ts +2 -10
- package/dist/tools/get-trace-url.d.ts.map +1 -1
- package/dist/tools/get-trace-url.js +5 -8
- package/dist/tools/get-trace-url.js.map +1 -1
- package/dist/tools/get-trace-url.test.js +81 -399
- package/dist/tools/get-trace-url.test.js.map +1 -1
- package/dist/tools/hallucination-detection.d.ts +203 -0
- package/dist/tools/hallucination-detection.d.ts.map +1 -0
- package/dist/tools/hallucination-detection.js +189 -0
- package/dist/tools/hallucination-detection.js.map +1 -0
- package/dist/tools/hallucination-detection.test.d.ts +5 -0
- package/dist/tools/hallucination-detection.test.d.ts.map +1 -0
- package/dist/tools/hallucination-detection.test.js +529 -0
- package/dist/tools/hallucination-detection.test.js.map +1 -0
- package/dist/tools/health-check.d.ts +9 -16
- package/dist/tools/health-check.d.ts.map +1 -1
- package/dist/tools/health-check.js +88 -101
- package/dist/tools/health-check.js.map +1 -1
- package/dist/tools/health-check.test.js +72 -165
- package/dist/tools/health-check.test.js.map +1 -1
- package/dist/tools/index.d.ts +13 -0
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +13 -0
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/ingest-constants.d.ts +8 -0
- package/dist/tools/ingest-constants.d.ts.map +1 -0
- package/dist/tools/ingest-constants.js +8 -0
- package/dist/tools/ingest-constants.js.map +1 -0
- package/dist/tools/ingest-spans.d.ts +45 -0
- package/dist/tools/ingest-spans.d.ts.map +1 -0
- package/dist/tools/ingest-spans.js +129 -0
- package/dist/tools/ingest-spans.js.map +1 -0
- package/dist/tools/ingest-spans.test.d.ts +5 -0
- package/dist/tools/ingest-spans.test.d.ts.map +1 -0
- package/dist/tools/ingest-spans.test.js +250 -0
- package/dist/tools/ingest-spans.test.js.map +1 -0
- package/dist/tools/ingest-traces.d.ts +76 -0
- package/dist/tools/ingest-traces.d.ts.map +1 -0
- package/dist/tools/ingest-traces.js +164 -0
- package/dist/tools/ingest-traces.js.map +1 -0
- package/dist/tools/ingest-traces.test.d.ts +5 -0
- package/dist/tools/ingest-traces.test.d.ts.map +1 -0
- package/dist/tools/ingest-traces.test.js +483 -0
- package/dist/tools/ingest-traces.test.js.map +1 -0
- package/dist/tools/inject-evaluations.d.ts +136 -1197
- package/dist/tools/inject-evaluations.d.ts.map +1 -1
- package/dist/tools/inject-evaluations.js +65 -53
- package/dist/tools/inject-evaluations.js.map +1 -1
- package/dist/tools/inject-evaluations.test.js +83 -71
- package/dist/tools/inject-evaluations.test.js.map +1 -1
- package/dist/tools/manage-datasets.d.ts +850 -0
- package/dist/tools/manage-datasets.d.ts.map +1 -0
- package/dist/tools/manage-datasets.js +139 -0
- package/dist/tools/manage-datasets.js.map +1 -0
- package/dist/tools/manage-datasets.test.d.ts +5 -0
- package/dist/tools/manage-datasets.test.d.ts.map +1 -0
- package/dist/tools/manage-datasets.test.js +430 -0
- package/dist/tools/manage-datasets.test.js.map +1 -0
- package/dist/tools/multi-agent-coordination.d.ts +178 -0
- package/dist/tools/multi-agent-coordination.d.ts.map +1 -0
- package/dist/tools/multi-agent-coordination.js +270 -0
- package/dist/tools/multi-agent-coordination.js.map +1 -0
- package/dist/tools/multi-agent-coordination.test.d.ts +5 -0
- package/dist/tools/multi-agent-coordination.test.d.ts.map +1 -0
- package/dist/tools/multi-agent-coordination.test.js +530 -0
- package/dist/tools/multi-agent-coordination.test.js.map +1 -0
- package/dist/tools/query-evaluations.d.ts +147 -105
- package/dist/tools/query-evaluations.d.ts.map +1 -1
- package/dist/tools/query-evaluations.js +205 -178
- package/dist/tools/query-evaluations.js.map +1 -1
- package/dist/tools/query-evaluations.test.js +386 -391
- package/dist/tools/query-evaluations.test.js.map +1 -1
- package/dist/tools/query-llm-events.d.ts +100 -75
- package/dist/tools/query-llm-events.d.ts.map +1 -1
- package/dist/tools/query-llm-events.js +106 -80
- package/dist/tools/query-llm-events.js.map +1 -1
- package/dist/tools/query-llm-events.test.js +183 -346
- package/dist/tools/query-llm-events.test.js.map +1 -1
- package/dist/tools/query-logs.d.ts +45 -58
- package/dist/tools/query-logs.d.ts.map +1 -1
- package/dist/tools/query-logs.js +54 -101
- package/dist/tools/query-logs.js.map +1 -1
- package/dist/tools/query-logs.test.js +118 -314
- package/dist/tools/query-logs.test.js.map +1 -1
- package/dist/tools/query-metric-histograms.d.ts +112 -0
- package/dist/tools/query-metric-histograms.d.ts.map +1 -0
- package/dist/tools/query-metric-histograms.js +69 -0
- package/dist/tools/query-metric-histograms.js.map +1 -0
- package/dist/tools/query-metric-histograms.test.d.ts +5 -0
- package/dist/tools/query-metric-histograms.test.d.ts.map +1 -0
- package/dist/tools/query-metric-histograms.test.js +209 -0
- package/dist/tools/query-metric-histograms.test.js.map +1 -0
- package/dist/tools/query-metrics.d.ts +159 -60
- package/dist/tools/query-metrics.d.ts.map +1 -1
- package/dist/tools/query-metrics.js +133 -111
- package/dist/tools/query-metrics.js.map +1 -1
- package/dist/tools/query-metrics.test.js +314 -389
- package/dist/tools/query-metrics.test.js.map +1 -1
- package/dist/tools/query-regressions.d.ts +76 -0
- package/dist/tools/query-regressions.d.ts.map +1 -0
- package/dist/tools/query-regressions.js +122 -0
- package/dist/tools/query-regressions.js.map +1 -0
- package/dist/tools/query-regressions.test.d.ts +8 -0
- package/dist/tools/query-regressions.test.d.ts.map +1 -0
- package/dist/tools/query-regressions.test.js +129 -0
- package/dist/tools/query-regressions.test.js.map +1 -0
- package/dist/tools/query-traces.d.ts +103 -71
- package/dist/tools/query-traces.d.ts.map +1 -1
- package/dist/tools/query-traces.js +75 -106
- package/dist/tools/query-traces.js.map +1 -1
- package/dist/tools/query-traces.test.js +140 -846
- package/dist/tools/query-traces.test.js.map +1 -1
- package/dist/tools/query-verifications.d.ts +55 -43
- package/dist/tools/query-verifications.d.ts.map +1 -1
- package/dist/tools/query-verifications.js +47 -46
- package/dist/tools/query-verifications.js.map +1 -1
- package/dist/tools/query-verifications.test.js +42 -35
- package/dist/tools/query-verifications.test.js.map +1 -1
- package/dist/tools/routing-telemetry.d.ts +168 -0
- package/dist/tools/routing-telemetry.d.ts.map +1 -0
- package/dist/tools/routing-telemetry.js +267 -0
- package/dist/tools/routing-telemetry.js.map +1 -0
- package/dist/tools/routing-telemetry.test.d.ts +5 -0
- package/dist/tools/routing-telemetry.test.d.ts.map +1 -0
- package/dist/tools/routing-telemetry.test.js +747 -0
- package/dist/tools/routing-telemetry.test.js.map +1 -0
- package/dist/tools/setup-claudeignore.d.ts +4 -32
- package/dist/tools/setup-claudeignore.d.ts.map +1 -1
- package/dist/tools/setup-claudeignore.js +18 -22
- package/dist/tools/setup-claudeignore.js.map +1 -1
- package/dist/tools/setup-claudeignore.test.js +50 -49
- package/dist/tools/setup-claudeignore.test.js.map +1 -1
- package/dist/tools/token-budget.d.ts +170 -0
- package/dist/tools/token-budget.d.ts.map +1 -0
- package/dist/tools/token-budget.js +219 -0
- package/dist/tools/token-budget.js.map +1 -0
- package/dist/tools/token-budget.test.d.ts +5 -0
- package/dist/tools/token-budget.test.d.ts.map +1 -0
- package/dist/tools/token-budget.test.js +293 -0
- package/dist/tools/token-budget.test.js.map +1 -0
- package/package.json +72 -10
- package/dist/backends/local-jsonl.test.d.ts +0 -2
- package/dist/backends/local-jsonl.test.d.ts.map +0 -1
- package/dist/backends/local-jsonl.test.js +0 -4651
- package/dist/backends/local-jsonl.test.js.map +0 -1
- package/dist/backends/signoz-api-circuit-breaker.test.d.ts +0 -6
- package/dist/backends/signoz-api-circuit-breaker.test.d.ts.map +0 -1
- package/dist/backends/signoz-api-circuit-breaker.test.js +0 -548
- package/dist/backends/signoz-api-circuit-breaker.test.js.map +0 -1
- package/dist/backends/signoz-api-rate-limiter.test.d.ts +0 -6
- package/dist/backends/signoz-api-rate-limiter.test.d.ts.map +0 -1
- package/dist/backends/signoz-api-rate-limiter.test.js +0 -390
- package/dist/backends/signoz-api-rate-limiter.test.js.map +0 -1
- package/dist/backends/signoz-api-ssrf.test.d.ts +0 -6
- package/dist/backends/signoz-api-ssrf.test.d.ts.map +0 -1
- package/dist/backends/signoz-api-ssrf.test.js +0 -216
- package/dist/backends/signoz-api-ssrf.test.js.map +0 -1
- package/dist/backends/signoz-api-test-helpers.d.ts +0 -80
- package/dist/backends/signoz-api-test-helpers.d.ts.map +0 -1
- package/dist/backends/signoz-api-test-helpers.js +0 -79
- package/dist/backends/signoz-api-test-helpers.js.map +0 -1
- package/dist/backends/signoz-api.d.ts +0 -109
- package/dist/backends/signoz-api.d.ts.map +0 -1
- package/dist/backends/signoz-api.integration.test.d.ts +0 -8
- package/dist/backends/signoz-api.integration.test.d.ts.map +0 -1
- package/dist/backends/signoz-api.integration.test.js +0 -137
- package/dist/backends/signoz-api.integration.test.js.map +0 -1
- package/dist/backends/signoz-api.js +0 -1132
- package/dist/backends/signoz-api.js.map +0 -1
- package/dist/backends/signoz-api.test.d.ts +0 -11
- package/dist/backends/signoz-api.test.d.ts.map +0 -1
- package/dist/backends/signoz-api.test.js +0 -832
- package/dist/backends/signoz-api.test.js.map +0 -1
- package/dist/lib/agent-as-judge.d.ts +0 -388
- package/dist/lib/agent-as-judge.d.ts.map +0 -1
- package/dist/lib/agent-as-judge.js +0 -740
- package/dist/lib/agent-as-judge.js.map +0 -1
- package/dist/lib/agent-as-judge.test.d.ts.map +0 -1
- package/dist/lib/agent-as-judge.test.js +0 -816
- package/dist/lib/agent-as-judge.test.js.map +0 -1
- package/dist/lib/cache.d.ts +0 -90
- package/dist/lib/cache.d.ts.map +0 -1
- package/dist/lib/cache.js +0 -133
- package/dist/lib/cache.js.map +0 -1
- package/dist/lib/cache.test.d.ts.map +0 -1
- package/dist/lib/cache.test.js +0 -105
- package/dist/lib/cache.test.js.map +0 -1
- package/dist/lib/circuit-breaker.d.ts +0 -101
- package/dist/lib/circuit-breaker.d.ts.map +0 -1
- package/dist/lib/circuit-breaker.js +0 -158
- package/dist/lib/circuit-breaker.js.map +0 -1
- package/dist/lib/circuit-breaker.test.d.ts.map +0 -1
- package/dist/lib/circuit-breaker.test.js +0 -263
- package/dist/lib/circuit-breaker.test.js.map +0 -1
- package/dist/lib/confident-export.d.ts +0 -101
- package/dist/lib/confident-export.d.ts.map +0 -1
- package/dist/lib/confident-export.js +0 -393
- package/dist/lib/confident-export.js.map +0 -1
- package/dist/lib/confident-export.test.d.ts.map +0 -1
- package/dist/lib/confident-export.test.js +0 -835
- package/dist/lib/confident-export.test.js.map +0 -1
- package/dist/lib/constants-symlink.test.d.ts.map +0 -1
- package/dist/lib/constants-symlink.test.js +0 -357
- package/dist/lib/constants-symlink.test.js.map +0 -1
- package/dist/lib/constants.d.ts +0 -183
- package/dist/lib/constants.d.ts.map +0 -1
- package/dist/lib/constants.js +0 -453
- package/dist/lib/constants.js.map +0 -1
- package/dist/lib/constants.test.d.ts.map +0 -1
- package/dist/lib/constants.test.js +0 -717
- package/dist/lib/constants.test.js.map +0 -1
- package/dist/lib/datadog-export.d.ts +0 -156
- package/dist/lib/datadog-export.d.ts.map +0 -1
- package/dist/lib/datadog-export.js +0 -464
- package/dist/lib/datadog-export.js.map +0 -1
- package/dist/lib/datadog-export.test.d.ts +0 -14
- package/dist/lib/datadog-export.test.d.ts.map +0 -1
- package/dist/lib/datadog-export.test.js +0 -890
- package/dist/lib/datadog-export.test.js.map +0 -1
- package/dist/lib/edge-cases.test.d.ts.map +0 -1
- package/dist/lib/edge-cases.test.js +0 -634
- package/dist/lib/edge-cases.test.js.map +0 -1
- package/dist/lib/error-sanitizer.d.ts +0 -57
- package/dist/lib/error-sanitizer.d.ts.map +0 -1
- package/dist/lib/error-sanitizer.js +0 -233
- package/dist/lib/error-sanitizer.js.map +0 -1
- package/dist/lib/error-sanitizer.test.d.ts.map +0 -1
- package/dist/lib/error-sanitizer.test.js +0 -528
- package/dist/lib/error-sanitizer.test.js.map +0 -1
- package/dist/lib/error-types.d.ts +0 -54
- package/dist/lib/error-types.d.ts.map +0 -1
- package/dist/lib/error-types.js +0 -154
- package/dist/lib/error-types.js.map +0 -1
- package/dist/lib/error-types.test.d.ts.map +0 -1
- package/dist/lib/error-types.test.js +0 -196
- package/dist/lib/error-types.test.js.map +0 -1
- package/dist/lib/evaluation-hooks.d.ts +0 -49
- package/dist/lib/evaluation-hooks.d.ts.map +0 -1
- package/dist/lib/evaluation-hooks.js +0 -488
- package/dist/lib/evaluation-hooks.js.map +0 -1
- package/dist/lib/evaluation-hooks.test.d.ts.map +0 -1
- package/dist/lib/evaluation-hooks.test.js +0 -624
- package/dist/lib/evaluation-hooks.test.js.map +0 -1
- package/dist/lib/export-utils.d.ts +0 -99
- package/dist/lib/export-utils.d.ts.map +0 -1
- package/dist/lib/export-utils.js +0 -238
- package/dist/lib/export-utils.js.map +0 -1
- package/dist/lib/export-utils.test.d.ts.map +0 -1
- package/dist/lib/export-utils.test.js +0 -193
- package/dist/lib/export-utils.test.js.map +0 -1
- package/dist/lib/file-utils.d.ts +0 -320
- package/dist/lib/file-utils.d.ts.map +0 -1
- package/dist/lib/file-utils.js +0 -816
- package/dist/lib/file-utils.js.map +0 -1
- package/dist/lib/file-utils.test.d.ts.map +0 -1
- package/dist/lib/file-utils.test.js +0 -1333
- package/dist/lib/file-utils.test.js.map +0 -1
- package/dist/lib/histogram.d.ts +0 -119
- package/dist/lib/histogram.d.ts.map +0 -1
- package/dist/lib/histogram.js +0 -202
- package/dist/lib/histogram.js.map +0 -1
- package/dist/lib/histogram.test.d.ts.map +0 -1
- package/dist/lib/histogram.test.js +0 -381
- package/dist/lib/histogram.test.js.map +0 -1
- package/dist/lib/indexer.d.ts +0 -96
- package/dist/lib/indexer.d.ts.map +0 -1
- package/dist/lib/indexer.js +0 -353
- package/dist/lib/indexer.js.map +0 -1
- package/dist/lib/indexer.test.d.ts.map +0 -1
- package/dist/lib/indexer.test.js +0 -696
- package/dist/lib/indexer.test.js.map +0 -1
- package/dist/lib/input-validator.d.ts +0 -115
- package/dist/lib/input-validator.d.ts.map +0 -1
- package/dist/lib/input-validator.fuzz.test.d.ts.map +0 -1
- package/dist/lib/input-validator.fuzz.test.js +0 -290
- package/dist/lib/input-validator.fuzz.test.js.map +0 -1
- package/dist/lib/input-validator.js +0 -304
- package/dist/lib/input-validator.js.map +0 -1
- package/dist/lib/input-validator.test.d.ts.map +0 -1
- package/dist/lib/input-validator.test.js +0 -415
- package/dist/lib/input-validator.test.js.map +0 -1
- package/dist/lib/instrumentation.d.ts +0 -153
- package/dist/lib/instrumentation.d.ts.map +0 -1
- package/dist/lib/instrumentation.integration.test.d.ts.map +0 -1
- package/dist/lib/instrumentation.integration.test.js +0 -589
- package/dist/lib/instrumentation.integration.test.js.map +0 -1
- package/dist/lib/instrumentation.js +0 -520
- package/dist/lib/instrumentation.js.map +0 -1
- package/dist/lib/instrumentation.test.d.ts.map +0 -1
- package/dist/lib/instrumentation.test.js +0 -821
- package/dist/lib/instrumentation.test.js.map +0 -1
- package/dist/lib/langfuse-export.d.ts +0 -125
- package/dist/lib/langfuse-export.d.ts.map +0 -1
- package/dist/lib/langfuse-export.js +0 -367
- package/dist/lib/langfuse-export.js.map +0 -1
- package/dist/lib/langfuse-export.test.d.ts.map +0 -1
- package/dist/lib/langfuse-export.test.js +0 -1007
- package/dist/lib/langfuse-export.test.js.map +0 -1
- package/dist/lib/llm-as-judge.d.ts +0 -657
- package/dist/lib/llm-as-judge.d.ts.map +0 -1
- package/dist/lib/llm-as-judge.js +0 -1397
- package/dist/lib/llm-as-judge.js.map +0 -1
- package/dist/lib/llm-as-judge.test.d.ts.map +0 -1
- package/dist/lib/llm-as-judge.test.js +0 -2409
- package/dist/lib/llm-as-judge.test.js.map +0 -1
- package/dist/lib/logger.d.ts +0 -46
- package/dist/lib/logger.d.ts.map +0 -1
- package/dist/lib/logger.js +0 -81
- package/dist/lib/logger.js.map +0 -1
- package/dist/lib/logger.test.d.ts.map +0 -1
- package/dist/lib/logger.test.js.map +0 -1
- package/dist/lib/metrics.d.ts +0 -62
- package/dist/lib/metrics.d.ts.map +0 -1
- package/dist/lib/metrics.js +0 -166
- package/dist/lib/metrics.js.map +0 -1
- package/dist/lib/metrics.test.d.ts.map +0 -1
- package/dist/lib/metrics.test.js +0 -189
- package/dist/lib/metrics.test.js.map +0 -1
- package/dist/lib/otlp-export.d.ts +0 -178
- package/dist/lib/otlp-export.d.ts.map +0 -1
- package/dist/lib/otlp-export.js +0 -382
- package/dist/lib/otlp-export.js.map +0 -1
- package/dist/lib/parse-stats.d.ts.map +0 -1
- package/dist/lib/parse-stats.js +0 -206
- package/dist/lib/parse-stats.js.map +0 -1
- package/dist/lib/parse-stats.test.d.ts.map +0 -1
- package/dist/lib/parse-stats.test.js +0 -283
- package/dist/lib/parse-stats.test.js.map +0 -1
- package/dist/lib/phoenix-export.d.ts +0 -109
- package/dist/lib/phoenix-export.d.ts.map +0 -1
- package/dist/lib/phoenix-export.js +0 -429
- package/dist/lib/phoenix-export.js.map +0 -1
- package/dist/lib/phoenix-export.test.d.ts.map +0 -1
- package/dist/lib/phoenix-export.test.js +0 -725
- package/dist/lib/phoenix-export.test.js.map +0 -1
- package/dist/lib/query-sanitizer.d.ts.map +0 -1
- package/dist/lib/query-sanitizer.js +0 -261
- package/dist/lib/query-sanitizer.js.map +0 -1
- package/dist/lib/query-sanitizer.test.d.ts.map +0 -1
- package/dist/lib/query-sanitizer.test.js +0 -400
- package/dist/lib/query-sanitizer.test.js.map +0 -1
- package/dist/lib/server-utils.d.ts +0 -93
- package/dist/lib/server-utils.d.ts.map +0 -1
- package/dist/lib/server-utils.js +0 -181
- package/dist/lib/server-utils.js.map +0 -1
- package/dist/lib/shared-schemas.d.ts +0 -87
- package/dist/lib/shared-schemas.d.ts.map +0 -1
- package/dist/lib/shared-schemas.js +0 -87
- package/dist/lib/shared-schemas.js.map +0 -1
- package/dist/lib/shared-schemas.test.d.ts.map +0 -1
- package/dist/lib/shared-schemas.test.js +0 -106
- package/dist/lib/shared-schemas.test.js.map +0 -1
- package/dist/lib/toon-encoder.d.ts +0 -26
- package/dist/lib/toon-encoder.d.ts.map +0 -1
- package/dist/lib/toon-encoder.js +0 -61
- package/dist/lib/toon-encoder.js.map +0 -1
- package/dist/lib/toon-encoder.test.d.ts.map +0 -1
- package/dist/lib/toon-encoder.test.js +0 -85
- package/dist/lib/toon-encoder.test.js.map +0 -1
- package/dist/lib/verification-events.d.ts +0 -100
- package/dist/lib/verification-events.d.ts.map +0 -1
- package/dist/lib/verification-events.js +0 -162
- package/dist/lib/verification-events.js.map +0 -1
- package/dist/lib/verification-events.test.d.ts.map +0 -1
- package/dist/lib/verification-events.test.js +0 -193
- package/dist/lib/verification-events.test.js.map +0 -1
- package/dist/tools/signoz.integration.test.d.ts +0 -8
- package/dist/tools/signoz.integration.test.d.ts.map +0 -1
- package/dist/tools/signoz.integration.test.js +0 -141
- package/dist/tools/signoz.integration.test.js.map +0 -1
- package/dist/types/evaluation-hooks.d.ts +0 -176
- package/dist/types/evaluation-hooks.d.ts.map +0 -1
- package/dist/types/evaluation-hooks.js +0 -49
- package/dist/types/evaluation-hooks.js.map +0 -1
- /package/dist/lib/{agent-as-judge.test.d.ts → agent-judge/agent-as-judge.test.d.ts} +0 -0
- /package/dist/lib/{verification-events.test.d.ts → audit/verification-events.test.d.ts} +0 -0
- /package/dist/lib/{constants-symlink.test.d.ts → core/constants-symlink.test.d.ts} +0 -0
- /package/dist/lib/{constants.test.d.ts → core/constants.test.d.ts} +0 -0
- /package/dist/lib/{edge-cases.test.d.ts → core/edge-cases.test.d.ts} +0 -0
- /package/dist/lib/{file-utils.test.d.ts → core/file-utils.test.d.ts} +0 -0
- /package/dist/lib/{input-validator.fuzz.test.d.ts → core/input-validator.fuzz.test.d.ts} +0 -0
- /package/dist/lib/{input-validator.test.d.ts → core/input-validator.test.d.ts} +0 -0
- /package/dist/lib/{logger.test.d.ts → core/logger.test.d.ts} +0 -0
- /package/dist/lib/{logger.test.js → core/logger.test.js} +0 -0
- /package/dist/lib/{shared-schemas.test.d.ts → core/shared-schemas.test.d.ts} +0 -0
- /package/dist/lib/{error-sanitizer.test.d.ts → errors/error-sanitizer.test.d.ts} +0 -0
- /package/dist/lib/{error-types.test.d.ts → errors/error-types.test.d.ts} +0 -0
- /package/dist/lib/{query-sanitizer.d.ts → errors/query-sanitizer.d.ts} +0 -0
- /package/dist/lib/{query-sanitizer.test.d.ts → errors/query-sanitizer.test.d.ts} +0 -0
- /package/dist/lib/{confident-export.test.d.ts → exports/confident-export.test.d.ts} +0 -0
- /package/dist/lib/{export-utils.test.d.ts → exports/export-utils.test.d.ts} +0 -0
- /package/dist/lib/{langfuse-export.test.d.ts → exports/langfuse-export.test.d.ts} +0 -0
- /package/dist/lib/{phoenix-export.test.d.ts → exports/phoenix-export.test.d.ts} +0 -0
- /package/dist/lib/{evaluation-hooks.test.d.ts → judge/evaluation-hooks.test.d.ts} +0 -0
- /package/dist/lib/{llm-as-judge.test.d.ts → judge/llm-as-judge.test.d.ts} +0 -0
- /package/dist/lib/{histogram.test.d.ts → observability/histogram.test.d.ts} +0 -0
- /package/dist/lib/{indexer.test.d.ts → observability/indexer.test.d.ts} +0 -0
- /package/dist/lib/{instrumentation.integration.test.d.ts → observability/instrumentation.integration.test.d.ts} +0 -0
- /package/dist/lib/{instrumentation.test.d.ts → observability/instrumentation.test.d.ts} +0 -0
- /package/dist/lib/{metrics.test.d.ts → observability/metrics.test.d.ts} +0 -0
- /package/dist/lib/{parse-stats.d.ts → observability/parse-stats.d.ts} +0 -0
- /package/dist/lib/{parse-stats.test.d.ts → observability/parse-stats.test.d.ts} +0 -0
- /package/dist/lib/{cache.test.d.ts → resilience/cache.test.d.ts} +0 -0
- /package/dist/lib/{circuit-breaker.test.d.ts → resilience/circuit-breaker.test.d.ts} +0 -0
- /package/dist/lib/{toon-encoder.test.d.ts → resilience/toon-encoder.test.d.ts} +0 -0
|
@@ -0,0 +1,2908 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for Feature Engineering (Design Doc Section 16)
|
|
3
|
+
*/
|
|
4
|
+
import { describe, it, beforeEach, afterEach } from 'vitest';
|
|
5
|
+
import * as assert from 'node:assert';
|
|
6
|
+
import { mkdtempSync, rmSync, writeFileSync } from 'fs';
|
|
7
|
+
import { join } from 'path';
|
|
8
|
+
import { tmpdir } from 'os';
|
|
9
|
+
import { scoreColorBand, adaptiveScoreColorBand, empiricalCDF, inferScoreDirection, labelToOrdinal, ordinalToCategory, computeCQI, computeCQISensitivity, computeMetricDynamics, computeCoverageWeightedConfidence, computeNormalizedEntropy, computeDegradationSignal, computeEWMA, computeMAD, detectEWMADrift, VARIANCE_INCREASE_THRESHOLD, computePearsonR, computeSpearmanR, computeCorrelationMatrix, computeGrangerCausality, pearsonPValue, benjaminiHochberg, injectDegradationScenario, METRIC_SCALE_STRATEGY, computePercentileDistribution, computePercentileRank, StreamingPercentile, computeDerivedFeatures, computeStdDev, computeRollingDegradationSignals, loadDegradationState, saveDegradationState, computePSI, computeCalibrationDistributions, loadCalibrationState, saveCalibrationState, shouldRecalibrate, MIN_QUANTILE_SAMPLE_SIZE, computeAHPWeights, computeCRITICWeights, computeHybridCQIWeights, tunedCQIWeightsSchema, computeTaPR, backtestDegradationConfig, sweepDegradationParams, sweepWithCrossValidation, BACKTEST_SWEEP, CURRENT_PRODUCTION_CONFIG, } from './quality-feature-engineering.js';
|
|
10
|
+
import { TEST_CORE_METRIC_COUNT, TEST_DECIMAL_EPSILON, TEST_LABEL_PASS_ORDINAL, TEST_SCORE_ADEQUATE, TEST_SCORE_BASELINE, TEST_SCORE_BORDERLINE, TEST_SCORE_EXCELLENT, TEST_SCORE_GOOD, TEST_SCORE_HIGH, TEST_SCORE_LOW, TEST_SCORE_MID, TEST_SCORE_POOR, TEST_SCORE_PASSING, TEST_SCORE_STRONG, TEST_SCORE_VERY_LOW, TEST_SCORE_WARNING, } from './quality-test-constants.js';
|
|
11
|
+
import { SCORE_BAND_ADEQUATE, MAD_CONSISTENCY_FACTOR, SCORE_BAND_POOR, PSI_RECALIBRATION_THRESHOLD, LATENCY_SKEW_SIGNAL_THRESHOLD, } from './quality-constants.js';
|
|
12
|
+
import { COUNT_EIGHT, COUNT_FIFTEEN, COUNT_FIFTY, COUNT_FIVE, COUNT_FORTY, COUNT_FOUR, COUNT_FOURTEEN, COUNT_HUNDRED, COUNT_NINE, COUNT_SEVEN, COUNT_SIX, COUNT_SIXTY, COUNT_TEN, COUNT_THREE, COUNT_THIRTY, COUNT_THOUSAND, COUNT_TWENTY, COUNT_TWENTY_FIVE, COUNT_TWO, DELTA, } from '../../test-helpers/test-constants.js';
|
|
13
|
+
import { requireDefined } from '../../test-helpers/assertions.js';
|
|
14
|
+
const TEST_MINIMIZE_GOOD_INPUT = 0.15; // minimize-direction score asserted to fall in the 'good' band
|
|
15
|
+
const TEST_MINIMIZE_ADEQUATE_INPUT = 0.35; // minimize-direction score asserted to fall in the 'adequate' band
|
|
16
|
+
const TEST_MINIMIZE_POOR_INPUT = 0.55; // minimize-direction score asserted to fall in the 'poor' band
|
|
17
|
+
const TEST_MAXIMIZE_FAILING_INPUT = 0.39; // maximize-direction score asserted to be 'failing' in the boundary test
|
|
18
|
+
const TEST_CDF_ABOVE_P90_INPUT = 0.98; // probe for the 'above p90' empiricalCDF case
|
|
19
|
+
const TEST_CDF_INTERPOLATION_INPUT = 0.75; // empiricalCDF interpolation probe; also the faithfulness score asserted 'excellent'
|
|
20
|
+
const TEST_CDF_INTERPOLATION_MIN = 0.25; // exclusive lower bound asserted for the interpolated CDF result
|
|
21
|
+
const TEST_VERY_LOW_HALLUCINATION = 0.001; // hallucination score asserted 'excellent' by adaptiveScoreColorBand
|
|
22
|
+
const TEST_CQI_WEIGHT_RELEVANCE = 0.25; // relevance weight assumed by the renormalization expectation
|
|
23
|
+
const TEST_CQI_WEIGHT_COHERENCE = 0.10; // coherence weight assumed by the renormalization expectation
|
|
24
|
+
const TEST_CQI_WEIGHT_SUM = 0.35; // relevance + coherence weights (0.25 + 0.10)
|
|
25
|
+
const TEST_TASK_COMPLETION_STRONG_SCORE = 0.88; // task_completion avg in the 7-metric computeCQI fixture
|
|
26
|
+
const TEST_CQI_CONTRIBUTION_TOLERANCE = 0.02; // max |sum(contributions) - value| accepted by the ER11 test
|
|
27
|
+
const TEST_CQI_CUSTOM_DELTA = 0.10;
|
|
28
|
+
const TEST_MIN_PERIOD_HOURS_BELOW_THRESHOLD = 0.001;
|
|
29
|
+
const TEST_SINGLE_TREND_CONFIDENCE = 0.275;
|
|
30
|
+
const TEST_DUAL_TREND_CONFIDENCE = 0.35;
|
|
31
|
+
const TEST_ENTROPY_DOMINANT_PRIMARY = 70;
|
|
32
|
+
const TEST_ENTROPY_NEAR_UNIFORM_MIN = 0.99;
|
|
33
|
+
const TEST_CONCENTRATED_COVERAGE_PRIMARY = 95;
|
|
34
|
+
const TEST_STABLE_SERIES_HIGH = 0.81;
|
|
35
|
+
const TEST_STABLE_SERIES_LOW = 0.79;
|
|
36
|
+
const TEST_STABLE_SERIES_HIGHER = 0.82;
|
|
37
|
+
const TEST_VARIANCE_STABLE_INPUT = 0.09;
|
|
38
|
+
const TEST_VARIANCE_INCREASING_INPUT = 0.125;
|
|
39
|
+
const TEST_VARIANCE_DECREASING_INPUT = 0.02;
|
|
40
|
+
const TEST_LATENCY_P95 = 10.0;
|
|
41
|
+
const TEST_LATENCY_P50 = 2.0;
|
|
42
|
+
const TEST_PEARSON_NEAR_PERFECT = 0.999;
|
|
43
|
+
const TEST_PVALUE_STRICT = 0.001;
|
|
44
|
+
const TEST_PEARSON_CASE_ONE_R = 0.6033;
|
|
45
|
+
const TEST_PEARSON_CASE_ONE_P = 0.0649;
|
|
46
|
+
const TEST_PEARSON_CASE_TWO_P = 0.0247;
|
|
47
|
+
const TEST_PEARSON_CASE_THREE_P = 0.1082;
|
|
48
|
+
const TEST_PEARSON_CASE_FOUR_P = 0.0056;
|
|
49
|
+
const TEST_PEARSON_COMPARISON_TOLERANCE = 0.002;
|
|
50
|
+
const TEST_PVALUE_CASE_MID = 0.002;
|
|
51
|
+
const TEST_PVALUE_CASE_HIGH = 0.003;
|
|
52
|
+
const TEST_SEQUENCE_TWELVE = 12;
|
|
53
|
+
const TEST_SEQUENCE_SIXTEEN = 16;
|
|
54
|
+
const TEST_SEQUENCE_EIGHTEEN = 18;
|
|
55
|
+
const TEST_PERCENTILE_DIVISOR_MAX_INDEX = 99;
|
|
56
|
+
const TEST_STREAMING_MONOTONIC_SAMPLE_SIZE = 500;
|
|
57
|
+
const TEST_VARIANCE_SPIKE_MULTIPLIER = 3.0;
|
|
58
|
+
const TEST_ABSOLUTE_EPSILON = 1e-10;
|
|
59
|
+
const TEST_STD_DEV_REFERENCE_HIGH = 2.1381;
|
|
60
|
+
const TEST_STD_DEV_REFERENCE_LOW = 0.7071;
|
|
61
|
+
const TEST_YEAR_2026 = 2026;
|
|
62
|
+
// ============================================================================
|
|
63
|
+
// Helper Factories
|
|
64
|
+
// ============================================================================
|
|
65
|
+
/**
 * Build a metric-result fixture for the tests in this file.
 *
 * Starts from a default record (populated `values`, empty `alerts`,
 * `status: 'healthy'`) and shallow-merges `overrides` on top. Because the
 * merge is shallow, an overridden `values` object replaces the default
 * `values` wholesale rather than being merged key by key.
 *
 * @param {object} [overrides={}] - Fields to lay over the defaults. Its `name`
 *   is also used as `displayName` unless `displayName` is overridden too.
 *   Omitting the argument yields the pure defaults (with `displayName`
 *   undefined) instead of throwing.
 * @returns {object} A freshly allocated fixture object.
 */
function makeMetricResult(overrides = {}) {
    return {
        // Mirrors `name` by default; a later `displayName` override still wins.
        displayName: overrides.name,
        values: {
            avg: TEST_SCORE_STRONG,
            p50: 0.84,
            p95: TEST_SCORE_PASSING,
            p99: null,
            min: TEST_SCORE_BASELINE,
            max: TEST_SCORE_EXCELLENT,
            count: COUNT_HUNDRED,
        },
        sampleCount: COUNT_HUNDRED,
        alerts: [],
        status: 'healthy',
        // Spread last so every caller-supplied field takes precedence.
        ...overrides,
    };
}
|
|
83
|
+
/**
 * Build a trend fixture for the tests in this file.
 *
 * The baseline describes a flat trend: direction 'stable', zero delta and
 * percent change, identical previous/current values, 'avg' aggregation.
 * Any field present in `overrides` replaces the corresponding baseline field.
 *
 * @param {object} [overrides] - Fields to lay over the flat-trend baseline.
 * @returns {object} A freshly allocated trend object.
 */
function makeTrend(overrides) {
    const flatTrend = {
        direction: 'stable',
        delta: 0,
        percentChange: 0,
        previousValue: TEST_SCORE_STRONG,
        currentValue: TEST_SCORE_STRONG,
        aggregation: 'avg',
    };
    // Overrides are spread second so caller-supplied fields win.
    return { ...flatTrend, ...overrides };
}
|
|
94
|
+
// ============================================================================
|
|
95
|
+
// Score Color Band Tests
|
|
96
|
+
// ============================================================================
|
|
97
|
+
describe('quality-feature-engineering', () => {
|
|
98
|
+
describe('scoreColorBand', () => {
|
|
99
|
+
it('returns correct bands for maximize direction', () => {
|
|
100
|
+
assert.strictEqual(scoreColorBand(TEST_SCORE_EXCELLENT, 'maximize'), 'excellent');
|
|
101
|
+
assert.strictEqual(scoreColorBand(TEST_SCORE_STRONG, 'maximize'), 'good');
|
|
102
|
+
assert.strictEqual(scoreColorBand(TEST_SCORE_ADEQUATE, 'maximize'), 'adequate');
|
|
103
|
+
assert.strictEqual(scoreColorBand(TEST_SCORE_BORDERLINE, 'maximize'), 'poor');
|
|
104
|
+
assert.strictEqual(scoreColorBand(TEST_SCORE_MID - TEST_SCORE_LOW, 'maximize'), 'failing');
|
|
105
|
+
});
|
|
106
|
+
it('returns inverted bands for minimize direction', () => {
|
|
107
|
+
assert.strictEqual(scoreColorBand(DELTA, 'minimize'), 'excellent');
|
|
108
|
+
assert.strictEqual(scoreColorBand(TEST_MINIMIZE_GOOD_INPUT, 'minimize'), 'good');
|
|
109
|
+
assert.strictEqual(scoreColorBand(TEST_MINIMIZE_ADEQUATE_INPUT, 'minimize'), 'adequate');
|
|
110
|
+
assert.strictEqual(scoreColorBand(TEST_MINIMIZE_POOR_INPUT, 'minimize'), 'poor');
|
|
111
|
+
assert.strictEqual(scoreColorBand(TEST_SCORE_GOOD, 'minimize'), 'failing');
|
|
112
|
+
});
|
|
113
|
+
it('defaults to maximize direction', () => {
|
|
114
|
+
assert.strictEqual(scoreColorBand(TEST_SCORE_EXCELLENT), 'excellent');
|
|
115
|
+
});
|
|
116
|
+
it('handles boundary values (>=)', () => {
|
|
117
|
+
assert.strictEqual(scoreColorBand(TEST_SCORE_HIGH, 'maximize'), 'excellent');
|
|
118
|
+
assert.strictEqual(scoreColorBand(TEST_SCORE_GOOD, 'maximize'), 'good');
|
|
119
|
+
assert.strictEqual(scoreColorBand(SCORE_BAND_ADEQUATE, 'maximize'), 'adequate');
|
|
120
|
+
assert.strictEqual(scoreColorBand(SCORE_BAND_POOR, 'maximize'), 'poor');
|
|
121
|
+
assert.strictEqual(scoreColorBand(TEST_MAXIMIZE_FAILING_INPUT, 'maximize'), 'failing');
|
|
122
|
+
});
|
|
123
|
+
});
|
|
124
|
+
describe('empiricalCDF', () => {
|
|
125
|
+
const dist = {
|
|
126
|
+
p10: TEST_SCORE_BASELINE, p25: TEST_SCORE_PASSING, p50: TEST_SCORE_GOOD, p75: TEST_SCORE_STRONG, p90: TEST_SCORE_EXCELLENT,
|
|
127
|
+
};
|
|
128
|
+
it('returns 0.5 for p50 value', () => {
|
|
129
|
+
assert.strictEqual(empiricalCDF(TEST_SCORE_GOOD, dist), TEST_SCORE_MID);
|
|
130
|
+
});
|
|
131
|
+
it('returns ~0.1 for p10 value', () => {
|
|
132
|
+
assert.ok(Math.abs(empiricalCDF(TEST_SCORE_BASELINE, dist) - TEST_SCORE_WARNING) < TEST_DECIMAL_EPSILON);
|
|
133
|
+
});
|
|
134
|
+
it('returns ~0.9 for p90 value', () => {
|
|
135
|
+
assert.ok(Math.abs(empiricalCDF(TEST_SCORE_EXCELLENT, dist) - TEST_SCORE_HIGH) < TEST_DECIMAL_EPSILON);
|
|
136
|
+
});
|
|
137
|
+
it('returns value < 0.1 for below p10', () => {
|
|
138
|
+
const result = empiricalCDF(TEST_SCORE_LOW, dist);
|
|
139
|
+
assert.ok(result < TEST_SCORE_WARNING, `Expected < ${TEST_SCORE_WARNING}, got ${result}`);
|
|
140
|
+
assert.ok(result >= 0, `Expected >= 0, got ${result}`);
|
|
141
|
+
});
|
|
142
|
+
it('returns value > 0.9 for above p90', () => {
|
|
143
|
+
const result = empiricalCDF(TEST_CDF_ABOVE_P90_INPUT, dist);
|
|
144
|
+
assert.ok(result > TEST_SCORE_HIGH, `Expected > ${TEST_SCORE_HIGH}, got ${result}`);
|
|
145
|
+
assert.ok(result <= 1, `Expected <= 1, got ${result}`);
|
|
146
|
+
});
|
|
147
|
+
it('interpolates between percentile points', () => {
|
|
148
|
+
const result = empiricalCDF(TEST_CDF_INTERPOLATION_INPUT, dist); // Between p25 (0.7) and p50 (0.8)
|
|
149
|
+
assert.ok(result > TEST_CDF_INTERPOLATION_MIN && result < TEST_SCORE_MID, `Expected between 0.25 and 0.5, got ${result}`);
|
|
150
|
+
});
|
|
151
|
+
it('handles negative values by clamping to >= 0', () => {
|
|
152
|
+
const result = empiricalCDF(-TEST_SCORE_WARNING, dist);
|
|
153
|
+
assert.ok(result >= 0, `Expected >= 0, got ${result}`);
|
|
154
|
+
assert.ok(result < TEST_SCORE_WARNING, `Expected < ${TEST_SCORE_WARNING}, got ${result}`);
|
|
155
|
+
});
|
|
156
|
+
it('returns 0.5 for NaN input', () => {
|
|
157
|
+
assert.strictEqual(empiricalCDF(NaN, dist), TEST_SCORE_MID);
|
|
158
|
+
});
|
|
159
|
+
it('returns 0.5 for Infinity input', () => {
|
|
160
|
+
assert.strictEqual(empiricalCDF(Infinity, dist), TEST_SCORE_MID);
|
|
161
|
+
assert.strictEqual(empiricalCDF(-Infinity, dist), TEST_SCORE_MID);
|
|
162
|
+
});
|
|
163
|
+
});
|
|
164
|
+
describe('adaptiveScoreColorBand', () => {
|
|
165
|
+
it('uses uniform strategy for unknown metrics', () => {
|
|
166
|
+
assert.strictEqual(adaptiveScoreColorBand(TEST_SCORE_EXCELLENT, 'unknown_metric'), 'excellent');
|
|
167
|
+
});
|
|
168
|
+
it('uses binary strategy for faithfulness', () => {
|
|
169
|
+
assert.strictEqual(adaptiveScoreColorBand(TEST_CDF_INTERPOLATION_INPUT, 'faithfulness'), 'excellent');
|
|
170
|
+
assert.strictEqual(adaptiveScoreColorBand(TEST_SCORE_ADEQUATE, 'faithfulness'), 'failing');
|
|
171
|
+
});
|
|
172
|
+
it('uses log strategy for hallucination', () => {
|
|
173
|
+
// Very low hallucination (0.001) -> logNorm = -log10(0.001)/3 = 1.0 -> excellent
|
|
174
|
+
assert.strictEqual(adaptiveScoreColorBand(TEST_VERY_LOW_HALLUCINATION, 'hallucination'), 'excellent');
|
|
175
|
+
// Low hallucination (0.01) -> logNorm = -log10(0.01)/3 = 0.667 -> adequate
|
|
176
|
+
assert.strictEqual(adaptiveScoreColorBand(TEST_DECIMAL_EPSILON, 'hallucination'), 'adequate');
|
|
177
|
+
// High hallucination (0.5) -> logNorm = -log10(0.5)/3 = 0.1 -> failing
|
|
178
|
+
assert.strictEqual(adaptiveScoreColorBand(TEST_SCORE_MID, 'hallucination'), 'failing');
|
|
179
|
+
});
|
|
180
|
+
it('uses step strategy for task_completion', () => {
|
|
181
|
+
assert.strictEqual(adaptiveScoreColorBand(TEST_SCORE_HIGH, 'task_completion'), 'excellent');
|
|
182
|
+
assert.strictEqual(adaptiveScoreColorBand(SCORE_BAND_ADEQUATE, 'task_completion'), 'adequate');
|
|
183
|
+
assert.strictEqual(adaptiveScoreColorBand(TEST_SCORE_LOW, 'task_completion'), 'failing');
|
|
184
|
+
});
|
|
185
|
+
it('uses quantile strategy with distribution for relevance', () => {
|
|
186
|
+
const dist = {
|
|
187
|
+
p10: TEST_SCORE_BASELINE,
|
|
188
|
+
p25: TEST_SCORE_PASSING,
|
|
189
|
+
p50: TEST_SCORE_GOOD,
|
|
190
|
+
p75: TEST_SCORE_STRONG,
|
|
191
|
+
p90: TEST_SCORE_EXCELLENT,
|
|
192
|
+
};
|
|
193
|
+
const band = adaptiveScoreColorBand(TEST_SCORE_GOOD, 'relevance', 'maximize', dist);
|
|
194
|
+
// 0.8 sits exactly at p50, so its quantile rank is 0.5.
|
|
195
|
+
// A rank of 0.5 satisfies v >= 0.4, which the uniform bands map to 'poor'.
|
|
196
|
+
// This is expected -- in a left-skewed distribution, p50 is not great
|
|
197
|
+
assert.ok(['poor', 'adequate'].includes(band));
|
|
198
|
+
});
|
|
199
|
+
it('falls back to uniform when no distribution for quantile metrics', () => {
|
|
200
|
+
assert.strictEqual(adaptiveScoreColorBand(TEST_SCORE_EXCELLENT, 'relevance'), 'excellent');
|
|
201
|
+
});
|
|
202
|
+
});
|
|
203
|
+
describe('inferScoreDirection', () => {
|
|
204
|
+
it('returns minimize for above threshold direction', () => {
|
|
205
|
+
assert.strictEqual(inferScoreDirection('above'), 'minimize');
|
|
206
|
+
});
|
|
207
|
+
it('returns maximize for below threshold direction', () => {
|
|
208
|
+
assert.strictEqual(inferScoreDirection('below'), 'maximize');
|
|
209
|
+
});
|
|
210
|
+
it('defaults to maximize for undefined', () => {
|
|
211
|
+
assert.strictEqual(inferScoreDirection(undefined), 'maximize');
|
|
212
|
+
});
|
|
213
|
+
});
|
|
214
|
+
// ============================================================================
|
|
215
|
+
// Label Ordinal Encoding Tests
|
|
216
|
+
// ============================================================================
|
|
217
|
+
describe('labelToOrdinal', () => {
|
|
218
|
+
it('maps excellent labels to ordinal 4 (Pass)', () => {
|
|
219
|
+
const result = labelToOrdinal('excellent');
|
|
220
|
+
assert.strictEqual(result.ordinal, TEST_LABEL_PASS_ORDINAL);
|
|
221
|
+
assert.strictEqual(result.category, 'Pass');
|
|
222
|
+
assert.strictEqual(result.mapped, true);
|
|
223
|
+
});
|
|
224
|
+
it('maps good labels to ordinal 3 (Pass)', () => {
|
|
225
|
+
assert.strictEqual(labelToOrdinal('relevant').ordinal, COUNT_THREE);
|
|
226
|
+
assert.strictEqual(labelToOrdinal('good').ordinal, COUNT_THREE);
|
|
227
|
+
assert.strictEqual(labelToOrdinal('pass').ordinal, COUNT_THREE);
|
|
228
|
+
});
|
|
229
|
+
it('maps review labels to ordinal 2 (Review)', () => {
|
|
230
|
+
assert.strictEqual(labelToOrdinal('partial').ordinal, COUNT_TWO);
|
|
231
|
+
assert.strictEqual(labelToOrdinal('borderline').category, 'Review');
|
|
232
|
+
});
|
|
233
|
+
it('maps fail labels to ordinal 1 (Fail)', () => {
|
|
234
|
+
assert.strictEqual(labelToOrdinal('off_topic').ordinal, 1);
|
|
235
|
+
assert.strictEqual(labelToOrdinal('irrelevant').category, 'Fail');
|
|
236
|
+
});
|
|
237
|
+
it('maps critical fail labels to ordinal 0 (Fail)', () => {
|
|
238
|
+
assert.strictEqual(labelToOrdinal('hallucinated').ordinal, 0);
|
|
239
|
+
assert.strictEqual(labelToOrdinal('toxic').category, 'Fail');
|
|
240
|
+
});
|
|
241
|
+
it('normalizes hyphens to underscores', () => {
|
|
242
|
+
const result = labelToOrdinal('highly-relevant');
|
|
243
|
+
assert.strictEqual(result.ordinal, TEST_LABEL_PASS_ORDINAL);
|
|
244
|
+
assert.strictEqual(result.mapped, true);
|
|
245
|
+
});
|
|
246
|
+
it('normalizes case', () => {
|
|
247
|
+
assert.strictEqual(labelToOrdinal('EXCELLENT').ordinal, TEST_LABEL_PASS_ORDINAL);
|
|
248
|
+
assert.strictEqual(labelToOrdinal('Partial').ordinal, COUNT_TWO);
|
|
249
|
+
});
|
|
250
|
+
it('returns ordinal 2 (Review) for unmapped labels with mapped=false', () => {
|
|
251
|
+
const result = labelToOrdinal('custom_label_xyz');
|
|
252
|
+
assert.strictEqual(result.ordinal, COUNT_TWO);
|
|
253
|
+
assert.strictEqual(result.category, 'Review');
|
|
254
|
+
assert.strictEqual(result.mapped, false);
|
|
255
|
+
});
|
|
256
|
+
it('handles whitespace in labels', () => {
|
|
257
|
+
const result = labelToOrdinal(' excellent ');
|
|
258
|
+
assert.strictEqual(result.ordinal, TEST_LABEL_PASS_ORDINAL);
|
|
259
|
+
assert.strictEqual(result.mapped, true);
|
|
260
|
+
});
|
|
261
|
+
});
|
|
262
|
+
describe('ordinalToCategory', () => {
|
|
263
|
+
it('maps ordinals to correct categories', () => {
|
|
264
|
+
assert.strictEqual(ordinalToCategory(TEST_LABEL_PASS_ORDINAL), 'Pass');
|
|
265
|
+
assert.strictEqual(ordinalToCategory(COUNT_THREE), 'Pass');
|
|
266
|
+
assert.strictEqual(ordinalToCategory(COUNT_TWO), 'Review');
|
|
267
|
+
assert.strictEqual(ordinalToCategory(1), 'Fail');
|
|
268
|
+
assert.strictEqual(ordinalToCategory(0), 'Fail');
|
|
269
|
+
});
|
|
270
|
+
});
|
|
271
|
+
// ============================================================================
|
|
272
|
+
// Composite Quality Index Tests
|
|
273
|
+
// ============================================================================
|
|
274
|
+
describe('computeCQI', () => {
|
|
275
|
+
it('computes weighted composite from all 7 metrics', () => {
|
|
276
|
+
const metrics = [
|
|
277
|
+
makeMetricResult({ name: 'relevance', values: { avg: TEST_SCORE_STRONG, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_HUNDRED } }),
|
|
278
|
+
makeMetricResult({ name: 'faithfulness', values: { avg: TEST_SCORE_HIGH, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_HUNDRED } }),
|
|
279
|
+
makeMetricResult({
|
|
280
|
+
name: 'hallucination',
|
|
281
|
+
values: { avg: DELTA, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_HUNDRED },
|
|
282
|
+
alerts: [{ severity: 'warning', message: '', aggregation: 'avg', threshold: TEST_SCORE_WARNING, actualValue: DELTA, direction: 'above' }],
|
|
283
|
+
}),
|
|
284
|
+
makeMetricResult({ name: 'task_completion', values: { avg: TEST_TASK_COMPLETION_STRONG_SCORE, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_HUNDRED } }),
|
|
285
|
+
makeMetricResult({ name: 'coherence', values: { avg: 0.82, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_HUNDRED } }),
|
|
286
|
+
makeMetricResult({ name: 'tool_correctness', values: { avg: TEST_SCORE_EXCELLENT, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_HUNDRED } }),
|
|
287
|
+
makeMetricResult({
|
|
288
|
+
name: 'evaluation_latency',
|
|
289
|
+
values: { avg: 2.5, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_HUNDRED },
|
|
290
|
+
alerts: [{ severity: 'warning', message: '', aggregation: 'p95', threshold: COUNT_FIVE, actualValue: 2.5, direction: 'above' }],
|
|
291
|
+
}),
|
|
292
|
+
];
|
|
293
|
+
const cqi = computeCQI(metrics);
|
|
294
|
+
assert.ok(cqi !== undefined);
|
|
295
|
+
assert.strictEqual(requireDefined(cqi).featureVersion, '1.0');
|
|
296
|
+
assert.ok(requireDefined(cqi).value >= 0 && requireDefined(cqi).value <= 1);
|
|
297
|
+
assert.strictEqual(requireDefined(cqi).contributions.length, TEST_CORE_METRIC_COUNT);
|
|
298
|
+
// Hallucination should be inverted (0.05 -> 0.95 normalized)
|
|
299
|
+
const hallContrib = requireDefined(cqi).contributions.find(c => c.metric === 'hallucination');
|
|
300
|
+
assert.ok(requireDefined(hallContrib).normalizedScore > TEST_SCORE_HIGH, `Expected hallucination normalized > 0.9, got ${requireDefined(hallContrib).normalizedScore}`);
|
|
301
|
+
});
|
|
302
|
+
it('returns undefined when no metrics have data', () => {
|
|
303
|
+
const metrics = [
|
|
304
|
+
makeMetricResult({ name: 'relevance', values: { avg: null, p50: null, p95: null, p99: null, min: null, max: null, count: 0 } }),
|
|
305
|
+
];
|
|
306
|
+
assert.strictEqual(computeCQI(metrics), undefined);
|
|
307
|
+
});
|
|
308
|
+
it('renormalizes weights when some metrics are missing', () => {
|
|
309
|
+
const metrics = [
|
|
310
|
+
makeMetricResult({ name: 'relevance', values: { avg: TEST_SCORE_HIGH, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_HUNDRED } }),
|
|
311
|
+
makeMetricResult({ name: 'coherence', values: { avg: TEST_SCORE_GOOD, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_HUNDRED } }),
|
|
312
|
+
];
|
|
313
|
+
const cqi = computeCQI(metrics);
|
|
314
|
+
assert.ok(cqi !== undefined);
|
|
315
|
+
assert.strictEqual(requireDefined(cqi).contributions.length, COUNT_TWO);
|
|
316
|
+
// With only relevance (0.25 weight) and coherence (0.10 weight),
|
|
317
|
+
// totalWeight = 0.35, value = (0.9*0.25 + 0.8*0.10) / 0.35
|
|
318
|
+
const expected = (TEST_SCORE_HIGH * TEST_CQI_WEIGHT_RELEVANCE + TEST_SCORE_GOOD * TEST_CQI_WEIGHT_COHERENCE) / TEST_CQI_WEIGHT_SUM;
|
|
319
|
+
assert.ok(Math.abs(requireDefined(cqi).value - expected) < TEST_DECIMAL_EPSILON);
|
|
320
|
+
});
|
|
321
|
+
it('accepts custom weights', () => {
|
|
322
|
+
const metrics = [
|
|
323
|
+
makeMetricResult({ name: 'relevance', values: { avg: 1.0, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_FIFTY } }),
|
|
324
|
+
];
|
|
325
|
+
const cqi = computeCQI(metrics, { relevance: 1.0 });
|
|
326
|
+
assert.ok(cqi !== undefined);
|
|
327
|
+
assert.ok(Math.abs(requireDefined(cqi).value - 1.0) < TEST_DECIMAL_EPSILON);
|
|
328
|
+
});
|
|
329
|
+
it('returns undefined when all weights are zero (ER22)', () => {
|
|
330
|
+
const metrics = [
|
|
331
|
+
makeMetricResult({ name: 'relevance', values: { avg: TEST_SCORE_HIGH, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_HUNDRED } }),
|
|
332
|
+
];
|
|
333
|
+
assert.strictEqual(computeCQI(metrics, { relevance: 0 }), undefined);
|
|
334
|
+
});
|
|
335
|
+
it('contributions sum to value after renormalization (ER11)', () => {
|
|
336
|
+
const metrics = [
|
|
337
|
+
makeMetricResult({ name: 'relevance', values: { avg: TEST_SCORE_STRONG, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_HUNDRED } }),
|
|
338
|
+
makeMetricResult({ name: 'coherence', values: { avg: 0.80, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_HUNDRED } }),
|
|
339
|
+
];
|
|
340
|
+
const cqi = computeCQI(metrics);
|
|
341
|
+
assert.ok(cqi !== undefined);
|
|
342
|
+
const contribSum = requireDefined(cqi).contributions.reduce((s, c) => s + c.contribution, 0);
|
|
343
|
+
assert.ok(Math.abs(contribSum - requireDefined(cqi).value) < TEST_CQI_CONTRIBUTION_TOLERANCE, `contributions sum ${contribSum} should approximate value ${requireDefined(cqi).value}`);
|
|
344
|
+
});
|
|
345
|
+
});
|
|
346
|
+
// ============================================================================
|
|
347
|
+
// CQI Sensitivity Tests
|
|
348
|
+
// ============================================================================
|
|
349
|
+
/**
 * Suite for computeCQISensitivity, called here with a metric list, an optional
 * weight map and an optional perturbation delta.
 */
describe('computeCQISensitivity', () => {
    // Every fixture in this suite supplies only `avg` and `count`; the
    // percentile and min/max fields are always null.
    const metricWithAvg = (name, avg, count) => makeMetricResult({
        name,
        values: { avg, p50: null, p95: null, p99: null, min: null, max: null, count },
    });
    const baseMetrics = [
        metricWithAvg('relevance', 0.90, COUNT_HUNDRED),
        metricWithAvg('faithfulness', TEST_SCORE_STRONG, COUNT_HUNDRED),
        metricWithAvg('coherence', 0.80, COUNT_HUNDRED),
    ];
    // Largest `range` found among the entries of a sensitivity report.
    const widestRange = (report) => Math.max(...requireDefined(report).entries.map((entry) => entry.range));

    it('reports range for each weight when perturbed +/- 0.05', () => {
        const sens = computeCQISensitivity(baseMetrics);
        assert.ok(sens !== undefined);
        const { baseValue, delta, entries } = requireDefined(sens);
        assert.ok(baseValue > 0);
        assert.strictEqual(delta, DELTA);
        // Both of these metrics are supplied in the data and must be reported.
        const reportedMetrics = entries.map(({ metric }) => metric);
        assert.ok(reportedMetrics.includes('relevance'));
        assert.ok(reportedMetrics.includes('coherence'));
    });

    it('entries are sorted by range descending', () => {
        const sens = computeCQISensitivity(baseMetrics);
        assert.ok(sens !== undefined);
        const { entries } = requireDefined(sens);
        for (let i = 1; i < entries.length; i++) {
            const earlier = entries[i - 1].range;
            const later = entries[i].range;
            assert.ok(earlier >= later, `Entry ${i - 1} range (${earlier}) should be >= entry ${i} range (${later})`);
        }
    });

    it('returns undefined when no metrics have data', () => {
        const empty = [metricWithAvg('relevance', null, 0)];
        assert.strictEqual(computeCQISensitivity(empty), undefined);
    });

    it('accepts custom delta', () => {
        const sens = computeCQISensitivity(baseMetrics, undefined, TEST_CQI_CUSTOM_DELTA);
        assert.ok(sens !== undefined);
        assert.strictEqual(requireDefined(sens).delta, TEST_CQI_CUSTOM_DELTA);
        // The custom delta must yield a range at least as wide as the one
        // obtained with the much smaller TEST_DECIMAL_EPSILON delta.
        const sensSmall = computeCQISensitivity(baseMetrics, undefined, TEST_DECIMAL_EPSILON);
        assert.ok(sensSmall !== undefined);
        const maxRangeLarge = widestRange(sens);
        const maxRangeSmall = widestRange(sensSmall);
        assert.ok(maxRangeLarge >= maxRangeSmall);
    });

    it('low <= baseValue <= high for each entry', () => {
        const sens = computeCQISensitivity(baseMetrics);
        assert.ok(sens !== undefined);
        const { baseValue, entries } = requireDefined(sens);
        for (const { metric, low, high } of entries) {
            assert.ok(low <= baseValue + TEST_DECIMAL_EPSILON, `${metric}: low (${low}) should be <= baseValue (${baseValue})`);
            assert.ok(high >= baseValue - TEST_DECIMAL_EPSILON, `${metric}: high (${high}) should be >= baseValue (${baseValue})`);
        }
    });

    it('filters out metrics with zero or negative weights (pipe implementation)', () => {
        const customWeights = {
            relevance: 0.3,
            faithfulness: 0, // filtered out
            coherence: 0.5,
            completeness: -0.05, // filtered out
        };
        const sens = computeCQISensitivity(baseMetrics, customWeights);
        assert.ok(sens !== undefined);
        const reportedMetrics = requireDefined(sens).entries.map(({ metric }) => metric);
        assert.ok(reportedMetrics.includes('relevance'), 'positive weights should be included');
        assert.ok(reportedMetrics.includes('coherence'), 'positive weights should be included');
        assert.strictEqual(reportedMetrics.includes('faithfulness'), false, 'zero weights should be filtered');
        assert.strictEqual(reportedMetrics.includes('completeness'), false, 'negative weights should be filtered');
    });

    it('maintains consistent ordering across multiple calls (sort stability)', () => {
        const sens1 = computeCQISensitivity(baseMetrics);
        const sens2 = computeCQISensitivity(baseMetrics);
        assert.ok(sens1 !== undefined && sens2 !== undefined);
        const entries1 = requireDefined(sens1).entries;
        const entries2 = requireDefined(sens2).entries;
        assert.strictEqual(entries1.length, entries2.length, 'entry counts should match');
        // Two runs over the same input must agree position by position.
        for (let i = 0; i < entries1.length; i++) {
            const first = entries1[i];
            const second = entries2[i];
            assert.strictEqual(first.metric, second.metric, `Entry ${i} metric should be consistent across calls`);
            assert.strictEqual(first.range, second.range, `Entry ${i} range should be consistent across calls`);
        }
    });

    it('range is always non-negative (absolute value in pipe)', () => {
        const sens = computeCQISensitivity(baseMetrics);
        assert.ok(sens !== undefined);
        for (const { metric, range } of requireDefined(sens).entries) {
            assert.ok(range >= 0, `${metric}: range (${range}) should be non-negative`);
        }
    });
});
|
|
432
|
+
// ============================================================================
|
|
433
|
+
// Metric Dynamics Tests
|
|
434
|
+
// ============================================================================
|
|
435
|
+
/**
 * Suite for computeMetricDynamics, called here as
 * (currentTrend, previousTrend, periodHours, thresholds, trendHistory).
 *
 * Wherever a history array is passed, the "current" trend is taken as the last
 * element via `history.length - 1`, so it always tracks the fixture size.
 */
describe('computeMetricDynamics', () => {
    // Turns a list of deltas into trend fixtures, one trend per delta.
    const trendsFromDeltas = (deltas) => deltas.map((delta) => makeTrend({ delta }));

    it('computes velocity from trend delta and period', () => {
        const trend = makeTrend({ delta: TEST_SCORE_WARNING, currentValue: TEST_SCORE_STRONG });
        const result = computeMetricDynamics(trend, undefined, COUNT_TWO);
        assert.ok(result !== undefined);
        const dynamics = requireDefined(result);
        assert.strictEqual(dynamics.velocity, DELTA); // 0.1 / 2hrs
        assert.strictEqual(dynamics.acceleration, 0);
        assert.strictEqual(dynamics.inflectionDetected, false);
    });

    it('computes acceleration from two trends', () => {
        const currentTrend = makeTrend({ delta: TEST_SCORE_VERY_LOW, currentValue: TEST_SCORE_STRONG });
        const previousTrend = makeTrend({ delta: TEST_SCORE_WARNING, previousValue: TEST_SCORE_ADEQUATE, currentValue: 0.75 });
        const result = computeMetricDynamics(currentTrend, previousTrend, 1);
        assert.ok(result !== undefined);
        const dynamics = requireDefined(result);
        assert.strictEqual(dynamics.velocity, TEST_SCORE_VERY_LOW);
        assert.strictEqual(dynamics.acceleration, TEST_SCORE_WARNING); // (0.2 - 0.1) / 1hr
    });

    it('detects inflection point when velocity changes sign', () => {
        const currentTrend = makeTrend({ delta: -DELTA, currentValue: TEST_SCORE_STRONG });
        const previousTrend = makeTrend({ delta: TEST_SCORE_WARNING, currentValue: 0.90 });
        const result = computeMetricDynamics(currentTrend, previousTrend, 1);
        assert.ok(result !== undefined);
        assert.strictEqual(requireDefined(result).inflectionDetected, true);
    });

    it('projects breach time for degrading metric', () => {
        const trend = makeTrend({ delta: -TEST_SCORE_WARNING, currentValue: 0.75 });
        const thresholds = [
            { value: TEST_SCORE_PASSING, direction: 'below', severity: 'warning' },
            { value: TEST_SCORE_MID, direction: 'below', severity: 'critical' },
        ];
        const result = computeMetricDynamics(trend, undefined, 1, thresholds);
        assert.ok(result !== undefined);
        const dynamics = requireDefined(result);
        assert.strictEqual(dynamics.projectedStatus, 'warning');
        assert.ok(dynamics.projectedBreachTime !== undefined);
    });

    it('returns undefined for zero period hours', () => {
        const trend = makeTrend();
        assert.strictEqual(computeMetricDynamics(trend, undefined, 0), undefined);
    });

    it('returns undefined for sub-minimum period hours', () => {
        const trend = makeTrend();
        assert.strictEqual(computeMetricDynamics(trend, undefined, TEST_MIN_PERIOD_HOURS_BELOW_THRESHOLD), undefined);
    });

    it('includes confidence field based on data points', () => {
        const trend = makeTrend({ delta: TEST_SCORE_WARNING, currentValue: TEST_SCORE_STRONG });
        const single = computeMetricDynamics(trend, undefined, 1);
        assert.ok(single !== undefined);
        assert.strictEqual(requireDefined(single).confidence, TEST_SINGLE_TREND_CONFIDENCE); // 0.2 + 0.075 * 1
        const previous = makeTrend({ delta: DELTA, currentValue: 0.80 });
        const dual = computeMetricDynamics(trend, previous, 1);
        assert.ok(dual !== undefined);
        assert.strictEqual(requireDefined(dual).confidence, TEST_DUAL_TREND_CONFIDENCE); // 0.2 + 0.075 * 2
    });

    it('applies exponential smoothing when trendHistory provided', () => {
        // 5 trends with noisy deltas
        const history = trendsFromDeltas([0.10, -DELTA, 0.08, -0.02, 0.06]);
        const current = history[history.length - 1];
        const result = computeMetricDynamics(current, undefined, 1, undefined, history);
        assert.ok(result !== undefined);
        const dynamics = requireDefined(result);
        // Only the type of the smoothed velocity is checked here, not its size.
        assert.ok(typeof dynamics.velocity === 'number');
        assert.ok(dynamics.confidence > TEST_SCORE_MID); // 5 data points → 0.2 + 0.075*5 = 0.575
    });

    it('caps confidence at 0.95 for large history', () => {
        const history = Array.from({ length: COUNT_FIFTEEN }, (_, i) => makeTrend({ delta: TEST_DECIMAL_EPSILON * (i + 1) }));
        // Last element is derived from the array rather than a literal index,
        // so it stays valid if COUNT_FIFTEEN is ever changed.
        const current = history[history.length - 1];
        const result = computeMetricDynamics(current, undefined, 1, undefined, history);
        assert.ok(result !== undefined);
        assert.strictEqual(requireDefined(result).confidence, TEST_SCORE_EXCELLENT);
    });

    it('detects inflection from smoothed velocity series', () => {
        // Positive then negative deltas
        const history = trendsFromDeltas([TEST_SCORE_WARNING, 0.08, 0.02, -DELTA, -TEST_SCORE_WARNING]);
        const current = history[history.length - 1];
        const result = computeMetricDynamics(current, undefined, 1, undefined, history);
        assert.ok(result !== undefined);
        // Smoothed velocity should eventually become negative → inflection possible
        assert.ok(requireDefined(result).velocity < DELTA); // Smoothed toward negative
    });
});
|
|
525
|
+
// ============================================================================
|
|
526
|
+
// Coverage-Weighted Confidence Tests
|
|
527
|
+
// ============================================================================
|
|
528
|
+
/**
 * Suite for computeNormalizedEntropy over arrays of per-category counts.
 */
describe('computeNormalizedEntropy', () => {
    // Two equal counts followed by five zeros. Built fresh on every call so no
    // two tests share the same array instance.
    const bimodalCounts = () => [COUNT_FIFTY, COUNT_FIFTY, 0, 0, 0, 0, 0];

    it('returns 1 for perfectly uniform distribution', () => {
        const entropy = computeNormalizedEntropy([COUNT_TEN, COUNT_TEN, COUNT_TEN, COUNT_TEN]);
        const distanceFromOne = Math.abs(entropy - 1);
        assert.ok(distanceFromOne < TEST_DECIMAL_EPSILON, `Expected ~1, got ${entropy}`);
    });

    it('returns low value for concentrated distribution', () => {
        const entropy = computeNormalizedEntropy([COUNT_HUNDRED, 0, 0, 0]);
        assert.strictEqual(entropy, 0);
    });

    it('returns 1 for single element', () => {
        const entropy = computeNormalizedEntropy([COUNT_FIFTY]);
        assert.strictEqual(entropy, 1);
    });

    it('returns 1 for empty array (vacuously uniform)', () => {
        const entropy = computeNormalizedEntropy([]);
        assert.strictEqual(entropy, 1);
    });

    it('returns 0 for all zeros', () => {
        const entropy = computeNormalizedEntropy([0, 0, 0]);
        assert.strictEqual(entropy, 0);
    });

    it('sharply detects zero categories', () => {
        // Single active category: entropy = 0
        const entropyOne = computeNormalizedEntropy([COUNT_HUNDRED, 0, 0, 0, 0, 0, 0]);
        assert.strictEqual(entropyOne, 0, 'Entropy should be 0 for single active category');
        // Two active categories out of seven: entropy is still expected below 0.4
        const entropyTwo = computeNormalizedEntropy(bimodalCounts());
        assert.ok(entropyTwo < TEST_SCORE_POOR, `Expected entropy < 0.4, got ${entropyTwo}`);
    });

    it('discriminates bimodal from dominant distributions', () => {
        const bimodal = computeNormalizedEntropy(bimodalCounts());
        const dominantCounts = [
            TEST_ENTROPY_DOMINANT_PRIMARY,
            COUNT_FIVE,
            COUNT_FIVE,
            COUNT_FIVE,
            COUNT_FIVE,
            COUNT_FIVE,
            COUNT_FIVE,
        ];
        const dominant = computeNormalizedEntropy(dominantCounts);
        // Dominant has more categories active -> higher entropy
        assert.ok(dominant > bimodal, `Expected dominant (${dominant}) > bimodal (${bimodal})`);
    });

    it('near-uniform distribution returns close to 1', () => {
        const nearUniformCounts = [
            COUNT_FOURTEEN,
            COUNT_FOURTEEN,
            COUNT_FOURTEEN,
            COUNT_FOURTEEN,
            COUNT_FOURTEEN,
            COUNT_FIFTEEN,
            COUNT_FIFTEEN,
        ];
        const entropy = computeNormalizedEntropy(nearUniformCounts);
        assert.ok(entropy > TEST_ENTROPY_NEAR_UNIFORM_MIN, `Expected > 0.99, got ${entropy}`);
    });
});
|
|
565
|
+
/**
 * Suite for computeCoverageWeightedConfidence, called here with a base
 * confidence record and an array of per-cell coverage counts.
 */
describe('computeCoverageWeightedConfidence', () => {
    // Base confidence record shared by the tests that start from level 'high'.
    const highBase = {
        level: 'high',
        sampleCount: COUNT_HUNDRED,
        scoreStdDev: DELTA,
        evaluatorCount: 1,
        evaluatorAgreement: null,
    };

    it('maintains high confidence for uniform coverage', () => {
        const uniformCoverage = [
            COUNT_TEN, COUNT_TEN, COUNT_TEN, COUNT_TEN, COUNT_TEN,
            COUNT_TEN, COUNT_TEN, COUNT_TEN, COUNT_TEN, COUNT_TEN,
        ];
        const adjusted = computeCoverageWeightedConfidence(highBase, uniformCoverage);
        assert.strictEqual(adjusted.adjustedLevel, 'high');
        assert.ok(adjusted.effectiveSampleSize >= COUNT_THIRTY);
        assert.ok(adjusted.coverageUniformity > TEST_SCORE_HIGH);
    });

    it('downgrades confidence for concentrated coverage', () => {
        const concentratedCoverage = [TEST_CONCENTRATED_COVERAGE_PRIMARY, 1, 1, 1, 1, 1];
        const adjusted = computeCoverageWeightedConfidence(highBase, concentratedCoverage);
        // 'high' is only acceptable when the effective sample size reaches COUNT_THIRTY.
        assert.ok(adjusted.adjustedLevel !== 'high' || adjusted.effectiveSampleSize >= COUNT_THIRTY);
        assert.ok(adjusted.coverageUniformity < TEST_SCORE_GOOD, `Expected < 0.8, got ${adjusted.coverageUniformity}`);
    });

    it('enforces minimum 30 effective samples for high', () => {
        // Same record as highBase except for the level and the sample count.
        const lowSample = {
            ...highBase,
            level: 'medium',
            sampleCount: COUNT_TWENTY,
        };
        const adjusted = computeCoverageWeightedConfidence(lowSample, [COUNT_FIVE, COUNT_FIVE, COUNT_FIVE, COUNT_FIVE]);
        assert.ok(adjusted.effectiveSampleSize < COUNT_THIRTY);
        assert.notStrictEqual(adjusted.adjustedLevel, 'high');
    });

    it('preserves base confidence fields', () => {
        const adjusted = computeCoverageWeightedConfidence(highBase, [COUNT_FIFTY, COUNT_FIFTY]);
        assert.strictEqual(adjusted.sampleCount, COUNT_HUNDRED);
        assert.strictEqual(adjusted.scoreStdDev, DELTA);
        assert.strictEqual(adjusted.featureVersion, '2.0');
    });
});
|
|
603
|
+
// ============================================================================
|
|
604
|
+
// Degradation Signal Tests
|
|
605
|
+
// ============================================================================
|
|
606
|
+
/**
 * Suite for computeEWMA, called here with a value series and an optional
 * lambda as second argument.
 */
describe('computeEWMA', () => {
    it('returns first value for single-element array', () => {
        const ewma = computeEWMA([TEST_SCORE_MID]);
        assert.strictEqual(ewma, TEST_SCORE_MID);
    });

    it('returns undefined for empty array', () => {
        const ewma = computeEWMA([]);
        assert.strictEqual(ewma, undefined);
    });

    it('converges toward recent values with default lambda', () => {
        // Series with a shift: 0.8 x 10, then 0.4 x 10
        const beforeShift = Array(COUNT_TEN).fill(TEST_SCORE_GOOD);
        const afterShift = Array(COUNT_TEN).fill(TEST_SCORE_POOR);
        const shifted = beforeShift.concat(afterShift);
        const ewma = requireDefined(computeEWMA(shifted), 'Expected EWMA result');
        // EWMA should be between 0.4 and 0.8, pulled toward 0.4
        assert.ok(ewma > TEST_SCORE_POOR && ewma < TEST_SCORE_GOOD, `Expected between 0.4 and 0.8, got ${ewma}`);
    });

    it('lambda=1 returns the last value', () => {
        const series = [TEST_SCORE_WARNING, TEST_SCORE_MID, TEST_SCORE_HIGH];
        assert.strictEqual(computeEWMA(series, 1.0), TEST_SCORE_HIGH);
    });
});
|
|
625
|
+
/**
 * Suite for computeMAD over numeric series.
 */
describe('computeMAD', () => {
    // The series 1..5, built fresh on every call so no two invocations of
    // computeMAD ever receive the same array instance.
    const oneToFive = () => [1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_FIVE];

    it('returns 0 for single element', () => {
        const mad = computeMAD([COUNT_FIVE]);
        assert.strictEqual(mad, 0);
    });

    it('returns 0 for constant values', () => {
        const mad = computeMAD([COUNT_THREE, COUNT_THREE, COUNT_THREE, COUNT_THREE]);
        assert.strictEqual(mad, 0);
    });

    it('computes scaled MAD for simple series', () => {
        // [1,2,3,4,5]: median=3, deviations=[2,1,0,1,2], median_dev=1, scaled=1.4826
        const mad = computeMAD(oneToFive());
        const error = Math.abs(mad - MAD_CONSISTENCY_FACTOR);
        assert.ok(error < TEST_DECIMAL_EPSILON, `Expected ~1.4826, got ${mad}`);
    });

    it('is robust to outliers', () => {
        const normal = computeMAD(oneToFive());
        const withOutlier = computeMAD([1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_HUNDRED]);
        // MAD should be similar despite outlier; stdDev would explode
        const gap = Math.abs(normal - withOutlier);
        assert.ok(gap < COUNT_TWO, `Expected similar MADs: ${normal} vs ${withOutlier}`);
    });
});
|
|
644
|
+
/**
 * Suite for detectEWMADrift, which is expected to return a boolean for a
 * numeric series.
 */
describe('detectEWMADrift', () => {
    it('returns false for insufficient data', () => {
        const drifted = detectEWMADrift([1, COUNT_TWO, COUNT_THREE]);
        assert.strictEqual(drifted, false);
    });

    it('returns false for stable series', () => {
        const stableSeries = [
            TEST_SCORE_GOOD,
            TEST_STABLE_SERIES_HIGH,
            TEST_STABLE_SERIES_LOW,
            TEST_SCORE_GOOD,
            TEST_STABLE_SERIES_HIGHER,
            TEST_STABLE_SERIES_LOW,
            TEST_STABLE_SERIES_HIGH,
        ];
        assert.strictEqual(detectEWMADrift(stableSeries), false);
    });

    it('detects drift in shifted series', () => {
        // 10 periods at 0.8, then shift to 0.3
        const plateau = Array(COUNT_TEN).fill(TEST_SCORE_GOOD);
        const values = [
            ...plateau,
            TEST_SCORE_LOW,
            TEST_SCORE_LOW,
            TEST_SCORE_LOW,
            TEST_SCORE_LOW,
            TEST_SCORE_LOW,
        ];
        assert.strictEqual(detectEWMADrift(values), true);
    });

    it('returns false for constant zero series (ER22 - MAD=0 fallback)', () => {
        const allZero = Array(COUNT_TWENTY).fill(0);
        assert.strictEqual(detectEWMADrift(allZero), false);
    });

    it('returns false for constant non-zero series (ER22)', () => {
        const allMid = Array(COUNT_TWENTY).fill(TEST_SCORE_MID);
        assert.strictEqual(detectEWMADrift(allMid), false);
    });
});
|
|
665
|
+
/**
 * Suite for computeDegradationSignal, called here as
 * (currentStdDev, baselineStdDev, droppedCells, totalCells, p95, p50, options).
 * NOTE(review): the parameter names above are inferred from the test titles and
 * call sites only — confirm against the implementation.
 *
 * Each call keeps its exact argument count: the four-argument and
 * seven-argument forms are deliberately not merged into a shared helper.
 */
describe('computeDegradationSignal', () => {
    it('returns healthy for normal conditions', () => {
        const signal = computeDegradationSignal(DELTA, DELTA, 0, COUNT_HUNDRED);
        assert.strictEqual(signal.predictedStatus, 'healthy');
        assert.strictEqual(signal.varianceTrend, 'stable');
        assert.strictEqual(signal.ewmaDriftDetected, false);
        assert.strictEqual(signal.consecutiveBreaches, 0);
        assert.strictEqual(signal.confirmed, false);
    });

    it('includes featureVersion (FE7)', () => {
        const signal = computeDegradationSignal(DELTA, DELTA, 0, COUNT_HUNDRED);
        const version = signal.featureVersion;
        assert.ok(typeof version === 'string');
        assert.ok(version.length > 0);
    });

    it('detects increasing variance at 2x threshold (R5: was 1.5x)', () => {
        // 1.8x ratio: should be stable (below 2.0 threshold)
        const belowThreshold = computeDegradationSignal(TEST_VARIANCE_STABLE_INPUT, DELTA, 0, COUNT_HUNDRED);
        assert.strictEqual(belowThreshold.varianceTrend, 'stable');
        // 2.5x ratio: should be increasing
        const aboveThreshold = computeDegradationSignal(TEST_VARIANCE_INCREASING_INPUT, DELTA, 0, COUNT_HUNDRED);
        assert.strictEqual(aboveThreshold.varianceTrend, 'increasing');
        assert.ok(aboveThreshold.varianceRatio >= VARIANCE_INCREASE_THRESHOLD);
    });

    it('detects decreasing variance', () => {
        const signal = computeDegradationSignal(TEST_VARIANCE_DECREASING_INPUT, DELTA, 0, COUNT_HUNDRED);
        assert.strictEqual(signal.varianceTrend, 'decreasing');
    });

    it('computes coverage dropout rate', () => {
        const signal = computeDegradationSignal(DELTA, DELTA, COUNT_TWENTY_FIVE, COUNT_HUNDRED);
        assert.strictEqual(signal.coverageDropoutRate, TEST_CDF_INTERPOLATION_MIN);
    });

    it('computes latency skew ratio', () => {
        const signal = computeDegradationSignal(DELTA, DELTA, 0, COUNT_HUNDRED, TEST_LATENCY_P95, TEST_LATENCY_P50);
        assert.strictEqual(signal.latencySkewRatio, COUNT_FIVE);
    });

    it('predicts warning with 1 active signal', () => {
        // Use high enough variance to cross 2.0 threshold
        const signal = computeDegradationSignal(TEST_MINIMIZE_GOOD_INPUT, DELTA, 0, COUNT_HUNDRED);
        assert.strictEqual(signal.predictedStatus, 'warning');
    });

    it('predicts critical with 2+ active signals', () => {
        const signal = computeDegradationSignal(TEST_MINIMIZE_GOOD_INPUT, DELTA, COUNT_TWENTY_FIVE, COUNT_HUNDRED);
        assert.strictEqual(signal.predictedStatus, 'critical');
    });

    it('handles null stdDev gracefully', () => {
        const signal = computeDegradationSignal(null, null, 0, COUNT_HUNDRED);
        assert.strictEqual(signal.varianceRatio, 1);
        assert.strictEqual(signal.varianceTrend, 'stable');
    });

    it('handles zero total coverage cells', () => {
        const signal = computeDegradationSignal(DELTA, DELTA, 0, 0);
        assert.strictEqual(signal.coverageDropoutRate, 0);
    });

    it('detects EWMA drift from historical values', () => {
        // COUNT_TEN values of TEST_SCORE_GOOD followed by five of TEST_SCORE_LOW.
        const plateau = Array(COUNT_TEN).fill(TEST_SCORE_GOOD);
        const driftingValues = [
            ...plateau,
            TEST_SCORE_LOW,
            TEST_SCORE_LOW,
            TEST_SCORE_LOW,
            TEST_SCORE_LOW,
            TEST_SCORE_LOW,
        ];
        const options = { historicalValues: driftingValues };
        const signal = computeDegradationSignal(DELTA, DELTA, 0, COUNT_HUNDRED, null, null, options);
        assert.strictEqual(signal.ewmaDriftDetected, true);
        assert.strictEqual(signal.predictedStatus, 'warning');
    });

    it('tracks consecutive breaches for confirmation window', () => {
        // First breaching period: counted, but not yet confirmed.
        const warning = computeDegradationSignal(TEST_MINIMIZE_GOOD_INPUT, DELTA, 0, COUNT_HUNDRED, null, null, {
            priorConsecutiveBreaches: 0,
        });
        assert.strictEqual(warning.consecutiveBreaches, 1);
        assert.strictEqual(warning.confirmed, false);
        // Second breaching period in a row: the signal becomes confirmed.
        const confirmed = computeDegradationSignal(TEST_MINIMIZE_GOOD_INPUT, DELTA, 0, COUNT_HUNDRED, null, null, {
            priorConsecutiveBreaches: 1,
        });
        assert.strictEqual(confirmed.consecutiveBreaches, COUNT_TWO);
        assert.strictEqual(confirmed.confirmed, true);
    });

    it('resets consecutive breaches on healthy period', () => {
        const signal = computeDegradationSignal(DELTA, DELTA, 0, COUNT_HUNDRED, null, null, {
            priorConsecutiveBreaches: COUNT_FIVE,
        });
        assert.strictEqual(signal.consecutiveBreaches, 0);
        assert.strictEqual(signal.confirmed, false);
    });
});
|
|
746
|
+
// ============================================================================
|
|
747
|
+
// Correlation Matrix Tests
|
|
748
|
+
// ============================================================================
|
|
749
|
+
/**
 * Suite for computePearsonR over two equally long numeric series.
 */
describe('computePearsonR', () => {
    // The series 1..5, built fresh on every call so tests never share an array.
    const oneToFive = () => [1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_FIVE];

    it('returns 1 for perfectly correlated series', () => {
        const doubled = [COUNT_TWO, COUNT_FOUR, COUNT_SIX, COUNT_EIGHT, COUNT_TEN];
        const r = computePearsonR(oneToFive(), doubled);
        assert.ok(Math.abs(r - 1) < TEST_DECIMAL_EPSILON);
    });

    it('returns -1 for perfectly inverse correlated series', () => {
        const descending = [COUNT_TEN, COUNT_EIGHT, COUNT_SIX, COUNT_FOUR, COUNT_TWO];
        const r = computePearsonR(oneToFive(), descending);
        assert.ok(Math.abs(r + 1) < TEST_DECIMAL_EPSILON);
    });

    it('returns ~0 for uncorrelated series', () => {
        // Truly uncorrelated: no linear relationship
        const xs = [
            1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_FIVE,
            COUNT_SIX, COUNT_SEVEN, COUNT_EIGHT, COUNT_NINE, COUNT_TEN,
        ];
        // random-looking
        const ys = [
            COUNT_FIVE, COUNT_THREE, COUNT_SEVEN, 1, COUNT_NINE,
            COUNT_TWO, COUNT_EIGHT, COUNT_FOUR, COUNT_SIX, COUNT_FIVE,
        ];
        const r = computePearsonR(xs, ys);
        assert.ok(Math.abs(r) < TEST_SCORE_MID, `Expected |r| < 0.5, got ${r}`);
    });

    it('returns 0 for insufficient data (< 3)', () => {
        const r = computePearsonR([1, COUNT_TWO], [COUNT_THREE, COUNT_FOUR]);
        assert.strictEqual(r, 0);
    });

    it('returns 0 for zero variance', () => {
        const constant = [COUNT_FIVE, COUNT_FIVE, COUNT_FIVE];
        const r = computePearsonR(constant, [1, COUNT_TWO, COUNT_THREE]);
        assert.strictEqual(r, 0);
    });
});
|
|
774
|
+
/**
 * Suite for pearsonPValue, called here as (r, n) and expected to return either
 * a p-value or null.
 */
describe('pearsonPValue', () => {
    it('returns null for insufficient data', () => {
        const p = pearsonPValue(TEST_SCORE_MID, COUNT_TWO);
        assert.strictEqual(p, null);
    });

    it('returns ~0 for perfect correlation', () => {
        const p = pearsonPValue(TEST_PEARSON_NEAR_PERFECT, COUNT_HUNDRED);
        assert.ok(p !== null && p < TEST_PVALUE_STRICT, `Expected p < 0.001, got ${p}`);
    });

    it('returns 1 for zero correlation', () => {
        const p = pearsonPValue(0, COUNT_FIFTY);
        assert.strictEqual(p, 1);
    });

    it('returns moderate p for weak correlation', () => {
        const p = pearsonPValue(TEST_SCORE_LOW, COUNT_TEN);
        assert.ok(p !== null && p > DELTA, `Expected p > 0.05, got ${p}`);
    });

    it('strong correlation with few points has moderate p', () => {
        const p = pearsonPValue(TEST_SCORE_GOOD, COUNT_FIVE);
        assert.ok(p !== null && p > TEST_DECIMAL_EPSILON && p < TEST_SCORE_LOW, `Expected 0.01 < p < 0.3, got ${p}`);
    });

    it('matches scipy two-tailed reference values', () => {
        // scipy.stats.pearsonr reference values, one [r, n, expected p] triple per row
        const cases = [
            [TEST_PEARSON_CASE_ONE_R, COUNT_TEN, TEST_PEARSON_CASE_ONE_P], // moderate r, decent n
            [TEST_SCORE_MID, COUNT_TWENTY, TEST_PEARSON_CASE_TWO_P], // moderate r, larger n
            [TEST_SCORE_LOW, COUNT_THIRTY, TEST_PEARSON_CASE_THREE_P], // weak r, large n
            [-TEST_SCORE_GOOD, COUNT_TEN, TEST_PEARSON_CASE_FOUR_P], // strong negative r
        ];
        cases.forEach(([r, n, expected]) => {
            const p = requireDefined(pearsonPValue(r, n), 'Expected p-value');
            const error = Math.abs(p - expected);
            assert.ok(error < TEST_PEARSON_COMPARISON_TOLERANCE, `pearsonPValue(${r}, ${n}) = ${p}, expected ~${expected}`);
        });
    });
});
|
|
808
|
+
/**
 * Suite for benjaminiHochberg, called here with a list of p-values and a
 * threshold, and expected to return one boolean per input p-value.
 */
describe('benjaminiHochberg', () => {
    it('returns empty for empty input', () => {
        const flags = benjaminiHochberg([]);
        assert.deepStrictEqual(flags, []);
    });

    it('rejects non-significant p-values', () => {
        const pValues = [TEST_SCORE_MID, TEST_SCORE_BASELINE, TEST_SCORE_GOOD];
        const flags = benjaminiHochberg(pValues, DELTA);
        assert.deepStrictEqual(flags, [false, false, false]);
    });

    it('accepts clearly significant p-values', () => {
        const pValues = [TEST_PVALUE_STRICT, TEST_PVALUE_CASE_MID, TEST_PVALUE_CASE_HIGH];
        const flags = benjaminiHochberg(pValues, DELTA);
        assert.deepStrictEqual(flags, [true, true, true]);
    });

    it('handles mixed significance with FDR control', () => {
        // 5 tests: 2 real signals (p=0.001, 0.01), 3 noise (p=0.3, 0.5, 0.8)
        const pValues = [TEST_PVALUE_STRICT, TEST_SCORE_LOW, TEST_DECIMAL_EPSILON, TEST_SCORE_MID, TEST_SCORE_GOOD];
        const flags = benjaminiHochberg(pValues, DELTA);
        // Indices 0 and 2 should be significant; the rest should not.
        const expectedByIndex = [
            [0, true],
            [COUNT_TWO, true],
            [1, false],
            [COUNT_THREE, false],
            [COUNT_FOUR, false],
        ];
        for (const [index, expected] of expectedByIndex) {
            assert.strictEqual(flags[index], expected);
        }
    });
});
|
|
831
|
+
// Tests for computeCorrelationMatrix: result shape, known-combo flagging,
// co-occurrence rate, degenerate inputs, lag search and the FDR option.
describe('computeCorrelationMatrix', () => {
    it('computes pairwise correlations with significance', () => {
        const seriesByMetric = new Map()
            .set('relevance', [TEST_SCORE_GOOD, TEST_SCORE_PASSING, TEST_SCORE_BASELINE, TEST_SCORE_MID, TEST_SCORE_POOR])
            .set('hallucination', [TEST_SCORE_WARNING, TEST_SCORE_VERY_LOW, TEST_SCORE_LOW, TEST_SCORE_POOR, TEST_SCORE_MID])
            .set('coherence', [TEST_SCORE_HIGH, TEST_SCORE_STRONG, TEST_SCORE_GOOD, TEST_CDF_INTERPOLATION_INPUT, TEST_SCORE_PASSING]);
        const pairs = computeCorrelationMatrix(seriesByMetric);
        // Three metrics are expected to yield three pairwise results.
        assert.strictEqual(pairs.length, COUNT_THREE);
        // Every result must carry the full set of fields.
        const requiredFields = ['significant', 'pValue', 'spearmanR', 'effectSize'];
        pairs.forEach((pair) => {
            assert.strictEqual(pair.featureVersion, '3.1');
            assert.strictEqual(pair.causalConfidence, 'correlation');
            requiredFields.forEach((field) => {
                assert.ok(field in pair);
            });
            const rhoAtLeastMin = pair.spearmanR >= -1;
            const rhoAtMostMax = pair.spearmanR <= 1;
            assert.ok(rhoAtLeastMin && rhoAtMostMax, `spearmanR out of range: ${pair.spearmanR}`);
            assert.ok(pair.effectSize >= 0, `effectSize should be non-negative: ${pair.effectSize}`);
        });
    });

    it('identifies known toxic combinations', () => {
        const seriesByMetric = new Map()
            .set('hallucination', [TEST_SCORE_WARNING, TEST_SCORE_VERY_LOW, TEST_SCORE_LOW])
            .set('relevance', [TEST_SCORE_HIGH, TEST_SCORE_GOOD, TEST_SCORE_PASSING]);
        const toxicPairs = new Set(['hallucination:relevance']);
        const pairs = computeCorrelationMatrix(seriesByMetric, toxicPairs);
        assert.strictEqual(pairs.length, 1);
        const [onlyPair] = pairs;
        assert.strictEqual(onlyPair.isKnownToxicCombo, true);
    });

    it('computes co-occurrence rate from degraded periods', () => {
        const seriesByMetric = new Map()
            .set('metricA', [TEST_SCORE_MID, TEST_SCORE_BASELINE, TEST_SCORE_PASSING])
            .set('metricB', [TEST_SCORE_POOR, TEST_SCORE_MID, TEST_SCORE_BASELINE]);
        // Only the first of the three periods is flagged for both metrics.
        const degradedFlags = new Map()
            .set('metricA', [true, true, false])
            .set('metricB', [true, false, false]);
        const pairs = computeCorrelationMatrix(seriesByMetric, undefined, degradedFlags);
        assert.strictEqual(pairs.length, 1);
        const expectedRate = 1 / COUNT_THREE;
        const deviation = Math.abs(pairs[0].coOccurrenceRate - expectedRate);
        assert.ok(deviation < TEST_DECIMAL_EPSILON);
    });

    it('returns empty array for fewer than 2 metrics', () => {
        const singleMetric = new Map().set('only_one', [1, COUNT_TWO, COUNT_THREE]);
        const pairs = computeCorrelationMatrix(singleMetric);
        assert.strictEqual(pairs.length, 0);
    });

    it('handles empty time series arrays (ER22)', () => {
        const seriesByMetric = new Map()
            .set('metricA', [])
            .set('metricB', []);
        const pairs = computeCorrelationMatrix(seriesByMetric);
        // Any pair produced from empty arrays must report pearsonR = 0.
        pairs.forEach((pair) => {
            assert.strictEqual(pair.pearsonR, 0);
        });
    });

    it('detects lagged correlations with maxLagSteps', () => {
        // A leads B by 2 steps: A[i] correlates with B[i+2].
        const leading = [1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_FIVE, COUNT_SIX, COUNT_SEVEN, COUNT_EIGHT, COUNT_NINE, COUNT_TEN];
        // Same ramp shifted right by two positions, padded with zeros.
        const trailing = [0, 0, 1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_FIVE, COUNT_SIX, COUNT_SEVEN, COUNT_EIGHT];
        const seriesByMetric = new Map().set('a', leading).set('b', trailing);
        // Baseline run without any lag options.
        const baseline = computeCorrelationMatrix(seriesByMetric);
        // Run with lag search enabled; it should do at least as well.
        const lagOptions = { maxLagSteps: COUNT_THREE, lagStepHours: 1 };
        const lagged = computeCorrelationMatrix(seriesByMetric, undefined, undefined, lagOptions);
        assert.strictEqual(lagged.length, 1);
        const laggedStrength = Math.abs(lagged[0].pearsonR);
        const baselineStrength = Math.abs(baseline[0].pearsonR);
        assert.ok(laggedStrength >= baselineStrength, `Expected lagged R (${lagged[0].pearsonR}) >= no-lag R (${baseline[0].pearsonR})`);
    });

    it('applies BH-FDR with custom q threshold', () => {
        // 'a' and 'b' form a strongly correlated pair ('b' doubles 'a' per the
        // constant names); 'c' is intended as uncorrelated noise.
        const seriesByMetric = new Map()
            .set('a', [1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_FIVE, COUNT_SIX, COUNT_SEVEN, COUNT_EIGHT, COUNT_NINE, COUNT_TEN])
            .set('b', [COUNT_TWO, COUNT_FOUR, COUNT_SIX, COUNT_EIGHT, COUNT_TEN, TEST_SEQUENCE_TWELVE, COUNT_FOURTEEN, TEST_SEQUENCE_SIXTEEN, TEST_SEQUENCE_EIGHTEEN, COUNT_TWENTY])
            .set('c', [COUNT_FIVE, COUNT_THREE, COUNT_SEVEN, 1, COUNT_NINE, COUNT_TWO, COUNT_EIGHT, COUNT_FOUR, COUNT_SIX, COUNT_FIVE]);
        const pairs = computeCorrelationMatrix(seriesByMetric, undefined, undefined, { fdrQ: DELTA });
        // The a:b pair should be significant (r ~ 1.0).
        const isPairAB = (pair) => pair.metricA === 'a' && pair.metricB === 'b';
        const pairAB = pairs.find(isPairAB);
        assert.ok(pairAB, 'a:b pair not found');
        assert.strictEqual(requireDefined(pairAB).significant, true);
    });
});
|
|
918
|
+
// ============================================================================
|
|
919
|
+
// Spearman Rank Correlation Tests
|
|
920
|
+
// ============================================================================
|
|
921
|
+
// Tests for computeSpearmanR: perfect monotonic orderings, ties, the
// minimum-sample guard, and comparisons against computePearsonR.
describe('computeSpearmanR', () => {
    it('returns perfect correlation for monotonically increasing data', () => {
        const r = computeSpearmanR([1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_FIVE], [COUNT_TEN, COUNT_TWENTY, COUNT_THIRTY, COUNT_FORTY, COUNT_FIFTY]);
        assert.strictEqual(r, 1);
    });
    it('returns perfect negative correlation for monotonically decreasing data', () => {
        const r = computeSpearmanR([1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_FIVE], [COUNT_FIFTY, COUNT_FORTY, COUNT_THIRTY, COUNT_TWENTY, COUNT_TEN]);
        assert.strictEqual(r, -1);
    });
    it('handles tied values via fractional ranks', () => {
        // Ties: [1,1,3] -> ranks [1.5, 1.5, 3]
        const r = computeSpearmanR([1, 1, COUNT_THREE], [COUNT_TEN, COUNT_TEN, COUNT_THIRTY]);
        assert.ok(r > TEST_SCORE_HIGH, `Expected strong positive Spearman, got ${r}`);
    });
    it('returns 0 for fewer than 3 data points', () => {
        assert.strictEqual(computeSpearmanR([1, COUNT_TWO], [COUNT_THREE, COUNT_FOUR]), 0);
    });
    it('agrees with Pearson for linear relationships', () => {
        const xs = [1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_FIVE, COUNT_SIX, COUNT_SEVEN, COUNT_EIGHT];
        // Exactly linear in x, so both coefficients should coincide.
        const ys = xs.map(x => COUNT_TWO * x + 1);
        const pearson = computePearsonR(xs, ys);
        const spearman = computeSpearmanR(xs, ys);
        assert.ok(Math.abs(pearson - spearman) < TEST_DECIMAL_EPSILON, `Pearson (${pearson}) and Spearman (${spearman}) should agree for linear data`);
    });
    it('handles non-linear monotonic relationships better than Pearson', () => {
        // Exponential relationship: Spearman should be 1.0, Pearson < 1.0
        const xs = [1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_FIVE, COUNT_SIX, COUNT_SEVEN, COUNT_EIGHT];
        const ys = xs.map(x => Math.exp(x));
        const pearson = computePearsonR(xs, ys);
        const spearman = computeSpearmanR(xs, ys);
        // The check is strict, so the failure message says ">" (it previously
        // said ">=", which did not match the assertion).
        assert.ok(spearman > pearson, `Spearman (${spearman}) should be > Pearson (${pearson}) for monotonic non-linear`);
    });
});
|
|
954
|
+
// Checks the effectSize field reported by computeCorrelationMatrix against
// the TEST_SCORE_GOOD cut-off for a strong and a weak pairing.
describe('effectSize in correlation matrix', () => {
    // Shared ascending ramp used as metric 'a' in both tests.
    const rampSeries = () => [1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_FIVE, COUNT_SIX, COUNT_SEVEN, COUNT_EIGHT, COUNT_NINE, COUNT_TEN];

    it('reports large effect size for strongly correlated metrics', () => {
        const seriesByMetric = new Map()
            .set('a', rampSeries())
            .set('b', [COUNT_TWO, COUNT_FOUR, COUNT_SIX, COUNT_EIGHT, COUNT_TEN, TEST_SEQUENCE_TWELVE, COUNT_FOURTEEN, TEST_SEQUENCE_SIXTEEN, TEST_SEQUENCE_EIGHTEEN, COUNT_TWENTY]);
        const [firstPair] = computeCorrelationMatrix(seriesByMetric);
        assert.ok(firstPair.effectSize >= TEST_SCORE_GOOD, `Expected large effect size (>= 0.8), got ${firstPair.effectSize}`);
    });

    it('reports small effect size for weakly correlated metrics', () => {
        // Metric 'b' is a scrambled sequence intended as noise.
        const seriesByMetric = new Map()
            .set('a', rampSeries())
            .set('b', [COUNT_FIVE, COUNT_THREE, COUNT_SEVEN, 1, COUNT_NINE, COUNT_TWO, COUNT_EIGHT, COUNT_FOUR, COUNT_SIX, COUNT_FIVE]);
        const [firstPair] = computeCorrelationMatrix(seriesByMetric);
        assert.ok(firstPair.effectSize < TEST_SCORE_GOOD, `Expected small effect size, got ${firstPair.effectSize}`);
    });
});
|
|
972
|
+
// ============================================================================
|
|
973
|
+
// METRIC_SCALE_STRATEGY Tests
|
|
974
|
+
// ============================================================================
|
|
975
|
+
// Verifies that METRIC_SCALE_STRATEGY has a key for each core metric name.
describe('METRIC_SCALE_STRATEGY', () => {
    it('defines strategies for all 7 core metrics', () => {
        const coreMetrics = [
            'relevance',
            'faithfulness',
            'coherence',
            'hallucination',
            'task_completion',
            'tool_correctness',
            'evaluation_latency',
        ];
        coreMetrics.forEach((metric) => {
            const isDefined = metric in METRIC_SCALE_STRATEGY;
            assert.ok(isDefined, `Missing strategy for ${metric}`);
        });
    });
});
|
|
983
|
+
// ============================================================================
|
|
984
|
+
// R4: Percentile Distribution Tests
|
|
985
|
+
// ============================================================================
|
|
986
|
+
// Tests for computePercentileDistribution: ordering of p10..p90, the
// minimum-sample guard, unsorted input and constant input.
describe('computePercentileDistribution', () => {
    it('computes correct percentiles for sorted array', () => {
        // Evenly spaced ascending scores: index / TEST_PERCENTILE_DIVISOR_MAX_INDEX.
        const scores = [];
        for (let index = 0; index < COUNT_HUNDRED; index++) {
            scores.push(index / TEST_PERCENTILE_DIVISOR_MAX_INDEX);
        }
        const dist = computePercentileDistribution(scores);
        assert.ok(dist !== undefined);
        const summary = requireDefined(dist);
        const medianError = Math.abs(summary.p50 - TEST_SCORE_MID);
        assert.ok(medianError < TEST_CQI_CONTRIBUTION_TOLERANCE, `p50 expected ~0.5, got ${summary.p50}`);
        // Percentiles must be strictly increasing for this spread of values.
        assert.ok(summary.p10 < summary.p25, 'p10 < p25');
        assert.ok(summary.p25 < summary.p50, 'p25 < p50');
        assert.ok(summary.p50 < summary.p75, 'p50 < p75');
        assert.ok(summary.p75 < summary.p90, 'p75 < p90');
    });

    it('returns undefined for fewer than 3 scores', () => {
        const twoScores = computePercentileDistribution([TEST_SCORE_MID, TEST_SCORE_BASELINE]);
        assert.strictEqual(twoScores, undefined);
        const noScores = computePercentileDistribution([]);
        assert.strictEqual(noScores, undefined);
    });

    it('handles unsorted input', () => {
        const shuffled = [TEST_SCORE_HIGH, TEST_SCORE_WARNING, TEST_SCORE_MID, TEST_SCORE_LOW, TEST_SCORE_PASSING];
        const dist = computePercentileDistribution(shuffled);
        assert.ok(dist !== undefined);
        const summary = requireDefined(dist);
        assert.ok(summary.p10 <= summary.p90);
    });

    it('handles identical values', () => {
        const constant = [TEST_SCORE_MID, TEST_SCORE_MID, TEST_SCORE_MID, TEST_SCORE_MID];
        const dist = computePercentileDistribution(constant);
        assert.ok(dist !== undefined);
        const summary = requireDefined(dist);
        assert.strictEqual(summary.p50, TEST_SCORE_MID);
        assert.strictEqual(summary.p10, TEST_SCORE_MID);
    });
});
|
|
1013
|
+
// Tests for computePercentileRank(value, sortedValues). The arithmetic in the
// comments follows the original notes: rank = (lower + upper) / (2 * n).
describe('computePercentileRank', () => {
    const oneToFive = () => [1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_FIVE];

    it('returns fractional rank for median value', () => {
        const sortedValues = [...oneToFive(), COUNT_SIX, COUNT_SEVEN, COUNT_EIGHT, COUNT_NINE, COUNT_TEN];
        // 4 values < 5, 5 values <= 5, rank = (4+5)/(2*10) = 0.45
        const actual = computePercentileRank(COUNT_FIVE, sortedValues);
        assert.strictEqual(actual, TEST_SCORE_BORDERLINE);
    });

    it('returns 1 for value above all', () => {
        const actual = computePercentileRank(COUNT_TEN, oneToFive());
        assert.strictEqual(actual, 1);
    });

    it('returns 0 for value below all', () => {
        const actual = computePercentileRank(0, oneToFive());
        assert.strictEqual(actual, 0);
    });

    it('returns 0.5 for empty array', () => {
        const actual = computePercentileRank(COUNT_FIVE, []);
        assert.strictEqual(actual, TEST_SCORE_MID);
    });

    it('returns ~0.5 for all-equal values (ER2 fix)', () => {
        // All-equal: lower=0, upper=3, rank = (0+3)/(2*3) = 0.5
        const fractionalCase = computePercentileRank(TEST_SCORE_MID, [TEST_SCORE_MID, TEST_SCORE_MID, TEST_SCORE_MID]);
        assert.strictEqual(fractionalCase, TEST_SCORE_MID);
        const integerCase = computePercentileRank(1, [1, 1, 1, 1, 1]);
        assert.strictEqual(integerCase, TEST_SCORE_MID);
    });

    it('handles tied values at edges', () => {
        // [1, 1, 2, 3]: value=1 -> lower=0, upper=2, rank = (0+2)/(2*4) = 0.25
        const tiedAtBottom = computePercentileRank(1, [1, 1, COUNT_TWO, COUNT_THREE]);
        assert.strictEqual(tiedAtBottom, TEST_CDF_INTERPOLATION_MIN);
        // [1, 2, 3, 3]: value=3 -> lower=2, upper=4, rank = (2+4)/(2*4) = 0.75
        const tiedAtTop = computePercentileRank(COUNT_THREE, [1, COUNT_TWO, COUNT_THREE, COUNT_THREE]);
        assert.strictEqual(tiedAtTop, TEST_CDF_INTERPOLATION_INPUT);
    });
});
|
|
1043
|
+
// Tests for the StreamingPercentile accumulator: add/addAll, count,
// percentile, distribution, and rejection of non-finite input.
describe('StreamingPercentile', () => {
    it('produces accurate percentiles for 1000 uniform values', () => {
        const digest = new StreamingPercentile();
        const lastIndex = COUNT_THOUSAND - 1;
        // Feed evenly spaced values index / lastIndex, in ascending order.
        let index = 0;
        while (index < COUNT_THOUSAND) {
            digest.add(index / lastIndex);
            index++;
        }
        const dist = digest.distribution();
        assert.ok(dist !== undefined);
        const summary = requireDefined(dist);
        const distanceFrom = (actual, target) => Math.abs(actual - target);
        assert.ok(distanceFrom(summary.p50, TEST_SCORE_MID) < DELTA, `p50 expected ~0.5, got ${summary.p50}`);
        assert.ok(distanceFrom(summary.p10, TEST_SCORE_WARNING) < DELTA, `p10 expected ~0.1, got ${summary.p10}`);
        assert.ok(distanceFrom(summary.p90, TEST_SCORE_HIGH) < DELTA, `p90 expected ~0.9, got ${summary.p90}`);
    });

    it('tracks count correctly', () => {
        const digest = new StreamingPercentile();
        const batch = [1, COUNT_TWO, COUNT_THREE, COUNT_FOUR, COUNT_FIVE];
        digest.addAll(batch);
        assert.strictEqual(digest.count(), COUNT_FIVE);
    });

    it('returns undefined distribution for fewer than 3 values', () => {
        const digest = new StreamingPercentile();
        digest.add(1);
        digest.add(COUNT_TWO);
        const dist = digest.distribution();
        assert.strictEqual(dist, undefined);
    });

    it('produces monotonic percentiles', () => {
        const digest = new StreamingPercentile();
        // Random samples: only the ordering of the percentiles is checked.
        let remaining = TEST_STREAMING_MONOTONIC_SAMPLE_SIZE;
        while (remaining > 0) {
            digest.add(Math.random());
            remaining--;
        }
        const { p10, p25, p50, p75, p90 } = requireDefined(digest.distribution(), 'Expected percentile distribution');
        assert.ok(p10 <= p25, `p10 (${p10}) <= p25 (${p25})`);
        assert.ok(p25 <= p50, `p25 (${p25}) <= p50 (${p50})`);
        assert.ok(p50 <= p75, `p50 (${p50}) <= p75 (${p75})`);
        assert.ok(p75 <= p90, `p75 (${p75}) <= p90 (${p90})`);
    });

    it('rejects NaN (ER12)', () => {
        const digest = new StreamingPercentile();
        const addNaN = () => digest.add(NaN);
        assert.throws(addNaN, /must be finite/);
    });

    it('rejects Infinity (ER12)', () => {
        const digest = new StreamingPercentile();
        const addPositive = () => digest.add(Infinity);
        const addNegative = () => digest.add(-Infinity);
        assert.throws(addPositive, /must be finite/);
        assert.throws(addNegative, /must be finite/);
    });

    it('handles empty percentile digest gracefully', () => {
        const digest = new StreamingPercentile();
        assert.strictEqual(digest.count(), 0);
        assert.strictEqual(digest.distribution(), undefined);
        // An empty digest is expected to answer 0 for any percentile query.
        const median = digest.percentile(50);
        assert.strictEqual(median, 0);
    });

    it('handles single value correctly', () => {
        const digest = new StreamingPercentile();
        digest.add(42);
        assert.strictEqual(digest.count(), 1);
        // A distribution needs at least 3 values.
        assert.strictEqual(digest.distribution(), undefined);
        // Original note: DDSketch uses relative error; approximate match.
        const medianError = Math.abs(digest.percentile(50) - 42);
        const tolerance = 42 * 0.05;
        assert.ok(medianError < tolerance, 'p50 should approximate 42');
    });

    it('addAll batches multiple values', () => {
        const digest = new StreamingPercentile();
        digest.addAll([1, 2, 3, 4, 5]);
        assert.strictEqual(digest.count(), 5);
        const dist = digest.distribution();
        assert.ok(dist !== undefined);
        const medianAboveZero = dist.p50 > 0;
        const medianBelowFive = dist.p50 < 5;
        assert.ok(medianAboveZero && medianBelowFive);
    });

    it('addAll rejects NaN in batch', () => {
        const digest = new StreamingPercentile();
        const addBatch = () => digest.addAll([1, 2, NaN, 4]);
        assert.throws(addBatch, /must be finite/);
    });

    it('addAll rejects Infinity in batch', () => {
        const digest = new StreamingPercentile();
        const addBatch = () => digest.addAll([1, Infinity]);
        assert.throws(addBatch, /must be finite/);
    });

    it('maintains accuracy with many values', () => {
        const digest = new StreamingPercentile();
        let added = 0;
        while (added < 100) {
            digest.add(Math.random());
            added++;
        }
        assert.strictEqual(digest.count(), 100);
        const dist = digest.distribution();
        assert.ok(dist !== undefined);
        // Verify monotonicity is preserved even after compression.
        assert.ok(dist.p10 <= dist.p90);
    });
});
|
|
1127
|
+
// ============================================================================
|
|
1128
|
+
// QM2: Derived Features Pipeline Tests
|
|
1129
|
+
// ============================================================================
|
|
1130
|
+
// Tests for computeDerivedFeatures: CQI, custom CQI weights, coverage
// confidence, correlations and CQI sensitivity.
describe('computeDerivedFeatures', () => {
    // Aggregate values where only `avg` and `count` are populated.
    const avgOnly = (avg) => ({ avg, p50: null, p95: null, p99: null, min: null, max: null, count: COUNT_HUNDRED });
    const hallucinationAlert = {
        severity: 'warning',
        message: '',
        aggregation: 'avg',
        threshold: TEST_SCORE_WARNING,
        actualValue: DELTA,
        direction: 'above',
    };
    // Fixture shared by every test in this suite.
    const metrics = [
        makeMetricResult({ name: 'relevance', values: avgOnly(TEST_SCORE_STRONG) }),
        makeMetricResult({ name: 'faithfulness', values: avgOnly(0.90) }),
        makeMetricResult({ name: 'hallucination', values: avgOnly(DELTA), alerts: [hallucinationAlert] }),
    ];

    it('computes CQI from metrics', () => {
        const derived = computeDerivedFeatures({ metrics });
        assert.ok(derived.cqi !== undefined);
        const cqi = requireDefined(derived.cqi);
        assert.ok(cqi.value > 0);
    });

    it('computes CQI with custom weights (FE1)', () => {
        const cqiWeights = { relevance: TEST_SCORE_MID, faithfulness: TEST_SCORE_MID };
        const derived = computeDerivedFeatures({ metrics, cqiWeights });
        assert.ok(derived.cqi !== undefined);
        // Two weighted metrics should produce two contributions.
        const cqi = requireDefined(derived.cqi);
        assert.strictEqual(cqi.contributions.length, COUNT_TWO);
    });

    it('computes coverage confidence when evaluationsPerInput provided', () => {
        // Attach a fresh confidence record to a copy of every fixture metric.
        const metricsWithConf = metrics.map((metric) => {
            const confidence = {
                level: 'high',
                sampleCount: COUNT_HUNDRED,
                scoreStdDev: DELTA,
                evaluatorCount: 1,
                evaluatorAgreement: null,
            };
            return { ...metric, confidence };
        });
        const evaluationsPerInput = new Map()
            .set('relevance', [COUNT_TWENTY, COUNT_TWENTY, COUNT_TWENTY, COUNT_TWENTY, COUNT_TWENTY])
            .set('faithfulness', [COUNT_FIFTY, COUNT_FIFTY]);
        const derived = computeDerivedFeatures({ metrics: metricsWithConf, evaluationsPerInput });
        const coverage = derived.coverageConfidence;
        assert.ok(coverage.size >= COUNT_TWO);
        assert.ok(coverage.has('relevance'));
    });

    it('computes correlation matrix when metricTimeSeries provided', () => {
        const metricTimeSeries = new Map()
            .set('relevance', [TEST_SCORE_GOOD, TEST_SCORE_PASSING, TEST_SCORE_BASELINE, TEST_SCORE_MID, TEST_SCORE_POOR])
            .set('hallucination', [TEST_SCORE_WARNING, TEST_SCORE_VERY_LOW, TEST_SCORE_LOW, TEST_SCORE_POOR, TEST_SCORE_MID]);
        const derived = computeDerivedFeatures({ metrics, metricTimeSeries });
        assert.ok(derived.correlations.length > 0);
    });

    it('returns empty correlations when no time series', () => {
        const derived = computeDerivedFeatures({ metrics });
        assert.strictEqual(derived.correlations.length, 0);
    });

    it('includes CQI sensitivity when CQI is computed', () => {
        const derived = computeDerivedFeatures({ metrics });
        assert.ok(derived.cqiSensitivity !== undefined);
    });
});
|
|
1186
|
+
// ==========================================================================
|
|
1187
|
+
// R3: Granger Causality Tests
|
|
1188
|
+
// ==========================================================================
|
|
1189
|
+
// Tests for computeGrangerCausality(a, b, lags[, threshold]) on synthetic
// series: lagged dependence, too little data, independent series, mutually
// coupled series, NaN input and the p-value range.
describe('computeGrangerCausality', () => {
    it('detects A->B causality with lagged linear relationship', () => {
        // A leads B by 1 step: B[t] = 0.8 * A[t-1] + noise
        const length = COUNT_SIXTY;
        // A does not depend on B, so it can be generated in full first.
        const a = Array.from({ length }, (_, i) => Math.sin(i * TEST_SCORE_LOW) + (i % COUNT_THREE) * TEST_DECIMAL_EPSILON);
        const b = a.map((_, i) => {
            if (i > 0) {
                return TEST_SCORE_GOOD * a[i - 1] + TEST_SCORE_WARNING * Math.cos(i);
            }
            return TEST_SCORE_MID;
        });
        const outcome = computeGrangerCausality(a, b, COUNT_TWO);
        assert.ok(outcome !== undefined, 'should return a result');
        const granger = requireDefined(outcome);
        assert.ok(granger.fStatistic > 0, 'F-statistic should be positive');
        assert.ok(granger.lags >= 1, 'should use at least 1 lag');
        const acceptable = ['A->B', 'bidirectional'];
        assert.ok(acceptable.includes(granger.direction), `Expected A->B or bidirectional, got: ${granger.direction}`);
        assert.ok(granger.pValue < TEST_SCORE_WARNING, `Expected significant p-value, got ${granger.pValue}`);
    });

    it('returns undefined for insufficient data', () => {
        const shortA = [1, COUNT_TWO, COUNT_THREE];
        const shortB = [COUNT_FOUR, COUNT_FIVE, COUNT_SIX];
        const outcome = computeGrangerCausality(shortA, shortB, 1);
        assert.strictEqual(outcome, undefined);
    });

    it('detects no causality for independent series', () => {
        const length = 80;
        const a = Array.from({ length }, (_, i) => Math.sin(i * TEST_SCORE_MID));
        const b = Array.from({ length }, (_, i) => Math.cos(i * TEST_SCORE_PASSING + COUNT_HUNDRED));
        const outcome = computeGrangerCausality(a, b, COUNT_TWO);
        assert.ok(outcome !== undefined);
        // Independent deterministic series should show no significant causal direction.
        const { direction } = requireDefined(outcome);
        assert.strictEqual(direction, 'none', 'independent series should show no causal direction');
    });

    it('detects bidirectional causality for tightly coupled series', () => {
        const length = 80;
        const a = [TEST_SCORE_MID, TEST_SCORE_MID];
        const b = [TEST_SCORE_MID, TEST_SCORE_MID];
        // Each new sample mixes the previous samples of BOTH series, so the
        // two arrays have to be grown together, step by step.
        for (let step = COUNT_TWO; step < length; step++) {
            const prevA = a[step - 1];
            const prevB = b[step - 1];
            a.push(TEST_SCORE_BASELINE * prevA + TEST_SCORE_LOW * prevB + DELTA * Math.sin(step));
            b.push(TEST_SCORE_MID * prevB + TEST_SCORE_POOR * prevA + DELTA * Math.cos(step));
        }
        const outcome = computeGrangerCausality(a, b, COUNT_TWO, TEST_SCORE_WARNING);
        assert.ok(outcome !== undefined);
        // With tight coupling, likely bidirectional or at least one direction.
        const causalDirections = ['A->B', 'B->A', 'bidirectional'];
        const { direction } = requireDefined(outcome);
        assert.ok(causalDirections.includes(direction), `expected causal direction, got: ${direction}`);
    });

    it('handles series with NaN values by treating as insufficient data', () => {
        const withNaN = [1, 2, NaN, 4, 5];
        const clean = [2, 3, 4, 5, 6];
        const outcome = computeGrangerCausality(withNaN, clean, 1);
        // The function is expected to give up (return undefined) on this input.
        assert.strictEqual(outcome, undefined);
    });

    it('returns valid pValue (0 <= p <= 1)', () => {
        const length = 100;
        const a = [];
        const b = [];
        // Random noise is drawn in the same interleaved order as before:
        // once for A on every step, once for B on every step after the first.
        for (let step = 0; step < length; step++) {
            a.push(Math.sin(step * 0.05) + Math.random() * 0.1);
            if (step > 0) {
                b.push(0.7 * a[step - 1] + Math.random() * 0.1);
            } else {
                b.push(0.5);
            }
        }
        const outcome = computeGrangerCausality(a, b, 2);
        assert.ok(outcome !== undefined);
        const nonNegative = requireDefined(outcome).pValue >= 0;
        assert.ok(nonNegative && requireDefined(outcome).pValue <= 1);
    });
});
|
|
1256
|
+
// Tests for the optional grangerConfig setting of computeCorrelationMatrix.
describe('computeCorrelationMatrix with Granger', () => {
    // Short five-point pair reused by the two "skips Granger" tests.
    const shortSeriesPair = () => new Map()
        .set('a', [TEST_SCORE_HIGH, TEST_SCORE_GOOD, TEST_SCORE_PASSING, TEST_SCORE_BASELINE, TEST_SCORE_MID])
        .set('b', [TEST_SCORE_WARNING, TEST_SCORE_VERY_LOW, TEST_SCORE_LOW, TEST_SCORE_POOR, TEST_SCORE_MID]);

    it('runs Granger test when enabled and pairs are significant', () => {
        const length = COUNT_SIXTY;
        // series2 follows series1 with a one-step delay.
        const series1 = Array.from({ length }, (_, i) => TEST_SCORE_MID + TEST_SCORE_LOW * Math.sin(i * TEST_SCORE_VERY_LOW));
        const series2 = series1.map((_, i) => {
            if (i > 0) {
                return TEST_SCORE_PASSING * series1[i - 1] + TEST_SCORE_WARNING;
            }
            return TEST_SCORE_MID;
        });
        const metricTimeSeries = new Map()
            .set('metricA', series1)
            .set('metricB', series2);
        const options = { grangerConfig: { enabled: true, minSampleSize: COUNT_TWENTY } };
        const pairs = computeCorrelationMatrix(metricTimeSeries, undefined, undefined, options);
        assert.ok(pairs.length === 1);
        const pair = pairs[0];
        // The Granger fields are only checked when the pair is significant.
        if (!pair.significant) {
            return;
        }
        assert.ok(pair.granger !== undefined, 'significant pair should have Granger result');
        assert.ok(requireDefined(pair.granger).fStatistic >= 0, 'F-statistic should be non-negative');
        const allowedConfidence = ['correlation', 'granger'];
        assert.ok(allowedConfidence.includes(pair.causalConfidence));
    });

    it('skips Granger when disabled', () => {
        // No grangerConfig is passed at all.
        const pairs = computeCorrelationMatrix(shortSeriesPair());
        assert.ok(pairs.length === 1);
        const [pair] = pairs;
        assert.strictEqual(pair.granger, undefined);
        assert.strictEqual(pair.causalConfidence, 'correlation');
    });

    it('skips Granger when sample size too small', () => {
        // Five samples against a minimum sample size of COUNT_HUNDRED.
        const options = { grangerConfig: { enabled: true, minSampleSize: COUNT_HUNDRED } };
        const pairs = computeCorrelationMatrix(shortSeriesPair(), undefined, undefined, options);
        assert.ok(pairs.length === 1);
        const [pair] = pairs;
        assert.strictEqual(pair.granger, undefined);
    });
});
|
|
1303
|
+
// ==========================================================================
|
|
1304
|
+
// R5: Degradation Backtesting Tests
|
|
1305
|
+
// ==========================================================================
|
|
1306
|
+
// Tests for injectDegradationScenario(snapshots, scenario, magnitude, a, b).
// Judging by the asserted indices, the last two arguments act as a start index
// and a number of affected snapshots — confirm against the implementation.
describe('injectDegradationScenario', () => {
    // Builds one healthy snapshot; `extra` fields are appended after the base ones.
    const makeSnapshot = (extra = {}) => ({
        currentStdDev: DELTA,
        baselineStdDev: DELTA,
        coverageGapCount: 0,
        totalCoverageCells: COUNT_HUNDRED,
        ...extra,
    });
    // Builds `count` independent snapshot objects.
    const makeSnapshots = (count, extra) => {
        const snapshots = [];
        for (let index = 0; index < count; index++) {
            snapshots.push(makeSnapshot(extra));
        }
        return snapshots;
    };

    it('injects variance spike at specified range', () => {
        const base = makeSnapshots(COUNT_TEN);
        const injected = injectDegradationScenario(base, 'variance_spike', TEST_VARIANCE_SPIKE_MULTIPLIER, COUNT_THREE, COUNT_FOUR);
        const spikeError = (index) => Math.abs(injected[index].currentStdDev - TEST_MINIMIZE_GOOD_INPUT);
        // Index 2 sits just before the injected range.
        assert.strictEqual(injected[COUNT_TWO].currentStdDev, DELTA);
        // First injected snapshot: DELTA * 3.0.
        assert.ok(spikeError(COUNT_THREE) < TEST_ABSOLUTE_EPSILON, 'should be ~0.15');
        // Last injected snapshot.
        assert.ok(spikeError(COUNT_SIX) < TEST_ABSOLUTE_EPSILON, 'should be ~0.15');
        // Index 7 sits just after the injected range.
        assert.strictEqual(injected[COUNT_SEVEN].currentStdDev, DELTA);
    });

    it('injects coverage gap', () => {
        const base = makeSnapshots(COUNT_TEN);
        const injected = injectDegradationScenario(base, 'coverage_gap', TEST_SCORE_MID, COUNT_FIVE, COUNT_TWO);
        const gapAt = (index) => injected[index].coverageGapCount;
        assert.strictEqual(gapAt(COUNT_FOUR), 0);
        // 100 cells * 0.5
        assert.strictEqual(gapAt(COUNT_FIVE), COUNT_FIFTY);
        assert.strictEqual(gapAt(COUNT_SIX), COUNT_FIFTY);
        assert.strictEqual(gapAt(COUNT_SEVEN), 0);
    });

    it('injects latency jump', () => {
        const base = makeSnapshots(COUNT_TEN, { latencyP50: COUNT_TWO, latencyP95: COUNT_FOUR });
        const injected = injectDegradationScenario(base, 'latency_jump', COUNT_FIVE, 0, COUNT_THREE);
        // Expected value is latencyP50 * magnitude: 2 * 5.
        assert.strictEqual(injected[0].latencyP95, COUNT_TEN);
        assert.strictEqual(injected[COUNT_TWO].latencyP95, COUNT_TEN);
        // Index 3 is outside the injected range and keeps its own value.
        assert.strictEqual(injected[COUNT_THREE].latencyP95, COUNT_FOUR);
    });

    it('does not mutate original array', () => {
        const base = makeSnapshots(1);
        injectDegradationScenario(base, 'variance_spike', TEST_VARIANCE_SPIKE_MULTIPLIER, 0, 1);
        const [untouched] = base;
        assert.strictEqual(untouched.currentStdDev, DELTA);
    });

    it('preserves references for unmodified snapshots', () => {
        const base = makeSnapshots(3);
        // Only the middle snapshot (index 1) is targeted.
        const injected = injectDegradationScenario(base, 'variance_spike', TEST_VARIANCE_SPIKE_MULTIPLIER, 1, 1);
        assert.strictEqual(injected[0], base[0], 'Unmodified item before injection range should be same reference');
        assert.notStrictEqual(injected[1], base[1], 'Modified item should be a new object');
        assert.strictEqual(injected[2], base[2], 'Unmodified item after injection range should be same reference');
    });

    it('handles empty array safely', () => {
        const injected = injectDegradationScenario([], 'variance_spike', TEST_VARIANCE_SPIKE_MULTIPLIER, 0, 1);
        assert.strictEqual(injected.length, 0);
    });

    it('handles edge case where endIdx >= array length', () => {
        const base = makeSnapshots(2);
        // The requested range extends far beyond the two available snapshots.
        const injected = injectDegradationScenario(base, 'variance_spike', TEST_VARIANCE_SPIKE_MULTIPLIER, 0, 100);
        assert.strictEqual(injected.length, COUNT_TWO);
    });
});
|
|
1369
|
+
// ==========================================================================
|
|
1370
|
+
// R5 — Rolling Degradation Signal Tests
|
|
1371
|
+
// ==========================================================================
|
|
1372
|
+
describe('computeStdDev', () => {
|
|
1373
|
+
it('computes sample standard deviation for known values', () => {
|
|
1374
|
+
// [2, 4, 4, 4, 5, 5, 7, 9] → stdDev ≈ 2.138
|
|
1375
|
+
const result = computeStdDev([COUNT_TWO, COUNT_FOUR, COUNT_FOUR, COUNT_FOUR, COUNT_FIVE, COUNT_FIVE, COUNT_SEVEN, COUNT_NINE]);
|
|
1376
|
+
assert.ok(result !== null);
|
|
1377
|
+
assert.ok(Math.abs(result - TEST_STD_DEV_REFERENCE_HIGH) < TEST_PVALUE_STRICT, `Expected ~2.138, got ${result}`);
|
|
1378
|
+
});
|
|
1379
|
+
it('returns null for single value', () => {
|
|
1380
|
+
assert.strictEqual(computeStdDev([TEST_SCORE_MID]), null);
|
|
1381
|
+
});
|
|
1382
|
+
it('returns null for empty array', () => {
|
|
1383
|
+
assert.strictEqual(computeStdDev([]), null);
|
|
1384
|
+
});
|
|
1385
|
+
it('returns 0 for identical values', () => {
|
|
1386
|
+
assert.strictEqual(computeStdDev([TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD]), 0);
|
|
1387
|
+
});
|
|
1388
|
+
it('computes stdDev for two values', () => {
|
|
1389
|
+
// [0, 1] → stdDev = sqrt(0.5) ≈ 0.707
|
|
1390
|
+
const result = computeStdDev([0, 1]);
|
|
1391
|
+
assert.ok(result !== null);
|
|
1392
|
+
assert.ok(Math.abs(result - TEST_STD_DEV_REFERENCE_LOW) < TEST_PVALUE_STRICT);
|
|
1393
|
+
});
|
|
1394
|
+
});
|
|
1395
|
+
describe('loadDegradationState / saveDegradationState', () => {
|
|
1396
|
+
let tmpDir;
|
|
1397
|
+
beforeEach(() => {
|
|
1398
|
+
tmpDir = mkdtempSync(join(tmpdir(), 'degradation-state-'));
|
|
1399
|
+
});
|
|
1400
|
+
afterEach(() => {
|
|
1401
|
+
rmSync(tmpDir, { recursive: true, force: true });
|
|
1402
|
+
});
|
|
1403
|
+
it('returns empty state on missing file', () => {
|
|
1404
|
+
const state = loadDegradationState(tmpDir);
|
|
1405
|
+
assert.strictEqual(state.lastRun, '');
|
|
1406
|
+
assert.deepStrictEqual(state.breaches, {});
|
|
1407
|
+
});
|
|
1408
|
+
it('round-trips state correctly', () => {
|
|
1409
|
+
const state = {
|
|
1410
|
+
lastRun: '2026-03-01T00:00:00Z',
|
|
1411
|
+
breaches: { relevance: COUNT_TWO, coherence: 0, hallucination: 1 },
|
|
1412
|
+
};
|
|
1413
|
+
saveDegradationState(tmpDir, state);
|
|
1414
|
+
const loaded = loadDegradationState(tmpDir);
|
|
1415
|
+
assert.strictEqual(loaded.lastRun, state.lastRun);
|
|
1416
|
+
assert.deepStrictEqual(loaded.breaches, state.breaches);
|
|
1417
|
+
});
|
|
1418
|
+
it('returns empty state on malformed JSON', () => {
|
|
1419
|
+
writeFileSync(join(tmpDir, '.degradation-state.json'), 'not json');
|
|
1420
|
+
const state = loadDegradationState(tmpDir);
|
|
1421
|
+
assert.strictEqual(state.lastRun, '');
|
|
1422
|
+
assert.deepStrictEqual(state.breaches, {});
|
|
1423
|
+
});
|
|
1424
|
+
it('handles empty breach counts in saved state', () => {
|
|
1425
|
+
const state = {
|
|
1426
|
+
lastRun: '2026-03-01T00:00:00Z',
|
|
1427
|
+
breaches: {}, // Empty breaches
|
|
1428
|
+
};
|
|
1429
|
+
saveDegradationState(tmpDir, state);
|
|
1430
|
+
const loaded = loadDegradationState(tmpDir);
|
|
1431
|
+
assert.deepStrictEqual(loaded.breaches, {});
|
|
1432
|
+
});
|
|
1433
|
+
it('persists complex breach data across save/load cycles', () => {
|
|
1434
|
+
const original = {
|
|
1435
|
+
lastRun: '2026-03-01T12:30:45Z',
|
|
1436
|
+
breaches: {
|
|
1437
|
+
relevance: 5,
|
|
1438
|
+
faithfulness: 3,
|
|
1439
|
+
coherence: 0,
|
|
1440
|
+
hallucination: 2,
|
|
1441
|
+
task_completion: 1,
|
|
1442
|
+
},
|
|
1443
|
+
};
|
|
1444
|
+
saveDegradationState(tmpDir, original);
|
|
1445
|
+
const loaded = loadDegradationState(tmpDir);
|
|
1446
|
+
assert.strictEqual(loaded.lastRun, original.lastRun);
|
|
1447
|
+
assert.strictEqual(loaded.breaches.relevance, 5);
|
|
1448
|
+
assert.strictEqual(loaded.breaches.task_completion, 1);
|
|
1449
|
+
});
|
|
1450
|
+
});
|
|
1451
|
+
describe('loadCalibrationState / saveCalibrationState', () => {
|
|
1452
|
+
let tmpDir;
|
|
1453
|
+
beforeEach(() => {
|
|
1454
|
+
tmpDir = mkdtempSync(join(tmpdir(), 'calibration-state-'));
|
|
1455
|
+
});
|
|
1456
|
+
afterEach(() => {
|
|
1457
|
+
rmSync(tmpDir, { recursive: true, force: true });
|
|
1458
|
+
});
|
|
1459
|
+
it('returns null on missing file', () => {
|
|
1460
|
+
const state = loadCalibrationState(tmpDir);
|
|
1461
|
+
assert.strictEqual(state, null);
|
|
1462
|
+
});
|
|
1463
|
+
it('round-trips calibration state correctly', () => {
|
|
1464
|
+
const dist = {
|
|
1465
|
+
p10: 0.1, p25: 0.25, p50: 0.5, p75: 0.75, p90: 0.9
|
|
1466
|
+
};
|
|
1467
|
+
const state = {
|
|
1468
|
+
lastCalibrated: '2026-03-01T00:00:00Z',
|
|
1469
|
+
distributions: {
|
|
1470
|
+
relevance: {
|
|
1471
|
+
distribution: dist,
|
|
1472
|
+
sampleSize: 100,
|
|
1473
|
+
windowStart: '2026-03-01',
|
|
1474
|
+
windowEnd: '2026-03-07',
|
|
1475
|
+
},
|
|
1476
|
+
},
|
|
1477
|
+
};
|
|
1478
|
+
saveCalibrationState(tmpDir, state);
|
|
1479
|
+
const loaded = requireDefined(loadCalibrationState(tmpDir), 'Expected loaded calibration state');
|
|
1480
|
+
assert.strictEqual(loaded.lastCalibrated, state.lastCalibrated);
|
|
1481
|
+
assert.ok('relevance' in loaded.distributions);
|
|
1482
|
+
const relevanceData = loaded.distributions.relevance;
|
|
1483
|
+
assert.strictEqual(relevanceData.sampleSize, 100);
|
|
1484
|
+
});
|
|
1485
|
+
it('handles malformed JSON gracefully', () => {
|
|
1486
|
+
writeFileSync(join(tmpDir, '.calibration-state.json'), '{invalid}');
|
|
1487
|
+
const state = loadCalibrationState(tmpDir);
|
|
1488
|
+
assert.strictEqual(state, null);
|
|
1489
|
+
});
|
|
1490
|
+
it('persists multiple metric distributions', () => {
|
|
1491
|
+
const dist1 = { p10: 0.1, p25: 0.25, p50: 0.5, p75: 0.75, p90: 0.9 };
|
|
1492
|
+
const dist2 = { p10: 0.05, p25: 0.2, p50: 0.45, p75: 0.8, p90: 0.95 };
|
|
1493
|
+
const state = {
|
|
1494
|
+
lastCalibrated: '2026-03-01T00:00:00Z',
|
|
1495
|
+
distributions: {
|
|
1496
|
+
relevance: { distribution: dist1, sampleSize: 100, windowStart: '2026-03-01', windowEnd: '2026-03-07' },
|
|
1497
|
+
faithfulness: { distribution: dist2, sampleSize: 150, windowStart: '2026-03-01', windowEnd: '2026-03-07' },
|
|
1498
|
+
},
|
|
1499
|
+
};
|
|
1500
|
+
saveCalibrationState(tmpDir, state);
|
|
1501
|
+
const loaded = requireDefined(loadCalibrationState(tmpDir), 'Expected loaded calibration state');
|
|
1502
|
+
assert.strictEqual(Object.keys(loaded.distributions).length, 2);
|
|
1503
|
+
assert.ok('relevance' in loaded.distributions);
|
|
1504
|
+
assert.ok('faithfulness' in loaded.distributions);
|
|
1505
|
+
});
|
|
1506
|
+
});
|
|
1507
|
+
describe('computeRollingDegradationSignals', () => {
|
|
1508
|
+
/**
 * Build one synthetic score bucket per entry of `avgScores`.
 *
 * Each bucket has the shape `{ scores, startTime, endTime }`:
 * - `scores` holds `scoresPerBucket` copies of the entry, or an empty array
 *   when the entry is the sentinel `-1` (used to simulate a bucket with no data).
 * - `startTime` / `endTime` are ISO strings for day `1 + i` and day
 *   `COUNT_TWO + i` of month index 0 (January) of TEST_YEAR_2026, where `i`
 *   is the bucket's position in `avgScores`.
 *
 * NOTE(review): the dates are built with the multi-argument Date constructor
 * (local time) and serialized with toISOString() (UTC), so the exact strings
 * depend on the host time zone. COUNT_TWO is presumably 2, which would make
 * each bucket one day long — confirm against the constants module.
 *
 * @param {number[]} avgScores - per-bucket score value; `-1` yields an empty bucket.
 * @param {number} [scoresPerBucket=COUNT_TEN] - number of identical scores per non-empty bucket.
 * @returns {{ scores: number[], startTime: string, endTime: string }[]} one bucket per input entry.
 */
function makeBuckets(avgScores, scoresPerBucket = COUNT_TEN) {
|
|
1509
|
+
return avgScores.map((avg, i) => ({
|
|
1510
|
+
scores: avg === -1 ? [] : Array.from({ length: scoresPerBucket }, () => avg), // -1 is the "empty bucket" sentinel
|
|
1511
|
+
startTime: new Date(TEST_YEAR_2026, 0, 1 + i).toISOString(),
|
|
1512
|
+
endTime: new Date(TEST_YEAR_2026, 0, COUNT_TWO + i).toISOString(),
|
|
1513
|
+
}));
|
|
1514
|
+
}
|
|
1515
|
+
const emptyState = { lastRun: '', breaches: {} };
|
|
1516
|
+
const window = { startDate: '2026-01-01T00:00:00Z', endDate: '2026-01-11T00:00:00Z' };
|
|
1517
|
+
it('returns healthy for 10 stable-score buckets', () => {
|
|
1518
|
+
const buckets = makeBuckets([TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD]);
|
|
1519
|
+
const reports = computeRollingDegradationSignals({ relevance: buckets }, ['relevance'], emptyState, window);
|
|
1520
|
+
assert.strictEqual(reports.length, 1);
|
|
1521
|
+
assert.strictEqual(reports[0].signal.predictedStatus, 'healthy');
|
|
1522
|
+
assert.strictEqual(reports[0].signal.confirmed, false);
|
|
1523
|
+
});
|
|
1524
|
+
it('detects variance spike with high stdDev in last buckets', () => {
|
|
1525
|
+
// Baseline: stable at 0.8. Last bucket: scores spread from 0.1 up to 1.0
|
|
1526
|
+
const stableBuckets = makeBuckets([TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD]);
|
|
1527
|
+
const spikeBucket = {
|
|
1528
|
+
scores: [
|
|
1529
|
+
TEST_SCORE_WARNING,
|
|
1530
|
+
TEST_SCORE_VERY_LOW,
|
|
1531
|
+
TEST_SCORE_LOW,
|
|
1532
|
+
TEST_SCORE_POOR,
|
|
1533
|
+
TEST_SCORE_MID,
|
|
1534
|
+
TEST_SCORE_BASELINE,
|
|
1535
|
+
TEST_SCORE_PASSING,
|
|
1536
|
+
TEST_SCORE_GOOD,
|
|
1537
|
+
TEST_SCORE_HIGH,
|
|
1538
|
+
1,
|
|
1539
|
+
],
|
|
1540
|
+
startTime: new Date(TEST_YEAR_2026, 0, COUNT_EIGHT).toISOString(),
|
|
1541
|
+
endTime: new Date(TEST_YEAR_2026, 0, COUNT_NINE).toISOString(),
|
|
1542
|
+
};
|
|
1543
|
+
const buckets = [...stableBuckets, spikeBucket];
|
|
1544
|
+
const reports = computeRollingDegradationSignals({ relevance: buckets }, ['relevance'], emptyState, window);
|
|
1545
|
+
assert.strictEqual(reports.length, 1);
|
|
1546
|
+
// Variance spike → at least warning
|
|
1547
|
+
assert.ok(reports[0].signal.predictedStatus === 'warning' || reports[0].signal.predictedStatus === 'critical', `Expected warning or critical, got ${reports[0].signal.predictedStatus}`);
|
|
1548
|
+
});
|
|
1549
|
+
it('skips metrics with fewer than MIN_BUCKETS_FOR_SIGNAL non-empty buckets', () => {
|
|
1550
|
+
const buckets = makeBuckets([TEST_SCORE_GOOD, TEST_SCORE_GOOD, -1, -1, -1, -1, -1, -1, -1, -1]);
|
|
1551
|
+
const reports = computeRollingDegradationSignals({ relevance: buckets }, ['relevance'], emptyState, window);
|
|
1552
|
+
assert.strictEqual(reports.length, 0);
|
|
1553
|
+
});
|
|
1554
|
+
it('handles all-identical scores (MAD=0 fallback path)', () => {
|
|
1555
|
+
const buckets = makeBuckets([TEST_SCORE_HIGH, TEST_SCORE_HIGH, TEST_SCORE_HIGH, TEST_SCORE_HIGH, TEST_SCORE_HIGH, TEST_SCORE_HIGH, TEST_SCORE_HIGH, TEST_SCORE_HIGH, TEST_SCORE_HIGH, TEST_SCORE_HIGH]);
|
|
1556
|
+
const reports = computeRollingDegradationSignals({ relevance: buckets }, ['relevance'], emptyState, window);
|
|
1557
|
+
assert.strictEqual(reports.length, 1);
|
|
1558
|
+
// All identical → stdDev is 0, varianceRatio defaults to 1 → healthy
|
|
1559
|
+
assert.strictEqual(reports[0].signal.predictedStatus, 'healthy');
|
|
1560
|
+
});
|
|
1561
|
+
it('carries forward priorConsecutiveBreaches from state', () => {
|
|
1562
|
+
// Use scores that create a mean drift: baseline stable at 0.8, then drop to 0.3
|
|
1563
|
+
const buckets = makeBuckets([TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_LOW, TEST_SCORE_LOW, TEST_SCORE_LOW]);
|
|
1564
|
+
const stateWithBreaches = {
|
|
1565
|
+
lastRun: '2026-03-01T00:00:00Z',
|
|
1566
|
+
breaches: { relevance: 1 },
|
|
1567
|
+
};
|
|
1568
|
+
const reports = computeRollingDegradationSignals({ relevance: buckets }, ['relevance'], stateWithBreaches, window);
|
|
1569
|
+
assert.strictEqual(reports.length, 1);
|
|
1570
|
+
// EWMA drift is reliably detected for this data (baseline=0.8, current drops to 0.3;
|
|
1571
|
+
// MAD=0 fallback: |0.665 - 0.8| = 0.135 > 0.1 * 0.5 range = 0.05).
|
|
1572
|
+
// With priorBreaches=1, consecutiveBreaches=2 => confirmed=true.
|
|
1573
|
+
assert.strictEqual(reports[0].signal.predictedStatus, 'warning');
|
|
1574
|
+
assert.strictEqual(reports[0].signal.consecutiveBreaches, COUNT_TWO);
|
|
1575
|
+
assert.strictEqual(reports[0].signal.confirmed, true);
|
|
1576
|
+
});
|
|
1577
|
+
it('processes multiple metrics independently', () => {
|
|
1578
|
+
const reports = computeRollingDegradationSignals({
|
|
1579
|
+
relevance: makeBuckets([TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD]),
|
|
1580
|
+
coherence: makeBuckets([TEST_SCORE_PASSING, TEST_SCORE_PASSING, TEST_SCORE_PASSING, TEST_SCORE_PASSING, TEST_SCORE_PASSING]),
|
|
1581
|
+
}, ['relevance', 'coherence'], emptyState, window);
|
|
1582
|
+
assert.strictEqual(reports.length, COUNT_TWO);
|
|
1583
|
+
assert.strictEqual(reports[0].metricName, 'relevance');
|
|
1584
|
+
assert.strictEqual(reports[1].metricName, 'coherence');
|
|
1585
|
+
});
|
|
1586
|
+
it('returns evaluation count across all buckets', () => {
|
|
1587
|
+
const buckets = makeBuckets([TEST_SCORE_GOOD, TEST_SCORE_GOOD, TEST_SCORE_GOOD], COUNT_FIVE);
|
|
1588
|
+
const reports = computeRollingDegradationSignals({ relevance: buckets }, ['relevance'], emptyState, window);
|
|
1589
|
+
assert.strictEqual(reports[0].evaluationCount, COUNT_FIFTEEN); // 3 buckets × 5 scores
|
|
1590
|
+
});
|
|
1591
|
+
it('skips missing metrics gracefully', () => {
|
|
1592
|
+
const reports = computeRollingDegradationSignals({}, ['nonexistent'], emptyState, window);
|
|
1593
|
+
assert.strictEqual(reports.length, 0);
|
|
1594
|
+
});
|
|
1595
|
+
});
|
|
1596
|
+
// ==========================================================================
|
|
1597
|
+
// FE-R1: computePSI
|
|
1598
|
+
// ==========================================================================
|
|
1599
|
+
describe('computePSI', () => {
|
|
1600
|
+
const PSI_SHIFTED_VALUE = 0.95;
|
|
1601
|
+
const PSI_OUT_OF_RANGE_VALUE = 5.0;
|
|
1602
|
+
/**
 * Build an array of n evenly spaced values in [0, 1).
 *
 * Element i is `i / n`, so the result runs 0, 1/n, 2/n, … (n - 1)/n.
 * An `n` of 0 yields an empty array.
 *
 * @param {number} n - number of values to generate.
 * @returns {number[]} the n evenly spaced values.
 */
|
|
1603
|
+
function uniform(n) {
|
|
1604
|
+
return Array.from({ length: n }, (_, i) => i / n);
|
|
1605
|
+
}
|
|
1606
|
+
/**
 * Build an array of n identical values, each equal to PSI_SHIFTED_VALUE (0.95).
 *
 * Every element is the same constant, so the whole sample sits at a single
 * point near the top of the [0, 1] range (a fully shifted distribution).
 *
 * @param {number} n - number of values to generate.
 * @returns {number[]} n copies of PSI_SHIFTED_VALUE.
 */
|
|
1607
|
+
function shifted(n) {
|
|
1608
|
+
return Array.from({ length: n }, () => PSI_SHIFTED_VALUE);
|
|
1609
|
+
}
|
|
1610
|
+
it('returns psi=0 and drifted=false for identical distributions', () => {
|
|
1611
|
+
const data = uniform(COUNT_HUNDRED);
|
|
1612
|
+
const result = computePSI(data, data);
|
|
1613
|
+
assert.strictEqual(result.psi, 0);
|
|
1614
|
+
assert.strictEqual(result.drifted, false);
|
|
1615
|
+
});
|
|
1616
|
+
it('returns drifted=true when distribution has shifted significantly', () => {
|
|
1617
|
+
const baseline = uniform(COUNT_HUNDRED);
|
|
1618
|
+
const current = shifted(COUNT_HUNDRED);
|
|
1619
|
+
const result = computePSI(baseline, current);
|
|
1620
|
+
assert.ok(result.psi > PSI_RECALIBRATION_THRESHOLD, `expected psi > ${PSI_RECALIBRATION_THRESHOLD}, got ${result.psi}`);
|
|
1621
|
+
assert.strictEqual(result.drifted, true);
|
|
1622
|
+
});
|
|
1623
|
+
it('returns { psi: 0, drifted: false } when expected array is too small', () => {
|
|
1624
|
+
const small = uniform(MIN_QUANTILE_SAMPLE_SIZE - 1);
|
|
1625
|
+
const large = uniform(COUNT_HUNDRED);
|
|
1626
|
+
const result = computePSI(small, large);
|
|
1627
|
+
assert.strictEqual(result.psi, 0);
|
|
1628
|
+
assert.strictEqual(result.drifted, false);
|
|
1629
|
+
});
|
|
1630
|
+
it('returns { psi: 0, drifted: false } when actual array is too small', () => {
|
|
1631
|
+
const large = uniform(COUNT_HUNDRED);
|
|
1632
|
+
const small = uniform(MIN_QUANTILE_SAMPLE_SIZE - 1);
|
|
1633
|
+
const result = computePSI(large, small);
|
|
1634
|
+
assert.strictEqual(result.psi, 0);
|
|
1635
|
+
assert.strictEqual(result.drifted, false);
|
|
1636
|
+
});
|
|
1637
|
+
it('clamps out-of-range values into the boundary bins without throwing', () => {
|
|
1638
|
+
// Values > 1.0 (e.g. latency in seconds) map to the last bin via Math.min clamp
|
|
1639
|
+
const outOfRange = Array.from({ length: COUNT_HUNDRED }, () => PSI_OUT_OF_RANGE_VALUE);
|
|
1640
|
+
const baseline = uniform(COUNT_HUNDRED);
|
|
1641
|
+
assert.doesNotThrow(() => computePSI(baseline, outOfRange));
|
|
1642
|
+
const result = computePSI(baseline, outOfRange);
|
|
1643
|
+
assert.ok(result.psi >= 0);
|
|
1644
|
+
});
|
|
1645
|
+
it('psi is non-negative for all valid inputs', () => {
|
|
1646
|
+
const a = Array.from({ length: COUNT_HUNDRED }, () => Math.random());
|
|
1647
|
+
const b = Array.from({ length: COUNT_HUNDRED }, () => Math.random());
|
|
1648
|
+
const result = computePSI(a, b);
|
|
1649
|
+
assert.ok(result.psi >= 0, `expected non-negative psi, got ${result.psi}`);
|
|
1650
|
+
});
|
|
1651
|
+
});
|
|
1652
|
+
// ==========================================================================
|
|
1653
|
+
// FE-R1: computeCalibrationDistributions
|
|
1654
|
+
// ==========================================================================
|
|
1655
|
+
describe('computeCalibrationDistributions', () => {
|
|
1656
|
+
/**
 * Build n evenly spaced scores in [0, 1).
 *
 * Element i is `i / n`, so all values are distinct and strictly increasing
 * whenever n > 1. An `n` of 0 yields an empty array.
 *
 * @param {number} n - number of scores to generate.
 * @returns {number[]} the n evenly spaced scores.
 */
|
|
1657
|
+
function makeScores(n) {
|
|
1658
|
+
return Array.from({ length: n }, (_, i) => i / n);
|
|
1659
|
+
}
|
|
1660
|
+
it('returns distribution for metric with >= 100 scores', () => {
|
|
1661
|
+
const result = computeCalibrationDistributions({ relevance: makeScores(COUNT_HUNDRED) });
|
|
1662
|
+
assert.ok(result['relevance'], 'expected distribution entry for relevance');
|
|
1663
|
+
assert.strictEqual(result['relevance'].sampleSize, COUNT_HUNDRED);
|
|
1664
|
+
const { p10, p25, p50, p75, p90 } = result['relevance'].distribution;
|
|
1665
|
+
assert.ok(p10 < p25 && p25 < p50 && p50 < p75 && p75 < p90, 'percentiles should be monotonically increasing');
|
|
1666
|
+
});
|
|
1667
|
+
it('skips metric with fewer than MIN_QUANTILE_SAMPLE_SIZE scores', () => {
|
|
1668
|
+
const result = computeCalibrationDistributions({ relevance: makeScores(MIN_QUANTILE_SAMPLE_SIZE - 1) });
|
|
1669
|
+
assert.strictEqual(result['relevance'], undefined);
|
|
1670
|
+
});
|
|
1671
|
+
it('returns empty object for empty input', () => {
|
|
1672
|
+
const result = computeCalibrationDistributions({});
|
|
1673
|
+
assert.deepStrictEqual(result, {});
|
|
1674
|
+
});
|
|
1675
|
+
it('handles mixed metrics — only qualifying ones included', () => {
|
|
1676
|
+
const result = computeCalibrationDistributions({
|
|
1677
|
+
relevance: makeScores(COUNT_HUNDRED),
|
|
1678
|
+
coherence: makeScores(MIN_QUANTILE_SAMPLE_SIZE - 1), // below threshold
|
|
1679
|
+
});
|
|
1680
|
+
assert.ok(result['relevance'], 'relevance should be included');
|
|
1681
|
+
assert.strictEqual(result['coherence'], undefined, 'coherence should be excluded');
|
|
1682
|
+
});
|
|
1683
|
+
it('populates windowStart and windowEnd on each entry', () => {
|
|
1684
|
+
const result = computeCalibrationDistributions({ relevance: makeScores(COUNT_HUNDRED) });
|
|
1685
|
+
const entry = result['relevance'];
|
|
1686
|
+
assert.ok(entry, 'expected entry');
|
|
1687
|
+
assert.ok(entry.windowStart, 'expected windowStart');
|
|
1688
|
+
assert.ok(entry.windowEnd, 'expected windowEnd');
|
|
1689
|
+
assert.ok(new Date(entry.windowStart) < new Date(entry.windowEnd), 'windowStart should precede windowEnd');
|
|
1690
|
+
});
|
|
1691
|
+
});
|
|
1692
|
+
// ==========================================================================
|
|
1693
|
+
// FE-R1-PSI: shouldRecalibrate — PSI-based drift gating
|
|
1694
|
+
// ==========================================================================
|
|
1695
|
+
describe('shouldRecalibrate', () => {
|
|
1696
|
+
/**
 * Build n evenly spaced scores in [0, 1).
 *
 * Element i is `i / n`; the result is deterministic, so calling this twice
 * with the same n produces equal-valued arrays.
 *
 * @param {number} n - number of scores to generate.
 * @returns {number[]} the n evenly spaced scores.
 */
|
|
1697
|
+
function makeUniformScores(n) {
|
|
1698
|
+
return Array.from({ length: n }, (_, i) => i / n);
|
|
1699
|
+
}
|
|
1700
|
+
/**
 * Build n scores tightly clustered around 0.9 (shifted distribution).
 *
 * Element i is `0.85 + (i / n) * 0.1`, so the values rise evenly from 0.85
 * and stay below 0.95, i.e. they cover [0.85, 0.95).
 *
 * @param {number} n - number of scores to generate.
 * @returns {number[]} the n clustered scores.
 */
|
|
1701
|
+
function makeShiftedScores(n) {
|
|
1702
|
+
return Array.from({ length: n }, (_, i) => 0.85 + (i / n) * 0.1);
|
|
1703
|
+
}
|
|
1704
|
+
const LARGE_SAMPLE = 200;
|
|
1705
|
+
it('returns shouldWrite: true when previousState is null (first run)', () => {
|
|
1706
|
+
const currentScores = { relevance: makeUniformScores(LARGE_SAMPLE) };
|
|
1707
|
+
const result = shouldRecalibrate(null, currentScores);
|
|
1708
|
+
assert.strictEqual(result.shouldWrite, true);
|
|
1709
|
+
});
|
|
1710
|
+
it('returns psiValues as empty object when previousState is null', () => {
|
|
1711
|
+
const currentScores = { relevance: makeUniformScores(LARGE_SAMPLE) };
|
|
1712
|
+
const result = shouldRecalibrate(null, currentScores);
|
|
1713
|
+
assert.deepStrictEqual(result.psiValues, {});
|
|
1714
|
+
});
|
|
1715
|
+
it('returns shouldWrite: false when all metrics have PSI <= threshold (stable distribution)', () => {
|
|
1716
|
+
const scores = makeUniformScores(LARGE_SAMPLE);
|
|
1717
|
+
const previousState = {
|
|
1718
|
+
lastCalibrated: '2026-03-10T00:00:00.000Z',
|
|
1719
|
+
distributions: {
|
|
1720
|
+
relevance: {
|
|
1721
|
+
distribution: { p10: 0.1, p25: 0.25, p50: 0.5, p75: 0.75, p90: 0.9 },
|
|
1722
|
+
sampleSize: LARGE_SAMPLE,
|
|
1723
|
+
windowStart: '2026-02-08T00:00:00.000Z',
|
|
1724
|
+
windowEnd: '2026-03-10T00:00:00.000Z',
|
|
1725
|
+
},
|
|
1726
|
+
},
|
|
1727
|
+
rawScores: { relevance: scores },
|
|
1728
|
+
};
|
|
1729
|
+
// Same distribution — PSI should be ~0
|
|
1730
|
+
const result = shouldRecalibrate(previousState, { relevance: scores });
|
|
1731
|
+
assert.strictEqual(result.shouldWrite, false);
|
|
1732
|
+
});
|
|
1733
|
+
it('returns shouldWrite: true when any metric PSI exceeds threshold (shifted distribution)', () => {
|
|
1734
|
+
const previousScores = makeUniformScores(LARGE_SAMPLE);
|
|
1735
|
+
const currentScores = makeShiftedScores(LARGE_SAMPLE);
|
|
1736
|
+
const previousState = {
|
|
1737
|
+
lastCalibrated: '2026-03-10T00:00:00.000Z',
|
|
1738
|
+
distributions: {
|
|
1739
|
+
relevance: {
|
|
1740
|
+
distribution: { p10: 0.1, p25: 0.25, p50: 0.5, p75: 0.75, p90: 0.9 },
|
|
1741
|
+
sampleSize: LARGE_SAMPLE,
|
|
1742
|
+
windowStart: '2026-02-08T00:00:00.000Z',
|
|
1743
|
+
windowEnd: '2026-03-10T00:00:00.000Z',
|
|
1744
|
+
},
|
|
1745
|
+
},
|
|
1746
|
+
rawScores: { relevance: previousScores },
|
|
1747
|
+
};
|
|
1748
|
+
const result = shouldRecalibrate(previousState, { relevance: currentScores });
|
|
1749
|
+
assert.strictEqual(result.shouldWrite, true);
|
|
1750
|
+
});
|
|
1751
|
+
it('returns shouldWrite: true when one metric drifts even if another is stable', () => {
|
|
1752
|
+
const stableScores = makeUniformScores(LARGE_SAMPLE);
|
|
1753
|
+
const driftedScores = makeShiftedScores(LARGE_SAMPLE);
|
|
1754
|
+
const previousState = {
|
|
1755
|
+
lastCalibrated: '2026-03-10T00:00:00.000Z',
|
|
1756
|
+
distributions: {
|
|
1757
|
+
relevance: {
|
|
1758
|
+
distribution: { p10: 0.1, p25: 0.25, p50: 0.5, p75: 0.75, p90: 0.9 },
|
|
1759
|
+
sampleSize: LARGE_SAMPLE,
|
|
1760
|
+
windowStart: '2026-02-08T00:00:00.000Z',
|
|
1761
|
+
windowEnd: '2026-03-10T00:00:00.000Z',
|
|
1762
|
+
},
|
|
1763
|
+
coherence: {
|
|
1764
|
+
distribution: { p10: 0.85, p25: 0.87, p50: 0.89, p75: 0.91, p90: 0.94 },
|
|
1765
|
+
sampleSize: LARGE_SAMPLE,
|
|
1766
|
+
windowStart: '2026-02-08T00:00:00.000Z',
|
|
1767
|
+
windowEnd: '2026-03-10T00:00:00.000Z',
|
|
1768
|
+
},
|
|
1769
|
+
},
|
|
1770
|
+
rawScores: {
|
|
1771
|
+
relevance: stableScores,
|
|
1772
|
+
coherence: driftedScores,
|
|
1773
|
+
},
|
|
1774
|
+
};
|
|
1775
|
+
// relevance is stable, coherence has drifted back toward uniform
|
|
1776
|
+
const result = shouldRecalibrate(previousState, {
|
|
1777
|
+
relevance: stableScores,
|
|
1778
|
+
coherence: stableScores,
|
|
1779
|
+
});
|
|
1780
|
+
assert.strictEqual(result.shouldWrite, true);
|
|
1781
|
+
});
|
|
1782
|
+
it('populates psiValues keyed by metric name when previousState has rawScores', () => {
|
|
1783
|
+
const scores = makeUniformScores(LARGE_SAMPLE);
|
|
1784
|
+
const previousState = {
|
|
1785
|
+
lastCalibrated: '2026-03-10T00:00:00.000Z',
|
|
1786
|
+
distributions: {
|
|
1787
|
+
relevance: {
|
|
1788
|
+
distribution: { p10: 0.1, p25: 0.25, p50: 0.5, p75: 0.75, p90: 0.9 },
|
|
1789
|
+
sampleSize: LARGE_SAMPLE,
|
|
1790
|
+
windowStart: '2026-02-08T00:00:00.000Z',
|
|
1791
|
+
windowEnd: '2026-03-10T00:00:00.000Z',
|
|
1792
|
+
},
|
|
1793
|
+
},
|
|
1794
|
+
rawScores: { relevance: scores },
|
|
1795
|
+
};
|
|
1796
|
+
const result = shouldRecalibrate(previousState, { relevance: scores });
|
|
1797
|
+
assert.ok('relevance' in result.psiValues, 'expected psiValues to contain relevance key');
|
|
1798
|
+
assert.ok(typeof result.psiValues['relevance'] === 'number', 'expected numeric PSI for relevance');
|
|
1799
|
+
});
|
|
1800
|
+
it('returns shouldWrite: true when previousState has no rawScores for a metric (cannot compare)', () => {
|
|
1801
|
+
const currentScores = makeUniformScores(LARGE_SAMPLE);
|
|
1802
|
+
const previousState = {
|
|
1803
|
+
lastCalibrated: '2026-03-10T00:00:00.000Z',
|
|
1804
|
+
distributions: {
|
|
1805
|
+
relevance: {
|
|
1806
|
+
distribution: { p10: 0.1, p25: 0.25, p50: 0.5, p75: 0.75, p90: 0.9 },
|
|
1807
|
+
sampleSize: LARGE_SAMPLE,
|
|
1808
|
+
windowStart: '2026-02-08T00:00:00.000Z',
|
|
1809
|
+
windowEnd: '2026-03-10T00:00:00.000Z',
|
|
1810
|
+
},
|
|
1811
|
+
},
|
|
1812
|
+
// rawScores absent
|
|
1813
|
+
};
|
|
1814
|
+
const result = shouldRecalibrate(previousState, { relevance: currentScores });
|
|
1815
|
+
assert.strictEqual(result.shouldWrite, true);
|
|
1816
|
+
});
|
|
1817
|
+
it('returns shouldWrite: true and preserves psiValues when a new metric appears in currentScores', () => {
|
|
1818
|
+
const stableScores = makeUniformScores(LARGE_SAMPLE);
|
|
1819
|
+
const previousState = {
|
|
1820
|
+
lastCalibrated: '2026-03-10T00:00:00.000Z',
|
|
1821
|
+
distributions: {
|
|
1822
|
+
relevance: {
|
|
1823
|
+
distribution: { p10: 0.1, p25: 0.25, p50: 0.5, p75: 0.75, p90: 0.9 },
|
|
1824
|
+
sampleSize: LARGE_SAMPLE,
|
|
1825
|
+
windowStart: '2026-02-08T00:00:00.000Z',
|
|
1826
|
+
windowEnd: '2026-03-10T00:00:00.000Z',
|
|
1827
|
+
},
|
|
1828
|
+
},
|
|
1829
|
+
rawScores: { relevance: stableScores },
|
|
1830
|
+
};
|
|
1831
|
+
// 'coherence' is a new metric not in rawScores — should trigger write
|
|
1832
|
+
// but preserve PSI computed for 'relevance'
|
|
1833
|
+
const result = shouldRecalibrate(previousState, {
|
|
1834
|
+
relevance: stableScores,
|
|
1835
|
+
coherence: makeUniformScores(LARGE_SAMPLE),
|
|
1836
|
+
});
|
|
1837
|
+
assert.strictEqual(result.shouldWrite, true);
|
|
1838
|
+
assert.ok('relevance' in result.psiValues, 'expected psiValues to contain relevance key');
|
|
1839
|
+
assert.ok(typeof result.psiValues['relevance'] === 'number', 'expected numeric PSI for relevance');
|
|
1840
|
+
});
|
|
1841
|
+
it('returns shouldWrite: false when current scores are below MIN_QUANTILE_SAMPLE_SIZE (cannot compute PSI)', () => {
|
|
1842
|
+
const tinyScores = makeUniformScores(MIN_QUANTILE_SAMPLE_SIZE - 1);
|
|
1843
|
+
const previousState = {
|
|
1844
|
+
lastCalibrated: '2026-03-10T00:00:00.000Z',
|
|
1845
|
+
distributions: {
|
|
1846
|
+
relevance: {
|
|
1847
|
+
distribution: { p10: 0.1, p25: 0.25, p50: 0.5, p75: 0.75, p90: 0.9 },
|
|
1848
|
+
sampleSize: LARGE_SAMPLE,
|
|
1849
|
+
windowStart: '2026-02-08T00:00:00.000Z',
|
|
1850
|
+
windowEnd: '2026-03-10T00:00:00.000Z',
|
|
1851
|
+
},
|
|
1852
|
+
},
|
|
1853
|
+
rawScores: { relevance: makeUniformScores(LARGE_SAMPLE) },
|
|
1854
|
+
};
|
|
1855
|
+
// computePSI returns { psi: 0, drifted: false } for small samples — no drift detected
|
|
1856
|
+
const result = shouldRecalibrate(previousState, { relevance: tinyScores });
|
|
1857
|
+
assert.strictEqual(result.shouldWrite, false);
|
|
1858
|
+
});
|
|
1859
|
+
});
|
|
1860
|
+
// ==========================================================================
|
|
1861
|
+
// FE-R2: AHP-CRITIC CQI Weight Tuning
|
|
1862
|
+
// ==========================================================================
|
|
1863
|
+
// --- tunedCQIWeightsSchema ---
|
|
1864
|
+
describe('tunedCQIWeightsSchema', () => {
|
|
1865
|
+
it('validates a well-formed TunedCQIWeights object', () => {
|
|
1866
|
+
const valid = {
|
|
1867
|
+
featureVersion: '2.0',
|
|
1868
|
+
weights: { relevance: 0.5, coherence: 0.5 },
|
|
1869
|
+
ahpWeights: { relevance: 0.5, coherence: 0.5 },
|
|
1870
|
+
criticWeights: { relevance: 0.5, coherence: 0.5 },
|
|
1871
|
+
alpha: 0.5,
|
|
1872
|
+
consistencyRatio: 0.03,
|
|
1873
|
+
incidentCorrelations: { relevance: 0.2, coherence: 0.1 },
|
|
1874
|
+
};
|
|
1875
|
+
const result = tunedCQIWeightsSchema.safeParse(valid);
|
|
1876
|
+
assert.ok(result.success, `Expected valid, got: ${JSON.stringify(result.error?.issues)}`);
|
|
1877
|
+
});
|
|
1878
|
+
it('accepts output of computeHybridCQIWeights', () => {
|
|
1879
|
+
const ahp = { relevance: 0.6, coherence: 0.4 };
|
|
1880
|
+
const critic = { relevance: 0.5, coherence: 0.5 };
|
|
1881
|
+
const incidents = [];
|
|
1882
|
+
const tuned = computeHybridCQIWeights(ahp, critic, incidents, ['relevance', 'coherence']);
|
|
1883
|
+
const result = tunedCQIWeightsSchema.safeParse(tuned);
|
|
1884
|
+
assert.ok(result.success, `Schema should accept computeHybridCQIWeights output, got: ${JSON.stringify(result.error?.issues)}`);
|
|
1885
|
+
});
|
|
1886
|
+
it('rejects missing required fields', () => {
|
|
1887
|
+
const missing = {
|
|
1888
|
+
featureVersion: '2.0',
|
|
1889
|
+
weights: { relevance: 0.5 },
|
|
1890
|
+
};
|
|
1891
|
+
const result = tunedCQIWeightsSchema.safeParse(missing);
|
|
1892
|
+
assert.ok(!result.success);
|
|
1893
|
+
});
|
|
1894
|
+
it('rejects weights outside 0-1 range', () => {
|
|
1895
|
+
const invalid = {
|
|
1896
|
+
featureVersion: '2.0',
|
|
1897
|
+
weights: { relevance: 1.5 },
|
|
1898
|
+
ahpWeights: { relevance: 0.5 },
|
|
1899
|
+
criticWeights: { relevance: 0.5 },
|
|
1900
|
+
alpha: 0.5,
|
|
1901
|
+
consistencyRatio: 0,
|
|
1902
|
+
incidentCorrelations: { relevance: 0 },
|
|
1903
|
+
};
|
|
1904
|
+
const result = tunedCQIWeightsSchema.safeParse(invalid);
|
|
1905
|
+
assert.ok(!result.success);
|
|
1906
|
+
});
|
|
1907
|
+
});
|
|
1908
|
+
// --- computeAHPWeights ---
|
|
1909
|
+
/**
 * Test suite for computeAHPWeights.
 *
 * Each case passes a list of pairwise comparisons together with the metric
 * names, then inspects the returned `weights` record and `consistencyRatio`.
 */
describe('computeAHPWeights', () => {
  const AHP_WEIGHT_TOLERANCE = 1e-6;

  // Returns a fresh array on every call so no two invocations share a metric list.
  const threeMetrics = () => ['relevance', 'coherence', 'completeness'];

  // Builds one pairwise comparison; every fixture in this suite marks side 'A' as preferred.
  const prefer = (metricA, metricB, preference) => ({ metricA, metricB, preference, preferred: 'A' });

  // Left-to-right sum of all values in a weights record.
  const totalOf = (weights) => {
    let total = 0;
    for (const value of Object.values(weights)) {
      total += value;
    }
    return total;
  };

  it('weights sum to 1.0 for two metrics with equal preference', () => {
    const pairwise = [prefer('relevance', 'coherence', 1)];
    const outcome = computeAHPWeights(pairwise, ['relevance', 'coherence']);
    const sum = totalOf(outcome.weights);
    assert.ok(Math.abs(sum - 1.0) < AHP_WEIGHT_TOLERANCE, `weights sum ${sum} should be 1.0`);
  });

  it('returns equal weights when all comparisons have preference=1', () => {
    const pairwise = [
      prefer('relevance', 'coherence', 1),
      prefer('relevance', 'completeness', 1),
      prefer('coherence', 'completeness', 1),
    ];
    const outcome = computeAHPWeights(pairwise, threeMetrics());
    const expectedWeight = 1 / 3;
    threeMetrics().forEach((metric) => {
      const distance = Math.abs((outcome.weights[metric] ?? 0) - expectedWeight);
      assert.ok(distance < AHP_WEIGHT_TOLERANCE, `${metric} weight ${outcome.weights[metric]} should be ~${expectedWeight}`);
    });
  });

  it('strongly preferred metric gets highest weight', () => {
    const pairwise = [
      prefer('relevance', 'coherence', 9),
      prefer('relevance', 'completeness', 9),
      prefer('coherence', 'completeness', 1),
    ];
    const { weights } = computeAHPWeights(pairwise, threeMetrics());
    const relevanceWeight = weights['relevance'] ?? 0;
    const coherenceWeight = weights['coherence'] ?? 0;
    const completenessWeight = weights['completeness'] ?? 0;
    assert.ok(relevanceWeight > coherenceWeight, 'relevance should outweigh coherence');
    assert.ok(relevanceWeight > completenessWeight, 'relevance should outweigh completeness');
  });

  it('consistent comparisons produce CR < 0.1', () => {
    // Perfectly consistent: A>B by 3, A>C by 9, B>C by 3 (3*3=9)
    const pairwise = [
      prefer('relevance', 'coherence', 3),
      prefer('relevance', 'completeness', 9),
      prefer('coherence', 'completeness', 3),
    ];
    const outcome = computeAHPWeights(pairwise, threeMetrics());
    assert.ok(outcome.consistencyRatio < 0.1, `CR ${outcome.consistencyRatio} should be < 0.1`);
  });

  it('inconsistent comparisons produce CR > 0.1', () => {
    // Contradictory: A>>B, B>>C, but C>>A (cycle)
    const pairwise = [
      prefer('relevance', 'coherence', 9),
      prefer('coherence', 'completeness', 9),
      prefer('completeness', 'relevance', 9),
    ];
    const outcome = computeAHPWeights(pairwise, threeMetrics());
    assert.ok(outcome.consistencyRatio > 0.1, `CR ${outcome.consistencyRatio} should be > 0.1`);
  });

  it('empty comparisons return equal weights', () => {
    const outcome = computeAHPWeights([], ['relevance', 'coherence']);
    const sum = totalOf(outcome.weights);
    assert.ok(Math.abs(sum - 1.0) < AHP_WEIGHT_TOLERANCE, `weights sum ${sum} should be 1.0`);
    assert.ok(Math.abs((outcome.weights['relevance'] ?? 0) - 0.5) < AHP_WEIGHT_TOLERANCE, 'relevance weight should be 0.5 with empty comparisons');
    assert.ok(Math.abs((outcome.weights['coherence'] ?? 0) - 0.5) < AHP_WEIGHT_TOLERANCE, 'coherence weight should be 0.5 with empty comparisons');
  });

  it('weights sum to 1.0 for four metrics', () => {
    const metrics = ['relevance', 'coherence', 'completeness', 'groundedness'];
    const pairwise = [
      prefer('relevance', 'coherence', 3),
      prefer('relevance', 'completeness', 5),
      prefer('relevance', 'groundedness', 7),
      prefer('coherence', 'completeness', 3),
      prefer('coherence', 'groundedness', 5),
      prefer('completeness', 'groundedness', 3),
    ];
    const outcome = computeAHPWeights(pairwise, metrics);
    const sum = totalOf(outcome.weights);
    assert.ok(Math.abs(sum - 1.0) < AHP_WEIGHT_TOLERANCE, `weights sum ${sum} should be 1.0`);
  });

  it('geometric means computation is stable (pipe refactoring)', () => {
    // Test with standard AHP scale preferences (1, 3, 5, 7, 9)
    const pairwise = [
      prefer('relevance', 'coherence', 3),
      prefer('relevance', 'completeness', 5),
      prefer('coherence', 'completeness', 3),
    ];
    const { weights } = computeAHPWeights(pairwise, threeMetrics());
    const relevanceWeight = weights['relevance'] ?? 0;
    const coherenceWeight = weights['coherence'] ?? 0;
    const completenessWeight = weights['completeness'] ?? 0;
    // Verify weights maintain expected ordering from preferences
    assert.ok(relevanceWeight > coherenceWeight, 'relevance should outweigh coherence');
    assert.ok(coherenceWeight > completenessWeight, 'coherence should outweigh completeness');
    assert.ok(relevanceWeight > completenessWeight, 'relevance should outweigh completeness');
  });

  it('handles minimal preference values in geometric mean', () => {
    const pairwise = [
      prefer('relevance', 'coherence', 1),
      prefer('relevance', 'completeness', 1),
      prefer('coherence', 'completeness', 1),
    ];
    const outcome = computeAHPWeights(pairwise, threeMetrics());
    const sum = totalOf(outcome.weights);
    assert.ok(Math.abs(sum - 1.0) < AHP_WEIGHT_TOLERANCE, `equal preferences should produce sum=1.0, got ${sum}`);
    const expectedWeight = 1 / 3;
    threeMetrics().forEach((metric) => {
      const distance = Math.abs((outcome.weights[metric] ?? 0) - expectedWeight);
      assert.ok(distance < AHP_WEIGHT_TOLERANCE, `metric ${metric} should have equal weight ~${expectedWeight}, got ${outcome.weights[metric]}`);
    });
  });

  it('weights remain normalized across multiple invocations (consistency)', () => {
    const pairwise = [
      prefer('relevance', 'coherence', 3),
      prefer('relevance', 'completeness', 5),
      prefer('coherence', 'completeness', 3),
    ];
    // Call multiple times to ensure consistency; both calls receive the same comparisons array.
    const result1 = computeAHPWeights(pairwise, threeMetrics());
    const result2 = computeAHPWeights(pairwise, threeMetrics());
    threeMetrics().forEach((metric) => {
      assert.strictEqual(result1.weights[metric], result2.weights[metric], `weight for ${metric} should be identical across calls`);
    });
    // Verify final sum is correct
    const sum1 = totalOf(result1.weights);
    const sum2 = totalOf(result2.weights);
    assert.ok(Math.abs(sum1 - 1.0) < AHP_WEIGHT_TOLERANCE, `result1 should sum to 1.0`);
    assert.ok(Math.abs(sum2 - 1.0) < AHP_WEIGHT_TOLERANCE, `result2 should sum to 1.0`);
  });
});
|
|
2034
|
+
// --- computeCRITICWeights ---
|
|
2035
|
+
/**
 * Test suite for computeCRITICWeights.
 *
 * Each case passes a record mapping metric names to score histories and
 * inspects the returned `weights` and `informationContent` records.
 */
describe('computeCRITICWeights', () => {
  const CRITIC_WEIGHT_TOLERANCE = 1e-6;

  it('weights sum to 1.0', () => {
    const history = {
      relevance: [0.7, 0.8, 0.6, 0.9, 0.5],
      coherence: [0.6, 0.7, 0.8, 0.5, 0.9],
    };
    const result = computeCRITICWeights(history);
    const sum = Object.values(result.weights).reduce((acc, v) => acc + v, 0);
    assert.ok(Math.abs(sum - 1.0) < CRITIC_WEIGHT_TOLERANCE, `weights sum ${sum} should be 1.0`);
  });

  it('single metric returns weight of 1.0', () => {
    const history = {
      relevance: [0.7, 0.8, 0.6, 0.9, 0.5],
    };
    const result = computeCRITICWeights(history);
    assert.ok(Math.abs((result.weights['relevance'] ?? 0) - 1.0) < CRITIC_WEIGHT_TOLERANCE, `single metric weight should be 1.0, got ${result.weights['relevance']}`);
  });

  it('uniform data produces equal weights', () => {
    // Both histories are constant, so neither metric carries more information than the other.
    const history = {
      relevance: [0.8, 0.8, 0.8, 0.8, 0.8],
      coherence: [0.7, 0.7, 0.7, 0.7, 0.7],
    };
    const result = computeCRITICWeights(history);
    const sum = Object.values(result.weights).reduce((acc, v) => acc + v, 0);
    assert.ok(Math.abs(sum - 1.0) < CRITIC_WEIGHT_TOLERANCE, `weights sum ${sum} should be 1.0`);
    // The sum check alone would also pass for e.g. {1, 0}; pin the equality the title promises.
    const relevanceWeight = result.weights['relevance'] ?? 0;
    const coherenceWeight = result.weights['coherence'] ?? 0;
    assert.ok(Math.abs(relevanceWeight - coherenceWeight) < CRITIC_WEIGHT_TOLERANCE, `uniform metrics should get equal weights, got relevance=${relevanceWeight} coherence=${coherenceWeight}`);
  });

  it('high-variance uncorrelated metric gets highest weight', () => {
    // relevance has high variance, completeness is constant
    const history = {
      relevance: [0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.1, 0.9],
      completeness: [0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6],
    };
    const result = computeCRITICWeights(history);
    assert.ok((result.weights['relevance'] ?? 0) > (result.weights['completeness'] ?? 0), `relevance weight ${result.weights['relevance']} should exceed completeness ${result.weights['completeness']}`);
  });

  it('perfectly correlated metrics produce lower weight than uncorrelated metrics', () => {
    // metricA and metricB are identical (perfectly correlated)
    // metricC is the mirror image of base (each value is 1 - base[i])
    const base = [0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.15, 0.85];
    const uncorrelated = [0.8, 0.2, 0.9, 0.1, 0.7, 0.3, 0.85, 0.15];
    const history = {
      metricA: base,
      metricB: [...base],
      metricC: uncorrelated,
    };
    const result = computeCRITICWeights(history);
    const weightC = result.weights['metricC'] ?? 0;
    const weightA = result.weights['metricA'] ?? 0;
    assert.ok(weightC > weightA, `uncorrelated metricC weight ${weightC} should exceed correlated metricA weight ${weightA}`);
  });

  it('informationContent is present for each metric', () => {
    const history = {
      relevance: [0.7, 0.8, 0.6, 0.9, 0.5],
      coherence: [0.6, 0.7, 0.8, 0.5, 0.9],
    };
    const result = computeCRITICWeights(history);
    assert.ok('relevance' in result.informationContent, 'informationContent should contain relevance');
    assert.ok('coherence' in result.informationContent, 'informationContent should contain coherence');
    assert.strictEqual(typeof result.informationContent['relevance'], 'number');
  });
});
|
|
2097
|
+
// --- computeHybridCQIWeights ---
|
|
2098
|
+
/**
 * Test suite for computeHybridCQIWeights.
 *
 * All cases reuse one AHP weight record, one CRITIC weight record and one
 * metric list; only the incident list and the optional alpha argument vary.
 */
describe('computeHybridCQIWeights', () => {
  const HYBRID_WEIGHT_TOLERANCE = 1e-6;
  const metrics = ['relevance', 'coherence'];
  const ahpWeights = { relevance: 0.7, coherence: 0.3 };
  const criticWeights = { relevance: 0.4, coherence: 0.6 };

  // Calls the function under test with the shared fixtures; alpha is forwarded only when supplied.
  const blend = (incidents, ...alphaArg) => computeHybridCQIWeights(ahpWeights, criticWeights, incidents, metrics, ...alphaArg);

  it('featureVersion is "2.0"', () => {
    const { featureVersion } = blend([]);
    assert.strictEqual(featureVersion, '2.0');
  });

  it('alpha=1.0 produces weights equal to AHP weights', () => {
    const outcome = blend([], 1.0);
    for (const metric of ['relevance', 'coherence']) {
      const gap = Math.abs((outcome.weights[metric] ?? 0) - (ahpWeights[metric] ?? 0));
      assert.ok(gap < HYBRID_WEIGHT_TOLERANCE, `${metric} weight should match AHP: ${outcome.weights[metric]} vs ${ahpWeights[metric]}`);
    }
  });

  it('alpha=0.0 produces weights equal to CRITIC weights', () => {
    const outcome = blend([], 0.0);
    for (const metric of ['relevance', 'coherence']) {
      const gap = Math.abs((outcome.weights[metric] ?? 0) - (criticWeights[metric] ?? 0));
      assert.ok(gap < HYBRID_WEIGHT_TOLERANCE, `${metric} weight should match CRITIC: ${outcome.weights[metric]} vs ${criticWeights[metric]}`);
    }
  });

  it('alpha=0.5 produces midpoint blend of AHP and CRITIC weights', () => {
    const outcome = blend([], 0.5);
    const midpointOf = (metric) => 0.5 * (ahpWeights[metric] ?? 0) + 0.5 * (criticWeights[metric] ?? 0);
    // Both expectations are computed up front, before the first assertion runs.
    const expected = {
      relevance: midpointOf('relevance'),
      coherence: midpointOf('coherence'),
    };
    for (const metric of ['relevance', 'coherence']) {
      const gap = Math.abs((outcome.weights[metric] ?? 0) - expected[metric]);
      assert.ok(gap < HYBRID_WEIGHT_TOLERANCE, `${metric} ${outcome.weights[metric]} should be midpoint ${expected[metric]}`);
    }
  });

  it('default alpha is 0.5', () => {
    const withDefault = blend([]);
    const withExplicit = blend([], 0.5);
    assert.deepStrictEqual(withDefault.weights, withExplicit.weights);
  });

  it('weights sum to 1.0', () => {
    const outcome = blend([], 0.5);
    let sum = 0;
    for (const value of Object.values(outcome.weights)) {
      sum += value;
    }
    assert.ok(Math.abs(sum - 1.0) < HYBRID_WEIGHT_TOLERANCE, `weights sum ${sum} should be 1.0`);
  });

  it('empty incidents produce incidentCorrelations of 0 for each metric', () => {
    const { incidentCorrelations } = blend([]);
    // Presence is checked for both metrics before any value is compared.
    for (const metric of ['relevance', 'coherence']) {
      assert.ok(metric in incidentCorrelations, `incidentCorrelations should include ${metric}`);
    }
    for (const metric of ['relevance', 'coherence']) {
      assert.strictEqual(incidentCorrelations[metric], 0);
    }
  });

  it('metric that drops before incidents gets higher correlation than stable metric', () => {
    // relevance is low (0.3, 0.25) in both incidents while coherence stays near 0.75.
    const firstIncident = {
      timestamp: '2026-03-10T00:00:00.000Z',
      metricScores: { relevance: 0.3, coherence: 0.75 },
      cqiValue: 0.4,
      severity: 'major',
    };
    const secondIncident = {
      timestamp: '2026-03-15T00:00:00.000Z',
      metricScores: { relevance: 0.25, coherence: 0.78 },
      cqiValue: 0.35,
      severity: 'critical',
    };
    const { incidentCorrelations } = blend([firstIncident, secondIncident]);
    const relevanceCorr = incidentCorrelations['relevance'] ?? 0;
    const coherenceCorr = incidentCorrelations['coherence'] ?? 0;
    assert.ok(relevanceCorr > coherenceCorr, `relevance correlation ${relevanceCorr} should exceed coherence ${coherenceCorr} as relevance drops at incidents`);
  });

  it('returns ahpWeights and criticWeights fields on result', () => {
    const outcome = blend([]);
    assert.deepStrictEqual(outcome.ahpWeights, ahpWeights);
    assert.deepStrictEqual(outcome.criticWeights, criticWeights);
  });

  it('consistencyRatio field is present on result', () => {
    const { consistencyRatio } = blend([]);
    assert.strictEqual(typeof consistencyRatio, 'number');
  });

  it('alpha field on result reflects the alpha used', () => {
    const { alpha } = blend([], 0.75);
    assert.strictEqual(alpha, 0.75);
  });
});
|
|
2175
|
+
// --- computeTaPR ---
|
|
2176
|
+
/**
 * Test suite for computeTaPR.
 *
 * Each case passes a list of detected ranges and a list of incident ranges
 * (objects with numeric `start`/`end`) and inspects `precision`, `recall`,
 * `f1` and `detectionDelay` on the result.
 */
describe('computeTaPR', () => {
  // Time constants (ms), one second apart
  const T0 = 1_000_000;
  const T1 = T0 + 1_000;
  const T2 = T0 + 2_000;
  const T3 = T0 + 3_000;
  const T4 = T0 + 4_000;
  const T5 = T0 + 5_000;
  const T6 = T0 + 6_000;
  const T7 = T0 + 7_000;
  const T8 = T0 + 8_000;
  const TAPR_TOLERANCE = 1e-9;

  it('perfect detection: single detected range fully covering single incident → precision=1, recall=1, f1=1, detectionDelay=0', () => {
    const detected = [{ start: T0, end: T4 }];
    const incidents = [{ start: T0, end: T4 }];
    const result = computeTaPR(detected, incidents);
    assert.ok(Math.abs(result.precision - 1) < TAPR_TOLERANCE, `precision should be 1, got ${result.precision}`);
    assert.ok(Math.abs(result.recall - 1) < TAPR_TOLERANCE, `recall should be 1, got ${result.recall}`);
    assert.ok(Math.abs(result.f1 - 1) < TAPR_TOLERANCE, `f1 should be 1, got ${result.f1}`);
    assert.ok(Math.abs(result.detectionDelay - 0) < TAPR_TOLERANCE, `detectionDelay should be 0, got ${result.detectionDelay}`);
  });

  it('no detections: recall=0, precision=0, f1=0', () => {
    const detected = [];
    const incidents = [{ start: T0, end: T4 }];
    const result = computeTaPR(detected, incidents);
    assert.strictEqual(result.recall, 0, 'recall should be 0 with no detections');
    assert.strictEqual(result.precision, 0, 'precision should be 0 with no detections');
    assert.strictEqual(result.f1, 0, 'f1 should be 0 with no detections');
  });

  it('no incidents but detections: precision=0, recall=0, f1=0', () => {
    const detected = [{ start: T0, end: T2 }];
    const incidents = [];
    const result = computeTaPR(detected, incidents);
    assert.strictEqual(result.precision, 0, 'precision should be 0 when no incidents exist');
    assert.strictEqual(result.recall, 0, 'recall should be 0 when no incidents exist');
    assert.strictEqual(result.f1, 0, 'f1 should be 0 when no incidents exist');
  });

  it('partial overlap: precision and recall are strictly between 0 and 1', () => {
    // Incident spans T0–T4 (4s), detection spans T2–T6 (4s), overlap T2–T4 (2s)
    // Precision overlap ratio = 2s / 4s = 0.5
    // Recall overlap ratio = 2s / 4s = 0.5
    const detected = [{ start: T2, end: T6 }];
    const incidents = [{ start: T0, end: T4 }];
    const result = computeTaPR(detected, incidents);
    assert.ok(result.precision > 0 && result.precision < 1, `precision ${result.precision} should be in (0,1)`);
    assert.ok(result.recall > 0 && result.recall < 1, `recall ${result.recall} should be in (0,1)`);
    assert.ok(result.f1 > 0 && result.f1 < 1, `f1 ${result.f1} should be in (0,1)`);
  });

  it('partial overlap: precision = overlap / detected duration', () => {
    // Detection T2–T6 (4s), incident T0–T4 (4s), overlap T2–T4 = 2s
    const DETECTED_DURATION_MS = T6 - T2; // 4000
    const OVERLAP_MS = T4 - T2; // 2000
    const EXPECTED_PRECISION = OVERLAP_MS / DETECTED_DURATION_MS; // 0.5
    const detected = [{ start: T2, end: T6 }];
    const incidents = [{ start: T0, end: T4 }];
    const result = computeTaPR(detected, incidents);
    assert.ok(Math.abs(result.precision - EXPECTED_PRECISION) < TAPR_TOLERANCE, `precision should be ${EXPECTED_PRECISION}, got ${result.precision}`);
  });

  it('partial overlap: recall = overlap / incident duration', () => {
    // Detection T2–T6 (4s), incident T0–T4 (4s), overlap T2–T4 = 2s
    const INCIDENT_DURATION_MS = T4 - T0; // 4000
    const OVERLAP_MS = T4 - T2; // 2000
    const EXPECTED_RECALL = OVERLAP_MS / INCIDENT_DURATION_MS; // 0.5
    const detected = [{ start: T2, end: T6 }];
    const incidents = [{ start: T0, end: T4 }];
    const result = computeTaPR(detected, incidents);
    assert.ok(Math.abs(result.recall - EXPECTED_RECALL) < TAPR_TOLERANCE, `recall should be ${EXPECTED_RECALL}, got ${result.recall}`);
  });

  it('detection delay: late detection (after incident start) equals gap between incident start and detection start', () => {
    // Detection starts T2, incident starts T0 → delay = T2 - T0 = 2000ms
    const EXPECTED_DELAY_MS = T2 - T0; // 2000
    const detected = [{ start: T2, end: T6 }];
    const incidents = [{ start: T0, end: T4 }];
    const result = computeTaPR(detected, incidents);
    assert.ok(Math.abs(result.detectionDelay - EXPECTED_DELAY_MS) < TAPR_TOLERANCE, `detectionDelay should be ${EXPECTED_DELAY_MS}ms (late detection), got ${result.detectionDelay}`);
  });

  it('detection delay: early detection (before incident start) yields detectionDelay=0 (no negative delay)', () => {
    // Detection starts T0, incident starts T2 — detection was early.
    // Implementation clamps via Math.max(0, ...), so exact 0 expected.
    const detected = [{ start: T0, end: T4 }];
    const incidents = [{ start: T2, end: T6 }];
    const result = computeTaPR(detected, incidents);
    assert.strictEqual(result.detectionDelay, 0, 'early detection should clamp detectionDelay to 0, not produce a negative value');
  });

  it('f1 is harmonic mean of precision and recall', () => {
    const detected = [{ start: T2, end: T6 }];
    const incidents = [{ start: T0, end: T4 }];
    const result = computeTaPR(detected, incidents);
    const denominator = result.precision + result.recall;
    // Fail loudly rather than silently skipping the check: these ranges overlap,
    // so a zero denominator would itself be a regression.
    assert.ok(denominator > 0, `precision + recall should be > 0 for overlapping ranges, got ${denominator}`);
    const expectedF1 = 2 * result.precision * result.recall / denominator;
    assert.ok(Math.abs(result.f1 - expectedF1) < TAPR_TOLERANCE, `f1 ${result.f1} should equal harmonic mean ${expectedF1}`);
  });

  it('multiple incidents: incident with no overlapping detection is excluded from delay average', () => {
    // Two incidents: T0–T2 detected by T0–T2 (delay=0), T5–T7 has no detection
    // detectionDelay should only reflect the incident that was detected
    const detected = [{ start: T0, end: T2 }];
    const incidents = [
      { start: T0, end: T2 }, // detected
      { start: T5, end: T7 }, // missed
    ];
    const result = computeTaPR(detected, incidents);
    assert.ok(result.recall < 1, `recall ${result.recall} should be < 1 because second incident is missed`);
    assert.ok(Math.abs(result.detectionDelay - 0) < TAPR_TOLERANCE, `detectionDelay should be 0 (only detected incident counted), got ${result.detectionDelay}`);
  });

  it('multiple incidents with partial coverage: recall < 1', () => {
    // Three incidents; only first two are partially detected
    const detected = [
      { start: T0, end: T2 }, // overlaps incident 1 (T0–T3) partially
      { start: T3, end: T5 }, // overlaps incident 2 (T3–T6) partially
    ];
    const incidents = [
      { start: T0, end: T3 }, // partially detected
      { start: T3, end: T6 }, // partially detected
      { start: T6, end: T8 }, // fully missed
    ];
    const result = computeTaPR(detected, incidents);
    assert.ok(result.recall < 1, `recall ${result.recall} should be < 1 with partially-covered incidents`);
    assert.ok(result.recall > 0, `recall ${result.recall} should be > 0 with some overlapping detections`);
  });

  it('empty inputs: precision=0, recall=0, f1=0', () => {
    const result = computeTaPR([], []);
    assert.strictEqual(result.precision, 0);
    assert.strictEqual(result.recall, 0);
    assert.strictEqual(result.f1, 0);
  });

  it('detection that only partially enters incident boundary: detection delay reflects first overlap point', () => {
    // Detection T1–T5, incident T0–T2
    // Detection starts after incident; delay = T1 - T0 = 1000ms
    const EXPECTED_DELAY_MS = T1 - T0; // 1000
    const detected = [{ start: T1, end: T5 }];
    const incidents = [{ start: T0, end: T2 }];
    const result = computeTaPR(detected, incidents);
    assert.ok(Math.abs(result.detectionDelay - EXPECTED_DELAY_MS) < TAPR_TOLERANCE, `detectionDelay should be ${EXPECTED_DELAY_MS}ms, got ${result.detectionDelay}`);
  });

  it('detection starting exactly at incident start: detectionDelay=0', () => {
    const detected = [{ start: T0, end: T4 }];
    const incidents = [{ start: T0, end: T4 }];
    const result = computeTaPR(detected, incidents);
    assert.strictEqual(result.detectionDelay, 0, 'detectionDelay should be 0 when detection starts at incident start');
  });

  it('detection half-inside incident boundary: precision reflects only overlapping portion', () => {
    // Detection T0–T4 (4s), incident T2–T6 (4s), overlap T2–T4 = 2s
    const DETECTED_DURATION_MS = T4 - T0; // 4000
    const OVERLAP_MS = T4 - T2; // 2000
    const EXPECTED_PRECISION = OVERLAP_MS / DETECTED_DURATION_MS; // 0.5
    const detected = [{ start: T0, end: T4 }];
    const incidents = [{ start: T2, end: T6 }];
    const result = computeTaPR(detected, incidents);
    assert.ok(Math.abs(result.precision - EXPECTED_PRECISION) < TAPR_TOLERANCE, `precision should be ${EXPECTED_PRECISION}, got ${result.precision}`);
  });
});
|
|
2328
|
+
// --- backtestDegradationConfig ---
|
|
2329
|
+
describe('backtestDegradationConfig', () => {
|
|
2330
|
+
// Timestamps: 1-second intervals starting at a fixed epoch
|
|
2331
|
+
const BASE_TIME_MS = 1_700_000_000_000;
|
|
2332
|
+
const STEP_MS = 1_000;
|
|
2333
|
+
// Config constants
|
|
2334
|
+
const PRODUCTION_VARIANCE_THRESHOLD = 2.0;
|
|
2335
|
+
const PRODUCTION_COVERAGE_THRESHOLD = 0.3;
|
|
2336
|
+
const PRODUCTION_LATENCY_SKEW_THRESHOLD = 3.0;
|
|
2337
|
+
const PRODUCTION_CONFIRMATION_WINDOW = 2;
|
|
2338
|
+
const PRODUCTION_EWMA_LAMBDA = 0.1;
|
|
2339
|
+
const PRODUCTION_STABILITY_THRESHOLD = 2.0;
|
|
2340
|
+
const SENSITIVE_VARIANCE_THRESHOLD = 1.05;
|
|
2341
|
+
const SENSITIVE_COVERAGE_THRESHOLD = 0.01;
|
|
2342
|
+
const SENSITIVE_LATENCY_SKEW_THRESHOLD = 1.1;
|
|
2343
|
+
const SENSITIVE_CONFIRMATION_WINDOW = 1;
|
|
2344
|
+
const CONSERVATIVE_VARIANCE_THRESHOLD = 10.0;
|
|
2345
|
+
const CONSERVATIVE_COVERAGE_THRESHOLD = 0.95;
|
|
2346
|
+
const CONSERVATIVE_LATENCY_SKEW_THRESHOLD = 20.0;
|
|
2347
|
+
const CONSERVATIVE_CONFIRMATION_WINDOW = 5;
|
|
2348
|
+
// Signal values
|
|
2349
|
+
const NORMAL_STD_DEV = 0.05;
|
|
2350
|
+
const DEGRADED_STD_DEV = 0.50;
|
|
2351
|
+
const BASELINE_STD_DEV = 0.10;
|
|
2352
|
+
const NORMAL_COVERAGE_GAPS = 0;
|
|
2353
|
+
const DEGRADED_COVERAGE_GAPS = 50;
|
|
2354
|
+
const TOTAL_COVERAGE_CELLS = 100;
|
|
2355
|
+
const NORMAL_LATENCY_P95 = 0.2;
|
|
2356
|
+
const NORMAL_LATENCY_P50 = 0.1;
|
|
2357
|
+
const DEGRADED_LATENCY_P95 = 0.9;
|
|
2358
|
+
// Series lengths
|
|
2359
|
+
const SERIES_LENGTH_TWENTY = 20;
|
|
2360
|
+
const SERIES_LENGTH_THIRTY = 30;
|
|
2361
|
+
const DEGRADED_START_IDX = 8;
|
|
2362
|
+
const DEGRADED_END_IDX = 16; // exclusive
|
|
2363
|
+
const FLOAT_TOLERANCE = 1e-9;
|
|
2364
|
+
/**
 * Formats the instant `offsetMs` milliseconds after BASE_TIME_MS as an ISO-8601 string.
 *
 * @param {number} offsetMs - Millisecond offset added to BASE_TIME_MS.
 * @returns {string} Result of Date#toISOString for that instant.
 */
function isoAt(offsetMs) {
  const instant = new Date(BASE_TIME_MS + offsetMs);
  return instant.toISOString();
}
|
|
2367
|
+
/**
 * Builds one time-series point for the backtest fixtures.
 *
 * Starts from the "normal" signal values and, when `degraded` is truthy,
 * swaps in the degraded std-dev, coverage-gap count, P95 latency and
 * historical values. Baseline std-dev, total coverage cells and P50 latency
 * are the same in both modes.
 *
 * @param {boolean} degraded - Whether the point should carry degraded signals.
 * @param {*} ts - Value stored verbatim as the point's `timestamp`.
 * @returns {object} The fixture point.
 */
function makePoint(degraded, ts) {
  const point = {
    timestamp: ts,
    currentStdDev: NORMAL_STD_DEV,
    baselineStdDev: BASELINE_STD_DEV,
    coverageGapCount: NORMAL_COVERAGE_GAPS,
    totalCoverageCells: TOTAL_COVERAGE_CELLS,
    latencyP95: NORMAL_LATENCY_P95,
    latencyP50: NORMAL_LATENCY_P50,
    historicalValues: [0.5, 0.5, 0.51, 0.49, 0.5],
  };
  if (degraded) {
    // Overwriting existing keys keeps the property order unchanged.
    point.currentStdDev = DEGRADED_STD_DEV;
    point.coverageGapCount = DEGRADED_COVERAGE_GAPS;
    point.latencyP95 = DEGRADED_LATENCY_P95;
    point.historicalValues = [0.8, 0.85, 0.9, 0.88, 0.91];
  }
  return point;
}
|
|
2381
|
+
// Reference detector settings; the two variants below reuse its EWMA lambda and stability threshold.
const productionConfig = {
    varianceThreshold: PRODUCTION_VARIANCE_THRESHOLD,
    coverageDropoutThreshold: PRODUCTION_COVERAGE_THRESHOLD,
    latencySkewThreshold: PRODUCTION_LATENCY_SKEW_THRESHOLD,
    confirmationWindow: PRODUCTION_CONFIRMATION_WINDOW,
    ewmaLambda: PRODUCTION_EWMA_LAMBDA,
    stabilityThreshold: PRODUCTION_STABILITY_THRESHOLD,
};
// Variant built from the SENSITIVE_* thresholds and confirmation window.
const sensitiveConfig = {
    ...productionConfig,
    varianceThreshold: SENSITIVE_VARIANCE_THRESHOLD,
    coverageDropoutThreshold: SENSITIVE_COVERAGE_THRESHOLD,
    latencySkewThreshold: SENSITIVE_LATENCY_SKEW_THRESHOLD,
    confirmationWindow: SENSITIVE_CONFIRMATION_WINDOW,
};
// Variant built from the CONSERVATIVE_* thresholds and confirmation window.
const conservativeConfig = {
    ...productionConfig,
    varianceThreshold: CONSERVATIVE_VARIANCE_THRESHOLD,
    coverageDropoutThreshold: CONSERVATIVE_COVERAGE_THRESHOLD,
    latencySkewThreshold: CONSERVATIVE_LATENCY_SKEW_THRESHOLD,
    confirmationWindow: CONSERVATIVE_CONFIRMATION_WINDOW,
};
|
|
2405
|
+
it('production thresholds with known degradation period: detects true positives', () => {
    // Samples inside [DEGRADED_START_IDX, DEGRADED_END_IDX) carry the degraded signal.
    const inDegradedWindow = (idx) => idx >= DEGRADED_START_IDX && idx < DEGRADED_END_IDX;
    const timeSeries = Array.from({ length: SERIES_LENGTH_TWENTY }, (_, idx) => makePoint(inDegradedWindow(idx), BASE_TIME_MS + idx * STEP_MS));
    // One labeled incident spanning the first through the last degraded sample.
    const labeledIncident = {
        startTime: isoAt(DEGRADED_START_IDX * STEP_MS),
        endTime: isoAt((DEGRADED_END_IDX - 1) * STEP_MS),
        severity: 'major',
    };
    const result = backtestDegradationConfig(productionConfig, timeSeries, [labeledIncident]);
    assert.ok(result.truePositives > 0, `expected truePositives > 0, got ${result.truePositives}`);
    assert.strictEqual(result.config, productionConfig);
});
|
|
2416
|
+
it('sensitive config yields high recall (low thresholds trigger on normal variance)', () => {
    const timeSeries = [];
    for (let idx = 0; idx < SERIES_LENGTH_TWENTY; idx++) {
        const degraded = idx >= DEGRADED_START_IDX && idx < DEGRADED_END_IDX;
        timeSeries.push(makePoint(degraded, BASE_TIME_MS + idx * STEP_MS));
    }
    const incidents = [
        {
            startTime: isoAt(DEGRADED_START_IDX * STEP_MS),
            endTime: isoAt((DEGRADED_END_IDX - 1) * STEP_MS),
            severity: 'minor',
        },
    ];
    // Same data and labels under both configs, so only the thresholds differ.
    const sensitiveResult = backtestDegradationConfig(sensitiveConfig, timeSeries, incidents);
    const productionResult = backtestDegradationConfig(productionConfig, timeSeries, incidents);
    const recallNotWorse = sensitiveResult.pointRecall >= productionResult.pointRecall;
    assert.ok(recallNotWorse, `sensitive recall ${sensitiveResult.pointRecall} should be >= production recall ${productionResult.pointRecall}`);
    const falsePositivesNotFewer = sensitiveResult.falsePositives >= productionResult.falsePositives;
    assert.ok(falsePositivesNotFewer, `sensitive FPs ${sensitiveResult.falsePositives} should be >= production FPs ${productionResult.falsePositives}`);
});
|
|
2428
|
+
it('conservative config yields high precision but low recall (high thresholds miss incidents)', () => {
    const inDegradedWindow = (idx) => idx >= DEGRADED_START_IDX && idx < DEGRADED_END_IDX;
    const timeSeries = Array.from({ length: SERIES_LENGTH_TWENTY }, (_, idx) => makePoint(inDegradedWindow(idx), BASE_TIME_MS + idx * STEP_MS));
    const criticalIncident = {
        startTime: isoAt(DEGRADED_START_IDX * STEP_MS),
        endTime: isoAt((DEGRADED_END_IDX - 1) * STEP_MS),
        severity: 'critical',
    };
    const result = backtestDegradationConfig(conservativeConfig, timeSeries, [criticalIncident]);
    // Only the missed-incident side of the title is asserted here.
    assert.ok(result.falseNegatives > 0, `expected falseNegatives > 0 with conservative config, got ${result.falseNegatives}`);
});
|
|
2438
|
+
it('no incidents: all detections are false positives, pointRecall is 0', () => {
    // Degraded signal throughout — but zero labeled incidents
    const timeSeries = Array.from({ length: SERIES_LENGTH_TWENTY }, (_, i) => makePoint(true, BASE_TIME_MS + i * STEP_MS));
    const incidents = [];
    const result = backtestDegradationConfig(sensitiveConfig, timeSeries, incidents);
    // With nothing labeled, no detection can count as a true positive; this is the
    // "all detections are false positives" half of the title, previously unchecked.
    assert.strictEqual(result.truePositives, 0, `truePositives should be 0 with no incidents, got ${result.truePositives}`);
    assert.strictEqual(result.pointRecall, 0, `pointRecall should be 0 with no incidents, got ${result.pointRecall}`);
    assert.strictEqual(result.falseNegatives, 0, `falseNegatives should be 0 with no incidents, got ${result.falseNegatives}`);
});
|
|
2446
|
+
it('all timestamps degraded and fully covered by incidents: high TP, pointPrecision near 1', () => {
    const lastIdx = SERIES_LENGTH_TWENTY - 1;
    const timeSeries = Array.from({ length: SERIES_LENGTH_TWENTY }, (_, idx) => makePoint(true, BASE_TIME_MS + idx * STEP_MS));
    // A single incident stretching from the first sample to the last one.
    const coveringIncident = {
        startTime: isoAt(0),
        endTime: isoAt(lastIdx * STEP_MS),
        severity: 'critical',
    };
    const result = backtestDegradationConfig(sensitiveConfig, timeSeries, [coveringIncident]);
    assert.ok(result.truePositives > 0, `truePositives should be > 0, got ${result.truePositives}`);
    assert.ok(result.pointPrecision > 0.5, `pointPrecision should be near 1 when all detections are incident-covered, got ${result.pointPrecision}`);
});
|
|
2457
|
+
it('empty time series returns all-zero counts and metrics', () => {
    const result = backtestDegradationConfig(productionConfig, [], []);
    // Confusion-matrix counters must be exactly zero.
    for (const counter of ['truePositives', 'falsePositives', 'falseNegatives', 'trueNegatives']) {
        assert.strictEqual(result[counter], 0);
    }
    // Point-wise metrics are compared with a tolerance.
    for (const metric of ['pointPrecision', 'pointRecall', 'pointF1']) {
        assert.ok(Math.abs(result[metric] - 0) < FLOAT_TOLERANCE, `${metric} should be 0, got ${result[metric]}`);
    }
    // Metrics nested under `tapr` must be exactly zero as well.
    for (const metric of ['precision', 'recall', 'f1']) {
        assert.strictEqual(result.tapr[metric], 0);
    }
});
|
|
2470
|
+
it('result config field references the same config object passed in', () => {
    // Identity check (not deep equality): the very same object must come back.
    const { config: returnedConfig } = backtestDegradationConfig(productionConfig, [], []);
    assert.strictEqual(returnedConfig, productionConfig);
});
|
|
2474
|
+
it('pointF1 is harmonic mean of pointPrecision and pointRecall when both > 0', () => {
    const timeSeries = Array.from({ length: SERIES_LENGTH_THIRTY }, (_, i) => makePoint(i >= DEGRADED_START_IDX && i < DEGRADED_END_IDX, BASE_TIME_MS + i * STEP_MS));
    const incidents = [{
            startTime: isoAt(DEGRADED_START_IDX * STEP_MS),
            endTime: isoAt((DEGRADED_END_IDX - 1) * STEP_MS),
            severity: 'major',
        }];
    const { pointPrecision, pointRecall, pointF1 } = backtestDegradationConfig(productionConfig, timeSeries, incidents);
    const denominator = pointPrecision + pointRecall;
    // The harmonic mean is 0/0 when both inputs are 0. Rather than skipping the
    // assertion (which let NaN or garbage F1 pass unnoticed), expect F1 = 0 there,
    // matching what the empty-series test in this suite requires of pointF1.
    const expectedF1 = denominator > 0
        ? 2 * pointPrecision * pointRecall / denominator
        : 0;
    assert.ok(Math.abs(pointF1 - expectedF1) < FLOAT_TOLERANCE, `pointF1 ${pointF1} should equal harmonic mean ${expectedF1}`);
});
|
|
2488
|
+
});
|
|
2489
|
+
// --- sweepDegradationParams ---
|
|
2490
|
+
describe('sweepDegradationParams', () => {
    // Shared fixture: 8 points, kept small because every test re-runs the full grid sweep.
    const SWEEP_BASE_TIME_MS = 1_700_000_000_000;
    const SWEEP_STEP_MS = 1_000;
    const SWEEP_SERIES_LENGTH = 8;
    const SWEEP_DEGRADED_START_IDX = 3;
    const SWEEP_DEGRADED_END_IDX = 7; // exclusive
    const SWEEP_NORMAL_STD_DEV = 0.05;
    const SWEEP_DEGRADED_STD_DEV = 0.50;
    const SWEEP_BASELINE_STD_DEV = 0.10;
    const SWEEP_NORMAL_COVERAGE_GAPS = 0;
    const SWEEP_DEGRADED_COVERAGE_GAPS = 50;
    const SWEEP_TOTAL_COVERAGE_CELLS = 100;
    const SWEEP_NORMAL_LATENCY_P95 = 0.2;
    const SWEEP_NORMAL_LATENCY_P50 = 0.1;
    const SWEEP_DEGRADED_LATENCY_P95 = 0.9;
    // The BACKTEST_SWEEP dimensions whose lengths multiply to the expected result count.
    const SWEPT_FIELDS = [
        'varianceThreshold',
        'coverageDropoutThreshold',
        'ewmaLambda',
        'confirmationWindow',
        'stabilityThreshold',
    ];
    const EXPECTED_SWEEP_TOTAL = SWEPT_FIELDS.reduce((total, field) => total * BACKTEST_SWEEP[field].length, 1);
    const FIXED_LATENCY_SKEW_THRESHOLD = LATENCY_SKEW_SIGNAL_THRESHOLD;
    // Fields used to recognise the current production config among sweep results.
    const CURRENT_CONFIG_FIELDS = ['varianceThreshold', 'coverageDropoutThreshold', 'ewmaLambda', 'confirmationWindow'];
    /**
     * Builds one synthetic sample; `degraded` selects the degraded or the normal signal values.
     */
    function makeSweepPoint(degraded, ts) {
        return {
            timestamp: ts,
            currentStdDev: degraded ? SWEEP_DEGRADED_STD_DEV : SWEEP_NORMAL_STD_DEV,
            baselineStdDev: SWEEP_BASELINE_STD_DEV,
            coverageGapCount: degraded ? SWEEP_DEGRADED_COVERAGE_GAPS : SWEEP_NORMAL_COVERAGE_GAPS,
            totalCoverageCells: SWEEP_TOTAL_COVERAGE_CELLS,
            latencyP95: degraded ? SWEEP_DEGRADED_LATENCY_P95 : SWEEP_NORMAL_LATENCY_P95,
            latencyP50: SWEEP_NORMAL_LATENCY_P50,
            historicalValues: degraded
                ? [0.8, 0.85, 0.9, 0.88, 0.91]
                : [0.5, 0.5, 0.51, 0.49, 0.5],
        };
    }
    /** Formats SWEEP_BASE_TIME_MS + offsetMs as an ISO-8601 string. */
    function isoSweepAt(offsetMs) {
        return new Date(SWEEP_BASE_TIME_MS + offsetMs).toISOString();
    }
    const sweepTimeSeries = Array.from({ length: SWEEP_SERIES_LENGTH }, (_, i) => makeSweepPoint(i >= SWEEP_DEGRADED_START_IDX && i < SWEEP_DEGRADED_END_IDX, SWEEP_BASE_TIME_MS + i * SWEEP_STEP_MS));
    // One incident labeled from the first to the last degraded sample.
    const sweepIncidents = [{
            startTime: isoSweepAt(SWEEP_DEGRADED_START_IDX * SWEEP_STEP_MS),
            endTime: isoSweepAt((SWEEP_DEGRADED_END_IDX - 1) * SWEEP_STEP_MS),
            severity: 'major',
        }];
    it('results.length equals total grid size (2500)', () => {
        const sweep = sweepDegradationParams(sweepTimeSeries, sweepIncidents);
        assert.strictEqual(sweep.results.length, EXPECTED_SWEEP_TOTAL);
    });
    it('currentConfigResult is populated and matches the production config values', () => {
        const sweep = sweepDegradationParams(sweepTimeSeries, sweepIncidents);
        const { config } = sweep.currentConfigResult;
        for (const field of CURRENT_CONFIG_FIELDS) {
            assert.strictEqual(config[field], CURRENT_PRODUCTION_CONFIG[field]);
        }
    });
    it('bestByF1 has tapr.f1 >= currentConfigResult.tapr.f1', () => {
        const sweep = sweepDegradationParams(sweepTimeSeries, sweepIncidents);
        assert.ok(sweep.bestByF1.tapr.f1 >= sweep.currentConfigResult.tapr.f1, `bestByF1.tapr.f1 (${sweep.bestByF1.tapr.f1}) should be >= currentConfigResult.tapr.f1 (${sweep.currentConfigResult.tapr.f1})`);
    });
    it('bestByRecall has tapr.recall >= bestByF1.tapr.recall', () => {
        const sweep = sweepDegradationParams(sweepTimeSeries, sweepIncidents);
        assert.ok(sweep.bestByRecall.tapr.recall >= sweep.bestByF1.tapr.recall, `bestByRecall.tapr.recall (${sweep.bestByRecall.tapr.recall}) should be >= bestByF1.tapr.recall (${sweep.bestByF1.tapr.recall})`);
    });
    it('all results have valid config objects with fields from the sweep grid', () => {
        const sweep = sweepDegradationParams(sweepTimeSeries, sweepIncidents);
        for (const { config } of sweep.results) {
            // Check every swept dimension, including ewmaLambda and stabilityThreshold,
            // which were left unchecked although they contribute to the grid size.
            for (const field of SWEPT_FIELDS) {
                assert.ok(BACKTEST_SWEEP[field].includes(config[field]), `${field} ${config[field]} not in sweep grid`);
            }
        }
    });
    it('latencySkewThreshold is fixed at 3 for all configs (not swept)', () => {
        const sweep = sweepDegradationParams(sweepTimeSeries, sweepIncidents);
        for (const result of sweep.results) {
            assert.strictEqual(result.config.latencySkewThreshold, FIXED_LATENCY_SKEW_THRESHOLD, `latencySkewThreshold should be ${FIXED_LATENCY_SKEW_THRESHOLD}, got ${result.config.latencySkewThreshold}`);
        }
    });
    it('empty time series produces results where all metrics are zero', () => {
        const sweep = sweepDegradationParams([], []);
        for (const { tapr } of sweep.results) {
            assert.strictEqual(tapr.f1, 0, 'tapr.f1 should be 0 for empty series');
            assert.strictEqual(tapr.precision, 0, 'tapr.precision should be 0 for empty series');
            assert.strictEqual(tapr.recall, 0, 'tapr.recall should be 0 for empty series');
        }
    });
    it('currentConfigResult is included in sweep results array', () => {
        const sweep = sweepDegradationParams(sweepTimeSeries, sweepIncidents);
        const current = sweep.currentConfigResult.config;
        const found = sweep.results.some((r) => CURRENT_CONFIG_FIELDS.every((field) => r.config[field] === current[field]));
        assert.ok(found, 'currentConfigResult config should appear in sweep results');
    });
});
|
|
2585
|
+
// --- sweepWithCrossValidation (FU-FE-R5-LOO) ---
|
|
2586
|
+
describe('sweepWithCrossValidation', () => {
    const LOO_BASE_MS = 1_700_000_000_000;
    const LOO_STEP_MS = 1_000;
    const LOO_SERIES_LENGTH = 10;
    const LOO_DEGRADED_START = 4;
    const LOO_DEGRADED_END = 9; // exclusive
    const LOO_NORMAL_STD_DEV = 0.05;
    const LOO_DEGRADED_STD_DEV = 0.50;
    const LOO_BASELINE_STD_DEV = 0.10;
    const LOO_COVERAGE_CELLS = 100;
    // The parameterStability fields these tests inspect.
    const STABILITY_FIELDS = [
        'varianceThresholdStdDev',
        'coverageDropoutThresholdStdDev',
        'confirmationWindowStdDev',
    ];
    /**
     * Builds one synthetic sample; `degraded` selects the degraded or the normal signal values.
     */
    function makeLOOPoint(degraded, ts) {
        return {
            timestamp: ts,
            currentStdDev: degraded ? LOO_DEGRADED_STD_DEV : LOO_NORMAL_STD_DEV,
            baselineStdDev: LOO_BASELINE_STD_DEV,
            coverageGapCount: degraded ? 50 : 0,
            totalCoverageCells: LOO_COVERAGE_CELLS,
            latencyP95: degraded ? 0.9 : 0.2,
            latencyP50: 0.1,
            historicalValues: degraded ? [0.8, 0.85, 0.9] : [0.5, 0.5, 0.51],
        };
    }
    /** Formats LOO_BASE_MS + offsetMs as an ISO-8601 string. */
    function isoLOOAt(offsetMs) {
        return new Date(LOO_BASE_MS + offsetMs).toISOString();
    }
    const looTimeSeries = Array.from({ length: LOO_SERIES_LENGTH }, (_, i) => makeLOOPoint(i >= LOO_DEGRADED_START && i < LOO_DEGRADED_END, LOO_BASE_MS + i * LOO_STEP_MS));
    // Two labeled incidents: the second starts later and ends earlier than the first.
    const looIncidents = [
        {
            startTime: isoLOOAt(LOO_DEGRADED_START * LOO_STEP_MS),
            endTime: isoLOOAt((LOO_DEGRADED_END - 1) * LOO_STEP_MS),
            severity: 'major',
        },
        {
            startTime: isoLOOAt((LOO_DEGRADED_START + 1) * LOO_STEP_MS),
            endTime: isoLOOAt((LOO_DEGRADED_END - 2) * LOO_STEP_MS),
            severity: 'minor',
        },
    ];
    it('returns fullSweep and folds with correct counts', () => {
        const result = sweepWithCrossValidation(looTimeSeries, looIncidents);
        assert.ok(result.fullSweep, 'fullSweep should be present');
        assert.strictEqual(result.folds.length, looIncidents.length, `folds.length should equal incident count (${looIncidents.length})`);
    });
    it('each fold holds out exactly one incident', () => {
        const { folds } = sweepWithCrossValidation(looTimeSeries, looIncidents);
        looIncidents.forEach((incident, i) => {
            assert.strictEqual(folds[i].heldOutIncident.startTime, incident.startTime, `fold ${i} should hold out incident ${i}`);
        });
    });
    it('fullSweep matches standalone sweepDegradationParams on same inputs', () => {
        const result = sweepWithCrossValidation(looTimeSeries, looIncidents);
        const standalone = sweepDegradationParams(looTimeSeries, looIncidents);
        assert.strictEqual(result.fullSweep.results.length, standalone.results.length, 'fullSweep result count should match standalone sweep');
        assert.strictEqual(result.fullSweep.bestByF1.tapr.f1, standalone.bestByF1.tapr.f1, 'fullSweep bestByF1.tapr.f1 should match standalone sweep');
    });
    it('parameterStability fields are non-negative numbers', () => {
        const result = sweepWithCrossValidation(looTimeSeries, looIncidents);
        const ps = result.parameterStability;
        for (const field of STABILITY_FIELDS) {
            // Number.isFinite does not coerce. The global isFinite treats null as 0
            // (and `null >= 0` is true), so a null std-dev used to pass this test.
            assert.ok(Number.isFinite(ps[field]), `${field} should be finite`);
            assert.ok(ps[field] >= 0, `${field} should be non-negative`);
        }
    });
    it('isStable is boolean', () => {
        const result = sweepWithCrossValidation(looTimeSeries, looIncidents);
        assert.ok(typeof result.isStable === 'boolean', 'isStable should be a boolean');
    });
    it('zero incidents: returns empty folds, parameterStability all 0, isStable vacuously true', () => {
        const result = sweepWithCrossValidation(looTimeSeries, []);
        assert.strictEqual(result.folds.length, 0, 'folds should be empty with no incidents');
        // computeStdDev([]) returns null, nullish coalescing to 0
        for (const field of STABILITY_FIELDS) {
            assert.strictEqual(result.parameterStability[field], 0);
        }
        // folds.every() on empty array is vacuously true
        assert.strictEqual(result.isStable, true, 'isStable is vacuously true when there are no folds');
    });
    it('single incident: one fold trained on empty incidents, parameterStability all 0', () => {
        const singleIncident = [looIncidents[0]];
        const result = sweepWithCrossValidation(looTimeSeries, singleIncident);
        assert.strictEqual(result.folds.length, 1, 'should have one fold for one incident');
        assert.ok(result.folds[0].sweepResult.bestByF1, 'fold sweepResult should have bestByF1 when trained on empty incidents');
        // computeStdDev of a single value returns null → 0
        for (const field of STABILITY_FIELDS) {
            assert.strictEqual(result.parameterStability[field], 0, 'single fold => std dev is 0');
        }
    });
});
|
|
2676
|
+
// --- computeTaPR stress tests (ST-FE-R5-TAPR) ---
|
|
2677
|
+
describe('computeTaPR stress', () => {
    const BASE = 1_700_000_000_000;
    const HOUR_MS = 3_600_000;
    const TOLERANCE = 1e-9;
    /** True when `value` is a real number inside the closed interval [0, 1]. */
    const inUnitInterval = (value) => value >= 0 && value <= 1;
    it('precision and recall are always in [0,1] for 100 random-offset detected/incident pairs', () => {
        // NOTE: despite the title, the offsets are deterministic (i hours after BASE).
        for (let i = 0; i < 100; i++) {
            const offset = i * HOUR_MS;
            const detected = [{ start: BASE + offset, end: BASE + offset + HOUR_MS }];
            // Incident occupies the middle half of the detected hour.
            const incidents = [{ start: BASE + offset + HOUR_MS / 4, end: BASE + offset + HOUR_MS * 3 / 4 }];
            const r = computeTaPR(detected, incidents);
            assert.ok(inUnitInterval(r.precision), `precision ${r.precision} out of [0,1] at i=${i}`);
            assert.ok(inUnitInterval(r.recall), `recall ${r.recall} out of [0,1] at i=${i}`);
            assert.ok(inUnitInterval(r.f1), `f1 ${r.f1} out of [0,1] at i=${i}`);
            assert.ok(r.detectionDelay >= 0, `detectionDelay ${r.detectionDelay} is negative at i=${i}`);
        }
    });
    it('overlapping incident windows: two overlapping incidents produce finite precision/recall/f1', () => {
        // Two incidents that overlap in time
        const detected = [{ start: BASE, end: BASE + 2 * HOUR_MS }];
        const incidents = [
            { start: BASE, end: BASE + HOUR_MS },
            { start: BASE + HOUR_MS / 2, end: BASE + HOUR_MS + HOUR_MS / 2 },
        ];
        const r = computeTaPR(detected, incidents);
        // Number.isFinite (unlike the global isFinite) rejects null and other
        // non-number values instead of coercing them to 0.
        assert.ok(Number.isFinite(r.precision), `precision should be finite, got ${r.precision}`);
        assert.ok(Number.isFinite(r.recall), `recall should be finite, got ${r.recall}`);
        assert.ok(Number.isFinite(r.f1), `f1 should be finite, got ${r.f1}`);
        assert.ok(inUnitInterval(r.precision), `precision ${r.precision} out of bounds`);
        assert.ok(inUnitInterval(r.recall), `recall ${r.recall} out of bounds`);
    });
    it('zero-duration detected ranges are skipped: does not produce NaN or divide-by-zero', () => {
        // Mix of valid and zero-duration detected ranges
        const detected = [
            { start: BASE, end: BASE }, // zero-duration — should be skipped
            { start: BASE, end: BASE + HOUR_MS }, // valid
        ];
        const incidents = [{ start: BASE, end: BASE + HOUR_MS }];
        const r = computeTaPR(detected, incidents);
        assert.ok(Number.isFinite(r.precision), `precision should be finite, got ${r.precision}`);
        assert.ok(Number.isFinite(r.recall), `recall should be finite, got ${r.recall}`);
    });
    it('incident with identical start/end (zero-duration): recall skips zero-duration incidents', () => {
        const detected = [{ start: BASE, end: BASE + HOUR_MS }];
        const incidents = [
            { start: BASE + 1000, end: BASE + 1000 }, // zero-duration incident
            { start: BASE, end: BASE + HOUR_MS }, // valid
        ];
        const r = computeTaPR(detected, incidents);
        assert.ok(Number.isFinite(r.recall), `recall should be finite with zero-duration incident`);
        assert.ok(inUnitInterval(r.recall), `recall ${r.recall} out of bounds`);
    });
    it('1000 non-overlapping detected ranges with 1 incident: precision stays in [0,1]', () => {
        const SEGMENT_MS = 1_000;
        // Each range lasts one segment and is followed by a one-segment gap.
        const detectedRanges = Array.from({ length: 1_000 }, (_, i) => {
            const start = BASE + i * 2 * SEGMENT_MS;
            return { start, end: start + SEGMENT_MS };
        });
        // Single incident covering only the first detected range
        const incidents = [{ start: BASE, end: BASE + SEGMENT_MS }];
        const r = computeTaPR(detectedRanges, incidents);
        assert.ok(inUnitInterval(r.precision), `precision ${r.precision} out of [0,1] with 1000 ranges`);
        assert.ok(inUnitInterval(r.recall), `recall ${r.recall} out of [0,1] with 1000 ranges`);
        assert.ok(Number.isFinite(r.f1), `f1 should be finite with 1000 detected ranges`);
    });
    it('detection delay is non-negative when detection starts after incident', () => {
        const DELAY_MS = 5_000;
        const detected = [{ start: BASE + DELAY_MS, end: BASE + 2 * HOUR_MS }];
        const incidents = [{ start: BASE, end: BASE + HOUR_MS }];
        const r = computeTaPR(detected, incidents);
        assert.ok(r.detectionDelay >= 0, `detectionDelay ${r.detectionDelay} should be non-negative`);
        assert.ok(Math.abs(r.detectionDelay - DELAY_MS) < TOLERANCE, `detectionDelay should be ~${DELAY_MS}ms, got ${r.detectionDelay}`);
    });
});
|
|
2750
|
+
// --- sweepDegradationParams stress tests (ST-FE-R5-SWEEP) ---
|
|
2751
|
+
describe('sweepDegradationParams stress', () => {
    const SWEEP_STRESS_BASE_MS = 1_700_000_000_000;
    const HOUR_STEP_MS = 3_600_000;
    const THIRTY_DAY_HOURS = 720;
    const SWEEP_STRESS_TIMEOUT_MS = 30_000;
    const INCIDENT_START_HOUR = 360;
    const INCIDENT_END_HOUR = 380;
    // Minimal point values for healthy/degraded signals
    const HEALTHY_STD = 0.05;
    const DEGRADED_STD = 0.50;
    const BASELINE_STD = 0.10;
    const NORMAL_GAPS = 0;
    const DEGRADED_GAPS = 50;
    const TOTAL_CELLS = 100;
    const NORMAL_P95 = 0.2;
    const NORMAL_P50 = 0.1;
    const DEGRADED_P95 = 0.9;
    /**
     * Builds one hourly sample; `degraded` selects the degraded or the healthy signal values.
     */
    function makeStressPoint(degraded, ts) {
        return {
            timestamp: ts,
            currentStdDev: degraded ? DEGRADED_STD : HEALTHY_STD,
            baselineStdDev: BASELINE_STD,
            coverageGapCount: degraded ? DEGRADED_GAPS : NORMAL_GAPS,
            totalCoverageCells: TOTAL_CELLS,
            latencyP95: degraded ? DEGRADED_P95 : NORMAL_P95,
            latencyP50: NORMAL_P50,
            historicalValues: degraded ? [0.8, 0.85, 0.9] : [0.5, 0.5, 0.5],
        };
    }
    /** Formats the start of the given hour (counted from SWEEP_STRESS_BASE_MS) as ISO-8601. */
    const isoAtHour = (hour) => new Date(SWEEP_STRESS_BASE_MS + hour * HOUR_STEP_MS).toISOString();
    const stressTimeSeries = Array.from({ length: THIRTY_DAY_HOURS }, (_, i) => makeStressPoint(i >= INCIDENT_START_HOUR && i < INCIDENT_END_HOUR, SWEEP_STRESS_BASE_MS + i * HOUR_STEP_MS));
    // One incident labeled from the first to the last degraded hour.
    const stressIncidents = [{
            startTime: isoAtHour(INCIDENT_START_HOUR),
            endTime: isoAtHour(INCIDENT_END_HOUR - 1),
            severity: 'major',
        }];
    it('720-point (30-day) sweep completes within 30 seconds', () => {
        const start = Date.now();
        sweepDegradationParams(stressTimeSeries, stressIncidents);
        const elapsed = Date.now() - start;
        assert.ok(elapsed < SWEEP_STRESS_TIMEOUT_MS, `sweep took ${elapsed}ms, exceeds 30s threshold`);
    });
    it('sweep is deterministic: two calls with same input produce identical bestByF1 config', () => {
        const sweep1 = sweepDegradationParams(stressTimeSeries, stressIncidents);
        const sweep2 = sweepDegradationParams(stressTimeSeries, stressIncidents);
        assert.strictEqual(sweep1.bestByF1.tapr.f1, sweep2.bestByF1.tapr.f1, 'bestByF1.tapr.f1 differs between runs');
        assert.strictEqual(sweep1.bestByF1.config.varianceThreshold, sweep2.bestByF1.config.varianceThreshold, 'bestByF1 config varianceThreshold differs between runs');
        // The title promises an identical config, so compare every field rather than
        // only varianceThreshold.
        assert.deepStrictEqual(sweep1.bestByF1.config, sweep2.bestByF1.config, 'bestByF1 config differs between runs');
    });
    it('no NaN or Infinity in any result metric field', () => {
        const sweep = sweepDegradationParams(stressTimeSeries, stressIncidents);
        for (const r of sweep.results) {
            // Number.isFinite does not coerce, so null/undefined metrics fail here
            // (the global isFinite accepts null because it converts it to 0).
            for (const metric of ['precision', 'recall', 'f1', 'detectionDelay']) {
                assert.ok(Number.isFinite(r.tapr[metric]), `tapr.${metric} is not finite: ${r.tapr[metric]}`);
            }
            for (const metric of ['pointPrecision', 'pointRecall', 'pointF1']) {
                assert.ok(Number.isFinite(r[metric]), `${metric} is not finite: ${r[metric]}`);
            }
        }
    });
    it('CURRENT_PRODUCTION_CONFIG always appears in sweep results', () => {
        const sweep = sweepDegradationParams(stressTimeSeries, stressIncidents);
        const found = sweep.results.some(r => r.config.varianceThreshold === CURRENT_PRODUCTION_CONFIG.varianceThreshold &&
            r.config.coverageDropoutThreshold === CURRENT_PRODUCTION_CONFIG.coverageDropoutThreshold &&
            r.config.ewmaLambda === CURRENT_PRODUCTION_CONFIG.ewmaLambda &&
            r.config.confirmationWindow === CURRENT_PRODUCTION_CONFIG.confirmationWindow);
        assert.ok(found, 'CURRENT_PRODUCTION_CONFIG must appear in sweep results');
    });
});
|
|
2819
|
+
// --- backtestDegradationConfig stress tests (ST-FE-R5-BACKTEST) ---
|
|
2820
|
+
describe('backtestDegradationConfig stress', () => {
|
|
2821
|
+
// Timeline: fixed epoch start, one sample per BT_STEP_MS.
const BT_BASE_MS = 1_700_000_000_000;
const BT_STEP_MS = 1_000;
// Variance signal values.
const BASELINE_STD = 0.10;
const HEALTHY_STD = 0.05;
const DEGRADED_STD = 0.50;
// Coverage signal values.
const TOTAL_CELLS = 100;
const NORMAL_GAPS = 0;
const DEGRADED_GAPS = 50;
// Latency signal values.
const NORMAL_P50 = 0.1;
const NORMAL_P95 = 0.2;
const DEGRADED_P95 = 0.9;
// latencySkewThreshold stays at the production value (3 > healthy P95/P50 ratio of 2.0),
// so healthy data won't trigger on latency.
const SENSITIVE_CONFIG = {
    varianceThreshold: 1.05,
    coverageDropoutThreshold: 0.01,
    latencySkewThreshold: LATENCY_SKEW_SIGNAL_THRESHOLD,
    confirmationWindow: 1,
    ewmaLambda: 0.1,
    stabilityThreshold: 2.0,
};
|
|
2841
|
+
/**
 * Builds one synthetic sample for the backtest stress cases.
 *
 * @param {boolean} degraded - Truthy selects degraded variance/coverage/latency values.
 * @param {number} ts - Value stored as the sample's `timestamp`.
 * @returns {object} The sample; `historicalValues` is the same flat series in both modes.
 */
function makeBtPoint(degraded, ts) {
    const sample = {
        timestamp: ts,
        currentStdDev: HEALTHY_STD,
        baselineStdDev: BASELINE_STD,
        coverageGapCount: NORMAL_GAPS,
        totalCoverageCells: TOTAL_CELLS,
        latencyP95: NORMAL_P95,
        latencyP50: NORMAL_P50,
        historicalValues: [0.5, 0.5, 0.5, 0.5, 0.5],
    };
    if (degraded) {
        sample.currentStdDev = DEGRADED_STD;
        sample.coverageGapCount = DEGRADED_GAPS;
        sample.latencyP95 = DEGRADED_P95;
    }
    return sample;
}
|
|
2853
|
+
it('all-healthy time series produces zero false positives with no incidents', () => {
    const series = [];
    for (let idx = 0; idx < 20; idx++) {
        series.push(makeBtPoint(false, BT_BASE_MS + idx * BT_STEP_MS));
    }
    const { falsePositives, falseNegatives } = backtestDegradationConfig(SENSITIVE_CONFIG, series, []);
    assert.strictEqual(falsePositives, 0, `expected 0 FP for all-healthy series with no incidents, got ${falsePositives}`);
    assert.strictEqual(falseNegatives, 0, `expected 0 FN with no incidents, got ${falseNegatives}`);
});
|
|
2859
|
+
it('all-degraded time series with a covering incident produces zero false negatives', () => {
    const SERIES_LEN = 20;
    const CONFIRMATION_WINDOW = 1;
    const lastTimestamp = BT_BASE_MS + (SERIES_LEN - 1) * BT_STEP_MS;
    const config = { ...SENSITIVE_CONFIG, confirmationWindow: CONFIRMATION_WINDOW };
    const series = Array.from({ length: SERIES_LEN }, (_, idx) => makeBtPoint(true, BT_BASE_MS + idx * BT_STEP_MS));
    // A single incident stretching from the first sample to the last one.
    const coveringIncident = {
        startTime: new Date(BT_BASE_MS).toISOString(),
        endTime: new Date(lastTimestamp).toISOString(),
        severity: 'critical',
    };
    const outcome = backtestDegradationConfig(config, series, [coveringIncident]);
    assert.strictEqual(outcome.falseNegatives, 0, `expected 0 FN for all-degraded series fully covered by incident, got ${outcome.falseNegatives}`);
});
|
|
2872
|
+
it('single-point time series: returns valid result without throwing', () => {
  // A series of exactly one healthy point and no incidents.
  const loneHealthyPoint = makeBtPoint(false, BT_BASE_MS);
  const result = backtestDegradationConfig(SENSITIVE_CONFIG, [loneHealthyPoint], []);

  // Number.isFinite does not coerce its argument, so a null, missing or
  // string-valued metric fails here instead of being accepted as 0 or a
  // parsed number the way the global isFinite would.
  assert.ok(Number.isFinite(result.pointF1), `pointF1 should be finite for single-point series`);
  assert.ok(Number.isFinite(result.tapr.f1), `tapr.f1 should be finite for single-point series`);
});
|
|
2878
|
+
it('confirmation window larger than series length: all points unconfirmed → zero false positives', () => {
  const pointCount = 5;

  // The window is one step longer than the whole series.
  const oversizedWindowConfig = { ...SENSITIVE_CONFIG, confirmationWindow: pointCount + 1 };

  const degradedSeries = [];
  for (let idx = 0; idx < pointCount; idx += 1) {
    degradedSeries.push(makeBtPoint(true, BT_BASE_MS + idx * BT_STEP_MS));
  }

  const outcome = backtestDegradationConfig(oversizedWindowConfig, degradedSeries, []);

  assert.strictEqual(
    outcome.falsePositives,
    0,
    `confirmationWindow > series length should produce 0 FP, got ${outcome.falsePositives}`,
  );
  assert.strictEqual(
    outcome.truePositives,
    0,
    `confirmationWindow > series length should produce 0 TP, got ${outcome.truePositives}`,
  );
});
|
|
2889
|
+
it('flat historical values with EWMA drift detection: no NaN in results', () => {
  // Healthy point whose history is spelled out inline so the flat
  // (zero-variation) input this case depends on is visible in the test itself.
  const flatHistoryPoint = (ts) => ({
    timestamp: ts,
    currentStdDev: HEALTHY_STD,
    baselineStdDev: BASELINE_STD,
    coverageGapCount: NORMAL_GAPS,
    totalCoverageCells: TOTAL_CELLS,
    latencyP95: NORMAL_P95,
    latencyP50: NORMAL_P50,
    historicalValues: [0.5, 0.5, 0.5, 0.5, 0.5], // perfectly flat — no drift
  });

  const series = Array.from({ length: 10 }, (_, i) => flatHistoryPoint(BT_BASE_MS + i * BT_STEP_MS));
  const result = backtestDegradationConfig(SENSITIVE_CONFIG, series, []);

  // Number.isFinite rejects NaN and ±Infinity and, unlike the global
  // isFinite, also rejects null/undefined/strings instead of coercing them,
  // so a metric that is absent cannot pass as "finite".
  assert.ok(Number.isFinite(result.tapr.f1), `tapr.f1 should be finite with flat historical values`);
  assert.ok(Number.isFinite(result.pointPrecision), `pointPrecision should be finite with flat historical values`);
  assert.ok(Number.isFinite(result.pointRecall), `pointRecall should be finite with flat historical values`);
});
|
|
2906
|
+
});
|
|
2907
|
+
});
|
|
2908
|
+
//# sourceMappingURL=quality-feature-engineering.test.js.map
|