observability-toolkit 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +163 -398
- package/dist/__tests__/find-constant-dedup.test.d.ts +11 -0
- package/dist/__tests__/find-constant-dedup.test.d.ts.map +1 -0
- package/dist/__tests__/find-constant-dedup.test.js +132 -0
- package/dist/__tests__/find-constant-dedup.test.js.map +1 -0
- package/dist/backends/backend-schemas.d.ts +309 -0
- package/dist/backends/backend-schemas.d.ts.map +1 -0
- package/dist/backends/backend-schemas.js +215 -0
- package/dist/backends/backend-schemas.js.map +1 -0
- package/dist/backends/cloud.d.ts +46 -0
- package/dist/backends/cloud.d.ts.map +1 -0
- package/dist/backends/cloud.js +520 -0
- package/dist/backends/cloud.js.map +1 -0
- package/dist/backends/cloud.test.d.ts +2 -0
- package/dist/backends/cloud.test.d.ts.map +1 -0
- package/dist/backends/cloud.test.js +436 -0
- package/dist/backends/cloud.test.js.map +1 -0
- package/dist/backends/index.d.ts +659 -386
- package/dist/backends/index.d.ts.map +1 -1
- package/dist/backends/index.js +318 -41
- package/dist/backends/index.js.map +1 -1
- package/dist/backends/index.test.js +578 -57
- package/dist/backends/index.test.js.map +1 -1
- package/dist/backends/local-jsonl-boolean-search.test.js +8 -7
- package/dist/backends/local-jsonl-boolean-search.test.js.map +1 -1
- package/dist/backends/local-jsonl-cache.test.js +33 -31
- package/dist/backends/local-jsonl-cache.test.js.map +1 -1
- package/dist/backends/local-jsonl-circuit-breaker.test.js +9 -7
- package/dist/backends/local-jsonl-circuit-breaker.test.js.map +1 -1
- package/dist/backends/local-jsonl-export.test.js +73 -58
- package/dist/backends/local-jsonl-export.test.js.map +1 -1
- package/dist/backends/local-jsonl-index.test.js +52 -50
- package/dist/backends/local-jsonl-index.test.js.map +1 -1
- package/dist/backends/local-jsonl-logs.test.js +47 -31
- package/dist/backends/local-jsonl-logs.test.js.map +1 -1
- package/dist/backends/local-jsonl-metrics.test.js +85 -82
- package/dist/backends/local-jsonl-metrics.test.js.map +1 -1
- package/dist/backends/local-jsonl-otlp-unwrap.test.d.ts +2 -0
- package/dist/backends/local-jsonl-otlp-unwrap.test.d.ts.map +1 -0
- package/dist/backends/local-jsonl-otlp-unwrap.test.js +602 -0
- package/dist/backends/local-jsonl-otlp-unwrap.test.js.map +1 -0
- package/dist/backends/local-jsonl-traces.test.js +161 -147
- package/dist/backends/local-jsonl-traces.test.js.map +1 -1
- package/dist/backends/local-jsonl.d.ts +37 -8
- package/dist/backends/local-jsonl.d.ts.map +1 -1
- package/dist/backends/local-jsonl.js +1088 -241
- package/dist/backends/local-jsonl.js.map +1 -1
- package/dist/backends/shared.d.ts +9 -0
- package/dist/backends/shared.d.ts.map +1 -0
- package/dist/backends/shared.js +9 -0
- package/dist/backends/shared.js.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/logs/v1/logs_service_pb.d.ts +40 -0
- package/dist/generated/opentelemetry/proto/collector/logs/v1/logs_service_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/logs/v1/logs_service_pb.js +27 -0
- package/dist/generated/opentelemetry/proto/collector/logs/v1/logs_service_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/metrics/v1/metrics_service_pb.d.ts +106 -0
- package/dist/generated/opentelemetry/proto/collector/metrics/v1/metrics_service_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/metrics/v1/metrics_service_pb.js +43 -0
- package/dist/generated/opentelemetry/proto/collector/metrics/v1/metrics_service_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/profiles/v1development/profiles_service_pb.d.ts +111 -0
- package/dist/generated/opentelemetry/proto/collector/profiles/v1development/profiles_service_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/profiles/v1development/profiles_service_pb.js +42 -0
- package/dist/generated/opentelemetry/proto/collector/profiles/v1development/profiles_service_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/trace/v1/trace_service_pb.d.ts +106 -0
- package/dist/generated/opentelemetry/proto/collector/trace/v1/trace_service_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/collector/trace/v1/trace_service_pb.js +43 -0
- package/dist/generated/opentelemetry/proto/collector/trace/v1/trace_service_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/common/v1/common_pb.d.ts +243 -0
- package/dist/generated/opentelemetry/proto/common/v1/common_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/common/v1/common_pb.js +49 -0
- package/dist/generated/opentelemetry/proto/common/v1/common_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/logs/v1/logs_pb.d.ts +90 -0
- package/dist/generated/opentelemetry/proto/logs/v1/logs_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/logs/v1/logs_pb.js +66 -0
- package/dist/generated/opentelemetry/proto/logs/v1/logs_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/metrics/v1/metrics_pb.d.ts +1134 -0
- package/dist/generated/opentelemetry/proto/metrics/v1/metrics_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/metrics/v1/metrics_pb.js +223 -0
- package/dist/generated/opentelemetry/proto/metrics/v1/metrics_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/profiles/v1development/profiles_pb.d.ts +678 -0
- package/dist/generated/opentelemetry/proto/profiles/v1development/profiles_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/profiles/v1development/profiles_pb.js +107 -0
- package/dist/generated/opentelemetry/proto/profiles/v1development/profiles_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/resource/v1/resource_pb.d.ts +46 -0
- package/dist/generated/opentelemetry/proto/resource/v1/resource_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/resource/v1/resource_pb.js +25 -0
- package/dist/generated/opentelemetry/proto/resource/v1/resource_pb.js.map +1 -0
- package/dist/generated/opentelemetry/proto/trace/v1/trace_pb.d.ts +569 -0
- package/dist/generated/opentelemetry/proto/trace/v1/trace_pb.d.ts.map +1 -0
- package/dist/generated/opentelemetry/proto/trace/v1/trace_pb.js +195 -0
- package/dist/generated/opentelemetry/proto/trace/v1/trace_pb.js.map +1 -0
- package/dist/lib/agent-judge/agent-as-judge.d.ts +157 -0
- package/dist/lib/agent-judge/agent-as-judge.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-as-judge.js +137 -0
- package/dist/lib/agent-judge/agent-as-judge.js.map +1 -0
- package/dist/lib/agent-judge/agent-as-judge.test.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-as-judge.test.js +839 -0
- package/dist/lib/agent-judge/agent-as-judge.test.js.map +1 -0
- package/dist/lib/agent-judge/agent-eval-metrics.d.ts +293 -0
- package/dist/lib/agent-judge/agent-eval-metrics.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-eval-metrics.js +715 -0
- package/dist/lib/agent-judge/agent-eval-metrics.js.map +1 -0
- package/dist/lib/agent-judge/agent-eval-metrics.test.d.ts +5 -0
- package/dist/lib/agent-judge/agent-eval-metrics.test.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-eval-metrics.test.js +676 -0
- package/dist/lib/agent-judge/agent-eval-metrics.test.js.map +1 -0
- package/dist/lib/agent-judge/agent-judge-classes.d.ts +95 -0
- package/dist/lib/agent-judge/agent-judge-classes.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-judge-classes.js +222 -0
- package/dist/lib/agent-judge/agent-judge-classes.js.map +1 -0
- package/dist/lib/agent-judge/agent-judge-classes.test.d.ts +6 -0
- package/dist/lib/agent-judge/agent-judge-classes.test.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-judge-classes.test.js +271 -0
- package/dist/lib/agent-judge/agent-judge-classes.test.js.map +1 -0
- package/dist/lib/agent-judge/agent-judge-consensus.d.ts +58 -0
- package/dist/lib/agent-judge/agent-judge-consensus.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-judge-consensus.js +149 -0
- package/dist/lib/agent-judge/agent-judge-consensus.js.map +1 -0
- package/dist/lib/agent-judge/agent-judge-consensus.test.d.ts +2 -0
- package/dist/lib/agent-judge/agent-judge-consensus.test.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-judge-consensus.test.js +170 -0
- package/dist/lib/agent-judge/agent-judge-consensus.test.js.map +1 -0
- package/dist/lib/agent-judge/agent-judge-verification.d.ts +89 -0
- package/dist/lib/agent-judge/agent-judge-verification.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-judge-verification.js +235 -0
- package/dist/lib/agent-judge/agent-judge-verification.js.map +1 -0
- package/dist/lib/agent-judge/agent-judge-verification.test.d.ts +5 -0
- package/dist/lib/agent-judge/agent-judge-verification.test.d.ts.map +1 -0
- package/dist/lib/agent-judge/agent-judge-verification.test.js +399 -0
- package/dist/lib/agent-judge/agent-judge-verification.test.js.map +1 -0
- package/dist/lib/audit/agent-auditor-scoring.d.ts +167 -0
- package/dist/lib/audit/agent-auditor-scoring.d.ts.map +1 -0
- package/dist/lib/audit/agent-auditor-scoring.js +338 -0
- package/dist/lib/audit/agent-auditor-scoring.js.map +1 -0
- package/dist/lib/audit/agent-auditor-scoring.test.d.ts +2 -0
- package/dist/lib/audit/agent-auditor-scoring.test.d.ts.map +1 -0
- package/dist/lib/audit/agent-auditor-scoring.test.js +576 -0
- package/dist/lib/audit/agent-auditor-scoring.test.js.map +1 -0
- package/dist/lib/audit/audit-record.d.ts +139 -0
- package/dist/lib/audit/audit-record.d.ts.map +1 -0
- package/dist/lib/audit/audit-record.js +288 -0
- package/dist/lib/audit/audit-record.js.map +1 -0
- package/dist/lib/audit/audit-record.test.d.ts +5 -0
- package/dist/lib/audit/audit-record.test.d.ts.map +1 -0
- package/dist/lib/audit/audit-record.test.js +258 -0
- package/dist/lib/audit/audit-record.test.js.map +1 -0
- package/dist/lib/audit/audit-scoring-constants.d.ts +57 -0
- package/dist/lib/audit/audit-scoring-constants.d.ts.map +1 -0
- package/dist/lib/audit/audit-scoring-constants.js +59 -0
- package/dist/lib/audit/audit-scoring-constants.js.map +1 -0
- package/dist/lib/audit/compliance-report.d.ts +125 -0
- package/dist/lib/audit/compliance-report.d.ts.map +1 -0
- package/dist/lib/audit/compliance-report.js +205 -0
- package/dist/lib/audit/compliance-report.js.map +1 -0
- package/dist/lib/audit/compliance-report.test.d.ts +5 -0
- package/dist/lib/audit/compliance-report.test.d.ts.map +1 -0
- package/dist/lib/audit/compliance-report.test.js +290 -0
- package/dist/lib/audit/compliance-report.test.js.map +1 -0
- package/dist/lib/audit/retention-guard.d.ts +41 -0
- package/dist/lib/audit/retention-guard.d.ts.map +1 -0
- package/dist/lib/audit/retention-guard.js +103 -0
- package/dist/lib/audit/retention-guard.js.map +1 -0
- package/dist/lib/audit/retention-guard.test.d.ts +5 -0
- package/dist/lib/audit/retention-guard.test.d.ts.map +1 -0
- package/dist/lib/audit/retention-guard.test.js +109 -0
- package/dist/lib/audit/retention-guard.test.js.map +1 -0
- package/dist/lib/audit/skill-auditor-scoring.d.ts +69 -0
- package/dist/lib/audit/skill-auditor-scoring.d.ts.map +1 -0
- package/dist/lib/audit/skill-auditor-scoring.js +149 -0
- package/dist/lib/audit/skill-auditor-scoring.js.map +1 -0
- package/dist/lib/audit/skill-auditor-scoring.test.d.ts +2 -0
- package/dist/lib/audit/skill-auditor-scoring.test.d.ts.map +1 -0
- package/dist/lib/audit/skill-auditor-scoring.test.js +369 -0
- package/dist/lib/audit/skill-auditor-scoring.test.js.map +1 -0
- package/dist/lib/audit/verification-events.d.ts +119 -0
- package/dist/lib/audit/verification-events.d.ts.map +1 -0
- package/dist/lib/audit/verification-events.js +175 -0
- package/dist/lib/audit/verification-events.js.map +1 -0
- package/dist/lib/audit/verification-events.test.d.ts.map +1 -0
- package/dist/lib/audit/verification-events.test.js +197 -0
- package/dist/lib/audit/verification-events.test.js.map +1 -0
- package/dist/lib/core/constants-models.d.ts +90 -0
- package/dist/lib/core/constants-models.d.ts.map +1 -0
- package/dist/lib/core/constants-models.js +208 -0
- package/dist/lib/core/constants-models.js.map +1 -0
- package/dist/lib/core/constants-otel.d.ts +68 -0
- package/dist/lib/core/constants-otel.d.ts.map +1 -0
- package/dist/lib/core/constants-otel.js +128 -0
- package/dist/lib/core/constants-otel.js.map +1 -0
- package/dist/lib/core/constants-symlink.test.d.ts.map +1 -0
- package/dist/lib/core/constants-symlink.test.js +358 -0
- package/dist/lib/core/constants-symlink.test.js.map +1 -0
- package/dist/lib/core/constants-telemetry.d.ts +21 -0
- package/dist/lib/core/constants-telemetry.d.ts.map +1 -0
- package/dist/lib/core/constants-telemetry.js +162 -0
- package/dist/lib/core/constants-telemetry.js.map +1 -0
- package/dist/lib/core/constants.d.ts +152 -0
- package/dist/lib/core/constants.d.ts.map +1 -0
- package/dist/lib/core/constants.js +223 -0
- package/dist/lib/core/constants.js.map +1 -0
- package/dist/lib/core/constants.test.d.ts.map +1 -0
- package/dist/lib/core/constants.test.js +833 -0
- package/dist/lib/core/constants.test.js.map +1 -0
- package/dist/lib/core/doc-sync.test.d.ts +9 -0
- package/dist/lib/core/doc-sync.test.d.ts.map +1 -0
- package/dist/lib/core/doc-sync.test.js +159 -0
- package/dist/lib/core/doc-sync.test.js.map +1 -0
- package/dist/lib/core/edge-cases.test.d.ts.map +1 -0
- package/dist/lib/core/edge-cases.test.js +637 -0
- package/dist/lib/core/edge-cases.test.js.map +1 -0
- package/dist/lib/core/file-utils.d.ts +360 -0
- package/dist/lib/core/file-utils.d.ts.map +1 -0
- package/dist/lib/core/file-utils.js +890 -0
- package/dist/lib/core/file-utils.js.map +1 -0
- package/dist/lib/core/file-utils.test-constants.d.ts +38 -0
- package/dist/lib/core/file-utils.test-constants.d.ts.map +1 -0
- package/dist/lib/core/file-utils.test-constants.js +40 -0
- package/dist/lib/core/file-utils.test-constants.js.map +1 -0
- package/dist/lib/core/file-utils.test.d.ts.map +1 -0
- package/dist/lib/core/file-utils.test.js +1329 -0
- package/dist/lib/core/file-utils.test.js.map +1 -0
- package/dist/lib/core/input-validator.d.ts +125 -0
- package/dist/lib/core/input-validator.d.ts.map +1 -0
- package/dist/lib/core/input-validator.fuzz.test.d.ts.map +1 -0
- package/dist/lib/core/input-validator.fuzz.test.js +302 -0
- package/dist/lib/core/input-validator.fuzz.test.js.map +1 -0
- package/dist/lib/core/input-validator.js +348 -0
- package/dist/lib/core/input-validator.js.map +1 -0
- package/dist/lib/core/input-validator.test.d.ts.map +1 -0
- package/dist/lib/core/input-validator.test.js +465 -0
- package/dist/lib/core/input-validator.test.js.map +1 -0
- package/dist/lib/core/logger.d.ts +32 -0
- package/dist/lib/core/logger.d.ts.map +1 -0
- package/dist/lib/core/logger.js +104 -0
- package/dist/lib/core/logger.js.map +1 -0
- package/dist/lib/core/logger.test.d.ts.map +1 -0
- package/dist/lib/core/logger.test.js.map +1 -0
- package/dist/lib/core/schema-types.d.ts +37 -0
- package/dist/lib/core/schema-types.d.ts.map +1 -0
- package/dist/lib/core/schema-types.js +29 -0
- package/dist/lib/core/schema-types.js.map +1 -0
- package/dist/lib/core/server-utils.d.ts +98 -0
- package/dist/lib/core/server-utils.d.ts.map +1 -0
- package/dist/lib/core/server-utils.js +193 -0
- package/dist/lib/core/server-utils.js.map +1 -0
- package/dist/lib/core/shared-schemas.d.ts +301 -0
- package/dist/lib/core/shared-schemas.d.ts.map +1 -0
- package/dist/lib/core/shared-schemas.js +222 -0
- package/dist/lib/core/shared-schemas.js.map +1 -0
- package/dist/lib/core/shared-schemas.test.d.ts.map +1 -0
- package/dist/lib/core/shared-schemas.test.js +136 -0
- package/dist/lib/core/shared-schemas.test.js.map +1 -0
- package/dist/lib/core/units.d.ts +67 -0
- package/dist/lib/core/units.d.ts.map +1 -0
- package/dist/lib/core/units.js +88 -0
- package/dist/lib/core/units.js.map +1 -0
- package/dist/lib/cost/cost-estimation.d.ts +264 -0
- package/dist/lib/cost/cost-estimation.d.ts.map +1 -0
- package/dist/lib/cost/cost-estimation.js +541 -0
- package/dist/lib/cost/cost-estimation.js.map +1 -0
- package/dist/lib/cost/cost-estimation.test.d.ts +5 -0
- package/dist/lib/cost/cost-estimation.test.d.ts.map +1 -0
- package/dist/lib/cost/cost-estimation.test.js +701 -0
- package/dist/lib/cost/cost-estimation.test.js.map +1 -0
- package/dist/lib/cost/pricing-cache.d.ts +59 -0
- package/dist/lib/cost/pricing-cache.d.ts.map +1 -0
- package/dist/lib/cost/pricing-cache.js +120 -0
- package/dist/lib/cost/pricing-cache.js.map +1 -0
- package/dist/lib/cost/pricing-cache.test.d.ts +5 -0
- package/dist/lib/cost/pricing-cache.test.d.ts.map +1 -0
- package/dist/lib/cost/pricing-cache.test.js +176 -0
- package/dist/lib/cost/pricing-cache.test.js.map +1 -0
- package/dist/lib/dashboard-file-utils.d.ts +35 -0
- package/dist/lib/dashboard-file-utils.d.ts.map +1 -0
- package/dist/lib/dashboard-file-utils.js +94 -0
- package/dist/lib/dashboard-file-utils.js.map +1 -0
- package/dist/lib/errors/error-sanitizer.d.ts +62 -0
- package/dist/lib/errors/error-sanitizer.d.ts.map +1 -0
- package/dist/lib/errors/error-sanitizer.js +235 -0
- package/dist/lib/errors/error-sanitizer.js.map +1 -0
- package/dist/lib/errors/error-sanitizer.test.d.ts.map +1 -0
- package/dist/lib/errors/error-sanitizer.test.js +534 -0
- package/dist/lib/errors/error-sanitizer.test.js.map +1 -0
- package/dist/lib/errors/error-types.d.ts +59 -0
- package/dist/lib/errors/error-types.d.ts.map +1 -0
- package/dist/lib/errors/error-types.js +187 -0
- package/dist/lib/errors/error-types.js.map +1 -0
- package/dist/lib/errors/error-types.test.d.ts.map +1 -0
- package/dist/lib/errors/error-types.test.js +246 -0
- package/dist/lib/errors/error-types.test.js.map +1 -0
- package/dist/lib/errors/query-sanitizer.d.ts.map +1 -0
- package/dist/lib/errors/query-sanitizer.js +269 -0
- package/dist/lib/errors/query-sanitizer.js.map +1 -0
- package/dist/lib/errors/query-sanitizer.test.d.ts.map +1 -0
- package/dist/lib/errors/query-sanitizer.test.js +403 -0
- package/dist/lib/errors/query-sanitizer.test.js.map +1 -0
- package/dist/lib/exports/confident-export.d.ts +105 -0
- package/dist/lib/exports/confident-export.d.ts.map +1 -0
- package/dist/lib/exports/confident-export.js +385 -0
- package/dist/lib/exports/confident-export.js.map +1 -0
- package/dist/lib/exports/confident-export.test.d.ts.map +1 -0
- package/dist/lib/exports/confident-export.test.js +848 -0
- package/dist/lib/exports/confident-export.test.js.map +1 -0
- package/dist/lib/exports/datadog-export.d.ts +200 -0
- package/dist/lib/exports/datadog-export.d.ts.map +1 -0
- package/dist/lib/exports/datadog-export.js +488 -0
- package/dist/lib/exports/datadog-export.js.map +1 -0
- package/dist/lib/exports/datadog-export.test.d.ts +2 -0
- package/dist/lib/exports/datadog-export.test.d.ts.map +1 -0
- package/dist/lib/exports/datadog-export.test.js +890 -0
- package/dist/lib/exports/datadog-export.test.js.map +1 -0
- package/dist/lib/exports/export-config-schemas.d.ts +67 -0
- package/dist/lib/exports/export-config-schemas.d.ts.map +1 -0
- package/dist/lib/exports/export-config-schemas.js +120 -0
- package/dist/lib/exports/export-config-schemas.js.map +1 -0
- package/dist/lib/exports/export-config-schemas.test.d.ts +8 -0
- package/dist/lib/exports/export-config-schemas.test.d.ts.map +1 -0
- package/dist/lib/exports/export-config-schemas.test.js +503 -0
- package/dist/lib/exports/export-config-schemas.test.js.map +1 -0
- package/dist/lib/exports/export-utils.d.ts +127 -0
- package/dist/lib/exports/export-utils.d.ts.map +1 -0
- package/dist/lib/exports/export-utils.js +303 -0
- package/dist/lib/exports/export-utils.js.map +1 -0
- package/dist/lib/exports/export-utils.test.d.ts.map +1 -0
- package/dist/lib/exports/export-utils.test.js +344 -0
- package/dist/lib/exports/export-utils.test.js.map +1 -0
- package/dist/lib/exports/langfuse-export.d.ts +129 -0
- package/dist/lib/exports/langfuse-export.d.ts.map +1 -0
- package/dist/lib/exports/langfuse-export.js +370 -0
- package/dist/lib/exports/langfuse-export.js.map +1 -0
- package/dist/lib/exports/langfuse-export.test.d.ts.map +1 -0
- package/dist/lib/exports/langfuse-export.test.js +1020 -0
- package/dist/lib/exports/langfuse-export.test.js.map +1 -0
- package/dist/lib/exports/otlp-export.d.ts +179 -0
- package/dist/lib/exports/otlp-export.d.ts.map +1 -0
- package/dist/lib/exports/otlp-export.js +397 -0
- package/dist/lib/exports/otlp-export.js.map +1 -0
- package/dist/lib/exports/otlp-format-converter.d.ts +70 -0
- package/dist/lib/exports/otlp-format-converter.d.ts.map +1 -0
- package/dist/lib/exports/otlp-format-converter.js +401 -0
- package/dist/lib/exports/otlp-format-converter.js.map +1 -0
- package/dist/lib/exports/otlp-proto-encode.d.ts +53 -0
- package/dist/lib/exports/otlp-proto-encode.d.ts.map +1 -0
- package/dist/lib/exports/otlp-proto-encode.js +165 -0
- package/dist/lib/exports/otlp-proto-encode.js.map +1 -0
- package/dist/lib/exports/otlp-proto-encode.test.d.ts +7 -0
- package/dist/lib/exports/otlp-proto-encode.test.d.ts.map +1 -0
- package/dist/lib/exports/otlp-proto-encode.test.js +997 -0
- package/dist/lib/exports/otlp-proto-encode.test.js.map +1 -0
- package/dist/lib/exports/phoenix-export.d.ts +119 -0
- package/dist/lib/exports/phoenix-export.d.ts.map +1 -0
- package/dist/lib/exports/phoenix-export.js +448 -0
- package/dist/lib/exports/phoenix-export.js.map +1 -0
- package/dist/lib/exports/phoenix-export.test.d.ts.map +1 -0
- package/dist/lib/exports/phoenix-export.test.js +816 -0
- package/dist/lib/exports/phoenix-export.test.js.map +1 -0
- package/dist/lib/index.d.ts +16 -0
- package/dist/lib/index.d.ts.map +1 -0
- package/dist/lib/index.js +31 -0
- package/dist/lib/index.js.map +1 -0
- package/dist/lib/judge/evaluation-hooks-schemas.d.ts +186 -0
- package/dist/lib/judge/evaluation-hooks-schemas.d.ts.map +1 -0
- package/dist/lib/judge/evaluation-hooks-schemas.js +125 -0
- package/dist/lib/judge/evaluation-hooks-schemas.js.map +1 -0
- package/dist/lib/judge/evaluation-hooks.d.ts +88 -0
- package/dist/lib/judge/evaluation-hooks.d.ts.map +1 -0
- package/dist/lib/judge/evaluation-hooks.js +658 -0
- package/dist/lib/judge/evaluation-hooks.js.map +1 -0
- package/dist/lib/judge/evaluation-hooks.test.d.ts.map +1 -0
- package/dist/lib/judge/evaluation-hooks.test.js +934 -0
- package/dist/lib/judge/evaluation-hooks.test.js.map +1 -0
- package/dist/lib/judge/llm-as-judge.d.ts +138 -0
- package/dist/lib/judge/llm-as-judge.d.ts.map +1 -0
- package/dist/lib/judge/llm-as-judge.js +103 -0
- package/dist/lib/judge/llm-as-judge.js.map +1 -0
- package/dist/lib/judge/llm-as-judge.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-as-judge.test.js +2179 -0
- package/dist/lib/judge/llm-as-judge.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-bias.d.ts +44 -0
- package/dist/lib/judge/llm-judge-bias.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-bias.js +130 -0
- package/dist/lib/judge/llm-judge-bias.js.map +1 -0
- package/dist/lib/judge/llm-judge-bias.test.d.ts +2 -0
- package/dist/lib/judge/llm-judge-bias.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-bias.test.js +380 -0
- package/dist/lib/judge/llm-judge-bias.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-code.d.ts +99 -0
- package/dist/lib/judge/llm-judge-code.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-code.js +261 -0
- package/dist/lib/judge/llm-judge-code.js.map +1 -0
- package/dist/lib/judge/llm-judge-code.test.d.ts +2 -0
- package/dist/lib/judge/llm-judge-code.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-code.test.js +981 -0
- package/dist/lib/judge/llm-judge-code.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-config.d.ts +241 -0
- package/dist/lib/judge/llm-judge-config.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-config.js +390 -0
- package/dist/lib/judge/llm-judge-config.js.map +1 -0
- package/dist/lib/judge/llm-judge-config.test.d.ts +5 -0
- package/dist/lib/judge/llm-judge-config.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-config.test.js +392 -0
- package/dist/lib/judge/llm-judge-config.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-constants.d.ts +111 -0
- package/dist/lib/judge/llm-judge-constants.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-constants.js +150 -0
- package/dist/lib/judge/llm-judge-constants.js.map +1 -0
- package/dist/lib/judge/llm-judge-dag.d.ts +57 -0
- package/dist/lib/judge/llm-judge-dag.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-dag.js +217 -0
- package/dist/lib/judge/llm-judge-dag.js.map +1 -0
- package/dist/lib/judge/llm-judge-dag.test.d.ts +8 -0
- package/dist/lib/judge/llm-judge-dag.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-dag.test.js +973 -0
- package/dist/lib/judge/llm-judge-dag.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-domain.d.ts +42 -0
- package/dist/lib/judge/llm-judge-domain.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-domain.js +167 -0
- package/dist/lib/judge/llm-judge-domain.js.map +1 -0
- package/dist/lib/judge/llm-judge-domain.test.d.ts +6 -0
- package/dist/lib/judge/llm-judge-domain.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-domain.test.js +337 -0
- package/dist/lib/judge/llm-judge-domain.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-geval.d.ts +42 -0
- package/dist/lib/judge/llm-judge-geval.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-geval.js +213 -0
- package/dist/lib/judge/llm-judge-geval.js.map +1 -0
- package/dist/lib/judge/llm-judge-geval.test.d.ts +2 -0
- package/dist/lib/judge/llm-judge-geval.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-geval.test.js +556 -0
- package/dist/lib/judge/llm-judge-geval.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-otel.test.d.ts +9 -0
- package/dist/lib/judge/llm-judge-otel.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-otel.test.js +91 -0
- package/dist/lib/judge/llm-judge-otel.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-qag.d.ts +38 -0
- package/dist/lib/judge/llm-judge-qag.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-qag.js +205 -0
- package/dist/lib/judge/llm-judge-qag.js.map +1 -0
- package/dist/lib/judge/llm-judge-qag.test.d.ts +2 -0
- package/dist/lib/judge/llm-judge-qag.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-qag.test.js +386 -0
- package/dist/lib/judge/llm-judge-qag.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-resilience.d.ts +74 -0
- package/dist/lib/judge/llm-judge-resilience.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-resilience.js +146 -0
- package/dist/lib/judge/llm-judge-resilience.js.map +1 -0
- package/dist/lib/judge/llm-judge-resilience.test.d.ts +2 -0
- package/dist/lib/judge/llm-judge-resilience.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-resilience.test.js +353 -0
- package/dist/lib/judge/llm-judge-resilience.test.js.map +1 -0
- package/dist/lib/judge/llm-judge-security.d.ts +106 -0
- package/dist/lib/judge/llm-judge-security.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-security.js +314 -0
- package/dist/lib/judge/llm-judge-security.js.map +1 -0
- package/dist/lib/judge/llm-judge-security.test.d.ts +2 -0
- package/dist/lib/judge/llm-judge-security.test.d.ts.map +1 -0
- package/dist/lib/judge/llm-judge-security.test.js +1011 -0
- package/dist/lib/judge/llm-judge-security.test.js.map +1 -0
- package/dist/lib/observability/context-accumulator.d.ts +32 -0
- package/dist/lib/observability/context-accumulator.d.ts.map +1 -0
- package/dist/lib/observability/context-accumulator.js +87 -0
- package/dist/lib/observability/context-accumulator.js.map +1 -0
- package/dist/lib/observability/evaluation-events.d.ts +35 -0
- package/dist/lib/observability/evaluation-events.d.ts.map +1 -0
- package/dist/lib/observability/evaluation-events.js +90 -0
- package/dist/lib/observability/evaluation-events.js.map +1 -0
- package/dist/lib/observability/file-span-exporter.d.ts +17 -0
- package/dist/lib/observability/file-span-exporter.d.ts.map +1 -0
- package/dist/lib/observability/file-span-exporter.js +49 -0
- package/dist/lib/observability/file-span-exporter.js.map +1 -0
- package/dist/lib/observability/histogram-bucket-constants.d.ts +25 -0
- package/dist/lib/observability/histogram-bucket-constants.d.ts.map +1 -0
- package/dist/lib/observability/histogram-bucket-constants.js +60 -0
- package/dist/lib/observability/histogram-bucket-constants.js.map +1 -0
- package/dist/lib/observability/histogram.d.ts +112 -0
- package/dist/lib/observability/histogram.d.ts.map +1 -0
- package/dist/lib/observability/histogram.js +170 -0
- package/dist/lib/observability/histogram.js.map +1 -0
- package/dist/lib/observability/histogram.test.d.ts.map +1 -0
- package/dist/lib/observability/histogram.test.js +385 -0
- package/dist/lib/observability/histogram.test.js.map +1 -0
- package/dist/lib/observability/indexer.d.ts +114 -0
- package/dist/lib/observability/indexer.d.ts.map +1 -0
- package/dist/lib/observability/indexer.js +402 -0
- package/dist/lib/observability/indexer.js.map +1 -0
- package/dist/lib/observability/indexer.test.d.ts.map +1 -0
- package/dist/lib/observability/indexer.test.js +713 -0
- package/dist/lib/observability/indexer.test.js.map +1 -0
- package/dist/lib/observability/instrumentation-eval.test.d.ts +5 -0
- package/dist/lib/observability/instrumentation-eval.test.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation-eval.test.js +63 -0
- package/dist/lib/observability/instrumentation-eval.test.js.map +1 -0
- package/dist/lib/observability/instrumentation-init-errors.test.d.ts +13 -0
- package/dist/lib/observability/instrumentation-init-errors.test.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation-init-errors.test.js +194 -0
- package/dist/lib/observability/instrumentation-init-errors.test.js.map +1 -0
- package/dist/lib/observability/instrumentation-retry-timeout.test.d.ts +15 -0
- package/dist/lib/observability/instrumentation-retry-timeout.test.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation-retry-timeout.test.js +188 -0
- package/dist/lib/observability/instrumentation-retry-timeout.test.js.map +1 -0
- package/dist/lib/observability/instrumentation-set-otel.test.d.ts +5 -0
- package/dist/lib/observability/instrumentation-set-otel.test.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation-set-otel.test.js +59 -0
- package/dist/lib/observability/instrumentation-set-otel.test.js.map +1 -0
- package/dist/lib/observability/instrumentation.d.ts +158 -0
- package/dist/lib/observability/instrumentation.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation.integration.test.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation.integration.test.js +590 -0
- package/dist/lib/observability/instrumentation.integration.test.js.map +1 -0
- package/dist/lib/observability/instrumentation.js +512 -0
- package/dist/lib/observability/instrumentation.js.map +1 -0
- package/dist/lib/observability/instrumentation.test.d.ts.map +1 -0
- package/dist/lib/observability/instrumentation.test.js +822 -0
- package/dist/lib/observability/instrumentation.test.js.map +1 -0
- package/dist/lib/observability/mcp-semconv-constants.d.ts +98 -0
- package/dist/lib/observability/mcp-semconv-constants.d.ts.map +1 -0
- package/dist/lib/observability/mcp-semconv-constants.js +102 -0
- package/dist/lib/observability/mcp-semconv-constants.js.map +1 -0
- package/dist/lib/observability/mcp-semconv.d.ts +37 -0
- package/dist/lib/observability/mcp-semconv.d.ts.map +1 -0
- package/dist/lib/observability/mcp-semconv.js +87 -0
- package/dist/lib/observability/mcp-semconv.js.map +1 -0
- package/dist/lib/observability/mcp-semconv.test.d.ts +2 -0
- package/dist/lib/observability/mcp-semconv.test.d.ts.map +1 -0
- package/dist/lib/observability/mcp-semconv.test.js +168 -0
- package/dist/lib/observability/mcp-semconv.test.js.map +1 -0
- package/dist/lib/observability/metrics.d.ts +100 -0
- package/dist/lib/observability/metrics.d.ts.map +1 -0
- package/dist/lib/observability/metrics.js +429 -0
- package/dist/lib/observability/metrics.js.map +1 -0
- package/dist/lib/observability/metrics.test.d.ts.map +1 -0
- package/dist/lib/observability/metrics.test.js +191 -0
- package/dist/lib/observability/metrics.test.js.map +1 -0
- package/dist/lib/observability/observability-test-constants.d.ts +34 -0
- package/dist/lib/observability/observability-test-constants.d.ts.map +1 -0
- package/dist/lib/observability/observability-test-constants.js +55 -0
- package/dist/lib/observability/observability-test-constants.js.map +1 -0
- package/dist/lib/observability/opentelemetry-resources.test.d.ts +2 -0
- package/dist/lib/observability/opentelemetry-resources.test.d.ts.map +1 -0
- package/dist/lib/observability/opentelemetry-resources.test.js +19 -0
- package/dist/lib/observability/opentelemetry-resources.test.js.map +1 -0
- package/dist/lib/observability/parse-stats.d.ts.map +1 -0
- package/dist/lib/observability/parse-stats.js +207 -0
- package/dist/lib/observability/parse-stats.js.map +1 -0
- package/dist/lib/observability/parse-stats.test.d.ts.map +1 -0
- package/dist/lib/observability/parse-stats.test.js +287 -0
- package/dist/lib/observability/parse-stats.test.js.map +1 -0
- package/dist/lib/observability/render-trace-tree.d.ts +31 -0
- package/dist/lib/observability/render-trace-tree.d.ts.map +1 -0
- package/dist/lib/observability/render-trace-tree.js +95 -0
- package/dist/lib/observability/render-trace-tree.js.map +1 -0
- package/dist/lib/observability/render-trace-tree.test.d.ts +5 -0
- package/dist/lib/observability/render-trace-tree.test.d.ts.map +1 -0
- package/dist/lib/observability/render-trace-tree.test.js +97 -0
- package/dist/lib/observability/render-trace-tree.test.js.map +1 -0
- package/dist/lib/observability/span-attributes.d.ts +27 -0
- package/dist/lib/observability/span-attributes.d.ts.map +1 -0
- package/dist/lib/observability/span-attributes.js +85 -0
- package/dist/lib/observability/span-attributes.js.map +1 -0
- package/dist/lib/observability/trace-anomaly-detector.d.ts +23 -0
- package/dist/lib/observability/trace-anomaly-detector.d.ts.map +1 -0
- package/dist/lib/observability/trace-anomaly-detector.js +211 -0
- package/dist/lib/observability/trace-anomaly-detector.js.map +1 -0
- package/dist/lib/observability/trace-anomaly-detector.test.d.ts +5 -0
- package/dist/lib/observability/trace-anomaly-detector.test.d.ts.map +1 -0
- package/dist/lib/observability/trace-anomaly-detector.test.js +224 -0
- package/dist/lib/observability/trace-anomaly-detector.test.js.map +1 -0
- package/dist/lib/observability/trace-anomaly-schemas.d.ts +189 -0
- package/dist/lib/observability/trace-anomaly-schemas.d.ts.map +1 -0
- package/dist/lib/observability/trace-anomaly-schemas.js +167 -0
- package/dist/lib/observability/trace-anomaly-schemas.js.map +1 -0
- package/dist/lib/privacy/content-redaction.d.ts +141 -0
- package/dist/lib/privacy/content-redaction.d.ts.map +1 -0
- package/dist/lib/privacy/content-redaction.js +210 -0
- package/dist/lib/privacy/content-redaction.js.map +1 -0
- package/dist/lib/privacy/content-redaction.test.d.ts +2 -0
- package/dist/lib/privacy/content-redaction.test.d.ts.map +1 -0
- package/dist/lib/privacy/content-redaction.test.js +302 -0
- package/dist/lib/privacy/content-redaction.test.js.map +1 -0
- package/dist/lib/quality/bucket-utils.d.ts +17 -0
- package/dist/lib/quality/bucket-utils.d.ts.map +1 -0
- package/dist/lib/quality/bucket-utils.js +31 -0
- package/dist/lib/quality/bucket-utils.js.map +1 -0
- package/dist/lib/quality/bucket-utils.test.d.ts +2 -0
- package/dist/lib/quality/bucket-utils.test.d.ts.map +1 -0
- package/dist/lib/quality/bucket-utils.test.js +42 -0
- package/dist/lib/quality/bucket-utils.test.js.map +1 -0
- package/dist/lib/quality/qfe-backtest-detail.test.d.ts +5 -0
- package/dist/lib/quality/qfe-backtest-detail.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-backtest-detail.test.js +179 -0
- package/dist/lib/quality/qfe-backtest-detail.test.js.map +1 -0
- package/dist/lib/quality/qfe-calibration-paths.test.d.ts +5 -0
- package/dist/lib/quality/qfe-calibration-paths.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-calibration-paths.test.js +203 -0
- package/dist/lib/quality/qfe-calibration-paths.test.js.map +1 -0
- package/dist/lib/quality/qfe-correlation-helpers.test.d.ts +6 -0
- package/dist/lib/quality/qfe-correlation-helpers.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-correlation-helpers.test.js +143 -0
- package/dist/lib/quality/qfe-correlation-helpers.test.js.map +1 -0
- package/dist/lib/quality/qfe-cqi-paths.test.d.ts +6 -0
- package/dist/lib/quality/qfe-cqi-paths.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-cqi-paths.test.js +231 -0
- package/dist/lib/quality/qfe-cqi-paths.test.js.map +1 -0
- package/dist/lib/quality/qfe-critic-internals.test.d.ts +6 -0
- package/dist/lib/quality/qfe-critic-internals.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-critic-internals.test.js +191 -0
- package/dist/lib/quality/qfe-critic-internals.test.js.map +1 -0
- package/dist/lib/quality/qfe-derived-paths.test.d.ts +2 -0
- package/dist/lib/quality/qfe-derived-paths.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-derived-paths.test.js +372 -0
- package/dist/lib/quality/qfe-derived-paths.test.js.map +1 -0
- package/dist/lib/quality/qfe-dynamics-paths.test.d.ts +8 -0
- package/dist/lib/quality/qfe-dynamics-paths.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-dynamics-paths.test.js +223 -0
- package/dist/lib/quality/qfe-dynamics-paths.test.js.map +1 -0
- package/dist/lib/quality/qfe-granger-internals.test.d.ts +6 -0
- package/dist/lib/quality/qfe-granger-internals.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-granger-internals.test.js +158 -0
- package/dist/lib/quality/qfe-granger-internals.test.js.map +1 -0
- package/dist/lib/quality/qfe-label-normalize.test.d.ts +7 -0
- package/dist/lib/quality/qfe-label-normalize.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-label-normalize.test.js +332 -0
- package/dist/lib/quality/qfe-label-normalize.test.js.map +1 -0
- package/dist/lib/quality/qfe-ordinal-edge.test.d.ts +6 -0
- package/dist/lib/quality/qfe-ordinal-edge.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-ordinal-edge.test.js +98 -0
- package/dist/lib/quality/qfe-ordinal-edge.test.js.map +1 -0
- package/dist/lib/quality/qfe-roles-detail.test.d.ts +5 -0
- package/dist/lib/quality/qfe-roles-detail.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-roles-detail.test.js +115 -0
- package/dist/lib/quality/qfe-roles-detail.test.js.map +1 -0
- package/dist/lib/quality/qfe-rolling-detail.test.d.ts +7 -0
- package/dist/lib/quality/qfe-rolling-detail.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-rolling-detail.test.js +249 -0
- package/dist/lib/quality/qfe-rolling-detail.test.js.map +1 -0
- package/dist/lib/quality/qfe-stats-internals.test.d.ts +7 -0
- package/dist/lib/quality/qfe-stats-internals.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-stats-internals.test.js +143 -0
- package/dist/lib/quality/qfe-stats-internals.test.js.map +1 -0
- package/dist/lib/quality/qfe-streaming.test.d.ts +5 -0
- package/dist/lib/quality/qfe-streaming.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-streaming.test.js +239 -0
- package/dist/lib/quality/qfe-streaming.test.js.map +1 -0
- package/dist/lib/quality/qfe-sweep-detail.test.d.ts +6 -0
- package/dist/lib/quality/qfe-sweep-detail.test.d.ts.map +1 -0
- package/dist/lib/quality/qfe-sweep-detail.test.js +291 -0
- package/dist/lib/quality/qfe-sweep-detail.test.js.map +1 -0
- package/dist/lib/quality/quality-alerts.d.ts +23 -0
- package/dist/lib/quality/quality-alerts.d.ts.map +1 -0
- package/dist/lib/quality/quality-alerts.js +89 -0
- package/dist/lib/quality/quality-alerts.js.map +1 -0
- package/dist/lib/quality/quality-alerts.test.d.ts +2 -0
- package/dist/lib/quality/quality-alerts.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-alerts.test.js +86 -0
- package/dist/lib/quality/quality-alerts.test.js.map +1 -0
- package/dist/lib/quality/quality-constants.d.ts +294 -0
- package/dist/lib/quality/quality-constants.d.ts.map +1 -0
- package/dist/lib/quality/quality-constants.js +335 -0
- package/dist/lib/quality/quality-constants.js.map +1 -0
- package/dist/lib/quality/quality-feature-engineering.d.ts +1071 -0
- package/dist/lib/quality/quality-feature-engineering.d.ts.map +1 -0
- package/dist/lib/quality/quality-feature-engineering.js +2076 -0
- package/dist/lib/quality/quality-feature-engineering.js.map +1 -0
- package/dist/lib/quality/quality-feature-engineering.test.d.ts +5 -0
- package/dist/lib/quality/quality-feature-engineering.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-feature-engineering.test.js +2908 -0
- package/dist/lib/quality/quality-feature-engineering.test.js.map +1 -0
- package/dist/lib/quality/quality-metrics.d.ts +943 -0
- package/dist/lib/quality/quality-metrics.d.ts.map +1 -0
- package/dist/lib/quality/quality-metrics.js +1151 -0
- package/dist/lib/quality/quality-metrics.js.map +1 -0
- package/dist/lib/quality/quality-metrics.test.d.ts +5 -0
- package/dist/lib/quality/quality-metrics.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-metrics.test.js +2766 -0
- package/dist/lib/quality/quality-metrics.test.js.map +1 -0
- package/dist/lib/quality/quality-multi-agent.d.ts +106 -0
- package/dist/lib/quality/quality-multi-agent.d.ts.map +1 -0
- package/dist/lib/quality/quality-multi-agent.js +124 -0
- package/dist/lib/quality/quality-multi-agent.js.map +1 -0
- package/dist/lib/quality/quality-multi-agent.test.d.ts +6 -0
- package/dist/lib/quality/quality-multi-agent.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-multi-agent.test.js +163 -0
- package/dist/lib/quality/quality-multi-agent.test.js.map +1 -0
- package/dist/lib/quality/quality-sla.d.ts +35 -0
- package/dist/lib/quality/quality-sla.d.ts.map +1 -0
- package/dist/lib/quality/quality-sla.js +62 -0
- package/dist/lib/quality/quality-sla.js.map +1 -0
- package/dist/lib/quality/quality-sla.test.d.ts +5 -0
- package/dist/lib/quality/quality-sla.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-sla.test.js +144 -0
- package/dist/lib/quality/quality-sla.test.js.map +1 -0
- package/dist/lib/quality/quality-test-constants.d.ts +23 -0
- package/dist/lib/quality/quality-test-constants.d.ts.map +1 -0
- package/dist/lib/quality/quality-test-constants.js +25 -0
- package/dist/lib/quality/quality-test-constants.js.map +1 -0
- package/dist/lib/quality/quality-trends.d.ts +101 -0
- package/dist/lib/quality/quality-trends.d.ts.map +1 -0
- package/dist/lib/quality/quality-trends.js +299 -0
- package/dist/lib/quality/quality-trends.js.map +1 -0
- package/dist/lib/quality/quality-trends.test.d.ts +6 -0
- package/dist/lib/quality/quality-trends.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-trends.test.js +377 -0
- package/dist/lib/quality/quality-trends.test.js.map +1 -0
- package/dist/lib/quality/quality-views.d.ts +966 -0
- package/dist/lib/quality/quality-views.d.ts.map +1 -0
- package/dist/lib/quality/quality-views.js +367 -0
- package/dist/lib/quality/quality-views.js.map +1 -0
- package/dist/lib/quality/quality-views.test.d.ts +6 -0
- package/dist/lib/quality/quality-views.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-views.test.js +262 -0
- package/dist/lib/quality/quality-views.test.js.map +1 -0
- package/dist/lib/quality/quality-visualization.d.ts +112 -0
- package/dist/lib/quality/quality-visualization.d.ts.map +1 -0
- package/dist/lib/quality/quality-visualization.js +136 -0
- package/dist/lib/quality/quality-visualization.js.map +1 -0
- package/dist/lib/quality/quality-visualization.test.d.ts +5 -0
- package/dist/lib/quality/quality-visualization.test.d.ts.map +1 -0
- package/dist/lib/quality/quality-visualization.test.js +189 -0
- package/dist/lib/quality/quality-visualization.test.js.map +1 -0
- package/dist/lib/resilience/cache.d.ts +56 -0
- package/dist/lib/resilience/cache.d.ts.map +1 -0
- package/dist/lib/resilience/cache.js +96 -0
- package/dist/lib/resilience/cache.js.map +1 -0
- package/dist/lib/resilience/cache.test.d.ts.map +1 -0
- package/dist/lib/resilience/cache.test.js +106 -0
- package/dist/lib/resilience/cache.test.js.map +1 -0
- package/dist/lib/resilience/circuit-breaker.d.ts +147 -0
- package/dist/lib/resilience/circuit-breaker.d.ts.map +1 -0
- package/dist/lib/resilience/circuit-breaker.js +251 -0
- package/dist/lib/resilience/circuit-breaker.js.map +1 -0
- package/dist/lib/resilience/circuit-breaker.test.d.ts.map +1 -0
- package/dist/lib/resilience/circuit-breaker.test.js +266 -0
- package/dist/lib/resilience/circuit-breaker.test.js.map +1 -0
- package/dist/lib/resilience/toon-encoder.d.ts +31 -0
- package/dist/lib/resilience/toon-encoder.d.ts.map +1 -0
- package/dist/lib/resilience/toon-encoder.js +66 -0
- package/dist/lib/resilience/toon-encoder.js.map +1 -0
- package/dist/lib/resilience/toon-encoder.test.d.ts.map +1 -0
- package/dist/lib/resilience/toon-encoder.test.js +86 -0
- package/dist/lib/resilience/toon-encoder.test.js.map +1 -0
- package/dist/lib/testing/mock-llm-builder.d.ts +139 -0
- package/dist/lib/testing/mock-llm-builder.d.ts.map +1 -0
- package/dist/lib/testing/mock-llm-builder.js +254 -0
- package/dist/lib/testing/mock-llm-builder.js.map +1 -0
- package/dist/lib/testing/mock-llm-builder.test.d.ts +5 -0
- package/dist/lib/testing/mock-llm-builder.test.d.ts.map +1 -0
- package/dist/lib/testing/mock-llm-builder.test.js +304 -0
- package/dist/lib/testing/mock-llm-builder.test.js.map +1 -0
- package/dist/lib/validation/api-schemas.d.ts +705 -0
- package/dist/lib/validation/api-schemas.d.ts.map +1 -0
- package/dist/lib/validation/api-schemas.js +351 -0
- package/dist/lib/validation/api-schemas.js.map +1 -0
- package/dist/lib/validation/api-schemas.test.d.ts +5 -0
- package/dist/lib/validation/api-schemas.test.d.ts.map +1 -0
- package/dist/lib/validation/api-schemas.test.js +427 -0
- package/dist/lib/validation/api-schemas.test.js.map +1 -0
- package/dist/lib/validation/dashboard-schemas.d.ts +203 -0
- package/dist/lib/validation/dashboard-schemas.d.ts.map +1 -0
- package/dist/lib/validation/dashboard-schemas.js +186 -0
- package/dist/lib/validation/dashboard-schemas.js.map +1 -0
- package/dist/lib/validation/dashboard-schemas.test.d.ts +5 -0
- package/dist/lib/validation/dashboard-schemas.test.d.ts.map +1 -0
- package/dist/lib/validation/dashboard-schemas.test.js +353 -0
- package/dist/lib/validation/dashboard-schemas.test.js.map +1 -0
- package/dist/server.d.ts +2 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +158 -144
- package/dist/server.js.map +1 -1
- package/dist/server.test.js +102 -95
- package/dist/server.test.js.map +1 -1
- package/dist/test-helpers/assertions.d.ts +6 -0
- package/dist/test-helpers/assertions.d.ts.map +1 -0
- package/dist/test-helpers/assertions.js +11 -0
- package/dist/test-helpers/assertions.js.map +1 -0
- package/dist/test-helpers/env-utils.d.ts +0 -64
- package/dist/test-helpers/env-utils.d.ts.map +1 -1
- package/dist/test-helpers/env-utils.js +0 -100
- package/dist/test-helpers/env-utils.js.map +1 -1
- package/dist/test-helpers/fuzz-generators.d.ts.map +1 -1
- package/dist/test-helpers/fuzz-generators.js +62 -22
- package/dist/test-helpers/fuzz-generators.js.map +1 -1
- package/dist/test-helpers/index.d.ts +3 -2
- package/dist/test-helpers/index.d.ts.map +1 -1
- package/dist/test-helpers/index.js +4 -2
- package/dist/test-helpers/index.js.map +1 -1
- package/dist/test-helpers/memfs-utils.test.js +81 -76
- package/dist/test-helpers/memfs-utils.test.js.map +1 -1
- package/dist/test-helpers/mock-backends.d.ts +19 -17
- package/dist/test-helpers/mock-backends.d.ts.map +1 -1
- package/dist/test-helpers/mock-backends.js +16 -4
- package/dist/test-helpers/mock-backends.js.map +1 -1
- package/dist/test-helpers/mock-backends.test.js +43 -112
- package/dist/test-helpers/mock-backends.test.js.map +1 -1
- package/dist/test-helpers/race-condition-helpers.d.ts.map +1 -1
- package/dist/test-helpers/race-condition-helpers.js +3 -2
- package/dist/test-helpers/race-condition-helpers.js.map +1 -1
- package/dist/test-helpers/schema-validators.d.ts +2 -2
- package/dist/test-helpers/schema-validators.d.ts.map +1 -1
- package/dist/test-helpers/schema-validators.js +35 -31
- package/dist/test-helpers/schema-validators.js.map +1 -1
- package/dist/test-helpers/test-constants.d.ts +74 -0
- package/dist/test-helpers/test-constants.d.ts.map +1 -0
- package/dist/test-helpers/test-constants.js +78 -0
- package/dist/test-helpers/test-constants.js.map +1 -0
- package/dist/test-helpers/test-data-builders.d.ts +25 -7
- package/dist/test-helpers/test-data-builders.d.ts.map +1 -1
- package/dist/test-helpers/test-data-builders.js +32 -9
- package/dist/test-helpers/test-data-builders.js.map +1 -1
- package/dist/test-helpers/test-data-builders.test.js +116 -107
- package/dist/test-helpers/test-data-builders.test.js.map +1 -1
- package/dist/test-helpers/tool-validators.d.ts +1 -1
- package/dist/test-helpers/tool-validators.d.ts.map +1 -1
- package/dist/test-helpers/tool-validators.js +10 -10
- package/dist/test-helpers/tool-validators.js.map +1 -1
- package/dist/tools/audit-trail.d.ts +170 -0
- package/dist/tools/audit-trail.d.ts.map +1 -0
- package/dist/tools/audit-trail.js +109 -0
- package/dist/tools/audit-trail.js.map +1 -0
- package/dist/tools/audit-trail.test.d.ts +5 -0
- package/dist/tools/audit-trail.test.d.ts.map +1 -0
- package/dist/tools/audit-trail.test.js +122 -0
- package/dist/tools/audit-trail.test.js.map +1 -0
- package/dist/tools/context-stats.d.ts +6 -20
- package/dist/tools/context-stats.d.ts.map +1 -1
- package/dist/tools/context-stats.js +106 -88
- package/dist/tools/context-stats.js.map +1 -1
- package/dist/tools/context-stats.test.js +109 -60
- package/dist/tools/context-stats.test.js.map +1 -1
- package/dist/tools/detect-trace-anomalies.d.ts +123 -0
- package/dist/tools/detect-trace-anomalies.d.ts.map +1 -0
- package/dist/tools/detect-trace-anomalies.js +66 -0
- package/dist/tools/detect-trace-anomalies.js.map +1 -0
- package/dist/tools/estimate-cost.d.ts +77 -0
- package/dist/tools/estimate-cost.d.ts.map +1 -0
- package/dist/tools/estimate-cost.js +104 -0
- package/dist/tools/estimate-cost.js.map +1 -0
- package/dist/tools/estimate-cost.test.d.ts +5 -0
- package/dist/tools/estimate-cost.test.d.ts.map +1 -0
- package/dist/tools/estimate-cost.test.js +343 -0
- package/dist/tools/estimate-cost.test.js.map +1 -0
- package/dist/tools/export-base.d.ts +77 -0
- package/dist/tools/export-base.d.ts.map +1 -0
- package/dist/tools/export-base.js +150 -0
- package/dist/tools/export-base.js.map +1 -0
- package/dist/tools/export-base.test.d.ts +18 -0
- package/dist/tools/export-base.test.d.ts.map +1 -0
- package/dist/tools/export-base.test.js +220 -0
- package/dist/tools/export-base.test.js.map +1 -0
- package/dist/tools/export-confident.d.ts +94 -90
- package/dist/tools/export-confident.d.ts.map +1 -1
- package/dist/tools/export-confident.js +17 -115
- package/dist/tools/export-confident.js.map +1 -1
- package/dist/tools/export-confident.test.js +79 -75
- package/dist/tools/export-confident.test.js.map +1 -1
- package/dist/tools/export-datadog.d.ts +77 -116
- package/dist/tools/export-datadog.d.ts.map +1 -1
- package/dist/tools/export-datadog.js +38 -40
- package/dist/tools/export-datadog.js.map +1 -1
- package/dist/tools/export-datadog.test.js +122 -165
- package/dist/tools/export-datadog.test.js.map +1 -1
- package/dist/tools/export-jaeger.d.ts +100 -0
- package/dist/tools/export-jaeger.d.ts.map +1 -0
- package/dist/tools/export-jaeger.js +154 -0
- package/dist/tools/export-jaeger.js.map +1 -0
- package/dist/tools/export-jaeger.test.d.ts +2 -0
- package/dist/tools/export-jaeger.test.d.ts.map +1 -0
- package/dist/tools/export-jaeger.test.js +113 -0
- package/dist/tools/export-jaeger.test.js.map +1 -0
- package/dist/tools/export-langfuse.d.ts +78 -80
- package/dist/tools/export-langfuse.d.ts.map +1 -1
- package/dist/tools/export-langfuse.js +15 -113
- package/dist/tools/export-langfuse.js.map +1 -1
- package/dist/tools/export-langfuse.test.js +70 -81
- package/dist/tools/export-langfuse.test.js.map +1 -1
- package/dist/tools/export-phoenix.d.ts +115 -90
- package/dist/tools/export-phoenix.d.ts.map +1 -1
- package/dist/tools/export-phoenix.js +29 -117
- package/dist/tools/export-phoenix.js.map +1 -1
- package/dist/tools/export-phoenix.test.js +95 -94
- package/dist/tools/export-phoenix.test.js.map +1 -1
- package/dist/tools/get-trace-url.d.ts +2 -10
- package/dist/tools/get-trace-url.d.ts.map +1 -1
- package/dist/tools/get-trace-url.js +5 -8
- package/dist/tools/get-trace-url.js.map +1 -1
- package/dist/tools/get-trace-url.test.js +81 -399
- package/dist/tools/get-trace-url.test.js.map +1 -1
- package/dist/tools/hallucination-detection.d.ts +203 -0
- package/dist/tools/hallucination-detection.d.ts.map +1 -0
- package/dist/tools/hallucination-detection.js +189 -0
- package/dist/tools/hallucination-detection.js.map +1 -0
- package/dist/tools/hallucination-detection.test.d.ts +5 -0
- package/dist/tools/hallucination-detection.test.d.ts.map +1 -0
- package/dist/tools/hallucination-detection.test.js +529 -0
- package/dist/tools/hallucination-detection.test.js.map +1 -0
- package/dist/tools/health-check.d.ts +9 -16
- package/dist/tools/health-check.d.ts.map +1 -1
- package/dist/tools/health-check.js +88 -101
- package/dist/tools/health-check.js.map +1 -1
- package/dist/tools/health-check.test.js +72 -165
- package/dist/tools/health-check.test.js.map +1 -1
- package/dist/tools/index.d.ts +13 -0
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +13 -0
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/ingest-constants.d.ts +8 -0
- package/dist/tools/ingest-constants.d.ts.map +1 -0
- package/dist/tools/ingest-constants.js +8 -0
- package/dist/tools/ingest-constants.js.map +1 -0
- package/dist/tools/ingest-spans.d.ts +45 -0
- package/dist/tools/ingest-spans.d.ts.map +1 -0
- package/dist/tools/ingest-spans.js +129 -0
- package/dist/tools/ingest-spans.js.map +1 -0
- package/dist/tools/ingest-spans.test.d.ts +5 -0
- package/dist/tools/ingest-spans.test.d.ts.map +1 -0
- package/dist/tools/ingest-spans.test.js +250 -0
- package/dist/tools/ingest-spans.test.js.map +1 -0
- package/dist/tools/ingest-traces.d.ts +76 -0
- package/dist/tools/ingest-traces.d.ts.map +1 -0
- package/dist/tools/ingest-traces.js +164 -0
- package/dist/tools/ingest-traces.js.map +1 -0
- package/dist/tools/ingest-traces.test.d.ts +5 -0
- package/dist/tools/ingest-traces.test.d.ts.map +1 -0
- package/dist/tools/ingest-traces.test.js +483 -0
- package/dist/tools/ingest-traces.test.js.map +1 -0
- package/dist/tools/inject-evaluations.d.ts +136 -1197
- package/dist/tools/inject-evaluations.d.ts.map +1 -1
- package/dist/tools/inject-evaluations.js +65 -53
- package/dist/tools/inject-evaluations.js.map +1 -1
- package/dist/tools/inject-evaluations.test.js +83 -71
- package/dist/tools/inject-evaluations.test.js.map +1 -1
- package/dist/tools/manage-datasets.d.ts +850 -0
- package/dist/tools/manage-datasets.d.ts.map +1 -0
- package/dist/tools/manage-datasets.js +139 -0
- package/dist/tools/manage-datasets.js.map +1 -0
- package/dist/tools/manage-datasets.test.d.ts +5 -0
- package/dist/tools/manage-datasets.test.d.ts.map +1 -0
- package/dist/tools/manage-datasets.test.js +430 -0
- package/dist/tools/manage-datasets.test.js.map +1 -0
- package/dist/tools/multi-agent-coordination.d.ts +178 -0
- package/dist/tools/multi-agent-coordination.d.ts.map +1 -0
- package/dist/tools/multi-agent-coordination.js +270 -0
- package/dist/tools/multi-agent-coordination.js.map +1 -0
- package/dist/tools/multi-agent-coordination.test.d.ts +5 -0
- package/dist/tools/multi-agent-coordination.test.d.ts.map +1 -0
- package/dist/tools/multi-agent-coordination.test.js +530 -0
- package/dist/tools/multi-agent-coordination.test.js.map +1 -0
- package/dist/tools/query-evaluations.d.ts +147 -105
- package/dist/tools/query-evaluations.d.ts.map +1 -1
- package/dist/tools/query-evaluations.js +205 -178
- package/dist/tools/query-evaluations.js.map +1 -1
- package/dist/tools/query-evaluations.test.js +386 -391
- package/dist/tools/query-evaluations.test.js.map +1 -1
- package/dist/tools/query-llm-events.d.ts +100 -75
- package/dist/tools/query-llm-events.d.ts.map +1 -1
- package/dist/tools/query-llm-events.js +106 -80
- package/dist/tools/query-llm-events.js.map +1 -1
- package/dist/tools/query-llm-events.test.js +183 -346
- package/dist/tools/query-llm-events.test.js.map +1 -1
- package/dist/tools/query-logs.d.ts +45 -58
- package/dist/tools/query-logs.d.ts.map +1 -1
- package/dist/tools/query-logs.js +54 -101
- package/dist/tools/query-logs.js.map +1 -1
- package/dist/tools/query-logs.test.js +118 -314
- package/dist/tools/query-logs.test.js.map +1 -1
- package/dist/tools/query-metric-histograms.d.ts +112 -0
- package/dist/tools/query-metric-histograms.d.ts.map +1 -0
- package/dist/tools/query-metric-histograms.js +69 -0
- package/dist/tools/query-metric-histograms.js.map +1 -0
- package/dist/tools/query-metric-histograms.test.d.ts +5 -0
- package/dist/tools/query-metric-histograms.test.d.ts.map +1 -0
- package/dist/tools/query-metric-histograms.test.js +209 -0
- package/dist/tools/query-metric-histograms.test.js.map +1 -0
- package/dist/tools/query-metrics.d.ts +159 -60
- package/dist/tools/query-metrics.d.ts.map +1 -1
- package/dist/tools/query-metrics.js +133 -111
- package/dist/tools/query-metrics.js.map +1 -1
- package/dist/tools/query-metrics.test.js +314 -389
- package/dist/tools/query-metrics.test.js.map +1 -1
- package/dist/tools/query-regressions.d.ts +76 -0
- package/dist/tools/query-regressions.d.ts.map +1 -0
- package/dist/tools/query-regressions.js +122 -0
- package/dist/tools/query-regressions.js.map +1 -0
- package/dist/tools/query-regressions.test.d.ts +8 -0
- package/dist/tools/query-regressions.test.d.ts.map +1 -0
- package/dist/tools/query-regressions.test.js +129 -0
- package/dist/tools/query-regressions.test.js.map +1 -0
- package/dist/tools/query-traces.d.ts +103 -71
- package/dist/tools/query-traces.d.ts.map +1 -1
- package/dist/tools/query-traces.js +75 -106
- package/dist/tools/query-traces.js.map +1 -1
- package/dist/tools/query-traces.test.js +140 -846
- package/dist/tools/query-traces.test.js.map +1 -1
- package/dist/tools/query-verifications.d.ts +55 -43
- package/dist/tools/query-verifications.d.ts.map +1 -1
- package/dist/tools/query-verifications.js +47 -46
- package/dist/tools/query-verifications.js.map +1 -1
- package/dist/tools/query-verifications.test.js +42 -35
- package/dist/tools/query-verifications.test.js.map +1 -1
- package/dist/tools/routing-telemetry.d.ts +168 -0
- package/dist/tools/routing-telemetry.d.ts.map +1 -0
- package/dist/tools/routing-telemetry.js +267 -0
- package/dist/tools/routing-telemetry.js.map +1 -0
- package/dist/tools/routing-telemetry.test.d.ts +5 -0
- package/dist/tools/routing-telemetry.test.d.ts.map +1 -0
- package/dist/tools/routing-telemetry.test.js +747 -0
- package/dist/tools/routing-telemetry.test.js.map +1 -0
- package/dist/tools/setup-claudeignore.d.ts +4 -32
- package/dist/tools/setup-claudeignore.d.ts.map +1 -1
- package/dist/tools/setup-claudeignore.js +18 -22
- package/dist/tools/setup-claudeignore.js.map +1 -1
- package/dist/tools/setup-claudeignore.test.js +50 -49
- package/dist/tools/setup-claudeignore.test.js.map +1 -1
- package/dist/tools/token-budget.d.ts +170 -0
- package/dist/tools/token-budget.d.ts.map +1 -0
- package/dist/tools/token-budget.js +219 -0
- package/dist/tools/token-budget.js.map +1 -0
- package/dist/tools/token-budget.test.d.ts +5 -0
- package/dist/tools/token-budget.test.d.ts.map +1 -0
- package/dist/tools/token-budget.test.js +293 -0
- package/dist/tools/token-budget.test.js.map +1 -0
- package/package.json +72 -10
- package/dist/backends/local-jsonl.test.d.ts +0 -2
- package/dist/backends/local-jsonl.test.d.ts.map +0 -1
- package/dist/backends/local-jsonl.test.js +0 -4651
- package/dist/backends/local-jsonl.test.js.map +0 -1
- package/dist/backends/signoz-api-circuit-breaker.test.d.ts +0 -6
- package/dist/backends/signoz-api-circuit-breaker.test.d.ts.map +0 -1
- package/dist/backends/signoz-api-circuit-breaker.test.js +0 -548
- package/dist/backends/signoz-api-circuit-breaker.test.js.map +0 -1
- package/dist/backends/signoz-api-rate-limiter.test.d.ts +0 -6
- package/dist/backends/signoz-api-rate-limiter.test.d.ts.map +0 -1
- package/dist/backends/signoz-api-rate-limiter.test.js +0 -390
- package/dist/backends/signoz-api-rate-limiter.test.js.map +0 -1
- package/dist/backends/signoz-api-ssrf.test.d.ts +0 -6
- package/dist/backends/signoz-api-ssrf.test.d.ts.map +0 -1
- package/dist/backends/signoz-api-ssrf.test.js +0 -216
- package/dist/backends/signoz-api-ssrf.test.js.map +0 -1
- package/dist/backends/signoz-api-test-helpers.d.ts +0 -80
- package/dist/backends/signoz-api-test-helpers.d.ts.map +0 -1
- package/dist/backends/signoz-api-test-helpers.js +0 -79
- package/dist/backends/signoz-api-test-helpers.js.map +0 -1
- package/dist/backends/signoz-api.d.ts +0 -109
- package/dist/backends/signoz-api.d.ts.map +0 -1
- package/dist/backends/signoz-api.integration.test.d.ts +0 -8
- package/dist/backends/signoz-api.integration.test.d.ts.map +0 -1
- package/dist/backends/signoz-api.integration.test.js +0 -137
- package/dist/backends/signoz-api.integration.test.js.map +0 -1
- package/dist/backends/signoz-api.js +0 -1132
- package/dist/backends/signoz-api.js.map +0 -1
- package/dist/backends/signoz-api.test.d.ts +0 -11
- package/dist/backends/signoz-api.test.d.ts.map +0 -1
- package/dist/backends/signoz-api.test.js +0 -832
- package/dist/backends/signoz-api.test.js.map +0 -1
- package/dist/lib/agent-as-judge.d.ts +0 -388
- package/dist/lib/agent-as-judge.d.ts.map +0 -1
- package/dist/lib/agent-as-judge.js +0 -740
- package/dist/lib/agent-as-judge.js.map +0 -1
- package/dist/lib/agent-as-judge.test.d.ts.map +0 -1
- package/dist/lib/agent-as-judge.test.js +0 -816
- package/dist/lib/agent-as-judge.test.js.map +0 -1
- package/dist/lib/cache.d.ts +0 -90
- package/dist/lib/cache.d.ts.map +0 -1
- package/dist/lib/cache.js +0 -133
- package/dist/lib/cache.js.map +0 -1
- package/dist/lib/cache.test.d.ts.map +0 -1
- package/dist/lib/cache.test.js +0 -105
- package/dist/lib/cache.test.js.map +0 -1
- package/dist/lib/circuit-breaker.d.ts +0 -101
- package/dist/lib/circuit-breaker.d.ts.map +0 -1
- package/dist/lib/circuit-breaker.js +0 -158
- package/dist/lib/circuit-breaker.js.map +0 -1
- package/dist/lib/circuit-breaker.test.d.ts.map +0 -1
- package/dist/lib/circuit-breaker.test.js +0 -263
- package/dist/lib/circuit-breaker.test.js.map +0 -1
- package/dist/lib/confident-export.d.ts +0 -101
- package/dist/lib/confident-export.d.ts.map +0 -1
- package/dist/lib/confident-export.js +0 -393
- package/dist/lib/confident-export.js.map +0 -1
- package/dist/lib/confident-export.test.d.ts.map +0 -1
- package/dist/lib/confident-export.test.js +0 -835
- package/dist/lib/confident-export.test.js.map +0 -1
- package/dist/lib/constants-symlink.test.d.ts.map +0 -1
- package/dist/lib/constants-symlink.test.js +0 -357
- package/dist/lib/constants-symlink.test.js.map +0 -1
- package/dist/lib/constants.d.ts +0 -183
- package/dist/lib/constants.d.ts.map +0 -1
- package/dist/lib/constants.js +0 -453
- package/dist/lib/constants.js.map +0 -1
- package/dist/lib/constants.test.d.ts.map +0 -1
- package/dist/lib/constants.test.js +0 -717
- package/dist/lib/constants.test.js.map +0 -1
- package/dist/lib/datadog-export.d.ts +0 -156
- package/dist/lib/datadog-export.d.ts.map +0 -1
- package/dist/lib/datadog-export.js +0 -464
- package/dist/lib/datadog-export.js.map +0 -1
- package/dist/lib/datadog-export.test.d.ts +0 -14
- package/dist/lib/datadog-export.test.d.ts.map +0 -1
- package/dist/lib/datadog-export.test.js +0 -890
- package/dist/lib/datadog-export.test.js.map +0 -1
- package/dist/lib/edge-cases.test.d.ts.map +0 -1
- package/dist/lib/edge-cases.test.js +0 -634
- package/dist/lib/edge-cases.test.js.map +0 -1
- package/dist/lib/error-sanitizer.d.ts +0 -57
- package/dist/lib/error-sanitizer.d.ts.map +0 -1
- package/dist/lib/error-sanitizer.js +0 -233
- package/dist/lib/error-sanitizer.js.map +0 -1
- package/dist/lib/error-sanitizer.test.d.ts.map +0 -1
- package/dist/lib/error-sanitizer.test.js +0 -528
- package/dist/lib/error-sanitizer.test.js.map +0 -1
- package/dist/lib/error-types.d.ts +0 -54
- package/dist/lib/error-types.d.ts.map +0 -1
- package/dist/lib/error-types.js +0 -154
- package/dist/lib/error-types.js.map +0 -1
- package/dist/lib/error-types.test.d.ts.map +0 -1
- package/dist/lib/error-types.test.js +0 -196
- package/dist/lib/error-types.test.js.map +0 -1
- package/dist/lib/evaluation-hooks.d.ts +0 -49
- package/dist/lib/evaluation-hooks.d.ts.map +0 -1
- package/dist/lib/evaluation-hooks.js +0 -488
- package/dist/lib/evaluation-hooks.js.map +0 -1
- package/dist/lib/evaluation-hooks.test.d.ts.map +0 -1
- package/dist/lib/evaluation-hooks.test.js +0 -624
- package/dist/lib/evaluation-hooks.test.js.map +0 -1
- package/dist/lib/export-utils.d.ts +0 -99
- package/dist/lib/export-utils.d.ts.map +0 -1
- package/dist/lib/export-utils.js +0 -238
- package/dist/lib/export-utils.js.map +0 -1
- package/dist/lib/export-utils.test.d.ts.map +0 -1
- package/dist/lib/export-utils.test.js +0 -193
- package/dist/lib/export-utils.test.js.map +0 -1
- package/dist/lib/file-utils.d.ts +0 -320
- package/dist/lib/file-utils.d.ts.map +0 -1
- package/dist/lib/file-utils.js +0 -816
- package/dist/lib/file-utils.js.map +0 -1
- package/dist/lib/file-utils.test.d.ts.map +0 -1
- package/dist/lib/file-utils.test.js +0 -1333
- package/dist/lib/file-utils.test.js.map +0 -1
- package/dist/lib/histogram.d.ts +0 -119
- package/dist/lib/histogram.d.ts.map +0 -1
- package/dist/lib/histogram.js +0 -202
- package/dist/lib/histogram.js.map +0 -1
- package/dist/lib/histogram.test.d.ts.map +0 -1
- package/dist/lib/histogram.test.js +0 -381
- package/dist/lib/histogram.test.js.map +0 -1
- package/dist/lib/indexer.d.ts +0 -96
- package/dist/lib/indexer.d.ts.map +0 -1
- package/dist/lib/indexer.js +0 -353
- package/dist/lib/indexer.js.map +0 -1
- package/dist/lib/indexer.test.d.ts.map +0 -1
- package/dist/lib/indexer.test.js +0 -696
- package/dist/lib/indexer.test.js.map +0 -1
- package/dist/lib/input-validator.d.ts +0 -115
- package/dist/lib/input-validator.d.ts.map +0 -1
- package/dist/lib/input-validator.fuzz.test.d.ts.map +0 -1
- package/dist/lib/input-validator.fuzz.test.js +0 -290
- package/dist/lib/input-validator.fuzz.test.js.map +0 -1
- package/dist/lib/input-validator.js +0 -304
- package/dist/lib/input-validator.js.map +0 -1
- package/dist/lib/input-validator.test.d.ts.map +0 -1
- package/dist/lib/input-validator.test.js +0 -415
- package/dist/lib/input-validator.test.js.map +0 -1
- package/dist/lib/instrumentation.d.ts +0 -153
- package/dist/lib/instrumentation.d.ts.map +0 -1
- package/dist/lib/instrumentation.integration.test.d.ts.map +0 -1
- package/dist/lib/instrumentation.integration.test.js +0 -589
- package/dist/lib/instrumentation.integration.test.js.map +0 -1
- package/dist/lib/instrumentation.js +0 -520
- package/dist/lib/instrumentation.js.map +0 -1
- package/dist/lib/instrumentation.test.d.ts.map +0 -1
- package/dist/lib/instrumentation.test.js +0 -821
- package/dist/lib/instrumentation.test.js.map +0 -1
- package/dist/lib/langfuse-export.d.ts +0 -125
- package/dist/lib/langfuse-export.d.ts.map +0 -1
- package/dist/lib/langfuse-export.js +0 -367
- package/dist/lib/langfuse-export.js.map +0 -1
- package/dist/lib/langfuse-export.test.d.ts.map +0 -1
- package/dist/lib/langfuse-export.test.js +0 -1007
- package/dist/lib/langfuse-export.test.js.map +0 -1
- package/dist/lib/llm-as-judge.d.ts +0 -657
- package/dist/lib/llm-as-judge.d.ts.map +0 -1
- package/dist/lib/llm-as-judge.js +0 -1397
- package/dist/lib/llm-as-judge.js.map +0 -1
- package/dist/lib/llm-as-judge.test.d.ts.map +0 -1
- package/dist/lib/llm-as-judge.test.js +0 -2409
- package/dist/lib/llm-as-judge.test.js.map +0 -1
- package/dist/lib/logger.d.ts +0 -46
- package/dist/lib/logger.d.ts.map +0 -1
- package/dist/lib/logger.js +0 -81
- package/dist/lib/logger.js.map +0 -1
- package/dist/lib/logger.test.d.ts.map +0 -1
- package/dist/lib/logger.test.js.map +0 -1
- package/dist/lib/metrics.d.ts +0 -62
- package/dist/lib/metrics.d.ts.map +0 -1
- package/dist/lib/metrics.js +0 -166
- package/dist/lib/metrics.js.map +0 -1
- package/dist/lib/metrics.test.d.ts.map +0 -1
- package/dist/lib/metrics.test.js +0 -189
- package/dist/lib/metrics.test.js.map +0 -1
- package/dist/lib/otlp-export.d.ts +0 -178
- package/dist/lib/otlp-export.d.ts.map +0 -1
- package/dist/lib/otlp-export.js +0 -382
- package/dist/lib/otlp-export.js.map +0 -1
- package/dist/lib/parse-stats.d.ts.map +0 -1
- package/dist/lib/parse-stats.js +0 -206
- package/dist/lib/parse-stats.js.map +0 -1
- package/dist/lib/parse-stats.test.d.ts.map +0 -1
- package/dist/lib/parse-stats.test.js +0 -283
- package/dist/lib/parse-stats.test.js.map +0 -1
- package/dist/lib/phoenix-export.d.ts +0 -109
- package/dist/lib/phoenix-export.d.ts.map +0 -1
- package/dist/lib/phoenix-export.js +0 -429
- package/dist/lib/phoenix-export.js.map +0 -1
- package/dist/lib/phoenix-export.test.d.ts.map +0 -1
- package/dist/lib/phoenix-export.test.js +0 -725
- package/dist/lib/phoenix-export.test.js.map +0 -1
- package/dist/lib/query-sanitizer.d.ts.map +0 -1
- package/dist/lib/query-sanitizer.js +0 -261
- package/dist/lib/query-sanitizer.js.map +0 -1
- package/dist/lib/query-sanitizer.test.d.ts.map +0 -1
- package/dist/lib/query-sanitizer.test.js +0 -400
- package/dist/lib/query-sanitizer.test.js.map +0 -1
- package/dist/lib/server-utils.d.ts +0 -93
- package/dist/lib/server-utils.d.ts.map +0 -1
- package/dist/lib/server-utils.js +0 -181
- package/dist/lib/server-utils.js.map +0 -1
- package/dist/lib/shared-schemas.d.ts +0 -87
- package/dist/lib/shared-schemas.d.ts.map +0 -1
- package/dist/lib/shared-schemas.js +0 -87
- package/dist/lib/shared-schemas.js.map +0 -1
- package/dist/lib/shared-schemas.test.d.ts.map +0 -1
- package/dist/lib/shared-schemas.test.js +0 -106
- package/dist/lib/shared-schemas.test.js.map +0 -1
- package/dist/lib/toon-encoder.d.ts +0 -26
- package/dist/lib/toon-encoder.d.ts.map +0 -1
- package/dist/lib/toon-encoder.js +0 -61
- package/dist/lib/toon-encoder.js.map +0 -1
- package/dist/lib/toon-encoder.test.d.ts.map +0 -1
- package/dist/lib/toon-encoder.test.js +0 -85
- package/dist/lib/toon-encoder.test.js.map +0 -1
- package/dist/lib/verification-events.d.ts +0 -100
- package/dist/lib/verification-events.d.ts.map +0 -1
- package/dist/lib/verification-events.js +0 -162
- package/dist/lib/verification-events.js.map +0 -1
- package/dist/lib/verification-events.test.d.ts.map +0 -1
- package/dist/lib/verification-events.test.js +0 -193
- package/dist/lib/verification-events.test.js.map +0 -1
- package/dist/tools/signoz.integration.test.d.ts +0 -8
- package/dist/tools/signoz.integration.test.d.ts.map +0 -1
- package/dist/tools/signoz.integration.test.js +0 -141
- package/dist/tools/signoz.integration.test.js.map +0 -1
- package/dist/types/evaluation-hooks.d.ts +0 -176
- package/dist/types/evaluation-hooks.d.ts.map +0 -1
- package/dist/types/evaluation-hooks.js +0 -49
- package/dist/types/evaluation-hooks.js.map +0 -1
- /package/dist/lib/{agent-as-judge.test.d.ts → agent-judge/agent-as-judge.test.d.ts} +0 -0
- /package/dist/lib/{verification-events.test.d.ts → audit/verification-events.test.d.ts} +0 -0
- /package/dist/lib/{constants-symlink.test.d.ts → core/constants-symlink.test.d.ts} +0 -0
- /package/dist/lib/{constants.test.d.ts → core/constants.test.d.ts} +0 -0
- /package/dist/lib/{edge-cases.test.d.ts → core/edge-cases.test.d.ts} +0 -0
- /package/dist/lib/{file-utils.test.d.ts → core/file-utils.test.d.ts} +0 -0
- /package/dist/lib/{input-validator.fuzz.test.d.ts → core/input-validator.fuzz.test.d.ts} +0 -0
- /package/dist/lib/{input-validator.test.d.ts → core/input-validator.test.d.ts} +0 -0
- /package/dist/lib/{logger.test.d.ts → core/logger.test.d.ts} +0 -0
- /package/dist/lib/{logger.test.js → core/logger.test.js} +0 -0
- /package/dist/lib/{shared-schemas.test.d.ts → core/shared-schemas.test.d.ts} +0 -0
- /package/dist/lib/{error-sanitizer.test.d.ts → errors/error-sanitizer.test.d.ts} +0 -0
- /package/dist/lib/{error-types.test.d.ts → errors/error-types.test.d.ts} +0 -0
- /package/dist/lib/{query-sanitizer.d.ts → errors/query-sanitizer.d.ts} +0 -0
- /package/dist/lib/{query-sanitizer.test.d.ts → errors/query-sanitizer.test.d.ts} +0 -0
- /package/dist/lib/{confident-export.test.d.ts → exports/confident-export.test.d.ts} +0 -0
- /package/dist/lib/{export-utils.test.d.ts → exports/export-utils.test.d.ts} +0 -0
- /package/dist/lib/{langfuse-export.test.d.ts → exports/langfuse-export.test.d.ts} +0 -0
- /package/dist/lib/{phoenix-export.test.d.ts → exports/phoenix-export.test.d.ts} +0 -0
- /package/dist/lib/{evaluation-hooks.test.d.ts → judge/evaluation-hooks.test.d.ts} +0 -0
- /package/dist/lib/{llm-as-judge.test.d.ts → judge/llm-as-judge.test.d.ts} +0 -0
- /package/dist/lib/{histogram.test.d.ts → observability/histogram.test.d.ts} +0 -0
- /package/dist/lib/{indexer.test.d.ts → observability/indexer.test.d.ts} +0 -0
- /package/dist/lib/{instrumentation.integration.test.d.ts → observability/instrumentation.integration.test.d.ts} +0 -0
- /package/dist/lib/{instrumentation.test.d.ts → observability/instrumentation.test.d.ts} +0 -0
- /package/dist/lib/{metrics.test.d.ts → observability/metrics.test.d.ts} +0 -0
- /package/dist/lib/{parse-stats.d.ts → observability/parse-stats.d.ts} +0 -0
- /package/dist/lib/{parse-stats.test.d.ts → observability/parse-stats.test.d.ts} +0 -0
- /package/dist/lib/{cache.test.d.ts → resilience/cache.test.d.ts} +0 -0
- /package/dist/lib/{circuit-breaker.test.d.ts → resilience/circuit-breaker.test.d.ts} +0 -0
- /package/dist/lib/{toon-encoder.test.d.ts → resilience/toon-encoder.test.d.ts} +0 -0
|
@@ -0,0 +1,973 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DAGEval Tests — RED phase
|
|
3
|
+
*
|
|
4
|
+
* Tests for the not-yet-implemented dagEval function and DAGEvalConfig types.
|
|
5
|
+
* All tests are expected to fail until llm-judge-dag.ts is implemented.
|
|
6
|
+
*/
|
|
7
|
+
import { describe, it } from 'vitest';
|
|
8
|
+
import assert from 'node:assert';
|
|
9
|
+
import { dagEval, validateDAGConfig, } from './llm-judge-dag.js';
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// Constants
|
|
12
|
+
// ============================================================================
|
|
13
|
+
const DAG_NORMALIZED_MIN = 0;
|
|
14
|
+
const DAG_NORMALIZED_MAX = 1;
|
|
15
|
+
const DAG_SCORE_MID = 5;
|
|
16
|
+
const DAG_NORMALIZED_MID = 0.5;
|
|
17
|
+
const DAG_SCORE_TEN = 10;
|
|
18
|
+
const DAG_SCORE_ZERO = 0;
|
|
19
|
+
const FLOAT_TOLERANCE = 1e-9;
|
|
20
|
+
const SHORT_TIMEOUT_MS = 10;
|
|
21
|
+
const CONDITIONAL_THRESHOLD = 5;
|
|
22
|
+
const EDGE_WEIGHT_HALF = 0.5;
|
|
23
|
+
const MAX_DEPTH_TWO = 2;
|
|
24
|
+
// ============================================================================
|
|
25
|
+
// Mock LLM Provider
|
|
26
|
+
// ============================================================================
|
|
27
|
+
/**
 * Builds a stub LLM provider that replays canned responses in call order.
 *
 * The n-th call to `generate` resolves to `{ text: responses[n] }`. When that
 * entry is null/undefined (for example once the list is exhausted), the last
 * entry of `responses` is used instead. The prompt and options arguments are
 * ignored.
 *
 * @param {Array<string>} responses - Canned response texts, one per call.
 * @returns {{ generate: Function }} Provider whose async `generate` resolves to `{ text }`.
 */
function createMockLLM(responses) {
    let nextIndex = 0;
    const generate = async (_prompt, _options) => {
        const position = nextIndex;
        nextIndex += 1;
        // Fall back to the final canned response when this slot is empty.
        const text = responses[position] ?? responses[responses.length - 1];
        return { text };
    };
    return { generate };
}
|
|
37
|
+
/**
 * Builds a stub LLM provider whose `generate` never resolves.
 *
 * The returned promise stays pending unless `options.signal` is supplied and
 * is (or becomes) aborted, in which case it rejects with
 * `new Error('AbortError')`.
 *
 * @returns {{ generate: Function }} Provider whose `generate(_prompt, options)`
 *   returns a promise that only ever settles by rejecting on abort.
 */
function createHangingLLM() {
    return {
        generate(_prompt, options) {
            return new Promise((_resolve, reject) => {
                const signal = options?.signal;
                if (!signal) {
                    // No abort signal supplied: hang forever.
                    return;
                }
                const rejectAborted = () => {
                    reject(new Error('AbortError'));
                };
                // A signal that was aborted before this call never emits
                // another 'abort' event, so it has to be checked up front.
                if (signal.aborted) {
                    rejectAborted();
                    return;
                }
                // Respect the abort signal; the listener is only needed once.
                signal.addEventListener('abort', rejectAborted, { once: true });
            });
        },
    };
}
|
|
52
|
+
// ============================================================================
|
|
53
|
+
// Fixture Builders
|
|
54
|
+
// ============================================================================
|
|
55
|
+
const VERDICT_PASS = {
|
|
56
|
+
type: 'verdict',
|
|
57
|
+
score: DAG_SCORE_TEN,
|
|
58
|
+
label: 'pass',
|
|
59
|
+
};
|
|
60
|
+
const VERDICT_FAIL = {
|
|
61
|
+
type: 'verdict',
|
|
62
|
+
score: DAG_SCORE_ZERO,
|
|
63
|
+
label: 'fail',
|
|
64
|
+
};
|
|
65
|
+
const VERDICT_PARTIAL = {
|
|
66
|
+
type: 'verdict',
|
|
67
|
+
score: DAG_SCORE_MID,
|
|
68
|
+
label: 'partial',
|
|
69
|
+
};
|
|
70
|
+
const BASE_TEST_CASE = {
|
|
71
|
+
input: 'What is AI?',
|
|
72
|
+
output: 'AI is artificial intelligence.',
|
|
73
|
+
};
|
|
74
|
+
// ============================================================================
|
|
75
|
+
// Basic Traversal
|
|
76
|
+
// ============================================================================
|
|
77
|
+
describe('dagEval — basic traversal', () => {
|
|
78
|
+
it('should traverse a single VerdictNode and return normalized score', async () => {
|
|
79
|
+
const config = {
|
|
80
|
+
name: 'single-verdict',
|
|
81
|
+
nodes: new Map([['root', VERDICT_PASS]]),
|
|
82
|
+
rootId: 'root',
|
|
83
|
+
};
|
|
84
|
+
const result = await dagEval(createMockLLM([]), config, BASE_TEST_CASE);
|
|
85
|
+
assert.ok(result.score >= DAG_NORMALIZED_MIN && result.score <= DAG_NORMALIZED_MAX, `score ${result.score} should be in [0, 1]`);
|
|
86
|
+
assert.ok(Math.abs(result.score - DAG_NORMALIZED_MAX) < FLOAT_TOLERANCE, `score 10/10 should normalize to 1.0, got ${result.score}`);
|
|
87
|
+
});
|
|
88
|
+
it('should normalize VerdictNode score 0 to 0.0', async () => {
|
|
89
|
+
const config = {
|
|
90
|
+
name: 'verdict-zero',
|
|
91
|
+
nodes: new Map([['root', VERDICT_FAIL]]),
|
|
92
|
+
rootId: 'root',
|
|
93
|
+
};
|
|
94
|
+
const result = await dagEval(createMockLLM([]), config, BASE_TEST_CASE);
|
|
95
|
+
assert.ok(Math.abs(result.score - DAG_NORMALIZED_MIN) < FLOAT_TOLERANCE, `score 0/10 should normalize to 0.0, got ${result.score}`);
|
|
96
|
+
});
|
|
97
|
+
it('should normalize VerdictNode score 5 to 0.5', async () => {
|
|
98
|
+
const config = {
|
|
99
|
+
name: 'verdict-mid',
|
|
100
|
+
nodes: new Map([['root', VERDICT_PARTIAL]]),
|
|
101
|
+
rootId: 'root',
|
|
102
|
+
};
|
|
103
|
+
const result = await dagEval(createMockLLM([]), config, BASE_TEST_CASE);
|
|
104
|
+
assert.ok(Math.abs(result.score - DAG_NORMALIZED_MID) < FLOAT_TOLERANCE, `score 5/10 should normalize to 0.5, got ${result.score}`);
|
|
105
|
+
});
|
|
106
|
+
it('should follow BinaryJudgementNode true branch when LLM responds true', async () => {
|
|
107
|
+
const trueChild = { type: 'verdict', score: DAG_SCORE_TEN, label: 'true-branch' };
|
|
108
|
+
const falseChild = { type: 'verdict', score: DAG_SCORE_ZERO, label: 'false-branch' };
|
|
109
|
+
const judgeNode = {
|
|
110
|
+
type: 'binary_judgement',
|
|
111
|
+
criteria: 'Is the response relevant?',
|
|
112
|
+
trueChild: 'true-node',
|
|
113
|
+
falseChild: 'false-node',
|
|
114
|
+
};
|
|
115
|
+
const config = {
|
|
116
|
+
name: 'binary-true',
|
|
117
|
+
nodes: new Map([
|
|
118
|
+
['root', judgeNode],
|
|
119
|
+
['true-node', trueChild],
|
|
120
|
+
['false-node', falseChild],
|
|
121
|
+
]),
|
|
122
|
+
rootId: 'root',
|
|
123
|
+
};
|
|
124
|
+
const result = await dagEval(createMockLLM(['true']), config, BASE_TEST_CASE);
|
|
125
|
+
assert.ok(Math.abs(result.score - DAG_NORMALIZED_MAX) < FLOAT_TOLERANCE, `true branch should yield score 1.0, got ${result.score}`);
|
|
126
|
+
});
|
|
127
|
+
it('should follow BinaryJudgementNode false branch when LLM responds false', async () => {
|
|
128
|
+
const trueChild = { type: 'verdict', score: DAG_SCORE_TEN, label: 'true-branch' };
|
|
129
|
+
const falseChild = { type: 'verdict', score: DAG_SCORE_ZERO, label: 'false-branch' };
|
|
130
|
+
const judgeNode = {
|
|
131
|
+
type: 'binary_judgement',
|
|
132
|
+
criteria: 'Is the response relevant?',
|
|
133
|
+
trueChild: 'true-node',
|
|
134
|
+
falseChild: 'false-node',
|
|
135
|
+
};
|
|
136
|
+
const config = {
|
|
137
|
+
name: 'binary-false',
|
|
138
|
+
nodes: new Map([
|
|
139
|
+
['root', judgeNode],
|
|
140
|
+
['true-node', trueChild],
|
|
141
|
+
['false-node', falseChild],
|
|
142
|
+
]),
|
|
143
|
+
rootId: 'root',
|
|
144
|
+
};
|
|
145
|
+
const result = await dagEval(createMockLLM(['false']), config, BASE_TEST_CASE);
|
|
146
|
+
assert.ok(Math.abs(result.score - DAG_NORMALIZED_MIN) < FLOAT_TOLERANCE, `false branch should yield score 0.0, got ${result.score}`);
|
|
147
|
+
});
|
|
148
|
+
it('should follow NonBinaryJudgementNode to matching verdict child', async () => {
|
|
149
|
+
const excellentVerdict = { type: 'verdict', score: DAG_SCORE_TEN, label: 'excellent' };
|
|
150
|
+
const poorVerdict = { type: 'verdict', score: DAG_SCORE_ZERO, label: 'poor' };
|
|
151
|
+
const nonBinaryNode = {
|
|
152
|
+
type: 'non_binary_judgement',
|
|
153
|
+
criteria: 'Rate the quality',
|
|
154
|
+
verdicts: new Map([
|
|
155
|
+
['excellent', 'excellent-node'],
|
|
156
|
+
['poor', 'poor-node'],
|
|
157
|
+
]),
|
|
158
|
+
};
|
|
159
|
+
const config = {
|
|
160
|
+
name: 'non-binary',
|
|
161
|
+
nodes: new Map([
|
|
162
|
+
['root', nonBinaryNode],
|
|
163
|
+
['excellent-node', excellentVerdict],
|
|
164
|
+
['poor-node', poorVerdict],
|
|
165
|
+
]),
|
|
166
|
+
rootId: 'root',
|
|
167
|
+
};
|
|
168
|
+
const result = await dagEval(createMockLLM(['excellent']), config, BASE_TEST_CASE);
|
|
169
|
+
assert.ok(Math.abs(result.score - DAG_NORMALIZED_MAX) < FLOAT_TOLERANCE, `'excellent' verdict should yield score 1.0, got ${result.score}`);
|
|
170
|
+
});
|
|
171
|
+
it('should use TaskNode output as transformed input for next node', async () => {
|
|
172
|
+
const taskNode = {
|
|
173
|
+
type: 'task',
|
|
174
|
+
instruction: 'Extract the main topic from the output',
|
|
175
|
+
next: 'judge-node',
|
|
176
|
+
};
|
|
177
|
+
const judgeNode = {
|
|
178
|
+
type: 'binary_judgement',
|
|
179
|
+
criteria: 'Is the topic relevant?',
|
|
180
|
+
trueChild: 'pass-node',
|
|
181
|
+
falseChild: 'fail-node',
|
|
182
|
+
};
|
|
183
|
+
const config = {
|
|
184
|
+
name: 'task-then-judge',
|
|
185
|
+
nodes: new Map([
|
|
186
|
+
['root', taskNode],
|
|
187
|
+
['judge-node', judgeNode],
|
|
188
|
+
['pass-node', VERDICT_PASS],
|
|
189
|
+
['fail-node', VERDICT_FAIL],
|
|
190
|
+
]),
|
|
191
|
+
rootId: 'root',
|
|
192
|
+
};
|
|
193
|
+
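// First LLM call: TaskNode transformation. Second: BinaryJudgementNode. NOTE: the mock ignores prompts, so this only checks that traversal completes with a score in [0, 1].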
|
|
194
|
+
const result = await dagEval(createMockLLM(['artificial intelligence', 'true']), config, BASE_TEST_CASE);
|
|
195
|
+
assert.ok(result.score >= DAG_NORMALIZED_MIN && result.score <= DAG_NORMALIZED_MAX);
|
|
196
|
+
});
|
|
197
|
+
it('should traverse a multi-hop DAG and reach the correct VerdictNode', async () => {
|
|
198
|
+
const firstJudge = {
|
|
199
|
+
type: 'binary_judgement',
|
|
200
|
+
criteria: 'Is the output non-empty?',
|
|
201
|
+
trueChild: 'second-judge',
|
|
202
|
+
falseChild: 'fail-node',
|
|
203
|
+
};
|
|
204
|
+
const secondJudge = {
|
|
205
|
+
type: 'binary_judgement',
|
|
206
|
+
criteria: 'Is the output accurate?',
|
|
207
|
+
trueChild: 'pass-node',
|
|
208
|
+
falseChild: 'partial-node',
|
|
209
|
+
};
|
|
210
|
+
const config = {
|
|
211
|
+
name: 'multi-hop',
|
|
212
|
+
nodes: new Map([
|
|
213
|
+
['root', firstJudge],
|
|
214
|
+
['second-judge', secondJudge],
|
|
215
|
+
['pass-node', VERDICT_PASS],
|
|
216
|
+
['fail-node', VERDICT_FAIL],
|
|
217
|
+
['partial-node', VERDICT_PARTIAL],
|
|
218
|
+
]),
|
|
219
|
+
rootId: 'root',
|
|
220
|
+
};
|
|
221
|
+
const result = await dagEval(createMockLLM(['true', 'true']), config, BASE_TEST_CASE);
|
|
222
|
+
assert.ok(Math.abs(result.score - DAG_NORMALIZED_MAX) < FLOAT_TOLERANCE, `two true branches should reach pass verdict (1.0), got ${result.score}`);
|
|
223
|
+
});
|
|
224
|
+
});
|
|
225
|
+
// ============================================================================
|
|
226
|
+
// Explanation Trail
|
|
227
|
+
// ============================================================================
|
|
228
|
+
describe('dagEval — explanation trail', () => {
|
|
229
|
+
it('should return a non-empty reason string', async () => {
|
|
230
|
+
const config = {
|
|
231
|
+
name: 'reason-check',
|
|
232
|
+
nodes: new Map([['root', VERDICT_PASS]]),
|
|
233
|
+
rootId: 'root',
|
|
234
|
+
};
|
|
235
|
+
const result = await dagEval(createMockLLM([]), config, BASE_TEST_CASE);
|
|
236
|
+
assert.ok(typeof result.reason === 'string' && result.reason.length > 0, 'reason should be a non-empty string');
|
|
237
|
+
});
|
|
238
|
+
it('should include the verdict label in the reason', async () => {
|
|
239
|
+
const config = {
|
|
240
|
+
name: 'label-in-reason',
|
|
241
|
+
nodes: new Map([['root', VERDICT_PASS]]),
|
|
242
|
+
rootId: 'root',
|
|
243
|
+
};
|
|
244
|
+
const result = await dagEval(createMockLLM([]), config, BASE_TEST_CASE);
|
|
245
|
+
assert.ok(result.reason.includes('pass'), `reason should include verdict label 'pass', got: ${result.reason}`);
|
|
246
|
+
});
|
|
247
|
+
it('should include each traversed node ID in the reason', async () => {
|
|
248
|
+
const judgeNode = {
|
|
249
|
+
type: 'binary_judgement',
|
|
250
|
+
criteria: 'Is the output relevant?',
|
|
251
|
+
trueChild: 'verdict-node',
|
|
252
|
+
falseChild: 'fail-node',
|
|
253
|
+
};
|
|
254
|
+
const config = {
|
|
255
|
+
name: 'trail-test',
|
|
256
|
+
nodes: new Map([
|
|
257
|
+
['root', judgeNode],
|
|
258
|
+
['verdict-node', VERDICT_PASS],
|
|
259
|
+
['fail-node', VERDICT_FAIL],
|
|
260
|
+
]),
|
|
261
|
+
rootId: 'root',
|
|
262
|
+
};
|
|
263
|
+
const result = await dagEval(createMockLLM(['true']), config, BASE_TEST_CASE);
|
|
264
|
+
assert.ok(result.reason.includes('root') && result.reason.includes('verdict-node'), `reason should include traversed node IDs, got: ${result.reason}`);
|
|
265
|
+
});
|
|
266
|
+
it('should include each node decision in the reason', async () => {
|
|
267
|
+
const judgeNode = {
|
|
268
|
+
type: 'binary_judgement',
|
|
269
|
+
criteria: 'Is the response correct?',
|
|
270
|
+
trueChild: 'pass-node',
|
|
271
|
+
falseChild: 'fail-node',
|
|
272
|
+
};
|
|
273
|
+
const config = {
|
|
274
|
+
name: 'decision-trail',
|
|
275
|
+
nodes: new Map([
|
|
276
|
+
['root', judgeNode],
|
|
277
|
+
['pass-node', VERDICT_PASS],
|
|
278
|
+
['fail-node', VERDICT_FAIL],
|
|
279
|
+
]),
|
|
280
|
+
rootId: 'root',
|
|
281
|
+
};
|
|
282
|
+
const result = await dagEval(createMockLLM(['true']), config, BASE_TEST_CASE);
|
|
283
|
+
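// Reason should capture the binary decision made. NOTE: the assertions below only check that reason is a non-empty string; the decision itself is not asserted.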
|
|
284
|
+
assert.ok(result.reason.length > 0);
|
|
285
|
+
assert.ok(typeof result.reason === 'string');
|
|
286
|
+
});
|
|
287
|
+
});
|
|
288
|
+
// ============================================================================
|
|
289
|
+
// Score Normalization
|
|
290
|
+
// ============================================================================
|
|
291
|
+
describe('dagEval — score normalization', () => {
|
|
292
|
+
it('should always return score in [0, 1] range regardless of VerdictNode score', async () => {
|
|
293
|
+
const scores = [DAG_SCORE_ZERO, 1, 3, DAG_SCORE_MID, 7, DAG_SCORE_TEN];
|
|
294
|
+
for (const rawScore of scores) {
|
|
295
|
+
const config = {
|
|
296
|
+
name: `score-${rawScore}`,
|
|
297
|
+
nodes: new Map([['root', { type: 'verdict', score: rawScore, label: 'test' }]]),
|
|
298
|
+
rootId: 'root',
|
|
299
|
+
};
|
|
300
|
+
const result = await dagEval(createMockLLM([]), config, BASE_TEST_CASE);
|
|
301
|
+
assert.ok(result.score >= DAG_NORMALIZED_MIN && result.score <= DAG_NORMALIZED_MAX, `raw score ${rawScore} normalized to ${result.score}, expected in [0, 1]`);
|
|
302
|
+
}
|
|
303
|
+
});
|
|
304
|
+
it('should produce monotonically increasing normalized scores for increasing raw scores', async () => {
|
|
305
|
+
const rawScores = [DAG_SCORE_ZERO, DAG_SCORE_MID, DAG_SCORE_TEN];
|
|
306
|
+
const normalizedScores = [];
|
|
307
|
+
for (const rawScore of rawScores) {
|
|
308
|
+
const config = {
|
|
309
|
+
name: `monotone-${rawScore}`,
|
|
310
|
+
nodes: new Map([['root', { type: 'verdict', score: rawScore, label: 'test' }]]),
|
|
311
|
+
rootId: 'root',
|
|
312
|
+
};
|
|
313
|
+
const result = await dagEval(createMockLLM([]), config, BASE_TEST_CASE);
|
|
314
|
+
normalizedScores.push(result.score);
|
|
315
|
+
}
|
|
316
|
+
assert.ok(normalizedScores[0] < normalizedScores[1], `0 → ${normalizedScores[0]} should be less than 5 → ${normalizedScores[1]}`);
|
|
317
|
+
assert.ok(normalizedScores[1] < normalizedScores[2], `5 → ${normalizedScores[1]} should be less than 10 → ${normalizedScores[2]}`);
|
|
318
|
+
});
|
|
319
|
+
});
|
|
320
|
+
// ============================================================================
|
|
321
|
+
// Cycle Detection
|
|
322
|
+
// ============================================================================
|
|
323
|
+
describe('dagEval — cycle detection', () => {
|
|
324
|
+
it('should throw when a node references itself as a child', async () => {
|
|
325
|
+
const selfRefNode = {
|
|
326
|
+
type: 'binary_judgement',
|
|
327
|
+
criteria: 'Is it valid?',
|
|
328
|
+
trueChild: 'root', // self-reference
|
|
329
|
+
falseChild: 'fail-node',
|
|
330
|
+
};
|
|
331
|
+
const config = {
|
|
332
|
+
name: 'self-cycle',
|
|
333
|
+
nodes: new Map([
|
|
334
|
+
['root', selfRefNode],
|
|
335
|
+
['fail-node', VERDICT_FAIL],
|
|
336
|
+
]),
|
|
337
|
+
rootId: 'root',
|
|
338
|
+
};
|
|
339
|
+
await assert.rejects(dagEval(createMockLLM([]), config, BASE_TEST_CASE), /cycle/i, 'should throw an error mentioning cycle');
|
|
340
|
+
});
|
|
341
|
+
it('should throw when two nodes form a mutual cycle', async () => {
|
|
342
|
+
const nodeA = {
|
|
343
|
+
type: 'binary_judgement',
|
|
344
|
+
criteria: 'Check A',
|
|
345
|
+
trueChild: 'node-b',
|
|
346
|
+
falseChild: 'node-b',
|
|
347
|
+
};
|
|
348
|
+
const nodeB = {
|
|
349
|
+
type: 'binary_judgement',
|
|
350
|
+
criteria: 'Check B',
|
|
351
|
+
trueChild: 'node-a', // cycle back
|
|
352
|
+
falseChild: 'node-a',
|
|
353
|
+
};
|
|
354
|
+
const config = {
|
|
355
|
+
name: 'mutual-cycle',
|
|
356
|
+
nodes: new Map([
|
|
357
|
+
['root', nodeA],
|
|
358
|
+
['node-a', nodeA],
|
|
359
|
+
['node-b', nodeB],
|
|
360
|
+
]),
|
|
361
|
+
rootId: 'root',
|
|
362
|
+
};
|
|
363
|
+
await assert.rejects(dagEval(createMockLLM([]), config, BASE_TEST_CASE), /cycle/i);
|
|
364
|
+
});
|
|
365
|
+
it('should throw when validateDAGConfig is called with a cyclic config', () => {
|
|
366
|
+
const selfRefNode = {
|
|
367
|
+
type: 'binary_judgement',
|
|
368
|
+
criteria: 'Cycle check',
|
|
369
|
+
trueChild: 'root',
|
|
370
|
+
falseChild: 'fail-node',
|
|
371
|
+
};
|
|
372
|
+
const config = {
|
|
373
|
+
name: 'validate-cycle',
|
|
374
|
+
nodes: new Map([
|
|
375
|
+
['root', selfRefNode],
|
|
376
|
+
['fail-node', VERDICT_FAIL],
|
|
377
|
+
]),
|
|
378
|
+
rootId: 'root',
|
|
379
|
+
};
|
|
380
|
+
assert.throws(() => validateDAGConfig(config), /cycle/i);
|
|
381
|
+
});
|
|
382
|
+
});
|
|
383
|
+
// ============================================================================
|
|
384
|
+
// Config Validation
|
|
385
|
+
// ============================================================================
|
|
386
|
+
describe('dagEval — config validation', () => {
|
|
387
|
+
it('should throw when rootId does not exist in nodes map', async () => {
|
|
388
|
+
const config = {
|
|
389
|
+
name: 'missing-root',
|
|
390
|
+
nodes: new Map([['other-node', VERDICT_PASS]]),
|
|
391
|
+
rootId: 'nonexistent',
|
|
392
|
+
};
|
|
393
|
+
await assert.rejects(dagEval(createMockLLM([]), config, BASE_TEST_CASE), /root/i, 'should throw an error mentioning root');
|
|
394
|
+
});
|
|
395
|
+
it('should throw when a child node reference does not exist in nodes map', async () => {
|
|
396
|
+
const judgeNode = {
|
|
397
|
+
type: 'binary_judgement',
|
|
398
|
+
criteria: 'Is it valid?',
|
|
399
|
+
trueChild: 'pass-node',
|
|
400
|
+
falseChild: 'dangling-node', // does not exist
|
|
401
|
+
};
|
|
402
|
+
const config = {
|
|
403
|
+
name: 'dangling-ref',
|
|
404
|
+
nodes: new Map([
|
|
405
|
+
['root', judgeNode],
|
|
406
|
+
['pass-node', VERDICT_PASS],
|
|
407
|
+
// 'dangling-node' intentionally omitted
|
|
408
|
+
]),
|
|
409
|
+
rootId: 'root',
|
|
410
|
+
};
|
|
411
|
+
await assert.rejects(dagEval(createMockLLM([]), config, BASE_TEST_CASE), /node/i);
|
|
412
|
+
});
|
|
413
|
+
it('should throw when no VerdictNode is reachable from root', async () => {
|
|
414
|
+
// Non-binary node whose only verdict key is 'good'; the LLM response used below matches no key, so no VerdictNode is reached
|
|
415
|
+
const nonBinaryNode = {
|
|
416
|
+
type: 'non_binary_judgement',
|
|
417
|
+
criteria: 'Classify quality',
|
|
418
|
+
verdicts: new Map([
|
|
419
|
+
['good', 'good-verdict'],
|
|
420
|
+
]),
|
|
421
|
+
};
|
|
422
|
+
const config = {
|
|
423
|
+
name: 'unreachable-verdict',
|
|
424
|
+
nodes: new Map([
|
|
425
|
+
['root', nonBinaryNode],
|
|
426
|
+
['good-verdict', VERDICT_PASS],
|
|
427
|
+
]),
|
|
428
|
+
rootId: 'root',
|
|
429
|
+
};
|
|
430
|
+
// LLM returns a verdict key that doesn't exist in the map
|
|
431
|
+
await assert.rejects(dagEval(createMockLLM(['unknown-label']), config, BASE_TEST_CASE), /verdict/i);
|
|
432
|
+
});
|
|
433
|
+
it('should throw when nodes map is empty', async () => {
|
|
434
|
+
const config = {
|
|
435
|
+
name: 'empty-nodes',
|
|
436
|
+
nodes: new Map(),
|
|
437
|
+
rootId: 'root',
|
|
438
|
+
};
|
|
439
|
+
await assert.rejects(dagEval(createMockLLM([]), config, BASE_TEST_CASE), /root/i);
|
|
440
|
+
});
|
|
441
|
+
it('should throw when validateDAGConfig is called with missing rootId', () => {
|
|
442
|
+
const config = {
|
|
443
|
+
name: 'missing-root-validate',
|
|
444
|
+
nodes: new Map([['other', VERDICT_PASS]]),
|
|
445
|
+
rootId: 'missing',
|
|
446
|
+
};
|
|
447
|
+
assert.throws(() => validateDAGConfig(config), /root/i);
|
|
448
|
+
});
|
|
449
|
+
it('should throw when validateDAGConfig is called with a dangling child reference', () => {
|
|
450
|
+
const judgeNode = {
|
|
451
|
+
type: 'binary_judgement',
|
|
452
|
+
criteria: 'Check',
|
|
453
|
+
trueChild: 'exists',
|
|
454
|
+
falseChild: 'does-not-exist',
|
|
455
|
+
};
|
|
456
|
+
const config = {
|
|
457
|
+
name: 'dangling-validate',
|
|
458
|
+
nodes: new Map([
|
|
459
|
+
['root', judgeNode],
|
|
460
|
+
['exists', VERDICT_PASS],
|
|
461
|
+
]),
|
|
462
|
+
rootId: 'root',
|
|
463
|
+
};
|
|
464
|
+
assert.throws(() => validateDAGConfig(config), /node/i);
|
|
465
|
+
});
|
|
466
|
+
it('should not throw when validateDAGConfig is called with a valid config', () => {
|
|
467
|
+
const judgeNode = {
|
|
468
|
+
type: 'binary_judgement',
|
|
469
|
+
criteria: 'Is it good?',
|
|
470
|
+
trueChild: 'pass-node',
|
|
471
|
+
falseChild: 'fail-node',
|
|
472
|
+
};
|
|
473
|
+
const config = {
|
|
474
|
+
name: 'valid-config',
|
|
475
|
+
nodes: new Map([
|
|
476
|
+
['root', judgeNode],
|
|
477
|
+
['pass-node', VERDICT_PASS],
|
|
478
|
+
['fail-node', VERDICT_FAIL],
|
|
479
|
+
]),
|
|
480
|
+
rootId: 'root',
|
|
481
|
+
};
|
|
482
|
+
assert.doesNotThrow(() => validateDAGConfig(config));
|
|
483
|
+
});
|
|
484
|
+
});
|
|
485
|
+
// ============================================================================
|
|
486
|
+
// Timeout Behavior
|
|
487
|
+
// ============================================================================
|
|
488
|
+
describe('dagEval — timeout', () => {
  // Rejection validator: accepts an error whose message mentions "timeout" or
  // "abort" (case-insensitive), or whose constructor is named LLMTimeoutError.
  const isTimeoutLike = (err) => {
    const text = err.message.toLowerCase();
    if (text.includes('timeout')) {
      return true;
    }
    if (text.includes('abort')) {
      return true;
    }
    return err.constructor.name === 'LLMTimeoutError';
  };

  it('should reject with a timeout error when LLM hangs and timeoutMs is exceeded', async () => {
    const nodeTable = new Map();
    nodeTable.set('root', {
      type: 'binary_judgement',
      criteria: 'Is the output relevant?',
      trueChild: 'pass-node',
      falseChild: 'fail-node',
    });
    nodeTable.set('pass-node', VERDICT_PASS);
    nodeTable.set('fail-node', VERDICT_FAIL);
    const hangingDag = { name: 'timeout-test', nodes: nodeTable, rootId: 'root' };
    // The root is a judgement node, so evaluation has to call the (hanging) LLM.
    const pendingEval = dagEval(createHangingLLM(), hangingDag, BASE_TEST_CASE, SHORT_TIMEOUT_MS);
    await assert.rejects(pendingEval, isTimeoutLike);
  });

  it('should complete without error when LLM responds before timeout', async () => {
    const verdictOnlyDag = {
      name: 'no-timeout',
      nodes: new Map([['root', VERDICT_PASS]]),
      rootId: 'root',
    };
    // VerdictNode requires no LLM call; any reasonable timeout should pass
    const outcome = await dagEval(createMockLLM([]), verdictOnlyDag, BASE_TEST_CASE, 5000);
    const aboveFloor = outcome.score >= DAG_NORMALIZED_MIN;
    const belowCeiling = outcome.score <= DAG_NORMALIZED_MAX;
    assert.ok(aboveFloor && belowCeiling);
  });
});
|
|
521
|
+
// ============================================================================
|
|
522
|
+
// Return Shape
|
|
523
|
+
// ============================================================================
|
|
524
|
+
describe('dagEval — return shape', () => {
  // Single-verdict DAG: the root itself is the verdict, so no LLM call is needed.
  const verdictOnlyDag = (name) => ({
    name,
    nodes: new Map([['root', VERDICT_PASS]]),
    rootId: 'root',
  });

  it('should return an object with score and reason fields', async () => {
    const outcome = await dagEval(createMockLLM([]), verdictOnlyDag('shape-test'), BASE_TEST_CASE);
    // Presence checks first, then type checks, in the same order for both fields.
    for (const field of ['score', 'reason']) {
      assert.ok(field in outcome, `result should have ${field} field`);
    }
    assert.ok(typeof outcome.score === 'number', 'score should be a number');
    assert.ok(typeof outcome.reason === 'string', 'reason should be a string');
  });

  it('should include the config name somewhere in the result metadata or reason', async () => {
    const evalName = 'unique-eval-name-xyz';
    const outcome = await dagEval(createMockLLM([]), verdictOnlyDag(evalName), BASE_TEST_CASE);
    // Either reason includes the name, or rawResponse does — flexible check.
    // Note the fallback: a non-empty reason alone also satisfies this assertion.
    const mentionsName = JSON.stringify(outcome).includes(evalName);
    assert.ok(mentionsName || outcome.reason.length > 0, 'result should reference the evaluation config name or have a non-empty reason');
  });
});
|
|
549
|
+
// ============================================================================
|
|
550
|
+
// ConditionalJudgementNode
|
|
551
|
+
// ============================================================================
|
|
552
|
+
describe('dagEval — ConditionalJudgementNode', () => {
  /**
   * Builds the three-node fixture shared by every test in this suite: a
   * conditional_judgement root that routes to 'pass-node' (VERDICT_PASS) via
   * aboveChild or to 'fail-node' (VERDICT_FAIL) via belowChild.
   *
   * A fresh node object and Map are created on every call so tests never
   * share mutable state.
   *
   * @param {string} name - Value for the config's `name` field.
   * @param {number} [scoreThreshold=CONDITIONAL_THRESHOLD] - Routing threshold on the root node.
   * @returns {{name: string, nodes: Map<string, object>, rootId: string}} DAG config.
   */
  const buildConditionalConfig = (name, scoreThreshold = CONDITIONAL_THRESHOLD) => ({
    name,
    nodes: new Map([
      ['root', {
        type: 'conditional_judgement',
        criteria: 'Rate the quality',
        scoreThreshold,
        aboveChild: 'pass-node',
        belowChild: 'fail-node',
      }],
      ['pass-node', VERDICT_PASS],
      ['fail-node', VERDICT_FAIL],
    ]),
    rootId: 'root',
  });

  it('should route to aboveChild when LLM score >= threshold', async () => {
    const config = buildConditionalConfig('conditional-above');
    // LLM returns 7, which is >= threshold of 5
    const result = await dagEval(createMockLLM(['7']), config, BASE_TEST_CASE);
    assert.ok(Math.abs(result.score - DAG_NORMALIZED_MAX) < FLOAT_TOLERANCE, `score >= threshold should route to pass (1.0), got ${result.score}`);
  });

  it('should route to belowChild when LLM score < threshold', async () => {
    const config = buildConditionalConfig('conditional-below');
    // LLM returns 3, which is < threshold of 5
    const result = await dagEval(createMockLLM(['3']), config, BASE_TEST_CASE);
    assert.ok(Math.abs(result.score - DAG_NORMALIZED_MIN) < FLOAT_TOLERANCE, `score < threshold should route to fail (0.0), got ${result.score}`);
  });

  it('should throw when scoreThreshold is out of [0, 10] range', async () => {
    // 11 is deliberately outside the accepted [0, 10] range.
    const config = buildConditionalConfig('invalid-threshold', 11);
    await assert.rejects(dagEval(createMockLLM([]), config, BASE_TEST_CASE), /scoreThreshold/i);
  });

  it('should throw when LLM returns non-numeric response for conditional node', async () => {
    const config = buildConditionalConfig('conditional-bad-response');
    await assert.rejects(dagEval(createMockLLM(['not-a-number']), config, BASE_TEST_CASE), /score/i);
  });

  it('should parse score from multi-line response when number is not on first line', async () => {
    const config = buildConditionalConfig('conditional-multiline');
    // LLM returns text then number on second line — should extract 8 and route above threshold
    const result = await dagEval(createMockLLM(['The quality is high.\n8']), config, BASE_TEST_CASE);
    assert.ok(Math.abs(result.score - DAG_NORMALIZED_MAX) < FLOAT_TOLERANCE, `multi-line response with score 8 should route above threshold (5) to 1.0, got ${result.score}`);
  });

  it('should not parse partial-numeric lines like "1.5x" as a score', async () => {
    const config = buildConditionalConfig('conditional-partial-numeric');
    // "Score: 1.5x" is not a fully-numeric line — should throw rather than silently parse 1.5
    await assert.rejects(dagEval(createMockLLM(['Score: 1.5x (out of 10)']), config, BASE_TEST_CASE), /Expected a line containing only a number/i);
  });

  it('should throw with descriptive message when all lines are non-numeric', async () => {
    const config = buildConditionalConfig('conditional-all-text');
    await assert.rejects(dagEval(createMockLLM(['I cannot provide a number for this.']), config, BASE_TEST_CASE), /Expected a line containing only a number/i);
  });

  it('should clamp negative LLM score to 0 and route to belowChild', async () => {
    const config = buildConditionalConfig('conditional-negative-clamp');
    // LLM returns -3: clamped to 0, which is < threshold (5), so routes to belowChild (fail)
    const result = await dagEval(createMockLLM(['-3']), config, BASE_TEST_CASE);
    assert.ok(Math.abs(result.score - DAG_NORMALIZED_MIN) < FLOAT_TOLERANCE, `negative score -3 should clamp to 0, route below threshold (5) to 0.0, got ${result.score}`);
  });

  it('should clamp LLM score to [0, 10] before comparing to threshold', async () => {
    const config = buildConditionalConfig('conditional-clamp');
    // LLM returns 100 (clamped to 10 >= 5 → above)
    const result = await dagEval(createMockLLM(['100']), config, BASE_TEST_CASE);
    // Routing above must land on the pass verdict exactly, as in the other
    // above-threshold tests; a bare `score > 0` would accept a wrong partial score.
    assert.ok(Math.abs(result.score - DAG_NORMALIZED_MAX) < FLOAT_TOLERANCE, `out-of-range score 100 should clamp to 10, route above threshold, got ${result.score}`);
  });
});
|
|
736
|
+
// ============================================================================
|
|
737
|
+
// maxDepth
|
|
738
|
+
// ============================================================================
|
|
739
|
+
describe('dagEval — maxDepth', () => {
  it('should throw when traversal depth exceeds maxDepth', async () => {
    // Binary judgement whose "true" branch continues the chain and whose
    // "false" branch bails out to the fail verdict.
    const chainLink = (criteria, trueChild) => ({
      type: 'binary_judgement',
      criteria,
      trueChild,
      falseChild: 'fail-node',
    });
    const nodeTable = new Map();
    nodeTable.set('root', chainLink('First check', 'judge-b'));
    nodeTable.set('judge-b', chainLink('Second check', 'judge-c'));
    nodeTable.set('judge-c', chainLink('Third check', 'pass-node'));
    nodeTable.set('pass-node', VERDICT_PASS);
    nodeTable.set('fail-node', VERDICT_FAIL);
    const tooDeepDag = {
      name: 'depth-exceeded',
      nodes: nodeTable,
      rootId: 'root',
      maxDepth: MAX_DEPTH_TWO, // allows visiting ≤2 nodes; 3-hop chain exceeds this
    };
    // Every judgement answers 'true', forcing the walk down the full chain.
    const alwaysTrue = createMockLLM(['true', 'true', 'true']);
    await assert.rejects(dagEval(alwaysTrue, tooDeepDag, BASE_TEST_CASE), /maxDepth/i);
  });

  it('should complete normally when traversal stays within maxDepth', async () => {
    // The root is itself a verdict, so only one node is ever visited.
    const shallowDag = {
      name: 'depth-ok',
      nodes: new Map([['root', VERDICT_PASS]]),
      rootId: 'root',
      maxDepth: MAX_DEPTH_TWO,
    };
    const outcome = await dagEval(createMockLLM([]), shallowDag, BASE_TEST_CASE);
    const aboveFloor = outcome.score >= DAG_NORMALIZED_MIN;
    const belowCeiling = outcome.score <= DAG_NORMALIZED_MAX;
    assert.ok(aboveFloor && belowCeiling);
  });
});
|
|
784
|
+
// ============================================================================
|
|
785
|
+
// edgeWeights
|
|
786
|
+
// ============================================================================
|
|
787
|
+
describe('dagEval — edgeWeights', () => {
  // Binary judgement node factory; the "false" branch always goes to 'fail-node'.
  const binaryJudge = (criteria, trueChild) => ({
    type: 'binary_judgement',
    criteria,
    trueChild,
    falseChild: 'fail-node',
  });

  // root → pass-node / fail-node, the layout used by the single-hop tests.
  const singleHopNodes = () => {
    const table = new Map();
    table.set('root', binaryJudge('Is the response relevant?', 'pass-node'));
    table.set('pass-node', VERDICT_PASS);
    table.set('fail-node', VERDICT_FAIL);
    return table;
  };

  it('should apply edge weight to final verdict score', async () => {
    const halfWeighted = new Map();
    halfWeighted.set('root:pass-node', EDGE_WEIGHT_HALF);
    const weightedDag = {
      name: 'weighted-edge',
      nodes: singleHopNodes(),
      rootId: 'root',
      edgeWeights: halfWeighted,
    };
    // Routes to pass-node (score=10), but edge weight=0.5 → final 1.0 * 0.5 = 0.5
    const outcome = await dagEval(createMockLLM(['true']), weightedDag, BASE_TEST_CASE);
    const drift = Math.abs(outcome.score - DAG_NORMALIZED_MID);
    assert.ok(drift < FLOAT_TOLERANCE, `edge weight 0.5 on pass path should yield 0.5, got ${outcome.score}`);
  });

  it('should use default weight 1.0 for edges not in edgeWeights map', async () => {
    const unweightedDag = {
      name: 'default-weight',
      nodes: new Map([['root', VERDICT_PASS]]),
      rootId: 'root',
      edgeWeights: new Map(), // empty map
    };
    const outcome = await dagEval(createMockLLM([]), unweightedDag, BASE_TEST_CASE);
    const drift = Math.abs(outcome.score - DAG_NORMALIZED_MAX);
    assert.ok(drift < FLOAT_TOLERANCE, `default weight should not affect score, expected 1.0, got ${outcome.score}`);
  });

  it('should throw when an edge weight is zero', async () => {
    const zeroWeighted = new Map();
    zeroWeighted.set('root:pass-node', 0);
    const zeroDag = {
      name: 'zero-weight',
      nodes: singleHopNodes(),
      rootId: 'root',
      edgeWeights: zeroWeighted,
    };
    const pendingEval = dagEval(createMockLLM(['true']), zeroDag, BASE_TEST_CASE);
    await assert.rejects(pendingEval, /edge weight/i);
  });

  it('should accumulate weights across multiple hops', async () => {
    const nodeTable = new Map();
    nodeTable.set('root', binaryJudge('First check', 'judge-b'));
    nodeTable.set('judge-b', binaryJudge('Second check', 'pass-node'));
    nodeTable.set('pass-node', VERDICT_PASS);
    nodeTable.set('fail-node', VERDICT_FAIL);
    // Both edges weighted at 0.5 → accumulated = 0.25
    const hopWeights = new Map();
    hopWeights.set('root:judge-b', EDGE_WEIGHT_HALF);
    hopWeights.set('judge-b:pass-node', EDGE_WEIGHT_HALF);
    const multiHopDag = {
      name: 'multi-hop-weights',
      nodes: nodeTable,
      rootId: 'root',
      edgeWeights: hopWeights,
    };
    const outcome = await dagEval(createMockLLM(['true', 'true']), multiHopDag, BASE_TEST_CASE);
    // 1.0 * 0.5 * 0.5 = 0.25
    const drift = Math.abs(outcome.score - 0.25);
    assert.ok(drift < FLOAT_TOLERANCE, `two 0.5 weights should accumulate to 0.25, got ${outcome.score}`);
  });
});
|
|
871
|
+
// ============================================================================
|
|
872
|
+
// Stress Tests (ST-R6)
|
|
873
|
+
// ============================================================================
|
|
874
|
+
// Number of binary_judgement nodes requested from buildDeepLinearDAG by the
// stress tests; also used there as the count of mock LLM responses supplied.
const ST_R6_DEEP_TREE_SIZE = 52; // 50+ nodes
|
|
875
|
+
// Number of nodes in the ring-shaped graph built by the cycle-detection stress
// test (the last node links back to node-0).
const ST_R6_CYCLE_SIZE = 20;
|
|
876
|
+
/**
 * Build a linear chain DAG: node-0 → node-1 → ... → node-(depth-1) → verdict.
 *
 * Every chain node is a binary_judgement whose trueChild advances to the next
 * node in the chain (the last one advances to 'verdict') and whose falseChild
 * jumps straight to 'verdict'. The single verdict node has raw score 8.
 *
 * @param {number} depth - Number of binary_judgement nodes; a non-negative integer.
 *   With depth 0 the chain is empty and the verdict node itself is the root.
 * @returns {{name: string, nodes: Map<string, object>, rootId: string}} DAG config
 *   containing depth + 1 nodes.
 * @throws {RangeError} If depth is negative or not an integer. Such values
 *   previously produced a config whose rootId was missing from `nodes`.
 */
function buildDeepLinearDAG(depth) {
  if (!Number.isInteger(depth) || depth < 0) {
    throw new RangeError(`buildDeepLinearDAG: depth must be a non-negative integer, got ${String(depth)}`);
  }
  const nodes = new Map();
  nodes.set('verdict', { type: 'verdict', score: 8, label: 'deep-pass' });
  // Insert from the tail of the chain back to its head, keeping the same Map
  // insertion order as before: verdict, node-(depth-1), ..., node-0.
  for (let i = depth - 1; i >= 0; i--) {
    const isLastLink = i === depth - 1;
    nodes.set(`node-${i}`, {
      type: 'binary_judgement',
      criteria: `Check step ${i}`,
      trueChild: isLastLink ? 'verdict' : `node-${i + 1}`,
      falseChild: 'verdict',
    });
  }
  return {
    name: 'deep-linear',
    nodes,
    // An empty chain has no node-0; the verdict is then the only valid root.
    rootId: depth === 0 ? 'verdict' : 'node-0',
  };
}
|
|
900
|
+
describe('dagEval stress (ST-R6)', () => {
  // Number of traversals run by the stability test at the end of this suite.
  const STABILITY_TRIALS = 50;

  it('deep tree (50+ nodes): validateDAGConfig succeeds without throwing', () => {
    const config = buildDeepLinearDAG(ST_R6_DEEP_TREE_SIZE);
    assert.doesNotThrow(() => validateDAGConfig(config), 'validateDAGConfig should not throw for a valid deep tree');
  });

  it('deep tree (50+ nodes): traversal completes and score is in [0, 1]', async () => {
    const config = buildDeepLinearDAG(ST_R6_DEEP_TREE_SIZE);
    // Always return 'true' so traversal takes all trueChild paths
    const responses = Array.from({ length: ST_R6_DEEP_TREE_SIZE }, () => 'true');
    const result = await dagEval(createMockLLM(responses), config, BASE_TEST_CASE);
    assert.ok(result.score >= DAG_NORMALIZED_MIN && result.score <= DAG_NORMALIZED_MAX, `score ${result.score} out of [0,1] for deep tree`);
  });

  it('maxDepth enforcement: passes with limit = total nodes, throws with limit = total nodes - 1', async () => {
    // buildDeepLinearDAG(N) produces N judgment nodes + 1 verdict node = N+1 total nodes visited.
    // The check is trail.length > maxDepth, so maxDepth must be >= N+1 to avoid throwing.
    const totalNodes = ST_R6_DEEP_TREE_SIZE + 1; // 52 judgment + 1 verdict
    // With maxDepth === totalNodes, trail.length never exceeds it → completes successfully
    const configPass = { ...buildDeepLinearDAG(ST_R6_DEEP_TREE_SIZE), maxDepth: totalNodes };
    const responsesPass = Array.from({ length: ST_R6_DEEP_TREE_SIZE }, () => 'true');
    const resultPass = await dagEval(createMockLLM(responsesPass), configPass, BASE_TEST_CASE);
    assert.ok(resultPass.score >= DAG_NORMALIZED_MIN && resultPass.score <= DAG_NORMALIZED_MAX, `score ${resultPass.score} out of [0,1] at exact maxDepth boundary`);
    // With maxDepth === totalNodes - 1 (= ST_R6_DEEP_TREE_SIZE), the verdict push makes trail.length = N+1 > N → throws
    const configFail = { ...buildDeepLinearDAG(ST_R6_DEEP_TREE_SIZE), maxDepth: ST_R6_DEEP_TREE_SIZE };
    const responsesFail = Array.from({ length: ST_R6_DEEP_TREE_SIZE }, () => 'true');
    await assert.rejects(dagEval(createMockLLM(responsesFail), configFail, BASE_TEST_CASE), /maxDepth/i, 'should throw when traversal exceeds maxDepth by exactly 1');
  });

  it('cyclic graph at scale: validateDAGConfig throws for a cycle in a large graph', () => {
    // Build a long chain, then create a back-edge to simulate a cycle at scale
    const nodes = new Map();
    for (let i = 0; i < ST_R6_CYCLE_SIZE; i++) {
      const nextId = i < ST_R6_CYCLE_SIZE - 1 ? `node-${i + 1}` : 'node-0'; // back-edge at end
      nodes.set(`node-${i}`, {
        type: 'binary_judgement',
        criteria: `Check ${i}`,
        trueChild: nextId,
        falseChild: nextId, // both branches continue chain
      });
    }
    const config = { name: 'cyclic-large', nodes, rootId: 'node-0' };
    assert.throws(() => validateDAGConfig(config), /cycle/i, 'should reject cyclic graph at scale');
  });

  it('score normalization: all verdict scores are clamped to [0, 1] regardless of raw score value', async () => {
    // Verdict with score=10 → normalized 1.0
    const verdictMax = { type: 'verdict', score: 10, label: 'max' };
    // Verdict with score=0 → normalized 0.0
    const verdictMin = { type: 'verdict', score: 0, label: 'min' };
    const rootNode = {
      type: 'binary_judgement',
      criteria: 'check',
      trueChild: 'verdict-max',
      falseChild: 'verdict-min',
    };
    // One config serves both runs; the mock LLM answer selects the branch.
    const configMax = {
      name: 'score-max',
      nodes: new Map([['root', rootNode], ['verdict-max', verdictMax], ['verdict-min', verdictMin]]),
      rootId: 'root',
    };
    const resultMax = await dagEval(createMockLLM(['true']), configMax, BASE_TEST_CASE);
    const resultMin = await dagEval(createMockLLM(['false']), configMax, BASE_TEST_CASE);
    assert.ok(Math.abs(resultMax.score - 1.0) < FLOAT_TOLERANCE, `score should be 1.0, got ${resultMax.score}`);
    assert.ok(Math.abs(resultMin.score - 0.0) < FLOAT_TOLERANCE, `score should be 0.0, got ${resultMin.score}`);
  });

  it('score normalization stability: 50 traversals on a 50+-node tree all produce scores in [0,1]', async () => {
    const config = buildDeepLinearDAG(ST_R6_DEEP_TREE_SIZE);
    for (let trial = 0; trial < STABILITY_TRIALS; trial++) {
      // Deterministic sweep instead of Math.random(): trial t answers 'true'
      // for the first t judgements and 'false' afterwards, so each trial leaves
      // the chain at a different node and any failure is reproducible.
      const responses = Array.from({ length: ST_R6_DEEP_TREE_SIZE }, (_, step) => (step < trial ? 'true' : 'false'));
      const result = await dagEval(createMockLLM(responses), config, BASE_TEST_CASE);
      assert.ok(result.score >= DAG_NORMALIZED_MIN && result.score <= DAG_NORMALIZED_MAX, `trial ${trial}: score ${result.score} out of [0,1]`);
    }
  });
});
|
|
973
|
+
//# sourceMappingURL=llm-judge-dag.test.js.map
|