observability-toolkit 2.0.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1285)
  1. package/README.md +166 -398
  2. package/dist/__tests__/find-constant-dedup.test.d.ts +11 -0
  3. package/dist/__tests__/find-constant-dedup.test.d.ts.map +1 -0
  4. package/dist/__tests__/find-constant-dedup.test.js +132 -0
  5. package/dist/__tests__/find-constant-dedup.test.js.map +1 -0
  6. package/dist/backends/backend-schemas.d.ts +309 -0
  7. package/dist/backends/backend-schemas.d.ts.map +1 -0
  8. package/dist/backends/backend-schemas.js +215 -0
  9. package/dist/backends/backend-schemas.js.map +1 -0
  10. package/dist/backends/cloud.d.ts +46 -0
  11. package/dist/backends/cloud.d.ts.map +1 -0
  12. package/dist/backends/cloud.js +520 -0
  13. package/dist/backends/cloud.js.map +1 -0
  14. package/dist/backends/cloud.test.d.ts +2 -0
  15. package/dist/backends/cloud.test.d.ts.map +1 -0
  16. package/dist/backends/cloud.test.js +436 -0
  17. package/dist/backends/cloud.test.js.map +1 -0
  18. package/dist/backends/index.d.ts +659 -386
  19. package/dist/backends/index.d.ts.map +1 -1
  20. package/dist/backends/index.js +318 -41
  21. package/dist/backends/index.js.map +1 -1
  22. package/dist/backends/index.test.js +578 -57
  23. package/dist/backends/index.test.js.map +1 -1
  24. package/dist/backends/local-jsonl-boolean-search.test.js +8 -7
  25. package/dist/backends/local-jsonl-boolean-search.test.js.map +1 -1
  26. package/dist/backends/local-jsonl-cache.test.js +33 -31
  27. package/dist/backends/local-jsonl-cache.test.js.map +1 -1
  28. package/dist/backends/local-jsonl-circuit-breaker.test.js +9 -7
  29. package/dist/backends/local-jsonl-circuit-breaker.test.js.map +1 -1
  30. package/dist/backends/local-jsonl-export.test.js +73 -58
  31. package/dist/backends/local-jsonl-export.test.js.map +1 -1
  32. package/dist/backends/local-jsonl-index.test.js +52 -50
  33. package/dist/backends/local-jsonl-index.test.js.map +1 -1
  34. package/dist/backends/local-jsonl-logs.test.js +47 -31
  35. package/dist/backends/local-jsonl-logs.test.js.map +1 -1
  36. package/dist/backends/local-jsonl-metrics.test.js +85 -82
  37. package/dist/backends/local-jsonl-metrics.test.js.map +1 -1
  38. package/dist/backends/local-jsonl-otlp-unwrap.test.d.ts +2 -0
  39. package/dist/backends/local-jsonl-otlp-unwrap.test.d.ts.map +1 -0
  40. package/dist/backends/local-jsonl-otlp-unwrap.test.js +602 -0
  41. package/dist/backends/local-jsonl-otlp-unwrap.test.js.map +1 -0
  42. package/dist/backends/local-jsonl-traces.test.js +161 -147
  43. package/dist/backends/local-jsonl-traces.test.js.map +1 -1
  44. package/dist/backends/local-jsonl.d.ts +37 -8
  45. package/dist/backends/local-jsonl.d.ts.map +1 -1
  46. package/dist/backends/local-jsonl.js +1088 -241
  47. package/dist/backends/local-jsonl.js.map +1 -1
  48. package/dist/backends/shared.d.ts +9 -0
  49. package/dist/backends/shared.d.ts.map +1 -0
  50. package/dist/backends/shared.js +9 -0
  51. package/dist/backends/shared.js.map +1 -0
  52. package/dist/generated/opentelemetry/proto/collector/logs/v1/logs_service_pb.d.ts +40 -0
  53. package/dist/generated/opentelemetry/proto/collector/logs/v1/logs_service_pb.d.ts.map +1 -0
  54. package/dist/generated/opentelemetry/proto/collector/logs/v1/logs_service_pb.js +27 -0
  55. package/dist/generated/opentelemetry/proto/collector/logs/v1/logs_service_pb.js.map +1 -0
  56. package/dist/generated/opentelemetry/proto/collector/metrics/v1/metrics_service_pb.d.ts +106 -0
  57. package/dist/generated/opentelemetry/proto/collector/metrics/v1/metrics_service_pb.d.ts.map +1 -0
  58. package/dist/generated/opentelemetry/proto/collector/metrics/v1/metrics_service_pb.js +43 -0
  59. package/dist/generated/opentelemetry/proto/collector/metrics/v1/metrics_service_pb.js.map +1 -0
  60. package/dist/generated/opentelemetry/proto/collector/profiles/v1development/profiles_service_pb.d.ts +111 -0
  61. package/dist/generated/opentelemetry/proto/collector/profiles/v1development/profiles_service_pb.d.ts.map +1 -0
  62. package/dist/generated/opentelemetry/proto/collector/profiles/v1development/profiles_service_pb.js +42 -0
  63. package/dist/generated/opentelemetry/proto/collector/profiles/v1development/profiles_service_pb.js.map +1 -0
  64. package/dist/generated/opentelemetry/proto/collector/trace/v1/trace_service_pb.d.ts +106 -0
  65. package/dist/generated/opentelemetry/proto/collector/trace/v1/trace_service_pb.d.ts.map +1 -0
  66. package/dist/generated/opentelemetry/proto/collector/trace/v1/trace_service_pb.js +43 -0
  67. package/dist/generated/opentelemetry/proto/collector/trace/v1/trace_service_pb.js.map +1 -0
  68. package/dist/generated/opentelemetry/proto/common/v1/common_pb.d.ts +243 -0
  69. package/dist/generated/opentelemetry/proto/common/v1/common_pb.d.ts.map +1 -0
  70. package/dist/generated/opentelemetry/proto/common/v1/common_pb.js +49 -0
  71. package/dist/generated/opentelemetry/proto/common/v1/common_pb.js.map +1 -0
  72. package/dist/generated/opentelemetry/proto/logs/v1/logs_pb.d.ts +90 -0
  73. package/dist/generated/opentelemetry/proto/logs/v1/logs_pb.d.ts.map +1 -0
  74. package/dist/generated/opentelemetry/proto/logs/v1/logs_pb.js +66 -0
  75. package/dist/generated/opentelemetry/proto/logs/v1/logs_pb.js.map +1 -0
  76. package/dist/generated/opentelemetry/proto/metrics/v1/metrics_pb.d.ts +1134 -0
  77. package/dist/generated/opentelemetry/proto/metrics/v1/metrics_pb.d.ts.map +1 -0
  78. package/dist/generated/opentelemetry/proto/metrics/v1/metrics_pb.js +223 -0
  79. package/dist/generated/opentelemetry/proto/metrics/v1/metrics_pb.js.map +1 -0
  80. package/dist/generated/opentelemetry/proto/profiles/v1development/profiles_pb.d.ts +678 -0
  81. package/dist/generated/opentelemetry/proto/profiles/v1development/profiles_pb.d.ts.map +1 -0
  82. package/dist/generated/opentelemetry/proto/profiles/v1development/profiles_pb.js +107 -0
  83. package/dist/generated/opentelemetry/proto/profiles/v1development/profiles_pb.js.map +1 -0
  84. package/dist/generated/opentelemetry/proto/resource/v1/resource_pb.d.ts +46 -0
  85. package/dist/generated/opentelemetry/proto/resource/v1/resource_pb.d.ts.map +1 -0
  86. package/dist/generated/opentelemetry/proto/resource/v1/resource_pb.js +25 -0
  87. package/dist/generated/opentelemetry/proto/resource/v1/resource_pb.js.map +1 -0
  88. package/dist/generated/opentelemetry/proto/trace/v1/trace_pb.d.ts +569 -0
  89. package/dist/generated/opentelemetry/proto/trace/v1/trace_pb.d.ts.map +1 -0
  90. package/dist/generated/opentelemetry/proto/trace/v1/trace_pb.js +195 -0
  91. package/dist/generated/opentelemetry/proto/trace/v1/trace_pb.js.map +1 -0
  92. package/dist/lib/agent-judge/agent-as-judge.d.ts +157 -0
  93. package/dist/lib/agent-judge/agent-as-judge.d.ts.map +1 -0
  94. package/dist/lib/agent-judge/agent-as-judge.js +137 -0
  95. package/dist/lib/agent-judge/agent-as-judge.js.map +1 -0
  96. package/dist/lib/agent-judge/agent-as-judge.test.d.ts.map +1 -0
  97. package/dist/lib/agent-judge/agent-as-judge.test.js +839 -0
  98. package/dist/lib/agent-judge/agent-as-judge.test.js.map +1 -0
  99. package/dist/lib/agent-judge/agent-eval-metrics.d.ts +293 -0
  100. package/dist/lib/agent-judge/agent-eval-metrics.d.ts.map +1 -0
  101. package/dist/lib/agent-judge/agent-eval-metrics.js +715 -0
  102. package/dist/lib/agent-judge/agent-eval-metrics.js.map +1 -0
  103. package/dist/lib/agent-judge/agent-eval-metrics.test.d.ts +5 -0
  104. package/dist/lib/agent-judge/agent-eval-metrics.test.d.ts.map +1 -0
  105. package/dist/lib/agent-judge/agent-eval-metrics.test.js +676 -0
  106. package/dist/lib/agent-judge/agent-eval-metrics.test.js.map +1 -0
  107. package/dist/lib/agent-judge/agent-judge-classes.d.ts +95 -0
  108. package/dist/lib/agent-judge/agent-judge-classes.d.ts.map +1 -0
  109. package/dist/lib/agent-judge/agent-judge-classes.js +222 -0
  110. package/dist/lib/agent-judge/agent-judge-classes.js.map +1 -0
  111. package/dist/lib/agent-judge/agent-judge-classes.test.d.ts +6 -0
  112. package/dist/lib/agent-judge/agent-judge-classes.test.d.ts.map +1 -0
  113. package/dist/lib/agent-judge/agent-judge-classes.test.js +271 -0
  114. package/dist/lib/agent-judge/agent-judge-classes.test.js.map +1 -0
  115. package/dist/lib/agent-judge/agent-judge-consensus.d.ts +58 -0
  116. package/dist/lib/agent-judge/agent-judge-consensus.d.ts.map +1 -0
  117. package/dist/lib/agent-judge/agent-judge-consensus.js +149 -0
  118. package/dist/lib/agent-judge/agent-judge-consensus.js.map +1 -0
  119. package/dist/lib/agent-judge/agent-judge-consensus.test.d.ts +2 -0
  120. package/dist/lib/agent-judge/agent-judge-consensus.test.d.ts.map +1 -0
  121. package/dist/lib/agent-judge/agent-judge-consensus.test.js +170 -0
  122. package/dist/lib/agent-judge/agent-judge-consensus.test.js.map +1 -0
  123. package/dist/lib/agent-judge/agent-judge-verification.d.ts +89 -0
  124. package/dist/lib/agent-judge/agent-judge-verification.d.ts.map +1 -0
  125. package/dist/lib/agent-judge/agent-judge-verification.js +235 -0
  126. package/dist/lib/agent-judge/agent-judge-verification.js.map +1 -0
  127. package/dist/lib/agent-judge/agent-judge-verification.test.d.ts +5 -0
  128. package/dist/lib/agent-judge/agent-judge-verification.test.d.ts.map +1 -0
  129. package/dist/lib/agent-judge/agent-judge-verification.test.js +399 -0
  130. package/dist/lib/agent-judge/agent-judge-verification.test.js.map +1 -0
  131. package/dist/lib/audit/agent-auditor-scoring.d.ts +167 -0
  132. package/dist/lib/audit/agent-auditor-scoring.d.ts.map +1 -0
  133. package/dist/lib/audit/agent-auditor-scoring.js +338 -0
  134. package/dist/lib/audit/agent-auditor-scoring.js.map +1 -0
  135. package/dist/lib/audit/agent-auditor-scoring.test.d.ts +2 -0
  136. package/dist/lib/audit/agent-auditor-scoring.test.d.ts.map +1 -0
  137. package/dist/lib/audit/agent-auditor-scoring.test.js +576 -0
  138. package/dist/lib/audit/agent-auditor-scoring.test.js.map +1 -0
  139. package/dist/lib/audit/audit-record.d.ts +139 -0
  140. package/dist/lib/audit/audit-record.d.ts.map +1 -0
  141. package/dist/lib/audit/audit-record.js +288 -0
  142. package/dist/lib/audit/audit-record.js.map +1 -0
  143. package/dist/lib/audit/audit-record.test.d.ts +5 -0
  144. package/dist/lib/audit/audit-record.test.d.ts.map +1 -0
  145. package/dist/lib/audit/audit-record.test.js +258 -0
  146. package/dist/lib/audit/audit-record.test.js.map +1 -0
  147. package/dist/lib/audit/audit-scoring-constants.d.ts +57 -0
  148. package/dist/lib/audit/audit-scoring-constants.d.ts.map +1 -0
  149. package/dist/lib/audit/audit-scoring-constants.js +59 -0
  150. package/dist/lib/audit/audit-scoring-constants.js.map +1 -0
  151. package/dist/lib/audit/compliance-report.d.ts +125 -0
  152. package/dist/lib/audit/compliance-report.d.ts.map +1 -0
  153. package/dist/lib/audit/compliance-report.js +205 -0
  154. package/dist/lib/audit/compliance-report.js.map +1 -0
  155. package/dist/lib/audit/compliance-report.test.d.ts +5 -0
  156. package/dist/lib/audit/compliance-report.test.d.ts.map +1 -0
  157. package/dist/lib/audit/compliance-report.test.js +290 -0
  158. package/dist/lib/audit/compliance-report.test.js.map +1 -0
  159. package/dist/lib/audit/retention-guard.d.ts +41 -0
  160. package/dist/lib/audit/retention-guard.d.ts.map +1 -0
  161. package/dist/lib/audit/retention-guard.js +103 -0
  162. package/dist/lib/audit/retention-guard.js.map +1 -0
  163. package/dist/lib/audit/retention-guard.test.d.ts +5 -0
  164. package/dist/lib/audit/retention-guard.test.d.ts.map +1 -0
  165. package/dist/lib/audit/retention-guard.test.js +109 -0
  166. package/dist/lib/audit/retention-guard.test.js.map +1 -0
  167. package/dist/lib/audit/skill-auditor-scoring.d.ts +69 -0
  168. package/dist/lib/audit/skill-auditor-scoring.d.ts.map +1 -0
  169. package/dist/lib/audit/skill-auditor-scoring.js +149 -0
  170. package/dist/lib/audit/skill-auditor-scoring.js.map +1 -0
  171. package/dist/lib/audit/skill-auditor-scoring.test.d.ts +2 -0
  172. package/dist/lib/audit/skill-auditor-scoring.test.d.ts.map +1 -0
  173. package/dist/lib/audit/skill-auditor-scoring.test.js +369 -0
  174. package/dist/lib/audit/skill-auditor-scoring.test.js.map +1 -0
  175. package/dist/lib/audit/verification-events.d.ts +119 -0
  176. package/dist/lib/audit/verification-events.d.ts.map +1 -0
  177. package/dist/lib/audit/verification-events.js +175 -0
  178. package/dist/lib/audit/verification-events.js.map +1 -0
  179. package/dist/lib/audit/verification-events.test.d.ts.map +1 -0
  180. package/dist/lib/audit/verification-events.test.js +197 -0
  181. package/dist/lib/audit/verification-events.test.js.map +1 -0
  182. package/dist/lib/core/constants-models.d.ts +90 -0
  183. package/dist/lib/core/constants-models.d.ts.map +1 -0
  184. package/dist/lib/core/constants-models.js +208 -0
  185. package/dist/lib/core/constants-models.js.map +1 -0
  186. package/dist/lib/core/constants-otel.d.ts +68 -0
  187. package/dist/lib/core/constants-otel.d.ts.map +1 -0
  188. package/dist/lib/core/constants-otel.js +128 -0
  189. package/dist/lib/core/constants-otel.js.map +1 -0
  190. package/dist/lib/core/constants-symlink.test.d.ts.map +1 -0
  191. package/dist/lib/core/constants-symlink.test.js +358 -0
  192. package/dist/lib/core/constants-symlink.test.js.map +1 -0
  193. package/dist/lib/core/constants-telemetry.d.ts +21 -0
  194. package/dist/lib/core/constants-telemetry.d.ts.map +1 -0
  195. package/dist/lib/core/constants-telemetry.js +162 -0
  196. package/dist/lib/core/constants-telemetry.js.map +1 -0
  197. package/dist/lib/core/constants.d.ts +152 -0
  198. package/dist/lib/core/constants.d.ts.map +1 -0
  199. package/dist/lib/core/constants.js +223 -0
  200. package/dist/lib/core/constants.js.map +1 -0
  201. package/dist/lib/core/constants.test.d.ts.map +1 -0
  202. package/dist/lib/core/constants.test.js +833 -0
  203. package/dist/lib/core/constants.test.js.map +1 -0
  204. package/dist/lib/core/doc-sync.test.d.ts +9 -0
  205. package/dist/lib/core/doc-sync.test.d.ts.map +1 -0
  206. package/dist/lib/core/doc-sync.test.js +159 -0
  207. package/dist/lib/core/doc-sync.test.js.map +1 -0
  208. package/dist/lib/core/edge-cases.test.d.ts.map +1 -0
  209. package/dist/lib/core/edge-cases.test.js +637 -0
  210. package/dist/lib/core/edge-cases.test.js.map +1 -0
  211. package/dist/lib/core/file-utils.d.ts +360 -0
  212. package/dist/lib/core/file-utils.d.ts.map +1 -0
  213. package/dist/lib/core/file-utils.js +890 -0
  214. package/dist/lib/core/file-utils.js.map +1 -0
  215. package/dist/lib/core/file-utils.test-constants.d.ts +38 -0
  216. package/dist/lib/core/file-utils.test-constants.d.ts.map +1 -0
  217. package/dist/lib/core/file-utils.test-constants.js +40 -0
  218. package/dist/lib/core/file-utils.test-constants.js.map +1 -0
  219. package/dist/lib/core/file-utils.test.d.ts.map +1 -0
  220. package/dist/lib/core/file-utils.test.js +1329 -0
  221. package/dist/lib/core/file-utils.test.js.map +1 -0
  222. package/dist/lib/core/input-validator.d.ts +125 -0
  223. package/dist/lib/core/input-validator.d.ts.map +1 -0
  224. package/dist/lib/core/input-validator.fuzz.test.d.ts.map +1 -0
  225. package/dist/lib/core/input-validator.fuzz.test.js +302 -0
  226. package/dist/lib/core/input-validator.fuzz.test.js.map +1 -0
  227. package/dist/lib/core/input-validator.js +348 -0
  228. package/dist/lib/core/input-validator.js.map +1 -0
  229. package/dist/lib/core/input-validator.test.d.ts.map +1 -0
  230. package/dist/lib/core/input-validator.test.js +465 -0
  231. package/dist/lib/core/input-validator.test.js.map +1 -0
  232. package/dist/lib/core/logger.d.ts +32 -0
  233. package/dist/lib/core/logger.d.ts.map +1 -0
  234. package/dist/lib/core/logger.js +104 -0
  235. package/dist/lib/core/logger.js.map +1 -0
  236. package/dist/lib/core/logger.test.d.ts.map +1 -0
  237. package/dist/lib/core/logger.test.js.map +1 -0
  238. package/dist/lib/core/schema-types.d.ts +37 -0
  239. package/dist/lib/core/schema-types.d.ts.map +1 -0
  240. package/dist/lib/core/schema-types.js +29 -0
  241. package/dist/lib/core/schema-types.js.map +1 -0
  242. package/dist/lib/core/server-utils.d.ts +98 -0
  243. package/dist/lib/core/server-utils.d.ts.map +1 -0
  244. package/dist/lib/core/server-utils.js +193 -0
  245. package/dist/lib/core/server-utils.js.map +1 -0
  246. package/dist/lib/core/shared-schemas.d.ts +301 -0
  247. package/dist/lib/core/shared-schemas.d.ts.map +1 -0
  248. package/dist/lib/core/shared-schemas.js +222 -0
  249. package/dist/lib/core/shared-schemas.js.map +1 -0
  250. package/dist/lib/core/shared-schemas.test.d.ts.map +1 -0
  251. package/dist/lib/core/shared-schemas.test.js +136 -0
  252. package/dist/lib/core/shared-schemas.test.js.map +1 -0
  253. package/dist/lib/core/units.d.ts +67 -0
  254. package/dist/lib/core/units.d.ts.map +1 -0
  255. package/dist/lib/core/units.js +88 -0
  256. package/dist/lib/core/units.js.map +1 -0
  257. package/dist/lib/cost/cost-estimation.d.ts +264 -0
  258. package/dist/lib/cost/cost-estimation.d.ts.map +1 -0
  259. package/dist/lib/cost/cost-estimation.js +541 -0
  260. package/dist/lib/cost/cost-estimation.js.map +1 -0
  261. package/dist/lib/cost/cost-estimation.test.d.ts +5 -0
  262. package/dist/lib/cost/cost-estimation.test.d.ts.map +1 -0
  263. package/dist/lib/cost/cost-estimation.test.js +701 -0
  264. package/dist/lib/cost/cost-estimation.test.js.map +1 -0
  265. package/dist/lib/cost/pricing-cache.d.ts +59 -0
  266. package/dist/lib/cost/pricing-cache.d.ts.map +1 -0
  267. package/dist/lib/cost/pricing-cache.js +120 -0
  268. package/dist/lib/cost/pricing-cache.js.map +1 -0
  269. package/dist/lib/cost/pricing-cache.test.d.ts +5 -0
  270. package/dist/lib/cost/pricing-cache.test.d.ts.map +1 -0
  271. package/dist/lib/cost/pricing-cache.test.js +176 -0
  272. package/dist/lib/cost/pricing-cache.test.js.map +1 -0
  273. package/dist/lib/dashboard-file-utils.d.ts +35 -0
  274. package/dist/lib/dashboard-file-utils.d.ts.map +1 -0
  275. package/dist/lib/dashboard-file-utils.js +94 -0
  276. package/dist/lib/dashboard-file-utils.js.map +1 -0
  277. package/dist/lib/errors/error-sanitizer.d.ts +62 -0
  278. package/dist/lib/errors/error-sanitizer.d.ts.map +1 -0
  279. package/dist/lib/errors/error-sanitizer.js +235 -0
  280. package/dist/lib/errors/error-sanitizer.js.map +1 -0
  281. package/dist/lib/errors/error-sanitizer.test.d.ts.map +1 -0
  282. package/dist/lib/errors/error-sanitizer.test.js +534 -0
  283. package/dist/lib/errors/error-sanitizer.test.js.map +1 -0
  284. package/dist/lib/errors/error-types.d.ts +59 -0
  285. package/dist/lib/errors/error-types.d.ts.map +1 -0
  286. package/dist/lib/errors/error-types.js +187 -0
  287. package/dist/lib/errors/error-types.js.map +1 -0
  288. package/dist/lib/errors/error-types.test.d.ts.map +1 -0
  289. package/dist/lib/errors/error-types.test.js +246 -0
  290. package/dist/lib/errors/error-types.test.js.map +1 -0
  291. package/dist/lib/errors/query-sanitizer.d.ts.map +1 -0
  292. package/dist/lib/errors/query-sanitizer.js +269 -0
  293. package/dist/lib/errors/query-sanitizer.js.map +1 -0
  294. package/dist/lib/errors/query-sanitizer.test.d.ts.map +1 -0
  295. package/dist/lib/errors/query-sanitizer.test.js +403 -0
  296. package/dist/lib/errors/query-sanitizer.test.js.map +1 -0
  297. package/dist/lib/exports/confident-export.d.ts +105 -0
  298. package/dist/lib/exports/confident-export.d.ts.map +1 -0
  299. package/dist/lib/exports/confident-export.js +385 -0
  300. package/dist/lib/exports/confident-export.js.map +1 -0
  301. package/dist/lib/exports/confident-export.test.d.ts.map +1 -0
  302. package/dist/lib/exports/confident-export.test.js +848 -0
  303. package/dist/lib/exports/confident-export.test.js.map +1 -0
  304. package/dist/lib/exports/datadog-export.d.ts +200 -0
  305. package/dist/lib/exports/datadog-export.d.ts.map +1 -0
  306. package/dist/lib/exports/datadog-export.js +488 -0
  307. package/dist/lib/exports/datadog-export.js.map +1 -0
  308. package/dist/lib/exports/datadog-export.test.d.ts +2 -0
  309. package/dist/lib/exports/datadog-export.test.d.ts.map +1 -0
  310. package/dist/lib/exports/datadog-export.test.js +890 -0
  311. package/dist/lib/exports/datadog-export.test.js.map +1 -0
  312. package/dist/lib/exports/export-config-schemas.d.ts +67 -0
  313. package/dist/lib/exports/export-config-schemas.d.ts.map +1 -0
  314. package/dist/lib/exports/export-config-schemas.js +120 -0
  315. package/dist/lib/exports/export-config-schemas.js.map +1 -0
  316. package/dist/lib/exports/export-config-schemas.test.d.ts +8 -0
  317. package/dist/lib/exports/export-config-schemas.test.d.ts.map +1 -0
  318. package/dist/lib/exports/export-config-schemas.test.js +503 -0
  319. package/dist/lib/exports/export-config-schemas.test.js.map +1 -0
  320. package/dist/lib/exports/export-utils.d.ts +127 -0
  321. package/dist/lib/exports/export-utils.d.ts.map +1 -0
  322. package/dist/lib/exports/export-utils.js +303 -0
  323. package/dist/lib/exports/export-utils.js.map +1 -0
  324. package/dist/lib/exports/export-utils.test.d.ts.map +1 -0
  325. package/dist/lib/exports/export-utils.test.js +344 -0
  326. package/dist/lib/exports/export-utils.test.js.map +1 -0
  327. package/dist/lib/exports/langfuse-export.d.ts +129 -0
  328. package/dist/lib/exports/langfuse-export.d.ts.map +1 -0
  329. package/dist/lib/exports/langfuse-export.js +370 -0
  330. package/dist/lib/exports/langfuse-export.js.map +1 -0
  331. package/dist/lib/exports/langfuse-export.test.d.ts.map +1 -0
  332. package/dist/lib/exports/langfuse-export.test.js +1020 -0
  333. package/dist/lib/exports/langfuse-export.test.js.map +1 -0
  334. package/dist/lib/exports/otlp-export.d.ts +179 -0
  335. package/dist/lib/exports/otlp-export.d.ts.map +1 -0
  336. package/dist/lib/exports/otlp-export.js +397 -0
  337. package/dist/lib/exports/otlp-export.js.map +1 -0
  338. package/dist/lib/exports/otlp-format-converter.d.ts +70 -0
  339. package/dist/lib/exports/otlp-format-converter.d.ts.map +1 -0
  340. package/dist/lib/exports/otlp-format-converter.js +401 -0
  341. package/dist/lib/exports/otlp-format-converter.js.map +1 -0
  342. package/dist/lib/exports/otlp-proto-encode.d.ts +53 -0
  343. package/dist/lib/exports/otlp-proto-encode.d.ts.map +1 -0
  344. package/dist/lib/exports/otlp-proto-encode.js +165 -0
  345. package/dist/lib/exports/otlp-proto-encode.js.map +1 -0
  346. package/dist/lib/exports/otlp-proto-encode.test.d.ts +7 -0
  347. package/dist/lib/exports/otlp-proto-encode.test.d.ts.map +1 -0
  348. package/dist/lib/exports/otlp-proto-encode.test.js +997 -0
  349. package/dist/lib/exports/otlp-proto-encode.test.js.map +1 -0
  350. package/dist/lib/exports/phoenix-export.d.ts +119 -0
  351. package/dist/lib/exports/phoenix-export.d.ts.map +1 -0
  352. package/dist/lib/exports/phoenix-export.js +448 -0
  353. package/dist/lib/exports/phoenix-export.js.map +1 -0
  354. package/dist/lib/exports/phoenix-export.test.d.ts.map +1 -0
  355. package/dist/lib/exports/phoenix-export.test.js +816 -0
  356. package/dist/lib/exports/phoenix-export.test.js.map +1 -0
  357. package/dist/lib/index.d.ts +16 -0
  358. package/dist/lib/index.d.ts.map +1 -0
  359. package/dist/lib/index.js +31 -0
  360. package/dist/lib/index.js.map +1 -0
  361. package/dist/lib/judge/evaluation-hooks-schemas.d.ts +186 -0
  362. package/dist/lib/judge/evaluation-hooks-schemas.d.ts.map +1 -0
  363. package/dist/lib/judge/evaluation-hooks-schemas.js +125 -0
  364. package/dist/lib/judge/evaluation-hooks-schemas.js.map +1 -0
  365. package/dist/lib/judge/evaluation-hooks.d.ts +88 -0
  366. package/dist/lib/judge/evaluation-hooks.d.ts.map +1 -0
  367. package/dist/lib/judge/evaluation-hooks.js +658 -0
  368. package/dist/lib/judge/evaluation-hooks.js.map +1 -0
  369. package/dist/lib/judge/evaluation-hooks.test.d.ts.map +1 -0
  370. package/dist/lib/judge/evaluation-hooks.test.js +934 -0
  371. package/dist/lib/judge/evaluation-hooks.test.js.map +1 -0
  372. package/dist/lib/judge/llm-as-judge.d.ts +138 -0
  373. package/dist/lib/judge/llm-as-judge.d.ts.map +1 -0
  374. package/dist/lib/judge/llm-as-judge.js +103 -0
  375. package/dist/lib/judge/llm-as-judge.js.map +1 -0
  376. package/dist/lib/judge/llm-as-judge.test.d.ts.map +1 -0
  377. package/dist/lib/judge/llm-as-judge.test.js +2179 -0
  378. package/dist/lib/judge/llm-as-judge.test.js.map +1 -0
  379. package/dist/lib/judge/llm-judge-bias.d.ts +44 -0
  380. package/dist/lib/judge/llm-judge-bias.d.ts.map +1 -0
  381. package/dist/lib/judge/llm-judge-bias.js +130 -0
  382. package/dist/lib/judge/llm-judge-bias.js.map +1 -0
  383. package/dist/lib/judge/llm-judge-bias.test.d.ts +2 -0
  384. package/dist/lib/judge/llm-judge-bias.test.d.ts.map +1 -0
  385. package/dist/lib/judge/llm-judge-bias.test.js +380 -0
  386. package/dist/lib/judge/llm-judge-bias.test.js.map +1 -0
  387. package/dist/lib/judge/llm-judge-code.d.ts +99 -0
  388. package/dist/lib/judge/llm-judge-code.d.ts.map +1 -0
  389. package/dist/lib/judge/llm-judge-code.js +261 -0
  390. package/dist/lib/judge/llm-judge-code.js.map +1 -0
  391. package/dist/lib/judge/llm-judge-code.test.d.ts +2 -0
  392. package/dist/lib/judge/llm-judge-code.test.d.ts.map +1 -0
  393. package/dist/lib/judge/llm-judge-code.test.js +981 -0
  394. package/dist/lib/judge/llm-judge-code.test.js.map +1 -0
  395. package/dist/lib/judge/llm-judge-config.d.ts +241 -0
  396. package/dist/lib/judge/llm-judge-config.d.ts.map +1 -0
  397. package/dist/lib/judge/llm-judge-config.js +390 -0
  398. package/dist/lib/judge/llm-judge-config.js.map +1 -0
  399. package/dist/lib/judge/llm-judge-config.test.d.ts +5 -0
  400. package/dist/lib/judge/llm-judge-config.test.d.ts.map +1 -0
  401. package/dist/lib/judge/llm-judge-config.test.js +392 -0
  402. package/dist/lib/judge/llm-judge-config.test.js.map +1 -0
  403. package/dist/lib/judge/llm-judge-constants.d.ts +111 -0
  404. package/dist/lib/judge/llm-judge-constants.d.ts.map +1 -0
  405. package/dist/lib/judge/llm-judge-constants.js +150 -0
  406. package/dist/lib/judge/llm-judge-constants.js.map +1 -0
  407. package/dist/lib/judge/llm-judge-dag.d.ts +57 -0
  408. package/dist/lib/judge/llm-judge-dag.d.ts.map +1 -0
  409. package/dist/lib/judge/llm-judge-dag.js +217 -0
  410. package/dist/lib/judge/llm-judge-dag.js.map +1 -0
  411. package/dist/lib/judge/llm-judge-dag.test.d.ts +8 -0
  412. package/dist/lib/judge/llm-judge-dag.test.d.ts.map +1 -0
  413. package/dist/lib/judge/llm-judge-dag.test.js +973 -0
  414. package/dist/lib/judge/llm-judge-dag.test.js.map +1 -0
  415. package/dist/lib/judge/llm-judge-domain.d.ts +42 -0
  416. package/dist/lib/judge/llm-judge-domain.d.ts.map +1 -0
  417. package/dist/lib/judge/llm-judge-domain.js +167 -0
  418. package/dist/lib/judge/llm-judge-domain.js.map +1 -0
  419. package/dist/lib/judge/llm-judge-domain.test.d.ts +6 -0
  420. package/dist/lib/judge/llm-judge-domain.test.d.ts.map +1 -0
  421. package/dist/lib/judge/llm-judge-domain.test.js +337 -0
  422. package/dist/lib/judge/llm-judge-domain.test.js.map +1 -0
  423. package/dist/lib/judge/llm-judge-geval.d.ts +42 -0
  424. package/dist/lib/judge/llm-judge-geval.d.ts.map +1 -0
  425. package/dist/lib/judge/llm-judge-geval.js +213 -0
  426. package/dist/lib/judge/llm-judge-geval.js.map +1 -0
  427. package/dist/lib/judge/llm-judge-geval.test.d.ts +2 -0
  428. package/dist/lib/judge/llm-judge-geval.test.d.ts.map +1 -0
  429. package/dist/lib/judge/llm-judge-geval.test.js +556 -0
  430. package/dist/lib/judge/llm-judge-geval.test.js.map +1 -0
  431. package/dist/lib/judge/llm-judge-otel.test.d.ts +9 -0
  432. package/dist/lib/judge/llm-judge-otel.test.d.ts.map +1 -0
  433. package/dist/lib/judge/llm-judge-otel.test.js +91 -0
  434. package/dist/lib/judge/llm-judge-otel.test.js.map +1 -0
  435. package/dist/lib/judge/llm-judge-qag.d.ts +38 -0
  436. package/dist/lib/judge/llm-judge-qag.d.ts.map +1 -0
  437. package/dist/lib/judge/llm-judge-qag.js +205 -0
  438. package/dist/lib/judge/llm-judge-qag.js.map +1 -0
  439. package/dist/lib/judge/llm-judge-qag.test.d.ts +2 -0
  440. package/dist/lib/judge/llm-judge-qag.test.d.ts.map +1 -0
  441. package/dist/lib/judge/llm-judge-qag.test.js +386 -0
  442. package/dist/lib/judge/llm-judge-qag.test.js.map +1 -0
  443. package/dist/lib/judge/llm-judge-resilience.d.ts +74 -0
  444. package/dist/lib/judge/llm-judge-resilience.d.ts.map +1 -0
  445. package/dist/lib/judge/llm-judge-resilience.js +146 -0
  446. package/dist/lib/judge/llm-judge-resilience.js.map +1 -0
  447. package/dist/lib/judge/llm-judge-resilience.test.d.ts +2 -0
  448. package/dist/lib/judge/llm-judge-resilience.test.d.ts.map +1 -0
  449. package/dist/lib/judge/llm-judge-resilience.test.js +353 -0
  450. package/dist/lib/judge/llm-judge-resilience.test.js.map +1 -0
  451. package/dist/lib/judge/llm-judge-security.d.ts +106 -0
  452. package/dist/lib/judge/llm-judge-security.d.ts.map +1 -0
  453. package/dist/lib/judge/llm-judge-security.js +314 -0
  454. package/dist/lib/judge/llm-judge-security.js.map +1 -0
  455. package/dist/lib/judge/llm-judge-security.test.d.ts +2 -0
  456. package/dist/lib/judge/llm-judge-security.test.d.ts.map +1 -0
  457. package/dist/lib/judge/llm-judge-security.test.js +1011 -0
  458. package/dist/lib/judge/llm-judge-security.test.js.map +1 -0
  459. package/dist/lib/observability/context-accumulator.d.ts +32 -0
  460. package/dist/lib/observability/context-accumulator.d.ts.map +1 -0
  461. package/dist/lib/observability/context-accumulator.js +87 -0
  462. package/dist/lib/observability/context-accumulator.js.map +1 -0
  463. package/dist/lib/observability/evaluation-events.d.ts +35 -0
  464. package/dist/lib/observability/evaluation-events.d.ts.map +1 -0
  465. package/dist/lib/observability/evaluation-events.js +90 -0
  466. package/dist/lib/observability/evaluation-events.js.map +1 -0
  467. package/dist/lib/observability/file-span-exporter.d.ts +17 -0
  468. package/dist/lib/observability/file-span-exporter.d.ts.map +1 -0
  469. package/dist/lib/observability/file-span-exporter.js +49 -0
  470. package/dist/lib/observability/file-span-exporter.js.map +1 -0
  471. package/dist/lib/observability/histogram-bucket-constants.d.ts +25 -0
  472. package/dist/lib/observability/histogram-bucket-constants.d.ts.map +1 -0
  473. package/dist/lib/observability/histogram-bucket-constants.js +60 -0
  474. package/dist/lib/observability/histogram-bucket-constants.js.map +1 -0
  475. package/dist/lib/observability/histogram.d.ts +112 -0
  476. package/dist/lib/observability/histogram.d.ts.map +1 -0
  477. package/dist/lib/observability/histogram.js +170 -0
  478. package/dist/lib/observability/histogram.js.map +1 -0
  479. package/dist/lib/observability/histogram.test.d.ts.map +1 -0
  480. package/dist/lib/observability/histogram.test.js +385 -0
  481. package/dist/lib/observability/histogram.test.js.map +1 -0
  482. package/dist/lib/observability/indexer.d.ts +114 -0
  483. package/dist/lib/observability/indexer.d.ts.map +1 -0
  484. package/dist/lib/observability/indexer.js +402 -0
  485. package/dist/lib/observability/indexer.js.map +1 -0
  486. package/dist/lib/observability/indexer.test.d.ts.map +1 -0
  487. package/dist/lib/observability/indexer.test.js +713 -0
  488. package/dist/lib/observability/indexer.test.js.map +1 -0
  489. package/dist/lib/observability/instrumentation-eval.test.d.ts +5 -0
  490. package/dist/lib/observability/instrumentation-eval.test.d.ts.map +1 -0
  491. package/dist/lib/observability/instrumentation-eval.test.js +63 -0
  492. package/dist/lib/observability/instrumentation-eval.test.js.map +1 -0
  493. package/dist/lib/observability/instrumentation-init-errors.test.d.ts +13 -0
  494. package/dist/lib/observability/instrumentation-init-errors.test.d.ts.map +1 -0
  495. package/dist/lib/observability/instrumentation-init-errors.test.js +194 -0
  496. package/dist/lib/observability/instrumentation-init-errors.test.js.map +1 -0
  497. package/dist/lib/observability/instrumentation-retry-timeout.test.d.ts +15 -0
  498. package/dist/lib/observability/instrumentation-retry-timeout.test.d.ts.map +1 -0
  499. package/dist/lib/observability/instrumentation-retry-timeout.test.js +188 -0
  500. package/dist/lib/observability/instrumentation-retry-timeout.test.js.map +1 -0
  501. package/dist/lib/observability/instrumentation-set-otel.test.d.ts +5 -0
  502. package/dist/lib/observability/instrumentation-set-otel.test.d.ts.map +1 -0
  503. package/dist/lib/observability/instrumentation-set-otel.test.js +59 -0
  504. package/dist/lib/observability/instrumentation-set-otel.test.js.map +1 -0
  505. package/dist/lib/observability/instrumentation.d.ts +158 -0
  506. package/dist/lib/observability/instrumentation.d.ts.map +1 -0
  507. package/dist/lib/observability/instrumentation.integration.test.d.ts.map +1 -0
  508. package/dist/lib/observability/instrumentation.integration.test.js +590 -0
  509. package/dist/lib/observability/instrumentation.integration.test.js.map +1 -0
  510. package/dist/lib/observability/instrumentation.js +512 -0
  511. package/dist/lib/observability/instrumentation.js.map +1 -0
  512. package/dist/lib/observability/instrumentation.test.d.ts.map +1 -0
  513. package/dist/lib/observability/instrumentation.test.js +822 -0
  514. package/dist/lib/observability/instrumentation.test.js.map +1 -0
  515. package/dist/lib/observability/mcp-semconv-constants.d.ts +98 -0
  516. package/dist/lib/observability/mcp-semconv-constants.d.ts.map +1 -0
  517. package/dist/lib/observability/mcp-semconv-constants.js +102 -0
  518. package/dist/lib/observability/mcp-semconv-constants.js.map +1 -0
  519. package/dist/lib/observability/mcp-semconv.d.ts +37 -0
  520. package/dist/lib/observability/mcp-semconv.d.ts.map +1 -0
  521. package/dist/lib/observability/mcp-semconv.js +87 -0
  522. package/dist/lib/observability/mcp-semconv.js.map +1 -0
  523. package/dist/lib/observability/mcp-semconv.test.d.ts +2 -0
  524. package/dist/lib/observability/mcp-semconv.test.d.ts.map +1 -0
  525. package/dist/lib/observability/mcp-semconv.test.js +168 -0
  526. package/dist/lib/observability/mcp-semconv.test.js.map +1 -0
  527. package/dist/lib/observability/metrics.d.ts +100 -0
  528. package/dist/lib/observability/metrics.d.ts.map +1 -0
  529. package/dist/lib/observability/metrics.js +429 -0
  530. package/dist/lib/observability/metrics.js.map +1 -0
  531. package/dist/lib/observability/metrics.test.d.ts.map +1 -0
  532. package/dist/lib/observability/metrics.test.js +191 -0
  533. package/dist/lib/observability/metrics.test.js.map +1 -0
  534. package/dist/lib/observability/observability-test-constants.d.ts +34 -0
  535. package/dist/lib/observability/observability-test-constants.d.ts.map +1 -0
  536. package/dist/lib/observability/observability-test-constants.js +55 -0
  537. package/dist/lib/observability/observability-test-constants.js.map +1 -0
  538. package/dist/lib/observability/opentelemetry-resources.test.d.ts +2 -0
  539. package/dist/lib/observability/opentelemetry-resources.test.d.ts.map +1 -0
  540. package/dist/lib/observability/opentelemetry-resources.test.js +19 -0
  541. package/dist/lib/observability/opentelemetry-resources.test.js.map +1 -0
  542. package/dist/lib/observability/parse-stats.d.ts.map +1 -0
  543. package/dist/lib/observability/parse-stats.js +207 -0
  544. package/dist/lib/observability/parse-stats.js.map +1 -0
  545. package/dist/lib/observability/parse-stats.test.d.ts.map +1 -0
  546. package/dist/lib/observability/parse-stats.test.js +287 -0
  547. package/dist/lib/observability/parse-stats.test.js.map +1 -0
  548. package/dist/lib/observability/render-trace-tree.d.ts +31 -0
  549. package/dist/lib/observability/render-trace-tree.d.ts.map +1 -0
  550. package/dist/lib/observability/render-trace-tree.js +95 -0
  551. package/dist/lib/observability/render-trace-tree.js.map +1 -0
  552. package/dist/lib/observability/render-trace-tree.test.d.ts +5 -0
  553. package/dist/lib/observability/render-trace-tree.test.d.ts.map +1 -0
  554. package/dist/lib/observability/render-trace-tree.test.js +97 -0
  555. package/dist/lib/observability/render-trace-tree.test.js.map +1 -0
  556. package/dist/lib/observability/span-attributes.d.ts +27 -0
  557. package/dist/lib/observability/span-attributes.d.ts.map +1 -0
  558. package/dist/lib/observability/span-attributes.js +85 -0
  559. package/dist/lib/observability/span-attributes.js.map +1 -0
  560. package/dist/lib/observability/trace-anomaly-detector.d.ts +23 -0
  561. package/dist/lib/observability/trace-anomaly-detector.d.ts.map +1 -0
  562. package/dist/lib/observability/trace-anomaly-detector.js +211 -0
  563. package/dist/lib/observability/trace-anomaly-detector.js.map +1 -0
  564. package/dist/lib/observability/trace-anomaly-detector.test.d.ts +5 -0
  565. package/dist/lib/observability/trace-anomaly-detector.test.d.ts.map +1 -0
  566. package/dist/lib/observability/trace-anomaly-detector.test.js +224 -0
  567. package/dist/lib/observability/trace-anomaly-detector.test.js.map +1 -0
  568. package/dist/lib/observability/trace-anomaly-schemas.d.ts +189 -0
  569. package/dist/lib/observability/trace-anomaly-schemas.d.ts.map +1 -0
  570. package/dist/lib/observability/trace-anomaly-schemas.js +167 -0
  571. package/dist/lib/observability/trace-anomaly-schemas.js.map +1 -0
  572. package/dist/lib/privacy/content-redaction.d.ts +141 -0
  573. package/dist/lib/privacy/content-redaction.d.ts.map +1 -0
  574. package/dist/lib/privacy/content-redaction.js +210 -0
  575. package/dist/lib/privacy/content-redaction.js.map +1 -0
  576. package/dist/lib/privacy/content-redaction.test.d.ts +2 -0
  577. package/dist/lib/privacy/content-redaction.test.d.ts.map +1 -0
  578. package/dist/lib/privacy/content-redaction.test.js +302 -0
  579. package/dist/lib/privacy/content-redaction.test.js.map +1 -0
  580. package/dist/lib/quality/bucket-utils.d.ts +17 -0
  581. package/dist/lib/quality/bucket-utils.d.ts.map +1 -0
  582. package/dist/lib/quality/bucket-utils.js +31 -0
  583. package/dist/lib/quality/bucket-utils.js.map +1 -0
  584. package/dist/lib/quality/bucket-utils.test.d.ts +2 -0
  585. package/dist/lib/quality/bucket-utils.test.d.ts.map +1 -0
  586. package/dist/lib/quality/bucket-utils.test.js +42 -0
  587. package/dist/lib/quality/bucket-utils.test.js.map +1 -0
  588. package/dist/lib/quality/qfe-backtest-detail.test.d.ts +5 -0
  589. package/dist/lib/quality/qfe-backtest-detail.test.d.ts.map +1 -0
  590. package/dist/lib/quality/qfe-backtest-detail.test.js +179 -0
  591. package/dist/lib/quality/qfe-backtest-detail.test.js.map +1 -0
  592. package/dist/lib/quality/qfe-calibration-paths.test.d.ts +5 -0
  593. package/dist/lib/quality/qfe-calibration-paths.test.d.ts.map +1 -0
  594. package/dist/lib/quality/qfe-calibration-paths.test.js +203 -0
  595. package/dist/lib/quality/qfe-calibration-paths.test.js.map +1 -0
  596. package/dist/lib/quality/qfe-correlation-helpers.test.d.ts +6 -0
  597. package/dist/lib/quality/qfe-correlation-helpers.test.d.ts.map +1 -0
  598. package/dist/lib/quality/qfe-correlation-helpers.test.js +143 -0
  599. package/dist/lib/quality/qfe-correlation-helpers.test.js.map +1 -0
  600. package/dist/lib/quality/qfe-cqi-paths.test.d.ts +6 -0
  601. package/dist/lib/quality/qfe-cqi-paths.test.d.ts.map +1 -0
  602. package/dist/lib/quality/qfe-cqi-paths.test.js +231 -0
  603. package/dist/lib/quality/qfe-cqi-paths.test.js.map +1 -0
  604. package/dist/lib/quality/qfe-critic-internals.test.d.ts +6 -0
  605. package/dist/lib/quality/qfe-critic-internals.test.d.ts.map +1 -0
  606. package/dist/lib/quality/qfe-critic-internals.test.js +191 -0
  607. package/dist/lib/quality/qfe-critic-internals.test.js.map +1 -0
  608. package/dist/lib/quality/qfe-derived-paths.test.d.ts +2 -0
  609. package/dist/lib/quality/qfe-derived-paths.test.d.ts.map +1 -0
  610. package/dist/lib/quality/qfe-derived-paths.test.js +372 -0
  611. package/dist/lib/quality/qfe-derived-paths.test.js.map +1 -0
  612. package/dist/lib/quality/qfe-dynamics-paths.test.d.ts +8 -0
  613. package/dist/lib/quality/qfe-dynamics-paths.test.d.ts.map +1 -0
  614. package/dist/lib/quality/qfe-dynamics-paths.test.js +223 -0
  615. package/dist/lib/quality/qfe-dynamics-paths.test.js.map +1 -0
  616. package/dist/lib/quality/qfe-granger-internals.test.d.ts +6 -0
  617. package/dist/lib/quality/qfe-granger-internals.test.d.ts.map +1 -0
  618. package/dist/lib/quality/qfe-granger-internals.test.js +158 -0
  619. package/dist/lib/quality/qfe-granger-internals.test.js.map +1 -0
  620. package/dist/lib/quality/qfe-label-normalize.test.d.ts +7 -0
  621. package/dist/lib/quality/qfe-label-normalize.test.d.ts.map +1 -0
  622. package/dist/lib/quality/qfe-label-normalize.test.js +332 -0
  623. package/dist/lib/quality/qfe-label-normalize.test.js.map +1 -0
  624. package/dist/lib/quality/qfe-ordinal-edge.test.d.ts +6 -0
  625. package/dist/lib/quality/qfe-ordinal-edge.test.d.ts.map +1 -0
  626. package/dist/lib/quality/qfe-ordinal-edge.test.js +98 -0
  627. package/dist/lib/quality/qfe-ordinal-edge.test.js.map +1 -0
  628. package/dist/lib/quality/qfe-roles-detail.test.d.ts +5 -0
  629. package/dist/lib/quality/qfe-roles-detail.test.d.ts.map +1 -0
  630. package/dist/lib/quality/qfe-roles-detail.test.js +115 -0
  631. package/dist/lib/quality/qfe-roles-detail.test.js.map +1 -0
  632. package/dist/lib/quality/qfe-rolling-detail.test.d.ts +7 -0
  633. package/dist/lib/quality/qfe-rolling-detail.test.d.ts.map +1 -0
  634. package/dist/lib/quality/qfe-rolling-detail.test.js +249 -0
  635. package/dist/lib/quality/qfe-rolling-detail.test.js.map +1 -0
  636. package/dist/lib/quality/qfe-stats-internals.test.d.ts +7 -0
  637. package/dist/lib/quality/qfe-stats-internals.test.d.ts.map +1 -0
  638. package/dist/lib/quality/qfe-stats-internals.test.js +143 -0
  639. package/dist/lib/quality/qfe-stats-internals.test.js.map +1 -0
  640. package/dist/lib/quality/qfe-streaming.test.d.ts +5 -0
  641. package/dist/lib/quality/qfe-streaming.test.d.ts.map +1 -0
  642. package/dist/lib/quality/qfe-streaming.test.js +239 -0
  643. package/dist/lib/quality/qfe-streaming.test.js.map +1 -0
  644. package/dist/lib/quality/qfe-sweep-detail.test.d.ts +6 -0
  645. package/dist/lib/quality/qfe-sweep-detail.test.d.ts.map +1 -0
  646. package/dist/lib/quality/qfe-sweep-detail.test.js +291 -0
  647. package/dist/lib/quality/qfe-sweep-detail.test.js.map +1 -0
  648. package/dist/lib/quality/quality-alerts.d.ts +23 -0
  649. package/dist/lib/quality/quality-alerts.d.ts.map +1 -0
  650. package/dist/lib/quality/quality-alerts.js +89 -0
  651. package/dist/lib/quality/quality-alerts.js.map +1 -0
  652. package/dist/lib/quality/quality-alerts.test.d.ts +2 -0
  653. package/dist/lib/quality/quality-alerts.test.d.ts.map +1 -0
  654. package/dist/lib/quality/quality-alerts.test.js +86 -0
  655. package/dist/lib/quality/quality-alerts.test.js.map +1 -0
  656. package/dist/lib/quality/quality-constants.d.ts +294 -0
  657. package/dist/lib/quality/quality-constants.d.ts.map +1 -0
  658. package/dist/lib/quality/quality-constants.js +335 -0
  659. package/dist/lib/quality/quality-constants.js.map +1 -0
  660. package/dist/lib/quality/quality-feature-engineering.d.ts +1071 -0
  661. package/dist/lib/quality/quality-feature-engineering.d.ts.map +1 -0
  662. package/dist/lib/quality/quality-feature-engineering.js +2076 -0
  663. package/dist/lib/quality/quality-feature-engineering.js.map +1 -0
  664. package/dist/lib/quality/quality-feature-engineering.test.d.ts +5 -0
  665. package/dist/lib/quality/quality-feature-engineering.test.d.ts.map +1 -0
  666. package/dist/lib/quality/quality-feature-engineering.test.js +2908 -0
  667. package/dist/lib/quality/quality-feature-engineering.test.js.map +1 -0
  668. package/dist/lib/quality/quality-metrics.d.ts +943 -0
  669. package/dist/lib/quality/quality-metrics.d.ts.map +1 -0
  670. package/dist/lib/quality/quality-metrics.js +1151 -0
  671. package/dist/lib/quality/quality-metrics.js.map +1 -0
  672. package/dist/lib/quality/quality-metrics.test.d.ts +5 -0
  673. package/dist/lib/quality/quality-metrics.test.d.ts.map +1 -0
  674. package/dist/lib/quality/quality-metrics.test.js +2766 -0
  675. package/dist/lib/quality/quality-metrics.test.js.map +1 -0
  676. package/dist/lib/quality/quality-multi-agent.d.ts +106 -0
  677. package/dist/lib/quality/quality-multi-agent.d.ts.map +1 -0
  678. package/dist/lib/quality/quality-multi-agent.js +124 -0
  679. package/dist/lib/quality/quality-multi-agent.js.map +1 -0
  680. package/dist/lib/quality/quality-multi-agent.test.d.ts +6 -0
  681. package/dist/lib/quality/quality-multi-agent.test.d.ts.map +1 -0
  682. package/dist/lib/quality/quality-multi-agent.test.js +163 -0
  683. package/dist/lib/quality/quality-multi-agent.test.js.map +1 -0
  684. package/dist/lib/quality/quality-sla.d.ts +35 -0
  685. package/dist/lib/quality/quality-sla.d.ts.map +1 -0
  686. package/dist/lib/quality/quality-sla.js +62 -0
  687. package/dist/lib/quality/quality-sla.js.map +1 -0
  688. package/dist/lib/quality/quality-sla.test.d.ts +5 -0
  689. package/dist/lib/quality/quality-sla.test.d.ts.map +1 -0
  690. package/dist/lib/quality/quality-sla.test.js +144 -0
  691. package/dist/lib/quality/quality-sla.test.js.map +1 -0
  692. package/dist/lib/quality/quality-test-constants.d.ts +23 -0
  693. package/dist/lib/quality/quality-test-constants.d.ts.map +1 -0
  694. package/dist/lib/quality/quality-test-constants.js +25 -0
  695. package/dist/lib/quality/quality-test-constants.js.map +1 -0
  696. package/dist/lib/quality/quality-trends.d.ts +101 -0
  697. package/dist/lib/quality/quality-trends.d.ts.map +1 -0
  698. package/dist/lib/quality/quality-trends.js +299 -0
  699. package/dist/lib/quality/quality-trends.js.map +1 -0
  700. package/dist/lib/quality/quality-trends.test.d.ts +6 -0
  701. package/dist/lib/quality/quality-trends.test.d.ts.map +1 -0
  702. package/dist/lib/quality/quality-trends.test.js +377 -0
  703. package/dist/lib/quality/quality-trends.test.js.map +1 -0
  704. package/dist/lib/quality/quality-views.d.ts +966 -0
  705. package/dist/lib/quality/quality-views.d.ts.map +1 -0
  706. package/dist/lib/quality/quality-views.js +367 -0
  707. package/dist/lib/quality/quality-views.js.map +1 -0
  708. package/dist/lib/quality/quality-views.test.d.ts +6 -0
  709. package/dist/lib/quality/quality-views.test.d.ts.map +1 -0
  710. package/dist/lib/quality/quality-views.test.js +262 -0
  711. package/dist/lib/quality/quality-views.test.js.map +1 -0
  712. package/dist/lib/quality/quality-visualization.d.ts +112 -0
  713. package/dist/lib/quality/quality-visualization.d.ts.map +1 -0
  714. package/dist/lib/quality/quality-visualization.js +136 -0
  715. package/dist/lib/quality/quality-visualization.js.map +1 -0
  716. package/dist/lib/quality/quality-visualization.test.d.ts +5 -0
  717. package/dist/lib/quality/quality-visualization.test.d.ts.map +1 -0
  718. package/dist/lib/quality/quality-visualization.test.js +189 -0
  719. package/dist/lib/quality/quality-visualization.test.js.map +1 -0
  720. package/dist/lib/resilience/cache.d.ts +56 -0
  721. package/dist/lib/resilience/cache.d.ts.map +1 -0
  722. package/dist/lib/resilience/cache.js +96 -0
  723. package/dist/lib/resilience/cache.js.map +1 -0
  724. package/dist/lib/resilience/cache.test.d.ts.map +1 -0
  725. package/dist/lib/resilience/cache.test.js +106 -0
  726. package/dist/lib/resilience/cache.test.js.map +1 -0
  727. package/dist/lib/resilience/circuit-breaker.d.ts +147 -0
  728. package/dist/lib/resilience/circuit-breaker.d.ts.map +1 -0
  729. package/dist/lib/resilience/circuit-breaker.js +251 -0
  730. package/dist/lib/resilience/circuit-breaker.js.map +1 -0
  731. package/dist/lib/resilience/circuit-breaker.test.d.ts.map +1 -0
  732. package/dist/lib/resilience/circuit-breaker.test.js +266 -0
  733. package/dist/lib/resilience/circuit-breaker.test.js.map +1 -0
  734. package/dist/lib/resilience/toon-encoder.d.ts +31 -0
  735. package/dist/lib/resilience/toon-encoder.d.ts.map +1 -0
  736. package/dist/lib/resilience/toon-encoder.js +66 -0
  737. package/dist/lib/resilience/toon-encoder.js.map +1 -0
  738. package/dist/lib/resilience/toon-encoder.test.d.ts.map +1 -0
  739. package/dist/lib/resilience/toon-encoder.test.js +86 -0
  740. package/dist/lib/resilience/toon-encoder.test.js.map +1 -0
  741. package/dist/lib/testing/mock-llm-builder.d.ts +139 -0
  742. package/dist/lib/testing/mock-llm-builder.d.ts.map +1 -0
  743. package/dist/lib/testing/mock-llm-builder.js +254 -0
  744. package/dist/lib/testing/mock-llm-builder.js.map +1 -0
  745. package/dist/lib/testing/mock-llm-builder.test.d.ts +5 -0
  746. package/dist/lib/testing/mock-llm-builder.test.d.ts.map +1 -0
  747. package/dist/lib/testing/mock-llm-builder.test.js +304 -0
  748. package/dist/lib/testing/mock-llm-builder.test.js.map +1 -0
  749. package/dist/lib/validation/api-schemas.d.ts +705 -0
  750. package/dist/lib/validation/api-schemas.d.ts.map +1 -0
  751. package/dist/lib/validation/api-schemas.js +351 -0
  752. package/dist/lib/validation/api-schemas.js.map +1 -0
  753. package/dist/lib/validation/api-schemas.test.d.ts +5 -0
  754. package/dist/lib/validation/api-schemas.test.d.ts.map +1 -0
  755. package/dist/lib/validation/api-schemas.test.js +427 -0
  756. package/dist/lib/validation/api-schemas.test.js.map +1 -0
  757. package/dist/lib/validation/dashboard-schemas.d.ts +203 -0
  758. package/dist/lib/validation/dashboard-schemas.d.ts.map +1 -0
  759. package/dist/lib/validation/dashboard-schemas.js +186 -0
  760. package/dist/lib/validation/dashboard-schemas.js.map +1 -0
  761. package/dist/lib/validation/dashboard-schemas.test.d.ts +5 -0
  762. package/dist/lib/validation/dashboard-schemas.test.d.ts.map +1 -0
  763. package/dist/lib/validation/dashboard-schemas.test.js +353 -0
  764. package/dist/lib/validation/dashboard-schemas.test.js.map +1 -0
  765. package/dist/server.d.ts +2 -1
  766. package/dist/server.d.ts.map +1 -1
  767. package/dist/server.js +158 -144
  768. package/dist/server.js.map +1 -1
  769. package/dist/server.test.js +102 -95
  770. package/dist/server.test.js.map +1 -1
  771. package/dist/test-helpers/assertions.d.ts +6 -0
  772. package/dist/test-helpers/assertions.d.ts.map +1 -0
  773. package/dist/test-helpers/assertions.js +11 -0
  774. package/dist/test-helpers/assertions.js.map +1 -0
  775. package/dist/test-helpers/env-utils.d.ts +0 -64
  776. package/dist/test-helpers/env-utils.d.ts.map +1 -1
  777. package/dist/test-helpers/env-utils.js +0 -100
  778. package/dist/test-helpers/env-utils.js.map +1 -1
  779. package/dist/test-helpers/fuzz-generators.d.ts.map +1 -1
  780. package/dist/test-helpers/fuzz-generators.js +62 -22
  781. package/dist/test-helpers/fuzz-generators.js.map +1 -1
  782. package/dist/test-helpers/index.d.ts +3 -2
  783. package/dist/test-helpers/index.d.ts.map +1 -1
  784. package/dist/test-helpers/index.js +4 -2
  785. package/dist/test-helpers/index.js.map +1 -1
  786. package/dist/test-helpers/memfs-utils.test.js +81 -76
  787. package/dist/test-helpers/memfs-utils.test.js.map +1 -1
  788. package/dist/test-helpers/mock-backends.d.ts +19 -17
  789. package/dist/test-helpers/mock-backends.d.ts.map +1 -1
  790. package/dist/test-helpers/mock-backends.js +16 -4
  791. package/dist/test-helpers/mock-backends.js.map +1 -1
  792. package/dist/test-helpers/mock-backends.test.js +43 -112
  793. package/dist/test-helpers/mock-backends.test.js.map +1 -1
  794. package/dist/test-helpers/race-condition-helpers.d.ts.map +1 -1
  795. package/dist/test-helpers/race-condition-helpers.js +3 -2
  796. package/dist/test-helpers/race-condition-helpers.js.map +1 -1
  797. package/dist/test-helpers/schema-validators.d.ts +2 -2
  798. package/dist/test-helpers/schema-validators.d.ts.map +1 -1
  799. package/dist/test-helpers/schema-validators.js +35 -31
  800. package/dist/test-helpers/schema-validators.js.map +1 -1
  801. package/dist/test-helpers/test-constants.d.ts +74 -0
  802. package/dist/test-helpers/test-constants.d.ts.map +1 -0
  803. package/dist/test-helpers/test-constants.js +78 -0
  804. package/dist/test-helpers/test-constants.js.map +1 -0
  805. package/dist/test-helpers/test-data-builders.d.ts +25 -7
  806. package/dist/test-helpers/test-data-builders.d.ts.map +1 -1
  807. package/dist/test-helpers/test-data-builders.js +32 -9
  808. package/dist/test-helpers/test-data-builders.js.map +1 -1
  809. package/dist/test-helpers/test-data-builders.test.js +116 -107
  810. package/dist/test-helpers/test-data-builders.test.js.map +1 -1
  811. package/dist/test-helpers/tool-validators.d.ts +1 -1
  812. package/dist/test-helpers/tool-validators.d.ts.map +1 -1
  813. package/dist/test-helpers/tool-validators.js +10 -10
  814. package/dist/test-helpers/tool-validators.js.map +1 -1
  815. package/dist/tools/audit-trail.d.ts +170 -0
  816. package/dist/tools/audit-trail.d.ts.map +1 -0
  817. package/dist/tools/audit-trail.js +109 -0
  818. package/dist/tools/audit-trail.js.map +1 -0
  819. package/dist/tools/audit-trail.test.d.ts +5 -0
  820. package/dist/tools/audit-trail.test.d.ts.map +1 -0
  821. package/dist/tools/audit-trail.test.js +122 -0
  822. package/dist/tools/audit-trail.test.js.map +1 -0
  823. package/dist/tools/context-stats.d.ts +6 -20
  824. package/dist/tools/context-stats.d.ts.map +1 -1
  825. package/dist/tools/context-stats.js +106 -88
  826. package/dist/tools/context-stats.js.map +1 -1
  827. package/dist/tools/context-stats.test.js +109 -60
  828. package/dist/tools/context-stats.test.js.map +1 -1
  829. package/dist/tools/detect-trace-anomalies.d.ts +123 -0
  830. package/dist/tools/detect-trace-anomalies.d.ts.map +1 -0
  831. package/dist/tools/detect-trace-anomalies.js +66 -0
  832. package/dist/tools/detect-trace-anomalies.js.map +1 -0
  833. package/dist/tools/estimate-cost.d.ts +77 -0
  834. package/dist/tools/estimate-cost.d.ts.map +1 -0
  835. package/dist/tools/estimate-cost.js +104 -0
  836. package/dist/tools/estimate-cost.js.map +1 -0
  837. package/dist/tools/estimate-cost.test.d.ts +5 -0
  838. package/dist/tools/estimate-cost.test.d.ts.map +1 -0
  839. package/dist/tools/estimate-cost.test.js +343 -0
  840. package/dist/tools/estimate-cost.test.js.map +1 -0
  841. package/dist/tools/export-base.d.ts +77 -0
  842. package/dist/tools/export-base.d.ts.map +1 -0
  843. package/dist/tools/export-base.js +150 -0
  844. package/dist/tools/export-base.js.map +1 -0
  845. package/dist/tools/export-base.test.d.ts +18 -0
  846. package/dist/tools/export-base.test.d.ts.map +1 -0
  847. package/dist/tools/export-base.test.js +220 -0
  848. package/dist/tools/export-base.test.js.map +1 -0
  849. package/dist/tools/export-confident.d.ts +94 -90
  850. package/dist/tools/export-confident.d.ts.map +1 -1
  851. package/dist/tools/export-confident.js +17 -115
  852. package/dist/tools/export-confident.js.map +1 -1
  853. package/dist/tools/export-confident.test.js +79 -75
  854. package/dist/tools/export-confident.test.js.map +1 -1
  855. package/dist/tools/export-datadog.d.ts +77 -116
  856. package/dist/tools/export-datadog.d.ts.map +1 -1
  857. package/dist/tools/export-datadog.js +38 -40
  858. package/dist/tools/export-datadog.js.map +1 -1
  859. package/dist/tools/export-datadog.test.js +122 -165
  860. package/dist/tools/export-datadog.test.js.map +1 -1
  861. package/dist/tools/export-jaeger.d.ts +100 -0
  862. package/dist/tools/export-jaeger.d.ts.map +1 -0
  863. package/dist/tools/export-jaeger.js +154 -0
  864. package/dist/tools/export-jaeger.js.map +1 -0
  865. package/dist/tools/export-jaeger.test.d.ts +2 -0
  866. package/dist/tools/export-jaeger.test.d.ts.map +1 -0
  867. package/dist/tools/export-jaeger.test.js +113 -0
  868. package/dist/tools/export-jaeger.test.js.map +1 -0
  869. package/dist/tools/export-langfuse.d.ts +78 -80
  870. package/dist/tools/export-langfuse.d.ts.map +1 -1
  871. package/dist/tools/export-langfuse.js +15 -113
  872. package/dist/tools/export-langfuse.js.map +1 -1
  873. package/dist/tools/export-langfuse.test.js +70 -81
  874. package/dist/tools/export-langfuse.test.js.map +1 -1
  875. package/dist/tools/export-phoenix.d.ts +115 -90
  876. package/dist/tools/export-phoenix.d.ts.map +1 -1
  877. package/dist/tools/export-phoenix.js +29 -117
  878. package/dist/tools/export-phoenix.js.map +1 -1
  879. package/dist/tools/export-phoenix.test.js +95 -94
  880. package/dist/tools/export-phoenix.test.js.map +1 -1
  881. package/dist/tools/get-trace-url.d.ts +2 -10
  882. package/dist/tools/get-trace-url.d.ts.map +1 -1
  883. package/dist/tools/get-trace-url.js +5 -8
  884. package/dist/tools/get-trace-url.js.map +1 -1
  885. package/dist/tools/get-trace-url.test.js +81 -399
  886. package/dist/tools/get-trace-url.test.js.map +1 -1
  887. package/dist/tools/hallucination-detection.d.ts +203 -0
  888. package/dist/tools/hallucination-detection.d.ts.map +1 -0
  889. package/dist/tools/hallucination-detection.js +189 -0
  890. package/dist/tools/hallucination-detection.js.map +1 -0
  891. package/dist/tools/hallucination-detection.test.d.ts +5 -0
  892. package/dist/tools/hallucination-detection.test.d.ts.map +1 -0
  893. package/dist/tools/hallucination-detection.test.js +529 -0
  894. package/dist/tools/hallucination-detection.test.js.map +1 -0
  895. package/dist/tools/health-check.d.ts +9 -16
  896. package/dist/tools/health-check.d.ts.map +1 -1
  897. package/dist/tools/health-check.js +88 -101
  898. package/dist/tools/health-check.js.map +1 -1
  899. package/dist/tools/health-check.test.js +72 -165
  900. package/dist/tools/health-check.test.js.map +1 -1
  901. package/dist/tools/index.d.ts +13 -0
  902. package/dist/tools/index.d.ts.map +1 -1
  903. package/dist/tools/index.js +13 -0
  904. package/dist/tools/index.js.map +1 -1
  905. package/dist/tools/ingest-constants.d.ts +8 -0
  906. package/dist/tools/ingest-constants.d.ts.map +1 -0
  907. package/dist/tools/ingest-constants.js +8 -0
  908. package/dist/tools/ingest-constants.js.map +1 -0
  909. package/dist/tools/ingest-spans.d.ts +45 -0
  910. package/dist/tools/ingest-spans.d.ts.map +1 -0
  911. package/dist/tools/ingest-spans.js +129 -0
  912. package/dist/tools/ingest-spans.js.map +1 -0
  913. package/dist/tools/ingest-spans.test.d.ts +5 -0
  914. package/dist/tools/ingest-spans.test.d.ts.map +1 -0
  915. package/dist/tools/ingest-spans.test.js +250 -0
  916. package/dist/tools/ingest-spans.test.js.map +1 -0
  917. package/dist/tools/ingest-traces.d.ts +76 -0
  918. package/dist/tools/ingest-traces.d.ts.map +1 -0
  919. package/dist/tools/ingest-traces.js +164 -0
  920. package/dist/tools/ingest-traces.js.map +1 -0
  921. package/dist/tools/ingest-traces.test.d.ts +5 -0
  922. package/dist/tools/ingest-traces.test.d.ts.map +1 -0
  923. package/dist/tools/ingest-traces.test.js +483 -0
  924. package/dist/tools/ingest-traces.test.js.map +1 -0
  925. package/dist/tools/inject-evaluations.d.ts +136 -1197
  926. package/dist/tools/inject-evaluations.d.ts.map +1 -1
  927. package/dist/tools/inject-evaluations.js +65 -53
  928. package/dist/tools/inject-evaluations.js.map +1 -1
  929. package/dist/tools/inject-evaluations.test.js +83 -71
  930. package/dist/tools/inject-evaluations.test.js.map +1 -1
  931. package/dist/tools/manage-datasets.d.ts +850 -0
  932. package/dist/tools/manage-datasets.d.ts.map +1 -0
  933. package/dist/tools/manage-datasets.js +139 -0
  934. package/dist/tools/manage-datasets.js.map +1 -0
  935. package/dist/tools/manage-datasets.test.d.ts +5 -0
  936. package/dist/tools/manage-datasets.test.d.ts.map +1 -0
  937. package/dist/tools/manage-datasets.test.js +430 -0
  938. package/dist/tools/manage-datasets.test.js.map +1 -0
  939. package/dist/tools/multi-agent-coordination.d.ts +178 -0
  940. package/dist/tools/multi-agent-coordination.d.ts.map +1 -0
  941. package/dist/tools/multi-agent-coordination.js +270 -0
  942. package/dist/tools/multi-agent-coordination.js.map +1 -0
  943. package/dist/tools/multi-agent-coordination.test.d.ts +5 -0
  944. package/dist/tools/multi-agent-coordination.test.d.ts.map +1 -0
  945. package/dist/tools/multi-agent-coordination.test.js +530 -0
  946. package/dist/tools/multi-agent-coordination.test.js.map +1 -0
  947. package/dist/tools/query-evaluations.d.ts +147 -105
  948. package/dist/tools/query-evaluations.d.ts.map +1 -1
  949. package/dist/tools/query-evaluations.js +205 -178
  950. package/dist/tools/query-evaluations.js.map +1 -1
  951. package/dist/tools/query-evaluations.test.js +386 -391
  952. package/dist/tools/query-evaluations.test.js.map +1 -1
  953. package/dist/tools/query-llm-events.d.ts +100 -75
  954. package/dist/tools/query-llm-events.d.ts.map +1 -1
  955. package/dist/tools/query-llm-events.js +106 -80
  956. package/dist/tools/query-llm-events.js.map +1 -1
  957. package/dist/tools/query-llm-events.test.js +183 -346
  958. package/dist/tools/query-llm-events.test.js.map +1 -1
  959. package/dist/tools/query-logs.d.ts +45 -58
  960. package/dist/tools/query-logs.d.ts.map +1 -1
  961. package/dist/tools/query-logs.js +54 -101
  962. package/dist/tools/query-logs.js.map +1 -1
  963. package/dist/tools/query-logs.test.js +118 -314
  964. package/dist/tools/query-logs.test.js.map +1 -1
  965. package/dist/tools/query-metric-histograms.d.ts +112 -0
  966. package/dist/tools/query-metric-histograms.d.ts.map +1 -0
  967. package/dist/tools/query-metric-histograms.js +69 -0
  968. package/dist/tools/query-metric-histograms.js.map +1 -0
  969. package/dist/tools/query-metric-histograms.test.d.ts +5 -0
  970. package/dist/tools/query-metric-histograms.test.d.ts.map +1 -0
  971. package/dist/tools/query-metric-histograms.test.js +209 -0
  972. package/dist/tools/query-metric-histograms.test.js.map +1 -0
  973. package/dist/tools/query-metrics.d.ts +159 -60
  974. package/dist/tools/query-metrics.d.ts.map +1 -1
  975. package/dist/tools/query-metrics.js +133 -111
  976. package/dist/tools/query-metrics.js.map +1 -1
  977. package/dist/tools/query-metrics.test.js +314 -389
  978. package/dist/tools/query-metrics.test.js.map +1 -1
  979. package/dist/tools/query-regressions.d.ts +76 -0
  980. package/dist/tools/query-regressions.d.ts.map +1 -0
  981. package/dist/tools/query-regressions.js +122 -0
  982. package/dist/tools/query-regressions.js.map +1 -0
  983. package/dist/tools/query-regressions.test.d.ts +8 -0
  984. package/dist/tools/query-regressions.test.d.ts.map +1 -0
  985. package/dist/tools/query-regressions.test.js +129 -0
  986. package/dist/tools/query-regressions.test.js.map +1 -0
  987. package/dist/tools/query-traces.d.ts +103 -71
  988. package/dist/tools/query-traces.d.ts.map +1 -1
  989. package/dist/tools/query-traces.js +75 -106
  990. package/dist/tools/query-traces.js.map +1 -1
  991. package/dist/tools/query-traces.test.js +140 -846
  992. package/dist/tools/query-traces.test.js.map +1 -1
  993. package/dist/tools/query-verifications.d.ts +55 -43
  994. package/dist/tools/query-verifications.d.ts.map +1 -1
  995. package/dist/tools/query-verifications.js +47 -46
  996. package/dist/tools/query-verifications.js.map +1 -1
  997. package/dist/tools/query-verifications.test.js +42 -35
  998. package/dist/tools/query-verifications.test.js.map +1 -1
  999. package/dist/tools/routing-telemetry.d.ts +168 -0
  1000. package/dist/tools/routing-telemetry.d.ts.map +1 -0
  1001. package/dist/tools/routing-telemetry.js +267 -0
  1002. package/dist/tools/routing-telemetry.js.map +1 -0
  1003. package/dist/tools/routing-telemetry.test.d.ts +5 -0
  1004. package/dist/tools/routing-telemetry.test.d.ts.map +1 -0
  1005. package/dist/tools/routing-telemetry.test.js +747 -0
  1006. package/dist/tools/routing-telemetry.test.js.map +1 -0
  1007. package/dist/tools/setup-claudeignore.d.ts +4 -32
  1008. package/dist/tools/setup-claudeignore.d.ts.map +1 -1
  1009. package/dist/tools/setup-claudeignore.js +18 -22
  1010. package/dist/tools/setup-claudeignore.js.map +1 -1
  1011. package/dist/tools/setup-claudeignore.test.js +50 -49
  1012. package/dist/tools/setup-claudeignore.test.js.map +1 -1
  1013. package/dist/tools/token-budget.d.ts +170 -0
  1014. package/dist/tools/token-budget.d.ts.map +1 -0
  1015. package/dist/tools/token-budget.js +219 -0
  1016. package/dist/tools/token-budget.js.map +1 -0
  1017. package/dist/tools/token-budget.test.d.ts +5 -0
  1018. package/dist/tools/token-budget.test.d.ts.map +1 -0
  1019. package/dist/tools/token-budget.test.js +293 -0
  1020. package/dist/tools/token-budget.test.js.map +1 -0
  1021. package/package.json +72 -10
  1022. package/dist/backends/local-jsonl.test.d.ts +0 -2
  1023. package/dist/backends/local-jsonl.test.d.ts.map +0 -1
  1024. package/dist/backends/local-jsonl.test.js +0 -4651
  1025. package/dist/backends/local-jsonl.test.js.map +0 -1
  1026. package/dist/backends/signoz-api-circuit-breaker.test.d.ts +0 -6
  1027. package/dist/backends/signoz-api-circuit-breaker.test.d.ts.map +0 -1
  1028. package/dist/backends/signoz-api-circuit-breaker.test.js +0 -548
  1029. package/dist/backends/signoz-api-circuit-breaker.test.js.map +0 -1
  1030. package/dist/backends/signoz-api-rate-limiter.test.d.ts +0 -6
  1031. package/dist/backends/signoz-api-rate-limiter.test.d.ts.map +0 -1
  1032. package/dist/backends/signoz-api-rate-limiter.test.js +0 -390
  1033. package/dist/backends/signoz-api-rate-limiter.test.js.map +0 -1
  1034. package/dist/backends/signoz-api-ssrf.test.d.ts +0 -6
  1035. package/dist/backends/signoz-api-ssrf.test.d.ts.map +0 -1
  1036. package/dist/backends/signoz-api-ssrf.test.js +0 -216
  1037. package/dist/backends/signoz-api-ssrf.test.js.map +0 -1
  1038. package/dist/backends/signoz-api-test-helpers.d.ts +0 -80
  1039. package/dist/backends/signoz-api-test-helpers.d.ts.map +0 -1
  1040. package/dist/backends/signoz-api-test-helpers.js +0 -79
  1041. package/dist/backends/signoz-api-test-helpers.js.map +0 -1
  1042. package/dist/backends/signoz-api.d.ts +0 -109
  1043. package/dist/backends/signoz-api.d.ts.map +0 -1
  1044. package/dist/backends/signoz-api.integration.test.d.ts +0 -8
  1045. package/dist/backends/signoz-api.integration.test.d.ts.map +0 -1
  1046. package/dist/backends/signoz-api.integration.test.js +0 -137
  1047. package/dist/backends/signoz-api.integration.test.js.map +0 -1
  1048. package/dist/backends/signoz-api.js +0 -1132
  1049. package/dist/backends/signoz-api.js.map +0 -1
  1050. package/dist/backends/signoz-api.test.d.ts +0 -11
  1051. package/dist/backends/signoz-api.test.d.ts.map +0 -1
  1052. package/dist/backends/signoz-api.test.js +0 -832
  1053. package/dist/backends/signoz-api.test.js.map +0 -1
  1054. package/dist/lib/agent-as-judge.d.ts +0 -388
  1055. package/dist/lib/agent-as-judge.d.ts.map +0 -1
  1056. package/dist/lib/agent-as-judge.js +0 -740
  1057. package/dist/lib/agent-as-judge.js.map +0 -1
  1058. package/dist/lib/agent-as-judge.test.d.ts.map +0 -1
  1059. package/dist/lib/agent-as-judge.test.js +0 -816
  1060. package/dist/lib/agent-as-judge.test.js.map +0 -1
  1061. package/dist/lib/cache.d.ts +0 -90
  1062. package/dist/lib/cache.d.ts.map +0 -1
  1063. package/dist/lib/cache.js +0 -133
  1064. package/dist/lib/cache.js.map +0 -1
  1065. package/dist/lib/cache.test.d.ts.map +0 -1
  1066. package/dist/lib/cache.test.js +0 -105
  1067. package/dist/lib/cache.test.js.map +0 -1
  1068. package/dist/lib/circuit-breaker.d.ts +0 -101
  1069. package/dist/lib/circuit-breaker.d.ts.map +0 -1
  1070. package/dist/lib/circuit-breaker.js +0 -158
  1071. package/dist/lib/circuit-breaker.js.map +0 -1
  1072. package/dist/lib/circuit-breaker.test.d.ts.map +0 -1
  1073. package/dist/lib/circuit-breaker.test.js +0 -263
  1074. package/dist/lib/circuit-breaker.test.js.map +0 -1
  1075. package/dist/lib/confident-export.d.ts +0 -101
  1076. package/dist/lib/confident-export.d.ts.map +0 -1
  1077. package/dist/lib/confident-export.js +0 -393
  1078. package/dist/lib/confident-export.js.map +0 -1
  1079. package/dist/lib/confident-export.test.d.ts.map +0 -1
  1080. package/dist/lib/confident-export.test.js +0 -835
  1081. package/dist/lib/confident-export.test.js.map +0 -1
  1082. package/dist/lib/constants-symlink.test.d.ts.map +0 -1
  1083. package/dist/lib/constants-symlink.test.js +0 -357
  1084. package/dist/lib/constants-symlink.test.js.map +0 -1
  1085. package/dist/lib/constants.d.ts +0 -183
  1086. package/dist/lib/constants.d.ts.map +0 -1
  1087. package/dist/lib/constants.js +0 -453
  1088. package/dist/lib/constants.js.map +0 -1
  1089. package/dist/lib/constants.test.d.ts.map +0 -1
  1090. package/dist/lib/constants.test.js +0 -717
  1091. package/dist/lib/constants.test.js.map +0 -1
  1092. package/dist/lib/datadog-export.d.ts +0 -156
  1093. package/dist/lib/datadog-export.d.ts.map +0 -1
  1094. package/dist/lib/datadog-export.js +0 -464
  1095. package/dist/lib/datadog-export.js.map +0 -1
  1096. package/dist/lib/datadog-export.test.d.ts +0 -14
  1097. package/dist/lib/datadog-export.test.d.ts.map +0 -1
  1098. package/dist/lib/datadog-export.test.js +0 -890
  1099. package/dist/lib/datadog-export.test.js.map +0 -1
  1100. package/dist/lib/edge-cases.test.d.ts.map +0 -1
  1101. package/dist/lib/edge-cases.test.js +0 -634
  1102. package/dist/lib/edge-cases.test.js.map +0 -1
  1103. package/dist/lib/error-sanitizer.d.ts +0 -57
  1104. package/dist/lib/error-sanitizer.d.ts.map +0 -1
  1105. package/dist/lib/error-sanitizer.js +0 -233
  1106. package/dist/lib/error-sanitizer.js.map +0 -1
  1107. package/dist/lib/error-sanitizer.test.d.ts.map +0 -1
  1108. package/dist/lib/error-sanitizer.test.js +0 -528
  1109. package/dist/lib/error-sanitizer.test.js.map +0 -1
  1110. package/dist/lib/error-types.d.ts +0 -54
  1111. package/dist/lib/error-types.d.ts.map +0 -1
  1112. package/dist/lib/error-types.js +0 -154
  1113. package/dist/lib/error-types.js.map +0 -1
  1114. package/dist/lib/error-types.test.d.ts.map +0 -1
  1115. package/dist/lib/error-types.test.js +0 -196
  1116. package/dist/lib/error-types.test.js.map +0 -1
  1117. package/dist/lib/evaluation-hooks.d.ts +0 -49
  1118. package/dist/lib/evaluation-hooks.d.ts.map +0 -1
  1119. package/dist/lib/evaluation-hooks.js +0 -488
  1120. package/dist/lib/evaluation-hooks.js.map +0 -1
  1121. package/dist/lib/evaluation-hooks.test.d.ts.map +0 -1
  1122. package/dist/lib/evaluation-hooks.test.js +0 -624
  1123. package/dist/lib/evaluation-hooks.test.js.map +0 -1
  1124. package/dist/lib/export-utils.d.ts +0 -99
  1125. package/dist/lib/export-utils.d.ts.map +0 -1
  1126. package/dist/lib/export-utils.js +0 -238
  1127. package/dist/lib/export-utils.js.map +0 -1
  1128. package/dist/lib/export-utils.test.d.ts.map +0 -1
  1129. package/dist/lib/export-utils.test.js +0 -193
  1130. package/dist/lib/export-utils.test.js.map +0 -1
  1131. package/dist/lib/file-utils.d.ts +0 -320
  1132. package/dist/lib/file-utils.d.ts.map +0 -1
  1133. package/dist/lib/file-utils.js +0 -816
  1134. package/dist/lib/file-utils.js.map +0 -1
  1135. package/dist/lib/file-utils.test.d.ts.map +0 -1
  1136. package/dist/lib/file-utils.test.js +0 -1333
  1137. package/dist/lib/file-utils.test.js.map +0 -1
  1138. package/dist/lib/histogram.d.ts +0 -119
  1139. package/dist/lib/histogram.d.ts.map +0 -1
  1140. package/dist/lib/histogram.js +0 -202
  1141. package/dist/lib/histogram.js.map +0 -1
  1142. package/dist/lib/histogram.test.d.ts.map +0 -1
  1143. package/dist/lib/histogram.test.js +0 -381
  1144. package/dist/lib/histogram.test.js.map +0 -1
  1145. package/dist/lib/indexer.d.ts +0 -96
  1146. package/dist/lib/indexer.d.ts.map +0 -1
  1147. package/dist/lib/indexer.js +0 -353
  1148. package/dist/lib/indexer.js.map +0 -1
  1149. package/dist/lib/indexer.test.d.ts.map +0 -1
  1150. package/dist/lib/indexer.test.js +0 -696
  1151. package/dist/lib/indexer.test.js.map +0 -1
  1152. package/dist/lib/input-validator.d.ts +0 -115
  1153. package/dist/lib/input-validator.d.ts.map +0 -1
  1154. package/dist/lib/input-validator.fuzz.test.d.ts.map +0 -1
  1155. package/dist/lib/input-validator.fuzz.test.js +0 -290
  1156. package/dist/lib/input-validator.fuzz.test.js.map +0 -1
  1157. package/dist/lib/input-validator.js +0 -304
  1158. package/dist/lib/input-validator.js.map +0 -1
  1159. package/dist/lib/input-validator.test.d.ts.map +0 -1
  1160. package/dist/lib/input-validator.test.js +0 -415
  1161. package/dist/lib/input-validator.test.js.map +0 -1
  1162. package/dist/lib/instrumentation.d.ts +0 -153
  1163. package/dist/lib/instrumentation.d.ts.map +0 -1
  1164. package/dist/lib/instrumentation.integration.test.d.ts.map +0 -1
  1165. package/dist/lib/instrumentation.integration.test.js +0 -589
  1166. package/dist/lib/instrumentation.integration.test.js.map +0 -1
  1167. package/dist/lib/instrumentation.js +0 -520
  1168. package/dist/lib/instrumentation.js.map +0 -1
  1169. package/dist/lib/instrumentation.test.d.ts.map +0 -1
  1170. package/dist/lib/instrumentation.test.js +0 -821
  1171. package/dist/lib/instrumentation.test.js.map +0 -1
  1172. package/dist/lib/langfuse-export.d.ts +0 -125
  1173. package/dist/lib/langfuse-export.d.ts.map +0 -1
  1174. package/dist/lib/langfuse-export.js +0 -367
  1175. package/dist/lib/langfuse-export.js.map +0 -1
  1176. package/dist/lib/langfuse-export.test.d.ts.map +0 -1
  1177. package/dist/lib/langfuse-export.test.js +0 -1007
  1178. package/dist/lib/langfuse-export.test.js.map +0 -1
  1179. package/dist/lib/llm-as-judge.d.ts +0 -657
  1180. package/dist/lib/llm-as-judge.d.ts.map +0 -1
  1181. package/dist/lib/llm-as-judge.js +0 -1397
  1182. package/dist/lib/llm-as-judge.js.map +0 -1
  1183. package/dist/lib/llm-as-judge.test.d.ts.map +0 -1
  1184. package/dist/lib/llm-as-judge.test.js +0 -2409
  1185. package/dist/lib/llm-as-judge.test.js.map +0 -1
  1186. package/dist/lib/logger.d.ts +0 -46
  1187. package/dist/lib/logger.d.ts.map +0 -1
  1188. package/dist/lib/logger.js +0 -81
  1189. package/dist/lib/logger.js.map +0 -1
  1190. package/dist/lib/logger.test.d.ts.map +0 -1
  1191. package/dist/lib/logger.test.js.map +0 -1
  1192. package/dist/lib/metrics.d.ts +0 -62
  1193. package/dist/lib/metrics.d.ts.map +0 -1
  1194. package/dist/lib/metrics.js +0 -166
  1195. package/dist/lib/metrics.js.map +0 -1
  1196. package/dist/lib/metrics.test.d.ts.map +0 -1
  1197. package/dist/lib/metrics.test.js +0 -189
  1198. package/dist/lib/metrics.test.js.map +0 -1
  1199. package/dist/lib/otlp-export.d.ts +0 -178
  1200. package/dist/lib/otlp-export.d.ts.map +0 -1
  1201. package/dist/lib/otlp-export.js +0 -382
  1202. package/dist/lib/otlp-export.js.map +0 -1
  1203. package/dist/lib/parse-stats.d.ts.map +0 -1
  1204. package/dist/lib/parse-stats.js +0 -206
  1205. package/dist/lib/parse-stats.js.map +0 -1
  1206. package/dist/lib/parse-stats.test.d.ts.map +0 -1
  1207. package/dist/lib/parse-stats.test.js +0 -283
  1208. package/dist/lib/parse-stats.test.js.map +0 -1
  1209. package/dist/lib/phoenix-export.d.ts +0 -109
  1210. package/dist/lib/phoenix-export.d.ts.map +0 -1
  1211. package/dist/lib/phoenix-export.js +0 -429
  1212. package/dist/lib/phoenix-export.js.map +0 -1
  1213. package/dist/lib/phoenix-export.test.d.ts.map +0 -1
  1214. package/dist/lib/phoenix-export.test.js +0 -725
  1215. package/dist/lib/phoenix-export.test.js.map +0 -1
  1216. package/dist/lib/query-sanitizer.d.ts.map +0 -1
  1217. package/dist/lib/query-sanitizer.js +0 -261
  1218. package/dist/lib/query-sanitizer.js.map +0 -1
  1219. package/dist/lib/query-sanitizer.test.d.ts.map +0 -1
  1220. package/dist/lib/query-sanitizer.test.js +0 -400
  1221. package/dist/lib/query-sanitizer.test.js.map +0 -1
  1222. package/dist/lib/server-utils.d.ts +0 -93
  1223. package/dist/lib/server-utils.d.ts.map +0 -1
  1224. package/dist/lib/server-utils.js +0 -181
  1225. package/dist/lib/server-utils.js.map +0 -1
  1226. package/dist/lib/shared-schemas.d.ts +0 -87
  1227. package/dist/lib/shared-schemas.d.ts.map +0 -1
  1228. package/dist/lib/shared-schemas.js +0 -87
  1229. package/dist/lib/shared-schemas.js.map +0 -1
  1230. package/dist/lib/shared-schemas.test.d.ts.map +0 -1
  1231. package/dist/lib/shared-schemas.test.js +0 -106
  1232. package/dist/lib/shared-schemas.test.js.map +0 -1
  1233. package/dist/lib/toon-encoder.d.ts +0 -26
  1234. package/dist/lib/toon-encoder.d.ts.map +0 -1
  1235. package/dist/lib/toon-encoder.js +0 -61
  1236. package/dist/lib/toon-encoder.js.map +0 -1
  1237. package/dist/lib/toon-encoder.test.d.ts.map +0 -1
  1238. package/dist/lib/toon-encoder.test.js +0 -85
  1239. package/dist/lib/toon-encoder.test.js.map +0 -1
  1240. package/dist/lib/verification-events.d.ts +0 -100
  1241. package/dist/lib/verification-events.d.ts.map +0 -1
  1242. package/dist/lib/verification-events.js +0 -162
  1243. package/dist/lib/verification-events.js.map +0 -1
  1244. package/dist/lib/verification-events.test.d.ts.map +0 -1
  1245. package/dist/lib/verification-events.test.js +0 -193
  1246. package/dist/lib/verification-events.test.js.map +0 -1
  1247. package/dist/tools/signoz.integration.test.d.ts +0 -8
  1248. package/dist/tools/signoz.integration.test.d.ts.map +0 -1
  1249. package/dist/tools/signoz.integration.test.js +0 -141
  1250. package/dist/tools/signoz.integration.test.js.map +0 -1
  1251. package/dist/types/evaluation-hooks.d.ts +0 -176
  1252. package/dist/types/evaluation-hooks.d.ts.map +0 -1
  1253. package/dist/types/evaluation-hooks.js +0 -49
  1254. package/dist/types/evaluation-hooks.js.map +0 -1
  1255. /package/dist/lib/{agent-as-judge.test.d.ts → agent-judge/agent-as-judge.test.d.ts} +0 -0
  1256. /package/dist/lib/{verification-events.test.d.ts → audit/verification-events.test.d.ts} +0 -0
  1257. /package/dist/lib/{constants-symlink.test.d.ts → core/constants-symlink.test.d.ts} +0 -0
  1258. /package/dist/lib/{constants.test.d.ts → core/constants.test.d.ts} +0 -0
  1259. /package/dist/lib/{edge-cases.test.d.ts → core/edge-cases.test.d.ts} +0 -0
  1260. /package/dist/lib/{file-utils.test.d.ts → core/file-utils.test.d.ts} +0 -0
  1261. /package/dist/lib/{input-validator.fuzz.test.d.ts → core/input-validator.fuzz.test.d.ts} +0 -0
  1262. /package/dist/lib/{input-validator.test.d.ts → core/input-validator.test.d.ts} +0 -0
  1263. /package/dist/lib/{logger.test.d.ts → core/logger.test.d.ts} +0 -0
  1264. /package/dist/lib/{logger.test.js → core/logger.test.js} +0 -0
  1265. /package/dist/lib/{shared-schemas.test.d.ts → core/shared-schemas.test.d.ts} +0 -0
  1266. /package/dist/lib/{error-sanitizer.test.d.ts → errors/error-sanitizer.test.d.ts} +0 -0
  1267. /package/dist/lib/{error-types.test.d.ts → errors/error-types.test.d.ts} +0 -0
  1268. /package/dist/lib/{query-sanitizer.d.ts → errors/query-sanitizer.d.ts} +0 -0
  1269. /package/dist/lib/{query-sanitizer.test.d.ts → errors/query-sanitizer.test.d.ts} +0 -0
  1270. /package/dist/lib/{confident-export.test.d.ts → exports/confident-export.test.d.ts} +0 -0
  1271. /package/dist/lib/{export-utils.test.d.ts → exports/export-utils.test.d.ts} +0 -0
  1272. /package/dist/lib/{langfuse-export.test.d.ts → exports/langfuse-export.test.d.ts} +0 -0
  1273. /package/dist/lib/{phoenix-export.test.d.ts → exports/phoenix-export.test.d.ts} +0 -0
  1274. /package/dist/lib/{evaluation-hooks.test.d.ts → judge/evaluation-hooks.test.d.ts} +0 -0
  1275. /package/dist/lib/{llm-as-judge.test.d.ts → judge/llm-as-judge.test.d.ts} +0 -0
  1276. /package/dist/lib/{histogram.test.d.ts → observability/histogram.test.d.ts} +0 -0
  1277. /package/dist/lib/{indexer.test.d.ts → observability/indexer.test.d.ts} +0 -0
  1278. /package/dist/lib/{instrumentation.integration.test.d.ts → observability/instrumentation.integration.test.d.ts} +0 -0
  1279. /package/dist/lib/{instrumentation.test.d.ts → observability/instrumentation.test.d.ts} +0 -0
  1280. /package/dist/lib/{metrics.test.d.ts → observability/metrics.test.d.ts} +0 -0
  1281. /package/dist/lib/{parse-stats.d.ts → observability/parse-stats.d.ts} +0 -0
  1282. /package/dist/lib/{parse-stats.test.d.ts → observability/parse-stats.test.d.ts} +0 -0
  1283. /package/dist/lib/{cache.test.d.ts → resilience/cache.test.d.ts} +0 -0
  1284. /package/dist/lib/{circuit-breaker.test.d.ts → resilience/circuit-breaker.test.d.ts} +0 -0
  1285. /package/dist/lib/{toon-encoder.test.d.ts → resilience/toon-encoder.test.d.ts} +0 -0
@@ -0,0 +1,2766 @@
1
+ /**
2
+ * Tests for Quality Metrics Dashboard Helpers
3
+ */
4
+ import { describe, it, afterEach } from 'vitest';
5
+ import * as assert from 'node:assert';
6
+ import { QUALITY_METRICS, computeAggregations, checkAlertThresholds, determineHealthStatus, computeQualityMetric, computeDashboardSummary, computeRoleView, computeTrend, computeConfidence, computeMetricDetail, toEvaluationDetail, applySeverityContext, evaluateHandoffs, computeTurnLevelResults, computeMultiAgentEvaluation, evaluateSLA, evaluateSLAs, registerQualityMetric, unregisterQualityMetric, getAllQualityMetrics, getQualityMetric, clearCustomMetrics, formatMetricValue, createMetricConfig, DEFAULT_CORRELATION_RULES, registerCorrelationRule, unregisterCorrelationRule, getCorrelationRules, clearCorrelationRules, evaluateCorrelationRules, computePipelineView, computeCoverageHeatmap, checkDivergenceAlert, hasQualityMetric, roundTo, sortAlertsBySeverity, SCORE_PRECISION, PERCENT_PRECISION, slaWindowFormatSchema, } from './quality-metrics.js';
7
+ import { TREND_MIN_SAMPLE_SIZE, DEFAULT_BIN_COUNT, DEFAULT_ENTROPY_THRESHOLD, } from './quality-constants.js';
8
+ import { TEST_DECIMAL_EPSILON, TEST_PERCENTILE_P99, TEST_SCORE_ADEQUATE, TEST_SCORE_BASELINE, TEST_SCORE_BORDERLINE, TEST_SCORE_EXCELLENT, TEST_SCORE_GOOD, TEST_SCORE_HIGH, TEST_SCORE_LOW, TEST_SCORE_MID, TEST_SCORE_PASSING, TEST_SCORE_POOR, TEST_SCORE_STRONG, TEST_SCORE_VERY_LOW, TEST_SCORE_WARNING, } from './quality-test-constants.js';
9
+ import { DEFAULT_LIMIT_10, SAMPLE_SIZE_100, COUNT_EIGHT, COUNT_FIFTEEN, COUNT_FIFTY, COUNT_FIVE, COUNT_FOUR, COUNT_HUNDRED, COUNT_NINE, COUNT_NINETY, COUNT_SIXTY, COUNT_TEN, COUNT_THREE, COUNT_THIRTY, COUNT_THOUSAND, COUNT_TWO, COUNT_TWO_HUNDRED, COUNT_TWENTY, COUNT_TWENTY_FIVE, DELTA, VALUE_FORTY_TWO, } from '../../test-helpers/test-constants.js';
10
+ import { requireDefined } from '../../test-helpers/assertions.js';
11
+ const TEST_OUT_OF_RANGE_SCORE_HIGH = 1.5; // per its name, a score above the valid range (assumes scores normally lie in [0, 1] — TODO confirm)
12
+ const TEST_HALLUCINATION_NONCOMPLIANT_SCORE = TEST_SCORE_WARNING + DELTA; // warning-level score nudged up by DELTA
13
+ const TEST_RELEVANCE_MARGIN_SCORE = 0.88;
14
+ const TEST_STEP_TWO_HUNDREDTHS = TEST_DECIMAL_EPSILON * COUNT_TWO; // two epsilon steps; the name suggests TEST_DECIMAL_EPSILON is 0.01 — TODO confirm
15
+ const TEST_STEP_THREE_HUNDREDTHS = TEST_DECIMAL_EPSILON * COUNT_THREE; // three epsilon steps; same assumption as above
16
+ const TEST_P95_LOWER_BOUND = TEST_SCORE_EXCELLENT - TEST_DECIMAL_EPSILON; // lower edge of the window accepted by the 'computes p95' test
17
+ const TEST_P95_UPPER_BOUND = TEST_SCORE_EXCELLENT + TEST_DECIMAL_EPSILON; // upper edge of the window accepted by the 'computes p95' test
18
+ const TEST_P99_LOWER_BOUND = TEST_SCORE_EXCELLENT + TEST_STEP_THREE_HUNDREDTHS; // minimum value accepted by the 'computes p99' test
19
+ const TEST_SINGLE_VALUE_SCORE = TEST_SCORE_PASSING + DELTA; // sole input of the 'handles single value' aggregation test
20
+ const TEST_AVG_EXPECTED_LOWER_BOUND = TEST_SCORE_STRONG - TEST_DECIMAL_EPSILON; // one epsilon below TEST_SCORE_STRONG
21
+ const TEST_AVG_EXPECTED_UPPER_BOUND = TEST_SCORE_STRONG + TEST_DECIMAL_EPSILON; // one epsilon above TEST_SCORE_STRONG
22
+ const TEST_TOTAL_EVALUATION_COUNT_MIN = SAMPLE_SIZE_100 + COUNT_TWENTY_FIVE;
23
+ const TEST_FORMATTED_SCORE_INPUT = 0.8567; // presumably an input for formatMetricValue tests — verify against usage
24
+ const TEST_FORMATTED_SECONDS_INPUT = 3.456; // presumably an input for formatMetricValue tests — verify against usage
25
+ const TEST_TINY_SCORE_1 = 0.00001; // three very small, evenly spaced scores
26
+ const TEST_TINY_SCORE_2 = 0.00002;
27
+ const TEST_TINY_SCORE_3 = 0.00003;
28
+ const TEST_INTERPOLATED_P95_EXPECTED = 0.87;
29
+ const makeBaseResult = (name, overrides = {}) => ({
30
+ name,
31
+ displayName: name,
32
+ values: {
33
+ avg: overrides.avg ?? null,
34
+ min: null, max: null,
35
+ count: DEFAULT_LIMIT_10,
36
+ p50: overrides.p50 ?? null,
37
+ p95: null, p99: null,
38
+ },
39
+ sampleCount: DEFAULT_LIMIT_10,
40
+ alerts: [],
41
+ status: (overrides.avg ?? overrides.p50) === null ? 'no_data' : 'healthy',
42
+ });
43
+ describe('quality-metrics', () => {
44
+ afterEach(() => { // Runs after every test in this suite.
45
+ clearCustomMetrics(); // presumably drops metrics added via registerQualityMetric so they cannot leak into the next test — verify
46
+ clearCorrelationRules(); // presumably drops rules added via registerCorrelationRule — verify
47
+ });
48
+ // ==========================================================================
49
+ // QUALITY_METRICS
50
+ // ==========================================================================
51
+ describe('QUALITY_METRICS', () => { // Checks that the QUALITY_METRICS registry exposes the expected entries and fields.
52
+ it('includes relevance metric', () => {
53
+ assert.ok(QUALITY_METRICS.relevance);
54
+ assert.strictEqual(QUALITY_METRICS.relevance.name, 'relevance');
55
+ assert.ok(QUALITY_METRICS.relevance.alerts.length > 0); // at least one alert threshold must be configured
56
+ });
57
+ it('includes task_completion metric', () => {
58
+ assert.ok(QUALITY_METRICS.task_completion);
59
+ assert.strictEqual(QUALITY_METRICS.task_completion.name, 'task_completion');
60
+ });
61
+ it('includes tool_correctness metric', () => {
62
+ assert.ok(QUALITY_METRICS.tool_correctness);
63
+ assert.strictEqual(QUALITY_METRICS.tool_correctness.unit, 'rate');
64
+ });
65
+ it('includes hallucination metric', () => {
66
+ assert.ok(QUALITY_METRICS.hallucination);
67
+ const alert = QUALITY_METRICS.hallucination.alerts[0];
68
+ assert.strictEqual(alert.direction, 'above'); // the first hallucination alert is expected to use the 'above' direction
69
+ });
70
+ it('includes evaluation_latency metric', () => {
71
+ assert.ok(QUALITY_METRICS.evaluation_latency);
72
+ assert.strictEqual(QUALITY_METRICS.evaluation_latency.unit, 'seconds');
73
+ });
74
+ it('all metrics have remediation hints', () => {
75
+ for (const [name, config] of Object.entries(QUALITY_METRICS)) { // every registry entry, not just the ones named above
76
+ assert.ok(config.remediationHints, `${name} missing remediationHints`);
77
+ assert.ok(requireDefined(config.remediationHints).length > 0, `${name} has empty remediationHints`);
78
+ }
79
+ });
80
+ it('all metrics have required fields', () => {
81
+ for (const [name, config] of Object.entries(QUALITY_METRICS)) {
82
+ assert.ok(config.name, `${name} missing name`); // truthiness checks: empty strings would also fail here
83
+ assert.ok(config.displayName, `${name} missing displayName`);
84
+ assert.ok(config.description, `${name} missing description`);
85
+ assert.ok(config.aggregations.length > 0, `${name} missing aggregations`);
86
+ assert.ok(config.range, `${name} missing range`);
87
+ assert.ok(config.unit, `${name} missing unit`);
88
+ }
89
+ });
90
+ });
91
+ // ==========================================================================
92
+ // computeAggregations
93
+ // ==========================================================================
94
+ describe('computeAggregations', () => { // Exercises computeAggregations(scores, aggregationNames) for each aggregation and for edge-case inputs.
95
+ it('computes avg correctly', () => {
96
+ const result = computeAggregations([TEST_SCORE_GOOD, TEST_SCORE_HIGH, TEST_SCORE_PASSING], ['avg']);
97
+ assert.strictEqual(result.avg, TEST_SCORE_GOOD); // exact equality: relies on the mean of the three constants coming out as exactly TEST_SCORE_GOOD
98
+ });
99
+ it('computes min and max', () => {
100
+ const result = computeAggregations([TEST_SCORE_MID, TEST_SCORE_GOOD, TEST_SCORE_LOW, TEST_SCORE_HIGH], ['min', 'max']);
101
+ assert.strictEqual(result.min, TEST_SCORE_LOW);
102
+ assert.strictEqual(result.max, TEST_SCORE_HIGH);
103
+ });
104
+ it('computes count', () => {
105
+ const result = computeAggregations([TEST_SCORE_MID, TEST_SCORE_BASELINE, TEST_SCORE_PASSING, TEST_SCORE_GOOD], ['count']);
106
+ assert.strictEqual(result.count, COUNT_FOUR);
107
+ });
108
+ it('computes p50 (median)', () => {
109
+ const result = computeAggregations([TEST_SCORE_WARNING, TEST_SCORE_VERY_LOW, TEST_SCORE_LOW, TEST_SCORE_POOR, TEST_SCORE_MID], ['p50']); // odd-length input: the median is expected to be one of the inputs
110
+ assert.strictEqual(result.p50, TEST_SCORE_LOW);
111
+ });
112
+ it('computes p50 with even count', () => {
113
+ const result = computeAggregations([TEST_SCORE_VERY_LOW, TEST_SCORE_POOR, TEST_SCORE_BASELINE, TEST_SCORE_GOOD], ['p50']);
114
+ // R-7 interpolation: rank = TEST_SCORE_MID * 3 = 1.5
115
+ // Interpolate between index 1 (TEST_SCORE_POOR) and index 2 (TEST_SCORE_BASELINE)
116
+ assert.strictEqual(result.p50, TEST_SCORE_MID);
117
+ });
118
+ it('computes p95', () => {
119
+ const scores = Array.from({ length: COUNT_HUNDRED }, (_, i) => i / COUNT_HUNDRED); // evenly spaced values i / COUNT_HUNDRED for i = 0 .. COUNT_HUNDRED - 1
120
+ const result = computeAggregations(scores, ['p95']);
121
+ assert.ok(requireDefined(result.p95) >= TEST_P95_LOWER_BOUND && requireDefined(result.p95) <= TEST_P95_UPPER_BOUND); // tolerance window instead of exact equality
122
+ });
123
+ it('computes p99', () => {
124
+ const scores = Array.from({ length: COUNT_HUNDRED }, (_, i) => i / COUNT_HUNDRED);
125
+ const result = computeAggregations(scores, ['p99']);
126
+ assert.ok(requireDefined(result.p99) >= TEST_P99_LOWER_BOUND); // only a lower bound is checked
127
+ });
128
+ it('handles empty array', () => {
129
+ const result = computeAggregations([], ['avg', 'p50', 'count']);
130
+ assert.strictEqual(result.avg, null);
131
+ assert.strictEqual(result.p50, null);
132
+ assert.strictEqual(result.count, null); // count is expected to be null, not 0, when there are no scores
133
+ });
134
+ it('handles single value', () => {
135
+ const result = computeAggregations([TEST_SINGLE_VALUE_SCORE], ['avg', 'min', 'max', 'p50']); // with one score, every aggregation should equal that score
136
+ assert.strictEqual(result.avg, TEST_SINGLE_VALUE_SCORE);
137
+ assert.strictEqual(result.min, TEST_SINGLE_VALUE_SCORE);
138
+ assert.strictEqual(result.max, TEST_SINGLE_VALUE_SCORE);
139
+ assert.strictEqual(result.p50, TEST_SINGLE_VALUE_SCORE);
140
+ });
141
+ it('computes multiple aggregations', () => {
142
+ const result = computeAggregations([TEST_SCORE_MID, TEST_SCORE_BASELINE, TEST_SCORE_PASSING, TEST_SCORE_GOOD, TEST_SCORE_HIGH], ['avg', 'min', 'max', 'p50', 'count']);
143
+ assert.strictEqual(result.avg, TEST_SCORE_PASSING);
144
+ assert.strictEqual(result.min, TEST_SCORE_MID);
145
+ assert.strictEqual(result.max, TEST_SCORE_HIGH);
146
+ assert.strictEqual(result.p50, TEST_SCORE_PASSING);
147
+ assert.strictEqual(result.count, COUNT_FIVE);
148
+ });
149
+ it('filters out NaN and Infinity scores', () => {
150
+ const result = computeAggregations([TEST_SCORE_MID, NaN, TEST_SCORE_PASSING, Infinity, -Infinity, TEST_SCORE_HIGH], ['avg', 'count', 'min', 'max']);
151
+ assert.strictEqual(result.count, COUNT_THREE); // Only TEST_SCORE_MID, TEST_SCORE_PASSING, TEST_SCORE_HIGH are valid
152
+ assert.strictEqual(result.min, TEST_SCORE_MID);
153
+ assert.strictEqual(result.max, TEST_SCORE_HIGH);
154
+ assert.strictEqual(result.avg, TEST_SCORE_PASSING);
155
+ });
156
+ it('returns null aggregations when all scores are NaN/Infinity', () => {
157
+ const result = computeAggregations([NaN, Infinity, -Infinity], ['avg', 'count']); // nothing survives filtering, so the result should match the empty-array case
158
+ assert.strictEqual(result.avg, null);
159
+ assert.strictEqual(result.count, null);
160
+ });
161
+ });
162
+ // ==========================================================================
163
+ // checkAlertThresholds
164
+ // ==========================================================================
165
+ describe('checkAlertThresholds', () => {
166
+ const thresholds = [
167
+ { aggregation: 'p50', value: TEST_SCORE_PASSING, direction: 'below', severity: 'warning', message: 'Low p50: {value}' },
168
+ { aggregation: 'avg', value: TEST_SCORE_WARNING, direction: 'above', severity: 'critical', message: 'High avg: {value}' },
169
+ ];
170
+ it('triggers below threshold alert', () => {
171
+ // avg=DELTA is below the TEST_SCORE_WARNING 'above' threshold, so only p50 alert should trigger
172
+ const values = { avg: DELTA, min: null, max: null, count: COUNT_TEN, p50: TEST_SCORE_ADEQUATE, p95: null, p99: null };
173
+ const alerts = checkAlertThresholds(values, thresholds);
174
+ assert.strictEqual(alerts.length, 1);
175
+ assert.strictEqual(alerts[0].severity, 'warning');
176
+ assert.strictEqual(alerts[0].direction, 'below');
177
+ assert.strictEqual(alerts[0].actualValue, TEST_SCORE_ADEQUATE);
178
+ });
179
+ it('triggers above threshold alert', () => {
180
+ const values = { avg: 0.15, min: null, max: null, count: DEFAULT_LIMIT_10, p50: TEST_SCORE_GOOD, p95: null, p99: null };
181
+ const alerts = checkAlertThresholds(values, thresholds);
182
+ assert.strictEqual(alerts.length, 1);
183
+ assert.strictEqual(alerts[0].severity, 'critical');
184
+ assert.strictEqual(alerts[0].direction, 'above');
185
+ });
186
+ it('triggers multiple alerts', () => {
187
+ const values = { avg: 0.15, min: null, max: null, count: DEFAULT_LIMIT_10, p50: TEST_SCORE_MID, p95: null, p99: null };
188
+ const alerts = checkAlertThresholds(values, thresholds);
189
+ assert.strictEqual(alerts.length, COUNT_TWO);
190
+ // Critical should come first (sorted by severity)
191
+ assert.strictEqual(alerts[0].severity, 'critical');
192
+ assert.strictEqual(alerts[1].severity, 'warning');
193
+ });
194
+ it('returns empty array when no alerts triggered', () => {
195
+ const values = { avg: DELTA, min: null, max: null, count: DEFAULT_LIMIT_10, p50: TEST_SCORE_GOOD, p95: null, p99: null };
196
+ const alerts = checkAlertThresholds(values, thresholds);
197
+ assert.strictEqual(alerts.length, 0);
198
+ });
199
+ it('handles null values', () => {
200
+ const values = { avg: null, min: null, max: null, count: null, p50: null, p95: null, p99: null };
201
+ const alerts = checkAlertThresholds(values, thresholds);
202
+ assert.strictEqual(alerts.length, 0);
203
+ });
204
+ it('formats message with value', () => {
205
+ // Only trigger the p50 below threshold
206
+ const values = { avg: DELTA, min: null, max: null, count: DEFAULT_LIMIT_10, p50: TEST_SCORE_ADEQUATE, p95: null, p99: null };
207
+ const alerts = checkAlertThresholds(values, thresholds);
208
+ // The message should contain the formatted value
209
+ assert.ok(alerts[0].message.includes('0.65'));
210
+ });
211
+ it('populates affectedCount from options.sampleCount', () => {
212
+ const values = { avg: DELTA, min: null, max: null, count: DEFAULT_LIMIT_10, p50: TEST_SCORE_ADEQUATE, p95: null, p99: null };
213
+ const alerts = checkAlertThresholds(values, thresholds, { sampleCount: VALUE_FORTY_TWO });
214
+ assert.strictEqual(alerts[0].affectedCount, VALUE_FORTY_TWO);
215
+ });
216
+ it('falls back to values.count when sampleCount not provided', () => {
217
+ const values = { avg: DELTA, min: null, max: null, count: DEFAULT_LIMIT_10, p50: TEST_SCORE_ADEQUATE, p95: null, p99: null };
218
+ const alerts = checkAlertThresholds(values, thresholds);
219
+ assert.strictEqual(alerts[0].affectedCount, COUNT_TEN);
220
+ });
221
+ it('appends sample count to alert message', () => {
222
+ const values = { avg: DELTA, min: null, max: null, count: DEFAULT_LIMIT_10, p50: TEST_SCORE_ADEQUATE, p95: null, p99: null };
223
+ const alerts = checkAlertThresholds(values, thresholds, { sampleCount: COUNT_FIFTEEN });
224
+ assert.ok(alerts[0].message.includes(`(n=${COUNT_FIFTEEN} evaluations)`));
225
+ });
226
+ it('includes remediationHints when provided', () => {
227
+ const values = { avg: DELTA, min: null, max: null, count: DEFAULT_LIMIT_10, p50: TEST_SCORE_ADEQUATE, p95: null, p99: null };
228
+ const hints = ['Fix your prompts', 'Add more context'];
229
+ const alerts = checkAlertThresholds(values, thresholds, { remediationHints: hints });
230
+ assert.deepStrictEqual(alerts[0].remediationHints, hints);
231
+ });
232
+ it('omits remediationHints when not provided', () => {
233
+ const values = { avg: DELTA, min: null, max: null, count: DEFAULT_LIMIT_10, p50: TEST_SCORE_ADEQUATE, p95: null, p99: null };
234
+ const alerts = checkAlertThresholds(values, thresholds);
235
+ assert.strictEqual(alerts[0].remediationHints, undefined);
236
+ });
237
+ });
238
+ // ==========================================================================
239
+ // checkDivergenceAlert
240
+ // ==========================================================================
241
/**
 * Suite for checkDivergenceAlert.
 *
 * All cases share one divergence threshold and only vary the score list that
 * is passed in, asserting either `null` or specific fields of the alert.
 */
describe('checkDivergenceAlert', () => {
    const divergenceThreshold = {
        type: 'divergence',
        entropyThreshold: TEST_SCORE_BASELINE,
        bins: DEFAULT_LIMIT_10,
        severity: 'warning',
        message: 'Score distribution is bimodal (entropy {entropy})',
    };
    // `half` zeros followed by `half` ones.
    const zerosThenOnes = (half) => Array.from({ length: half * 2 }, (_, idx) => (idx < half ? 0 : 1));
    // Runs the function under test against the shared threshold.
    const evaluate = (scores) => checkDivergenceAlert(scores, divergenceThreshold);

    it('returns null for uniform scores (high entropy)', () => {
        // Evenly spaced values step / TEST_PERCENTILE_P99 for SAMPLE_SIZE_100 steps.
        const evenlySpaced = [...Array(SAMPLE_SIZE_100).keys()].map((step) => step / TEST_PERCENTILE_P99);
        assert.strictEqual(evaluate(evenlySpaced), null);
    });

    it('triggers for bimodal scores (low entropy)', () => {
        // Half of the samples are 0 and the other half are 1.
        const result = evaluate(zerosThenOnes(COUNT_FIFTY));
        assert.ok(result, 'Expected divergence alert for bimodal data');
        assert.strictEqual(result.severity, 'warning');
        assert.strictEqual(result.direction, 'below');
        assert.strictEqual(result.aggregation, 'count');
        assert.ok(result.divergenceDetails);
        const details = requireDefined(result.divergenceDetails);
        assert.strictEqual(details.bins, COUNT_TEN);
        assert.ok(details.entropy < TEST_SCORE_BASELINE);
        assert.strictEqual(details.histogram.length, COUNT_TEN);
        assert.strictEqual(result.affectedCount, COUNT_HUNDRED);
    });

    it('returns null for insufficient data (< 10 scores)', () => {
        // Six samples only.
        const sixScores = [0, 0, 0, 1, 1, 1];
        assert.strictEqual(evaluate(sixScores), null);
    });

    it('replaces {entropy} placeholder in message', () => {
        const result = evaluate(zerosThenOnes(COUNT_FIFTY));
        assert.ok(result);
        const { message } = result;
        assert.ok(!message.includes('{entropy}'));
        assert.ok(message.includes('entropy'));
    });

    it('returns null for exactly 10 uniform scores', () => {
        const tenSpaced = [...Array(COUNT_TEN).keys()].map((step) => step / COUNT_NINE);
        assert.strictEqual(evaluate(tenSpaced), null);
    });

    it('returns null for empty scores', () => {
        assert.strictEqual(evaluate([]), null);
    });

    it('triggers for all-identical scores (entropy = 0)', () => {
        const constantScores = Array.from({ length: COUNT_TWENTY }, () => TEST_SCORE_MID);
        const result = evaluate(constantScores);
        assert.ok(result, 'All identical scores should have entropy 0 and trigger alert');
        assert.strictEqual(requireDefined(requireDefined(result).divergenceDetails).entropy, 0);
    });

    it('filters out NaN and out-of-range scores', () => {
        // Eight in-range constants followed by four values the test title
        // describes as NaN / out-of-range; the expected outcome is null.
        const inRange = [
            TEST_SCORE_WARNING,
            TEST_SCORE_VERY_LOW,
            TEST_SCORE_LOW,
            TEST_SCORE_POOR,
            TEST_SCORE_MID,
            TEST_SCORE_BASELINE,
            TEST_SCORE_PASSING,
            TEST_SCORE_GOOD,
        ];
        const rejected = [NaN, -1, TEST_OUT_OF_RANGE_SCORE_HIGH, Infinity];
        assert.strictEqual(evaluate([...inRange, ...rejected]), null);
    });

    it('affectedCount reflects valid scores only', () => {
        // 2 * COUNT_TWENTY_FIVE zero/one samples plus five extra entries that
        // must not be counted.
        const scores = [
            ...zerosThenOnes(COUNT_TWENTY_FIVE),
            NaN,
            -TEST_SCORE_WARNING,
            TEST_OUT_OF_RANGE_SCORE_HIGH,
            Infinity,
            -Infinity,
        ];
        const result = evaluate(scores);
        assert.ok(result);
        assert.strictEqual(requireDefined(result).affectedCount, COUNT_FIFTY);
    });
});
331
+ // ==========================================================================
332
+ // checkAlertThresholds with divergence
333
+ // ==========================================================================
334
/**
 * Suite covering checkAlertThresholds when the threshold list contains
 * divergence entries (objects with `type: 'divergence'`), alone or mixed
 * with value thresholds.
 */
describe('checkAlertThresholds with divergence alerts', () => {
    // Aggregates where avg and p50 share one score; other percentiles are null.
    const aggregates = (score, count) => ({
        avg: score,
        min: null,
        max: null,
        count,
        p50: score,
        p95: null,
        p99: null,
    });

    it('handles mixed value and divergence thresholds', () => {
        const p50Rule = { aggregation: 'p50', value: TEST_SCORE_GOOD, direction: 'below', severity: 'warning', message: 'p50 low ({value})' };
        const divergenceRule = { type: 'divergence', entropyThreshold: TEST_SCORE_BASELINE, bins: DEFAULT_LIMIT_10, severity: 'warning', message: 'Bimodal (entropy {entropy})' };
        const bimodalScores = Array(COUNT_FIFTY).fill(0).concat(Array(COUNT_FIFTY).fill(1));
        const fired = checkAlertThresholds(
            aggregates(TEST_SCORE_MID, SAMPLE_SIZE_100),
            [p50Rule, divergenceRule],
            { scores: bimodalScores },
        );
        // One alert per threshold is expected: the p50 rule and the divergence rule.
        assert.strictEqual(fired.length, COUNT_TWO);
    });

    it('skips divergence alerts when scores not provided', () => {
        const onlyDivergence = [
            { type: 'divergence', entropyThreshold: TEST_SCORE_BASELINE, bins: DEFAULT_LIMIT_10, severity: 'warning', message: 'Bimodal' },
        ];
        // No options argument, hence no raw scores to inspect.
        const fired = checkAlertThresholds(aggregates(TEST_SCORE_HIGH, SAMPLE_SIZE_100), onlyDivergence);
        assert.strictEqual(fired.length, 0);
    });

    it('backward compatible with old-style AlertThreshold (no type field)', () => {
        const legacyRule = { aggregation: 'p50', value: TEST_SCORE_GOOD, direction: 'below', severity: 'warning', message: 'Low ({value})' };
        const fired = checkAlertThresholds(aggregates(TEST_SCORE_MID, DEFAULT_LIMIT_10), [legacyRule]);
        assert.strictEqual(fired.length, 1);
        const [first] = fired;
        assert.strictEqual(first.direction, 'below');
    });
});
364
+ // ==========================================================================
365
+ // QUALITY_METRICS divergence alerts (faithfulness, relevance, coherence, task_completion)
366
+ // ==========================================================================
367
// Metric names whose QUALITY_METRICS entry is expected to carry a divergence alert.
const DIVERGENCE_METRICS = ['faithfulness', 'relevance', 'coherence', 'task_completion'];
// Registers one suite per metric name; each suite has the same three cases.
DIVERGENCE_METRICS.forEach((metricName) => {
    describe(`QUALITY_METRICS.${metricName} divergence alert`, () => {
        const FIXED_TIMESTAMP = '2026-01-01T00:00:00Z';
        // Evaluation record for the current metric with the given score.
        const toEvaluation = (scoreValue) => ({
            evaluationName: metricName,
            scoreValue,
            timestamp: FIXED_TIMESTAMP,
        });
        // COUNT_FIFTY evaluations that all share one score.
        const repeated = (scoreValue) => Array.from({ length: COUNT_FIFTY }, () => toEvaluation(scoreValue));
        // Threshold configs are recognised by type; fired alerts by divergenceDetails.
        const isDivergenceThreshold = (entry) => 'type' in entry && entry.type === 'divergence';
        const hasDivergenceDetails = (entry) => entry.divergenceDetails != null;

        it('includes a divergence alert threshold', () => {
            const { alerts: configured } = QUALITY_METRICS[metricName];
            const divergence = configured.find(isDivergenceThreshold);
            assert.ok(divergence, `Expected divergence alert in ${metricName} metric`);
            assert.strictEqual(divergence.entropyThreshold, TEST_SCORE_MID);
            assert.strictEqual(divergence.bins, DEFAULT_BIN_COUNT);
            assert.strictEqual(divergence.severity, 'warning');
        });

        it('fires divergence alert via computeQualityMetric with bimodal data', () => {
            // COUNT_FIFTY scores of 0.0 followed by COUNT_FIFTY scores of 1.0.
            const bimodalEvals = repeated(0.0).concat(repeated(1.0));
            const outcome = computeQualityMetric(bimodalEvals, QUALITY_METRICS[metricName]);
            const divergenceAlert = outcome.alerts.find(hasDivergenceDetails);
            assert.ok(divergenceAlert, `Expected divergence alert for bimodal ${metricName} data`);
            assert.ok(requireDefined(divergenceAlert.divergenceDetails).entropy < TEST_SCORE_MID);
        });

        it('does not fire divergence alert for uniform data', () => {
            // Evenly spaced scores step / TEST_PERCENTILE_P99.
            const uniformEvals = [...Array(SAMPLE_SIZE_100).keys()].map((step) => toEvaluation(step / TEST_PERCENTILE_P99));
            const outcome = computeQualityMetric(uniformEvals, QUALITY_METRICS[metricName]);
            assert.strictEqual(outcome.alerts.find(hasDivergenceDetails), undefined);
        });
    });
});
402
+ // ==========================================================================
403
+ // MetricConfigBuilder.divergenceAlert
404
+ // ==========================================================================
405
/**
 * Suite for the `divergenceAlert` step of the builder returned by
 * createMetricConfig: checks the alert entry found in the built config.
 */
describe('MetricConfigBuilder.divergenceAlert', () => {
    // Picks the first alert entry marked as a divergence threshold.
    const firstDivergence = (config) => config.alerts.find((entry) => 'type' in entry && entry.type === 'divergence');

    it('adds divergence alert via builder', () => {
        // Only threshold and severity are passed; bins is expected to equal DEFAULT_BIN_COUNT.
        const built = createMetricConfig('test_div')
            .displayName('Test Divergence')
            .description('Testing divergence builder')
            .divergenceAlert(TEST_SCORE_MID, 'warning')
            .build();
        const found = firstDivergence(built);
        assert.ok(found);
        assert.strictEqual(found.entropyThreshold, TEST_SCORE_MID);
        assert.strictEqual(found.bins, DEFAULT_BIN_COUNT);
        assert.strictEqual(found.severity, 'warning');
    });

    it('supports custom bins and message', () => {
        const built = createMetricConfig('test_div2')
            .displayName('Test Divergence 2')
            .description('Testing custom bins')
            .divergenceAlert(TEST_SCORE_PASSING, 'critical', 'Custom msg (entropy {entropy})', COUNT_TWENTY)
            .build();
        const found = firstDivergence(built);
        assert.ok(found);
        assert.strictEqual(found.bins, COUNT_TWENTY);
        assert.ok(found.message.includes('Custom msg'));
    });
});
430
+ // ==========================================================================
431
+ // Constants exported
432
+ // ==========================================================================
433
/**
 * Pins the exported divergence defaults to the values the test constants
 * expect.
 */
describe('divergence constants', () => {
    it('exports DEFAULT_BIN_COUNT', () => {
        const expected = COUNT_TEN;
        const actual = DEFAULT_BIN_COUNT;
        assert.strictEqual(actual, expected);
    });

    it('exports DEFAULT_ENTROPY_THRESHOLD', () => {
        const expected = TEST_SCORE_BASELINE;
        const actual = DEFAULT_ENTROPY_THRESHOLD;
        assert.strictEqual(actual, expected);
    });
});
441
+ // ==========================================================================
442
+ // determineHealthStatus
443
+ // ==========================================================================
444
/**
 * Suite for determineHealthStatus(alerts, hasData): checks the status string
 * returned for no data, no alerts, warning-only, and warning+critical alerts.
 */
describe('determineHealthStatus', () => {
    // Builds a 'below' alert with the fixed message 'test'.
    const belowAlert = (severity, aggregation, threshold, actualValue) => ({
        severity,
        message: 'test',
        aggregation,
        threshold,
        actualValue,
        direction: 'below',
    });

    it('returns no_data when no data', () => {
        const status = determineHealthStatus([], false);
        assert.strictEqual(status, 'no_data');
    });

    it('returns healthy when no alerts', () => {
        const status = determineHealthStatus([], true);
        assert.strictEqual(status, 'healthy');
    });

    it('returns warning when warning alerts', () => {
        const onlyWarning = [belowAlert('warning', 'avg', TEST_SCORE_PASSING, TEST_SCORE_BASELINE)];
        assert.strictEqual(determineHealthStatus(onlyWarning, true), 'warning');
    });

    it('returns critical when critical alerts', () => {
        // A warning listed before a critical alert; 'critical' is expected.
        const warningThenCritical = [
            belowAlert('warning', 'avg', TEST_SCORE_PASSING, TEST_SCORE_BASELINE),
            belowAlert('critical', 'p50', TEST_SCORE_MID, TEST_SCORE_POOR),
        ];
        assert.strictEqual(determineHealthStatus(warningThenCritical, true), 'critical');
    });
});
465
+ // ==========================================================================
466
+ // computeQualityMetric
467
+ // ==========================================================================
468
/**
 * Suite for computeQualityMetric, always using the `relevance` entry of
 * QUALITY_METRICS as the metric config.
 */
describe('computeQualityMetric', () => {
    const config = QUALITY_METRICS.relevance;
    // 'relevance' evaluation stamped at 2026-01-01T00:0<minute>:00Z; `fields`
    // is spread last so a missing key stays missing and an explicit
    // `undefined` stays an own property.
    const relevanceAt = (minute, fields = {}) => ({
        timestamp: `2026-01-01T00:0${minute}:00Z`,
        evaluationName: 'relevance',
        ...fields,
    });
    // One evaluation per score, at consecutive minutes starting from 0.
    const scored = (...scores) => scores.map((scoreValue, minute) => relevanceAt(minute, { scoreValue }));

    it('computes metric from evaluations', () => {
        const outcome = computeQualityMetric(scored(TEST_SCORE_GOOD, TEST_SCORE_HIGH, TEST_SCORE_STRONG), config);
        assert.strictEqual(outcome.name, 'relevance');
        assert.strictEqual(outcome.sampleCount, COUNT_THREE);
        const average = requireDefined(outcome.values.avg);
        assert.ok(average >= TEST_AVG_EXPECTED_LOWER_BOUND && average <= TEST_AVG_EXPECTED_UPPER_BOUND);
        assert.strictEqual(outcome.status, 'healthy');
    });

    it('handles empty evaluations', () => {
        const outcome = computeQualityMetric([], config);
        assert.strictEqual(outcome.sampleCount, 0);
        assert.strictEqual(outcome.status, 'no_data');
    });

    it('triggers alerts for low scores', () => {
        const outcome = computeQualityMetric(scored(TEST_SCORE_POOR, TEST_SCORE_MID, TEST_SCORE_BORDERLINE), config);
        assert.ok(outcome.alerts.length > 0);
        assert.strictEqual(outcome.status, 'critical');
    });

    it('filters evaluations without scoreValue', () => {
        const evaluations = [
            relevanceAt(0, { scoreValue: TEST_SCORE_GOOD }),
            relevanceAt(1), // scoreValue key absent
            relevanceAt(2, { scoreValue: undefined }), // key present, value undefined
        ];
        const outcome = computeQualityMetric(evaluations, config);
        assert.strictEqual(outcome.sampleCount, 1);
    });

    it('includes period if provided', () => {
        const period = { start: '2026-01-01T00:00:00Z', end: '2026-01-02T00:00:00Z' };
        const outcome = computeQualityMetric([], config, period);
        assert.deepStrictEqual(outcome.period, period);
    });

    it('populates worstExplanation with lowest-scoring evaluation', () => {
        const evaluations = [
            relevanceAt(0, { scoreValue: TEST_SCORE_GOOD, explanation: 'Good answer' }),
            relevanceAt(1, { scoreValue: TEST_SCORE_LOW, explanation: 'Off-topic', traceId: 'trace-123' }),
            relevanceAt(2, { scoreValue: TEST_SCORE_HIGH }),
        ];
        const { worstExplanation } = computeQualityMetric(evaluations, config);
        assert.ok(worstExplanation);
        // Every field is expected to come from the TEST_SCORE_LOW entry.
        assert.strictEqual(worstExplanation.score, TEST_SCORE_LOW);
        assert.strictEqual(worstExplanation.explanation, 'Off-topic');
        assert.strictEqual(worstExplanation.traceId, 'trace-123');
        assert.strictEqual(worstExplanation.timestamp, '2026-01-01T00:01:00Z');
    });

    it('returns undefined worstExplanation for empty evaluations', () => {
        const outcome = computeQualityMetric([], config);
        assert.strictEqual(outcome.worstExplanation, undefined);
    });

    it('populates affectedCount and remediationHints on triggered alerts', () => {
        const outcome = computeQualityMetric(scored(TEST_SCORE_LOW, TEST_SCORE_POOR), config);
        assert.ok(outcome.alerts.length > 0);
        assert.strictEqual(outcome.alerts[0].affectedCount, COUNT_TWO);
        assert.ok(Array.isArray(outcome.alerts[0].remediationHints));
        assert.ok(requireDefined(outcome.alerts[0].remediationHints).length > 0);
    });

    it('alert messages include sample count', () => {
        const outcome = computeQualityMetric(scored(TEST_SCORE_LOW), config);
        assert.ok(outcome.alerts.length > 0);
        assert.ok(outcome.alerts[0].message.includes('(n=1 evaluations)'));
    });

    it('handles worstExplanation with missing optional fields', () => {
        // The evaluation carries neither explanation nor traceId.
        const { worstExplanation } = computeQualityMetric(scored(TEST_SCORE_MID), config);
        assert.ok(worstExplanation);
        assert.strictEqual(worstExplanation.score, TEST_SCORE_MID);
        assert.strictEqual(worstExplanation.explanation, undefined);
        assert.strictEqual(worstExplanation.traceId, undefined);
    });
});
558
+ // ==========================================================================
559
+ // computeDashboardSummary
560
+ // ==========================================================================
561
/**
 * Suite for computeDashboardSummary, which is called with a Map from metric
 * name to evaluation list and, in one case, an extra custom-metrics object.
 */
describe('computeDashboardSummary', () => {
    // Map entry holding a single evaluation of `metric` with the given score.
    const singleEval = (metric, scoreValue) => [
        metric,
        [{ timestamp: '2026-01-01T00:00:00Z', evaluationName: metric, scoreValue }],
    ];
    // Summary for a Map built from the given entries.
    const summarize = (...entries) => computeDashboardSummary(new Map(entries));
    // Alerts in the summary attributed to the 'relevance' metric.
    const relevanceAlertsOf = (summary) => summary.alerts.filter((entry) => entry.metricName === 'relevance');

    it('computes summary for multiple metrics', () => {
        const summary = summarize(
            singleEval('relevance', TEST_SCORE_STRONG),
            singleEval('task_completion', TEST_SCORE_HIGH),
        );
        assert.strictEqual(summary.overallStatus, 'healthy');
        assert.ok(summary.metrics.length > 0);
        assert.ok(summary.timestamp);
    });

    it('detects worst status as overall', () => {
        const summary = summarize(singleEval('relevance', TEST_SCORE_LOW));
        // A TEST_SCORE_LOW relevance score is expected to be critical.
        assert.strictEqual(summary.overallStatus, 'critical');
    });

    it('collects all alerts with metric names', () => {
        const summary = summarize(singleEval('relevance', TEST_SCORE_LOW));
        assert.ok(relevanceAlertsOf(summary).length > 0);
    });

    it('counts metrics by status', () => {
        const evaluationsByMetric = new Map();
        const { summary: counts } = computeDashboardSummary(evaluationsByMetric);
        const builtInCount = Object.keys(QUALITY_METRICS).length;
        assert.strictEqual(counts.totalMetrics, builtInCount);
        assert.strictEqual(counts.noDataMetrics, builtInCount);
    });

    it('populates relatedMetrics on alerts when multiple metrics are alerting', () => {
        const summary = summarize(
            singleEval('relevance', TEST_SCORE_LOW),
            singleEval('faithfulness', TEST_SCORE_POOR),
        );
        const relevanceAlerts = relevanceAlertsOf(summary);
        assert.ok(relevanceAlerts.length > 0);
        assert.ok(requireDefined(relevanceAlerts[0].relatedMetrics).includes('faithfulness'));
        assert.ok(!requireDefined(relevanceAlerts[0].relatedMetrics).includes('relevance'));
    });

    it('sets relatedMetrics to empty array when only one metric is alerting', () => {
        const summary = summarize(
            singleEval('relevance', TEST_SCORE_LOW),
            singleEval('faithfulness', TEST_SCORE_EXCELLENT),
        );
        const relevanceAlerts = relevanceAlertsOf(summary);
        assert.ok(relevanceAlerts.length > 0);
        assert.deepStrictEqual(relevanceAlerts[0].relatedMetrics, []);
    });

    it('handles relatedMetrics when one metric triggers multiple alerts', () => {
        // The single TEST_SCORE_LOW relevance score is expected to raise at
        // least two relevance alerts (asserted below).
        const summary = summarize(
            singleEval('relevance', TEST_SCORE_LOW),
            singleEval('faithfulness', TEST_SCORE_POOR),
        );
        const relevanceAlerts = relevanceAlertsOf(summary);
        assert.ok(relevanceAlerts.length >= COUNT_TWO, `Expected >=2 relevance alerts, got ${relevanceAlerts.length}`);
        relevanceAlerts.forEach((alert) => {
            assert.ok(requireDefined(alert.relatedMetrics).includes('faithfulness'));
            assert.ok(!requireDefined(alert.relatedMetrics).includes('relevance'));
        });
    });

    it('includes custom metrics', () => {
        const customMetricConfig = {
            name: 'custom_metric',
            displayName: 'Custom Metric',
            description: 'A custom metric',
            aggregations: ['avg'],
            alerts: [],
            range: { min: 0, max: 1 },
            unit: 'score',
        };
        const summary = computeDashboardSummary(new Map(), { custom_metric: customMetricConfig });
        const names = summary.metrics.map((metric) => metric.name);
        assert.ok(names.includes('custom_metric'));
    });
});
667
+ // ==========================================================================
668
+ // Cross-Metric Correlation
669
+ // ==========================================================================
670
/**
 * Shape checks on the exported DEFAULT_CORRELATION_RULES list.
 */
describe('DEFAULT_CORRELATION_RULES', () => {
    it('defines 3 default rules', () => {
        const ruleCount = DEFAULT_CORRELATION_RULES.length;
        assert.strictEqual(ruleCount, COUNT_THREE);
    });

    it('each rule has at least 2 conditions', () => {
        DEFAULT_CORRELATION_RULES.forEach((rule) => {
            assert.ok(rule.conditions.length >= COUNT_TWO, `${rule.name} has fewer than 2 conditions`);
        });
    });

    it('content_quality_crisis targets relevance and hallucination', () => {
        const crisisRule = DEFAULT_CORRELATION_RULES.find(({ name }) => name === 'content_quality_crisis');
        assert.ok(crisisRule);
        assert.strictEqual(crisisRule.severity, 'critical');
        const targeted = crisisRule.conditions.map(({ metric }) => metric);
        assert.ok(targeted.includes('relevance'));
        assert.ok(targeted.includes('hallucination'));
    });
});
688
/**
 * Suite for evaluateCorrelationRules(results, rules).
 *
 * Metric results are built through small factories so each test gets fresh
 * objects; `min`, `max`, `p95` and `p99` are always null in these fixtures.
 */
describe('evaluateCorrelationRules', () => {
    // Aggregate values; anything not supplied is null.
    const aggregates = ({ avg = null, count = null, p50 = null } = {}) => ({
        avg,
        min: null,
        max: null,
        count,
        p50,
        p95: null,
        p99: null,
    });
    // Metric result with an empty alert list.
    const metricResult = (name, displayName, values, sampleCount, status) => ({
        name,
        displayName,
        values,
        sampleCount,
        alerts: [],
        status,
    });
    // Result whose count and sampleCount are both DEFAULT_LIMIT_10.
    const tenSamples = (name, displayName, stats, status) => metricResult(
        name,
        displayName,
        aggregates({ ...stats, count: DEFAULT_LIMIT_10 }),
        DEFAULT_LIMIT_10,
        status,
    );
    // 'warning' result with count COUNT_FIVE, sampleCount 5 and only avg set.
    const fiveSamples = (name, displayName, avg) => metricResult(
        name,
        displayName,
        aggregates({ avg, count: COUNT_FIVE }),
        5,
        'warning',
    );
    // Pair of fixtures reused by the tests that expect content_quality_crisis to fire.
    const relevanceFixture = () => tenSamples('relevance', 'Relevance', { avg: TEST_SCORE_MID, p50: TEST_SCORE_BASELINE }, 'warning');
    const hallucinationFixture = () => tenSamples('hallucination', 'Hallucination', { avg: 0.15 }, 'warning');
    // Rule condition with direction 'below'.
    const below = (metric, aggregation, value) => ({ metric, aggregation, value, direction: 'below' });
    // Looks up the content_quality_crisis compound alert, if any.
    const findCrisis = (alerts) => alerts.find((entry) => entry.metricName === 'content_quality_crisis');

    it('fires compound alert when all conditions match', () => {
        const fired = evaluateCorrelationRules([relevanceFixture(), hallucinationFixture()], DEFAULT_CORRELATION_RULES);
        const crisis = findCrisis(fired);
        assert.ok(crisis, 'Expected content_quality_crisis compound alert');
        assert.strictEqual(crisis.isCompound, true);
        assert.strictEqual(crisis.severity, 'critical');
        assert.ok(requireDefined(crisis.relatedMetrics).includes('relevance'));
        assert.ok(requireDefined(crisis.relatedMetrics).includes('hallucination'));
    });

    it('does not fire when only some conditions match', () => {
        const inputs = [
            tenSamples('relevance', 'Relevance', { avg: TEST_SCORE_HIGH, p50: TEST_SCORE_HIGH }, 'healthy'),
            hallucinationFixture(),
        ];
        const fired = evaluateCorrelationRules(inputs, DEFAULT_CORRELATION_RULES);
        assert.strictEqual(findCrisis(fired), undefined);
    });

    it('does not fire when metric data is missing', () => {
        // Only relevance is present and every aggregate, including count, is null.
        const inputs = [metricResult('relevance', 'Relevance', aggregates(), 0, 'no_data')];
        const fired = evaluateCorrelationRules(inputs, DEFAULT_CORRELATION_RULES);
        assert.strictEqual(fired.length, 0);
    });

    it('does not fire when metric value is NaN', () => {
        const inputs = [
            tenSamples('relevance', 'Relevance', { avg: NaN }, 'warning'),
            hallucinationFixture(),
        ];
        const fired = evaluateCorrelationRules(inputs, DEFAULT_CORRELATION_RULES);
        assert.strictEqual(findCrisis(fired), undefined);
    });

    it('does not fire at exact threshold boundary (strict inequality)', () => {
        // Values are set to TEST_SCORE_PASSING / TEST_SCORE_WARNING, which the
        // test title treats as the exact rule thresholds; equality must not fire.
        const inputs = [
            tenSamples('relevance', 'Relevance', { avg: TEST_SCORE_PASSING, p50: TEST_SCORE_PASSING }, 'healthy'),
            tenSamples('hallucination', 'Hallucination', { avg: TEST_SCORE_WARNING }, 'healthy'),
        ];
        const fired = evaluateCorrelationRules(inputs, DEFAULT_CORRELATION_RULES);
        assert.strictEqual(findCrisis(fired), undefined);
    });

    it('returns empty array when results are empty', () => {
        const fired = evaluateCorrelationRules([], DEFAULT_CORRELATION_RULES);
        assert.strictEqual(fired.length, 0);
    });

    it('compound alert uses first condition metadata', () => {
        const fired = evaluateCorrelationRules([relevanceFixture(), hallucinationFixture()], DEFAULT_CORRELATION_RULES);
        const crisis = requireDefined(findCrisis(fired), 'Expected content_quality_crisis alert');
        // Expected metadata: p50 below TEST_SCORE_PASSING.
        assert.strictEqual(crisis.aggregation, 'p50');
        assert.strictEqual(crisis.threshold, TEST_SCORE_PASSING);
        assert.strictEqual(crisis.direction, 'below');
    });

    it('fires multiple compound alerts when multiple rules match', () => {
        const inputs = [
            relevanceFixture(),
            hallucinationFixture(),
            tenSamples('faithfulness', 'Faithfulness', { avg: TEST_SCORE_MID, p50: TEST_SCORE_PASSING }, 'warning'),
            tenSamples('coherence', 'Coherence', { avg: TEST_SCORE_MID, p50: TEST_SCORE_PASSING }, 'warning'),
        ];
        const fired = evaluateCorrelationRules(inputs, DEFAULT_CORRELATION_RULES);
        assert.ok(fired.length >= COUNT_TWO, `Expected >=2 compound alerts, got ${fired.length}`);
    });

    it('evaluates custom rule with >2 conditions', () => {
        const tripleRule = {
            name: 'triple_threat',
            displayName: 'Triple Threat',
            conditions: [
                below('relevance', 'avg', TEST_SCORE_MID),
                below('coherence', 'avg', TEST_SCORE_MID),
                below('faithfulness', 'avg', TEST_SCORE_MID),
            ],
            severity: 'critical',
            explanation: 'Three metrics failing simultaneously',
        };
        const inputs = [
            fiveSamples('relevance', 'Relevance', TEST_SCORE_LOW),
            fiveSamples('coherence', 'Coherence', TEST_SCORE_POOR),
            fiveSamples('faithfulness', 'Faithfulness', TEST_SCORE_VERY_LOW),
        ];
        const fired = evaluateCorrelationRules(inputs, [tripleRule]);
        assert.strictEqual(fired.length, 1);
        assert.strictEqual(fired[0].metricName, 'triple_threat');
    });

    it('does not fire when condition references null aggregation value', () => {
        // The first condition reads relevance.p95, which is null in the fixture.
        const nullRule = {
            name: 'null_test',
            displayName: 'Null Test',
            conditions: [
                below('relevance', 'p95', TEST_SCORE_MID),
                below('coherence', 'avg', TEST_SCORE_MID),
            ],
            severity: 'warning',
            explanation: 'Should not fire when p95 is null',
        };
        const inputs = [
            fiveSamples('relevance', 'Relevance', TEST_SCORE_LOW),
            fiveSamples('coherence', 'Coherence', TEST_SCORE_POOR),
        ];
        const fired = evaluateCorrelationRules(inputs, [nullRule]);
        assert.strictEqual(fired.length, 0);
    });

    it('preserves severity ordering when multiple rules fire', () => {
        const warningRule = {
            name: 'warn_rule',
            displayName: 'Warning Rule',
            conditions: [
                below('relevance', 'avg', TEST_SCORE_MID),
                below('coherence', 'avg', TEST_SCORE_MID),
            ],
            severity: 'warning',
            explanation: 'Warning level',
        };
        const criticalRule = {
            name: 'crit_rule',
            displayName: 'Critical Rule',
            conditions: [
                below('relevance', 'avg', TEST_SCORE_MID),
                below('faithfulness', 'avg', TEST_SCORE_MID),
            ],
            severity: 'critical',
            explanation: 'Critical level',
        };
        const inputs = [
            fiveSamples('relevance', 'Relevance', TEST_SCORE_LOW),
            fiveSamples('coherence', 'Coherence', TEST_SCORE_POOR),
            fiveSamples('faithfulness', 'Faithfulness', TEST_SCORE_VERY_LOW),
        ];
        const fired = evaluateCorrelationRules(inputs, [warningRule, criticalRule]);
        assert.strictEqual(fired.length, COUNT_TWO);
        // Only the presence of both severities is asserted, not their order.
        const severities = fired.map(({ severity }) => severity);
        assert.ok(severities.includes('warning'));
        assert.ok(severities.includes('critical'));
    });
});
836
+ // ==========================================================================
837
+ // Temporal MetricCondition (window + sustainedPeriods)
838
+ // ==========================================================================
839
describe('evaluateCorrelationRules temporal conditions', () => {
    /** Builds a metric result whose only overridden aggregation is `avg`. */
    const makeResult = (name, avg) => makeBaseResult(name, { avg });

    /**
     * Builds an "avg below TEST_SCORE_MID" condition for `metric`.
     * `temporal` optionally appends `window` and/or `sustainedPeriods`.
     */
    const belowMid = (metric, temporal = {}) => ({
        metric,
        aggregation: 'avg',
        value: TEST_SCORE_MID,
        direction: 'below',
        ...temporal,
    });

    /**
     * Builds a two-condition rule: a relevance condition carrying the given
     * temporal fields, plus a plain point-in-time coherence condition.
     */
    const makeRule = (name, displayName, relevanceTemporal, severity, explanation) => ({
        name,
        displayName,
        conditions: [belowMid('relevance', relevanceTemporal), belowMid('coherence')],
        severity,
        explanation,
    });

    /** Builds one previous-period entry for the third argument of evaluateCorrelationRules. */
    const period = (window, ...results) => ({ window, results });

    it('window condition checks value from matching previous-period result', () => {
        const rule = makeRule('window_test', 'Window Test', { window: '7d rolling' }, 'warning', 'Test window condition');
        // Current relevance is TEST_SCORE_HIGH, so the rule can only fire if the
        // relevance condition is read from the 7d window (TEST_SCORE_LOW).
        const current = [makeResult('relevance', TEST_SCORE_HIGH), makeResult('coherence', TEST_SCORE_LOW)];
        const previous = [
            period('7d rolling', makeResult('relevance', TEST_SCORE_LOW), makeResult('coherence', TEST_SCORE_LOW)),
        ];
        const alerts = evaluateCorrelationRules(current, [rule], previous);
        assert.strictEqual(alerts.length, 1, 'Expected window condition to match against previous-period value');
        assert.strictEqual(alerts[0].metricName, 'window_test');
    });

    it('window condition falls back to current results when window not found', () => {
        const rule = makeRule('fallback_test', 'Fallback Test', { window: '30d rolling' }, 'warning', 'Test fallback to current');
        const current = [makeResult('relevance', TEST_SCORE_LOW), makeResult('coherence', TEST_SCORE_LOW)];
        // Only a 7d window is supplied; the rule asks for 30d, which is absent.
        const previous = [period('7d rolling', makeResult('relevance', TEST_SCORE_HIGH))];
        const alerts = evaluateCorrelationRules(current, [rule], previous);
        // The current relevance (TEST_SCORE_LOW) is what satisfies the condition here.
        assert.strictEqual(alerts.length, 1, 'Expected fallback to current results');
    });

    it('window condition does not fire when previous value is above threshold', () => {
        const rule = makeRule('no_fire_test', 'No Fire Test', { window: '7d rolling' }, 'warning', 'Should not fire');
        const current = [makeResult('relevance', TEST_SCORE_LOW), makeResult('coherence', TEST_SCORE_LOW)];
        const previous = [period('7d rolling', makeResult('relevance', TEST_SCORE_HIGH))];
        const alerts = evaluateCorrelationRules(current, [rule], previous);
        // The windowed relevance (TEST_SCORE_HIGH) is not below the threshold.
        assert.strictEqual(alerts.length, 0);
    });

    it('sustainedPeriods requires condition to hold in N previous periods', () => {
        const rule = makeRule('sustained_test', 'Sustained Test', { sustainedPeriods: 2 }, 'critical', 'Sustained violation');
        const current = [makeResult('relevance', TEST_SCORE_LOW), makeResult('coherence', TEST_SCORE_LOW)];
        const previous = [
            period('7d rolling', makeResult('relevance', TEST_SCORE_POOR)),
            period('14d rolling', makeResult('relevance', TEST_SCORE_LOW)),
        ];
        const alerts = evaluateCorrelationRules(current, [rule], previous);
        assert.strictEqual(alerts.length, 1, 'Expected sustained condition to fire with 2 matching periods');
    });

    it('sustainedPeriods does not fire when insufficient previous periods', () => {
        const rule = makeRule('insufficient_periods', 'Insufficient', { sustainedPeriods: 3 }, 'warning', 'Need 3 periods');
        const current = [makeResult('relevance', TEST_SCORE_LOW), makeResult('coherence', TEST_SCORE_LOW)];
        // Only 2 previous periods are available, but the rule needs 3.
        const previous = [
            period('7d rolling', makeResult('relevance', TEST_SCORE_POOR)),
            period('14d rolling', makeResult('relevance', TEST_SCORE_LOW)),
        ];
        const alerts = evaluateCorrelationRules(current, [rule], previous);
        assert.strictEqual(alerts.length, 0);
    });

    it('sustainedPeriods does not fire when a period breaks the streak', () => {
        const rule = makeRule('broken_streak', 'Broken Streak', { sustainedPeriods: 2 }, 'warning', 'Streak broken');
        const current = [makeResult('relevance', TEST_SCORE_LOW), makeResult('coherence', TEST_SCORE_LOW)];
        // The first previous period has relevance=TEST_SCORE_GOOD, which breaks the streak.
        const previous = [
            period('7d rolling', makeResult('relevance', TEST_SCORE_GOOD)),
            period('14d rolling', makeResult('relevance', TEST_SCORE_LOW)),
        ];
        const alerts = evaluateCorrelationRules(current, [rule], previous);
        assert.strictEqual(alerts.length, 0, 'Streak broken by first period');
    });

    it('sustainedPeriods does not fire when current period does not match', () => {
        const rule = makeRule('current_fails', 'Current Fails', { sustainedPeriods: 1 }, 'warning', 'Current must match too');
        // Current relevance is TEST_SCORE_HIGH even though the previous period matches.
        const current = [makeResult('relevance', TEST_SCORE_HIGH), makeResult('coherence', TEST_SCORE_LOW)];
        const previous = [period('7d rolling', makeResult('relevance', TEST_SCORE_LOW))];
        const alerts = evaluateCorrelationRules(current, [rule], previous);
        assert.strictEqual(alerts.length, 0);
    });

    it('sustainedPeriods does not fire without previousResults', () => {
        const rule = makeRule('no_history', 'No History', { sustainedPeriods: 1 }, 'warning', 'No previous data');
        const current = [makeResult('relevance', TEST_SCORE_LOW), makeResult('coherence', TEST_SCORE_LOW)];
        const alerts = evaluateCorrelationRules(current, [rule]);
        assert.strictEqual(alerts.length, 0);
    });

    it('mixed temporal and non-temporal conditions in same rule', () => {
        const rule = makeRule('mixed_rule', 'Mixed Rule', { window: '7d rolling' }, 'warning', 'Mixed temporal and point-in-time');
        // Relevance is healthy now but low in the 7d window; coherence is low now.
        const current = [makeResult('relevance', TEST_SCORE_HIGH), makeResult('coherence', TEST_SCORE_LOW)];
        const previous = [period('7d rolling', makeResult('relevance', TEST_SCORE_LOW))];
        const alerts = evaluateCorrelationRules(current, [rule], previous);
        assert.strictEqual(alerts.length, 1, 'Window condition uses previous, non-temporal uses current');
    });

    it('backward compatible: no previousResults means all conditions use current results', () => {
        // Existing default rules should work exactly as before.
        const current = [
            makeResult('relevance', TEST_SCORE_MID),
            makeResult('hallucination', TEST_HALLUCINATION_NONCOMPLIANT_SCORE),
        ];
        // Override p50 for relevance to trigger content_quality_crisis.
        current[0].values.p50 = TEST_SCORE_BASELINE;
        const alerts = evaluateCorrelationRules(current, DEFAULT_CORRELATION_RULES);
        const crisis = alerts.find(a => a.metricName === 'content_quality_crisis');
        assert.ok(crisis, 'Default rules still work without previousResults');
    });

    it('registers temporal condition via Zod schema validation', () => {
        const rule = {
            name: 'temporal_validated',
            displayName: 'Temporal Validated',
            conditions: [
                belowMid('relevance', { window: '30d rolling' }),
                belowMid('coherence', { sustainedPeriods: 3 }),
            ],
            severity: 'warning',
            explanation: 'Validates temporal fields pass schema',
        };
        registerCorrelationRule(rule);
        try {
            assert.ok(getCorrelationRules().some(r => r.name === 'temporal_validated'));
        }
        finally {
            // Remove the rule again so it does not leak into later suites
            // through the shared rule registry.
            unregisterCorrelationRule('temporal_validated');
        }
    });

    it('rejects invalid window value via Zod schema', () => {
        assert.throws(() => registerCorrelationRule({
            name: 'bad_window',
            displayName: 'Bad Window',
            conditions: [
                // Intentionally invalid window string: runtime schema validation must reject it.
                belowMid('a', { window: 'invalid' }),
                belowMid('b'),
            ],
            severity: 'warning',
            explanation: 'Should fail validation',
        }));
    });

    it('sustainedPeriods does not fire with empty previousResults array', () => {
        const rule = makeRule('empty_array', 'Empty Array', { sustainedPeriods: 1 }, 'warning', 'Empty previous results array');
        const current = [makeResult('relevance', TEST_SCORE_LOW), makeResult('coherence', TEST_SCORE_LOW)];
        const alerts = evaluateCorrelationRules(current, [rule], []);
        assert.strictEqual(alerts.length, 0, 'Empty array has 0 entries < sustainedPeriods=1');
    });

    it('sustainedPeriods takes precedence over window when both set', () => {
        const rule = makeRule('both_set', 'Both Set', { window: '7d rolling', sustainedPeriods: 1 }, 'warning', 'sustainedPeriods should take precedence');
        // Relevance is low both now and in the 7d period, so the sustained check holds.
        // NOTE(review): this fixture would also satisfy a window-only evaluation, so it
        // does not by itself distinguish which branch ran — confirm against the implementation.
        const current = [makeResult('relevance', TEST_SCORE_LOW), makeResult('coherence', TEST_SCORE_LOW)];
        const previous = [period('7d rolling', makeResult('relevance', TEST_SCORE_LOW))];
        const alerts = evaluateCorrelationRules(current, [rule], previous);
        assert.strictEqual(alerts.length, 1);
    });

    it('alert message annotates temporal condition sources', () => {
        const rule = makeRule('annotated_msg', 'Annotated', { window: '7d rolling' }, 'warning', 'Check message annotation');
        const current = [makeResult('relevance', TEST_SCORE_HIGH), makeResult('coherence', TEST_SCORE_LOW)];
        const previous = [period('7d rolling', makeResult('relevance', TEST_SCORE_LOW))];
        const alerts = evaluateCorrelationRules(current, [rule], previous);
        assert.strictEqual(alerts.length, 1);
        assert.ok(alerts[0].message.includes('[7d rolling]'), 'Expected window annotation in message');
    });

    it('rejects sustainedPeriods of 0 via Zod schema', () => {
        assert.throws(() => registerCorrelationRule({
            name: 'bad_sustained',
            displayName: 'Bad Sustained',
            conditions: [
                belowMid('a', { sustainedPeriods: 0 }),
                belowMid('b'),
            ],
            severity: 'warning',
            explanation: 'Should fail validation',
        }));
    });
});
1123
describe('registerCorrelationRule', () => {
    /** Builds an "avg below TEST_SCORE_MID" condition; `overrides` replaces or appends fields. */
    const condition = (metric, overrides = {}) => ({
        metric,
        aggregation: 'avg',
        value: TEST_SCORE_MID,
        direction: 'below',
        ...overrides,
    });

    /** Builds a warning-severity rule from the given parts. */
    const warningRule = (name, displayName, conditions, explanation) => ({
        name,
        displayName,
        conditions,
        severity: 'warning',
        explanation,
    });

    /** Asserts that registering `rule` throws. */
    const assertRejected = (rule) => assert.throws(() => registerCorrelationRule(rule));

    it('registers a custom correlation rule', () => {
        const rule = warningRule('custom_rule', 'Custom Rule', [condition('relevance'), condition('coherence')], 'Test rule');
        registerCorrelationRule(rule);
        try {
            assert.ok(getCorrelationRules().some(r => r.name === 'custom_rule'));
        }
        finally {
            // Remove the rule again so it does not leak into later suites
            // through the shared rule registry.
            unregisterCorrelationRule('custom_rule');
        }
    });

    it('throws on duplicate rule name', () => {
        // 'content_quality_crisis' is the name of an already-registered rule.
        assertRejected(warningRule('content_quality_crisis', 'Dup', [condition('a'), condition('b')], 'Dup'));
    });

    it('rejects rule with fewer than 2 conditions', () => {
        assertRejected(warningRule('bad_rule', 'Bad', [condition('a')], 'Test'));
    });

    it('rejects rule with empty explanation', () => {
        assertRejected(warningRule('bad_rule', 'Bad', [condition('a'), condition('b')], ''));
    });

    it('rejects condition with NaN value', () => {
        assertRejected(warningRule('nan_value', 'NaN Value', [condition('a', { value: NaN }), condition('b')], 'Should fail validation'));
    });

    it('rejects condition with Infinity value', () => {
        assertRejected(warningRule('inf_value', 'Infinity Value', [condition('a', { value: Infinity }), condition('b')], 'Should fail validation'));
    });

    it('rejects sustainedPeriods exceeding max allowed', () => {
        // Local copy of the limit; one past it must be rejected.
        const MAX_SUSTAINED_PERIODS = 12;
        assertRejected(warningRule('over_sustained', 'Over Sustained', [
            condition('a', { sustainedPeriods: MAX_SUSTAINED_PERIODS + 1 }),
            condition('b'),
        ], 'Should fail validation'));
    });

    it('unregisters a custom rule', () => {
        registerCorrelationRule(warningRule('to_remove', 'Remove', [condition('a'), condition('b')], 'Test'));
        // First removal succeeds; a second call finds nothing to remove.
        assert.strictEqual(unregisterCorrelationRule('to_remove'), true);
        assert.strictEqual(unregisterCorrelationRule('to_remove'), false);
    });
});
1226
describe('computeDashboardSummary with correlation', () => {
    /** Builds a Map entry holding a single evaluation for `metricName`. */
    const singleEvaluation = (metricName, scoreValue) => [
        metricName,
        [{ timestamp: '2026-01-01T00:00:00Z', evaluationName: metricName, scoreValue }],
    ];

    /** Returns only the alerts flagged as compound. */
    const compoundOnly = (summary) => summary.alerts.filter(alert => alert.isCompound);

    it('includes compound alerts in dashboard alerts', () => {
        const evaluationsByMetric = new Map([
            singleEvaluation('relevance', TEST_SCORE_LOW),
            singleEvaluation('hallucination', 0.25),
        ]);
        const compoundAlerts = compoundOnly(computeDashboardSummary(evaluationsByMetric));
        assert.ok(compoundAlerts.length > 0, 'Expected compound alerts');
        assert.strictEqual(compoundAlerts[0].severity, 'critical');
    });

    it('compound alerts can upgrade overall status', () => {
        // Scenario: each metric on its own is only at 'warning' level, but together
        // they are expected to fire content_quality_crisis as 'critical'.
        const evaluationsByMetric = new Map([
            singleEvaluation('relevance', TEST_SCORE_ADEQUATE),
            singleEvaluation('hallucination', 0.12),
        ]);
        const summary = computeDashboardSummary(evaluationsByMetric);
        const hasCriticalCompound = compoundOnly(summary).some(alert => alert.severity === 'critical');
        assert.ok(hasCriticalCompound);
        assert.strictEqual(summary.overallStatus, 'critical');
    });
});
1260
+ // ==========================================================================
1261
+ // Multi-Agent Explainability (R5)
1262
+ // ==========================================================================
1263
describe('evaluateHandoffs', () => {
    /** Builds step scores indexed 0..n-1 from a list of scores. */
    const toSteps = (...scores) => scores.map((score, step) => ({ step, score }));

    /** Two-step agent map with a handoff from 'a' to 'b'. */
    const twoAgentMap = () => new Map([[0, 'a'], [1, 'b']]);

    it('detects handoffs between agents', () => {
        const steps = [
            { step: 0, score: TEST_SCORE_HIGH, explanation: 'Good start' },
            { step: 1, score: TEST_SCORE_GOOD, explanation: 'Handoff to planner' },
            { step: 2, score: TEST_SCORE_PASSING, explanation: 'Planning' },
        ];
        const agentMap = new Map([[0, 'router'], [1, 'planner'], [COUNT_TWO, 'planner']]);
        const handoffs = evaluateHandoffs(steps, agentMap);
        // Only the router -> planner transition counts; planner -> planner does not.
        assert.strictEqual(handoffs.length, 1);
        const [handoff] = handoffs;
        assert.strictEqual(handoff.sourceAgent, 'router');
        assert.strictEqual(handoff.targetAgent, 'planner');
        assert.strictEqual(handoff.correctTarget, true);
    });

    it('returns empty for single agent', () => {
        const agentMap = new Map([[0, 'agent'], [1, 'agent']]);
        const handoffs = evaluateHandoffs(toSteps(TEST_SCORE_HIGH, TEST_SCORE_GOOD), agentMap);
        assert.strictEqual(handoffs.length, 0);
    });

    it('returns empty without agent map', () => {
        const handoffs = evaluateHandoffs(toSteps(TEST_SCORE_HIGH));
        assert.strictEqual(handoffs.length, 0);
    });

    it('marks low-score handoff as incorrect target', () => {
        const [handoff] = evaluateHandoffs(toSteps(TEST_SCORE_HIGH, TEST_SCORE_VERY_LOW), twoAgentMap());
        assert.strictEqual(handoff.correctTarget, false);
        assert.strictEqual(handoff.contextPreserved, false);
    });

    it('uses custom thresholds when provided', () => {
        const thresholds = { correctTarget: TEST_SCORE_PASSING, contextPreserved: TEST_SCORE_HIGH };
        const [handoff] = evaluateHandoffs(toSteps(TEST_SCORE_HIGH, TEST_SCORE_BASELINE), twoAgentMap(), thresholds);
        assert.strictEqual(handoff.correctTarget, false); // TEST_SCORE_BASELINE < TEST_SCORE_PASSING
        assert.strictEqual(handoff.contextPreserved, false); // TEST_SCORE_BASELINE < TEST_SCORE_HIGH
    });

    it('applies default thresholds when none provided', () => {
        const [handoff] = evaluateHandoffs(toSteps(TEST_SCORE_HIGH, 0.55), twoAgentMap());
        assert.strictEqual(handoff.correctTarget, true); // 0.55 >= 0.5
        assert.strictEqual(handoff.contextPreserved, false); // 0.55 < TEST_SCORE_PASSING
    });
});
1323
describe('computeTurnLevelResults', () => {
    it('maps step scores to turn results', () => {
        const scores = [TEST_SCORE_HIGH, TEST_SCORE_MID, TEST_SCORE_WARNING];
        const steps = scores.map((score, step) => ({ step, score }));
        const turns = computeTurnLevelResults(steps);
        assert.strictEqual(turns.length, COUNT_THREE);
        const [first, second, third] = turns;
        assert.strictEqual(first.taskProgress, 1); // TEST_SCORE_HIGH >= TEST_SCORE_GOOD
        assert.strictEqual(second.taskProgress, TEST_SCORE_MID); // 0.5 >= 0.5
        assert.strictEqual(third.hasError, true); // TEST_SCORE_WARNING < 0.3
    });

    it('includes agent names from map', () => {
        const agentMap = new Map([[0, 'researcher']]);
        const turns = computeTurnLevelResults([{ step: 0, score: TEST_SCORE_GOOD }], agentMap);
        assert.strictEqual(turns[0].agentName, 'researcher');
    });

    it('skips named steps (non-numeric)', () => {
        // Only the numerically indexed step is expected to produce a turn.
        const namedStep = { step: 'planning', score: TEST_SCORE_HIGH };
        const numericStep = { step: 0, score: TEST_SCORE_GOOD };
        const turns = computeTurnLevelResults([namedStep, numericStep]);
        assert.strictEqual(turns.length, 1);
        assert.strictEqual(turns[0].turnIndex, 0);
    });
});
1352
describe('computeMultiAgentEvaluation', () => {
    /** Builds step scores indexed 0..n-1 from a list of scores. */
    const toSteps = (...scores) => scores.map((score, step) => ({ step, score }));

    it('aggregates handoffs and turns', () => {
        const agentMap = new Map([[0, 'router'], [1, 'planner'], [COUNT_TWO, 'executor']]);
        const result = computeMultiAgentEvaluation(toSteps(TEST_SCORE_HIGH, TEST_SCORE_GOOD, TEST_SCORE_PASSING), agentMap);
        assert.strictEqual(result.totalTurns, COUNT_THREE);
        assert.notStrictEqual(result.handoffScore, null);
        assert.notStrictEqual(result.avgTurnRelevance, null);
        // Two transitions: router -> planner and planner -> executor.
        assert.strictEqual(result.handoffs.length, COUNT_TWO);
    });

    it('computes error propagation turns', () => {
        const steps = toSteps(
            TEST_SCORE_HIGH,
            TEST_SCORE_VERY_LOW, // error
            TEST_SCORE_LOW,
            TEST_SCORE_WARNING, // error
        );
        const { errorPropagationTurns } = computeMultiAgentEvaluation(steps);
        assert.strictEqual(errorPropagationTurns, COUNT_TWO); // 2 turns after first error
    });

    it('handles empty step scores', () => {
        const result = computeMultiAgentEvaluation([]);
        assert.strictEqual(result.totalTurns, 0);
        for (const field of ['handoffScore', 'avgTurnRelevance', 'conversationCompleteness']) {
            assert.strictEqual(result[field], null);
        }
        assert.strictEqual(result.errorPropagationTurns, 0);
    });

    it('conversation completeness uses last turn', () => {
        // The last turn scores TEST_SCORE_HIGH, which is expected to map to taskProgress = 1.
        const { conversationCompleteness } = computeMultiAgentEvaluation(toSteps(TEST_SCORE_MID, TEST_SCORE_HIGH));
        assert.strictEqual(conversationCompleteness, 1);
    });

    it('treats agentMap with all-same agents as single-agent (no handoffs)', () => {
        // Every step maps to the same agent, so no handoffs should be produced.
        const agentMap = new Map([[0, 'only_agent'], [1, 'only_agent']]);
        const result = computeMultiAgentEvaluation(toSteps(TEST_SCORE_HIGH, TEST_SCORE_GOOD), agentMap);
        assert.strictEqual(result.handoffs.length, 0);
        assert.strictEqual(result.handoffScore, null);
    });

    it('passes custom handoff thresholds through', () => {
        const agentMap = new Map([[0, 'a'], [1, 'b']]);
        const thresholds = { correctTarget: TEST_SCORE_GOOD, contextPreserved: TEST_SCORE_HIGH };
        const result = computeMultiAgentEvaluation(toSteps(TEST_SCORE_HIGH, TEST_SCORE_BASELINE), agentMap, undefined, thresholds);
        const [handoff] = result.handoffs;
        assert.strictEqual(handoff.correctTarget, false); // TEST_SCORE_BASELINE < TEST_SCORE_GOOD
        assert.strictEqual(handoff.contextPreserved, false); // TEST_SCORE_BASELINE < TEST_SCORE_HIGH
    });
});
1414
+ // ==========================================================================
1415
+ // Role-Specific Views (G7)
1416
+ // ==========================================================================
1417
+ describe('computeRoleView', () => {
1418
+ function makeDashboard() {
1419
+ const evaluationsByMetric = new Map([
1420
+ ['relevance', [
1421
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_LOW },
1422
+ ]],
1423
+ ['faithfulness', [
1424
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'faithfulness', scoreValue: TEST_SCORE_HIGH },
1425
+ ]],
1426
+ ]);
1427
+ return computeDashboardSummary(evaluationsByMetric);
1428
+ }
1429
+ it('creates executive view with status summary', () => {
1430
+ const dashboard = makeDashboard();
1431
+ const view = computeRoleView(dashboard, 'executive');
1432
+ assert.strictEqual(view.role, 'executive');
1433
+ assert.ok(view.summary);
1434
+ assert.ok(view.metricStatuses.length > 0);
1435
+ assert.ok('alertCounts' in view);
1436
+ assert.strictEqual(typeof view.alertCounts.critical, 'number');
1437
+ });
1438
+ it('creates operator view with prioritized alerts', () => {
1439
+ const dashboard = makeDashboard();
1440
+ const view = computeRoleView(dashboard, 'operator');
1441
+ assert.strictEqual(view.role, 'operator');
1442
+ assert.ok(Array.isArray(view.prioritizedAlerts));
1443
+ assert.ok(Array.isArray(view.alertingMetrics));
1444
+ assert.ok(Array.isArray(view.degradingTrends));
1445
+ // Alerts should be sorted critical-first
1446
+ if (view.prioritizedAlerts.length >= COUNT_TWO) {
1447
+ const severityOrder = { critical: 0, warning: 1, info: 2 };
1448
+ for (let i = 1; i < view.prioritizedAlerts.length; i++) {
1449
+ assert.ok(severityOrder[view.prioritizedAlerts[i].severity] >=
1450
+ severityOrder[view.prioritizedAlerts[i - 1].severity]);
1451
+ }
1452
+ }
1453
+ });
1454
+ it('creates auditor view with full provenance', () => {
1455
+ const dashboard = makeDashboard();
1456
+ const view = computeRoleView(dashboard, 'auditor');
1457
+ assert.strictEqual(view.role, 'auditor');
1458
+ assert.ok(view.metrics.length > 0);
1459
+ assert.ok(view.alerts.length >= 0);
1460
+ assert.ok(view.timestamp);
1461
+ assert.strictEqual(typeof view.totalEvaluationCount, 'number');
1462
+ });
1463
+ it('operator view filters to alerting metrics only', () => { // every metric listed in alertingMetrics must carry at least one alert
1464
+ const dashboard = makeDashboard();
1465
+ const view = computeRoleView(dashboard, 'operator');
1466
+ for (const metric of view.alertingMetrics) { // passes vacuously when alertingMetrics is empty
1467
+ assert.ok(metric.alerts.length > 0, `${metric.name} should have alerts`);
1468
+ }
1469
+ });
1470
+ it('executive view includes topIssues sorted by severity', () => { // topIssues must be an array, capped at COUNT_FIVE, ordered worst-first
1471
+ const dashboard = makeDashboard();
1472
+ const view = computeRoleView(dashboard, 'executive');
1473
+ assert.ok(Array.isArray(view.topIssues));
1474
+ assert.ok(view.topIssues.length <= COUNT_FIVE); // default limit
1475
+ // Should be sorted worst-first
1476
+ if (view.topIssues.length >= COUNT_TWO) { // ordering is only checked when there is more than one issue
1477
+ const statusOrder = { critical: 0, warning: 1, no_data: 2, healthy: 3 }; // lower rank = worse status
1478
+ for (let i = 1; i < view.topIssues.length; i++) { // unknown statuses fall back to rank COUNT_THREE via ??
1479
+ assert.ok((statusOrder[view.topIssues[i].status] ?? COUNT_THREE) >=
1480
+ (statusOrder[view.topIssues[i - 1].status] ?? COUNT_THREE));
1481
+ }
1482
+ }
1483
+ });
1484
+ it('executive view respects configurable topIssuesLimit', () => { // an explicit topIssuesLimit option caps the topIssues list
1485
+ const dashboard = makeDashboard();
1486
+ const view = computeRoleView(dashboard, 'executive', { topIssuesLimit: 2 });
1487
+ assert.ok(view.topIssues.length <= COUNT_TWO); // upper bound only; fewer issues than the limit is acceptable
1488
+ });
1489
+ it('operator view filters out info-level alerts', () => { // an injected info alert must not reach prioritizedAlerts
1490
+ const dashboard = makeDashboard();
1491
+ // Add an info alert
1492
+ dashboard.alerts.push({ // mutates the freshly built dashboard before the view is computed
1493
+ severity: 'info', message: 'Info alert', aggregation: 'avg',
1494
+ threshold: TEST_SCORE_MID, actualValue: TEST_SCORE_BASELINE, direction: 'below', metricName: 'test',
1495
+ });
1496
+ const view = computeRoleView(dashboard, 'operator');
1497
+ for (const alert of view.prioritizedAlerts) {
1498
+ assert.notStrictEqual(alert.severity, 'info');
1499
+ }
1500
+ });
1501
+ it('operator view filters by minActionableSampleCount', () => { // surviving alerts with an affectedCount must meet the COUNT_THOUSAND floor
1502
+ const dashboard = makeDashboard();
1503
+ // All existing alerts should have affectedCount from computeQualityMetric
1504
+ const view = computeRoleView(dashboard, 'operator', { minActionableSampleCount: COUNT_THOUSAND });
1505
+ // With very high threshold, alerts with low affectedCount should be filtered
1506
+ for (const alert of view.prioritizedAlerts) {
1507
+ if (alert.affectedCount !== undefined) { // NOTE(review): alerts without affectedCount are not checked, so the test can pass without asserting anything
1508
+ assert.ok(alert.affectedCount >= COUNT_THOUSAND);
1509
+ }
1510
+ }
1511
+ });
1512
+ it('executive view includes SLA summary when available', () => { // one SLA definition over one metric should yield 1 compliant out of 1 total
1513
+ const evaluationsByMetric = new Map([ // single 'relevance' evaluation scored TEST_SCORE_STRONG
1514
+ ['relevance', [
1515
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_STRONG },
1516
+ ]],
1517
+ ]);
1518
+ const slas = [
1519
+ { metric: 'relevance', aggregation: 'p50', target: TEST_SCORE_GOOD, direction: 'above' },
1520
+ ];
1521
+ const dashboard = computeDashboardSummary(evaluationsByMetric, undefined, undefined, slas); // SLA definitions are passed as the fourth argument
1522
+ const view = computeRoleView(dashboard, 'executive');
1523
+ assert.strictEqual(view.slaCompliantCount, 1);
1524
+ assert.strictEqual(view.slaTotalCount, 1);
1525
+ });
1526
+ });
1527
+ // ==========================================================================
1528
+ // Progressive Disclosure (G3)
1529
+ // ==========================================================================
1530
+ describe('computeMetricDetail', () => { // drill-down detail: score distribution buckets, worst/best evaluations, inherited base fields
1531
+ const config = QUALITY_METRICS.relevance; // metric config shared by every test in this suite
1532
+ it('includes score distribution with 10 buckets by default', () => {
1533
+ const evals = Array.from({ length: COUNT_TWENTY }, (_, i) => ({ // COUNT_TWENTY evaluations scored i / COUNT_TWENTY
1534
+ timestamp: `2026-01-01T00:00:${String(i).padStart(COUNT_TWO, '0')}Z`,
1535
+ evaluationName: 'relevance', scoreValue: i / COUNT_TWENTY,
1536
+ }));
1537
+ const detail = computeMetricDetail(evals, config);
1538
+ assert.strictEqual(detail.scoreDistribution.length, COUNT_TEN);
1539
+ const totalCount = detail.scoreDistribution.reduce((s, b) => s + b.count, 0); // bucket counts must sum to the number of evaluations
1540
+ assert.strictEqual(totalCount, COUNT_TWENTY);
1541
+ });
1542
+ it('supports configurable bucket count', () => {
1543
+ const evals = Array.from({ length: COUNT_TWENTY }, (_, i) => ({
1544
+ timestamp: `2026-01-01T00:00:${String(i).padStart(COUNT_TWO, '0')}Z`,
1545
+ evaluationName: 'relevance', scoreValue: i / COUNT_TWENTY,
1546
+ }));
1547
+ const detail = computeMetricDetail(evals, config, { bucketCount: COUNT_FIVE }); // options object is the third argument
1548
+ assert.strictEqual(detail.scoreDistribution.length, COUNT_FIVE);
1549
+ const totalCount = detail.scoreDistribution.reduce((s, b) => s + b.count, 0);
1550
+ assert.strictEqual(totalCount, COUNT_TWENTY);
1551
+ });
1552
+ it('clamps bucket count to valid range (2-100)', () => {
1553
+ const evals = [
1554
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_MID },
1555
+ ];
1556
+ const detailLow = computeMetricDetail(evals, config, { bucketCount: 1 });
1557
+ assert.strictEqual(detailLow.scoreDistribution.length, COUNT_TWO); // clamped to min 2
1558
+ const detailHigh = computeMetricDetail(evals, config, { bucketCount: COUNT_TWO_HUNDRED });
1559
+ assert.strictEqual(detailHigh.scoreDistribution.length, COUNT_HUNDRED); // clamped to max 100
1560
+ });
1561
+ it('includes worst evaluations sorted ascending', () => {
1562
+ const evals = [ // deliberately unsorted so the lowest score is not first in the input
1563
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_HIGH, explanation: 'Great' },
1564
+ { timestamp: '2026-01-01T00:01:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_VERY_LOW, explanation: 'Bad' },
1565
+ { timestamp: '2026-01-01T00:02:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_MID, explanation: 'Ok' },
1566
+ ];
1567
+ const detail = computeMetricDetail(evals, config);
1568
+ assert.strictEqual(detail.worstEvaluations[0].scoreValue, TEST_SCORE_VERY_LOW);
1569
+ assert.strictEqual(detail.worstEvaluations[0].explanation, 'Bad'); // explanation travels with the evaluation
1570
+ });
1571
+ it('includes best evaluations sorted descending', () => {
1572
+ const evals = [
1573
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_HIGH },
1574
+ { timestamp: '2026-01-01T00:01:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_VERY_LOW },
1575
+ { timestamp: '2026-01-01T00:02:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_MID },
1576
+ ];
1577
+ const detail = computeMetricDetail(evals, config);
1578
+ assert.strictEqual(detail.bestEvaluations[0].scoreValue, TEST_SCORE_HIGH);
1579
+ });
1580
+ it('respects topN parameter', () => {
1581
+ const evals = Array.from({ length: COUNT_TWENTY }, (_, i) => ({
1582
+ timestamp: `2026-01-01T00:00:${String(i).padStart(COUNT_TWO, '0')}Z`,
1583
+ evaluationName: 'relevance', scoreValue: i / COUNT_TWENTY,
1584
+ }));
1585
+ const detail = computeMetricDetail(evals, config, { topN: 3 }); // topN bounds both the worst and the best list
1586
+ assert.strictEqual(detail.worstEvaluations.length, COUNT_THREE);
1587
+ assert.strictEqual(detail.bestEvaluations.length, COUNT_THREE);
1588
+ });
1589
+ it('inherits base QualityMetricResult fields', () => {
1590
+ const evals = [
1591
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD },
1592
+ ];
1593
+ const detail = computeMetricDetail(evals, config);
1594
+ assert.strictEqual(detail.name, 'relevance');
1595
+ assert.strictEqual(detail.sampleCount, 1);
1596
+ assert.ok(detail.values.avg !== null);
1597
+ });
1598
+ it('handles empty evaluations', () => {
1599
+ const detail = computeMetricDetail([], config);
1600
+ assert.strictEqual(detail.scoreDistribution.length, COUNT_TEN); // buckets are still produced for empty input
1601
+ assert.strictEqual(detail.worstEvaluations.length, 0);
1602
+ assert.strictEqual(detail.bestEvaluations.length, 0);
1603
+ });
1604
+ });
1605
+ describe('toEvaluationDetail', () => { // conversion of an evaluation record into its detail representation
1606
+ it('converts EvaluationResult to EvaluationDetailResult', () => {
1607
+ const eval_ = { // fully populated record; trailing underscore avoids the reserved-looking name `eval`
1608
+ timestamp: '2026-01-01T00:00:00Z',
1609
+ evaluationName: 'relevance',
1610
+ scoreValue: TEST_SCORE_STRONG,
1611
+ scoreLabel: 'pass',
1612
+ explanation: 'Good match',
1613
+ evaluator: 'gpt-4',
1614
+ evaluatorType: 'llm',
1615
+ traceId: 'trace-123',
1616
+ spanId: 'span-456',
1617
+ sessionId: 'sess-789',
1618
+ };
1619
+ const detail = toEvaluationDetail(eval_);
1620
+ assert.strictEqual(detail.evaluationName, 'relevance'); // only a subset of the input fields is asserted below
1621
+ assert.strictEqual(detail.scoreValue, TEST_SCORE_STRONG);
1622
+ assert.strictEqual(detail.explanation, 'Good match');
1623
+ assert.strictEqual(detail.traceId, 'trace-123');
1624
+ assert.strictEqual(detail.timestamp, '2026-01-01T00:00:00Z');
1625
+ });
1626
+ it('handles optional fields as undefined', () => {
1627
+ const eval_ = { // minimal record: only timestamp and evaluationName
1628
+ timestamp: '2026-01-01T00:00:00Z',
1629
+ evaluationName: 'relevance',
1630
+ };
1631
+ const detail = toEvaluationDetail(eval_);
1632
+ assert.strictEqual(detail.scoreValue, undefined); // absent inputs must surface as undefined, not null or a default
1633
+ assert.strictEqual(detail.explanation, undefined);
1634
+ assert.strictEqual(detail.traceId, undefined);
1635
+ });
1636
+ });
1637
+ // ==========================================================================
1638
+ // Contextual Severity (G5)
1639
+ // ==========================================================================
1640
+ describe('applySeverityContext', () => { // context rules: suppressInfo, escalateWarnings, minSampleCount, environment pattern
1641
+ const makeAlerts = () => [ // fresh fixture per call: one info, one warning, one critical alert
1642
+ { severity: 'info', message: 'Info', aggregation: 'avg', threshold: TEST_SCORE_MID, actualValue: TEST_SCORE_POOR, direction: 'below' },
1643
+ { severity: 'warning', message: 'Warning', aggregation: 'p50', threshold: TEST_SCORE_PASSING, actualValue: TEST_SCORE_BASELINE, direction: 'below' },
1644
+ { severity: 'critical', message: 'Critical', aggregation: 'p50', threshold: TEST_SCORE_MID, actualValue: TEST_SCORE_LOW, direction: 'below' },
1645
+ ];
1646
+ it('suppresses info alerts when suppressInfo is true', () => {
1647
+ const result = applySeverityContext(makeAlerts(), { suppressInfo: true });
1648
+ assert.strictEqual(result.length, COUNT_TWO); // the info alert is dropped, the other two remain
1649
+ assert.ok(result.every(a => a.severity !== 'info'));
1650
+ });
1651
+ it('escalates warnings to critical when escalateWarnings is true', () => {
1652
+ const result = applySeverityContext(makeAlerts(), { escalateWarnings: true });
1653
+ assert.strictEqual(result.length, COUNT_THREE);
1654
+ const severities = result.map(a => a.severity);
1655
+ assert.ok(!severities.includes('warning'));
1656
+ assert.strictEqual(severities.filter(s => s === 'critical').length, COUNT_TWO); // original critical plus the escalated warning
1657
+ });
1658
+ it('suppresses all alerts when below minSampleCount', () => {
1659
+ const result = applySeverityContext(makeAlerts(), { minSampleCount: COUNT_TEN }, COUNT_FIVE); // third argument is the sample count compared against minSampleCount
1660
+ assert.strictEqual(result.length, 0);
1661
+ });
1662
+ it('keeps alerts when at or above minSampleCount', () => {
1663
+ const result = applySeverityContext(makeAlerts(), { minSampleCount: COUNT_TEN }, COUNT_TEN); // boundary case: equal to the minimum
1664
+ assert.strictEqual(result.length, COUNT_THREE);
1665
+ });
1666
+ it('applies multiple context rules together', () => {
1667
+ const result = applySeverityContext(makeAlerts(), {
1668
+ suppressInfo: true,
1669
+ escalateWarnings: true,
1670
+ });
1671
+ assert.strictEqual(result.length, COUNT_TWO);
1672
+ assert.ok(result.every(a => a.severity === 'critical'));
1673
+ });
1674
+ it('returns original alerts with empty context', () => {
1675
+ const result = applySeverityContext(makeAlerts(), {});
1676
+ assert.strictEqual(result.length, COUNT_THREE); // only the count is checked, not the alert contents
1677
+ });
1678
+ it('does not mutate original alerts', () => {
1679
+ const alerts = makeAlerts();
1680
+ const original = alerts.map(a => ({ ...a })); // shallow snapshot taken before the call
1681
+ applySeverityContext(alerts, { escalateWarnings: true });
1682
+ assert.deepStrictEqual(alerts, original);
1683
+ });
1684
+ it('applies rules when environment pattern matches exactly', () => {
1685
+ const result = applySeverityContext(makeAlerts(), { environment: 'production', escalateWarnings: true }, undefined, 'production'); // fourth argument is the current environment
1686
+ assert.ok(result.every(a => a.severity !== 'warning'));
1687
+ });
1688
+ it('skips rules when environment pattern does not match', () => {
1689
+ const result = applySeverityContext(makeAlerts(), { environment: 'production', escalateWarnings: true }, undefined, 'staging');
1690
+ // Rules not applied - warning should still be present
1691
+ assert.ok(result.some(a => a.severity === 'warning'));
1692
+ assert.strictEqual(result.length, COUNT_THREE);
1693
+ });
1694
+ it('supports glob wildcard pattern matching', () => {
1695
+ const result = applySeverityContext(makeAlerts(), { environment: 'prod*', escalateWarnings: true }, undefined, 'production');
1696
+ assert.ok(result.every(a => a.severity !== 'warning'));
1697
+ });
1698
+ it('supports single-character wildcard (?)', () => {
1699
+ const result = applySeverityContext(makeAlerts(), { environment: 'staging-?', suppressInfo: true }, undefined, 'staging-a');
1700
+ assert.ok(result.every(a => a.severity !== 'info'));
1701
+ });
1702
+ it('matches case-insensitively', () => {
1703
+ const result = applySeverityContext(makeAlerts(), { environment: 'PRODUCTION', escalateWarnings: true }, undefined, 'production');
1704
+ assert.ok(result.every(a => a.severity !== 'warning'));
1705
+ });
1706
+ it('applies rules when no currentEnvironment provided (no filtering)', () => {
1707
+ const result = applySeverityContext(makeAlerts(), { environment: 'production', escalateWarnings: true });
1708
+ // No currentEnvironment => rules apply regardless
1709
+ assert.ok(result.every(a => a.severity !== 'warning'));
1710
+ });
1711
+ it('rejects overly long environment patterns (ReDoS protection)', () => {
1712
+ const longPattern = 'a'.repeat(COUNT_TWO_HUNDRED) + '*'; // NOTE(review): the exact length limit is not visible here; this pattern is assumed to exceed it
1713
+ const result = applySeverityContext(makeAlerts(), { environment: longPattern, escalateWarnings: true }, undefined, 'production');
1714
+ // Pattern too long => no match => rules not applied => original alerts returned
1715
+ assert.ok(result.some(a => a.severity === 'warning'));
1716
+ assert.strictEqual(result.length, COUNT_THREE);
1717
+ });
1718
+ });
1719
+ // ==========================================================================
1720
+ // SLA Tracking (G11)
1721
+ // ==========================================================================
1722
+ describe('evaluateSLA', () => { // single-SLA evaluation: compliance flag, status string, gap sign, marginPercent
1723
+ const makeResult = (name, p50) => makeBaseResult(name, { p50 }); // metric result fixture that only sets p50
1724
+ it('reports compliant SLA (above direction)', () => {
1725
+ const sla = { metric: 'relevance', aggregation: 'p50', target: TEST_SCORE_GOOD, direction: 'above' };
1726
+ const result = evaluateSLA(sla, [makeResult('relevance', TEST_SCORE_STRONG)]);
1727
+ assert.strictEqual(result.compliant, true);
1728
+ assert.strictEqual(result.actualValue, TEST_SCORE_STRONG);
1729
+ assert.ok(requireDefined(result.gap) > 0); // positive gap when the SLA is met
1730
+ });
1731
+ it('reports non-compliant SLA (above direction)', () => {
1732
+ const sla = { metric: 'relevance', aggregation: 'p50', target: TEST_SCORE_GOOD, direction: 'above' };
1733
+ const result = evaluateSLA(sla, [makeResult('relevance', TEST_SCORE_ADEQUATE)]);
1734
+ assert.strictEqual(result.compliant, false);
1735
+ assert.ok(requireDefined(result.gap) < 0); // negative gap when the SLA is missed
1736
+ });
1737
+ it('reports compliant SLA (below direction)', () => {
1738
+ const sla = { metric: 'hallucination', aggregation: 'p50', target: TEST_SCORE_WARNING, direction: 'below' };
1739
+ const result = evaluateSLA(sla, [makeResult('hallucination', DELTA)]);
1740
+ assert.strictEqual(result.compliant, true);
1741
+ assert.ok(requireDefined(result.gap) > 0); // gap sign follows compliance, not raw difference, for 'below' SLAs too
1742
+ });
1743
+ it('reports non-compliant SLA (below direction)', () => {
1744
+ const sla = { metric: 'hallucination', aggregation: 'p50', target: TEST_SCORE_WARNING, direction: 'below' };
1745
+ const result = evaluateSLA(sla, [makeResult('hallucination', TEST_HALLUCINATION_NONCOMPLIANT_SCORE)]);
1746
+ assert.strictEqual(result.compliant, false);
1747
+ assert.ok(requireDefined(result.gap) < 0);
1748
+ });
1749
+ it('handles missing metric data with metric_not_found status', () => {
1750
+ const sla = { metric: 'unknown', aggregation: 'p50', target: TEST_SCORE_GOOD, direction: 'above' }; // no result named 'unknown' is supplied
1751
+ const result = evaluateSLA(sla, [makeResult('relevance', TEST_SCORE_STRONG)]);
1752
+ assert.strictEqual(result.compliant, false);
1753
+ assert.strictEqual(result.status, 'metric_not_found');
1754
+ assert.strictEqual(result.actualValue, null);
1755
+ assert.strictEqual(result.gap, null);
1756
+ });
1757
+ it('handles null aggregation value with no_data status', () => {
1758
+ const sla = { metric: 'relevance', aggregation: 'p50', target: TEST_SCORE_GOOD, direction: 'above' };
1759
+ const result = evaluateSLA(sla, [makeResult('relevance', null)]); // metric exists but its p50 is null
1760
+ assert.strictEqual(result.compliant, false);
1761
+ assert.strictEqual(result.status, 'no_data');
1762
+ assert.strictEqual(result.actualValue, null);
1763
+ });
1764
+ it('returns compliant status when SLA is met', () => {
1765
+ const sla = { metric: 'relevance', aggregation: 'p50', target: TEST_SCORE_GOOD, direction: 'above' };
1766
+ const result = evaluateSLA(sla, [makeResult('relevance', TEST_SCORE_HIGH)]);
1767
+ assert.strictEqual(result.status, 'compliant');
1768
+ });
1769
+ it('returns non_compliant status when SLA is not met', () => {
1770
+ const sla = { metric: 'relevance', aggregation: 'p50', target: TEST_SCORE_GOOD, direction: 'above' };
1771
+ const result = evaluateSLA(sla, [makeResult('relevance', TEST_SCORE_BASELINE)]);
1772
+ assert.strictEqual(result.status, 'non_compliant');
1773
+ });
1774
+ it('computes marginPercent', () => {
1775
+ const sla = { metric: 'relevance', aggregation: 'p50', target: TEST_SCORE_GOOD, direction: 'above' };
1776
+ const result = evaluateSLA(sla, [makeResult('relevance', TEST_RELEVANCE_MARGIN_SCORE)]);
1777
+ assert.ok(result.marginPercent !== null);
1778
+ assert.strictEqual(result.marginPercent, COUNT_TEN); // 0.08 / TEST_SCORE_GOOD = 10%
1779
+ });
1780
+ it('exactly at target is compliant for above direction', () => {
1781
+ const sla = { metric: 'relevance', aggregation: 'p50', target: TEST_SCORE_GOOD, direction: 'above' };
1782
+ const result = evaluateSLA(sla, [makeResult('relevance', TEST_SCORE_GOOD)]); // boundary: actual value equals the target
1783
+ assert.strictEqual(result.compliant, true);
1784
+ });
1785
+ it('accepts recognized SLA window formats', () => {
1786
+ // Type-level enforcement: SLAWindowFormat only allows recognized formats
1787
+ const sla = {
1788
+ metric: 'relevance', aggregation: 'p50', target: TEST_SCORE_GOOD, direction: 'above',
1789
+ window: '30d rolling',
1790
+ };
1791
+ const result = evaluateSLA(sla, [makeResult('relevance', TEST_SCORE_STRONG)]);
1792
+ assert.strictEqual(result.sla.window, '30d rolling'); // the SLA definition is echoed back on the result
1793
+ assert.strictEqual(result.compliant, true);
1794
+ });
1795
+ it('slaWindowFormatSchema includes all recognized window formats', () => {
1796
+ assert.ok(slaWindowFormatSchema.options.length >= COUNT_TEN); // lower bound only; three formats are spot-checked below
1797
+ assert.ok(slaWindowFormatSchema.options.includes('30d rolling'));
1798
+ assert.ok(slaWindowFormatSchema.options.includes('calendar month'));
1799
+ assert.ok(slaWindowFormatSchema.options.includes('calendar year'));
1800
+ });
1801
+ });
1802
+ describe('evaluateSLAs', () => { // batch SLA evaluation: per-SLA results plus the noSLAsConfigured flag
1803
+ const makeResult = (name, p50) => makeBaseResult(name, { p50 }); // metric result fixture that only sets p50
1804
+ it('evaluates multiple SLAs', () => {
1805
+ const slas = [
1806
+ { metric: 'relevance', aggregation: 'p50', target: TEST_SCORE_GOOD, direction: 'above' },
1807
+ { metric: 'coherence', aggregation: 'p50', target: TEST_SCORE_PASSING, direction: 'above' },
1808
+ ];
1809
+ const result = evaluateSLAs(slas, [makeResult('relevance', TEST_SCORE_HIGH), makeResult('coherence', TEST_SCORE_BASELINE)]);
1810
+ assert.strictEqual(result.noSLAsConfigured, false);
1811
+ assert.strictEqual(result.results.length, COUNT_TWO);
1812
+ assert.strictEqual(result.results[0].compliant, true); // results are expected in the same order as the SLA definitions
1813
+ assert.strictEqual(result.results[1].compliant, false);
1814
+ });
1815
+ it('returns structured empty result when no SLAs configured', () => {
1816
+ const result = evaluateSLAs([], [makeResult('relevance', TEST_SCORE_HIGH)]);
1817
+ assert.strictEqual(result.noSLAsConfigured, true);
1818
+ assert.strictEqual(result.results.length, 0);
1819
+ });
1820
+ });
1821
+ describe('computeDashboardSummary with SLAs', () => { // optional SLA definitions (4th arg) and previous-period values (5th arg)
1822
+ it('includes SLA compliance when definitions provided', () => {
1823
+ const evaluationsByMetric = new Map([
1824
+ ['relevance', [
1825
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_STRONG },
1826
+ ]],
1827
+ ]);
1828
+ const slas = [
1829
+ { metric: 'relevance', aggregation: 'p50', target: TEST_SCORE_GOOD, direction: 'above' },
1830
+ ];
1831
+ const summary = computeDashboardSummary(evaluationsByMetric, undefined, undefined, slas);
1832
+ assert.ok(summary.slaCompliance);
1833
+ assert.strictEqual(requireDefined(summary.slaCompliance).length, 1);
1834
+ assert.strictEqual(requireDefined(summary.slaCompliance)[0].compliant, true);
1835
+ });
1836
+ it('omits SLA compliance when no definitions', () => {
1837
+ const summary = computeDashboardSummary(new Map()); // empty input and no SLA argument
1838
+ assert.strictEqual(summary.slaCompliance, undefined);
1839
+ });
1840
+ it('includes trends when previousPeriodValues provided', () => {
1841
+ const evaluationsByMetric = new Map([
1842
+ ['relevance', [
1843
+ { timestamp: '2026-01-02T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_STRONG },
1844
+ ]],
1845
+ ]);
1846
+ const previousPeriodValues = new Map([ // previous period has only count and p50 populated
1847
+ ['relevance', { avg: null, min: null, max: null, count: DEFAULT_LIMIT_10, p50: 0.70, p95: null, p99: null }],
1848
+ ]);
1849
+ const summary = computeDashboardSummary(evaluationsByMetric, undefined, undefined, undefined, previousPeriodValues);
1850
+ const relevanceMetric = summary.metrics.find(m => m.name === 'relevance');
1851
+ assert.ok(relevanceMetric?.trend);
1852
+ assert.strictEqual(requireDefined(requireDefined(relevanceMetric).trend).direction, 'improving'); // assumes TEST_SCORE_STRONG exceeds the previous p50 of 0.70 — TODO confirm
1853
+ });
1854
+ });
1855
+ describe('MetricConfigBuilder.sla', () => { // builder shortcut that yields an SLA definition for the metric being configured
1856
+ it('creates an SLA definition from builder', () => {
1857
+ const sla = createMetricConfig('test_sla') // the builder's metric name becomes sla.metric
1858
+ .displayName('Test SLA')
1859
+ .description('Test')
1860
+ .sla('p50', TEST_SCORE_GOOD, 'above', '30d rolling'); // arguments: aggregation, target, direction, window
1861
+ assert.strictEqual(sla.metric, 'test_sla');
1862
+ assert.strictEqual(sla.aggregation, 'p50');
1863
+ assert.strictEqual(sla.target, TEST_SCORE_GOOD);
1864
+ assert.strictEqual(sla.direction, 'above');
1865
+ assert.strictEqual(sla.window, '30d rolling');
1866
+ });
1867
+ });
1868
+ // ==========================================================================
1869
+ // Confidence Indicators (R3)
1870
+ // ==========================================================================
1871
+ describe('computeConfidence', () => { // confidence level from sample size and score spread, plus evaluator agreement
1872
+ it('returns low confidence for small sample (<10)', () => {
1873
+ const evals = Array.from({ length: COUNT_FIVE }, (_, i) => ({ // identical scores, so only the sample size can drive the level
1874
+ timestamp: `2026-01-01T00:0${i}:00Z`, evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD,
1875
+ }));
1876
+ const confidence = computeConfidence(evals);
1877
+ assert.strictEqual(confidence.level, 'low');
1878
+ assert.strictEqual(confidence.sampleCount, COUNT_FIVE);
1879
+ });
1880
+ it('returns high confidence for large consistent sample', () => {
1881
+ const evals = Array.from({ length: COUNT_SIXTY }, (_, i) => ({ // scores cycle through three values spaced TEST_DECIMAL_EPSILON apart
1882
+ timestamp: `2026-01-01T00:00:${String(i).padStart(COUNT_TWO, '0')}Z`,
1883
+ evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD + (i % COUNT_THREE) * TEST_DECIMAL_EPSILON,
1884
+ }));
1885
+ const confidence = computeConfidence(evals);
1886
+ assert.strictEqual(confidence.level, 'high');
1887
+ assert.strictEqual(confidence.sampleCount, COUNT_SIXTY);
1888
+ assert.ok(confidence.scoreStdDev !== null &&
1889
+ confidence.scoreStdDev < TEST_HALLUCINATION_NONCOMPLIANT_SCORE); // NOTE(review): reuses an unrelated constant as the stddev bound — confirm intent
1890
+ });
1891
+ it('returns medium confidence for moderate sample', () => {
1892
+ const evals = Array.from({ length: COUNT_TWENTY_FIVE }, (_, i) => ({ // scores cycle through five values spaced DELTA apart
1893
+ timestamp: `2026-01-01T00:00:${String(i).padStart(COUNT_TWO, '0')}Z`,
1894
+ evaluationName: 'relevance', scoreValue: TEST_SCORE_MID + (i % COUNT_FIVE) * DELTA,
1895
+ }));
1896
+ const confidence = computeConfidence(evals);
1897
+ assert.strictEqual(confidence.level, 'medium');
1898
+ });
1899
+ it('returns low confidence for high variance scores', () => {
1900
+ const evals = Array.from({ length: COUNT_TWENTY }, (_, i) => ({ // scores alternate between two far-apart values
1901
+ timestamp: `2026-01-01T00:00:${String(i).padStart(COUNT_TWO, '0')}Z`,
1902
+ evaluationName: 'relevance', scoreValue: i % COUNT_TWO === 0 ? TEST_SCORE_WARNING : TEST_SCORE_HIGH,
1903
+ }));
1904
+ const confidence = computeConfidence(evals);
1905
+ assert.strictEqual(confidence.level, 'low');
1906
+ assert.ok(confidence.scoreStdDev !== null && confidence.scoreStdDev > TEST_SCORE_LOW);
1907
+ });
1908
+ it('computes evaluator agreement for multiple judges', () => {
1909
+ const evals = [ // 30 evaluations per judge, each judge scoring a constant value
1910
+ ...Array.from({ length: 30 }, (_, i) => ({
1911
+ timestamp: `2026-01-01T00:00:${String(i).padStart(COUNT_TWO, '0')}Z`,
1912
+ evaluationName: 'relevance', scoreValue: TEST_SCORE_STRONG, evaluator: 'judge-a',
1913
+ })),
1914
+ ...Array.from({ length: 30 }, (_, i) => ({
1915
+ timestamp: `2026-01-01T00:01:${String(i).padStart(COUNT_TWO, '0')}Z`,
1916
+ evaluationName: 'relevance', scoreValue: 0.83, evaluator: 'judge-b',
1917
+ })),
1918
+ ];
1919
+ const confidence = computeConfidence(evals);
1920
+ assert.strictEqual(confidence.evaluatorCount, COUNT_TWO);
1921
+ assert.ok(confidence.evaluatorAgreement !== null);
1922
+ assert.ok(requireDefined(confidence.evaluatorAgreement) > TEST_SCORE_HIGH); // High agreement
1923
+ });
1924
+ it('computes range-aware evaluator agreement for non-[0,1] metrics', () => {
1925
+ // Two judges scoring latency (0-60s range) with means 10s and 12s
1926
+ const evals = [
1927
+ ...Array.from({ length: 30 }, (_, i) => ({
1928
+ timestamp: `2026-01-01T00:00:${String(i).padStart(COUNT_TWO, '0')}Z`,
1929
+ evaluationName: 'evaluation_latency', scoreValue: COUNT_TEN, evaluator: 'judge-a',
1930
+ })),
1931
+ ...Array.from({ length: 30 }, (_, i) => ({
1932
+ timestamp: `2026-01-01T00:01:${String(i).padStart(COUNT_TWO, '0')}Z`,
1933
+ evaluationName: 'evaluation_latency', scoreValue: 12, evaluator: 'judge-b',
1934
+ })),
1935
+ ];
1936
+ // With [0,60] range, max stddev = 30, so the 2s gap between the judges' means is very small
1937
+ const confidence = computeConfidence(evals, undefined, { min: 0, max: COUNT_SIXTY }); // third argument is the metric's score range
1938
+ assert.ok(confidence.evaluatorAgreement !== null);
1939
+ assert.ok(requireDefined(confidence.evaluatorAgreement) > TEST_SCORE_EXCELLENT, `Expected high agreement for small diff in wide range, got ${confidence.evaluatorAgreement}`);
1940
+ // Same data with default [0,1] range would show lower agreement
1941
+ const defaultConf = computeConfidence(evals);
1942
+ assert.ok(defaultConf.evaluatorAgreement !== null);
1943
+ // stddev of [10, 12] = ~1.41, /0.5 = 2.83, 1 - 2.83 = clamped to 0
1944
+ assert.strictEqual(defaultConf.evaluatorAgreement, 0);
1945
+ });
1946
+ it('returns null agreement for single evaluator', () => {
1947
+ const evals = Array.from({ length: COUNT_TEN }, (_, i) => ({
1948
+ timestamp: `2026-01-01T00:0${i}:00Z`, evaluationName: 'relevance',
1949
+ scoreValue: TEST_SCORE_GOOD, evaluator: 'judge-a',
1950
+ }));
1951
+ const confidence = computeConfidence(evals);
1952
+ assert.strictEqual(confidence.evaluatorCount, 1);
1953
+ assert.strictEqual(confidence.evaluatorAgreement, null); // agreement is reported as null with only one evaluator
1954
+ });
1955
+ it('filters evaluations without scores', () => {
1956
+ const evals = [
1957
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD },
1958
+ { timestamp: '2026-01-01T00:01:00Z', evaluationName: 'relevance' }, // no score
1959
+ { timestamp: '2026-01-01T00:02:00Z', evaluationName: 'relevance', scoreValue: undefined },
1960
+ ];
1961
+ const confidence = computeConfidence(evals);
1962
+ assert.strictEqual(confidence.sampleCount, 1); // only the scored evaluation is counted
1963
+ });
1964
+ it('uses custom thresholds when provided', () => {
1965
+ // 8 samples: normally "low" (default threshold is 10), but with custom threshold of 5 it's "medium"
1966
+ const evals = Array.from({ length: 8 }, (_, i) => ({
1967
+ timestamp: `2026-01-01T00:0${i}:00Z`, evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD,
1968
+ }));
1969
+ const defaultConf = computeConfidence(evals);
1970
+ assert.strictEqual(defaultConf.level, 'low');
1971
+ const customConf = computeConfidence(evals, { lowSampleThreshold: COUNT_FIVE }); // thresholds object is the second argument
1972
+ assert.strictEqual(customConf.level, 'medium');
1973
+ });
1974
+ it('custom high thresholds raise the bar for high confidence', () => {
1975
+ const evals = Array.from({ length: COUNT_SIXTY }, (_, i) => ({
1976
+ timestamp: `2026-01-01T00:00:${String(i).padStart(COUNT_TWO, '0')}Z`,
1977
+ evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD + (i % COUNT_THREE) * TEST_DECIMAL_EPSILON,
1978
+ }));
1979
+ const defaultConf = computeConfidence(evals);
1980
+ assert.strictEqual(defaultConf.level, 'high');
1981
+ const strictConf = computeConfidence(evals, { highSampleThreshold: COUNT_HUNDRED });
1982
+ assert.strictEqual(strictConf.level, 'medium'); // 60 < 100
1983
+ });
1984
+ });
1985
+ describe('computeQualityMetric with confidence', () => {
1986
+ it('includes confidence when evaluations have scores', () => {
1987
+ const evaluations = Array.from({ length: COUNT_FIFTEEN }, (_, i) => ({
1988
+ timestamp: `2026-01-01T00:0${i}:00Z`, evaluationName: 'relevance', scoreValue: TEST_SCORE_STRONG,
1989
+ }));
1990
+ const result = computeQualityMetric(evaluations, QUALITY_METRICS.relevance);
1991
+ assert.ok(result.confidence);
1992
+ assert.strictEqual(result.confidence.sampleCount, COUNT_FIFTEEN);
1993
+ });
1994
+ it('omits confidence when no scored evaluations', () => {
1995
+ const result = computeQualityMetric([], QUALITY_METRICS.relevance);
1996
+ assert.strictEqual(result.confidence, undefined);
1997
+ });
1998
+ });
1999
+ // ==========================================================================
2000
+ // Trend Analysis (G4)
2001
+ // ==========================================================================
2002
+ describe('computeTrend', () => {
2003
+ it('detects improving trend for below-direction metric', () => { // relevance p50 rises from TEST_SCORE_PASSING to TEST_SCORE_STRONG
2004
+ const current = { avg: null, min: null, max: null, count: DEFAULT_LIMIT_10, p50: TEST_SCORE_STRONG, p95: null, p99: null };
2005
+ const previous = { avg: null, min: null, max: null, count: DEFAULT_LIMIT_10, p50: TEST_SCORE_PASSING, p95: null, p99: null };
2006
+ const trend = computeTrend(current, previous, QUALITY_METRICS.relevance);
2007
+ assert.ok(trend);
2008
+ assert.strictEqual(trend.direction, 'improving');
2009
+ assert.ok(Math.abs(trend.delta - TEST_HALLUCINATION_NONCOMPLIANT_SCORE) < TEST_DECIMAL_EPSILON); // tolerance compare; NOTE(review): expected delta reuses an unrelated constant — confirm it equals STRONG - PASSING
2010
+ assert.strictEqual(trend.aggregation, 'p50');
2011
+ assert.strictEqual(trend.currentValue, TEST_SCORE_STRONG);
2012
+ assert.strictEqual(trend.previousValue, TEST_SCORE_PASSING);
2013
+ assert.ok(requireDefined(trend.percentChange) > 0);
2014
+ });
2015
+ it('detects degrading trend for below-direction metric', () => { // relevance p50 falls from 0.80 to TEST_SCORE_BASELINE
2016
+ const current = { avg: null, min: null, max: null, count: DEFAULT_LIMIT_10, p50: TEST_SCORE_BASELINE, p95: null, p99: null };
2017
+ const previous = { avg: null, min: null, max: null, count: DEFAULT_LIMIT_10, p50: 0.80, p95: null, p99: null };
2018
+ const trend = computeTrend(current, previous, QUALITY_METRICS.relevance);
2019
+ assert.ok(trend);
2020
+ assert.strictEqual(trend.direction, 'degrading');
2021
+ assert.ok(trend.delta < 0); // delta is expected to be current minus previous
2022
+ });
2023
+ it('detects improving trend for above-direction metric (hallucination)', () => { // only avg is populated; it drops from 0.15 to DELTA
2024
+ const current = { avg: DELTA, min: null, max: null, count: DEFAULT_LIMIT_10, p50: null, p95: null, p99: null };
2025
+ const previous = { avg: 0.15, min: null, max: null, count: DEFAULT_LIMIT_10, p50: null, p95: null, p99: null };
2026
+ const trend = computeTrend(current, previous, QUALITY_METRICS.hallucination);
2027
+ assert.ok(trend);
2028
+ assert.strictEqual(trend.direction, 'improving'); // Decrease in hallucination is good
2029
+ assert.ok(trend.delta < 0); // direction is 'improving' even though the numeric delta is negative
2030
+ });
2031
+ it('detects degrading trend for above-direction metric', () => { // hallucination avg rises from 0.08 to 0.20
2032
+ const current = { avg: 0.20, min: null, max: null, count: DEFAULT_LIMIT_10, p50: null, p95: null, p99: null };
2033
+ const previous = { avg: 0.08, min: null, max: null, count: DEFAULT_LIMIT_10, p50: null, p95: null, p99: null };
2034
+ const trend = computeTrend(current, previous, QUALITY_METRICS.hallucination);
2035
+ assert.ok(trend);
2036
+ assert.strictEqual(trend.direction, 'degrading');
2037
+ });
2038
// A p50 movement of 0.0004 on relevance is expected to be classified as 'stable'.
it('detects stable trend for tiny changes', () => {
  const base = { avg: null, min: null, max: null, count: null, p50: null, p95: null, p99: null };
  const after = { ...base, count: DEFAULT_LIMIT_10, p50: 0.8005 };
  const prior = { ...base, count: DEFAULT_LIMIT_10, p50: 0.8001 };

  const computed = computeTrend(after, prior, QUALITY_METRICS.relevance);

  assert.ok(computed);
  assert.strictEqual(computed.direction, 'stable');
});
2045
// With every current aggregation null there is nothing to compare, so no trend is returned.
it('returns undefined when current value is null', () => {
  const allNull = { avg: null, min: null, max: null, count: null, p50: null, p95: null, p99: null };
  const latest = { ...allNull };
  const earlier = { ...allNull, count: DEFAULT_LIMIT_10, p50: 0.80 };

  assert.strictEqual(computeTrend(latest, earlier, QUALITY_METRICS.relevance), undefined);
});
2051
// Mirror of the null-current case: a previous period without values yields no trend.
it('returns undefined when previous value is null', () => {
  const allNull = { avg: null, min: null, max: null, count: null, p50: null, p95: null, p99: null };
  const latest = { ...allNull, count: DEFAULT_LIMIT_10, p50: 0.80 };
  const earlier = { ...allNull };

  assert.strictEqual(computeTrend(latest, earlier, QUALITY_METRICS.relevance), undefined);
});
2057
// A previous value of exactly 0 still produces a trend, but percentChange must be null.
it('handles zero previous value (null percentChange)', () => {
  const skeleton = { avg: null, min: null, max: null, count: null, p50: null, p95: null, p99: null };
  const nowValues = { ...skeleton, count: DEFAULT_LIMIT_10, p50: 0.50 };
  const thenValues = { ...skeleton, count: DEFAULT_LIMIT_10, p50: 0 };

  const trendResult = computeTrend(nowValues, thenValues, QUALITY_METRICS.relevance);

  assert.ok(trendResult);
  assert.strictEqual(trendResult.percentChange, null);
});
2064
// Per the original note: evaluation_latency has range 0-60, so the stability
// threshold is 60 * 0.001 = 0.06, and a p95 change of 0.02 stays below it.
it('uses range-aware stability threshold for latency metric', () => {
  const blank = { avg: null, min: null, max: null, count: null, p50: null, p95: null, p99: null };
  const recent = { ...blank, count: DEFAULT_LIMIT_10, p95: 5.02 };
  const baseline = { ...blank, count: DEFAULT_LIMIT_10, p95: 5.0 };

  const latencyTrend = computeTrend(recent, baseline, QUALITY_METRICS.evaluation_latency);

  assert.ok(latencyTrend);
  assert.strictEqual(latencyTrend.direction, 'stable'); // 0.02 < 0.06 threshold
});
2072
// A config with an empty alerts list is expected to make computeTrend compare on 'avg'.
it('uses avg aggregation when config has no alerts', () => {
  const alertFreeConfig = {
    name: 'test',
    displayName: 'Test',
    description: 'Test',
    aggregations: ['avg'],
    alerts: [],
    range: { min: 0, max: 1 },
    unit: 'score',
  };
  const unset = { avg: null, min: null, max: null, count: null, p50: null, p95: null, p99: null };
  const currentPeriod = { ...unset, avg: TEST_SCORE_HIGH, count: DEFAULT_LIMIT_10 };
  const previousPeriod = { ...unset, avg: TEST_SCORE_PASSING, count: DEFAULT_LIMIT_10 };

  const outcome = computeTrend(currentPeriod, previousPeriod, alertFreeConfig);

  assert.ok(outcome);
  assert.strictEqual(outcome.aggregation, 'avg');
});
2083
// Passing COUNT_FIFTEEN as the sample count is expected to raise the low-sample flag.
it('sets lowSampleWarning when sample count is below TREND_MIN_SAMPLE_SIZE', () => {
  const empty = { avg: null, min: null, max: null, count: null, p50: null, p95: null, p99: null };
  const thisPeriod = { ...empty, count: DEFAULT_LIMIT_10, p50: TEST_SCORE_STRONG };
  const lastPeriod = { ...empty, count: DEFAULT_LIMIT_10, p50: 0.70 };

  const flagged = computeTrend(thisPeriod, lastPeriod, QUALITY_METRICS.relevance, COUNT_FIFTEEN);

  assert.ok(flagged);
  assert.strictEqual(flagged.lowSampleWarning, true);
});
2090
// With COUNT_FIFTY samples the warning field must be absent (undefined), not false.
it('does not set lowSampleWarning when sample count meets minimum', () => {
  const empty = { avg: null, min: null, max: null, count: null, p50: null, p95: null, p99: null };
  const thisPeriod = { ...empty, count: 50, p50: TEST_SCORE_STRONG };
  const lastPeriod = { ...empty, count: 50, p50: 0.70 };

  const unflagged = computeTrend(thisPeriod, lastPeriod, QUALITY_METRICS.relevance, COUNT_FIFTY);

  assert.ok(unflagged);
  assert.strictEqual(unflagged.lowSampleWarning, undefined);
});
2097
// Guards the exported constant: it must be a number and equal COUNT_THIRTY.
it('exports TREND_MIN_SAMPLE_SIZE constant', () => {
  const exportedType = typeof TREND_MIN_SAMPLE_SIZE;
  assert.strictEqual(exportedType, 'number');
  assert.strictEqual(TREND_MIN_SAMPLE_SIZE, COUNT_THIRTY);
});
2101
+ });
2102
// computeQualityMetric should attach a trend only when previous-period values are supplied.
describe('computeQualityMetric with trend', () => {
  // Fresh single-sample input per test so the two cases never share an array.
  const singleRelevanceSample = () => [
    { timestamp: '2026-01-02T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_STRONG },
  ];

  it('includes trend when previousValues provided', () => {
    const evaluations = singleRelevanceSample();
    const blank = { avg: null, min: null, max: null, count: null, p50: null, p95: null, p99: null };
    const previousValues = { ...blank, count: DEFAULT_LIMIT_10, p50: 0.70 };

    const { trend } = computeQualityMetric(evaluations, QUALITY_METRICS.relevance, undefined, previousValues);

    assert.ok(trend);
    assert.strictEqual(trend.direction, 'improving');
  });

  it('omits trend when previousValues not provided', () => {
    const metricResult = computeQualityMetric(singleRelevanceSample(), QUALITY_METRICS.relevance);
    assert.strictEqual(metricResult.trend, undefined);
  });
});
2120
+ // ==========================================================================
2121
+ // Metric Registration
2122
+ // ==========================================================================
2123
// Registration tests. registerQualityMetric writes to a registry shared by the
// whole file, so every test that may add an entry removes it again in `finally`.
// Without that cleanup 'custom_test' leaks into later tests, and a second run in
// the same process fails because the metric "already exists".
describe('registerQualityMetric', () => {
  it('registers a custom metric', () => {
    const config = {
      name: 'custom_test',
      displayName: 'Custom Test',
      description: 'Test metric',
      aggregations: ['avg'],
      alerts: [],
      range: { min: 0, max: 1 },
      unit: 'score',
    };
    registerQualityMetric(config);
    try {
      const all = getAllQualityMetrics();
      assert.ok(all.custom_test);
    } finally {
      // Restore the registry so this test is repeatable and order-independent.
      unregisterQualityMetric('custom_test');
    }
  });

  it('rejects NaN/Infinity in alert threshold values', () => {
    const config = {
      name: 'nan_test', displayName: 'NaN Test', description: 'Test',
      aggregations: ['avg'],
      alerts: [{ aggregation: 'avg', value: NaN, direction: 'below', severity: 'warning', message: 'test' }],
      range: { min: 0, max: 1 }, unit: 'score',
    };
    try {
      assert.throws(() => registerQualityMetric(config));
    } finally {
      // If validation regresses and the metric is accepted, do not leave it behind.
      // Unregistering an unknown name just returns false.
      unregisterQualityMetric('nan_test');
    }
  });

  it('rejects Infinity in range values', () => {
    const config = {
      name: 'inf_test', displayName: 'Inf Test', description: 'Test',
      aggregations: ['avg'], alerts: [],
      range: { min: 0, max: Infinity }, unit: 'score',
    };
    try {
      assert.throws(() => registerQualityMetric(config));
    } finally {
      unregisterQualityMetric('inf_test');
    }
  });

  it('throws if metric already exists', () => {
    // No cleanup here on purpose: 'relevance' is a built-in and must stay registered.
    const config = {
      name: 'relevance', // Already exists in QUALITY_METRICS
      displayName: 'Duplicate',
      description: 'Test',
      aggregations: ['avg'],
      alerts: [],
      range: { min: 0, max: 1 },
      unit: 'score',
    };
    assert.throws(() => registerQualityMetric(config));
  });
});
2168
// unregisterQualityMetric reports via its boolean return whether anything was removed.
describe('unregisterQualityMetric', () => {
  it('removes a custom metric', () => {
    const metricName = 'to_remove';
    registerQualityMetric({
      name: 'to_remove',
      displayName: 'To Remove',
      description: 'Test',
      aggregations: ['avg'],
      alerts: [],
      range: { min: 0, max: 1 },
      unit: 'score',
    });

    const wasRemoved = unregisterQualityMetric(metricName);

    assert.strictEqual(wasRemoved, true);
    // After removal the lookup must no longer find the metric.
    assert.strictEqual(getQualityMetric(metricName), undefined);
  });

  it('returns false for non-existent metric', () => {
    const outcome = unregisterQualityMetric('nonexistent');
    assert.strictEqual(outcome, false);
  });
});
2188
// Lookup tests for getQualityMetric: built-in, custom, and unknown names.
describe('getQualityMetric', () => {
  it('returns built-in metric', () => {
    const metric = getQualityMetric('relevance');
    assert.ok(metric);
    assert.strictEqual(metric.name, 'relevance');
  });

  it('returns custom metric', () => {
    registerQualityMetric({
      name: 'custom_get_test',
      displayName: 'Test',
      description: 'Test',
      aggregations: ['avg'],
      alerts: [],
      range: { min: 0, max: 1 },
      unit: 'score',
    });
    try {
      const metric = getQualityMetric('custom_get_test');
      assert.ok(metric);
    } finally {
      // The registry is shared across the file. Remove the metric so it does not
      // leak into later tests or make a re-run fail with "already exists".
      unregisterQualityMetric('custom_get_test');
    }
  });

  it('returns undefined for unknown metric', () => {
    assert.strictEqual(getQualityMetric('unknown'), undefined);
  });
});
2211
+ // ==========================================================================
2212
+ // Utility Functions
2213
+ // ==========================================================================
2214
// Table-driven checks of formatMetricValue: one `it` per (input, unit) pair,
// registered in the same order and under the same titles as before.
describe('formatMetricValue', () => {
  const formattingCases = [
    ['formats score', TEST_FORMATTED_SCORE_INPUT, 'score', '0.8567'],
    ['formats percentage', TEST_SCORE_STRONG, 'percentage', '85.0%'],
    ['formats rate', TEST_SCORE_EXCELLENT, 'rate', '95.0%'],
    ['formats seconds', TEST_FORMATTED_SECONDS_INPUT, 'seconds', '3.46s'],
    ['handles null', null, 'score', 'N/A'],
  ];

  for (const [title, input, unit, expected] of formattingCases) {
    it(title, () => {
      assert.strictEqual(formatMetricValue(input, unit), expected);
    });
  }
});
2231
// Exercises the fluent builder returned by createMetricConfig.
describe('MetricConfigBuilder', () => {
  it('builds a metric config', () => {
    // Same call sequence as a single chain, written step by step.
    let builder = createMetricConfig('test_builder');
    builder = builder.displayName('Test Builder');
    builder = builder.description('A test metric');
    builder = builder.aggregations('avg', 'p50', 'p95');
    builder = builder.range(0, COUNT_HUNDRED);
    builder = builder.unit('percentage');
    builder = builder.alertBelow('p50', COUNT_FIFTY, 'warning');
    builder = builder.alertAbove('avg', COUNT_NINETY, 'info');
    const built = builder.build();

    assert.strictEqual(built.name, 'test_builder');
    assert.strictEqual(built.displayName, 'Test Builder');
    assert.strictEqual(built.description, 'A test metric');
    assert.deepStrictEqual(built.aggregations, ['avg', 'p50', 'p95']);
    assert.deepStrictEqual(built.range, { min: 0, max: COUNT_HUNDRED });
    assert.strictEqual(built.unit, 'percentage');
    // One alertBelow plus one alertAbove.
    assert.strictEqual(built.alerts.length, COUNT_TWO);
  });

  it('supports remediationHints method', () => {
    let builder = createMetricConfig('hint_test');
    builder = builder.displayName('Hint Test');
    builder = builder.description('Test metric with hints');
    builder = builder.remediationHints('Fix prompt', 'Add context');
    builder = builder.alertBelow('avg', TEST_SCORE_MID, 'warning');
    const built = builder.build();

    assert.deepStrictEqual(built.remediationHints, ['Fix prompt', 'Add context']);
  });

  it('uses defaults for missing fields', () => {
    const { name, displayName, description } = createMetricConfig('minimal').build();

    assert.strictEqual(name, 'minimal');
    // displayName falls back to the metric name; description must be non-empty.
    assert.strictEqual(displayName, 'minimal');
    assert.ok(description);
  });
});
2266
+ // ==========================================================================
2267
+ // Full Pipeline Integration Test (Backlog #8)
2268
+ // ==========================================================================
2269
// End-to-end check: dashboard summary with trends, confidence and SLAs,
// then role views and severity context applied to the same dashboard.
describe('full pipeline integration: dashboard + trends + SLAs + confidence', () => {
  it('exercises computeDashboardSummary with all v2.2 features end-to-end', () => {
    const pad2 = (value) => String(value).padStart(COUNT_TWO, '0');
    // Timestamp with index-derived hour and minute fields.
    const stampFor = (i) => `2026-02-01T${pad2(Math.floor(i / COUNT_SIXTY))}:${pad2(i % COUNT_SIXTY)}:00Z`;
    const emptyAggs = { avg: null, min: null, max: null, count: null, p50: null, p95: null, p99: null };

    // Setup: evaluation data for 3 metrics
    const relevanceEvals = Array.from({ length: COUNT_SIXTY }, (_, i) => {
      const evaluator = i % COUNT_TWO === 0 ? 'judge-a' : 'judge-b';
      return {
        timestamp: stampFor(i),
        evaluationName: 'relevance',
        scoreValue: TEST_SINGLE_VALUE_SCORE + (i % COUNT_TEN) * TEST_STEP_TWO_HUNDREDTHS,
        evaluator,
      };
    });
    const hallucinationEvals = Array.from({ length: 40 }, (_, i) => ({
      timestamp: stampFor(i),
      evaluationName: 'hallucination',
      scoreValue: TEST_STEP_THREE_HUNDREDTHS + (i % COUNT_FIVE) * TEST_DECIMAL_EPSILON,
      evaluator: 'judge-a',
    }));
    const coherenceEvals = Array.from({ length: COUNT_TWENTY_FIVE }, (_, i) => ({
      timestamp: `2026-02-01T00:${pad2(i)}:00Z`,
      evaluationName: 'coherence',
      scoreValue: TEST_SCORE_ADEQUATE + (i % COUNT_EIGHT) * TEST_STEP_THREE_HUNDREDTHS,
    }));

    const evaluationsByMetric = new Map();
    evaluationsByMetric.set('relevance', relevanceEvals);
    evaluationsByMetric.set('hallucination', hallucinationEvals);
    evaluationsByMetric.set('coherence', coherenceEvals);

    // Previous period values for trend comparison
    const previousPeriodValues = new Map();
    previousPeriodValues.set('relevance', { ...emptyAggs, count: COUNT_FIFTY, p50: TEST_SCORE_PASSING });
    previousPeriodValues.set('hallucination', { ...emptyAggs, avg: 0.10, count: 30 });
    previousPeriodValues.set('coherence', { ...emptyAggs, count: COUNT_TWENTY, p50: TEST_SCORE_BASELINE });

    // SLA definitions
    const slas = [
      { metric: 'relevance', aggregation: 'p50', target: TEST_SCORE_GOOD, direction: 'above', window: '30d rolling' },
      { metric: 'hallucination', aggregation: 'avg', target: TEST_SCORE_WARNING, direction: 'below' },
      { metric: 'coherence', aggregation: 'p50', target: TEST_SCORE_HIGH, direction: 'above' },
    ];

    // Execute full pipeline
    const dashboard = computeDashboardSummary(evaluationsByMetric, undefined, undefined, slas, previousPeriodValues);

    // --- Verify dashboard structure ---
    const knownStatuses = ['healthy', 'warning', 'critical', 'no_data'];
    assert.ok(dashboard.overallStatus);
    assert.ok(knownStatuses.includes(dashboard.overallStatus));
    // All 9 pre-defined QUALITY_METRICS are included; 3 have data, 6 have no_data
    assert.strictEqual(dashboard.metrics.length, COUNT_NINE);
    assert.strictEqual(dashboard.summary.totalMetrics, COUNT_NINE);
    assert.ok(dashboard.timestamp);

    // --- Verify trends populated ---
    const metricResult = (name) =>
      requireDefined(dashboard.metrics.find(m => m.name === name), `Expected ${name} metric result`);
    const relevanceResult = metricResult('relevance');
    const hallucinationResult = metricResult('hallucination');
    const coherenceResult = metricResult('coherence');
    assert.ok(relevanceResult.trend, 'Relevance should have trend');
    assert.ok(hallucinationResult.trend, 'Hallucination should have trend');
    assert.ok(coherenceResult.trend, 'Coherence should have trend');
    // Relevance improved (current ~0.84 vs previous 0.70)
    const relevanceTrend = requireDefined(relevanceResult.trend);
    assert.strictEqual(relevanceTrend.direction, 'improving');
    // Hallucination improved (current ~0.05 vs previous 0.10)
    const hallucinationTrend = requireDefined(hallucinationResult.trend);
    assert.strictEqual(hallucinationTrend.direction, 'improving');

    // --- Verify confidence populated ---
    assert.ok(relevanceResult.confidence, 'Relevance should have confidence');
    assert.strictEqual(requireDefined(relevanceResult.confidence).evaluatorCount, COUNT_TWO);
    assert.ok(requireDefined(relevanceResult.confidence).evaluatorAgreement !== null);
    // 60 samples with low variance should be high confidence
    assert.strictEqual(requireDefined(relevanceResult.confidence).level, 'high');
    assert.ok(hallucinationResult.confidence);
    assert.ok(coherenceResult.confidence);
    // Coherence has 25 samples = medium confidence
    assert.strictEqual(requireDefined(coherenceResult.confidence).level, 'medium');

    // --- Verify SLA compliance ---
    assert.ok(dashboard.slaCompliance);
    assert.strictEqual(requireDefined(dashboard.slaCompliance).length, COUNT_THREE);
    const slaFor = (metric) =>
      requireDefined(requireDefined(dashboard.slaCompliance).find(s => s.sla.metric === metric), `Expected ${metric} SLA`);
    const relevanceSLA = slaFor('relevance');
    const hallucinationSLA = slaFor('hallucination');
    const coherenceSLA = slaFor('coherence');
    assert.ok(relevanceSLA.actualValue !== null);
    assert.ok(hallucinationSLA.compliant); // hallucination avg ~0.05 < TEST_SCORE_WARNING target
    assert.strictEqual(coherenceSLA.compliant, false); // coherence p50 ~0.74 < TEST_SCORE_HIGH target

    // --- Verify role views work with full data ---
    const executiveView = computeRoleView(dashboard, 'executive');
    assert.strictEqual(executiveView.role, 'executive');
    assert.ok(executiveView.topIssues.length > 0);
    assert.strictEqual(executiveView.slaTotalCount, COUNT_THREE);

    const operatorView = computeRoleView(dashboard, 'operator');
    assert.strictEqual(operatorView.role, 'operator');
    assert.ok(Array.isArray(operatorView.prioritizedAlerts));
    assert.ok(Array.isArray(operatorView.degradingTrends));

    const auditorView = computeRoleView(dashboard, 'auditor');
    assert.strictEqual(auditorView.role, 'auditor');
    assert.strictEqual(auditorView.metrics.length, Object.keys(QUALITY_METRICS).length);
    assert.ok(auditorView.totalEvaluationCount >= TEST_TOTAL_EVALUATION_COUNT_MIN); // 60 + 40 + 25

    // --- Verify severity context works in pipeline ---
    const prodContext = {
      environment: 'prod*',
      escalateWarnings: true,
      suppressInfo: true,
    };
    // Shallow-copy each alert so the dashboard's own alert objects are not passed in.
    const alertCopies = dashboard.alerts.map(a => ({ ...a }));
    const contextAlerts = applySeverityContext(alertCopies, prodContext, relevanceResult.sampleCount, 'production');
    // All info alerts should be filtered, warnings escalated
    assert.ok(contextAlerts.every(a => a.severity !== 'info'));
    assert.ok(contextAlerts.every(a => a.severity !== 'warning'));
  });
});
2370
+ // ==========================================================================
2371
+ // Edge Case Tests (v2.4)
2372
+ // ==========================================================================
2373
// Boundary and degenerate-input checks for aggregation, alerting, severity
// context and SLA evaluation.
describe('edge cases', () => {
  // Aggregation record with every field null; tests override what they need.
  const nullAggs = () => ({ avg: null, min: null, max: null, count: null, p50: null, p95: null, p99: null });
  // One 'warning' alert, rebuilt per call so no test sees another's array.
  const singleWarningAlert = () => [{
    severity: 'warning', message: 'test', aggregation: 'avg',
    threshold: TEST_SCORE_MID, actualValue: TEST_SCORE_POOR, direction: 'below',
  }];
  // Escalation context whose environment pattern contains non-ASCII characters.
  const unicodeContext = () => ({
    environment: 'prod-日本*',
    escalateWarnings: true,
  });

  it('computeAggregations handles empty aggregation list', () => {
    const scores = [TEST_SCORE_MID, TEST_SCORE_PASSING, TEST_SCORE_HIGH];
    const aggregated = computeAggregations(scores, []);
    // Count is always computed when scores exist, rest should be null
    assert.strictEqual(aggregated.count, COUNT_THREE);
    assert.strictEqual(aggregated.avg, null);
    assert.strictEqual(aggregated.p50, null);
  });

  it('applySeverityContext matches unicode environment patterns', () => {
    // Should match unicode pattern
    const escalated = applySeverityContext(singleWarningAlert(), unicodeContext(), COUNT_HUNDRED, 'prod-日本-east');
    assert.ok(escalated.every(a => a.severity === 'critical'));
  });

  it('applySeverityContext handles unicode environment that does not match', () => {
    // Different unicode prefix should not match
    const untouched = applySeverityContext(singleWarningAlert(), unicodeContext(), COUNT_HUNDRED, 'prod-中国-east');
    assert.ok(untouched.every(a => a.severity === 'warning'));
  });

  it('computeAggregations handles floating-point boundary at exact 0 and 1', () => {
    const { avg, min, max, p50 } = computeAggregations([0, 1], ['avg', 'min', 'max', 'p50']);
    assert.strictEqual(avg, TEST_SCORE_MID);
    assert.strictEqual(min, 0);
    assert.strictEqual(max, 1);
    assert.strictEqual(p50, TEST_SCORE_MID);
  });

  it('computeAggregations handles very small floating-point differences', () => {
    // Values that differ by less than score precision
    const tinyScores = [TEST_TINY_SCORE_1, TEST_TINY_SCORE_2, TEST_TINY_SCORE_3];
    const aggregated = computeAggregations(tinyScores, ['avg', 'p50']);
    assert.strictEqual(aggregated.avg, 0); // Rounds to 0 at 4 decimal places
  });

  it('checkAlertThresholds handles threshold at exact boundary value', () => {
    const values = { ...nullAggs(), avg: TEST_SCORE_PASSING, count: DEFAULT_LIMIT_10 };
    const boundaryThreshold = {
      aggregation: 'avg', value: TEST_SCORE_PASSING, direction: 'below', severity: 'warning', message: 'test {value}',
    };
    // The value equals the threshold; 'below' is a strict comparison, so no alert fires.
    const fired = checkAlertThresholds(values, [boundaryThreshold]);
    assert.strictEqual(fired.length, 0);
  });

  it('evaluateSLA handles zero target value', () => {
    const zeroTargetSla = { metric: 'hallucination', aggregation: 'avg', target: 0, direction: 'below' };
    const metricResults = [{
      name: 'hallucination', displayName: 'Hallucination', sampleCount: DEFAULT_LIMIT_10,
      values: { ...nullAggs(), avg: 0, count: DEFAULT_LIMIT_10 },
      alerts: [], status: 'healthy',
    }];
    const evaluation = evaluateSLA(zeroTargetSla, metricResults);
    assert.strictEqual(evaluation.compliant, true);
    assert.strictEqual(evaluation.marginPercent, null); // target is 0, cannot compute %
  });

  it('computeAggregations percentile filtering with mixed valid/NaN/Infinity', () => {
    // Mixed data: 3 valid, 1 NaN, 1 Infinity, 1 -Infinity
    const mixedScores = [TEST_SCORE_LOW, NaN, TEST_SCORE_BASELINE, Infinity, TEST_SCORE_HIGH, -Infinity];
    const requested = ['avg', 'min', 'max', 'p50', 'p95', 'count'];
    const aggregated = computeAggregations(mixedScores, requested);
    // Only the three finite scores should survive filtering (6 input -> 3 valid)
    assert.strictEqual(aggregated.count, COUNT_THREE);
    assert.strictEqual(aggregated.min, TEST_SCORE_LOW);
    assert.strictEqual(aggregated.max, TEST_SCORE_HIGH);
    assert.strictEqual(aggregated.avg, TEST_SCORE_BASELINE);
    assert.strictEqual(aggregated.p50, TEST_SCORE_BASELINE);
    // p95: rank = 0.95 * (3 - 1) = 1.9, i.e. interpolate between sorted[1]
    // (TEST_SCORE_BASELINE) and sorted[2] (TEST_SCORE_HIGH) with fraction 0.9.
    assert.strictEqual(aggregated.p95, TEST_INTERPOLATED_P95_EXPECTED);
  });

  it('SLAComplianceResult invariant: compliant === (status === compliant)', () => {
    const slas = [
      { metric: 'relevance', aggregation: 'avg', target: TEST_SCORE_GOOD, direction: 'above' },
      { metric: 'hallucination', aggregation: 'avg', target: TEST_SCORE_WARNING, direction: 'below' },
      { metric: 'missing_metric', aggregation: 'avg', target: TEST_SCORE_MID, direction: 'above' },
      { metric: 'coherence', aggregation: 'avg', target: TEST_SCORE_MID, direction: 'above' }, // null agg -> no_data
      { metric: 'relevance', aggregation: 'avg', target: TEST_SCORE_HIGH, direction: 'above' }, // exact boundary
    ];
    const results = [
      {
        name: 'relevance', displayName: 'Relevance', sampleCount: 50,
        values: { ...nullAggs(), avg: TEST_SCORE_HIGH, count: 50 },
        alerts: [], status: 'healthy',
      },
      {
        name: 'hallucination', displayName: 'Hallucination', sampleCount: 50,
        values: { ...nullAggs(), avg: 0.15, count: 50 },
        alerts: [], status: 'warning',
      },
      {
        name: 'coherence', displayName: 'Coherence', sampleCount: 0,
        values: nullAggs(),
        alerts: [], status: 'no_data',
      },
    ];
    // The boolean flag and the status string must agree for every SLA.
    slas.forEach((sla) => {
      const result = evaluateSLA(sla, results);
      assert.strictEqual(result.compliant, result.status === 'compliant', `Invariant violated for ${sla.metric}: compliant=${result.compliant}, status=${result.status}`);
    });
  });

  it('NaN filtering + SLA evaluation e2e: all-NaN scores -> null aggs -> no_data', () => {
    const nanEvaluations = ['2026-01-01T00:00:00Z', '2026-01-01T00:01:00Z', '2026-01-01T00:02:00Z']
      .map((timestamp) => ({ timestamp, evaluationName: 'relevance', scoreValue: NaN }));
    const evaluationsByMetric = new Map([['relevance', nanEvaluations]]);

    const dashboard = computeDashboardSummary(evaluationsByMetric);
    const relevance = requireDefined(dashboard.metrics.find(m => m.name === 'relevance'), 'Expected relevance metric result');
    // All NaN scores should result in no_data
    assert.strictEqual(relevance.status, 'no_data');
    assert.strictEqual(relevance.sampleCount, 0);
    assert.strictEqual(relevance.values.avg, null);
    assert.strictEqual(relevance.values.p50, null);

    // SLA against this metric should return no_data status
    const sla = { metric: 'relevance', aggregation: 'avg', target: TEST_SCORE_GOOD, direction: 'above' };
    const { status, compliant, actualValue } = evaluateSLA(sla, dashboard.metrics);
    assert.strictEqual(status, 'no_data');
    assert.strictEqual(compliant, false);
    assert.strictEqual(actualValue, null);
  });
});
2505
+ // ==========================================================================
2506
+ // Pipeline Visualization (G8)
2507
+ // ==========================================================================
2508
// computePipelineView derives stages and drop-offs from the raw evaluations
// plus the dashboard computed from them.
describe('computePipelineView', () => {
  // Builds the dashboard and the pipeline view from the same evaluation map.
  const pipelineFor = (evaluationsByMetric) => {
    const dashboard = computeDashboardSummary(evaluationsByMetric);
    return computePipelineView(evaluationsByMetric, dashboard);
  };

  it('computes 4 pipeline stages from evaluation data', () => {
    const relevance = [
      { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD },
      { timestamp: '2026-01-01T00:01:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_HIGH },
      { timestamp: '2026-01-01T00:02:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_PASSING },
    ];
    const hallucination = [
      { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'hallucination', scoreValue: DELTA },
      { timestamp: '2026-01-01T00:01:00Z', evaluationName: 'hallucination', scoreValue: 0.08 },
    ];
    const { stages } = pipelineFor(new Map([
      ['relevance', relevance],
      ['hallucination', hallucination],
    ]));

    assert.strictEqual(stages.length, COUNT_FOUR);
    assert.strictEqual(stages[0].name, 'ingested');
    assert.strictEqual(stages[0].entryCount, COUNT_FIVE); // 3 + 2 total
    assert.strictEqual(stages[1].name, 'scored');
    assert.strictEqual(stages[2].name, 'evaluated');
    assert.strictEqual(stages[3].name, 'alerted');
  });

  it('computes drop-off within stages', () => {
    const relevance = [
      { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD },
      { timestamp: '2026-01-01T00:01:00Z', evaluationName: 'relevance' }, // no scoreValue
    ];
    const { dropoffs } = pipelineFor(new Map([['relevance', relevance]]));

    assert.strictEqual(dropoffs.length, COUNT_FOUR); // one per stage
    // Ingested stage: 2 entry, 1 exit (undefined score filtered)
    const ingested = requireDefined(dropoffs.find(d => d.stage === 'ingested'), 'Expected ingested dropoff');
    assert.strictEqual(ingested.dropped, 1);
    assert.strictEqual(ingested.dropoffPercent, COUNT_FIFTY);
  });

  it('handles empty evaluation map', () => {
    const view = pipelineFor(new Map());
    assert.strictEqual(view.stages[0].entryCount, 0);
    assert.strictEqual(view.overallConversionPercent, 0);
  });

  it('computes overall conversion rate', () => {
    const hallucination = [
      { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'hallucination', scoreValue: TEST_SCORE_LOW },
      { timestamp: '2026-01-01T00:01:00Z', evaluationName: 'hallucination', scoreValue: 0.25 },
    ];
    const { overallConversionPercent } = pipelineFor(new Map([['hallucination', hallucination]]));

    // Only the valid percentage range is asserted, not a specific conversion value.
    assert.ok(overallConversionPercent >= 0);
    assert.ok(overallConversionPercent <= COUNT_HUNDRED);
  });
});
2566
+ // ==========================================================================
2567
+ // Coverage Heatmap (G9)
2568
+ // ==========================================================================
2569
+ describe('computeCoverageHeatmap', () => {
2570
// Two metrics over two distinct traceIds are expected to yield a 2 x 2 cell matrix.
it('computes coverage matrix from evaluation data', () => {
  const relevanceRows = [
    { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD, traceId: 'trace-1' },
    { timestamp: '2026-01-01T00:01:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_HIGH, traceId: 'trace-2' },
  ];
  const hallucinationRows = [
    { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'hallucination', scoreValue: DELTA, traceId: 'trace-1' },
  ];
  const evaluationsByMetric = new Map([
    ['relevance', relevanceRows],
    ['hallucination', hallucinationRows],
  ]);

  const { metrics, inputs, cells } = computeCoverageHeatmap(evaluationsByMetric);

  assert.strictEqual(metrics.length, COUNT_TWO);
  assert.strictEqual(inputs.length, COUNT_TWO);
  assert.strictEqual(cells.length, COUNT_FOUR); // 2 metrics x 2 inputs
});
2585
// hallucination is only evaluated for trace-1, so trace-2 must show up as a gap.
it('identifies coverage gaps', () => {
  const relevanceRows = [
    { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD, traceId: 'trace-1' },
    { timestamp: '2026-01-01T00:01:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_HIGH, traceId: 'trace-2' },
  ];
  // Deliberately no trace-2 entry for hallucination.
  const hallucinationRows = [
    { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'hallucination', scoreValue: DELTA, traceId: 'trace-1' },
  ];
  const evaluationsByMetric = new Map([
    ['relevance', relevanceRows],
    ['hallucination', hallucinationRows],
  ]);

  const coverage = computeCoverageHeatmap(evaluationsByMetric);
  const gapForHallucination = coverage.gaps.find(g => g.metric === 'hallucination');

  assert.ok(gapForHallucination);
  assert.ok(requireDefined(gapForHallucination).missingInputs.includes('trace-2'));
  assert.strictEqual(requireDefined(gapForHallucination).coveragePercent, COUNT_FIFTY);
});
2603
// 1 metric, 1 input, fully covered -> overall coverage of COUNT_HUNDRED percent.
it('computes overall coverage percentage', () => {
  const onlyEvaluation = {
    timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD, traceId: 'trace-1',
  };
  const evaluationsByMetric = new Map([['relevance', [onlyEvaluation]]]);

  const { overallCoveragePercent } = computeCoverageHeatmap(evaluationsByMetric);

  assert.strictEqual(overallCoveragePercent, COUNT_HUNDRED);
});
2613
+ it('handles empty evaluation map', () => {
2614
+ const evaluationsByMetric = new Map();
2615
+ const heatmap = computeCoverageHeatmap(evaluationsByMetric);
2616
+ assert.strictEqual(heatmap.metrics.length, 0);
2617
+ assert.strictEqual(heatmap.inputs.length, 0);
2618
+ assert.strictEqual(heatmap.cells.length, 0);
2619
+ assert.strictEqual(heatmap.overallCoveragePercent, 0);
2620
+ });
2621
+ it('supports sessionId as input key', () => {
2622
+ const evaluationsByMetric = new Map([
2623
+ ['relevance', [
2624
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD, sessionId: 'session-a' },
2625
+ { timestamp: '2026-01-01T00:01:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_HIGH, sessionId: 'session-b' },
2626
+ ]],
2627
+ ]);
2628
+ const heatmap = computeCoverageHeatmap(evaluationsByMetric, { inputKey: 'sessionId' });
2629
+ assert.strictEqual(heatmap.inputs.length, COUNT_TWO);
2630
+ assert.ok(heatmap.inputs.includes('session-a'));
2631
+ assert.ok(heatmap.inputs.includes('session-b'));
2632
+ });
2633
+ it('supports legacy string inputKey argument', () => {
2634
+ const evaluationsByMetric = new Map([
2635
+ ['relevance', [
2636
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD, sessionId: 'session-a' },
2637
+ ]],
2638
+ ]);
2639
+ const heatmap = computeCoverageHeatmap(evaluationsByMetric, 'sessionId');
2640
+ assert.ok(heatmap.inputs.includes('session-a'));
2641
+ });
2642
+ it('count=1 is covered with default threshold', () => {
2643
+ const evaluationsByMetric = new Map([
2644
+ ['relevance', [
2645
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD, traceId: 'trace-1' },
2646
+ ]],
2647
+ ]);
2648
+ const heatmap = computeCoverageHeatmap(evaluationsByMetric);
2649
+ const cell = requireDefined(heatmap.cells.find(c => c.metric === 'relevance' && c.input === 'trace-1'), 'Expected relevance heatmap cell for trace-1');
2650
+ assert.strictEqual(cell.status, 'covered');
2651
+ assert.strictEqual(cell.count, 1);
2652
+ });
2653
+ it('supports configurable coverage thresholds', () => {
2654
+ const evaluationsByMetric = new Map([
2655
+ ['relevance', [
2656
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD, traceId: 'trace-1' },
2657
+ { timestamp: '2026-01-01T00:01:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_HIGH, traceId: 'trace-1' },
2658
+ { timestamp: '2026-01-01T00:02:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_PASSING, traceId: 'trace-2' },
2659
+ ]],
2660
+ ]);
2661
+ // Require 2+ for 'covered', 1 for 'partial'
2662
+ const heatmap = computeCoverageHeatmap(evaluationsByMetric, { coveredThreshold: 2, partialThreshold: 0 });
2663
+ const cell1 = requireDefined(heatmap.cells.find(c => c.input === 'trace-1'), 'Expected heatmap cell for trace-1');
2664
+ const cell2 = requireDefined(heatmap.cells.find(c => c.input === 'trace-2'), 'Expected heatmap cell for trace-2');
2665
+ assert.strictEqual(cell1.status, 'covered'); // count=2 >= 2
2666
+ assert.strictEqual(cell2.status, 'partial'); // count=1 > 0 but < 2
2667
+ });
2668
+ it('assigns missing status for missing trace coverage', () => {
2669
+ const evaluationsByMetric = new Map([
2670
+ ['relevance', [
2671
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'relevance', scoreValue: TEST_SCORE_GOOD, traceId: 'trace-1' },
2672
+ ]],
2673
+ ['faithfulness', [
2674
+ { timestamp: '2026-01-01T00:00:00Z', evaluationName: 'faithfulness', scoreValue: TEST_SCORE_HIGH, traceId: 'trace-2' },
2675
+ ]],
2676
+ ]);
2677
+ const heatmap = computeCoverageHeatmap(evaluationsByMetric);
2678
+ // relevance missing trace-2, faithfulness missing trace-1
2679
+ const relevanceTrace2 = heatmap.cells.find(c => c.metric === 'relevance' && c.input === 'trace-2');
2680
+ assert.ok(relevanceTrace2);
2681
+ assert.strictEqual(requireDefined(relevanceTrace2).status, 'missing');
2682
+ assert.strictEqual(requireDefined(relevanceTrace2).count, 0);
2683
+ });
2684
+ });
2685
+ // ==========================================================================
2686
+ // roundTo
2687
+ // ==========================================================================
2688
describe('roundTo', () => {
    it('rounds to specified decimal places', () => {
        const roundedScore = roundTo(TEST_FORMATTED_SCORE_INPUT, SCORE_PRECISION);
        assert.strictEqual(roundedScore, 0.8567);
        const roundedSeconds = roundTo(TEST_FORMATTED_SECONDS_INPUT, PERCENT_PRECISION);
        assert.strictEqual(roundedSeconds, 3.46);
    });
    it('rounds to zero decimal places', () => {
        const wholeSeconds = roundTo(TEST_FORMATTED_SECONDS_INPUT, 0);
        assert.strictEqual(wholeSeconds, COUNT_THREE);
    });
    it('handles NaN and Infinity', () => {
        // NaN never compares equal to itself, so it is checked with Number.isNaN.
        const roundedNaN = roundTo(NaN, SCORE_PRECISION);
        assert.ok(Number.isNaN(roundedNaN));
        // Both infinities are expected to come back unchanged.
        for (const unbounded of [Infinity, -Infinity]) {
            assert.strictEqual(roundTo(unbounded, SCORE_PRECISION), unbounded);
        }
    });
});
2702
+ // ==========================================================================
2703
+ // sortAlertsBySeverity
2704
+ // ==========================================================================
2705
describe('sortAlertsBySeverity', () => {
    it('sorts critical before warning before info', () => {
        const unsorted = [
            { severity: 'info', id: 1 },
            { severity: 'critical', id: 2 },
            { severity: 'warning', id: 3 },
        ];
        const severityOrder = sortAlertsBySeverity(unsorted).map(({ severity }) => severity);
        assert.deepStrictEqual(severityOrder, ['critical', 'warning', 'info']);
    });
    it('returns a new array without mutating the original', () => {
        const original = [
            { severity: 'info' },
            { severity: 'critical' },
        ];
        const result = sortAlertsBySeverity(original);
        // The result must be a different array instance ...
        assert.notStrictEqual(result, original);
        // ... and the input must still start with its first element.
        assert.strictEqual(original[0]?.severity, 'info'); // unchanged
    });
    it('handles empty array', () => {
        const result = sortAlertsBySeverity([]);
        assert.deepStrictEqual(result, []);
    });
});
2728
+ // ==========================================================================
2729
+ // hasQualityMetric
2730
+ // ==========================================================================
2731
describe('hasQualityMetric', () => {
    // Builds a fresh custom metric definition for each test so no object or
    // array instance is shared between registrations.
    const customMetricDefinition = (name, displayName) => ({
        name,
        displayName,
        description: 'Test',
        aggregations: ['avg'],
        alerts: [],
        range: { min: 0, max: 1 },
        unit: 'score',
    });
    it('returns true for built-in metrics', () => {
        assert.strictEqual(hasQualityMetric('relevance'), true);
        assert.strictEqual(hasQualityMetric('hallucination'), true);
    });
    it('returns false for unknown metrics', () => {
        assert.strictEqual(hasQualityMetric('nonexistent_metric'), false);
    });
    it('returns true for registered custom metrics', () => {
        registerQualityMetric(customMetricDefinition('custom_has_test', 'Custom'));
        try {
            assert.strictEqual(hasQualityMetric('custom_has_test'), true);
        }
        finally {
            // Remove the metric even when the assertion fails so this test
            // does not leave 'custom_has_test' behind for other tests.
            // NOTE(review): assumes the registry is shared state that outlives
            // a single test - confirm against the registry implementation.
            unregisterQualityMetric('custom_has_test');
        }
    });
    it('returns false after unregistering a custom metric', () => {
        registerQualityMetric(customMetricDefinition('temp_has_metric', 'Temp'));
        unregisterQualityMetric('temp_has_metric');
        assert.strictEqual(hasQualityMetric('temp_has_metric'), false);
    });
});
2765
+ });
2766
+ //# sourceMappingURL=quality-metrics.test.js.map