@tangle-network/agent-eval 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/CHANGELOG.md +102 -0
  2. package/README.md +141 -79
  3. package/dist/baseline-4R5deP0N.d.ts +108 -0
  4. package/dist/benchmarks/index.d.ts +3 -2
  5. package/dist/benchmarks/index.js +1 -1
  6. package/dist/builder-eval/index.d.ts +249 -0
  7. package/dist/builder-eval/index.js +391 -0
  8. package/dist/builder-eval/index.js.map +1 -0
  9. package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
  10. package/dist/chunk-2A5XJB43.js.map +1 -0
  11. package/dist/chunk-47X6LRCE.js +76 -0
  12. package/dist/chunk-47X6LRCE.js.map +1 -0
  13. package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
  14. package/dist/chunk-4F5DQN55.js.map +1 -0
  15. package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
  16. package/dist/chunk-4S4BM3QQ.js.map +1 -0
  17. package/dist/chunk-5BKGXME7.js +65 -0
  18. package/dist/chunk-5BKGXME7.js.map +1 -0
  19. package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
  20. package/dist/chunk-6QDKWHLS.js.map +1 -0
  21. package/dist/chunk-I4MBDTY5.js +272 -0
  22. package/dist/chunk-I4MBDTY5.js.map +1 -0
  23. package/dist/chunk-K2TPS5LB.js +569 -0
  24. package/dist/chunk-K2TPS5LB.js.map +1 -0
  25. package/dist/chunk-KKHDIONI.js +414 -0
  26. package/dist/chunk-KKHDIONI.js.map +1 -0
  27. package/dist/chunk-KMPRBJK4.js +74 -0
  28. package/dist/chunk-KMPRBJK4.js.map +1 -0
  29. package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
  30. package/dist/chunk-KTGTIOFD.js.map +1 -0
  31. package/dist/chunk-LSH4MMOZ.js +838 -0
  32. package/dist/chunk-LSH4MMOZ.js.map +1 -0
  33. package/dist/chunk-NG236HPC.js +57 -0
  34. package/dist/chunk-NG236HPC.js.map +1 -0
  35. package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
  36. package/dist/chunk-NLMNWKVM.js.map +1 -0
  37. package/dist/chunk-NU65VQ7M.js +99 -0
  38. package/dist/chunk-NU65VQ7M.js.map +1 -0
  39. package/dist/chunk-OHEPNJQN.js +554 -0
  40. package/dist/chunk-OHEPNJQN.js.map +1 -0
  41. package/dist/chunk-OWLAAMME.js +250 -0
  42. package/dist/chunk-OWLAAMME.js.map +1 -0
  43. package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
  44. package/dist/chunk-PC4UYEBM.js.map +1 -0
  45. package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
  46. package/dist/chunk-RAF443UI.js.map +1 -0
  47. package/dist/chunk-RZTMDUO7.js +49 -0
  48. package/dist/chunk-RZTMDUO7.js.map +1 -0
  49. package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
  50. package/dist/chunk-SESZDQPX.js.map +1 -0
  51. package/dist/{chunk-6KQG5HAH.js → chunk-SY6WAAAD.js} +84 -71
  52. package/dist/chunk-SY6WAAAD.js.map +1 -0
  53. package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
  54. package/dist/chunk-TVVP3ZZQ.js.map +1 -0
  55. package/dist/{chunk-VQQSPGSM.js → chunk-VRJVTXRV.js} +169 -111
  56. package/dist/chunk-VRJVTXRV.js.map +1 -0
  57. package/dist/chunk-WWYCWKUM.js +196 -0
  58. package/dist/chunk-WWYCWKUM.js.map +1 -0
  59. package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
  60. package/dist/chunk-YRZ4M5GS.js.map +1 -0
  61. package/dist/chunk-ZN274SWR.js +613 -0
  62. package/dist/chunk-ZN274SWR.js.map +1 -0
  63. package/dist/cli.js +10 -6
  64. package/dist/cli.js.map +1 -1
  65. package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
  66. package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
  67. package/dist/control.d.ts +8 -6
  68. package/dist/control.js +10 -7
  69. package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
  70. package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
  71. package/dist/errors-BZ9sTdz7.d.ts +70 -0
  72. package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
  73. package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
  74. package/dist/governance/index.d.ts +5 -0
  75. package/dist/governance/index.js +18 -0
  76. package/dist/governance/index.js.map +1 -0
  77. package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
  78. package/dist/index-Oj9fAPPN.d.ts +270 -0
  79. package/dist/index.d.ts +1866 -3151
  80. package/dist/index.js +5457 -7809
  81. package/dist/index.js.map +1 -1
  82. package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
  83. package/dist/knowledge/index.d.ts +102 -0
  84. package/dist/knowledge/index.js +18 -0
  85. package/dist/knowledge/index.js.map +1 -0
  86. package/dist/meta-eval/index.d.ts +99 -0
  87. package/dist/meta-eval/index.js +324 -0
  88. package/dist/meta-eval/index.js.map +1 -0
  89. package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
  90. package/dist/openapi.json +1 -1
  91. package/dist/optimization.d.ts +11 -8
  92. package/dist/optimization.js +11 -9
  93. package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
  94. package/dist/pipelines/index.d.ts +172 -0
  95. package/dist/pipelines/index.js +409 -0
  96. package/dist/pipelines/index.js.map +1 -0
  97. package/dist/prm/index.d.ts +99 -0
  98. package/dist/prm/index.js +222 -0
  99. package/dist/prm/index.js.map +1 -0
  100. package/dist/query-DODUYdPg.d.ts +30 -0
  101. package/dist/release-report-TDPn1cxq.d.ts +292 -0
  102. package/dist/replay-BL96gCEP.d.ts +226 -0
  103. package/dist/reporting.d.ts +10 -295
  104. package/dist/reporting.js +10 -6
  105. package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-CUOiGcGv.d.ts} +148 -146
  106. package/dist/rl.d.ts +1762 -8
  107. package/dist/rl.js +2035 -58
  108. package/dist/rl.js.map +1 -1
  109. package/dist/rubric-D5tjHNJQ.d.ts +72 -0
  110. package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
  111. package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
  112. package/dist/sequential-Dgz1n51-.d.ts +139 -0
  113. package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
  114. package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-BXGs_9V0.d.ts} +3 -76
  115. package/dist/telemetry/file.js +4 -1
  116. package/dist/telemetry/file.js.map +1 -1
  117. package/dist/telemetry/index.js +57 -57
  118. package/dist/telemetry/index.js.map +1 -1
  119. package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
  120. package/dist/traces.d.ts +142 -387
  121. package/dist/traces.js +1302 -40
  122. package/dist/traces.js.map +1 -1
  123. package/dist/trajectory-CnoBo-JY.d.ts +32 -0
  124. package/dist/wire/index.d.ts +22 -22
  125. package/dist/wire/index.js +4 -3
  126. package/package.json +35 -2
  127. package/dist/chunk-42I2QC2L.js.map +0 -1
  128. package/dist/chunk-4W4NCYM2.js +0 -1945
  129. package/dist/chunk-4W4NCYM2.js.map +0 -1
  130. package/dist/chunk-5IIQKMD5.js.map +0 -1
  131. package/dist/chunk-6KQG5HAH.js.map +0 -1
  132. package/dist/chunk-6M774GY6.js.map +0 -1
  133. package/dist/chunk-7EAUOUQS.js.map +0 -1
  134. package/dist/chunk-AXHNWLIX.js.map +0 -1
  135. package/dist/chunk-EXGR4XEM.js.map +0 -1
  136. package/dist/chunk-IOXMGMHQ.js.map +0 -1
  137. package/dist/chunk-KAO3Q65R.js.map +0 -1
  138. package/dist/chunk-LZKIOBG2.js +0 -2026
  139. package/dist/chunk-LZKIOBG2.js.map +0 -1
  140. package/dist/chunk-QBW3YBTR.js.map +0 -1
  141. package/dist/chunk-QUKKGHTZ.js.map +0 -1
  142. package/dist/chunk-SQQLHODJ.js.map +0 -1
  143. package/dist/chunk-V5QSWN7L.js +0 -1310
  144. package/dist/chunk-V5QSWN7L.js.map +0 -1
  145. package/dist/chunk-VQQSPGSM.js.map +0 -1
  146. package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
  147. package/dist/index-ekBXweiQ.d.ts +0 -1894
  148. package/dist/sequential-DgU2mFsE.d.ts +0 -304
@@ -10,6 +10,17 @@ __export(benchmarks_exports, {
10
10
  routing: () => routing_exports
11
11
  });
12
12
 
13
+ // src/benchmarks/routing/index.ts
14
+ var routing_exports = {};
15
+ __export(routing_exports, {
16
+ ROUTING_DATASET: () => ROUTING_DATASET,
17
+ RoutingAdapter: () => RoutingAdapter,
18
+ assignSplit: () => assignSplit,
19
+ evaluate: () => evaluate,
20
+ extractRouteTokens: () => extractRouteTokens,
21
+ loadDataset: () => loadDataset
22
+ });
23
+
13
24
  // src/benchmarks/types.ts
14
25
  function fnv1a32(input) {
15
26
  let h = 2166136261;
@@ -28,17 +39,6 @@ function deterministicSplit(itemId, seed = BENCHMARK_SPLIT_SEED) {
28
39
  return "holdout";
29
40
  }
30
41
 
31
- // src/benchmarks/routing/index.ts
32
- var routing_exports = {};
33
- __export(routing_exports, {
34
- ROUTING_DATASET: () => ROUTING_DATASET,
35
- RoutingAdapter: () => RoutingAdapter,
36
- assignSplit: () => assignSplit,
37
- evaluate: () => evaluate,
38
- extractRouteTokens: () => extractRouteTokens,
39
- loadDataset: () => loadDataset
40
- });
41
-
42
42
  // src/benchmarks/routing/dataset.ts
43
43
  var ROUTING_DATASET = [
44
44
  {
@@ -174,11 +174,15 @@ var ROUTING_DATASET = [
174
174
  // src/benchmarks/routing/index.ts
175
175
  var RoutingAdapter = class {
176
176
  async loadDataset(split) {
177
- return ROUTING_DATASET.map((item) => ({ id: item.id, payload: item })).filter((it) => assignSplitImpl(it.id) === split);
177
+ return ROUTING_DATASET.map((item) => ({ id: item.id, payload: item })).filter(
178
+ (it) => assignSplitImpl(it.id) === split
179
+ );
178
180
  }
179
181
  async evaluate(item, response) {
180
182
  const tokens = extractRouteTokens(response);
181
- const correct = new Set([item.payload.route, ...item.payload.synonyms].map((s) => s.toLowerCase()));
183
+ const correct = new Set(
184
+ [item.payload.route, ...item.payload.synonyms].map((s) => s.toLowerCase())
185
+ );
182
186
  const hardNeg = new Set(item.payload.hardNegatives.map((s) => s.toLowerCase()));
183
187
  const firstMatch = tokens.find((t) => correct.has(t.toLowerCase())) ?? null;
184
188
  const firstHardNeg = tokens.find((t) => hardNeg.has(t.toLowerCase())) ?? null;
@@ -216,4 +220,4 @@ export {
216
220
  routing_exports,
217
221
  benchmarks_exports
218
222
  };
219
- //# sourceMappingURL=chunk-42I2QC2L.js.map
223
+ //# sourceMappingURL=chunk-6QDKWHLS.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/benchmarks/index.ts","../src/benchmarks/routing/index.ts","../src/benchmarks/types.ts","../src/benchmarks/routing/dataset.ts"],"sourcesContent":["/**\n * Reference benchmark wrappers — entry point.\n *\n * Core surface (exported here):\n * - The `BenchmarkAdapter` contract.\n * - `deterministicSplit` + `BENCHMARK_SPLIT_SEED` for split assignment.\n * - `routing` — synthetic 16-task router benchmark. The only novel\n * benchmark we built; ships in the package.\n *\n * Example wrappers (under `examples/benchmarks/`, NOT in the bundle):\n * - `gsm8k` — exact-match math reasoning (HF mirror, dataset\n * not bundled).\n * - `swebench-lite` — 30-instance SWE-Bench subset via an external\n * grader command.\n *\n * The example wrappers are reference implementations of `BenchmarkAdapter`.\n * Read them, copy them, adapt them. They're intentionally not in the main\n * entry — every team will configure them differently.\n */\n\nexport * as routing from './routing/index'\nexport type {\n BenchmarkAdapter,\n BenchmarkDatasetItem,\n BenchmarkEvaluation,\n} from './types'\nexport { BENCHMARK_SPLIT_SEED, deterministicSplit } from './types'\n","/**\n * Routing benchmark — synthetic, dependency-free, ships in the\n * package. 16 cross-category items in `dataset.ts`. See\n * `routing/README.md` for the format.\n *\n * `evaluate` does case-insensitive exact match against the canonical\n * route plus declared synonyms. The first valid route token in the\n * response wins; everything else is ignored. 
Wrong answers also\n * report whether they hit a hard negative — useful when triaging\n * \"always picks the popular route\" failure modes.\n */\n\nimport type { RunSplitTag } from '../../run-record'\nimport type { BenchmarkAdapter, BenchmarkDatasetItem, BenchmarkEvaluation } from '../types'\nimport { deterministicSplit } from '../types'\nimport { ROUTING_DATASET, type RoutingItem } from './dataset'\n\nexport type { RoutingItem }\nexport type RoutingPayload = RoutingItem\nexport type RoutingDatasetItem = BenchmarkDatasetItem<RoutingPayload>\n\nclass RoutingAdapter implements BenchmarkAdapter<RoutingDatasetItem, RoutingPayload> {\n async loadDataset(split: RunSplitTag): Promise<RoutingDatasetItem[]> {\n return ROUTING_DATASET.map((item) => ({ id: item.id, payload: item })).filter(\n (it) => assignSplitImpl(it.id) === split,\n )\n }\n\n async evaluate(item: RoutingDatasetItem, response: string): Promise<BenchmarkEvaluation> {\n const tokens = extractRouteTokens(response)\n const correct = new Set<string>(\n [item.payload.route, ...item.payload.synonyms].map((s) => s.toLowerCase()),\n )\n const hardNeg = new Set<string>(item.payload.hardNegatives.map((s) => s.toLowerCase()))\n const firstMatch = tokens.find((t) => correct.has(t.toLowerCase())) ?? null\n const firstHardNeg = tokens.find((t) => hardNeg.has(t.toLowerCase())) ?? null\n const score = firstMatch ? 1 : 0\n return {\n score,\n raw: {\n firstToken: tokens[0] ?? null,\n matchedRoute: firstMatch,\n hitHardNegative: Boolean(firstHardNeg),\n hardNegativeRoute: firstHardNeg,\n category: item.payload.category,\n },\n }\n }\n\n assignSplit(itemId: string): RunSplitTag {\n return assignSplitImpl(itemId)\n }\n}\n\nfunction assignSplitImpl(itemId: string): RunSplitTag {\n return deterministicSplit(`routing::${itemId}`)\n}\n\n/**\n * Pull route-shaped tokens out of a model response. Routes look like\n * `category.action` (`fs.write`, `chat.reply`). 
Bare alphanumerics\n * are not routes, but `category.action` patterns are robust to most\n * model wrappers (JSON output, prose explanations, code fences).\n */\nexport function extractRouteTokens(response: string): string[] {\n const matches = response.match(/[a-z][a-z0-9_]*\\.[a-z][a-z0-9_]*/gi)\n return matches ?? []\n}\n\nconst adapter = new RoutingAdapter()\n\nexport const loadDataset = adapter.loadDataset.bind(adapter)\nexport const evaluate = adapter.evaluate.bind(adapter)\nexport const assignSplit = adapter.assignSplit.bind(adapter)\nexport { ROUTING_DATASET, RoutingAdapter }\n","/**\n * Shared types for the reference benchmark wrappers under\n * `src/benchmarks/`. Each wrapper exports the three functions in\n * `BenchmarkAdapter` plus its own typed `DatasetItem` shape.\n */\n\nimport type { RunSplitTag } from '../run-record'\n\nexport interface BenchmarkDatasetItem<TPayload = unknown> {\n /** Stable dataset-local item id (used for split assignment + paper\n * references). Unique within a benchmark. */\n id: string\n /** Free-form payload. Each benchmark defines its own shape. */\n payload: TPayload\n}\n\nexport interface BenchmarkEvaluation {\n /** [0, 1] score for the response on this item. Exact-match\n * benchmarks use 0/1; partial-credit benchmarks may return\n * fractional values. */\n score: number\n /** Optional bag of raw scoring signals — e.g. parsed numeric\n * answer, regex match, judge sub-scores. */\n raw: Record<string, unknown>\n}\n\n/** Common signature implemented by every adapter under `src/benchmarks/*`. */\n// `TPayload` is the per-item payload type; `_TItem` is preserved for\n// downstream type-narrowing extensions (a richer `BenchmarkDatasetItem`\n// subclass that adds e.g. provenance metadata) but is intentionally\n// unused here. `noUnusedLocals` requires the leading underscore.\nexport interface BenchmarkAdapter<_TItem = unknown, TPayload = unknown> {\n /** Load the dataset for the given split. 
May hit the network on\n * first call but should be cache-friendly. Adapters that don't\n * ship the dataset itself MUST throw a clearly-marked error\n * pointing the caller at the loader script. */\n loadDataset(split: RunSplitTag): Promise<BenchmarkDatasetItem<TPayload>[]>\n /** Score a single response. Pure with respect to the inputs. */\n evaluate(item: BenchmarkDatasetItem<TPayload>, response: string): Promise<BenchmarkEvaluation>\n /** Deterministic split assignment via item id hashing. The\n * fraction of items in each split is implementation-defined but\n * MUST be stable across processes and platforms. */\n assignSplit(itemId: string): RunSplitTag\n}\n\n// ── Deterministic split assignment ───────────────────────────────────\n\n/**\n * 32-bit FNV-1a hash. Stable, allocation-free, deterministic across\n * runtimes. We use it to assign items to splits rather than depending\n * on a polyfilled crypto.subtle path.\n */\nfunction fnv1a32(input: string): number {\n let h = 0x811c9dc5\n for (let i = 0; i < input.length; i++) {\n h ^= input.charCodeAt(i) & 0xff\n h = (h + ((h << 1) + (h << 4) + (h << 7) + (h << 8) + (h << 24))) >>> 0\n }\n return h >>> 0\n}\n\n/** Split-assignment seed shared across all benchmarks. Bumping this\n * value reshuffles every split — do NOT do that lightly. */\nexport const BENCHMARK_SPLIT_SEED = 'agent-eval-v1'\n\n/**\n * Assign an item id to one of `'search' | 'dev' | 'holdout'` using a\n * stable 32-bit hash of `${seed}::${id}`. Default proportions:\n *\n * search: 60% (optimization-readable)\n * dev: 20% (held-out for tuning, leak-on-purpose during dev)\n * holdout:20% (paper-grade held-out, gated reads)\n */\nexport function deterministicSplit(\n itemId: string,\n seed: string = BENCHMARK_SPLIT_SEED,\n): RunSplitTag {\n const h = fnv1a32(`${seed}::${itemId}`)\n const pos = h / 0x100000000\n if (pos < 0.6) return 'search'\n if (pos < 0.8) return 'dev'\n return 'holdout'\n}\n","/**\n * Synthetic routing dataset. 
16 tasks across 4 categories. Used as a\n * deterministic, dependency-free benchmark for any router that maps a\n * natural-language request to one of a fixed set of route labels.\n *\n * Format (see `routing/README.md` for prose):\n *\n * {\n * id: stable per-task ID (matches across processes).\n * category: one of the four route labels.\n * prompt: the user-facing request the router must classify.\n * route: the ground-truth route the router should pick.\n * synonyms: other strings that count as a correct answer.\n * hardNegatives:close-but-wrong route labels — used to detect the\n * \"always picks the popular route\" failure mode.\n * }\n *\n * The four categories are intentionally cross-domain (file ops,\n * math, search, conversation) so a router that collapses to one\n * category is easy to spot.\n */\n\nexport interface RoutingItem {\n id: string\n category: 'file' | 'math' | 'search' | 'chat'\n prompt: string\n /** Canonical correct route label. */\n route: string\n /** Alternate route labels that also count as correct. */\n synonyms: string[]\n /** Wrong-but-tempting route labels (for analysis, not grading). 
*/\n hardNegatives: string[]\n}\n\nexport const ROUTING_DATASET: RoutingItem[] = [\n {\n id: 'file_001',\n category: 'file',\n prompt: 'Save the meeting notes to /tmp/notes-2025-04.md as markdown.',\n route: 'fs.write',\n synonyms: ['filesystem.write', 'write_file'],\n hardNegatives: ['fs.read', 'chat.reply'],\n },\n {\n id: 'file_002',\n category: 'file',\n prompt: 'Read the contents of /etc/hosts and summarize the entries.',\n route: 'fs.read',\n synonyms: ['filesystem.read', 'read_file'],\n hardNegatives: ['fs.write', 'search.web'],\n },\n {\n id: 'file_003',\n category: 'file',\n prompt: 'List every Python file under src/ recursively.',\n route: 'fs.list',\n synonyms: ['filesystem.list', 'list_files'],\n hardNegatives: ['fs.read', 'search.code'],\n },\n {\n id: 'file_004',\n category: 'file',\n prompt: 'Delete the cached build at .turbo/cache.',\n route: 'fs.delete',\n synonyms: ['filesystem.delete', 'remove_file'],\n hardNegatives: ['fs.write', 'fs.list'],\n },\n {\n id: 'math_001',\n category: 'math',\n prompt: 'What is the integral of 3x^2 + 2x from 0 to 5?',\n route: 'math.integral',\n synonyms: ['calculator.integral', 'math.solve'],\n hardNegatives: ['math.derivative', 'chat.reply'],\n },\n {\n id: 'math_002',\n category: 'math',\n prompt: 'Compute the derivative of sin(x) * cos(x).',\n route: 'math.derivative',\n synonyms: ['calculator.derivative', 'math.solve'],\n hardNegatives: ['math.integral', 'math.algebra'],\n },\n {\n id: 'math_003',\n category: 'math',\n prompt: 'Solve 2x + 7 = 19 for x.',\n route: 'math.algebra',\n synonyms: ['calculator.algebra', 'math.solve'],\n hardNegatives: ['math.derivative', 'math.integral'],\n },\n {\n id: 'math_004',\n category: 'math',\n prompt: 'What is the prime factorization of 360?',\n route: 'math.numbertheory',\n synonyms: ['calculator.factor', 'math.solve'],\n hardNegatives: ['math.algebra', 'search.web'],\n },\n {\n id: 'search_001',\n category: 'search',\n prompt: 'Find recent papers on agent prompt 
optimization with held-out promotion gates.',\n route: 'search.web',\n synonyms: ['web.search', 'search.papers'],\n hardNegatives: ['search.code', 'chat.reply'],\n },\n {\n id: 'search_002',\n category: 'search',\n prompt: 'Search the codebase for every call site of `runProposeReview`.',\n route: 'search.code',\n synonyms: ['code.search', 'grep'],\n hardNegatives: ['search.web', 'fs.read'],\n },\n {\n id: 'search_003',\n category: 'search',\n prompt: 'What is the latest release of the Tangle network on GitHub?',\n route: 'search.web',\n synonyms: ['web.search', 'github.releases'],\n hardNegatives: ['search.code', 'chat.reply'],\n },\n {\n id: 'search_004',\n category: 'search',\n prompt: 'Find all TODO comments in the agent-eval src tree.',\n route: 'search.code',\n synonyms: ['code.search', 'grep'],\n hardNegatives: ['search.web', 'fs.list'],\n },\n {\n id: 'chat_001',\n category: 'chat',\n prompt: 'Hi there, how are you doing today?',\n route: 'chat.reply',\n synonyms: ['conversation.reply'],\n hardNegatives: ['search.web', 'fs.read'],\n },\n {\n id: 'chat_002',\n category: 'chat',\n prompt: 'Please explain the difference between an LLM and a foundation model.',\n route: 'chat.reply',\n synonyms: ['conversation.reply', 'qa.answer'],\n hardNegatives: ['search.web', 'math.algebra'],\n },\n {\n id: 'chat_003',\n category: 'chat',\n prompt: 'Tell me a short joke about distributed systems.',\n route: 'chat.reply',\n synonyms: ['conversation.reply'],\n hardNegatives: ['search.web', 'fs.read'],\n },\n {\n id: 'chat_004',\n category: 'chat',\n prompt: 'Acknowledge my last message with a thumbs up.',\n route: 'chat.reply',\n synonyms: ['conversation.reply', 'react'],\n hardNegatives: ['fs.write', 'search.web'],\n 
},\n]\n"],"mappings":";;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACoDA,SAAS,QAAQ,OAAuB;AACtC,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,SAAK,MAAM,WAAW,CAAC,IAAI;AAC3B,QAAK,MAAM,KAAK,MAAM,KAAK,MAAM,KAAK,MAAM,KAAK,MAAM,KAAK,SAAU;AAAA,EACxE;AACA,SAAO,MAAM;AACf;AAIO,IAAM,uBAAuB;AAU7B,SAAS,mBACd,QACA,OAAe,sBACF;AACb,QAAM,IAAI,QAAQ,GAAG,IAAI,KAAK,MAAM,EAAE;AACtC,QAAM,MAAM,IAAI;AAChB,MAAI,MAAM,IAAK,QAAO;AACtB,MAAI,MAAM,IAAK,QAAO;AACtB,SAAO;AACT;;;AChDO,IAAM,kBAAiC;AAAA,EAC5C;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,oBAAoB,YAAY;AAAA,IAC3C,eAAe,CAAC,WAAW,YAAY;AAAA,EACzC;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,mBAAmB,WAAW;AAAA,IACzC,eAAe,CAAC,YAAY,YAAY;AAAA,EAC1C;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,mBAAmB,YAAY;AAAA,IAC1C,eAAe,CAAC,WAAW,aAAa;AAAA,EAC1C;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,qBAAqB,aAAa;AAAA,IAC7C,eAAe,CAAC,YAAY,SAAS;AAAA,EACvC;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,uBAAuB,YAAY;AAAA,IAC9C,eAAe,CAAC,mBAAmB,YAAY;AAAA,EACjD;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,yBAAyB,YAAY;AAAA,IAChD,eAAe,CAAC,iBAAiB,cAAc;AAAA,EACjD;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,sBAAsB,YAAY;AAAA,IAC7C,eAAe,CAAC,mBAAmB,eAAe;AAAA,EACpD;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,qBAAqB,YAAY;AAAA,IAC5C,eAAe,CAAC,gBAAgB,YAAY;AAAA,EAC9C;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,cAAc,eAAe;AAAA,IACxC,eAAe,CAAC,eAAe,YAAY;AAAA,EAC7C;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,eAAe,MAAM;AAAA,IAChC,eAAe,CAAC,cAAc,SAAS;AAAA,EACzC;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU
;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,cAAc,iBAAiB;AAAA,IAC1C,eAAe,CAAC,eAAe,YAAY;AAAA,EAC7C;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,eAAe,MAAM;AAAA,IAChC,eAAe,CAAC,cAAc,SAAS;AAAA,EACzC;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,oBAAoB;AAAA,IAC/B,eAAe,CAAC,cAAc,SAAS;AAAA,EACzC;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,sBAAsB,WAAW;AAAA,IAC5C,eAAe,CAAC,cAAc,cAAc;AAAA,EAC9C;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,oBAAoB;AAAA,IAC/B,eAAe,CAAC,cAAc,SAAS;AAAA,EACzC;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,sBAAsB,OAAO;AAAA,IACxC,eAAe,CAAC,YAAY,YAAY;AAAA,EAC1C;AACF;;;AF9IA,IAAM,iBAAN,MAAqF;AAAA,EACnF,MAAM,YAAY,OAAmD;AACnE,WAAO,gBAAgB,IAAI,CAAC,UAAU,EAAE,IAAI,KAAK,IAAI,SAAS,KAAK,EAAE,EAAE;AAAA,MACrE,CAAC,OAAO,gBAAgB,GAAG,EAAE,MAAM;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,MAA0B,UAAgD;AACvF,UAAM,SAAS,mBAAmB,QAAQ;AAC1C,UAAM,UAAU,IAAI;AAAA,MAClB,CAAC,KAAK,QAAQ,OAAO,GAAG,KAAK,QAAQ,QAAQ,EAAE,IAAI,CAAC,MAAM,EAAE,YAAY,CAAC;AAAA,IAC3E;AACA,UAAM,UAAU,IAAI,IAAY,KAAK,QAAQ,cAAc,IAAI,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;AACtF,UAAM,aAAa,OAAO,KAAK,CAAC,MAAM,QAAQ,IAAI,EAAE,YAAY,CAAC,CAAC,KAAK;AACvE,UAAM,eAAe,OAAO,KAAK,CAAC,MAAM,QAAQ,IAAI,EAAE,YAAY,CAAC,CAAC,KAAK;AACzE,UAAM,QAAQ,aAAa,IAAI;AAC/B,WAAO;AAAA,MACL;AAAA,MACA,KAAK;AAAA,QACH,YAAY,OAAO,CAAC,KAAK;AAAA,QACzB,cAAc;AAAA,QACd,iBAAiB,QAAQ,YAAY;AAAA,QACrC,mBAAmB;AAAA,QACnB,UAAU,KAAK,QAAQ;AAAA,MACzB;AAAA,IACF;AAAA,EACF;AAAA,EAEA,YAAY,QAA6B;AACvC,WAAO,gBAAgB,MAAM;AAAA,EAC/B;AACF;AAEA,SAAS,gBAAgB,QAA6B;AACpD,SAAO,mBAAmB,YAAY,MAAM,EAAE;AAChD;AAQO,SAAS,mBAAmB,UAA4B;AAC7D,QAAM,UAAU,SAAS,MAAM,oCAAoC;AACnE,SAAO,WAAW,CAAC;AACrB;AAEA,IAAM,UAAU,IAAI,eAAe;AAE5B,IAAM,cAAc,QAAQ,YAAY,KAAK,OAAO;AACpD,IAAM,WAAW,QAAQ,SAAS,KAAK,OAAO;AAC9C,IAAM,cAAc,QAAQ,YAAY,KAAK,OAAO;","names":[]}
@@ -0,0 +1,272 @@
1
+ import {
2
+ ValidationError
3
+ } from "./chunk-NG236HPC.js";
4
+
5
+ // src/statistics.ts
6
/**
 * Dimension names flagged as "inverted".
 *
 * NOTE(review): presumably these are dimensions where a higher raw score is
 * worse — confirm. `normalizeScores` below does not transform them, so the
 * set is kept only for any other code in the bundle that may reference it.
 */
var INVERTED_DIMENSIONS = /* @__PURE__ */ new Set(["hallucination", "false_confidence", "worst_failure"]);

/**
 * Return a shallow copy of `scores` with every entry passed through unchanged.
 *
 * The previous implementation branched on `INVERTED_DIMENSIONS` membership,
 * but both arms returned the entry untouched; the dead branch is removed.
 *
 * NOTE(review): if inverted dimensions were meant to be flipped here
 * (e.g. `1 - score`), that transformation was never implemented — confirm
 * the intended contract before relying on this as a normalisation step.
 *
 * @param {Array<object>} scores Score entries.
 * @returns {Array<object>} A new array holding the same entry objects.
 */
function normalizeScores(scores) {
  return scores.map((s) => s);
}
15
/**
 * Weighted arithmetic mean of `{ score, weight }` entries.
 *
 * A missing (`null`/`undefined`) weight counts as 1; an explicit weight of 0
 * is honoured. Returns 0 for an empty list or when the weights do not sum to
 * a positive number.
 *
 * @param {Array<{score: number, weight?: number}>} scores
 * @returns {number}
 */
function weightedMean(scores) {
  if (scores.length === 0) {
    return 0;
  }
  let numerator = 0;
  let denominator = 0;
  for (const entry of scores) {
    const factor = entry.weight ?? 1;
    numerator += entry.score * factor;
    denominator += factor;
  }
  if (denominator > 0) {
    return numerator / denominator;
  }
  return 0;
}
26
/**
 * Percentile-bootstrap confidence interval for the mean of `scores`.
 *
 * Draws 1000 resamples (with replacement, via `Math.random`) and reads the
 * interval bounds off the sorted resample means, so the bounds are not
 * deterministic between calls.
 *
 * Edge cases:
 * - empty input yields `{ mean: 0, lower: 0, upper: 0 }`;
 * - a single score yields that score for all three fields;
 * - `confidence` is clamped to [0, 1] (NaN falls back to 0.95) and the
 *   percentile indices are clamped into the resample array. Previously an
 *   out-of-range level indexed outside the array and produced `undefined`
 *   bounds, and a level of 0 could return `lower > upper`.
 *
 * @param {number[]} scores Sample values.
 * @param {number} [confidence=0.95] Confidence level in [0, 1].
 * @returns {{mean: number, lower: number, upper: number}}
 */
function confidenceInterval(scores, confidence = 0.95) {
  if (scores.length === 0) return { mean: 0, lower: 0, upper: 0 };
  if (scores.length === 1) return { mean: scores[0], lower: scores[0], upper: scores[0] };
  const n = scores.length;
  const mean = scores.reduce((a, b) => a + b, 0) / n;

  const B = 1e3;
  const bootstrapMeans = [];
  for (let i = 0; i < B; i++) {
    let sum = 0;
    for (let j = 0; j < n; j++) {
      sum += scores[Math.floor(Math.random() * n)];
    }
    bootstrapMeans.push(sum / n);
  }
  bootstrapMeans.sort((a, b) => a - b);

  // Keep the level inside [0, 1] so the percentile indices stay meaningful.
  const level = Number.isNaN(confidence) ? 0.95 : Math.min(1, Math.max(0, confidence));
  const alpha = 1 - level;
  const clampIndex = (idx) => Math.min(B - 1, Math.max(0, idx));
  const lowerIdx = clampIndex(Math.floor(alpha / 2 * B));
  // Never let the upper percentile fall below the lower one (level === 0).
  const upperIdx = Math.max(lowerIdx, clampIndex(Math.floor((1 - alpha / 2) * B) - 1));
  return {
    mean,
    lower: bootstrapMeans[lowerIdx],
    upper: bootstrapMeans[upperIdx]
  };
}
50
/**
 * Agreement coefficient across judges: `1 - observed / expected`, where both
 * terms are mean squared differences over pairs of scores.
 *
 * - "observed" averages squared differences between scores that share a
 *   bucket;
 * - "expected" averages squared differences over every pair of scores drawn
 *   from all buckets that hold at least two scores.
 *
 * Buckets are built per dimension in encounter order: a score joins the most
 * recent bucket of its dimension until that bucket holds `judgeScores.length`
 * entries, then a new bucket is opened. Two scores from the same judge for
 * the same dimension can therefore land in one bucket.
 *
 * Returns 1 when there are fewer than two judges, when no bucket holds two
 * scores, or when the expected disagreement is 0.
 *
 * @param {Array<Array<{dimension: string, score: number}>>} judgeScores
 *   One list of scored dimensions per judge.
 * @returns {number}
 */
function interRaterReliability(judgeScores) {
  const judgeCount = judgeScores.length;
  if (judgeCount < 2) {
    return 1;
  }

  // Visit every unordered pair (i < j) in index order.
  const visitPairs = (values, visit) => {
    for (let left = 0; left < values.length; left++) {
      for (let right = left + 1; right < values.length; right++) {
        visit(values[left], values[right]);
      }
    }
  };

  const bucketsByDimension = /* @__PURE__ */ new Map();
  for (const judgeSet of judgeScores) {
    for (const entry of judgeSet) {
      let buckets = bucketsByDimension.get(entry.dimension);
      if (buckets === undefined) {
        buckets = [];
        bucketsByDimension.set(entry.dimension, buckets);
      }
      const open = buckets[buckets.length - 1];
      if (open !== undefined && open.length < judgeCount) {
        open.push(entry.score);
      } else {
        buckets.push([entry.score]);
      }
    }
  }

  const pooled = [];
  let observedTotal = 0;
  let observedPairs = 0;
  for (const buckets of bucketsByDimension.values()) {
    for (const ratings of buckets) {
      if (ratings.length >= 2) {
        for (const rating of ratings) {
          pooled.push(rating);
        }
        visitPairs(ratings, (x, y) => {
          observedTotal += (x - y) ** 2;
          observedPairs += 1;
        });
      }
    }
  }
  if (observedPairs === 0 || pooled.length < 2) {
    return 1;
  }
  const observedDisagreement = observedTotal / observedPairs;

  let expectedTotal = 0;
  let expectedPairs = 0;
  visitPairs(pooled, (x, y) => {
    expectedTotal += (x - y) ** 2;
    expectedPairs += 1;
  });
  const expectedDisagreement = expectedPairs > 0 ? expectedTotal / expectedPairs : 0;
  if (expectedDisagreement === 0) {
    return 1;
  }
  return 1 - observedDisagreement / expectedDisagreement;
}
91
/**
 * Two-sided Mann-Whitney U test using the normal approximation.
 *
 * Tied values receive the average of the ranks they span. The standard
 * deviation of U is tie-corrected:
 *
 *   sigma^2 = n1*n2/12 * ((N + 1) - sum(t^3 - t) / (N * (N - 1)))
 *
 * where `t` is the size of each group of tied values and `N = n1 + n2`.
 * Without ties this is identical to the uncorrected formula; with ties the
 * uncorrected formula overstates sigma and therefore the p-value.
 *
 * The tie scan always advances by at least one element, so a NaN value
 * (which is never `===` to itself) can no longer stall the loop.
 *
 * @param {number[]} a First sample.
 * @param {number[]} b Second sample.
 * @returns {{u: number, p: number}} `u` is min(U1, U2); `p` is the two-sided
 *   p-value. Returns `{ u: 0, p: 1 }` if either sample is empty and `p: 1`
 *   when sigma is 0 (every value tied).
 */
function mannWhitneyU(a, b) {
  if (a.length === 0 || b.length === 0) return { u: 0, p: 1 };
  const n1 = a.length;
  const n2 = b.length;
  const total = n1 + n2;
  const combined = [
    ...a.map((v) => ({ v, group: "a" })),
    ...b.map((v) => ({ v, group: "b" }))
  ].sort((x, y) => x.v - y.v);

  const ranks = new Array(total);
  let tieTerm = 0; // running sum of t^3 - t over groups of tied values
  let i = 0;
  while (i < total) {
    // Start at i + 1: element i always belongs to its own tie group.
    let j = i + 1;
    while (j < total && combined[j].v === combined[i].v) j++;
    const avgRank = (i + 1 + j) / 2;
    for (let k = i; k < j; k++) ranks[k] = avgRank;
    const tieSize = j - i;
    tieTerm += tieSize ** 3 - tieSize;
    i = j;
  }

  let r1 = 0;
  for (let k = 0; k < total; k++) {
    if (combined[k].group === "a") r1 += ranks[k];
  }
  const u1 = r1 - n1 * (n1 + 1) / 2;
  const u2 = n1 * n2 - u1;
  const u = Math.min(u1, u2);
  const mu = n1 * n2 / 2;
  // total >= 2 here because both samples are non-empty.
  const tieAdjustment = tieTerm / (total * (total - 1));
  const sigma = Math.sqrt(Math.max(0, n1 * n2 * (total + 1 - tieAdjustment) / 12));
  if (sigma === 0) return { u, p: 1 };
  const z = Math.abs(u - mu) / sigma;
  const p = 2 * (1 - normalCdf(z));
  return { u, p };
}
122
/**
 * Fraction of `target` reached by `current`, limited to the range [0, 1].
 *
 * A non-positive target is treated as already satisfied and yields 1.
 *
 * @param {number} current Progress achieved so far.
 * @param {number} target Goal value.
 * @returns {number}
 */
function partialCredit(current, target) {
  if (target <= 0) {
    return 1;
  }
  const ratio = current / target;
  const floored = Math.max(0, ratio);
  return Math.min(1, floored);
}
126
/**
 * Two-sided paired t-test on the per-index differences `after[i] - before[i]`.
 *
 * @param {number[]} before Measurements before the change.
 * @param {number[]} after Measurements after the change (same length).
 * @returns {{t: number, df: number, p: number}}
 *   - fewer than two pairs: `{ t: 0, df: 0, p: 1 }`;
 *   - zero variance in the differences: `t` is 0 (`p` 1) when the mean
 *     difference is 0, otherwise `t` is `Infinity` (`p` 0);
 *   - otherwise `p = 2 * (1 - studentTCdf(|t|, df))`.
 * @throws {ValidationError} When the two samples differ in length.
 */
function pairedTTest(before, after) {
  const n = before.length;
  if (n !== after.length) {
    throw new ValidationError(
      `pairedTTest: unequal sample sizes (${before.length} vs ${after.length})`
    );
  }
  if (n < 2) {
    return { t: 0, df: 0, p: 1 };
  }

  const df = n - 1;
  const deltas = before.map((value, idx) => after[idx] - value);

  let total = 0;
  for (const delta of deltas) {
    total += delta;
  }
  const mean = total / n;

  let squaredDeviation = 0;
  for (const delta of deltas) {
    squaredDeviation += (delta - mean) ** 2;
  }
  const se = Math.sqrt(squaredDeviation / df / n);

  if (se === 0) {
    // All differences are identical: the statistic degenerates.
    if (mean === 0) {
      return { t: 0, df, p: 1 };
    }
    return { t: Infinity, df, p: 0 };
  }

  const t = mean / se;
  const upperTail = 1 - studentTCdf(Math.abs(t), df);
  return { t, df, p: 2 * upperTail };
}
144
/**
 * Two-sided Wilcoxon signed-rank test using the normal approximation.
 *
 * Zero differences are discarded. Absolute differences are ranked with tied
 * magnitudes sharing the average of the ranks they span, and `w` is the sum
 * of the ranks of the positive differences. The variance of `w` is
 * tie-corrected:
 *
 *   var = n(n + 1)(2n + 1) / 24 - sum(t^3 - t) / 48
 *
 * where `t` is the size of each group of tied magnitudes. Without ties this
 * equals the uncorrected variance.
 *
 * The tie scan always advances by at least one element, so a NaN difference
 * (never `===` to itself) can no longer stall the loop.
 *
 * @param {number[]} before Measurements before the change.
 * @param {number[]} after Measurements after the change (same length).
 * @returns {{w: number, p: number}} Returns `{ w: 0, p: 1 }` when fewer than
 *   six non-zero differences remain.
 * @throws {ValidationError} When the two samples differ in length.
 */
function wilcoxonSignedRank(before, after) {
  if (before.length !== after.length) {
    throw new ValidationError(
      `wilcoxonSignedRank: unequal sample sizes (${before.length} vs ${after.length})`
    );
  }
  const diffs = before.map((b, idx) => after[idx] - b).filter((d) => d !== 0);
  const n = diffs.length;
  if (n < 6) return { w: 0, p: 1 };

  // Sort by magnitude but remember each difference's position in `diffs`.
  const byMagnitude = diffs
    .map((d, idx) => ({ abs: Math.abs(d), index: idx }))
    .sort((x, y) => x.abs - y.abs);

  const ranks = new Array(n);
  let tieTerm = 0; // running sum of t^3 - t over groups of tied magnitudes
  let i = 0;
  while (i < n) {
    // Start at i + 1: element i always belongs to its own tie group.
    let j = i + 1;
    while (j < n && byMagnitude[j].abs === byMagnitude[i].abs) j++;
    const avg = (i + 1 + j) / 2;
    for (let k = i; k < j; k++) ranks[byMagnitude[k].index] = avg;
    const tieSize = j - i;
    tieTerm += tieSize ** 3 - tieSize;
    i = j;
  }

  let wPlus = 0;
  for (let k = 0; k < n; k++) {
    if (diffs[k] > 0) wPlus += ranks[k];
  }
  const mean = n * (n + 1) / 4;
  const variance = n * (n + 1) * (2 * n + 1) / 24 - tieTerm / 48;
  const z = (wPlus - mean) / Math.sqrt(variance);
  const p = 2 * (1 - normalCdf(Math.abs(z)));
  return { w: wPlus, p };
}
171
/**
 * Cohen's d: standardized difference between the means of two independent
 * groups, using the pooled sample standard deviation.
 *
 * @param {number[]} a - First group.
 * @param {number[]} b - Second group.
 * @returns {number} `(mean(b) - mean(a)) / pooledSd`, so a positive value means
 *   group `b` is higher. Returns 0 when either group has fewer than two values
 *   or when the pooled standard deviation is 0.
 */
function cohensD(a, b) {
  const sizeA = a.length;
  const sizeB = b.length;
  if (sizeA < 2 || sizeB < 2) {
    return 0;
  }

  const average = (values) => values.reduce((sum, v) => sum + v, 0) / values.length;
  // Unbiased (n - 1) sample variance around a precomputed mean.
  const sampleVariance = (values, centre) =>
    values.reduce((sum, v) => sum + (v - centre) ** 2, 0) / (values.length - 1);

  const meanA = average(a);
  const meanB = average(b);
  const weightedA = (sizeA - 1) * sampleVariance(a, meanA);
  const weightedB = (sizeB - 1) * sampleVariance(b, meanB);
  const pooledSd = Math.sqrt((weightedA + weightedB) / (sizeA + sizeB - 2));

  return pooledSd === 0 ? 0 : (meanB - meanA) / pooledSd;
}
183
/**
 * Cumulative distribution function of Student's t distribution.
 *
 * Evaluated through the regularized incomplete beta function at
 * `df / (df + t^2)` with parameters `(df / 2, 1/2)`; half of that value is the
 * one-sided tail beyond `|t|`.
 *
 * @param {number} t - Point at which to evaluate the CDF.
 * @param {number} df - Degrees of freedom.
 * @returns {number} P(T <= t). Returns 0.5 when `df` is not positive, and
 *   defers to the standard normal CDF when `df` exceeds 100.
 */
function studentTCdf(t, df) {
  if (df <= 0) {
    return 0.5;
  }
  if (df > 100) {
    return normalCdf(t);
  }
  const tail = 0.5 * incompleteBeta(df / (df + t * t), df / 2, 0.5);
  if (t >= 0) {
    return 1 - tail;
  }
  return tail;
}
192
/**
 * Regularized incomplete beta function I_x(a, b), evaluated with a continued
 * fraction (modified Lentz method).
 *
 * The continued fraction only converges quickly for
 * `x < (a + 1) / (a + b + 2)`. Beyond that point the symmetry
 * `I_x(a, b) = 1 - I_{1 - x}(b, a)` is used, so arguments close to 1 stay
 * accurate within the fixed iteration budget.
 *
 * @param {number} x - Upper integration limit; values outside (0, 1) clamp the
 *   result to 0 or 1.
 * @param {number} a - First shape parameter.
 * @param {number} b - Second shape parameter.
 * @returns {number} I_x(a, b).
 */
function incompleteBeta(x, a, b) {
  if (x <= 0) return 0;
  if (x >= 1) return 1;

  const TINY = 1e-30;
  const MAX_ITER = 200;
  const EPS = 3e-7;
  // Lentz's method divides by these terms; keep them away from exact zero.
  const guard = (v) => (Math.abs(v) < TINY ? TINY : v);

  // Continued-fraction factor of I_xx(aa, bb), without the leading prefactor.
  const continuedFraction = (xx, aa, bb) => {
    let c = 1;
    let d = 1 / guard(1 - ((aa + bb) * xx) / (aa + 1));
    let f = d;
    for (let m = 1; m <= MAX_ITER; m++) {
      const m2 = 2 * m;
      // Even step of the recurrence.
      let num = (m * (bb - m) * xx) / ((aa + m2 - 1) * (aa + m2));
      d = 1 / guard(1 + num * d);
      c = guard(1 + num / c);
      f *= d * c;
      // Odd step of the recurrence.
      num = -((aa + m) * (aa + bb + m) * xx) / ((aa + m2) * (aa + m2 + 1));
      d = 1 / guard(1 + num * d);
      c = guard(1 + num / c);
      const delta = d * c;
      f *= delta;
      if (Math.abs(delta - 1) < EPS) break;
    }
    return f;
  };

  const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b);
  // x^a * (1 - x)^b / B(a, b); this prefactor is symmetric under the swap
  // (x, a, b) -> (1 - x, b, a), so it is shared by both branches.
  const front = Math.exp(a * Math.log(x) + b * Math.log1p(-x) - lnBeta);

  if (x < (a + 1) / (a + b + 2)) {
    return (front * continuedFraction(x, a, b)) / a;
  }
  return 1 - (front * continuedFraction(1 - x, b, a)) / b;
}
225
/**
 * Natural logarithm of the gamma function, computed with a Lanczos
 * approximation (g = 7, nine coefficients).
 *
 * @param {number} z - Argument of the gamma function.
 * @returns {number} ln Γ(z). Arguments below 0.5 are handled through the
 *   reflection formula Γ(z)·Γ(1 − z) = π / sin(πz).
 */
function lnGamma(z) {
  if (z < 0.5) {
    const reflected = Math.PI / Math.sin(Math.PI * z);
    return Math.log(reflected) - lnGamma(1 - z);
  }

  const lanczosG = 7;
  const [leading, ...partials] = [
    0.9999999999998099,
    676.5203681218851,
    -1259.1392167224028,
    771.3234287776531,
    -176.6150291621406,
    12.507343278686905,
    -0.13857109526572012,
    9.984369578019572e-6,
    1.5056327351493116e-7,
  ];

  const shifted = z - 1;
  let series = leading;
  partials.forEach((coef, k) => {
    // Partial-fraction term coef_i / (shifted + i), with i running from 1.
    series += coef / (shifted + (k + 1));
  });

  const t = shifted + lanczosG + 0.5;
  const halfLogTwoPi = 0.5 * Math.log(2 * Math.PI);
  return halfLogTwoPi + (shifted + 0.5) * Math.log(t) - t + Math.log(series);
}
247
/**
 * Standard normal cumulative distribution function Φ(x).
 *
 * Uses Φ(x) = ½·(1 + erf(x / √2)) with the Abramowitz–Stegun 7.1.26 rational
 * approximation of erf (absolute error below 1.5e-7).
 *
 * @param {number} x - Point at which to evaluate the CDF.
 * @returns {number} P(Z <= x) for a standard normal variable Z.
 */
function normalCdf(x) {
  const a1 = 0.254829592;
  const a2 = -0.284496736;
  const a3 = 1.421413741;
  const a4 = -1.453152027;
  const a5 = 1.061405429;
  const p = 0.3275911;

  const sign = x < 0 ? -1 : 1;
  // The approximation is for erf(u), so the whole formula, including the
  // 1 / (1 + p·u) substitution, must use u = |x| / √2. Scaling only the
  // exponent and leaving |x| in the substitution skews every result.
  const u = Math.abs(x) / Math.SQRT2;
  const t = 1 / (1 + p * u);
  const poly = ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t;
  const erfU = 1 - poly * Math.exp(-u * u);
  return 0.5 * (1 + sign * erfU);
}
260
+
261
+ export {
262
+ normalizeScores,
263
+ weightedMean,
264
+ confidenceInterval,
265
+ interRaterReliability,
266
+ mannWhitneyU,
267
+ partialCredit,
268
+ pairedTTest,
269
+ wilcoxonSignedRank,
270
+ cohensD
271
+ };
272
+ //# sourceMappingURL=chunk-I4MBDTY5.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/statistics.ts"],"sourcesContent":["import { ValidationError } from './errors'\nimport type { JudgeScore } from './types'\n\n/** Dimensions where lower raw score = better outcome (inverted semantics) */\nconst INVERTED_DIMENSIONS = new Set(['hallucination', 'false_confidence', 'worst_failure'])\n\n/**\n * Normalize scores so all dimensions follow \"higher = better\".\n * Inverted dimensions (hallucination, false_confidence, worst_failure)\n * already use inverted scoring in the prompt (10 = no hallucination),\n * but this function ensures consistency if raw scores leak through.\n */\nexport function normalizeScores(scores: JudgeScore[]): JudgeScore[] {\n return scores.map((s) => {\n if (INVERTED_DIMENSIONS.has(s.dimension)) {\n return s\n }\n return s\n })\n}\n\n/** Weighted mean — falls back to uniform weights when omitted */\nexport function weightedMean(scores: { score: number; weight?: number }[]): number {\n if (scores.length === 0) return 0\n let totalWeight = 0\n let weightedSum = 0\n for (const { score, weight } of scores) {\n const w = weight ?? 1\n weightedSum += score * w\n totalWeight += w\n }\n return totalWeight > 0 ? weightedSum / totalWeight : 0\n}\n\n/** Bootstrap confidence interval */\nexport function confidenceInterval(\n scores: number[],\n confidence = 0.95,\n): { mean: number; lower: number; upper: number } {\n if (scores.length === 0) return { mean: 0, lower: 0, upper: 0 }\n if (scores.length === 1) return { mean: scores[0]!, lower: scores[0]!, upper: scores[0]! 
}\n\n const n = scores.length\n const mean = scores.reduce((a, b) => a + b, 0) / n\n\n const B = 1000\n const bootstrapMeans: number[] = []\n\n for (let i = 0; i < B; i++) {\n let sum = 0\n for (let j = 0; j < n; j++) {\n sum += scores[Math.floor(Math.random() * n)]!\n }\n bootstrapMeans.push(sum / n)\n }\n\n bootstrapMeans.sort((a, b) => a - b)\n\n const alpha = 1 - confidence\n const lowerIdx = Math.floor((alpha / 2) * B)\n const upperIdx = Math.floor((1 - alpha / 2) * B) - 1\n\n return {\n mean,\n lower: bootstrapMeans[lowerIdx]!,\n upper: bootstrapMeans[Math.min(upperIdx, B - 1)]!,\n }\n}\n\n/**\n * Inter-rater reliability — simplified Krippendorff's alpha.\n *\n * Each inner array is one judge's scores for all items.\n * All arrays must have the same length (same items scored).\n */\nexport function interRaterReliability(judgeScores: JudgeScore[][]): number {\n if (judgeScores.length < 2) return 1\n\n // Group scores by dimension across judges\n const dimensionMap = new Map<string, number[][]>()\n for (const judgeSet of judgeScores) {\n for (const s of judgeSet) {\n if (!dimensionMap.has(s.dimension)) dimensionMap.set(s.dimension, [])\n const arr = dimensionMap.get(s.dimension)!\n if (arr.length === 0 || arr[arr.length - 1]!.length >= judgeScores.length) {\n arr.push([s.score])\n } else {\n arr[arr.length - 1]!.push(s.score)\n }\n }\n }\n\n // Collect all paired ratings\n const allValues: number[] = []\n const pairDiffs: number[] = []\n\n for (const items of dimensionMap.values()) {\n for (const ratings of items) {\n if (ratings.length < 2) continue\n for (const v of ratings) allValues.push(v)\n for (let i = 0; i < ratings.length; i++) {\n for (let j = i + 1; j < ratings.length; j++) {\n pairDiffs.push((ratings[i]! - ratings[j]!) 
** 2)\n }\n }\n }\n }\n\n if (pairDiffs.length === 0 || allValues.length < 2) return 1\n\n const observedDisagreement = pairDiffs.reduce((a, b) => a + b, 0) / pairDiffs.length\n\n // Expected disagreement from all possible pairings of values\n let expectedDisagreement = 0\n let expectedCount = 0\n for (let i = 0; i < allValues.length; i++) {\n for (let j = i + 1; j < allValues.length; j++) {\n expectedDisagreement += (allValues[i]! - allValues[j]!) ** 2\n expectedCount++\n }\n }\n expectedDisagreement = expectedCount > 0 ? expectedDisagreement / expectedCount : 0\n\n if (expectedDisagreement === 0) return 1\n return 1 - observedDisagreement / expectedDisagreement\n}\n\n/**\n * Mann-Whitney U test for comparing two independent groups.\n * Returns U statistic and approximate p-value (normal approximation).\n */\nexport function mannWhitneyU(a: number[], b: number[]): { u: number; p: number } {\n if (a.length === 0 || b.length === 0) return { u: 0, p: 1 }\n\n const n1 = a.length\n const n2 = b.length\n\n // Rank all values together\n const combined = [\n ...a.map((v) => ({ v, group: 'a' as const })),\n ...b.map((v) => ({ v, group: 'b' as const })),\n ].sort((x, y) => x.v - y.v)\n\n // Assign ranks with tie handling\n const ranks: number[] = new Array(combined.length)\n let i = 0\n while (i < combined.length) {\n let j = i\n while (j < combined.length && combined[j]!.v === combined[i]!.v) j++\n const avgRank = (i + 1 + j) / 2\n for (let k = i; k < j; k++) ranks[k] = avgRank\n i = j\n }\n\n // Sum ranks for group a\n let r1 = 0\n for (let k = 0; k < combined.length; k++) {\n if (combined[k]!.group === 'a') r1 += ranks[k]!\n }\n\n const u1 = r1 - (n1 * (n1 + 1)) / 2\n const u2 = n1 * n2 - u1\n const u = Math.min(u1, u2)\n\n // Normal approximation for p-value\n const mu = (n1 * n2) / 2\n const sigma = Math.sqrt((n1 * n2 * (n1 + n2 + 1)) / 12)\n\n if (sigma === 0) return { u, p: 1 }\n\n const z = Math.abs(u - mu) / sigma\n // Two-tailed p-value from z-score 
(approximation)\n const p = 2 * (1 - normalCdf(z))\n\n return { u, p }\n}\n\n/** Partial credit: returns 0-1 ratio of current toward target */\nexport function partialCredit(current: number, target: number): number {\n if (target <= 0) return 1\n return Math.min(1, Math.max(0, current / target))\n}\n\n/**\n * Paired t-test — before/after measurements on the SAME items.\n * Pairing removes inter-item variance, giving tighter significance than\n * an unpaired test when comparing prompt v1 vs prompt v2 on identical\n * scenarios.\n */\nexport function pairedTTest(\n before: number[],\n after: number[],\n): { t: number; df: number; p: number } {\n if (before.length !== after.length) {\n throw new ValidationError(\n `pairedTTest: unequal sample sizes (${before.length} vs ${after.length})`,\n )\n }\n const n = before.length\n if (n < 2) return { t: 0, df: 0, p: 1 }\n\n const diffs = before.map((b, i) => after[i]! - b)\n const mean = diffs.reduce((a, b) => a + b, 0) / n\n const variance = diffs.reduce((acc, d) => acc + (d - mean) ** 2, 0) / (n - 1)\n const se = Math.sqrt(variance / n)\n if (se === 0) return { t: mean === 0 ? 0 : Infinity, df: n - 1, p: mean === 0 ? 1 : 0 }\n\n const t = mean / se\n const df = n - 1\n const p = 2 * (1 - studentTCdf(Math.abs(t), df))\n return { t, df, p }\n}\n\n/**\n * Wilcoxon signed-rank test — paired non-parametric alternative.\n * Use when the differences aren't normally distributed.\n */\nexport function wilcoxonSignedRank(before: number[], after: number[]): { w: number; p: number } {\n if (before.length !== after.length) {\n throw new ValidationError(\n `wilcoxonSignedRank: unequal sample sizes (${before.length} vs ${after.length})`,\n )\n }\n const diffs = before.map((b, i) => after[i]! 
- b).filter((d) => d !== 0)\n const n = diffs.length\n if (n < 6) return { w: 0, p: 1 }\n\n const absRanks = diffs\n .map((d, i) => ({ abs: Math.abs(d), sign: Math.sign(d), i }))\n .sort((a, b) => a.abs - b.abs)\n const ranks: number[] = new Array(n)\n let i = 0\n while (i < n) {\n let j = i\n while (j < n && absRanks[j]!.abs === absRanks[i]!.abs) j++\n const avg = (i + 1 + j) / 2\n for (let k = i; k < j; k++) ranks[absRanks[k]!.i] = avg\n i = j\n }\n let wPlus = 0\n for (let k = 0; k < n; k++) if (diffs[k]! > 0) wPlus += ranks[k]!\n\n const mean = (n * (n + 1)) / 4\n const variance = (n * (n + 1) * (2 * n + 1)) / 24\n const z = (wPlus - mean) / Math.sqrt(variance)\n const p = 2 * (1 - normalCdf(Math.abs(z)))\n return { w: wPlus, p }\n}\n\n/**\n * Cohen's d — standardized effect size for two independent groups.\n * Positive d means group b has higher mean than group a.\n * Rule of thumb: |d| < 0.2 negligible, 0.2–0.5 small, 0.5–0.8 medium, > 0.8 large.\n */\nexport function cohensD(a: number[], b: number[]): number {\n if (a.length < 2 || b.length < 2) return 0\n const meanA = a.reduce((x, y) => x + y, 0) / a.length\n const meanB = b.reduce((x, y) => x + y, 0) / b.length\n const varA = a.reduce((acc, x) => acc + (x - meanA) ** 2, 0) / (a.length - 1)\n const varB = b.reduce((acc, x) => acc + (x - meanB) ** 2, 0) / (b.length - 1)\n const pooled = Math.sqrt(\n ((a.length - 1) * varA + (b.length - 1) * varB) / (a.length + b.length - 2),\n )\n if (pooled === 0) return 0\n return (meanB - meanA) / pooled\n}\n\n/** Student-t CDF approximation via Abramowitz-Stegun series. */\nfunction studentTCdf(t: number, df: number): number {\n if (df <= 0) return 0.5\n if (df > 100) return normalCdf(t)\n const x = df / (df + t * t)\n const a = df / 2\n const b = 0.5\n const ib = incompleteBeta(x, a, b)\n return t >= 0 ? 1 - 0.5 * ib : 0.5 * ib\n}\n\n/** Regularized incomplete beta function via continued fraction (Lentz). 
*/\nfunction incompleteBeta(x: number, a: number, b: number): number {\n if (x <= 0) return 0\n if (x >= 1) return 1\n const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b)\n const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a\n const maxIter = 200\n const eps = 3e-7\n let c = 1\n let d = 1 - ((a + b) * x) / (a + 1)\n if (Math.abs(d) < 1e-30) d = 1e-30\n d = 1 / d\n let f = d\n for (let m = 1; m <= maxIter; m++) {\n const m2 = 2 * m\n let num = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n f *= d * c\n num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n const delta = d * c\n f *= delta\n if (Math.abs(delta - 1) < eps) break\n }\n return front * f\n}\n\n/** Lanczos approximation to ln Γ(z). */\nfunction lnGamma(z: number): number {\n const g = 7\n const coefs = [\n 0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,\n -176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,\n 1.5056327351493116e-7,\n ]\n if (z < 0.5) {\n return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z)\n }\n z -= 1\n let x = coefs[0]!\n for (let i = 1; i < g + 2; i++) x += coefs[i]! / (z + i)\n const t = z + g + 0.5\n return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x)\n}\n\n// Standard normal CDF approximation (Abramowitz and Stegun)\nfunction normalCdf(x: number): number {\n const a1 = 0.254829592\n const a2 = -0.284496736\n const a3 = 1.421413741\n const a4 = -1.453152027\n const a5 = 1.061405429\n const p = 0.3275911\n\n const sign = x < 0 ? 
-1 : 1\n const absX = Math.abs(x)\n const t = 1 / (1 + p * absX)\n const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp((-absX * absX) / 2)\n\n return 0.5 * (1 + sign * y)\n}\n"],"mappings":";;;;;AAIA,IAAM,sBAAsB,oBAAI,IAAI,CAAC,iBAAiB,oBAAoB,eAAe,CAAC;AAQnF,SAAS,gBAAgB,QAAoC;AAClE,SAAO,OAAO,IAAI,CAAC,MAAM;AACvB,QAAI,oBAAoB,IAAI,EAAE,SAAS,GAAG;AACxC,aAAO;AAAA,IACT;AACA,WAAO;AAAA,EACT,CAAC;AACH;AAGO,SAAS,aAAa,QAAsD;AACjF,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,MAAI,cAAc;AAClB,MAAI,cAAc;AAClB,aAAW,EAAE,OAAO,OAAO,KAAK,QAAQ;AACtC,UAAM,IAAI,UAAU;AACpB,mBAAe,QAAQ;AACvB,mBAAe;AAAA,EACjB;AACA,SAAO,cAAc,IAAI,cAAc,cAAc;AACvD;AAGO,SAAS,mBACd,QACA,aAAa,MACmC;AAChD,MAAI,OAAO,WAAW,EAAG,QAAO,EAAE,MAAM,GAAG,OAAO,GAAG,OAAO,EAAE;AAC9D,MAAI,OAAO,WAAW,EAAG,QAAO,EAAE,MAAM,OAAO,CAAC,GAAI,OAAO,OAAO,CAAC,GAAI,OAAO,OAAO,CAAC,EAAG;AAEzF,QAAM,IAAI,OAAO;AACjB,QAAM,OAAO,OAAO,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAEjD,QAAM,IAAI;AACV,QAAM,iBAA2B,CAAC;AAElC,WAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,QAAI,MAAM;AACV,aAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,aAAO,OAAO,KAAK,MAAM,KAAK,OAAO,IAAI,CAAC,CAAC;AAAA,IAC7C;AACA,mBAAe,KAAK,MAAM,CAAC;AAAA,EAC7B;AAEA,iBAAe,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAEnC,QAAM,QAAQ,IAAI;AAClB,QAAM,WAAW,KAAK,MAAO,QAAQ,IAAK,CAAC;AAC3C,QAAM,WAAW,KAAK,OAAO,IAAI,QAAQ,KAAK,CAAC,IAAI;AAEnD,SAAO;AAAA,IACL;AAAA,IACA,OAAO,eAAe,QAAQ;AAAA,IAC9B,OAAO,eAAe,KAAK,IAAI,UAAU,IAAI,CAAC,CAAC;AAAA,EACjD;AACF;AAQO,SAAS,sBAAsB,aAAqC;AACzE,MAAI,YAAY,SAAS,EAAG,QAAO;AAGnC,QAAM,eAAe,oBAAI,IAAwB;AACjD,aAAW,YAAY,aAAa;AAClC,eAAW,KAAK,UAAU;AACxB,UAAI,CAAC,aAAa,IAAI,EAAE,SAAS,EAAG,cAAa,IAAI,EAAE,WAAW,CAAC,CAAC;AACpE,YAAM,MAAM,aAAa,IAAI,EAAE,SAAS;AACxC,UAAI,IAAI,WAAW,KAAK,IAAI,IAAI,SAAS,CAAC,EAAG,UAAU,YAAY,QAAQ;AACzE,YAAI,KAAK,CAAC,EAAE,KAAK,CAAC;AAAA,MACpB,OAAO;AACL,YAAI,IAAI,SAAS,CAAC,EAAG,KAAK,EAAE,KAAK;AAAA,MACnC;AAAA,IACF;AAAA,EACF;AAGA,QAAM,YAAsB,CAAC;AAC7B,QAAM,YAAsB,CAAC;AAE7B,aAAW,SAAS,aAAa,OAAO,GAAG;AACzC,eAAW,WAAW,OAAO;AAC3B,UAAI,QAAQ,SAAS,EAAG;AACxB,iBAAW,KAAK,QAAS,WAAU,KAAK,CAAC;AACzC,eAAS,IA
AI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,iBAAS,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AAC3C,oBAAU,MAAM,QAAQ,CAAC,IAAK,QAAQ,CAAC,MAAO,CAAC;AAAA,QACjD;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,MAAI,UAAU,WAAW,KAAK,UAAU,SAAS,EAAG,QAAO;AAE3D,QAAM,uBAAuB,UAAU,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,UAAU;AAG9E,MAAI,uBAAuB;AAC3B,MAAI,gBAAgB;AACpB,WAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AACzC,aAAS,IAAI,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AAC7C,+BAAyB,UAAU,CAAC,IAAK,UAAU,CAAC,MAAO;AAC3D;AAAA,IACF;AAAA,EACF;AACA,yBAAuB,gBAAgB,IAAI,uBAAuB,gBAAgB;AAElF,MAAI,yBAAyB,EAAG,QAAO;AACvC,SAAO,IAAI,uBAAuB;AACpC;AAMO,SAAS,aAAa,GAAa,GAAuC;AAC/E,MAAI,EAAE,WAAW,KAAK,EAAE,WAAW,EAAG,QAAO,EAAE,GAAG,GAAG,GAAG,EAAE;AAE1D,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,EAAE;AAGb,QAAM,WAAW;AAAA,IACf,GAAG,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,IAAa,EAAE;AAAA,IAC5C,GAAG,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,IAAa,EAAE;AAAA,EAC9C,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,CAAC;AAG1B,QAAM,QAAkB,IAAI,MAAM,SAAS,MAAM;AACjD,MAAI,IAAI;AACR,SAAO,IAAI,SAAS,QAAQ;AAC1B,QAAI,IAAI;AACR,WAAO,IAAI,SAAS,UAAU,SAAS,CAAC,EAAG,MAAM,SAAS,CAAC,EAAG,EAAG;AACjE,UAAM,WAAW,IAAI,IAAI,KAAK;AAC9B,aAAS,IAAI,GAAG,IAAI,GAAG,IAAK,OAAM,CAAC,IAAI;AACvC,QAAI;AAAA,EACN;AAGA,MAAI,KAAK;AACT,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,QAAI,SAAS,CAAC,EAAG,UAAU,IAAK,OAAM,MAAM,CAAC;AAAA,EAC/C;AAEA,QAAM,KAAK,KAAM,MAAM,KAAK,KAAM;AAClC,QAAM,KAAK,KAAK,KAAK;AACrB,QAAM,IAAI,KAAK,IAAI,IAAI,EAAE;AAGzB,QAAM,KAAM,KAAK,KAAM;AACvB,QAAM,QAAQ,KAAK,KAAM,KAAK,MAAM,KAAK,KAAK,KAAM,EAAE;AAEtD,MAAI,UAAU,EAAG,QAAO,EAAE,GAAG,GAAG,EAAE;AAElC,QAAM,IAAI,KAAK,IAAI,IAAI,EAAE,IAAI;AAE7B,QAAM,IAAI,KAAK,IAAI,UAAU,CAAC;AAE9B,SAAO,EAAE,GAAG,EAAE;AAChB;AAGO,SAAS,cAAc,SAAiB,QAAwB;AACrE,MAAI,UAAU,EAAG,QAAO;AACxB,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,UAAU,MAAM,CAAC;AAClD;AAQO,SAAS,YACd,QACA,OACsC;AACtC,MAAI,OAAO,WAAW,MAAM,QAAQ;AAClC,UAAM,IAAI;AAAA,MACR,sCAAsC,OAAO,MAAM,OAAO,MAAM,MAAM;AAAA,IACxE;AAAA,EACF;AACA,QAAM,IAAI,OAAO;AACjB,MAAI,IAAI,EAAG,QAAO,EAAE,GAAG,GAAG,IAAI,GAAG,GAAG,EAAE;AAEtC,QAAM,QAAQ,OAAO,IAAI,CAAC,GAAG,MAAM,MAAM,CAAC,IAAK,C
AAC;AAChD,QAAM,OAAO,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAChD,QAAM,WAAW,MAAM,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,SAAS,GAAG,CAAC,KAAK,IAAI;AAC3E,QAAM,KAAK,KAAK,KAAK,WAAW,CAAC;AACjC,MAAI,OAAO,EAAG,QAAO,EAAE,GAAG,SAAS,IAAI,IAAI,UAAU,IAAI,IAAI,GAAG,GAAG,SAAS,IAAI,IAAI,EAAE;AAEtF,QAAM,IAAI,OAAO;AACjB,QAAM,KAAK,IAAI;AACf,QAAM,IAAI,KAAK,IAAI,YAAY,KAAK,IAAI,CAAC,GAAG,EAAE;AAC9C,SAAO,EAAE,GAAG,IAAI,EAAE;AACpB;AAMO,SAAS,mBAAmB,QAAkB,OAA2C;AAC9F,MAAI,OAAO,WAAW,MAAM,QAAQ;AAClC,UAAM,IAAI;AAAA,MACR,6CAA6C,OAAO,MAAM,OAAO,MAAM,MAAM;AAAA,IAC/E;AAAA,EACF;AACA,QAAM,QAAQ,OAAO,IAAI,CAAC,GAAGA,OAAM,MAAMA,EAAC,IAAK,CAAC,EAAE,OAAO,CAAC,MAAM,MAAM,CAAC;AACvE,QAAM,IAAI,MAAM;AAChB,MAAI,IAAI,EAAG,QAAO,EAAE,GAAG,GAAG,GAAG,EAAE;AAE/B,QAAM,WAAW,MACd,IAAI,CAAC,GAAGA,QAAO,EAAE,KAAK,KAAK,IAAI,CAAC,GAAG,MAAM,KAAK,KAAK,CAAC,GAAG,GAAAA,GAAE,EAAE,EAC3D,KAAK,CAAC,GAAG,MAAM,EAAE,MAAM,EAAE,GAAG;AAC/B,QAAM,QAAkB,IAAI,MAAM,CAAC;AACnC,MAAI,IAAI;AACR,SAAO,IAAI,GAAG;AACZ,QAAI,IAAI;AACR,WAAO,IAAI,KAAK,SAAS,CAAC,EAAG,QAAQ,SAAS,CAAC,EAAG,IAAK;AACvD,UAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,aAAS,IAAI,GAAG,IAAI,GAAG,IAAK,OAAM,SAAS,CAAC,EAAG,CAAC,IAAI;AACpD,QAAI;AAAA,EACN;AACA,MAAI,QAAQ;AACZ,WAAS,IAAI,GAAG,IAAI,GAAG,IAAK,KAAI,MAAM,CAAC,IAAK,EAAG,UAAS,MAAM,CAAC;AAE/D,QAAM,OAAQ,KAAK,IAAI,KAAM;AAC7B,QAAM,WAAY,KAAK,IAAI,MAAM,IAAI,IAAI,KAAM;AAC/C,QAAM,KAAK,QAAQ,QAAQ,KAAK,KAAK,QAAQ;AAC7C,QAAM,IAAI,KAAK,IAAI,UAAU,KAAK,IAAI,CAAC,CAAC;AACxC,SAAO,EAAE,GAAG,OAAO,EAAE;AACvB;AAOO,SAAS,QAAQ,GAAa,GAAqB;AACxD,MAAI,EAAE,SAAS,KAAK,EAAE,SAAS,EAAG,QAAO;AACzC,QAAM,QAAQ,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC/C,QAAM,QAAQ,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC/C,QAAM,OAAO,EAAE,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,UAAU,GAAG,CAAC,KAAK,EAAE,SAAS;AAC3E,QAAM,OAAO,EAAE,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,UAAU,GAAG,CAAC,KAAK,EAAE,SAAS;AAC3E,QAAM,SAAS,KAAK;AAAA,MAChB,EAAE,SAAS,KAAK,QAAQ,EAAE,SAAS,KAAK,SAAS,EAAE,SAAS,EAAE,SAAS;AAAA,EAC3E;AACA,MAAI,WAAW,EAAG,QAAO;AACzB,UAAQ,QAAQ,SAAS;AAC3B;AAGA,SAAS,YAAY,GAAW,IAAoB;AAClD,MAAI,MAAM,EAAG,QAAO;A
ACpB,MAAI,KAAK,IAAK,QAAO,UAAU,CAAC;AAChC,QAAM,IAAI,MAAM,KAAK,IAAI;AACzB,QAAM,IAAI,KAAK;AACf,QAAM,IAAI;AACV,QAAM,KAAK,eAAe,GAAG,GAAG,CAAC;AACjC,SAAO,KAAK,IAAI,IAAI,MAAM,KAAK,MAAM;AACvC;AAGA,SAAS,eAAe,GAAW,GAAW,GAAmB;AAC/D,MAAI,KAAK,EAAG,QAAO;AACnB,MAAI,KAAK,EAAG,QAAO;AACnB,QAAM,SAAS,QAAQ,CAAC,IAAI,QAAQ,CAAC,IAAI,QAAQ,IAAI,CAAC;AACtD,QAAM,QAAQ,KAAK,IAAI,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,CAAC,IAAI,IAAI,MAAM,IAAI;AACzE,QAAM,UAAU;AAChB,QAAM,MAAM;AACZ,MAAI,IAAI;AACR,MAAI,IAAI,KAAM,IAAI,KAAK,KAAM,IAAI;AACjC,MAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,MAAI,IAAI;AACR,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,KAAK,SAAS,KAAK;AACjC,UAAM,KAAK,IAAI;AACf,QAAI,MAAO,KAAK,IAAI,KAAK,MAAO,IAAI,KAAK,MAAM,IAAI;AACnD,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,SAAK,IAAI;AACT,UAAM,GAAG,IAAI,MAAM,IAAI,IAAI,KAAK,OAAO,IAAI,OAAO,IAAI,KAAK;AAC3D,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,UAAM,QAAQ,IAAI;AAClB,SAAK;AACL,QAAI,KAAK,IAAI,QAAQ,CAAC,IAAI,IAAK;AAAA,EACjC;AACA,SAAO,QAAQ;AACjB;AAGA,SAAS,QAAQ,GAAmB;AAClC,QAAM,IAAI;AACV,QAAM,QAAQ;AAAA,IACZ;AAAA,IAAqB;AAAA,IAAmB;AAAA,IAAqB;AAAA,IAC7D;AAAA,IAAqB;AAAA,IAAoB;AAAA,IAAsB;AAAA,IAC/D;AAAA,EACF;AACA,MAAI,IAAI,KAAK;AACX,WAAO,KAAK,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,QAAQ,IAAI,CAAC;AAAA,EAClE;AACA,OAAK;AACL,MAAI,IAAI,MAAM,CAAC;AACf,WAAS,IAAI,GAAG,IAAI,IAAI,GAAG,IAAK,MAAK,MAAM,CAAC,KAAM,IAAI;AACtD,QAAM,IAAI,IAAI,IAAI;AAClB,SAAO,MAAM,KAAK,IAAI,IAAI,KAAK,EAAE,KAAK,IAAI,OAAO,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC;AAC/E;AAGA,SAAS,UAAU,GAAmB;AACpC,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,IAAI;AAEV,QAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,QAAM,OAAO,KAAK,IAAI,CAAC;AACvB,QAAM,IAAI,KAAK,IAAI,IAAI;AACvB,QAAM,IAAI,QAAQ,KAAK,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,KAAK,IAAK,CAAC,OAAO,OAAQ,CAAC;AAE9F,SAAO,OAAO,IAAI,OAAO;AAC3B;","names":["i"]}