@tangle-network/agent-eval 0.27.0 → 0.27.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +72 -0
  2. package/README.md +4 -5
  3. package/dist/builder-eval/index.js +1 -1
  4. package/dist/{chunk-WWYCWKUM.js → chunk-3CKU6VGU.js} +2 -2
  5. package/dist/{chunk-K2TPS5LB.js → chunk-4U4BKCXK.js} +2 -2
  6. package/dist/chunk-4U4BKCXK.js.map +1 -0
  7. package/dist/{chunk-2A5XJB43.js → chunk-5AKPEK5L.js} +3 -3
  8. package/dist/chunk-5AKPEK5L.js.map +1 -0
  9. package/dist/{chunk-RAF443UI.js → chunk-DBIGN5MJ.js} +2 -2
  10. package/dist/{chunk-JLZQWFV3.js → chunk-K33INZHH.js} +2 -2
  11. package/dist/chunk-K33INZHH.js.map +1 -0
  12. package/dist/{chunk-NU65VQ7M.js → chunk-MAZ26DC7.js} +1 -1
  13. package/dist/chunk-MAZ26DC7.js.map +1 -0
  14. package/dist/{chunk-LSH4MMOZ.js → chunk-NCRFYPS3.js} +1 -1
  15. package/dist/chunk-NCRFYPS3.js.map +1 -0
  16. package/dist/{chunk-ZN274SWR.js → chunk-PALJO75S.js} +2 -2
  17. package/dist/{chunk-OWLAAMME.js → chunk-QHF6EQKK.js} +3 -2
  18. package/dist/chunk-QHF6EQKK.js.map +1 -0
  19. package/dist/chunk-R5UQJNKC.js +722 -0
  20. package/dist/chunk-R5UQJNKC.js.map +1 -0
  21. package/dist/{chunk-SESZDQPX.js → chunk-RUI6SIHY.js} +3 -3
  22. package/dist/chunk-RUI6SIHY.js.map +1 -0
  23. package/dist/{chunk-WHZMVFUV.js → chunk-SZSBQUIJ.js} +2 -2
  24. package/dist/chunk-SZSBQUIJ.js.map +1 -0
  25. package/dist/{chunk-4F5DQN55.js → chunk-VSMTAMNK.js} +1 -1
  26. package/dist/chunk-VSMTAMNK.js.map +1 -0
  27. package/dist/{chunk-5LBB5B3Z.js → chunk-XFZCM5Z3.js} +1 -1
  28. package/dist/chunk-XFZCM5Z3.js.map +1 -0
  29. package/dist/cli.js +1 -1
  30. package/dist/{control-CBShYYA6.d.ts → control-BT4qnXiS.d.ts} +2 -2
  31. package/dist/{control-runtime-BuJHoLg0.d.ts → control-runtime-BZ_lVLYW.d.ts} +1 -0
  32. package/dist/control.d.ts +3 -3
  33. package/dist/control.js +2 -2
  34. package/dist/{failure-cluster-C2EGSDiT.d.ts → failure-cluster-Cw65_5FY.d.ts} +1 -2
  35. package/dist/{feedback-trajectory-DfFdrraJ.d.ts → feedback-trajectory-D1aGKusy.d.ts} +1 -1
  36. package/dist/governance/index.d.ts +1 -1
  37. package/dist/{index-D3iBCjdF.d.ts → index-BhLlu-qO.d.ts} +1 -1
  38. package/dist/index.d.ts +157 -167
  39. package/dist/index.js +25 -335
  40. package/dist/index.js.map +1 -1
  41. package/dist/knowledge/index.d.ts +1 -1
  42. package/dist/knowledge/index.js +2 -2
  43. package/dist/{multi-layer-verifier-LkP3LVKj.d.ts → multi-layer-verifier-U-c8ge1k.d.ts} +1 -1
  44. package/dist/openapi.json +1 -1
  45. package/dist/optimization.d.ts +5 -5
  46. package/dist/optimization.js +5 -5
  47. package/dist/pipelines/index.d.ts +1 -1
  48. package/dist/pipelines/index.js +2 -2
  49. package/dist/{release-report-wfUySN5F.d.ts → release-report-CCQqnK46.d.ts} +1 -1
  50. package/dist/{replay-BL96gCEP.d.ts → replay-D7z0J43-.d.ts} +4 -5
  51. package/dist/reporting.d.ts +4 -4
  52. package/dist/reporting.js +5 -5
  53. package/dist/{researcher-bGkI7vCl.d.ts → researcher-G81CWc0q.d.ts} +9 -10
  54. package/dist/rl.d.ts +26 -44
  55. package/dist/rl.js +5 -5
  56. package/dist/rl.js.map +1 -1
  57. package/dist/{sequential-Dgz1n51-.d.ts → sequential-5iSVfzl2.d.ts} +2 -2
  58. package/dist/{summary-report-DZVXOCK_.d.ts → summary-report-Dl4akLKX.d.ts} +5 -5
  59. package/dist/traces.d.ts +1 -1
  60. package/dist/traces.js +2 -2
  61. package/dist/wire/index.d.ts +2 -2
  62. package/dist/wire/index.js +1 -1
  63. package/docs/research-report-methodology.md +4 -4
  64. package/docs/three-package-architecture.md +12 -24
  65. package/package.json +1 -1
  66. package/dist/chunk-2A5XJB43.js.map +0 -1
  67. package/dist/chunk-4F5DQN55.js.map +0 -1
  68. package/dist/chunk-5LBB5B3Z.js.map +0 -1
  69. package/dist/chunk-I4MBDTY5.js +0 -272
  70. package/dist/chunk-I4MBDTY5.js.map +0 -1
  71. package/dist/chunk-JLZQWFV3.js.map +0 -1
  72. package/dist/chunk-K2TPS5LB.js.map +0 -1
  73. package/dist/chunk-LSH4MMOZ.js.map +0 -1
  74. package/dist/chunk-NU65VQ7M.js.map +0 -1
  75. package/dist/chunk-OWLAAMME.js.map +0 -1
  76. package/dist/chunk-SESZDQPX.js.map +0 -1
  77. package/dist/chunk-WHZMVFUV.js.map +0 -1
  78. /package/dist/{chunk-WWYCWKUM.js.map → chunk-3CKU6VGU.js.map} +0 -0
  79. /package/dist/{chunk-RAF443UI.js.map → chunk-DBIGN5MJ.js.map} +0 -0
  80. /package/dist/{chunk-ZN274SWR.js.map → chunk-PALJO75S.js.map} +0 -0
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/wire/schemas.ts","../src/wire/rubrics.ts","../src/wire/handlers.ts","../src/wire/openapi.ts","../src/wire/rpc.ts","../src/wire/server.ts"],"sourcesContent":["/**\n * Wire-protocol schemas.\n *\n * These Zod schemas are the contract between the agent-eval runtime and\n * any non-TypeScript client (Python, Rust, Go, …). They get rendered to\n * OpenAPI by `wire/openapi.ts` and code-generators consume that spec to\n * produce typed clients in other languages.\n *\n * Rule: if it's not in this file, it isn't on the wire. Keep names and\n * shapes self-explanatory — every field has a `.describe()` so the\n * generated docs are useful without reading the source.\n */\nimport { extendZodWithOpenApi } from '@asteasolutions/zod-to-openapi'\nimport { z } from 'zod'\n\nextendZodWithOpenApi(z)\n\n// ── Building blocks ─────────────────────────────────────────────────\n\nexport const RubricDimensionSchema = z\n .object({\n id: z\n .string()\n .min(1)\n .describe('Short stable id like \"buyer_quality\" — used as the key in scoring output.'),\n description: z\n .string()\n .min(1)\n .describe('One-line plain-English meaning. Read by humans reviewing low scores.'),\n weight: z\n .number()\n .min(0)\n .default(1)\n .describe('Relative weight in the composite score. Default 1; 0 disables.'),\n min: z.number().default(0).describe('Lower bound of valid score for this dimension.'),\n max: z.number().default(1).describe('Upper bound of valid score for this dimension.'),\n })\n .openapi('RubricDimension')\n\nexport const FailureModeSchema = z\n .object({\n id: z.string().min(1).describe('Short stable id like \"ai-cadence\" — used in detection lists.'),\n description: z.string().min(1).describe('Plain-English description of the failure pattern.'),\n })\n .openapi('FailureMode')\n\n// ── Rubric ──────────────────────────────────────────────────────────\n\nexport const RubricSchema = z\n .object({\n name: z\n .string()\n .min(1)\n .describe('Stable name like \"anti-slop\" — used by clients to invoke this rubric.'),\n description: z\n .string()\n .min(1)\n .describe('What this rubric measures. Shown in /v1/rubrics listing.'),\n systemPrompt: z\n .string()\n .min(1)\n .describe(\n 'Instructs the judging LLM. Should explain the persona (e.g. \"senior engineer reviewing voice\"), what to score on, and what to return.',\n ),\n dimensions: z\n .array(RubricDimensionSchema)\n .min(1)\n .describe('Scoring axes. The composite score is a weighted sum of these.'),\n failureModes: z\n .array(FailureModeSchema)\n .default([])\n .describe('Patterns to detect; each detected mode appears in the result.failureModes list.'),\n wins: z\n .array(FailureModeSchema)\n .default([])\n .describe('Positive patterns; each detected one appears in the result.wins list.'),\n })\n .openapi('Rubric')\n\n// ── Judge call ──────────────────────────────────────────────────────\n\nexport const JudgeRequestSchema = z\n .object({\n rubricName: z\n .string()\n .optional()\n .describe('Use a built-in rubric by name. Mutually exclusive with `rubric`.'),\n rubric: RubricSchema.optional().describe(\n 'Inline rubric definition. Mutually exclusive with `rubricName`.',\n ),\n content: z\n .string()\n .min(1)\n .describe('The text being judged — a tweet, a blog post, a code snippet, anything stringly.'),\n context: z\n .record(z.string(), z.unknown())\n .optional()\n .describe(\n 'Free-form metadata for the rubric to use — analytics, source URL, author, etc. Surfaced to the LLM.',\n ),\n model: z\n .string()\n .optional()\n .describe('Override the judge model (default routes via tcloud). e.g. \"claude-opus-4-7\".'),\n })\n .refine((v) => Boolean(v.rubricName) !== Boolean(v.rubric), {\n message: 'Provide exactly one of `rubricName` or `rubric`.',\n })\n .openapi('JudgeRequest')\n\nexport const JudgeResultSchema = z\n .object({\n composite: z\n .number()\n .min(0)\n .max(1)\n .describe('Weighted combination of dimension scores in 0..1. The single number to gate on.'),\n dimensions: z\n .record(z.string(), z.number())\n .describe('Per-dimension score, keyed by RubricDimension.id.'),\n failureModes: z\n .array(z.string())\n .default([])\n .describe('Failure-mode ids detected in the content (subset of rubric.failureModes ids).'),\n wins: z\n .array(z.string())\n .default([])\n .describe('Win ids detected in the content (subset of rubric.wins ids).'),\n rationale: z\n .string()\n .describe('Plain-English explanation of the score. Surfaced to the human reviewer.'),\n rubricVersion: z\n .string()\n .describe(\n 'Stable hash of the rubric used. Scores are only comparable across runs when this matches.',\n ),\n model: z.string().describe('Model that produced the judgement, for reproducibility.'),\n durationMs: z.number().int().nonnegative().describe('End-to-end wall time for this call.'),\n })\n .openapi('JudgeResult')\n\n// ── Rubric listing ──────────────────────────────────────────────────\n\nexport const RubricInfoSchema = z\n .object({\n name: z.string().describe('Pass this to /v1/judge as `rubricName`.'),\n description: z.string().describe('What this rubric measures.'),\n dimensions: z\n .array(z.object({ id: z.string(), description: z.string(), weight: z.number() }))\n .describe('The scoring axes this rubric uses, with weights.'),\n failureModes: z.array(z.string()).default([]).describe('Failure-mode ids this rubric detects.'),\n rubricVersion: z.string().describe('Stable hash — match this to compare scores across runs.'),\n })\n .openapi('RubricInfo')\n\nexport const ListRubricsResponseSchema = z\n .object({\n rubrics: z.array(RubricInfoSchema),\n })\n .openapi('ListRubricsResponse')\n\n// ── Version / health ────────────────────────────────────────────────\n\nexport const VersionResponseSchema = z\n .object({\n package: z.string().describe('Package name (always \"@tangle-network/agent-eval\").'),\n version: z.string().describe('Semver of the running server. Match your client to this.'),\n wireVersion: z\n .string()\n .describe(\n 'Wire-protocol semver. Bumps separately from package version when the schema changes.',\n ),\n apiSurface: z.array(z.string()).describe('List of supported method names.'),\n })\n .openapi('VersionResponse')\n\nexport const HealthResponseSchema = z\n .object({\n status: z.literal('ok'),\n uptimeSec: z.number(),\n })\n .openapi('HealthResponse')\n\n// ── Ingestion: production traces + user feedback (0.25.0) ───────────\n\n/**\n * Minimal `TraceEvent` shape that the production runtime emits.\n * Matches `trace/schema.ts` `TraceEvent` but is duplicated here as a\n * wire schema so non-TypeScript clients can validate without depending\n * on internal types.\n */\nexport const TraceEventSchema = z\n .object({\n eventId: z.string().min(1).describe('Stable id for the event. Use ULID or UUID.'),\n runId: z.string().min(1).describe('Run this event belongs to.'),\n spanId: z.string().optional().describe('Span that emitted the event, if any.'),\n kind: z\n .enum([\n 'log',\n 'error',\n 'budget_decrement',\n 'budget_breach',\n 'state_mutation',\n 'policy_violation',\n 'redaction_applied',\n 'custom',\n ])\n .describe('Coarse event category — matches the TraceSchema v1 EventKind enum.'),\n timestamp: z\n .number()\n .int()\n .nonnegative()\n .describe('Unix millis. Must be monotonically non-decreasing within a span.'),\n payload: z\n .record(z.string(), z.unknown())\n .describe('Free-form payload — the runtime owns the shape.'),\n })\n .openapi('TraceEvent')\n\nexport const TracesIngestRequestSchema = z\n .object({\n events: z\n .array(TraceEventSchema)\n .min(1)\n .max(10_000)\n .describe('Batch of events. Max 10k per call — bigger streams should be chunked.'),\n })\n .openapi('TracesIngestRequest')\n\nexport const TracesIngestResponseSchema = z\n .object({\n accepted: z.number().int().nonnegative().describe('Number of events persisted.'),\n rejected: z\n .number()\n .int()\n .nonnegative()\n .describe('Number of events the store refused — see `errors[]` for reasons.'),\n errors: z\n .array(\n z.object({\n eventId: z.string().describe('Event id this error applies to.'),\n message: z.string().describe('Why the event was rejected.'),\n }),\n )\n .default([]),\n })\n .openapi('TracesIngestResponse')\n\nexport const FeedbackLabelSchema = z\n .object({\n id: z.string().optional(),\n source: z.enum(['user', 'judge', 'environment', 'metric', 'policy', 'system']),\n kind: z.enum([\n 'approve',\n 'reject',\n 'select',\n 'edit',\n 'rank',\n 'rate',\n 'comment',\n 'metric_outcome',\n 'policy_block',\n 'revision_request',\n ]),\n value: z.unknown(),\n reason: z.string().optional(),\n severity: z.enum(['info', 'warning', 'error', 'critical']).optional(),\n createdAt: z.string().describe('ISO-8601 UTC.'),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .openapi('FeedbackLabel')\n\nexport const FeedbackAttemptSchema = z\n .object({\n id: z.string().min(1),\n stepIndex: z.number().int().nonnegative(),\n artifactType: z.enum([\n 'text',\n 'code',\n 'plan',\n 'research',\n 'action',\n 'ui',\n 'decision',\n 'data',\n 'other',\n ]),\n artifact: z.unknown(),\n options: z.array(z.unknown()).optional(),\n proposedAction: z\n .object({\n type: z.string(),\n risk: z.enum(['low', 'medium', 'high']).optional(),\n costUsd: z.number().optional(),\n externalSideEffect: z.boolean().optional(),\n requiresApproval: z.boolean().optional(),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .optional(),\n feedback: z.array(FeedbackLabelSchema).optional(),\n createdAt: z.string(),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .openapi('FeedbackAttempt')\n\nexport const FeedbackTrajectorySchema = z\n .object({\n id: z.string().min(1).describe('Stable id; idempotency key for the trajectory.'),\n projectId: z.string().optional(),\n scenarioId: z.string().optional(),\n task: z.object({\n intent: z.string().min(1),\n context: z.unknown().optional(),\n }),\n attempts: z.array(FeedbackAttemptSchema).default([]),\n labels: z.array(FeedbackLabelSchema).default([]),\n outcome: z\n .object({\n success: z.boolean().optional(),\n score: z.number().optional(),\n metrics: z.record(z.string(), z.number()).optional(),\n costUsd: z.number().optional(),\n detail: z.string().optional(),\n observedAt: z.string().optional(),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .optional(),\n split: z.enum(['train', 'dev', 'test', 'holdout']).optional(),\n tags: z.record(z.string(), z.string()).optional(),\n createdAt: z.string().describe('ISO-8601 UTC.'),\n updatedAt: z.string().optional(),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .openapi('FeedbackTrajectory')\n\nexport const FeedbackIngestResponseSchema = z\n .object({\n id: z.string().describe('Trajectory id that was persisted.'),\n persisted: z.boolean().describe('True when the trajectory was saved (idempotent on id).'),\n })\n .openapi('FeedbackIngestResponse')\n\nexport type TraceEvent = z.infer<typeof TraceEventSchema>\nexport type TracesIngestRequest = z.infer<typeof TracesIngestRequestSchema>\nexport type TracesIngestResponse = z.infer<typeof TracesIngestResponseSchema>\nexport type FeedbackTrajectory = z.infer<typeof FeedbackTrajectorySchema>\nexport type FeedbackIngestResponse = z.infer<typeof FeedbackIngestResponseSchema>\n\n// ── Errors ──────────────────────────────────────────────────────────\n\nexport const ErrorResponseSchema = z\n .object({\n error: z\n .object({\n code: z\n .string()\n .describe(\n 'Machine-readable code: \"validation_error\", \"rubric_not_found\", \"judge_error\".',\n ),\n message: z.string().describe('Human-readable message.'),\n details: z.unknown().optional().describe('Optional structured detail.'),\n })\n .describe('Errors are always wrapped in this shape across all endpoints.'),\n })\n .openapi('ErrorResponse')\n\n// ── Type exports for callers in the same package ────────────────────\n\nexport type RubricDimension = z.infer<typeof RubricDimensionSchema>\nexport type FailureMode = z.infer<typeof FailureModeSchema>\nexport type Rubric = z.infer<typeof RubricSchema>\nexport type JudgeRequest = z.infer<typeof JudgeRequestSchema>\nexport type JudgeResult = z.infer<typeof JudgeResultSchema>\nexport type RubricInfo = z.infer<typeof RubricInfoSchema>\nexport type ListRubricsResponse = z.infer<typeof ListRubricsResponseSchema>\nexport type VersionResponse = z.infer<typeof VersionResponseSchema>\nexport type ErrorResponse = z.infer<typeof ErrorResponseSchema>\n\n// ── Wire-protocol version ───────────────────────────────────────────\n\n/**\n * Bump on any breaking change to a request/response schema.\n * Non-breaking (additive) changes don't require a bump.\n */\nexport const WIRE_VERSION = '1.0.0'\n\n/**\n * Stable hash of a rubric. Used to make scores comparable across runs:\n * if the rubricVersion matches, the rubric was identical.\n */\nexport function hashRubric(rubric: Rubric): string {\n const stable = stableStringify(rubric)\n let h = 5381\n for (let i = 0; i < stable.length; i++) {\n h = (h * 33) ^ stable.charCodeAt(i)\n }\n // Unsigned 32-bit hex, prefixed with rubric name + version slot\n return `${rubric.name}@${(h >>> 0).toString(16).padStart(8, '0')}`\n}\n\nfunction stableStringify(value: unknown): string {\n if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(',')}]`\n if (value && typeof value === 'object') {\n const entries = Object.entries(value as Record<string, unknown>)\n .sort(([a], [b]) => a.localeCompare(b))\n .map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`)\n return `{${entries.join(',')}}`\n }\n return JSON.stringify(value)\n}\n","/**\n * Built-in rubrics shipped with agent-eval.\n *\n * A rubric is a set of scoring axes plus a system prompt that tells the\n * judging LLM how to grade against those axes. Built-in rubrics are\n * curated for use cases that recur across Tangle projects — call them\n * by name from any client.\n *\n * Adding a rubric:\n * 1. Define the Rubric object below with a clear `description` and\n * named `dimensions`.\n * 2. Register it in `BUILTIN_RUBRICS` at the bottom.\n * 3. Add a test in `tests/wire/rubrics.test.ts`.\n *\n * Custom rubrics: callers pass `rubric` inline to /v1/judge instead of\n * `rubricName` — see schemas.ts.\n */\nimport type { Rubric } from './schemas'\nimport { hashRubric } from './schemas'\n\n// ── anti-slop ───────────────────────────────────────────────────────\n// Voice/style judge tuned for technical-buyer audiences. Used by the\n// Postiz autoresearch loop and any content-quality gate.\n\nconst ANTI_SLOP: Rubric = {\n name: 'anti-slop',\n description:\n 'Voice and signal quality for content aimed at senior engineers. Catches AI cadence, marketing tone, and engagement-bait shapes.',\n systemPrompt: `You are evaluating a piece of content written for senior engineers and technical founders.\n\nYou score three things:\n- buyer_quality (0..1): would a senior engineer in the target ICP find this worth their attention? High = specific, earned, technically interesting. Low = generic, hyped, off-target.\n- voice (0..1): does it read like a person who built the thing, or like AI/marketing copy?\n- signal (0..1): does it contain a non-obvious detail, constraint, or claim a reader couldn't get from the public docs?\n\nDetect failure modes (return ids matching):\n- ai-cadence: rule-of-three openings, em-dash flourish, \"Let me explain\", \"Here's the thing\", AI rhythm\n- marketing-tone: \"We're excited to announce\", \"thrilled\", \"delighted\", \"game-changer\", buzzword stack\n- vague-claim: technical claim without a specific component, file, or measurement\n- no-hook: opening doesn't earn attention from the target reader\n- engagement-bait: \"agree?\", \"thoughts?\", listicles, controversy-fishing, hook-detail-pitch\n- off-icp: content shape would attract motivational/grift/hype audiences instead of buyers\n- stale-claim: repeats a positioning line we've used many times this month\n\nDetect wins (return ids matching):\n- specific-component: names a real file, component, or measurement\n- earned-detail: shares a non-obvious detail not derivable from public docs\n- constraint-articulated: names a real tradeoff and the side chosen\n- honest-failure: describes a real failure mode and what was done about it\n\nReturn ONLY JSON matching the response schema. Be conservative — most content has 0-1 wins and 1-2 failure modes, not many of each.`,\n dimensions: [\n {\n id: 'buyer_quality',\n description: 'Would the target buyer find this worth attention?',\n weight: 0.5,\n min: 0,\n max: 1,\n },\n {\n id: 'voice',\n description: 'Does it sound like a builder, not AI or marketing?',\n weight: 0.3,\n min: 0,\n max: 1,\n },\n {\n id: 'signal',\n description: 'Non-obvious detail, constraint, or claim?',\n weight: 0.2,\n min: 0,\n max: 1,\n },\n ],\n failureModes: [\n { id: 'ai-cadence', description: 'AI-rhythm openings and transitions' },\n { id: 'marketing-tone', description: 'Buzzwords, hype, corporate-PR voice' },\n { id: 'vague-claim', description: 'Technical claim without specifics' },\n { id: 'no-hook', description: 'Opening fails to earn attention' },\n { id: 'engagement-bait', description: 'Listicle/controversy/agree-pattern' },\n { id: 'off-icp', description: 'Voice attracts the wrong audience' },\n { id: 'stale-claim', description: 'Reuses an over-used positioning line' },\n ],\n wins: [\n { id: 'specific-component', description: 'Names a real file/component/number' },\n { id: 'earned-detail', description: 'Detail not in public docs' },\n { id: 'constraint-articulated', description: 'Names a real tradeoff' },\n { id: 'honest-failure', description: 'Describes a real failure honestly' },\n ],\n}\n\n// ── Registry ────────────────────────────────────────────────────────\n\nexport const BUILTIN_RUBRICS: Record<string, Rubric> = {\n 'anti-slop': ANTI_SLOP,\n}\n\n/** Get a built-in rubric by name, or undefined. */\nexport function getBuiltinRubric(name: string): Rubric | undefined {\n return BUILTIN_RUBRICS[name]\n}\n\n/** List built-in rubrics with their stable versions. */\nexport function listBuiltinRubrics() {\n return Object.values(BUILTIN_RUBRICS).map((r) => ({\n name: r.name,\n description: r.description,\n dimensions: r.dimensions.map((d) => ({\n id: d.id,\n description: d.description,\n weight: d.weight,\n })),\n failureModes: r.failureModes.map((f) => f.id),\n rubricVersion: hashRubric(r),\n }))\n}\n","/**\n * Pure handler functions — the \"business logic\" behind every wire-protocol\n * method. The HTTP server (`server.ts`) and the stdio RPC (`rpc.ts`) both\n * call these. Tests call these directly without spinning a server.\n *\n * Each handler:\n * - Takes a parsed request (already Zod-validated by the transport).\n * - Returns a result that matches the response schema.\n * - Throws `WireError` for caller-fixable errors (404, 400, 422).\n * - Lets unexpected errors bubble — the transport maps them to 500.\n */\nimport type { FeedbackTrajectoryStore } from '../feedback-trajectory'\nimport { callLlmJson } from '../llm-client'\nimport type { TraceEvent as InternalTraceEvent } from '../trace/schema'\nimport type { TraceStore } from '../trace/store'\nimport { getBuiltinRubric, listBuiltinRubrics } from './rubrics'\nimport {\n type FeedbackIngestResponse,\n hashRubric,\n type JudgeRequest,\n type JudgeResult,\n type ListRubricsResponse,\n type Rubric,\n type TracesIngestRequest,\n type TracesIngestResponse,\n type VersionResponse,\n WIRE_VERSION,\n type FeedbackTrajectory as WireFeedbackTrajectory,\n} from './schemas'\n\n/** Caller-fixable error. The transport renders this to 4xx + ErrorResponse. */\nexport class WireError extends Error {\n constructor(\n public readonly code: string,\n message: string,\n public readonly status: number = 400,\n public readonly details?: unknown,\n ) {\n super(message)\n this.name = 'WireError'\n }\n}\n\n// ── judge ───────────────────────────────────────────────────────────\n\n/** The JSON schema we ask the judging LLM to fill in. */\nfunction judgeOutputSchema(rubric: Rubric) {\n return {\n name: 'JudgeOutput',\n schema: {\n type: 'object',\n additionalProperties: false,\n properties: {\n dimensions: {\n type: 'object',\n additionalProperties: false,\n properties: Object.fromEntries(\n rubric.dimensions.map((d) => [\n d.id,\n { type: 'number', minimum: d.min, maximum: d.max },\n ]),\n ),\n required: rubric.dimensions.map((d) => d.id),\n },\n failureModes: {\n type: 'array',\n items: { type: 'string', enum: rubric.failureModes.map((f) => f.id) },\n },\n wins: {\n type: 'array',\n items: { type: 'string', enum: rubric.wins.map((w) => w.id) },\n },\n rationale: { type: 'string' },\n },\n required: ['dimensions', 'rationale'],\n } as Record<string, unknown>,\n }\n}\n\ninterface JudgeOutput {\n dimensions: Record<string, number>\n failureModes?: string[]\n wins?: string[]\n rationale: string\n}\n\nfunction validateJudgeOutput(value: unknown, rubric: Rubric): JudgeOutput {\n if (!value || typeof value !== 'object') {\n throw new WireError('judge_error', 'Judge returned malformed output.', 500, value)\n }\n const raw = value as Record<string, unknown>\n const rawDimensions = raw.dimensions\n if (!rawDimensions || typeof rawDimensions !== 'object' || Array.isArray(rawDimensions)) {\n throw new WireError('judge_error', 'Judge returned malformed dimensions.', 500, value)\n }\n\n const dimensions: Record<string, number> = {}\n const dimensionRecord = rawDimensions as Record<string, unknown>\n for (const dim of rubric.dimensions) {\n const score = dimensionRecord[dim.id]\n if (\n typeof score !== 'number' ||\n !Number.isFinite(score) ||\n score < dim.min ||\n score > dim.max\n ) {\n throw new WireError(\n 'judge_error',\n `Judge returned invalid score for dimension \"${dim.id}\".`,\n 500,\n value,\n )\n }\n dimensions[dim.id] = score\n }\n\n const allowedFailures = new Set(rubric.failureModes.map((mode) => mode.id))\n const allowedWins = new Set(rubric.wins.map((win) => win.id))\n const failureModes = validateIdArray(raw.failureModes, allowedFailures, 'failureModes', value)\n const wins = validateIdArray(raw.wins, allowedWins, 'wins', value)\n if (typeof raw.rationale !== 'string' || raw.rationale.trim().length === 0) {\n throw new WireError('judge_error', 'Judge returned missing rationale.', 500, value)\n }\n\n return { dimensions, failureModes, wins, rationale: raw.rationale }\n}\n\nfunction validateIdArray(\n raw: unknown,\n allowed: Set<string>,\n field: 'failureModes' | 'wins',\n original: unknown,\n): string[] {\n if (raw === undefined) return []\n if (!Array.isArray(raw)) {\n throw new WireError('judge_error', `Judge returned non-array ${field}.`, 500, original)\n }\n const out: string[] = []\n for (const item of raw) {\n if (typeof item !== 'string' || !allowed.has(item)) {\n throw new WireError(\n 'judge_error',\n `Judge returned unknown ${field} id \"${String(item)}\".`,\n 500,\n original,\n )\n }\n out.push(item)\n }\n return out\n}\n\nfunction compositeScore(dimensions: Record<string, number>, rubric: Rubric): number {\n let weighted = 0\n let totalWeight = 0\n for (const dim of rubric.dimensions) {\n const raw = dimensions[dim.id] ?? 0\n const range = dim.max - dim.min || 1\n const normalized = Math.max(0, Math.min(1, (raw - dim.min) / range))\n weighted += normalized * dim.weight\n totalWeight += dim.weight\n }\n return totalWeight > 0 ? weighted / totalWeight : 0\n}\n\nfunction buildJudgePrompt(content: string, context: unknown): string {\n const ctx = context && Object.keys(context as object).length ? JSON.stringify(context) : ''\n return [\n `CONTENT TO JUDGE:`,\n content,\n '',\n ctx ? `CONTEXT (metadata, analytics, etc.):` : '',\n ctx ? ctx : '',\n ]\n .filter(Boolean)\n .join('\\n')\n}\n\nconst DEFAULT_JUDGE_MODEL = 'claude-sonnet-4-6'\n\nexport async function handleJudge(req: JudgeRequest): Promise<JudgeResult> {\n // Resolve rubric\n let rubric: Rubric\n if (req.rubricName) {\n const found = getBuiltinRubric(req.rubricName)\n if (!found) {\n throw new WireError('rubric_not_found', `No built-in rubric named \"${req.rubricName}\".`, 404)\n }\n rubric = found\n } else if (req.rubric) {\n rubric = req.rubric\n } else {\n // refine() in the schema should already have caught this — defense in depth\n throw new WireError('validation_error', 'Provide either `rubricName` or `rubric`.', 422)\n }\n\n const startedAt = Date.now()\n const model = req.model ?? DEFAULT_JUDGE_MODEL\n\n const { value, result } = await callLlmJson<JudgeOutput>({\n model,\n messages: [\n { role: 'system', content: rubric.systemPrompt },\n { role: 'user', content: buildJudgePrompt(req.content, req.context) },\n ],\n jsonSchema: judgeOutputSchema(rubric),\n temperature: 0.0,\n timeoutMs: 60_000,\n })\n\n const output = validateJudgeOutput(value, rubric)\n\n const composite = compositeScore(output.dimensions, rubric)\n const durationMs = Date.now() - startedAt\n\n return {\n composite,\n dimensions: output.dimensions,\n failureModes: output.failureModes ?? [],\n wins: output.wins ?? [],\n rationale: output.rationale,\n rubricVersion: hashRubric(rubric),\n model: result.model,\n durationMs,\n }\n}\n\n// ── listRubrics ─────────────────────────────────────────────────────\n\nexport function handleListRubrics(): ListRubricsResponse {\n return { rubrics: listBuiltinRubrics() }\n}\n\n// ── version ─────────────────────────────────────────────────────────\n\nimport { readFileSync } from 'node:fs'\nimport { dirname, resolve } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nlet CACHED_VERSION: string | undefined\n\nfunction readPackageVersion(): string {\n if (CACHED_VERSION) return CACHED_VERSION\n // Walk up from this file looking for the nearest package.json.\n // In dist/ this is dist/.., in src/wire/ this is ../../package.json.\n const here = dirname(fileURLToPath(import.meta.url))\n const candidates = [\n resolve(here, '..', '..', 'package.json'), // src/wire → repo root\n resolve(here, '..', 'package.json'), // dist → repo root\n ]\n for (const path of candidates) {\n try {\n const pkg = JSON.parse(readFileSync(path, 'utf-8')) as { version?: string }\n if (pkg.version) {\n CACHED_VERSION = pkg.version\n return pkg.version\n }\n } catch {\n // try next\n }\n }\n return '0.0.0-unknown'\n}\n\nexport function handleVersion(): VersionResponse {\n return {\n package: '@tangle-network/agent-eval',\n version: readPackageVersion(),\n wireVersion: WIRE_VERSION,\n apiSurface: ['judge', 'listRubrics', 'version', 'feedback.ingest', 'traces.ingest'],\n }\n}\n\n// ── Ingestion handlers (0.25.0) ─────────────────────────────────────\n\n/**\n * Pluggable stores the wire layer routes ingestion writes into. Both\n * are optional — when omitted, the corresponding endpoint returns 503.\n *\n * Production deployments wire a `FileSystemTraceStore` and\n * `FileSystemFeedbackTrajectoryStore` here. Tests substitute in-memory\n * stores.\n */\nexport interface IngestionStores {\n traceStore?: TraceStore\n feedbackStore?: FeedbackTrajectoryStore\n}\n\n/**\n * `POST /v1/traces/ingest` — accept a batch of `TraceEvent`s from the\n * production runtime. Best-effort: each event is appended independently;\n * one bad event does not poison the batch.\n *\n * Idempotency: the underlying store is append-only; consumers retrying\n * the same payload will get duplicate events. Consumers should\n * de-duplicate by `eventId` downstream — production traces frequently\n * land via at-least-once buses (Kafka, SQS) where dedup is unavoidable.\n */\nexport async function handleTracesIngest(\n req: TracesIngestRequest,\n stores: IngestionStores,\n): Promise<TracesIngestResponse> {\n if (!stores.traceStore) {\n throw new WireError(\n 'service_unavailable',\n 'No trace store configured on this server. Pass `traceStore` to `createApp`.',\n 503,\n )\n }\n const errors: Array<{ eventId: string; message: string }> = []\n let accepted = 0\n for (const event of req.events) {\n try {\n // The wire `TraceEvent` is structurally identical to the internal one.\n await stores.traceStore.appendEvent(event as InternalTraceEvent)\n accepted++\n } catch (err) {\n errors.push({\n eventId: event.eventId,\n message: err instanceof Error ? err.message : String(err),\n })\n }\n }\n return { accepted, rejected: errors.length, errors }\n}\n\n/**\n * `POST /v1/feedback` — accept a single `FeedbackTrajectory` from the\n * production runtime. Idempotent on `id`: re-posting the same trajectory\n * replaces the prior record.\n */\nexport async function handleFeedbackIngest(\n req: WireFeedbackTrajectory,\n stores: IngestionStores,\n): Promise<FeedbackIngestResponse> {\n if (!stores.feedbackStore) {\n throw new WireError(\n 'service_unavailable',\n 'No feedback store configured on this server. Pass `feedbackStore` to `createApp`.',\n 503,\n )\n }\n // The wire `FeedbackTrajectory` aligns 1:1 with the internal type;\n // cast through `unknown` since the wire schema is a Zod-inferred\n // structural type with optional fields the internal store consumes.\n await stores.feedbackStore.save(req as unknown as Parameters<FeedbackTrajectoryStore['save']>[0])\n return { id: req.id, persisted: true }\n}\n","/**\n * Build an OpenAPI spec from the wire schemas.\n *\n * The spec is the contract that other-language clients (Python, Rust,\n * Go, …) generate from. There is no hand-written client — clients are\n * derived artifacts of this file plus `schemas.ts`.\n *\n * Run `pnpm openapi` (defined in package.json) to write the spec to\n * `dist/openapi.json`. CI uses that file to regenerate the Python\n * client and gate the dual-publish workflow.\n */\nimport { OpenAPIRegistry, OpenApiGeneratorV31 } from '@asteasolutions/zod-to-openapi'\nimport type { OpenAPIObject } from 'openapi3-ts/oas31'\n\nimport {\n ErrorResponseSchema,\n FeedbackIngestResponseSchema,\n FeedbackTrajectorySchema,\n HealthResponseSchema,\n JudgeRequestSchema,\n JudgeResultSchema,\n ListRubricsResponseSchema,\n TracesIngestRequestSchema,\n TracesIngestResponseSchema,\n VersionResponseSchema,\n WIRE_VERSION,\n} from './schemas'\n\nexport function buildOpenApi(packageVersion: string): OpenAPIObject {\n const registry = new OpenAPIRegistry()\n\n // Components — each schema becomes a $ref-able component\n registry.register('JudgeRequest', JudgeRequestSchema)\n registry.register('JudgeResult', JudgeResultSchema)\n registry.register('ListRubricsResponse', ListRubricsResponseSchema)\n registry.register('VersionResponse', VersionResponseSchema)\n registry.register('HealthResponse', HealthResponseSchema)\n registry.register('ErrorResponse', ErrorResponseSchema)\n registry.register('TracesIngestRequest', TracesIngestRequestSchema)\n registry.register('TracesIngestResponse', TracesIngestResponseSchema)\n registry.register('FeedbackTrajectory', FeedbackTrajectorySchema)\n registry.register('FeedbackIngestResponse', FeedbackIngestResponseSchema)\n\n // Routes\n registry.registerPath({\n method: 'post',\n path: '/v1/judge',\n summary: 'Score a piece of content against a rubric',\n description:\n 'Runs the judging LLM with the named (or inline) rubric and returns dimension scores, detected failure modes, wins, and a composite score in 0..1.',\n request: {\n body: {\n content: {\n 'application/json': { schema: JudgeRequestSchema },\n },\n },\n },\n responses: {\n 200: {\n description: 'Successful judgement',\n content: { 'application/json': { schema: JudgeResultSchema } },\n },\n 400: {\n description: 'Validation error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 404: {\n description: 'Rubric not found',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 500: {\n description: 'Judge error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'get',\n path: '/v1/rubrics',\n summary: 'List built-in rubrics',\n description:\n 'Returns every rubric registered server-side, with their dimensions and stable rubricVersion hash.',\n responses: {\n 200: {\n description: 'Listing',\n content: { 'application/json': { schema: ListRubricsResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'get',\n path: '/v1/version',\n summary: 'Server and wire-protocol version',\n description: 'Match your client version to `version`; check `wireVersion` for compatibility.',\n responses: {\n 200: {\n description: 'Version info',\n content: { 'application/json': { schema: VersionResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'get',\n path: '/healthz',\n summary: 'Liveness check',\n responses: {\n 200: {\n description: 'OK',\n content: { 'application/json': { schema: HealthResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'post',\n path: '/v1/traces/ingest',\n summary: 'Ingest a batch of production TraceEvents',\n description:\n 'Append a batch of TraceEvents to the configured TraceStore. Accepts application/json ({events:[...]}) or application/x-ndjson (one event per line). Returns counts of accepted + rejected events.',\n request: {\n body: {\n content: {\n 'application/json': { schema: TracesIngestRequestSchema },\n 'application/x-ndjson': { schema: TracesIngestRequestSchema },\n },\n },\n },\n responses: {\n 200: {\n description: 'Ingestion summary',\n content: { 'application/json': { schema: TracesIngestResponseSchema } },\n },\n 400: {\n description: 'Validation error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 401: {\n description: 'Unauthorized (when bearer auth is configured)',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 503: {\n description: 'No trace store configured',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'post',\n path: '/v1/feedback',\n summary: 'Ingest a FeedbackTrajectory from production',\n description:\n 'Persist a single FeedbackTrajectory. Idempotent on trajectory.id — re-posting replaces the prior record. Used by production runtimes to forward user 👍/👎/edits into the eval substrate.',\n request: {\n body: {\n content: {\n 'application/json': { schema: FeedbackTrajectorySchema },\n },\n },\n },\n responses: {\n 200: {\n description: 'Persisted',\n content: { 'application/json': { schema: FeedbackIngestResponseSchema } },\n },\n 400: {\n description: 'Validation error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 401: {\n description: 'Unauthorized (when bearer auth is configured)',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 503: {\n description: 'No feedback store configured',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n },\n })\n\n const generator = new OpenApiGeneratorV31(registry.definitions)\n const doc = generator.generateDocument({\n openapi: '3.1.0',\n info: {\n title: '@tangle-network/agent-eval — wire protocol',\n version: packageVersion,\n description: `HTTP and stdio RPC interface to agent-eval. The TypeScript runtime is the source of truth; this spec is the contract that cross-language clients (Python, Rust, Go) generate from.\n\nWire-protocol version: ${WIRE_VERSION}. Bumps on breaking changes to request/response schemas.`,\n contact: { name: 'Tangle Network', url: 'https://github.com/tangle-network/agent-eval' },\n license: { name: 'MIT' },\n },\n servers: [{ url: 'http://localhost:5005', description: 'Local agent-eval serve' }],\n })\n const rubricRef = { $ref: '#/components/schemas/Rubric' } as const\n const commonJudgeFields = {\n content: { type: 'string', minLength: 1 },\n context: { type: 'object', additionalProperties: true },\n model: { type: 'string' },\n } as const\n doc.components ??= {}\n doc.components.schemas ??= {}\n doc.components.schemas.JudgeRequest = {\n oneOf: [\n {\n type: 'object',\n additionalProperties: false,\n required: ['rubricName', 'content'],\n properties: {\n rubricName: { type: 'string', minLength: 1 },\n ...commonJudgeFields,\n },\n },\n {\n type: 'object',\n additionalProperties: false,\n required: ['rubric', 'content'],\n properties: {\n rubric: rubricRef,\n ...commonJudgeFields,\n },\n },\n ],\n description: 'Judge request. Provide exactly one of rubricName or rubric.',\n }\n return doc\n}\n","/**\n * stdio RPC transport.\n *\n * For batch / cron use without a running server. The Python client falls\n * back to this when no server is reachable.\n *\n * Protocol (line-delimited JSON over stdin/stdout):\n * IN: one JSON object on stdin: {\"method\":\"judge\",\"params\":{...}}\n * OUT: one JSON object on stdout: {\"result\":{...}} or {\"error\":{...}}\n *\n * One request per process invocation. To pipeline many calls, the client\n * writes JSONL to stdin and reads JSONL from stdout — see batch mode below.\n */\nimport { handleJudge, handleListRubrics, handleVersion, WireError } from './handlers'\nimport { JudgeRequestSchema } from './schemas'\n\ninterface RpcRequest {\n method: 'judge' | 'listRubrics' | 'version'\n params?: unknown\n}\n\ninterface RpcSuccess {\n result: unknown\n}\n\ninterface RpcError {\n error: { code: string; message: string; details?: unknown }\n}\n\nexport async function dispatchRpc(req: RpcRequest): Promise<RpcSuccess | RpcError> {\n try {\n switch (req.method) {\n case 'judge': {\n const parsed = JudgeRequestSchema.safeParse(req.params)\n if (!parsed.success) {\n return {\n error: {\n code: 'validation_error',\n message: 'params did not match JudgeRequest schema.',\n details: parsed.error.issues,\n },\n }\n }\n return { result: await handleJudge(parsed.data) }\n }\n case 'listRubrics':\n return { result: handleListRubrics() }\n case 'version':\n return { result: handleVersion() }\n default:\n return {\n error: {\n code: 'unknown_method',\n message: `No such method: ${(req as { method: string }).method}`,\n },\n }\n }\n } catch (err) {\n if (err instanceof WireError) {\n return { error: { code: err.code, message: err.message, details: err.details } }\n }\n const message = err instanceof Error ? err.message : String(err)\n return { error: { code: 'internal_error', message } }\n }\n}\n\n// ── stdin/stdout driver ─────────────────────────────────────────────\n\nasync function readAll(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = []\n for await (const chunk of stream) {\n chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as string))\n }\n return Buffer.concat(chunks).toString('utf-8')\n}\n\n/** Read one JSON request from stdin, write one JSON response to stdout. */\nexport async function runRpcOnce(method?: string): Promise<number> {\n const raw = await readAll(process.stdin)\n let req: RpcRequest\n try {\n const body = JSON.parse(raw)\n req = method ? { method: method as RpcRequest['method'], params: body } : (body as RpcRequest)\n } catch (err) {\n process.stdout.write(\n `${JSON.stringify({\n error: {\n code: 'parse_error',\n message: `stdin was not valid JSON: ${err instanceof Error ? err.message : String(err)}`,\n },\n })}\\n`,\n )\n return 1\n }\n const out = await dispatchRpc(req)\n process.stdout.write(`${JSON.stringify(out)}\\n`)\n return 'error' in out ? 1 : 0\n}\n\n/** Read JSONL requests from stdin, write JSONL responses to stdout. */\nexport async function runRpcBatch(method?: string): Promise<number> {\n const raw = await readAll(process.stdin)\n const lines = raw.split('\\n').filter((l) => l.trim().length > 0)\n let exitCode = 0\n for (const line of lines) {\n let req: RpcRequest\n try {\n const body = JSON.parse(line)\n req = method ? { method: method as RpcRequest['method'], params: body } : (body as RpcRequest)\n } catch (err) {\n process.stdout.write(\n `${JSON.stringify({\n error: {\n code: 'parse_error',\n message: `line was not valid JSON: ${err instanceof Error ? err.message : String(err)}`,\n },\n })}\\n`,\n )\n exitCode = 1\n continue\n }\n const out = await dispatchRpc(req)\n process.stdout.write(`${JSON.stringify(out)}\\n`)\n if ('error' in out) exitCode = 1\n }\n return exitCode\n}\n","/**\n * HTTP transport for the wire protocol.\n *\n * Hono + @hono/node-server. Every endpoint:\n * 1. Validates the request against its Zod schema.\n * 2. Calls the matching handler in `handlers.ts`.\n * 3. Renders 4xx for `WireError` with structured body, 500 for unexpected.\n *\n * The server holds optional `IngestionStores` (passed to `createApp`)\n * to receive production traces and user feedback. With no stores wired,\n * the ingestion endpoints return 503 — read endpoints (`/v1/judge`,\n * `/v1/rubrics`, `/v1/version`) remain fully functional.\n *\n * Run via `agent-eval serve --port 5005`.\n */\nimport { type ServerType, serve } from '@hono/node-server'\nimport { Hono } from 'hono'\nimport { cors } from 'hono/cors'\n\nimport {\n handleFeedbackIngest,\n handleJudge,\n handleListRubrics,\n handleTracesIngest,\n handleVersion,\n type IngestionStores,\n WireError,\n} from './handlers'\nimport { buildOpenApi } from './openapi'\nimport { FeedbackTrajectorySchema, JudgeRequestSchema, TracesIngestRequestSchema } from './schemas'\n\nconst STARTED_AT = Date.now()\n\nexport interface CreateAppOptions {\n /** Stores wired to the ingestion endpoints. */\n stores?: IngestionStores\n /**\n * Bearer-token auth. When provided, every endpoint EXCEPT `/healthz`\n * and `/v1/version` requires `Authorization: Bearer <token>`. The\n * token may be a static string OR a function for time-bounded /\n * rotating tokens.\n *\n * Recommended for any server that accepts ingestion writes from the\n * public internet. Read-only deployments may omit it.\n */\n auth?: {\n bearer: string | ((token: string) => boolean | Promise<boolean>)\n }\n}\n\nconst AUTH_EXEMPT_PATHS = new Set(['/healthz', '/v1/version', '/openapi.json'])\n\nexport function createApp(opts: CreateAppOptions = {}) {\n const app = new Hono()\n\n app.use('*', cors())\n\n // Bearer-token middleware (only attached when configured).\n if (opts.auth) {\n const verify = opts.auth.bearer\n app.use('*', async (c, next) => {\n const path = new URL(c.req.url).pathname\n if (AUTH_EXEMPT_PATHS.has(path)) return next()\n const raw = c.req.header('authorization') ?? ''\n const match = raw.match(/^Bearer\\s+(.+)$/i)\n if (!match) {\n throw new WireError('unauthorized', 'Missing or malformed Authorization header.', 401)\n }\n const token = match[1] as string\n const ok = typeof verify === 'string' ? token === verify : await verify(token)\n if (!ok) {\n throw new WireError('unauthorized', 'Invalid bearer token.', 401)\n }\n return next()\n })\n }\n\n app.onError((err, c) => {\n if (err instanceof WireError) {\n const status = err.status as 400 | 401 | 404 | 422 | 500 | 503\n return c.json(\n { error: { code: err.code, message: err.message, details: err.details } },\n status,\n )\n }\n // Unexpected — log and return generic 500 without leaking internals.\n console.error('[agent-eval] unhandled error:', err)\n return c.json({ error: { code: 'internal_error', message: 'Internal server error.' } }, 500)\n })\n\n // ── Health ──\n app.get('/healthz', (c) =>\n c.json({ status: 'ok' as const, uptimeSec: (Date.now() - STARTED_AT) / 1000 }),\n )\n\n // ── Version ──\n app.get('/v1/version', (c) => c.json(handleVersion()))\n\n // ── Rubrics ──\n app.get('/v1/rubrics', (c) => c.json(handleListRubrics()))\n\n // ── Judge ──\n app.post('/v1/judge', async (c) => {\n const raw = await c.req.json().catch(() => null)\n if (raw == null) {\n throw new WireError('validation_error', 'Request body must be JSON.', 400)\n }\n const parsed = JudgeRequestSchema.safeParse(raw)\n if (!parsed.success) {\n throw new WireError(\n 'validation_error',\n 'Request did not match JudgeRequest schema.',\n 400,\n parsed.error.issues,\n )\n }\n const result = await handleJudge(parsed.data)\n return c.json(result)\n })\n\n // ── Traces ingest (NDJSON-friendly: accepts either {events:[...]} or NDJSON) ──\n app.post('/v1/traces/ingest', async (c) => {\n const contentType = c.req.header('content-type') ?? ''\n let payload: unknown\n if (contentType.includes('application/x-ndjson')) {\n const text = await c.req.text()\n const events = text\n .split('\\n')\n .map((line) => line.trim())\n .filter((line) => line.length > 0)\n .map((line) => {\n try {\n return JSON.parse(line)\n } catch {\n throw new WireError(\n 'validation_error',\n 'NDJSON line did not parse as JSON.',\n 400,\n line.slice(0, 200),\n )\n }\n })\n payload = { events }\n } else {\n payload = await c.req.json().catch(() => null)\n }\n if (payload == null) {\n throw new WireError('validation_error', 'Request body must be JSON or NDJSON.', 400)\n }\n const parsed = TracesIngestRequestSchema.safeParse(payload)\n if (!parsed.success) {\n throw new WireError(\n 'validation_error',\n 'Request did not match TracesIngestRequest schema.',\n 400,\n parsed.error.issues,\n )\n }\n const result = await handleTracesIngest(parsed.data, opts.stores ?? {})\n return c.json(result)\n })\n\n // ── Feedback ingest ──\n app.post('/v1/feedback', async (c) => {\n const raw = await c.req.json().catch(() => null)\n if (raw == null) {\n throw new WireError('validation_error', 'Request body must be JSON.', 400)\n }\n const parsed = FeedbackTrajectorySchema.safeParse(raw)\n if (!parsed.success) {\n throw new WireError(\n 'validation_error',\n 'Request did not match FeedbackTrajectory schema.',\n 400,\n parsed.error.issues,\n )\n }\n const result = await handleFeedbackIngest(parsed.data, opts.stores ?? {})\n return c.json(result)\n })\n\n // ── OpenAPI spec ──\n app.get('/openapi.json', (c) => c.json(buildOpenApi(handleVersion().version)))\n\n return app\n}\n\nexport interface ServeOptions extends CreateAppOptions {\n /** Default 5005. */\n port?: number\n /** Default '127.0.0.1'. Set to '0.0.0.0' to listen on all interfaces. */\n host?: string\n}\n\nexport function startServer(opts: ServeOptions = {}): ServerType {\n const app = createApp(opts)\n const port = opts.port ?? 5005\n const host = opts.host ?? '127.0.0.1'\n return serve({ fetch: app.fetch, port, hostname: host }, ({ address, port: actualPort }) => {\n // eslint-disable-next-line no-console\n console.log(`[agent-eval] serving on http://${address}:${actualPort}`)\n })\n}\n"],"mappings":";;;;;AAYA,SAAS,4BAA4B;AACrC,SAAS,SAAS;AAElB,qBAAqB,CAAC;AAIf,IAAM,wBAAwB,EAClC,OAAO;AAAA,EACN,IAAI,EACD,OAAO,EACP,IAAI,CAAC,EACL,SAAS,gFAA2E;AAAA,EACvF,aAAa,EACV,OAAO,EACP,IAAI,CAAC,EACL,SAAS,sEAAsE;AAAA,EAClF,QAAQ,EACL,OAAO,EACP,IAAI,CAAC,EACL,QAAQ,CAAC,EACT,SAAS,gEAAgE;AAAA,EAC5E,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS,gDAAgD;AAAA,EACpF,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS,gDAAgD;AACtF,CAAC,EACA,QAAQ,iBAAiB;AAErB,IAAM,oBAAoB,EAC9B,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mEAA8D;AAAA,EAC7F,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mDAAmD;AAC7F,CAAC,EACA,QAAQ,aAAa;AAIjB,IAAM,eAAe,EACzB,OAAO;AAAA,EACN,MAAM,EACH,OAAO,EACP,IAAI,CAAC,EACL,SAAS,4EAAuE;AAAA,EACnF,aAAa,EACV,OAAO,EACP,IAAI,CAAC,EACL,SAAS,0DAA0D;AAAA,EACtE,cAAc,EACX,OAAO,EACP,IAAI,CAAC,EACL;AAAA,IACC;AAAA,EACF;AAAA,EACF,YAAY,EACT,MAAM,qBAAqB,EAC3B,IAAI,CAAC,EACL,SAAS,+DAA+D;AAAA,EAC3E,cAAc,EACX,MAAM,iBAAiB,EACvB,QAAQ,CAAC,CAAC,EACV,SAAS,iFAAiF;AAAA,EAC7F,MAAM,EACH,MAAM,iBAAiB,EACvB,QAAQ,CAAC,CAAC,EACV,SAAS,uEAAuE;AACrF,CAAC,EACA,QAAQ,QAAQ;AAIZ,IAAM,qBAAqB,EAC/B,OAAO;AAAA,EACN,YAAY,EACT,OAAO,EACP,SAAS,EACT,SAAS,kEAAkE;AAAA,EAC9E,QAAQ,aAAa,SAAS,EAAE;AAAA,IAC9B;AAAA,EACF;AAAA,EACA,SAAS,EACN,OAAO,EACP,IAAI,CAAC,EACL,SAAS,uFAAkF;AAAA,EAC9F,SAAS,EACN,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAC9B,SAAS,EACT;AAAA,IACC;AAAA,EACF;AAAA,EACF,OAAO,EACJ,OAAO,EACP,SAAS,EACT,SAAS,+EAA+E;AAC7F,CAAC,EACA,OAAO,CAAC,MAAM,QAAQ,EAAE,UAAU,MAAM,QAAQ,EAAE,MAAM,GAAG;AAAA,EAC1D,SAAS;AACX,CAAC,EACA,QAAQ,cAAc;AAElB,IAAM,oBAAoB,EAC9B,OAAO;AAAA,EACN,WAAW,EACR,OAAO,EACP,IAAI,CAAC,EACL,IAAI,CAAC,EACL,SAAS,iFAAiF;AAAA,EAC7F,YAAY,EACT,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAC7B,SAAS,mDAAmD;AAAA,EAC/D,cAAc,EACX,MAAM,EAAE,OAAO,CAAC,EAChB,QAAQ,CAAC,CAAC,EACV,SAAS,+EAA+E;AAAA,EAC3F,MAAM,EACH,MAAM,EAAE,OAAO,CAAC,EAChB,QAAQ,CAAC,CAAC,EACV,SAAS,8DAA8D;AAAA,EAC1E,WAAW,EACR,OAAO,EACP,SAAS,yEAAyE;AAAA,EACrF,eAAe,EACZ,OAAO,EACP;AAAA,IACC;AAAA,EACF;AAAA,EACF,OAAO,EAAE,OAAO,EAAE,SAAS,yDAAyD;AAAA,EACpF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,qCAAqC;AAC3F,CAAC,EACA,QAAQ,aAAa;AAIjB,IAAM,mBAAmB,EAC7B,OAAO;AAAA,EACN,MAAM,EAAE,OAAO,EAAE,SAAS,yCAAyC;AAAA,EACnE,aAAa,EAAE,OAAO,EAAE,SAAS,4BAA4B;AAAA,EAC7D,YAAY,EACT,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,GAAG,aAAa,EAAE,OAAO,GAAG,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC,EAC/E,SAAS,kDAAkD;AAAA,EAC9D,cAAc,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,uCAAuC;AAAA,EAC9F,eAAe,EAAE,OAAO,EAAE,SAAS,8DAAyD;AAC9F,CAAC,EACA,QAAQ,YAAY;AAEhB,IAAM,4BAA4B,EACtC,OAAO;AAAA,EACN,SAAS,EAAE,MAAM,gBAAgB;AACnC,CAAC,EACA,QAAQ,qBAAqB;AAIzB,IAAM,wBAAwB,EAClC,OAAO;AAAA,EACN,SAAS,EAAE,OAAO,EAAE,SAAS,qDAAqD;AAAA,EAClF,SAAS,EAAE,OAAO,EAAE,SAAS,0DAA0D;AAAA,EACvF,aAAa,EACV,OAAO,EACP;AAAA,IACC;AAAA,EACF;AAAA,EACF,YAAY,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,SAAS,iCAAiC;AAC5E,CAAC,EACA,QAAQ,iBAAiB;AAErB,IAAM,uBAAuB,EACjC,OAAO;AAAA,EACN,QAAQ,EAAE,QAAQ,IAAI;AAAA,EACtB,WAAW,EAAE,OAAO;AACtB,CAAC,EACA,QAAQ,gBAAgB;AAUpB,IAAM,mBAAmB,EAC7B,OAAO;AAAA,EACN,SAAS,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4CAA4C;AAAA,EAChF,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4BAA4B;AAAA,EAC9D,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,sCAAsC;AAAA,EAC7E,MAAM,EACH,KAAK;AAAA,IACJ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC,EACA,SAAS,yEAAoE;AAAA,EAChF,WAAW,EACR,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,kEAAkE;AAAA,EAC9E,SAAS,EACN,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAC9B,SAAS,sDAAiD;AAC/D,CAAC,EACA,QAAQ,YAAY;AAEhB,IAAM,4BAA4B,EACtC,OAAO;AAAA,EACN,QAAQ,EACL,MAAM,gBAAgB,EACtB,IAAI,CAAC,EACL,IAAI,GAAM,EACV,SAAS,4EAAuE;AACrF,CAAC,EACA,QAAQ,qBAAqB;AAEzB,IAAM,6BAA6B,EACvC,OAAO;AAAA,EACN,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,EAC/E,UAAU,EACP,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,uEAAkE;AAAA,EAC9E,QAAQ,EACL;AAAA,IACC,EAAE,OAAO;AAAA,MACP,SAAS,EAAE,OAAO,EAAE,SAAS,iCAAiC;AAAA,MAC9D,SAAS,EAAE,OAAO,EAAE,SAAS,6BAA6B;AAAA,IAC5D,CAAC;AAAA,EACH,EACC,QAAQ,CAAC,CAAC;AACf,CAAC,EACA,QAAQ,sBAAsB;AAE1B,IAAM,sBAAsB,EAChC,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,SAAS;AAAA,EACxB,QAAQ,EAAE,KAAK,CAAC,QAAQ,SAAS,eAAe,UAAU,UAAU,QAAQ,CAAC;AAAA,EAC7E,MAAM,EAAE,KAAK;AAAA,IACX;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AAAA,EACD,OAAO,EAAE,QAAQ;AAAA,EACjB,QAAQ,EAAE,OAAO,EAAE,SAAS;AAAA,EAC5B,UAAU,EAAE,KAAK,CAAC,QAAQ,WAAW,SAAS,UAAU,CAAC,EAAE,SAAS;AAAA,EACpE,WAAW,EAAE,OAAO,EAAE,SAAS,eAAe;AAAA,EAC9C,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AACvD,CAAC,EACA,QAAQ,eAAe;AAEnB,IAAM,wBAAwB,EAClC,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACpB,WAAW,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACxC,cAAc,EAAE,KAAK;AAAA,IACnB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AAAA,EACD,UAAU,EAAE,QAAQ;AAAA,EACpB,SAAS,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACvC,gBAAgB,EACb,OAAO;AAAA,IACN,MAAM,EAAE,OAAO;AAAA,IACf,MAAM,EAAE,KAAK,CAAC,OAAO,UAAU,MAAM,CAAC,EAAE,SAAS;AAAA,IACjD,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,IAC7B,oBAAoB,EAAE,QAAQ,EAAE,SAAS;AAAA,IACzC,kBAAkB,EAAE,QAAQ,EAAE,SAAS;AAAA,IACvC,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACvD,CAAC,EACA,SAAS;AAAA,EACZ,UAAU,EAAE,MAAM,mBAAmB,EAAE,SAAS;AAAA,EAChD,WAAW,EAAE,OAAO;AAAA,EACpB,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AACvD,CAAC,EACA,QAAQ,iBAAiB;AAErB,IAAM,2BAA2B,EACrC,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,gDAAgD;AAAA,EAC/E,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,EAChC,MAAM,EAAE,OAAO;AAAA,IACb,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,IACxB,SAAS,EAAE,QAAQ,EAAE,SAAS;AAAA,EAChC,CAAC;AAAA,EACD,UAAU,EAAE,MAAM,qBAAqB,EAAE,QAAQ,CAAC,CAAC;AAAA,EACnD,QAAQ,EAAE,MAAM,mBAAmB,EAAE,QAAQ,CAAC,CAAC;AAAA,EAC/C,SAAS,EACN,OAAO;AAAA,IACN,SAAS,EAAE,QAAQ,EAAE,SAAS;AAAA,IAC9B,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,IAC3B,SAAS,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAAE,SAAS;AAAA,IACnD,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,IAC7B,QAAQ,EAAE,OAAO,EAAE,SAAS;AAAA,IAC5B,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,IAChC,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACvD,CAAC,EACA,SAAS;AAAA,EACZ,OAAO,EAAE,KAAK,CAAC,SAAS,OAAO,QAAQ,SAAS,CAAC,EAAE,SAAS;AAAA,EAC5D,MAAM,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAAE,SAAS;AAAA,EAChD,WAAW,EAAE,OAAO,EAAE,SAAS,eAAe;AAAA,EAC9C,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AACvD,CAAC,EACA,QAAQ,oBAAoB;AAExB,IAAM,+BAA+B,EACzC,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,SAAS,mCAAmC;AAAA,EAC3D,WAAW,EAAE,QAAQ,EAAE,SAAS,wDAAwD;AAC1F,CAAC,EACA,QAAQ,wBAAwB;AAU5B,IAAM,sBAAsB,EAChC,OAAO;AAAA,EACN,OAAO,EACJ,OAAO;AAAA,IACN,MAAM,EACH,OAAO,EACP;AAAA,MACC;AAAA,IACF;AAAA,IACF,SAAS,EAAE,OAAO,EAAE,SAAS,yBAAyB;AAAA,IACtD,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS,6BAA6B;AAAA,EACxE,CAAC,EACA,SAAS,+DAA+D;AAC7E,CAAC,EACA,QAAQ,eAAe;AAoBnB,IAAM,eAAe;AAMrB,SAAS,WAAW,QAAwB;AACjD,QAAM,SAAS,gBAAgB,MAAM;AACrC,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,QAAK,IAAI,KAAM,OAAO,WAAW,CAAC;AAAA,EACpC;AAEA,SAAO,GAAG,OAAO,IAAI,KAAK,MAAM,GAAG,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC;AAClE;AAEA,SAAS,gBAAgB,OAAwB;AAC/C,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,IAAI,MAAM,IAAI,CAAC,SAAS,gBAAgB,IAAI,CAAC,EAAE,KAAK,GAAG,CAAC;AACzF,MAAI,SAAS,OAAO,UAAU,UAAU;AACtC,UAAM,UAAU,OAAO,QAAQ,KAAgC,EAC5D,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC,EACrC,IAAI,CAAC,CAAC,KAAK,IAAI,MAAM,GAAG,KAAK,UAAU,GAAG,CAAC,IAAI,gBAAgB,IAAI,CAAC,EAAE;AACzE,WAAO,IAAI,QAAQ,KAAK,GAAG,CAAC;AAAA,EAC9B;AACA,SAAO,KAAK,UAAU,KAAK;AAC7B;;;ACjYA,IAAM,YAAoB;AAAA,EACxB,MAAM;AAAA,EACN,aACE;AAAA,EACF,cAAc;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAuBd,YAAY;AAAA,IACV;AAAA,MACE,IAAI;AAAA,MACJ,aAAa;AAAA,MACb,QAAQ;AAAA,MACR,KAAK;AAAA,MACL,KAAK;AAAA,IACP;AAAA,IACA;AAAA,MACE,IAAI;AAAA,MACJ,aAAa;AAAA,MACb,QAAQ;AAAA,MACR,KAAK;AAAA,MACL,KAAK;AAAA,IACP;AAAA,IACA;AAAA,MACE,IAAI;AAAA,MACJ,aAAa;AAAA,MACb,QAAQ;AAAA,MACR,KAAK;AAAA,MACL,KAAK;AAAA,IACP;AAAA,EACF;AAAA,EACA,cAAc;AAAA,IACZ,EAAE,IAAI,cAAc,aAAa,qCAAqC;AAAA,IACtE,EAAE,IAAI,kBAAkB,aAAa,sCAAsC;AAAA,IAC3E,EAAE,IAAI,eAAe,aAAa,oCAAoC;AAAA,IACtE,EAAE,IAAI,WAAW,aAAa,kCAAkC;AAAA,IAChE,EAAE,IAAI,mBAAmB,aAAa,qCAAqC;AAAA,IAC3E,EAAE,IAAI,WAAW,aAAa,oCAAoC;AAAA,IAClE,EAAE,IAAI,eAAe,aAAa,uCAAuC;AAAA,EAC3E;AAAA,EACA,MAAM;AAAA,IACJ,EAAE,IAAI,sBAAsB,aAAa,qCAAqC;AAAA,IAC9E,EAAE,IAAI,iBAAiB,aAAa,4BAA4B;AAAA,IAChE,EAAE,IAAI,0BAA0B,aAAa,wBAAwB;AAAA,IACrE,EAAE,IAAI,kBAAkB,aAAa,oCAAoC;AAAA,EAC3E;AACF;AAIO,IAAM,kBAA0C;AAAA,EACrD,aAAa;AACf;AAGO,SAAS,iBAAiB,MAAkC;AACjE,SAAO,gBAAgB,IAAI;AAC7B;AAGO,SAAS,qBAAqB;AACnC,SAAO,OAAO,OAAO,eAAe,EAAE,IAAI,CAAC,OAAO;AAAA,IAChD,MAAM,EAAE;AAAA,IACR,aAAa,EAAE;AAAA,IACf,YAAY,EAAE,WAAW,IAAI,CAAC,OAAO;AAAA,MACnC,IAAI,EAAE;AAAA,MACN,aAAa,EAAE;AAAA,MACf,QAAQ,EAAE;AAAA,IACZ,EAAE;AAAA,IACF,cAAc,EAAE,aAAa,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,IAC5C,eAAe,WAAW,CAAC;AAAA,EAC7B,EAAE;AACJ;;;ACwHA,SAAS,oBAAoB;AAC7B,SAAS,SAAS,eAAe;AACjC,SAAS,qBAAqB;AA9MvB,IAAM,YAAN,cAAwB,MAAM;AAAA,EACnC,YACkB,MAChB,SACgB,SAAiB,KACjB,SAChB;AACA,UAAM,OAAO;AALG;AAEA;AACA;AAGhB,SAAK,OAAO;AAAA,EACd;AAAA,EAPkB;AAAA,EAEA;AAAA,EACA;AAKpB;AAKA,SAAS,kBAAkB,QAAgB;AACzC,SAAO;AAAA,IACL,MAAM;AAAA,IACN,QAAQ;AAAA,MACN,MAAM;AAAA,MACN,sBAAsB;AAAA,MACtB,YAAY;AAAA,QACV,YAAY;AAAA,UACV,MAAM;AAAA,UACN,sBAAsB;AAAA,UACtB,YAAY,OAAO;AAAA,YACjB,OAAO,WAAW,IAAI,CAAC,MAAM;AAAA,cAC3B,EAAE;AAAA,cACF,EAAE,MAAM,UAAU,SAAS,EAAE,KAAK,SAAS,EAAE,IAAI;AAAA,YACnD,CAAC;AAAA,UACH;AAAA,UACA,UAAU,OAAO,WAAW,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,QAC7C;AAAA,QACA,cAAc;AAAA,UACZ,MAAM;AAAA,UACN,OAAO,EAAE,MAAM,UAAU,MAAM,OAAO,aAAa,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE;AAAA,QACtE;AAAA,QACA,MAAM;AAAA,UACJ,MAAM;AAAA,UACN,OAAO,EAAE,MAAM,UAAU,MAAM,OAAO,KAAK,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE;AAAA,QAC9D;AAAA,QACA,WAAW,EAAE,MAAM,SAAS;AAAA,MAC9B;AAAA,MACA,UAAU,CAAC,cAAc,WAAW;AAAA,IACtC;AAAA,EACF;AACF;AASA,SAAS,oBAAoB,OAAgB,QAA6B;AACxE,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACvC,UAAM,IAAI,UAAU,eAAe,oCAAoC,KAAK,KAAK;AAAA,EACnF;AACA,QAAM,MAAM;AACZ,QAAM,gBAAgB,IAAI;AAC1B,MAAI,CAAC,iBAAiB,OAAO,kBAAkB,YAAY,MAAM,QAAQ,aAAa,GAAG;AACvF,UAAM,IAAI,UAAU,eAAe,wCAAwC,KAAK,KAAK;AAAA,EACvF;AAEA,QAAM,aAAqC,CAAC;AAC5C,QAAM,kBAAkB;AACxB,aAAW,OAAO,OAAO,YAAY;AACnC,UAAM,QAAQ,gBAAgB,IAAI,EAAE;AACpC,QACE,OAAO,UAAU,YACjB,CAAC,OAAO,SAAS,KAAK,KACtB,QAAQ,IAAI,OACZ,QAAQ,IAAI,KACZ;AACA,YAAM,IAAI;AAAA,QACR;AAAA,QACA,+CAA+C,IAAI,EAAE;AAAA,QACrD;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,eAAW,IAAI,EAAE,IAAI;AAAA,EACvB;AAEA,QAAM,kBAAkB,IAAI,IAAI,OAAO,aAAa,IAAI,CAAC,SAAS,KAAK,EAAE,CAAC;AAC1E,QAAM,cAAc,IAAI,IAAI,OAAO,KAAK,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;AAC5D,QAAM,eAAe,gBAAgB,IAAI,cAAc,iBAAiB,gBAAgB,KAAK;AAC7F,QAAM,OAAO,gBAAgB,IAAI,MAAM,aAAa,QAAQ,KAAK;AACjE,MAAI,OAAO,IAAI,cAAc,YAAY,IAAI,UAAU,KAAK,EAAE,WAAW,GAAG;AAC1E,UAAM,IAAI,UAAU,eAAe,qCAAqC,KAAK,KAAK;AAAA,EACpF;AAEA,SAAO,EAAE,YAAY,cAAc,MAAM,WAAW,IAAI,UAAU;AACpE;AAEA,SAAS,gBACP,KACA,SACA,OACA,UACU;AACV,MAAI,QAAQ,OAAW,QAAO,CAAC;AAC/B,MAAI,CAAC,MAAM,QAAQ,GAAG,GAAG;AACvB,UAAM,IAAI,UAAU,eAAe,4BAA4B,KAAK,KAAK,KAAK,QAAQ;AAAA,EACxF;AACA,QAAM,MAAgB,CAAC;AACvB,aAAW,QAAQ,KAAK;AACtB,QAAI,OAAO,SAAS,YAAY,CAAC,QAAQ,IAAI,IAAI,GAAG;AAClD,YAAM,IAAI;AAAA,QACR;AAAA,QACA,0BAA0B,KAAK,QAAQ,OAAO,IAAI,CAAC;AAAA,QACnD;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,QAAI,KAAK,IAAI;AAAA,EACf;AACA,SAAO;AACT;AAEA,SAAS,eAAe,YAAoC,QAAwB;AAClF,MAAI,WAAW;AACf,MAAI,cAAc;AAClB,aAAW,OAAO,OAAO,YAAY;AACnC,UAAM,MAAM,WAAW,IAAI,EAAE,KAAK;AAClC,UAAM,QAAQ,IAAI,MAAM,IAAI,OAAO;AACnC,UAAM,aAAa,KAAK,IAAI,GAAG,KAAK,IAAI,IAAI,MAAM,IAAI,OAAO,KAAK,CAAC;AACnE,gBAAY,aAAa,IAAI;AAC7B,mBAAe,IAAI;AAAA,EACrB;AACA,SAAO,cAAc,IAAI,WAAW,cAAc;AACpD;AAEA,SAAS,iBAAiB,SAAiB,SAA0B;AACnE,QAAM,MAAM,WAAW,OAAO,KAAK,OAAiB,EAAE,SAAS,KAAK,UAAU,OAAO,IAAI;AACzF,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA,MAAM,yCAAyC;AAAA,IAC/C,MAAM,MAAM;AAAA,EACd,EACG,OAAO,OAAO,EACd,KAAK,IAAI;AACd;AAEA,IAAM,sBAAsB;AAE5B,eAAsB,YAAY,KAAyC;AAEzE,MAAI;AACJ,MAAI,IAAI,YAAY;AAClB,UAAM,QAAQ,iBAAiB,IAAI,UAAU;AAC7C,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,UAAU,oBAAoB,6BAA6B,IAAI,UAAU,MAAM,GAAG;AAAA,IAC9F;AACA,aAAS;AAAA,EACX,WAAW,IAAI,QAAQ;AACrB,aAAS,IAAI;AAAA,EACf,OAAO;AAEL,UAAM,IAAI,UAAU,oBAAoB,4CAA4C,GAAG;AAAA,EACzF;AAEA,QAAM,YAAY,KAAK,IAAI;AAC3B,QAAM,QAAQ,IAAI,SAAS;AAE3B,QAAM,EAAE,OAAO,OAAO,IAAI,MAAM,YAAyB;AAAA,IACvD;AAAA,IACA,UAAU;AAAA,MACR,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa;AAAA,MAC/C,EAAE,MAAM,QAAQ,SAAS,iBAAiB,IAAI,SAAS,IAAI,OAAO,EAAE;AAAA,IACtE;AAAA,IACA,YAAY,kBAAkB,MAAM;AAAA,IACpC,aAAa;AAAA,IACb,WAAW;AAAA,EACb,CAAC;AAED,QAAM,SAAS,oBAAoB,OAAO,MAAM;AAEhD,QAAM,YAAY,eAAe,OAAO,YAAY,MAAM;AAC1D,QAAM,aAAa,KAAK,IAAI,IAAI;AAEhC,SAAO;AAAA,IACL;AAAA,IACA,YAAY,OAAO;AAAA,IACnB,cAAc,OAAO,gBAAgB,CAAC;AAAA,IACtC,MAAM,OAAO,QAAQ,CAAC;AAAA,IACtB,WAAW,OAAO;AAAA,IAClB,eAAe,WAAW,MAAM;AAAA,IAChC,OAAO,OAAO;AAAA,IACd;AAAA,EACF;AACF;AAIO,SAAS,oBAAyC;AACvD,SAAO,EAAE,SAAS,mBAAmB,EAAE;AACzC;AAQA,IAAI;AAEJ,SAAS,qBAA6B;AACpC,MAAI,eAAgB,QAAO;AAG3B,QAAM,OAAO,QAAQ,cAAc,YAAY,GAAG,CAAC;AACnD,QAAM,aAAa;AAAA,IACjB,QAAQ,MAAM,MAAM,MAAM,cAAc;AAAA;AAAA,IACxC,QAAQ,MAAM,MAAM,cAAc;AAAA;AAAA,EACpC;AACA,aAAW,QAAQ,YAAY;AAC7B,QAAI;AACF,YAAM,MAAM,KAAK,MAAM,aAAa,MAAM,OAAO,CAAC;AAClD,UAAI,IAAI,SAAS;AACf,yBAAiB,IAAI;AACrB,eAAO,IAAI;AAAA,MACb;AAAA,IACF,QAAQ;AAAA,IAER;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,gBAAiC;AAC/C,SAAO;AAAA,IACL,SAAS;AAAA,IACT,SAAS,mBAAmB;AAAA,IAC5B,aAAa;AAAA,IACb,YAAY,CAAC,SAAS,eAAe,WAAW,mBAAmB,eAAe;AAAA,EACpF;AACF;AA2BA,eAAsB,mBACpB,KACA,QAC+B;AAC/B,MAAI,CAAC,OAAO,YAAY;AACtB,UAAM,IAAI;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACA,QAAM,SAAsD,CAAC;AAC7D,MAAI,WAAW;AACf,aAAW,SAAS,IAAI,QAAQ;AAC9B,QAAI;AAEF,YAAM,OAAO,WAAW,YAAY,KAA2B;AAC/D;AAAA,IACF,SAAS,KAAK;AACZ,aAAO,KAAK;AAAA,QACV,SAAS,MAAM;AAAA,QACf,SAAS,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MAC1D,CAAC;AAAA,IACH;AAAA,EACF;AACA,SAAO,EAAE,UAAU,UAAU,OAAO,QAAQ,OAAO;AACrD;AAOA,eAAsB,qBACpB,KACA,QACiC;AACjC,MAAI,CAAC,OAAO,eAAe;AACzB,UAAM,IAAI;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAIA,QAAM,OAAO,cAAc,KAAK,GAAgE;AAChG,SAAO,EAAE,IAAI,IAAI,IAAI,WAAW,KAAK;AACvC;;;AChVA,SAAS,iBAAiB,2BAA2B;AAiB9C,SAAS,aAAa,gBAAuC;AAClE,QAAM,WAAW,IAAI,gBAAgB;AAGrC,WAAS,SAAS,gBAAgB,kBAAkB;AACpD,WAAS,SAAS,eAAe,iBAAiB;AAClD,WAAS,SAAS,uBAAuB,yBAAyB;AAClE,WAAS,SAAS,mBAAmB,qBAAqB;AAC1D,WAAS,SAAS,kBAAkB,oBAAoB;AACxD,WAAS,SAAS,iBAAiB,mBAAmB;AACtD,WAAS,SAAS,uBAAuB,yBAAyB;AAClE,WAAS,SAAS,wBAAwB,0BAA0B;AACpE,WAAS,SAAS,sBAAsB,wBAAwB;AAChE,WAAS,SAAS,0BAA0B,4BAA4B;AAGxE,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,SAAS;AAAA,UACP,oBAAoB,EAAE,QAAQ,mBAAmB;AAAA,QACnD;AAAA,MACF;AAAA,IACF;AAAA,IACA,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,kBAAkB,EAAE;AAAA,MAC/D;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,0BAA0B,EAAE;AAAA,MACvE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aAAa;AAAA,IACb,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,sBAAsB,EAAE;AAAA,MACnE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,qBAAqB,EAAE;AAAA,MAClE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,SAAS;AAAA,UACP,oBAAoB,EAAE,QAAQ,0BAA0B;AAAA,UACxD,wBAAwB,EAAE,QAAQ,0BAA0B;AAAA,QAC9D;AAAA,MACF;AAAA,IACF;AAAA,IACA,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,2BAA2B,EAAE;AAAA,MACxE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,SAAS;AAAA,UACP,oBAAoB,EAAE,QAAQ,yBAAyB;AAAA,QACzD;AAAA,MACF;AAAA,IACF;AAAA,IACA,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,6BAA6B,EAAE;AAAA,MAC1E;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,IACF;AAAA,EACF,CAAC;AAED,QAAM,YAAY,IAAI,oBAAoB,SAAS,WAAW;AAC9D,QAAM,MAAM,UAAU,iBAAiB;AAAA,IACrC,SAAS;AAAA,IACT,MAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,aAAa;AAAA;AAAA,yBAEM,YAAY;AAAA,MAC/B,SAAS,EAAE,MAAM,kBAAkB,KAAK,+CAA+C;AAAA,MACvF,SAAS,EAAE,MAAM,MAAM;AAAA,IACzB;AAAA,IACA,SAAS,CAAC,EAAE,KAAK,yBAAyB,aAAa,yBAAyB,CAAC;AAAA,EACnF,CAAC;AACD,QAAM,YAAY,EAAE,MAAM,8BAA8B;AACxD,QAAM,oBAAoB;AAAA,IACxB,SAAS,EAAE,MAAM,UAAU,WAAW,EAAE;AAAA,IACxC,SAAS,EAAE,MAAM,UAAU,sBAAsB,KAAK;AAAA,IACtD,OAAO,EAAE,MAAM,SAAS;AAAA,EAC1B;AACA,MAAI,eAAe,CAAC;AACpB,MAAI,WAAW,YAAY,CAAC;AAC5B,MAAI,WAAW,QAAQ,eAAe;AAAA,IACpC,OAAO;AAAA,MACL;AAAA,QACE,MAAM;AAAA,QACN,sBAAsB;AAAA,QACtB,UAAU,CAAC,cAAc,SAAS;AAAA,QAClC,YAAY;AAAA,UACV,YAAY,EAAE,MAAM,UAAU,WAAW,EAAE;AAAA,UAC3C,GAAG;AAAA,QACL;AAAA,MACF;AAAA,MACA;AAAA,QACE,MAAM;AAAA,QACN,sBAAsB;AAAA,QACtB,UAAU,CAAC,UAAU,SAAS;AAAA,QAC9B,YAAY;AAAA,UACV,QAAQ;AAAA,UACR,GAAG;AAAA,QACL;AAAA,MACF;AAAA,IACF;AAAA,IACA,aAAa;AAAA,EACf;AACA,SAAO;AACT;;;ACxMA,eAAsB,YAAY,KAAiD;AACjF,MAAI;AACF,YAAQ,IAAI,QAAQ;AAAA,MAClB,KAAK,SAAS;AACZ,cAAM,SAAS,mBAAmB,UAAU,IAAI,MAAM;AACtD,YAAI,CAAC,OAAO,SAAS;AACnB,iBAAO;AAAA,YACL,OAAO;AAAA,cACL,MAAM;AAAA,cACN,SAAS;AAAA,cACT,SAAS,OAAO,MAAM;AAAA,YACxB;AAAA,UACF;AAAA,QACF;AACA,eAAO,EAAE,QAAQ,MAAM,YAAY,OAAO,IAAI,EAAE;AAAA,MAClD;AAAA,MACA,KAAK;AACH,eAAO,EAAE,QAAQ,kBAAkB,EAAE;AAAA,MACvC,KAAK;AACH,eAAO,EAAE,QAAQ,cAAc,EAAE;AAAA,MACnC;AACE,eAAO;AAAA,UACL,OAAO;AAAA,YACL,MAAM;AAAA,YACN,SAAS,mBAAoB,IAA2B,MAAM;AAAA,UAChE;AAAA,QACF;AAAA,IACJ;AAAA,EACF,SAAS,KAAK;AACZ,QAAI,eAAe,WAAW;AAC5B,aAAO,EAAE,OAAO,EAAE,MAAM,IAAI,MAAM,SAAS,IAAI,SAAS,SAAS,IAAI,QAAQ,EAAE;AAAA,IACjF;AACA,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,EAAE,OAAO,EAAE,MAAM,kBAAkB,QAAQ,EAAE;AAAA,EACtD;AACF;AAIA,eAAe,QAAQ,QAAgD;AACrE,QAAM,SAAmB,CAAC;AAC1B,mBAAiB,SAAS,QAAQ;AAChC,WAAO,KAAK,OAAO,SAAS,KAAK,IAAI,QAAQ,OAAO,KAAK,KAAe,CAAC;AAAA,EAC3E;AACA,SAAO,OAAO,OAAO,MAAM,EAAE,SAAS,OAAO;AAC/C;AAGA,eAAsB,WAAW,QAAkC;AACjE,QAAM,MAAM,MAAM,QAAQ,QAAQ,KAAK;AACvC,MAAI;AACJ,MAAI;AACF,UAAM,OAAO,KAAK,MAAM,GAAG;AAC3B,UAAM,SAAS,EAAE,QAAwC,QAAQ,KAAK,IAAK;AAAA,EAC7E,SAAS,KAAK;AACZ,YAAQ,OAAO;AAAA,MACb,GAAG,KAAK,UAAU;AAAA,QAChB,OAAO;AAAA,UACL,MAAM;AAAA,UACN,SAAS,6BAA6B,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,QACxF;AAAA,MACF,CAAC,CAAC;AAAA;AAAA,IACJ;AACA,WAAO;AAAA,EACT;AACA,QAAM,MAAM,MAAM,YAAY,GAAG;AACjC,UAAQ,OAAO,MAAM,GAAG,KAAK,UAAU,GAAG,CAAC;AAAA,CAAI;AAC/C,SAAO,WAAW,MAAM,IAAI;AAC9B;AAGA,eAAsB,YAAY,QAAkC;AAClE,QAAM,MAAM,MAAM,QAAQ,QAAQ,KAAK;AACvC,QAAM,QAAQ,IAAI,MAAM,IAAI,EAAE,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC;AAC/D,MAAI,WAAW;AACf,aAAW,QAAQ,OAAO;AACxB,QAAI;AACJ,QAAI;AACF,YAAM,OAAO,KAAK,MAAM,IAAI;AAC5B,YAAM,SAAS,EAAE,QAAwC,QAAQ,KAAK,IAAK;AAAA,IAC7E,SAAS,KAAK;AACZ,cAAQ,OAAO;AAAA,QACb,GAAG,KAAK,UAAU;AAAA,UAChB,OAAO;AAAA,YACL,MAAM;AAAA,YACN,SAAS,4BAA4B,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,UACvF;AAAA,QACF,CAAC,CAAC;AAAA;AAAA,MACJ;AACA,iBAAW;AACX;AAAA,IACF;AACA,UAAM,MAAM,MAAM,YAAY,GAAG;AACjC,YAAQ,OAAO,MAAM,GAAG,KAAK,UAAU,GAAG,CAAC;AAAA,CAAI;AAC/C,QAAI,WAAW,IAAK,YAAW;AAAA,EACjC;AACA,SAAO;AACT;;;AC/GA,SAA0B,aAAa;AACvC,SAAS,YAAY;AACrB,SAAS,YAAY;AAcrB,IAAM,aAAa,KAAK,IAAI;AAmB5B,IAAM,oBAAoB,oBAAI,IAAI,CAAC,YAAY,eAAe,eAAe,CAAC;AAEvE,SAAS,UAAU,OAAyB,CAAC,GAAG;AACrD,QAAM,MAAM,IAAI,KAAK;AAErB,MAAI,IAAI,KAAK,KAAK,CAAC;AAGnB,MAAI,KAAK,MAAM;AACb,UAAM,SAAS,KAAK,KAAK;AACzB,QAAI,IAAI,KAAK,OAAO,GAAG,SAAS;AAC9B,YAAM,OAAO,IAAI,IAAI,EAAE,IAAI,GAAG,EAAE;AAChC,UAAI,kBAAkB,IAAI,IAAI,EAAG,QAAO,KAAK;AAC7C,YAAM,MAAM,EAAE,IAAI,OAAO,eAAe,KAAK;AAC7C,YAAM,QAAQ,IAAI,MAAM,kBAAkB;AAC1C,UAAI,CAAC,OAAO;AACV,cAAM,IAAI,UAAU,gBAAgB,8CAA8C,GAAG;AAAA,MACvF;AACA,YAAM,QAAQ,MAAM,CAAC;AACrB,YAAM,KAAK,OAAO,WAAW,WAAW,UAAU,SAAS,MAAM,OAAO,KAAK;AAC7E,UAAI,CAAC,IAAI;AACP,cAAM,IAAI,UAAU,gBAAgB,yBAAyB,GAAG;AAAA,MAClE;AACA,aAAO,KAAK;AAAA,IACd,CAAC;AAAA,EACH;AAEA,MAAI,QAAQ,CAAC,KAAK,MAAM;AACtB,QAAI,eAAe,WAAW;AAC5B,YAAM,SAAS,IAAI;AACnB,aAAO,EAAE;AAAA,QACP,EAAE,OAAO,EAAE,MAAM,IAAI,MAAM,SAAS,IAAI,SAAS,SAAS,IAAI,QAAQ,EAAE;AAAA,QACxE;AAAA,MACF;AAAA,IACF;AAEA,YAAQ,MAAM,iCAAiC,GAAG;AAClD,WAAO,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,kBAAkB,SAAS,yBAAyB,EAAE,GAAG,GAAG;AAAA,EAC7F,CAAC;AAGD,MAAI;AAAA,IAAI;AAAA,IAAY,CAAC,MACnB,EAAE,KAAK,EAAE,QAAQ,MAAe,YAAY,KAAK,IAAI,IAAI,cAAc,IAAK,CAAC;AAAA,EAC/E;AAGA,MAAI,IAAI,eAAe,CAAC,MAAM,EAAE,KAAK,cAAc,CAAC,CAAC;AAGrD,MAAI,IAAI,eAAe,CAAC,MAAM,EAAE,KAAK,kBAAkB,CAAC,CAAC;AAGzD,MAAI,KAAK,aAAa,OAAO,MAAM;AACjC,UAAM,MAAM,MAAM,EAAE,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI;AAC/C,QAAI,OAAO,MAAM;AACf,YAAM,IAAI,UAAU,oBAAoB,8BAA8B,GAAG;AAAA,IAC3E;AACA,UAAM,SAAS,mBAAmB,UAAU,GAAG;AAC/C,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,QACA;AAAA,QACA,OAAO,MAAM;AAAA,MACf;AAAA,IACF;AACA,UAAM,SAAS,MAAM,YAAY,OAAO,IAAI;AAC5C,WAAO,EAAE,KAAK,MAAM;AAAA,EACtB,CAAC;AAGD,MAAI,KAAK,qBAAqB,OAAO,MAAM;AACzC,UAAM,cAAc,EAAE,IAAI,OAAO,cAAc,KAAK;AACpD,QAAI;AACJ,QAAI,YAAY,SAAS,sBAAsB,GAAG;AAChD,YAAM,OAAO,MAAM,EAAE,IAAI,KAAK;AAC9B,YAAM,SAAS,KACZ,MAAM,IAAI,EACV,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,CAAC,SAAS,KAAK,SAAS,CAAC,EAChC,IAAI,CAAC,SAAS;AACb,YAAI;AACF,iBAAO,KAAK,MAAM,IAAI;AAAA,QACxB,QAAQ;AACN,gBAAM,IAAI;AAAA,YACR;AAAA,YACA;AAAA,YACA;AAAA,YACA,KAAK,MAAM,GAAG,GAAG;AAAA,UACnB;AAAA,QACF;AAAA,MACF,CAAC;AACH,gBAAU,EAAE,OAAO;AAAA,IACrB,OAAO;AACL,gBAAU,MAAM,EAAE,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI;AAAA,IAC/C;AACA,QAAI,WAAW,MAAM;AACnB,YAAM,IAAI,UAAU,oBAAoB,wCAAwC,GAAG;AAAA,IACrF;AACA,UAAM,SAAS,0BAA0B,UAAU,OAAO;AAC1D,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,QACA;AAAA,QACA,OAAO,MAAM;AAAA,MACf;AAAA,IACF;AACA,UAAM,SAAS,MAAM,mBAAmB,OAAO,MAAM,KAAK,UAAU,CAAC,CAAC;AACtE,WAAO,EAAE,KAAK,MAAM;AAAA,EACtB,CAAC;AAGD,MAAI,KAAK,gBAAgB,OAAO,MAAM;AACpC,UAAM,MAAM,MAAM,EAAE,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI;AAC/C,QAAI,OAAO,MAAM;AACf,YAAM,IAAI,UAAU,oBAAoB,8BAA8B,GAAG;AAAA,IAC3E;AACA,UAAM,SAAS,yBAAyB,UAAU,GAAG;AACrD,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,QACA;AAAA,QACA,OAAO,MAAM;AAAA,MACf;AAAA,IACF;AACA,UAAM,SAAS,MAAM,qBAAqB,OAAO,MAAM,KAAK,UAAU,CAAC,CAAC;AACxE,WAAO,EAAE,KAAK,MAAM;AAAA,EACtB,CAAC;AAGD,MAAI,IAAI,iBAAiB,CAAC,MAAM,EAAE,KAAK,aAAa,cAAc,EAAE,OAAO,CAAC,CAAC;AAE7E,SAAO;AACT;AASO,SAAS,YAAY,OAAqB,CAAC,GAAe;AAC/D,QAAM,MAAM,UAAU,IAAI;AAC1B,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,OAAO,KAAK,QAAQ;AAC1B,SAAO,MAAM,EAAE,OAAO,IAAI,OAAO,MAAM,UAAU,KAAK,GAAG,CAAC,EAAE,SAAS,MAAM,WAAW,MAAM;AAE1F,YAAQ,IAAI,kCAAkC,OAAO,IAAI,UAAU,EAAE;AAAA,EACvE,CAAC;AACH;","names":[]}
@@ -1,272 +0,0 @@
1
- import {
2
- ValidationError
3
- } from "./chunk-NG236HPC.js";
4
-
5
- // src/statistics.ts
6
- var INVERTED_DIMENSIONS = /* @__PURE__ */ new Set(["hallucination", "false_confidence", "worst_failure"]);
7
- function normalizeScores(scores) {
8
- return scores.map((s) => {
9
- if (INVERTED_DIMENSIONS.has(s.dimension)) {
10
- return s;
11
- }
12
- return s;
13
- });
14
- }
15
- function weightedMean(scores) {
16
- if (scores.length === 0) return 0;
17
- let totalWeight = 0;
18
- let weightedSum = 0;
19
- for (const { score, weight } of scores) {
20
- const w = weight ?? 1;
21
- weightedSum += score * w;
22
- totalWeight += w;
23
- }
24
- return totalWeight > 0 ? weightedSum / totalWeight : 0;
25
- }
26
- function confidenceInterval(scores, confidence = 0.95) {
27
- if (scores.length === 0) return { mean: 0, lower: 0, upper: 0 };
28
- if (scores.length === 1) return { mean: scores[0], lower: scores[0], upper: scores[0] };
29
- const n = scores.length;
30
- const mean = scores.reduce((a, b) => a + b, 0) / n;
31
- const B = 1e3;
32
- const bootstrapMeans = [];
33
- for (let i = 0; i < B; i++) {
34
- let sum = 0;
35
- for (let j = 0; j < n; j++) {
36
- sum += scores[Math.floor(Math.random() * n)];
37
- }
38
- bootstrapMeans.push(sum / n);
39
- }
40
- bootstrapMeans.sort((a, b) => a - b);
41
- const alpha = 1 - confidence;
42
- const lowerIdx = Math.floor(alpha / 2 * B);
43
- const upperIdx = Math.floor((1 - alpha / 2) * B) - 1;
44
- return {
45
- mean,
46
- lower: bootstrapMeans[lowerIdx],
47
- upper: bootstrapMeans[Math.min(upperIdx, B - 1)]
48
- };
49
- }
50
- function interRaterReliability(judgeScores) {
51
- if (judgeScores.length < 2) return 1;
52
- const dimensionMap = /* @__PURE__ */ new Map();
53
- for (const judgeSet of judgeScores) {
54
- for (const s of judgeSet) {
55
- if (!dimensionMap.has(s.dimension)) dimensionMap.set(s.dimension, []);
56
- const arr = dimensionMap.get(s.dimension);
57
- if (arr.length === 0 || arr[arr.length - 1].length >= judgeScores.length) {
58
- arr.push([s.score]);
59
- } else {
60
- arr[arr.length - 1].push(s.score);
61
- }
62
- }
63
- }
64
- const allValues = [];
65
- const pairDiffs = [];
66
- for (const items of dimensionMap.values()) {
67
- for (const ratings of items) {
68
- if (ratings.length < 2) continue;
69
- for (const v of ratings) allValues.push(v);
70
- for (let i = 0; i < ratings.length; i++) {
71
- for (let j = i + 1; j < ratings.length; j++) {
72
- pairDiffs.push((ratings[i] - ratings[j]) ** 2);
73
- }
74
- }
75
- }
76
- }
77
- if (pairDiffs.length === 0 || allValues.length < 2) return 1;
78
- const observedDisagreement = pairDiffs.reduce((a, b) => a + b, 0) / pairDiffs.length;
79
- let expectedDisagreement = 0;
80
- let expectedCount = 0;
81
- for (let i = 0; i < allValues.length; i++) {
82
- for (let j = i + 1; j < allValues.length; j++) {
83
- expectedDisagreement += (allValues[i] - allValues[j]) ** 2;
84
- expectedCount++;
85
- }
86
- }
87
- expectedDisagreement = expectedCount > 0 ? expectedDisagreement / expectedCount : 0;
88
- if (expectedDisagreement === 0) return 1;
89
- return 1 - observedDisagreement / expectedDisagreement;
90
- }
91
- function mannWhitneyU(a, b) {
92
- if (a.length === 0 || b.length === 0) return { u: 0, p: 1 };
93
- const n1 = a.length;
94
- const n2 = b.length;
95
- const combined = [
96
- ...a.map((v) => ({ v, group: "a" })),
97
- ...b.map((v) => ({ v, group: "b" }))
98
- ].sort((x, y) => x.v - y.v);
99
- const ranks = new Array(combined.length);
100
- let i = 0;
101
- while (i < combined.length) {
102
- let j = i;
103
- while (j < combined.length && combined[j].v === combined[i].v) j++;
104
- const avgRank = (i + 1 + j) / 2;
105
- for (let k = i; k < j; k++) ranks[k] = avgRank;
106
- i = j;
107
- }
108
- let r1 = 0;
109
- for (let k = 0; k < combined.length; k++) {
110
- if (combined[k].group === "a") r1 += ranks[k];
111
- }
112
- const u1 = r1 - n1 * (n1 + 1) / 2;
113
- const u2 = n1 * n2 - u1;
114
- const u = Math.min(u1, u2);
115
- const mu = n1 * n2 / 2;
116
- const sigma = Math.sqrt(n1 * n2 * (n1 + n2 + 1) / 12);
117
- if (sigma === 0) return { u, p: 1 };
118
- const z = Math.abs(u - mu) / sigma;
119
- const p = 2 * (1 - normalCdf(z));
120
- return { u, p };
121
- }
122
- function partialCredit(current, target) {
123
- if (target <= 0) return 1;
124
- return Math.min(1, Math.max(0, current / target));
125
- }
126
- function pairedTTest(before, after) {
127
- if (before.length !== after.length) {
128
- throw new ValidationError(
129
- `pairedTTest: unequal sample sizes (${before.length} vs ${after.length})`
130
- );
131
- }
132
- const n = before.length;
133
- if (n < 2) return { t: 0, df: 0, p: 1 };
134
- const diffs = before.map((b, i) => after[i] - b);
135
- const mean = diffs.reduce((a, b) => a + b, 0) / n;
136
- const variance = diffs.reduce((acc, d) => acc + (d - mean) ** 2, 0) / (n - 1);
137
- const se = Math.sqrt(variance / n);
138
- if (se === 0) return { t: mean === 0 ? 0 : Infinity, df: n - 1, p: mean === 0 ? 1 : 0 };
139
- const t = mean / se;
140
- const df = n - 1;
141
- const p = 2 * (1 - studentTCdf(Math.abs(t), df));
142
- return { t, df, p };
143
- }
144
- function wilcoxonSignedRank(before, after) {
145
- if (before.length !== after.length) {
146
- throw new ValidationError(
147
- `wilcoxonSignedRank: unequal sample sizes (${before.length} vs ${after.length})`
148
- );
149
- }
150
- const diffs = before.map((b, i2) => after[i2] - b).filter((d) => d !== 0);
151
- const n = diffs.length;
152
- if (n < 6) return { w: 0, p: 1 };
153
- const absRanks = diffs.map((d, i2) => ({ abs: Math.abs(d), sign: Math.sign(d), i: i2 })).sort((a, b) => a.abs - b.abs);
154
- const ranks = new Array(n);
155
- let i = 0;
156
- while (i < n) {
157
- let j = i;
158
- while (j < n && absRanks[j].abs === absRanks[i].abs) j++;
159
- const avg = (i + 1 + j) / 2;
160
- for (let k = i; k < j; k++) ranks[absRanks[k].i] = avg;
161
- i = j;
162
- }
163
- let wPlus = 0;
164
- for (let k = 0; k < n; k++) if (diffs[k] > 0) wPlus += ranks[k];
165
- const mean = n * (n + 1) / 4;
166
- const variance = n * (n + 1) * (2 * n + 1) / 24;
167
- const z = (wPlus - mean) / Math.sqrt(variance);
168
- const p = 2 * (1 - normalCdf(Math.abs(z)));
169
- return { w: wPlus, p };
170
- }
171
- function cohensD(a, b) {
172
- if (a.length < 2 || b.length < 2) return 0;
173
- const meanA = a.reduce((x, y) => x + y, 0) / a.length;
174
- const meanB = b.reduce((x, y) => x + y, 0) / b.length;
175
- const varA = a.reduce((acc, x) => acc + (x - meanA) ** 2, 0) / (a.length - 1);
176
- const varB = b.reduce((acc, x) => acc + (x - meanB) ** 2, 0) / (b.length - 1);
177
- const pooled = Math.sqrt(
178
- ((a.length - 1) * varA + (b.length - 1) * varB) / (a.length + b.length - 2)
179
- );
180
- if (pooled === 0) return 0;
181
- return (meanB - meanA) / pooled;
182
- }
183
- function studentTCdf(t, df) {
184
- if (df <= 0) return 0.5;
185
- if (df > 100) return normalCdf(t);
186
- const x = df / (df + t * t);
187
- const a = df / 2;
188
- const b = 0.5;
189
- const ib = incompleteBeta(x, a, b);
190
- return t >= 0 ? 1 - 0.5 * ib : 0.5 * ib;
191
- }
192
- function incompleteBeta(x, a, b) {
193
- if (x <= 0) return 0;
194
- if (x >= 1) return 1;
195
- const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b);
196
- const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a;
197
- const maxIter = 200;
198
- const eps = 3e-7;
199
- let c = 1;
200
- let d = 1 - (a + b) * x / (a + 1);
201
- if (Math.abs(d) < 1e-30) d = 1e-30;
202
- d = 1 / d;
203
- let f = d;
204
- for (let m = 1; m <= maxIter; m++) {
205
- const m2 = 2 * m;
206
- let num = m * (b - m) * x / ((a + m2 - 1) * (a + m2));
207
- d = 1 + num * d;
208
- if (Math.abs(d) < 1e-30) d = 1e-30;
209
- c = 1 + num / c;
210
- if (Math.abs(c) < 1e-30) c = 1e-30;
211
- d = 1 / d;
212
- f *= d * c;
213
- num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1));
214
- d = 1 + num * d;
215
- if (Math.abs(d) < 1e-30) d = 1e-30;
216
- c = 1 + num / c;
217
- if (Math.abs(c) < 1e-30) c = 1e-30;
218
- d = 1 / d;
219
- const delta = d * c;
220
- f *= delta;
221
- if (Math.abs(delta - 1) < eps) break;
222
- }
223
- return front * f;
224
- }
225
- function lnGamma(z) {
226
- const g = 7;
227
- const coefs = [
228
- 0.9999999999998099,
229
- 676.5203681218851,
230
- -1259.1392167224028,
231
- 771.3234287776531,
232
- -176.6150291621406,
233
- 12.507343278686905,
234
- -0.13857109526572012,
235
- 9984369578019572e-21,
236
- 15056327351493116e-23
237
- ];
238
- if (z < 0.5) {
239
- return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z);
240
- }
241
- z -= 1;
242
- let x = coefs[0];
243
- for (let i = 1; i < g + 2; i++) x += coefs[i] / (z + i);
244
- const t = z + g + 0.5;
245
- return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x);
246
- }
247
- function normalCdf(x) {
248
- const a1 = 0.254829592;
249
- const a2 = -0.284496736;
250
- const a3 = 1.421413741;
251
- const a4 = -1.453152027;
252
- const a5 = 1.061405429;
253
- const p = 0.3275911;
254
- const sign = x < 0 ? -1 : 1;
255
- const absX = Math.abs(x);
256
- const t = 1 / (1 + p * absX);
257
- const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp(-absX * absX / 2);
258
- return 0.5 * (1 + sign * y);
259
- }
260
-
261
- export {
262
- normalizeScores,
263
- weightedMean,
264
- confidenceInterval,
265
- interRaterReliability,
266
- mannWhitneyU,
267
- partialCredit,
268
- pairedTTest,
269
- wilcoxonSignedRank,
270
- cohensD
271
- };
272
- //# sourceMappingURL=chunk-I4MBDTY5.js.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/statistics.ts"],"sourcesContent":["import { ValidationError } from './errors'\nimport type { JudgeScore } from './types'\n\n/** Dimensions where lower raw score = better outcome (inverted semantics) */\nconst INVERTED_DIMENSIONS = new Set(['hallucination', 'false_confidence', 'worst_failure'])\n\n/**\n * Normalize scores so all dimensions follow \"higher = better\".\n * Inverted dimensions (hallucination, false_confidence, worst_failure)\n * already use inverted scoring in the prompt (10 = no hallucination),\n * but this function ensures consistency if raw scores leak through.\n */\nexport function normalizeScores(scores: JudgeScore[]): JudgeScore[] {\n return scores.map((s) => {\n if (INVERTED_DIMENSIONS.has(s.dimension)) {\n return s\n }\n return s\n })\n}\n\n/** Weighted mean — falls back to uniform weights when omitted */\nexport function weightedMean(scores: { score: number; weight?: number }[]): number {\n if (scores.length === 0) return 0\n let totalWeight = 0\n let weightedSum = 0\n for (const { score, weight } of scores) {\n const w = weight ?? 1\n weightedSum += score * w\n totalWeight += w\n }\n return totalWeight > 0 ? weightedSum / totalWeight : 0\n}\n\n/** Bootstrap confidence interval */\nexport function confidenceInterval(\n scores: number[],\n confidence = 0.95,\n): { mean: number; lower: number; upper: number } {\n if (scores.length === 0) return { mean: 0, lower: 0, upper: 0 }\n if (scores.length === 1) return { mean: scores[0]!, lower: scores[0]!, upper: scores[0]! }\n\n const n = scores.length\n const mean = scores.reduce((a, b) => a + b, 0) / n\n\n const B = 1000\n const bootstrapMeans: number[] = []\n\n for (let i = 0; i < B; i++) {\n let sum = 0\n for (let j = 0; j < n; j++) {\n sum += scores[Math.floor(Math.random() * n)]!\n }\n bootstrapMeans.push(sum / n)\n }\n\n bootstrapMeans.sort((a, b) => a - b)\n\n const alpha = 1 - confidence\n const lowerIdx = Math.floor((alpha / 2) * B)\n const upperIdx = Math.floor((1 - alpha / 2) * B) - 1\n\n return {\n mean,\n lower: bootstrapMeans[lowerIdx]!,\n upper: bootstrapMeans[Math.min(upperIdx, B - 1)]!,\n }\n}\n\n/**\n * Inter-rater reliability — simplified Krippendorff's alpha.\n *\n * Each inner array is one judge's scores for all items.\n * All arrays must have the same length (same items scored).\n */\nexport function interRaterReliability(judgeScores: JudgeScore[][]): number {\n if (judgeScores.length < 2) return 1\n\n // Group scores by dimension across judges\n const dimensionMap = new Map<string, number[][]>()\n for (const judgeSet of judgeScores) {\n for (const s of judgeSet) {\n if (!dimensionMap.has(s.dimension)) dimensionMap.set(s.dimension, [])\n const arr = dimensionMap.get(s.dimension)!\n if (arr.length === 0 || arr[arr.length - 1]!.length >= judgeScores.length) {\n arr.push([s.score])\n } else {\n arr[arr.length - 1]!.push(s.score)\n }\n }\n }\n\n // Collect all paired ratings\n const allValues: number[] = []\n const pairDiffs: number[] = []\n\n for (const items of dimensionMap.values()) {\n for (const ratings of items) {\n if (ratings.length < 2) continue\n for (const v of ratings) allValues.push(v)\n for (let i = 0; i < ratings.length; i++) {\n for (let j = i + 1; j < ratings.length; j++) {\n pairDiffs.push((ratings[i]! - ratings[j]!) ** 2)\n }\n }\n }\n }\n\n if (pairDiffs.length === 0 || allValues.length < 2) return 1\n\n const observedDisagreement = pairDiffs.reduce((a, b) => a + b, 0) / pairDiffs.length\n\n // Expected disagreement from all possible pairings of values\n let expectedDisagreement = 0\n let expectedCount = 0\n for (let i = 0; i < allValues.length; i++) {\n for (let j = i + 1; j < allValues.length; j++) {\n expectedDisagreement += (allValues[i]! - allValues[j]!) ** 2\n expectedCount++\n }\n }\n expectedDisagreement = expectedCount > 0 ? expectedDisagreement / expectedCount : 0\n\n if (expectedDisagreement === 0) return 1\n return 1 - observedDisagreement / expectedDisagreement\n}\n\n/**\n * Mann-Whitney U test for comparing two independent groups.\n * Returns U statistic and approximate p-value (normal approximation).\n */\nexport function mannWhitneyU(a: number[], b: number[]): { u: number; p: number } {\n if (a.length === 0 || b.length === 0) return { u: 0, p: 1 }\n\n const n1 = a.length\n const n2 = b.length\n\n // Rank all values together\n const combined = [\n ...a.map((v) => ({ v, group: 'a' as const })),\n ...b.map((v) => ({ v, group: 'b' as const })),\n ].sort((x, y) => x.v - y.v)\n\n // Assign ranks with tie handling\n const ranks: number[] = new Array(combined.length)\n let i = 0\n while (i < combined.length) {\n let j = i\n while (j < combined.length && combined[j]!.v === combined[i]!.v) j++\n const avgRank = (i + 1 + j) / 2\n for (let k = i; k < j; k++) ranks[k] = avgRank\n i = j\n }\n\n // Sum ranks for group a\n let r1 = 0\n for (let k = 0; k < combined.length; k++) {\n if (combined[k]!.group === 'a') r1 += ranks[k]!\n }\n\n const u1 = r1 - (n1 * (n1 + 1)) / 2\n const u2 = n1 * n2 - u1\n const u = Math.min(u1, u2)\n\n // Normal approximation for p-value\n const mu = (n1 * n2) / 2\n const sigma = Math.sqrt((n1 * n2 * (n1 + n2 + 1)) / 12)\n\n if (sigma === 0) return { u, p: 1 }\n\n const z = Math.abs(u - mu) / sigma\n // Two-tailed p-value from z-score (approximation)\n const p = 2 * (1 - normalCdf(z))\n\n return { u, p }\n}\n\n/** Partial credit: returns 0-1 ratio of current toward target */\nexport function partialCredit(current: number, target: number): number {\n if (target <= 0) return 1\n return Math.min(1, Math.max(0, current / target))\n}\n\n/**\n * Paired t-test — before/after measurements on the SAME items.\n * Pairing removes inter-item variance, giving tighter significance than\n * an unpaired test when comparing prompt v1 vs prompt v2 on identical\n * scenarios.\n */\nexport function pairedTTest(\n before: number[],\n after: number[],\n): { t: number; df: number; p: number } {\n if (before.length !== after.length) {\n throw new ValidationError(\n `pairedTTest: unequal sample sizes (${before.length} vs ${after.length})`,\n )\n }\n const n = before.length\n if (n < 2) return { t: 0, df: 0, p: 1 }\n\n const diffs = before.map((b, i) => after[i]! - b)\n const mean = diffs.reduce((a, b) => a + b, 0) / n\n const variance = diffs.reduce((acc, d) => acc + (d - mean) ** 2, 0) / (n - 1)\n const se = Math.sqrt(variance / n)\n if (se === 0) return { t: mean === 0 ? 0 : Infinity, df: n - 1, p: mean === 0 ? 1 : 0 }\n\n const t = mean / se\n const df = n - 1\n const p = 2 * (1 - studentTCdf(Math.abs(t), df))\n return { t, df, p }\n}\n\n/**\n * Wilcoxon signed-rank test — paired non-parametric alternative.\n * Use when the differences aren't normally distributed.\n */\nexport function wilcoxonSignedRank(before: number[], after: number[]): { w: number; p: number } {\n if (before.length !== after.length) {\n throw new ValidationError(\n `wilcoxonSignedRank: unequal sample sizes (${before.length} vs ${after.length})`,\n )\n }\n const diffs = before.map((b, i) => after[i]! - b).filter((d) => d !== 0)\n const n = diffs.length\n if (n < 6) return { w: 0, p: 1 }\n\n const absRanks = diffs\n .map((d, i) => ({ abs: Math.abs(d), sign: Math.sign(d), i }))\n .sort((a, b) => a.abs - b.abs)\n const ranks: number[] = new Array(n)\n let i = 0\n while (i < n) {\n let j = i\n while (j < n && absRanks[j]!.abs === absRanks[i]!.abs) j++\n const avg = (i + 1 + j) / 2\n for (let k = i; k < j; k++) ranks[absRanks[k]!.i] = avg\n i = j\n }\n let wPlus = 0\n for (let k = 0; k < n; k++) if (diffs[k]! > 0) wPlus += ranks[k]!\n\n const mean = (n * (n + 1)) / 4\n const variance = (n * (n + 1) * (2 * n + 1)) / 24\n const z = (wPlus - mean) / Math.sqrt(variance)\n const p = 2 * (1 - normalCdf(Math.abs(z)))\n return { w: wPlus, p }\n}\n\n/**\n * Cohen's d — standardized effect size for two independent groups.\n * Positive d means group b has higher mean than group a.\n * Rule of thumb: |d| < 0.2 negligible, 0.2–0.5 small, 0.5–0.8 medium, > 0.8 large.\n */\nexport function cohensD(a: number[], b: number[]): number {\n if (a.length < 2 || b.length < 2) return 0\n const meanA = a.reduce((x, y) => x + y, 0) / a.length\n const meanB = b.reduce((x, y) => x + y, 0) / b.length\n const varA = a.reduce((acc, x) => acc + (x - meanA) ** 2, 0) / (a.length - 1)\n const varB = b.reduce((acc, x) => acc + (x - meanB) ** 2, 0) / (b.length - 1)\n const pooled = Math.sqrt(\n ((a.length - 1) * varA + (b.length - 1) * varB) / (a.length + b.length - 2),\n )\n if (pooled === 0) return 0\n return (meanB - meanA) / pooled\n}\n\n/** Student-t CDF approximation via Abramowitz-Stegun series. */\nfunction studentTCdf(t: number, df: number): number {\n if (df <= 0) return 0.5\n if (df > 100) return normalCdf(t)\n const x = df / (df + t * t)\n const a = df / 2\n const b = 0.5\n const ib = incompleteBeta(x, a, b)\n return t >= 0 ? 1 - 0.5 * ib : 0.5 * ib\n}\n\n/** Regularized incomplete beta function via continued fraction (Lentz). */\nfunction incompleteBeta(x: number, a: number, b: number): number {\n if (x <= 0) return 0\n if (x >= 1) return 1\n const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b)\n const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a\n const maxIter = 200\n const eps = 3e-7\n let c = 1\n let d = 1 - ((a + b) * x) / (a + 1)\n if (Math.abs(d) < 1e-30) d = 1e-30\n d = 1 / d\n let f = d\n for (let m = 1; m <= maxIter; m++) {\n const m2 = 2 * m\n let num = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n f *= d * c\n num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n const delta = d * c\n f *= delta\n if (Math.abs(delta - 1) < eps) break\n }\n return front * f\n}\n\n/** Lanczos approximation to ln Γ(z). */\nfunction lnGamma(z: number): number {\n const g = 7\n const coefs = [\n 0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,\n -176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,\n 1.5056327351493116e-7,\n ]\n if (z < 0.5) {\n return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z)\n }\n z -= 1\n let x = coefs[0]!\n for (let i = 1; i < g + 2; i++) x += coefs[i]! / (z + i)\n const t = z + g + 0.5\n return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x)\n}\n\n// Standard normal CDF approximation (Abramowitz and Stegun)\nfunction normalCdf(x: number): number {\n const a1 = 0.254829592\n const a2 = -0.284496736\n const a3 = 1.421413741\n const a4 = -1.453152027\n const a5 = 1.061405429\n const p = 0.3275911\n\n const sign = x < 0 ? -1 : 1\n const absX = Math.abs(x)\n const t = 1 / (1 + p * absX)\n const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp((-absX * absX) / 2)\n\n return 0.5 * (1 + sign * y)\n}\n"],"mappings":";;;;;AAIA,IAAM,sBAAsB,oBAAI,IAAI,CAAC,iBAAiB,oBAAoB,eAAe,CAAC;AAQnF,SAAS,gBAAgB,QAAoC;AAClE,SAAO,OAAO,IAAI,CAAC,MAAM;AACvB,QAAI,oBAAoB,IAAI,EAAE,SAAS,GAAG;AACxC,aAAO;AAAA,IACT;AACA,WAAO;AAAA,EACT,CAAC;AACH;AAGO,SAAS,aAAa,QAAsD;AACjF,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,MAAI,cAAc;AAClB,MAAI,cAAc;AAClB,aAAW,EAAE,OAAO,OAAO,KAAK,QAAQ;AACtC,UAAM,IAAI,UAAU;AACpB,mBAAe,QAAQ;AACvB,mBAAe;AAAA,EACjB;AACA,SAAO,cAAc,IAAI,cAAc,cAAc;AACvD;AAGO,SAAS,mBACd,QACA,aAAa,MACmC;AAChD,MAAI,OAAO,WAAW,EAAG,QAAO,EAAE,MAAM,GAAG,OAAO,GAAG,OAAO,EAAE;AAC9D,MAAI,OAAO,WAAW,EAAG,QAAO,EAAE,MAAM,OAAO,CAAC,GAAI,OAAO,OAAO,CAAC,GAAI,OAAO,OAAO,CAAC,EAAG;AAEzF,QAAM,IAAI,OAAO;AACjB,QAAM,OAAO,OAAO,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAEjD,QAAM,IAAI;AACV,QAAM,iBAA2B,CAAC;AAElC,WAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,QAAI,MAAM;AACV,aAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,aAAO,OAAO,KAAK,MAAM,KAAK,OAAO,IAAI,CAAC,CAAC;AAAA,IAC7C;AACA,mBAAe,KAAK,MAAM,CAAC;AAAA,EAC7B;AAEA,iBAAe,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAEnC,QAAM,QAAQ,IAAI;AAClB,QAAM,WAAW,KAAK,MAAO,QAAQ,IAAK,CAAC;AAC3C,QAAM,WAAW,KAAK,OAAO,IAAI,QAAQ,KAAK,CAAC,IAAI;AAEnD,SAAO;AAAA,IACL;AAAA,IACA,OAAO,eAAe,QAAQ;AAAA,IAC9B,OAAO,eAAe,KAAK,IAAI,UAAU,IAAI,CAAC,CAAC;AAAA,EACjD;AACF;AAQO,SAAS,sBAAsB,aAAqC;AACzE,MAAI,YAAY,SAAS,EAAG,QAAO;AAGnC,QAAM,eAAe,oBAAI,IAAwB;AACjD,aAAW,YAAY,aAAa;AAClC,eAAW,KAAK,UAAU;AACxB,UAAI,CAAC,aAAa,IAAI,EAAE,SAAS,EAAG,cAAa,IAAI,EAAE,WAAW,CAAC,CAAC;AACpE,YAAM,MAAM,aAAa,IAAI,EAAE,SAAS;AACxC,UAAI,IAAI,WAAW,KAAK,IAAI,IAAI,SAAS,CAAC,EAAG,UAAU,YAAY,QAAQ;AACzE,YAAI,KAAK,CAAC,EAAE,KAAK,CAAC;AAAA,MACpB,OAAO;AACL,YAAI,IAAI,SAAS,CAAC,EAAG,KAAK,EAAE,KAAK;AAAA,MACnC;AAAA,IACF;AAAA,EACF;AAGA,QAAM,YAAsB,CAAC;AAC7B,QAAM,YAAsB,CAAC;AAE7B,aAAW,SAAS,aAAa,OAAO,GAAG;AACzC,eAAW,WAAW,OAAO;AAC3B,UAAI,QAAQ,SAAS,EAAG;AACxB,iBAAW,KAAK,QAAS,WAAU,KAAK,CAAC;AACzC,eAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,iBAAS,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AAC3C,oBAAU,MAAM,QAAQ,CAAC,IAAK,QAAQ,CAAC,MAAO,CAAC;AAAA,QACjD;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,MAAI,UAAU,WAAW,KAAK,UAAU,SAAS,EAAG,QAAO;AAE3D,QAAM,uBAAuB,UAAU,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,UAAU;AAG9E,MAAI,uBAAuB;AAC3B,MAAI,gBAAgB;AACpB,WAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AACzC,aAAS,IAAI,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AAC7C,+BAAyB,UAAU,CAAC,IAAK,UAAU,CAAC,MAAO;AAC3D;AAAA,IACF;AAAA,EACF;AACA,yBAAuB,gBAAgB,IAAI,uBAAuB,gBAAgB;AAElF,MAAI,yBAAyB,EAAG,QAAO;AACvC,SAAO,IAAI,uBAAuB;AACpC;AAMO,SAAS,aAAa,GAAa,GAAuC;AAC/E,MAAI,EAAE,WAAW,KAAK,EAAE,WAAW,EAAG,QAAO,EAAE,GAAG,GAAG,GAAG,EAAE;AAE1D,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,EAAE;AAGb,QAAM,WAAW;AAAA,IACf,GAAG,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,IAAa,EAAE;AAAA,IAC5C,GAAG,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,IAAa,EAAE;AAAA,EAC9C,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,CAAC;AAG1B,QAAM,QAAkB,IAAI,MAAM,SAAS,MAAM;AACjD,MAAI,IAAI;AACR,SAAO,IAAI,SAAS,QAAQ;AAC1B,QAAI,IAAI;AACR,WAAO,IAAI,SAAS,UAAU,SAAS,CAAC,EAAG,MAAM,SAAS,CAAC,EAAG,EAAG;AACjE,UAAM,WAAW,IAAI,IAAI,KAAK;AAC9B,aAAS,IAAI,GAAG,IAAI,GAAG,IAAK,OAAM,CAAC,IAAI;AACvC,QAAI;AAAA,EACN;AAGA,MAAI,KAAK;AACT,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,QAAI,SAAS,CAAC,EAAG,UAAU,IAAK,OAAM,MAAM,CAAC;AAAA,EAC/C;AAEA,QAAM,KAAK,KAAM,MAAM,KAAK,KAAM;AAClC,QAAM,KAAK,KAAK,KAAK;AACrB,QAAM,IAAI,KAAK,IAAI,IAAI,EAAE;AAGzB,QAAM,KAAM,KAAK,KAAM;AACvB,QAAM,QAAQ,KAAK,KAAM,KAAK,MAAM,KAAK,KAAK,KAAM,EAAE;AAEtD,MAAI,UAAU,EAAG,QAAO,EAAE,GAAG,GAAG,EAAE;AAElC,QAAM,IAAI,KAAK,IAAI,IAAI,EAAE,IAAI;AAE7B,QAAM,IAAI,KAAK,IAAI,UAAU,CAAC;AAE9B,SAAO,EAAE,GAAG,EAAE;AAChB;AAGO,SAAS,cAAc,SAAiB,QAAwB;AACrE,MAAI,UAAU,EAAG,QAAO;AACxB,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,UAAU,MAAM,CAAC;AAClD;AAQO,SAAS,YACd,QACA,OACsC;AACtC,MAAI,OAAO,WAAW,MAAM,QAAQ;AAClC,UAAM,IAAI;AAAA,MACR,sCAAsC,OAAO,MAAM,OAAO,MAAM,MAAM;AAAA,IACxE;AAAA,EACF;AACA,QAAM,IAAI,OAAO;AACjB,MAAI,IAAI,EAAG,QAAO,EAAE,GAAG,GAAG,IAAI,GAAG,GAAG,EAAE;AAEtC,QAAM,QAAQ,OAAO,IAAI,CAAC,GAAG,MAAM,MAAM,CAAC,IAAK,CAAC;AAChD,QAAM,OAAO,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAChD,QAAM,WAAW,MAAM,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,SAAS,GAAG,CAAC,KAAK,IAAI;AAC3E,QAAM,KAAK,KAAK,KAAK,WAAW,CAAC;AACjC,MAAI,OAAO,EAAG,QAAO,EAAE,GAAG,SAAS,IAAI,IAAI,UAAU,IAAI,IAAI,GAAG,GAAG,SAAS,IAAI,IAAI,EAAE;AAEtF,QAAM,IAAI,OAAO;AACjB,QAAM,KAAK,IAAI;AACf,QAAM,IAAI,KAAK,IAAI,YAAY,KAAK,IAAI,CAAC,GAAG,EAAE;AAC9C,SAAO,EAAE,GAAG,IAAI,EAAE;AACpB;AAMO,SAAS,mBAAmB,QAAkB,OAA2C;AAC9F,MAAI,OAAO,WAAW,MAAM,QAAQ;AAClC,UAAM,IAAI;AAAA,MACR,6CAA6C,OAAO,MAAM,OAAO,MAAM,MAAM;AAAA,IAC/E;AAAA,EACF;AACA,QAAM,QAAQ,OAAO,IAAI,CAAC,GAAGA,OAAM,MAAMA,EAAC,IAAK,CAAC,EAAE,OAAO,CAAC,MAAM,MAAM,CAAC;AACvE,QAAM,IAAI,MAAM;AAChB,MAAI,IAAI,EAAG,QAAO,EAAE,GAAG,GAAG,GAAG,EAAE;AAE/B,QAAM,WAAW,MACd,IAAI,CAAC,GAAGA,QAAO,EAAE,KAAK,KAAK,IAAI,CAAC,GAAG,MAAM,KAAK,KAAK,CAAC,GAAG,GAAAA,GAAE,EAAE,EAC3D,KAAK,CAAC,GAAG,MAAM,EAAE,MAAM,EAAE,GAAG;AAC/B,QAAM,QAAkB,IAAI,MAAM,CAAC;AACnC,MAAI,IAAI;AACR,SAAO,IAAI,GAAG;AACZ,QAAI,IAAI;AACR,WAAO,IAAI,KAAK,SAAS,CAAC,EAAG,QAAQ,SAAS,CAAC,EAAG,IAAK;AACvD,UAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,aAAS,IAAI,GAAG,IAAI,GAAG,IAAK,OAAM,SAAS,CAAC,EAAG,CAAC,IAAI;AACpD,QAAI;AAAA,EACN;AACA,MAAI,QAAQ;AACZ,WAAS,IAAI,GAAG,IAAI,GAAG,IAAK,KAAI,MAAM,CAAC,IAAK,EAAG,UAAS,MAAM,CAAC;AAE/D,QAAM,OAAQ,KAAK,IAAI,KAAM;AAC7B,QAAM,WAAY,KAAK,IAAI,MAAM,IAAI,IAAI,KAAM;AAC/C,QAAM,KAAK,QAAQ,QAAQ,KAAK,KAAK,QAAQ;AAC7C,QAAM,IAAI,KAAK,IAAI,UAAU,KAAK,IAAI,CAAC,CAAC;AACxC,SAAO,EAAE,GAAG,OAAO,EAAE;AACvB;AAOO,SAAS,QAAQ,GAAa,GAAqB;AACxD,MAAI,EAAE,SAAS,KAAK,EAAE,SAAS,EAAG,QAAO;AACzC,QAAM,QAAQ,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC/C,QAAM,QAAQ,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC/C,QAAM,OAAO,EAAE,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,UAAU,GAAG,CAAC,KAAK,EAAE,SAAS;AAC3E,QAAM,OAAO,EAAE,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,UAAU,GAAG,CAAC,KAAK,EAAE,SAAS;AAC3E,QAAM,SAAS,KAAK;AAAA,MAChB,EAAE,SAAS,KAAK,QAAQ,EAAE,SAAS,KAAK,SAAS,EAAE,SAAS,EAAE,SAAS;AAAA,EAC3E;AACA,MAAI,WAAW,EAAG,QAAO;AACzB,UAAQ,QAAQ,SAAS;AAC3B;AAGA,SAAS,YAAY,GAAW,IAAoB;AAClD,MAAI,MAAM,EAAG,QAAO;AACpB,MAAI,KAAK,IAAK,QAAO,UAAU,CAAC;AAChC,QAAM,IAAI,MAAM,KAAK,IAAI;AACzB,QAAM,IAAI,KAAK;AACf,QAAM,IAAI;AACV,QAAM,KAAK,eAAe,GAAG,GAAG,CAAC;AACjC,SAAO,KAAK,IAAI,IAAI,MAAM,KAAK,MAAM;AACvC;AAGA,SAAS,eAAe,GAAW,GAAW,GAAmB;AAC/D,MAAI,KAAK,EAAG,QAAO;AACnB,MAAI,KAAK,EAAG,QAAO;AACnB,QAAM,SAAS,QAAQ,CAAC,IAAI,QAAQ,CAAC,IAAI,QAAQ,IAAI,CAAC;AACtD,QAAM,QAAQ,KAAK,IAAI,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,CAAC,IAAI,IAAI,MAAM,IAAI;AACzE,QAAM,UAAU;AAChB,QAAM,MAAM;AACZ,MAAI,IAAI;AACR,MAAI,IAAI,KAAM,IAAI,KAAK,KAAM,IAAI;AACjC,MAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,MAAI,IAAI;AACR,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,KAAK,SAAS,KAAK;AACjC,UAAM,KAAK,IAAI;AACf,QAAI,MAAO,KAAK,IAAI,KAAK,MAAO,IAAI,KAAK,MAAM,IAAI;AACnD,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,SAAK,IAAI;AACT,UAAM,GAAG,IAAI,MAAM,IAAI,IAAI,KAAK,OAAO,IAAI,OAAO,IAAI,KAAK;AAC3D,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,UAAM,QAAQ,IAAI;AAClB,SAAK;AACL,QAAI,KAAK,IAAI,QAAQ,CAAC,IAAI,IAAK;AAAA,EACjC;AACA,SAAO,QAAQ;AACjB;AAGA,SAAS,QAAQ,GAAmB;AAClC,QAAM,IAAI;AACV,QAAM,QAAQ;AAAA,IACZ;AAAA,IAAqB;AAAA,IAAmB;AAAA,IAAqB;AAAA,IAC7D;AAAA,IAAqB;AAAA,IAAoB;AAAA,IAAsB;AAAA,IAC/D;AAAA,EACF;AACA,MAAI,IAAI,KAAK;AACX,WAAO,KAAK,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,QAAQ,IAAI,CAAC;AAAA,EAClE;AACA,OAAK;AACL,MAAI,IAAI,MAAM,CAAC;AACf,WAAS,IAAI,GAAG,IAAI,IAAI,GAAG,IAAK,MAAK,MAAM,CAAC,KAAM,IAAI;AACtD,QAAM,IAAI,IAAI,IAAI;AAClB,SAAO,MAAM,KAAK,IAAI,IAAI,KAAK,EAAE,KAAK,IAAI,OAAO,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC;AAC/E;AAGA,SAAS,UAAU,GAAmB;AACpC,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,IAAI;AAEV,QAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,QAAM,OAAO,KAAK,IAAI,CAAC;AACvB,QAAM,IAAI,KAAK,IAAI,IAAI;AACvB,QAAM,IAAI,QAAQ,KAAK,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,KAAK,IAAK,CAAC,OAAO,OAAQ,CAAC;AAE9F,SAAO,OAAO,IAAI,OAAO;AAC3B;","names":["i"]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/failure-taxonomy.ts","../src/pipelines/failure-cluster.ts","../src/tool-use-metrics.ts","../src/baseline.ts"],"sourcesContent":["/**\n * Failure taxonomy — canonical classes + a default classifier.\n *\n * Every failed run should end up in a named class. The classifier here\n * is rule-based (fast, deterministic); an LLM fallback can be added by\n * the consumer for novel cases and trained into the rule base over time.\n *\n * Consumers call `classifyFailure(run, spans, events)` and persist the\n * returned class as `Run.outcome.failureClass`.\n */\n\nimport type { FailureClass, Run, Span, TraceEvent } from './trace/schema'\nimport { FAILURE_CLASSES } from './trace/schema'\n\nexport { FAILURE_CLASSES, type FailureClass }\n\nexport interface FailureContext {\n run: Run\n spans: Span[]\n events: TraceEvent[]\n}\n\nexport interface FailureClassification {\n failureClass: FailureClass\n reason: string\n triggerSpanId?: string\n triggerEventId?: string\n}\n\n/** Ordered rules — first match wins. */\nexport interface FailureRule {\n id: string\n match: (ctx: FailureContext) => {\n failureClass: FailureClass\n reason: string\n triggerSpanId?: string\n triggerEventId?: string\n } | null\n}\n\nexport const DEFAULT_RULES: FailureRule[] = [\n // Outcome already named? Respect it.\n {\n id: 'explicit-outcome',\n match: ({ run }) => {\n const fc = run.outcome?.failureClass\n if (fc && fc !== 'unknown')\n return { failureClass: fc, reason: 'outcome.failureClass set explicitly' }\n return null\n },\n },\n {\n id: 'knowledge-readiness-blocked',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'readiness_scored' &&\n e.payload.passed === false,\n )\n return event\n ? {\n failureClass: 'knowledge_readiness_blocked',\n reason: 'knowledge readiness report blocked execution',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'bad-integration-manifest',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n ((e.payload.kind === 'integration_manifest_validated' && e.payload.valid === false) ||\n (e.payload.kind === 'integration_invoke_failed' &&\n e.payload.code === 'manifest_invalid')),\n )\n return event\n ? {\n failureClass: 'bad_integration_manifest',\n reason: 'integration manifest validation failed before launch',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'missing-integration-connection',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_manifest_resolved' &&\n hasResolutionStatus(e.payload, 'missing_connection'),\n )\n return event\n ? {\n failureClass: 'missing_integration_connection',\n reason: 'required integration connection was missing',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'missing-integration-scope',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n ((e.payload.kind === 'integration_manifest_resolved' && hasMissingScopes(e.payload)) ||\n (e.payload.kind === 'integration_invoke_failed' && e.payload.code === 'scope_denied')),\n )\n return event\n ? {\n failureClass: 'missing_integration_scope',\n reason: 'integration grant or connection lacks required scopes',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'integration-approval-required',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n ((e.payload.kind === 'integration_invoke' && e.payload.status === 'approval_required') ||\n (e.payload.kind === 'integration_invoke_failed' &&\n e.payload.code === 'approval_required') ||\n e.payload.kind === 'integration_approval_required'),\n )\n return event\n ? {\n failureClass: 'integration_approval_required',\n reason: 'integration write paused for user approval',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'integration-auth-expired',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_invoke_failed' &&\n (e.payload.code === 'auth_expired' ||\n e.payload.code === 'connection_not_active' ||\n e.payload.code === 'capability_expired' ||\n e.payload.status === 'expired'),\n )\n return event\n ? {\n failureClass: 'integration_auth_expired',\n reason: 'integration connection or capability expired',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'unsafe-integration-write-denied',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_invoke_failed' &&\n (e.payload.code === 'unsafe_write_denied' ||\n e.payload.code === 'policy_denied' ||\n e.payload.code === 'action_denied'),\n )\n return event\n ? {\n failureClass: 'unsafe_integration_write_denied',\n reason: 'integration write was denied by policy or capability scope',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'integration-provider-failure',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_invoke_failed' &&\n ![\n 'scope_denied',\n 'approval_required',\n 'auth_expired',\n 'connection_not_active',\n 'capability_expired',\n 'unsafe_write_denied',\n 'policy_denied',\n 'action_denied',\n 'manifest_invalid',\n ].includes(String(e.payload.code)),\n )\n return event\n ? {\n failureClass: 'integration_provider_failure',\n reason: 'integration provider invocation failed',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'missing-credentials',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'knowledge_gap' &&\n e.payload.category === 'credential_or_secret',\n )\n return event\n ? {\n failureClass: 'missing_credentials',\n reason: 'required credential or secret was missing',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'bad-retrieval',\n match: ({ run, spans }) => {\n if (run.outcome?.pass !== false) return null\n const retrieval = spans.find(\n (s) =>\n s.kind === 'retrieval' && (s.hits.length === 0 || s.hits.every((hit) => hit.score <= 0)),\n )\n return retrieval\n ? {\n failureClass: 'bad_retrieval',\n reason: 'retrieval returned no useful hits for a failed run',\n triggerSpanId: retrieval.spanId,\n }\n : null\n },\n },\n {\n id: 'insufficient-evidence',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'knowledge_gap' &&\n e.payload.reason === 'insufficient_evidence',\n )\n return event\n ? {\n failureClass: 'insufficient_evidence',\n reason: 'task proceeded with insufficient supporting evidence',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'contradictory-evidence',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'knowledge_gap' &&\n e.payload.reason === 'contradictory_evidence',\n )\n return event\n ? {\n failureClass: 'contradictory_evidence',\n reason: 'supporting evidence contradicted itself',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n // Budget breach events\n {\n id: 'budget-breach',\n match: ({ events }) => {\n const breach = events.find((e) => e.kind === 'budget_breach')\n return breach\n ? {\n failureClass: 'budget_exceeded',\n reason: `budget breached on ${breach.payload.dimension ?? 'unknown dimension'}`,\n triggerEventId: breach.eventId,\n }\n : null\n },\n },\n // Policy violations\n {\n id: 'policy-violation',\n match: ({ events }) => {\n const e = events.find((x) => x.kind === 'policy_violation')\n return e\n ? {\n failureClass: 'policy_violation',\n reason: 'policy_violation event emitted',\n triggerEventId: e.eventId,\n }\n : null\n },\n },\n // Sandbox non-zero exit code\n {\n id: 'sandbox-failure',\n match: ({ spans }) => {\n const s = spans.find(\n (x) => x.kind === 'sandbox' && typeof x.exitCode === 'number' && x.exitCode !== 0,\n )\n if (!s) return null\n return {\n failureClass: 'sandbox_failure',\n reason: `sandbox exited ${(s as Extract<Span, { kind: 'sandbox' }>).exitCode}`,\n triggerSpanId: s.spanId,\n }\n },\n },\n // Timeout: run aborted by external signal\n {\n id: 'timeout',\n match: ({ run, events }) => {\n if (run.status !== 'aborted') return null\n const hasTimeout = events.some(\n (e) =>\n e.kind === 'error' &&\n String(e.payload.reason ?? '')\n .toLowerCase()\n .includes('timeout'),\n )\n const note = (run.outcome?.notes ?? '').toLowerCase()\n if (hasTimeout || note.includes('timeout') || note.includes('deadline')) {\n return { failureClass: 'timeout', reason: 'timeout signal observed' }\n }\n return null\n },\n },\n // Tool recovery failure: many consecutive tool errors on the same tool\n {\n id: 'tool-recovery-failure',\n match: ({ spans }) => {\n const tools = spans.filter((s) => s.kind === 'tool')\n const byTool = new Map<string, Span[]>()\n for (const t of tools) {\n const name = (t as Extract<Span, { kind: 'tool' }>).toolName\n const arr = byTool.get(name) ?? []\n arr.push(t)\n byTool.set(name, arr)\n }\n for (const [name, arr] of byTool) {\n const errs = arr.filter((s) => s.status === 'error')\n if (errs.length >= 3 && errs.length === arr.length) {\n return {\n failureClass: 'tool_recovery_failure',\n reason: `${errs.length} consecutive errors on tool \"${name}\"`,\n triggerSpanId: errs[errs.length - 1]!.spanId,\n }\n }\n }\n return null\n },\n },\n // Tool selection error: the run failed and agent called zero tools despite having them\n {\n id: 'tool-selection-error',\n match: ({ run, spans }) => {\n if (run.outcome?.pass !== false) return null\n const hasToolsAvailable = spans.some(\n (s) =>\n s.kind === 'agent' &&\n (s.attributes?.toolsAvailable as number | undefined) !== undefined &&\n (s.attributes?.toolsAvailable as number) > 0,\n )\n const tools = spans.filter((s) => s.kind === 'tool')\n if (hasToolsAvailable && tools.length === 0) {\n return {\n failureClass: 'tool_selection_error',\n reason: 'tools were available but none were called',\n }\n }\n return null\n },\n },\n // Format drift: scored by a judge with dimension='format' below threshold\n {\n id: 'format-drift',\n match: ({ spans }) => {\n const judge = spans.find(\n (s) =>\n s.kind === 'judge' &&\n (s as Extract<Span, { kind: 'judge' }>).dimension === 'format' &&\n (s as Extract<Span, { kind: 'judge' }>).score < 0.5,\n )\n return judge\n ? {\n failureClass: 'format_drift',\n reason: 'format judge scored below 0.5',\n triggerSpanId: judge.spanId,\n }\n : null\n },\n },\n]\n\nfunction hasResolutionStatus(payload: Record<string, unknown>, status: string): boolean {\n if (status === 'missing_connection' && stringArray(payload.missingConnections).length > 0)\n return true\n return resolutionItems(payload).some((item) => item.status === status)\n}\n\nfunction hasMissingScopes(payload: Record<string, unknown>): boolean {\n if (stringArray(payload.missingScopes).length > 0) return true\n return resolutionItems(payload).some(\n (item) => Array.isArray(item.missingScopes) && item.missingScopes.length > 0,\n )\n}\n\nfunction resolutionItems(payload: Record<string, unknown>): Array<Record<string, unknown>> {\n return [\n ...records(payload.missing),\n ...records(payload.optionalMissing),\n ...records(payload.ready),\n ]\n}\n\nfunction records(value: unknown): Array<Record<string, unknown>> {\n if (!Array.isArray(value)) return []\n return value.filter(\n (item): item is Record<string, unknown> =>\n Boolean(item) && typeof item === 'object' && !Array.isArray(item),\n )\n}\n\nfunction stringArray(value: unknown): string[] {\n return Array.isArray(value)\n ? value.filter((item): item is string => typeof item === 'string')\n : []\n}\n\n/** Classify the failure mode of a run using an ordered rule list. */\nexport function classifyFailure(\n ctx: FailureContext,\n rules: FailureRule[] = DEFAULT_RULES,\n): FailureClassification {\n if (ctx.run.outcome?.pass !== false && ctx.run.status === 'completed') {\n return { failureClass: 'success', reason: 'run completed with pass=true (or no explicit fail)' }\n }\n for (const rule of rules) {\n const hit = rule.match(ctx)\n if (hit) return hit\n }\n return { failureClass: 'unknown', reason: 'no rule matched; run failed for unclassified reason' }\n}\n","/**\n * FailureClusterView — groups failed runs by (failureClass, triggerTool,\n * argHash-prefix) so weekly reviews can prioritize the top-N clusters.\n *\n * Each cluster includes: N runs, scenarios affected, representative\n * error message, a proposed mitigation hint (rule → action table).\n */\n\nimport { classifyFailure, DEFAULT_RULES, type FailureRule } from '../failure-taxonomy'\nimport { argHash, toolSpans } from '../trace/query'\nimport type { FailureClass, Span } from '../trace/schema'\nimport type { TraceStore } from '../trace/store'\n\nexport interface FailureCluster {\n failureClass: FailureClass\n /** Tool name when the trigger was a tool span, else undefined. */\n toolName?: string\n /** First 16 chars of argHash — clusters similar args. */\n argPrefix?: string\n /**\n * Source dimension when the trigger was a judge span (e.g. `'format'`,\n * `'safety'`, `'correctness'`). Lets cross-template aggregators\n * group failures by the dimension that fired without overloading\n * `argPrefix`. Optional — legacy clusters without this field\n * deserialize cleanly.\n */\n dimension?: string\n runCount: number\n scenarioIds: string[]\n exampleError?: string\n exampleRunId: string\n}\n\nexport interface FailureClusterReport {\n clusters: FailureCluster[]\n totalFailures: number\n totalRuns: number\n}\n\nexport async function failureClusterView(\n store: TraceStore,\n options: { rules?: FailureRule[]; minClusterSize?: number } = {},\n): Promise<FailureClusterReport> {\n const rules = options.rules ?? DEFAULT_RULES\n const minSize = options.minClusterSize ?? 1\n const runs = await store.listRuns()\n\n type Key = string\n const clusters = new Map<Key, FailureCluster>()\n let totalFailures = 0\n\n for (const run of runs) {\n if (run.status === 'completed' && run.outcome?.pass !== false) continue\n totalFailures++\n const spans = await store.spans({ runId: run.runId })\n const events = await store.events({ runId: run.runId })\n const cls = classifyFailure({ run, spans, events }, rules)\n\n let toolName: string | undefined\n let argPrefix: string | undefined\n let dimension: string | undefined\n if (cls.triggerSpanId) {\n const trig = spans.find((s) => s.spanId === cls.triggerSpanId)\n if (trig?.kind === 'tool') {\n toolName = trig.toolName\n argPrefix = argHash(trig.args).slice(0, 16)\n } else if (trig?.kind === 'judge') {\n dimension = trig.dimension\n }\n }\n // Fallback: look at the last errored tool span\n if (!toolName) {\n const ts = await toolSpans(store, run.runId)\n const errored = ts.filter((t) => t.status === 'error').pop()\n if (errored) {\n toolName = errored.toolName\n argPrefix = argHash(errored.args).slice(0, 16)\n }\n }\n // Secondary signal: any judge span on the failed run carries a\n // dimension. Useful when the rule classified by judge score but\n // didn't surface the trigger span (or surfaced a non-judge span).\n if (!dimension) {\n const judge = spans.find((s) => s.kind === 'judge' && typeof s.dimension === 'string')\n if (judge?.kind === 'judge') dimension = judge.dimension\n }\n\n const key = `${cls.failureClass}|${toolName ?? ''}|${argPrefix ?? ''}|${dimension ?? ''}`\n let cluster = clusters.get(key)\n if (!cluster) {\n cluster = {\n failureClass: cls.failureClass,\n toolName,\n argPrefix,\n dimension,\n runCount: 0,\n scenarioIds: [],\n exampleRunId: run.runId,\n exampleError: firstErrorMessage(spans) ?? cls.reason,\n }\n clusters.set(key, cluster)\n }\n cluster.runCount++\n if (!cluster.scenarioIds.includes(run.scenarioId)) cluster.scenarioIds.push(run.scenarioId)\n }\n\n const arr = [...clusters.values()]\n .filter((c) => c.runCount >= minSize)\n .sort((a, b) => b.runCount - a.runCount)\n\n return { clusters: arr, totalFailures, totalRuns: runs.length }\n}\n\nfunction firstErrorMessage(spans: Span[]): string | undefined {\n const errored = spans.find((s) => s.status === 'error')\n return errored?.error\n}\n","/**\n * Tool-use metrics — derived purely from trace data.\n *\n * No scoring assumptions: consumers supply optional ground-truth tool\n * selections per turn + optional \"information used downstream\" signals.\n * Without those, we still compute descriptive metrics (error rate,\n * retry rate, duplicate-call rate) that are useful on their own.\n */\n\nimport { argHash, groupBy, toolSpans } from './trace/query'\nimport type { Span } from './trace/schema'\nimport type { TraceStore } from './trace/store'\n\nexport interface ToolUseMetrics {\n runId: string\n totalCalls: number\n byTool: Record<string, ToolStats>\n errorRate: number\n /** Ratio of calls with identical (toolName, argHash) already seen earlier in the same run. */\n duplicateRate: number\n /** Ratio of error calls followed by ≥1 retry on same tool. */\n retryRate: number\n /** Optional: of the calls agent made, fraction the evaluator marked as \"correct selection\". */\n selectionAccuracy?: number\n}\n\nexport interface ToolStats {\n calls: number\n errors: number\n avgLatencyMs: number\n duplicates: number\n}\n\nexport interface ToolUseOptions {\n /** Map of spanId → whether the evaluator judged the tool selection correct. Optional. */\n selectionLabels?: Record<string, boolean>\n}\n\nexport async function computeToolUseMetrics(\n store: TraceStore,\n runId: string,\n options: ToolUseOptions = {},\n): Promise<ToolUseMetrics> {\n const tools = await toolSpans(store, runId)\n if (tools.length === 0) {\n return { runId, totalCalls: 0, byTool: {}, errorRate: 0, duplicateRate: 0, retryRate: 0 }\n }\n\n const byTool: Record<string, ToolStats> = {}\n let totalErrors = 0\n let totalDuplicates = 0\n const sortedTools = [...tools].sort((a, b) => a.startedAt - b.startedAt)\n const seenSignatures = new Set<string>()\n\n // duplicate detection + per-tool aggregation\n for (const t of sortedTools) {\n const stat = (byTool[t.toolName] ??= { calls: 0, errors: 0, avgLatencyMs: 0, duplicates: 0 })\n stat.calls += 1\n if (t.status === 'error') {\n stat.errors += 1\n totalErrors += 1\n }\n if (typeof t.latencyMs === 'number') stat.avgLatencyMs += t.latencyMs\n const sig = `${t.toolName}|${argHash(t.args)}`\n if (seenSignatures.has(sig)) {\n stat.duplicates += 1\n totalDuplicates += 1\n }\n seenSignatures.add(sig)\n }\n\n for (const stat of Object.values(byTool)) {\n stat.avgLatencyMs = stat.calls > 0 ? stat.avgLatencyMs / stat.calls : 0\n }\n\n // retry detection: per-tool chronological adjacency where error → next same-tool call\n let retryOpportunities = 0\n let retriesFollowed = 0\n for (const [, arr] of groupBy(sortedTools, (t) => t.toolName)) {\n for (let i = 0; i < arr.length; i++) {\n if (arr[i]!.status !== 'error') continue\n retryOpportunities += 1\n if (arr[i + 1]) retriesFollowed += 1\n }\n }\n const retryRate = retryOpportunities > 0 ? retriesFollowed / retryOpportunities : 0\n\n let selectionAccuracy: number | undefined\n if (options.selectionLabels) {\n const labeled = sortedTools.filter((t) => t.spanId in options.selectionLabels!)\n if (labeled.length > 0) {\n selectionAccuracy =\n labeled.filter((t) => options.selectionLabels![t.spanId]).length / labeled.length\n }\n }\n\n return {\n runId,\n totalCalls: sortedTools.length,\n byTool,\n errorRate: totalErrors / sortedTools.length,\n duplicateRate: totalDuplicates / sortedTools.length,\n retryRate,\n selectionAccuracy,\n }\n}\n\nexport type { Span }\n","/**\n * Baseline regression detection.\n *\n * Lifted from ADC baseline.ts. Every promotion-blocking signal boils down\n * to: \"is this run measurably worse than baseline?\" — with enough\n * statistical rigor to distinguish noise from drift.\n *\n * Uses:\n * - Welch's t-test (unequal variance) for per-metric mean comparison\n * - Cohen's d for effect size magnitude\n * - IQR for stability flag (unstable samples can't be trusted for comparisons)\n *\n * Returns a structured verdict: improved | regressed | stable | unstable.\n */\n\nimport { cohensD } from './statistics'\n\nexport interface MetricSamples {\n /** Stable metric key (e.g. \"overallScore\", \"firstTokenMs\"). */\n metric: string\n /** Whether higher values are better. */\n higherIsBetter: boolean\n baseline: number[]\n candidate: number[]\n}\n\nexport interface MetricVerdict {\n metric: string\n baselineMean: number\n candidateMean: number\n delta: number\n cohensD: number\n welchT: number\n welchDf: number\n welchP: number\n stable: boolean\n /** IQR of the combined samples — used as a rough stability indicator. */\n iqr: number\n verdict: 'improved' | 'regressed' | 'stable' | 'unstable'\n}\n\nexport interface BaselineReport {\n metrics: MetricVerdict[]\n /** True if any critical metric regressed. */\n hasRegression: boolean\n /** True if any metric is unstable (too noisy to judge). */\n hasUnstable: boolean\n}\n\nexport interface BaselineOptions {\n /** Effect size threshold for meaningful delta (default 0.5 — medium effect). */\n effectThreshold?: number\n /** p-value threshold for statistical significance (default 0.05). */\n alpha?: number\n /** IQR/mean ratio above which samples are flagged unstable (default 0.30). */\n unstableCvThreshold?: number\n}\n\n/**\n * Compare candidate samples against baseline per metric. Verdict logic:\n * - unstable: IQR/|mean| > threshold on either set — not enough signal\n * - improved: meaningful effect in the \"better\" direction AND p < alpha\n * - regressed: meaningful effect in the \"worse\" direction AND p < alpha\n * - stable: otherwise (no significant change)\n */\nexport function compareToBaseline(\n samples: MetricSamples[],\n options: BaselineOptions = {},\n): BaselineReport {\n const effectThreshold = options.effectThreshold ?? 0.5\n const alpha = options.alpha ?? 0.05\n const cvThreshold = options.unstableCvThreshold ?? 0.3\n\n const metrics: MetricVerdict[] = samples.map((s) => {\n if (s.baseline.length < 2 || s.candidate.length < 2) {\n throw new Error(`compareToBaseline: need ≥2 samples per side for \"${s.metric}\"`)\n }\n const bMean = mean(s.baseline)\n const cMean = mean(s.candidate)\n const delta = cMean - bMean\n const d = cohensD(s.baseline, s.candidate) // positive = candidate higher\n const { t, df, p } = welchsTTest(s.baseline, s.candidate)\n // Stability is per-side: a comparison is trustworthy only when BOTH\n // samples are internally consistent. Combining the sides would flag\n // large-but-real deltas as \"unstable\" which is exactly what we want\n // to detect.\n const baselineIqr = iqr(s.baseline)\n const candidateIqr = iqr(s.candidate)\n const baselineStable = baselineIqr / Math.max(Math.abs(bMean), 1e-9) <= cvThreshold\n const candidateStable = candidateIqr / Math.max(Math.abs(cMean), 1e-9) <= cvThreshold\n const stable = baselineStable && candidateStable\n const reportedIqr = Math.max(baselineIqr, candidateIqr)\n\n let verdict: MetricVerdict['verdict']\n if (!stable) {\n verdict = 'unstable'\n } else if (p < alpha && Math.abs(d) >= effectThreshold) {\n const candidateIsBetter = s.higherIsBetter ? delta > 0 : delta < 0\n verdict = candidateIsBetter ? 'improved' : 'regressed'\n } else {\n verdict = 'stable'\n }\n\n return {\n metric: s.metric,\n baselineMean: bMean,\n candidateMean: cMean,\n delta,\n cohensD: d,\n welchT: t,\n welchDf: df,\n welchP: p,\n stable,\n iqr: reportedIqr,\n verdict,\n }\n })\n\n return {\n metrics,\n hasRegression: metrics.some((m) => m.verdict === 'regressed'),\n hasUnstable: metrics.some((m) => m.verdict === 'unstable'),\n }\n}\n\nfunction mean(xs: number[]): number {\n return xs.reduce((a, b) => a + b, 0) / xs.length\n}\n\n/** Inter-quartile range; 0 when the sample has no spread. */\nexport function iqr(xs: number[]): number {\n if (xs.length === 0) return 0\n const sorted = [...xs].sort((a, b) => a - b)\n const q = (p: number) => {\n const idx = p * (sorted.length - 1)\n const lo = Math.floor(idx)\n const hi = Math.ceil(idx)\n return sorted[lo]! + (sorted[hi]! - sorted[lo]!) * (idx - lo)\n }\n return q(0.75) - q(0.25)\n}\n\n/**\n * Welch's t-test — unequal-variance two-sample t. Uses the same Student-t\n * CDF as `pairedTTest` (via incomplete beta); falls back to normal tail\n * when df is large.\n */\nexport function welchsTTest(a: number[], b: number[]): { t: number; df: number; p: number } {\n if (a.length < 2 || b.length < 2) return { t: 0, df: 0, p: 1 }\n const mA = mean(a)\n const mB = mean(b)\n const vA = variance(a, mA)\n const vB = variance(b, mB)\n const seSquared = vA / a.length + vB / b.length\n if (seSquared === 0) return { t: mA === mB ? 0 : Infinity, df: 0, p: mA === mB ? 1 : 0 }\n const t = (mB - mA) / Math.sqrt(seSquared)\n const df =\n (seSquared * seSquared) /\n ((vA / a.length) ** 2 / (a.length - 1) + (vB / b.length) ** 2 / (b.length - 1))\n const p = 2 * (1 - studentTCdf(Math.abs(t), df))\n return { t, df, p }\n}\n\nfunction variance(xs: number[], m: number): number {\n return xs.reduce((acc, x) => acc + (x - m) ** 2, 0) / (xs.length - 1)\n}\n\n// Re-used from statistics.ts via small local copy to avoid exporting internals.\nfunction studentTCdf(t: number, df: number): number {\n if (df <= 0) return 0.5\n if (df > 100) return normalCdf(t)\n const x = df / (df + t * t)\n const ib = incompleteBeta(x, df / 2, 0.5)\n return t >= 0 ? 1 - 0.5 * ib : 0.5 * ib\n}\n\nfunction incompleteBeta(x: number, a: number, b: number): number {\n if (x <= 0) return 0\n if (x >= 1) return 1\n const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b)\n const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a\n let c = 1\n let d = 1 - ((a + b) * x) / (a + 1)\n if (Math.abs(d) < 1e-30) d = 1e-30\n d = 1 / d\n let f = d\n for (let m = 1; m <= 200; m++) {\n const m2 = 2 * m\n let num = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n f *= d * c\n num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n const delta = d * c\n f *= delta\n if (Math.abs(delta - 1) < 3e-7) break\n }\n return front * f\n}\n\nfunction lnGamma(z: number): number {\n const coefs = [\n 0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,\n -176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,\n 1.5056327351493116e-7,\n ]\n if (z < 0.5) return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z)\n z -= 1\n let x = coefs[0]!\n for (let i = 1; i < 9; i++) x += coefs[i]! / (z + i)\n const t = z + 7.5\n return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x)\n}\n\nfunction normalCdf(x: number): number {\n const a1 = 0.254829592\n const a2 = -0.284496736\n const a3 = 1.421413741\n const a4 = -1.453152027\n const a5 = 1.061405429\n const p = 0.3275911\n const sign = x < 0 ? -1 : 1\n const absX = Math.abs(x)\n const t = 1 / (1 + p * absX)\n const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp((-absX * absX) / 2)\n return 0.5 * (1 + sign * y)\n}\n"],"mappings":";;;;;;;;;;AAwCO,IAAM,gBAA+B;AAAA;AAAA,EAE1C;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,IAAI,MAAM;AAClB,YAAM,KAAK,IAAI,SAAS;AACxB,UAAI,MAAM,OAAO;AACf,eAAO,EAAE,cAAc,IAAI,QAAQ,sCAAsC;AAC3E,aAAO;AAAA,IACT;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,sBACnB,EAAE,QAAQ,WAAW;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,aACT,EAAE,QAAQ,SAAS,oCAAoC,EAAE,QAAQ,UAAU,SAC1E,EAAE,QAAQ,SAAS,+BAClB,EAAE,QAAQ,SAAS;AAAA,MAC3B;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mCACnB,oBAAoB,EAAE,SAAS,oBAAoB;AAAA,MACvD;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,aACT,EAAE,QAAQ,SAAS,mCAAmC,iBAAiB,EAAE,OAAO,KAC/E,EAAE,QAAQ,SAAS,+BAA+B,EAAE,QAAQ,SAAS;AAAA,MAC5E;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,aACT,EAAE,QAAQ,SAAS,wBAAwB,EAAE,QAAQ,WAAW,uBAC/D,EAAE,QAAQ,SAAS,+BAClB,EAAE,QAAQ,SAAS,uBACrB,EAAE,QAAQ,SAAS;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,gCAClB,EAAE,QAAQ,SAAS,kBAClB,EAAE,QAAQ,SAAS,2BACnB,EAAE,QAAQ,SAAS,wBACnB,EAAE,QAAQ,WAAW;AAAA,MAC3B;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,gCAClB,EAAE,QAAQ,SAAS,yBAClB,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,SAAS;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,+BACnB,CAAC;AAAA,UACC;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,QACF,EAAE,SAAS,OAAO,EAAE,QAAQ,IAAI,CAAC;AAAA,MACrC;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,aAAa;AAAA,MAC3B;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,KAAK,MAAM,MAAM;AACzB,UAAI,IAAI,SAAS,SAAS,MAAO,QAAO;AACxC,YAAM,YAAY,MAAM;AAAA,QACtB,CAAC,MACC,EAAE,SAAS,gBAAgB,EAAE,KAAK,WAAW,KAAK,EAAE,KAAK,MAAM,CAAC,QAAQ,IAAI,SAAS,CAAC;AAAA,MAC1F;AACA,aAAO,YACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,eAAe,UAAU;AAAA,MAC3B,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,WAAW;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,WAAW;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,SAAS,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,eAAe;AAC5D,aAAO,SACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ,sBAAsB,OAAO,QAAQ,aAAa,mBAAmB;AAAA,QAC7E,gBAAgB,OAAO;AAAA,MACzB,IACA;AAAA,IACN;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,IAAI,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,kBAAkB;AAC1D,aAAO,IACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,EAAE;AAAA,MACpB,IACA;AAAA,IACN;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,MAAM,MAAM;AACpB,YAAM,IAAI,MAAM;AAAA,QACd,CAAC,MAAM,EAAE,SAAS,aAAa,OAAO,EAAE,aAAa,YAAY,EAAE,aAAa;AAAA,MAClF;AACA,UAAI,CAAC,EAAG,QAAO;AACf,aAAO;AAAA,QACL,cAAc;AAAA,QACd,QAAQ,kBAAmB,EAAyC,QAAQ;AAAA,QAC5E,eAAe,EAAE;AAAA,MACnB;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,KAAK,OAAO,MAAM;AAC1B,UAAI,IAAI,WAAW,UAAW,QAAO;AACrC,YAAM,aAAa,OAAO;AAAA,QACxB,CAAC,MACC,EAAE,SAAS,WACX,OAAO,EAAE,QAAQ,UAAU,EAAE,EAC1B,YAAY,EACZ,SAAS,SAAS;AAAA,MACzB;AACA,YAAM,QAAQ,IAAI,SAAS,SAAS,IAAI,YAAY;AACpD,UAAI,cAAc,KAAK,SAAS,SAAS,KAAK,KAAK,SAAS,UAAU,GAAG;AACvE,eAAO,EAAE,cAAc,WAAW,QAAQ,0BAA0B;AAAA,MACtE;AACA,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,MAAM,MAAM;AACpB,YAAM,QAAQ,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,MAAM;AACnD,YAAM,SAAS,oBAAI,IAAoB;AACvC,iBAAW,KAAK,OAAO;AACrB,cAAM,OAAQ,EAAsC;AACpD,cAAM,MAAM,OAAO,IAAI,IAAI,KAAK,CAAC;AACjC,YAAI,KAAK,CAAC;AACV,eAAO,IAAI,MAAM,GAAG;AAAA,MACtB;AACA,iBAAW,CAAC,MAAM,GAAG,KAAK,QAAQ;AAChC,cAAM,OAAO,IAAI,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnD,YAAI,KAAK,UAAU,KAAK,KAAK,WAAW,IAAI,QAAQ;AAClD,iBAAO;AAAA,YACL,cAAc;AAAA,YACd,QAAQ,GAAG,KAAK,MAAM,gCAAgC,IAAI;AAAA,YAC1D,eAAe,KAAK,KAAK,SAAS,CAAC,EAAG;AAAA,UACxC;AAAA,QACF;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,KAAK,MAAM,MAAM;AACzB,UAAI,IAAI,SAAS,SAAS,MAAO,QAAO;AACxC,YAAM,oBAAoB,MAAM;AAAA,QAC9B,CAAC,MACC,EAAE,SAAS,WACV,EAAE,YAAY,mBAA0C,UACxD,EAAE,YAAY,iBAA4B;AAAA,MAC/C;AACA,YAAM,QAAQ,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,MAAM;AACnD,UAAI,qBAAqB,MAAM,WAAW,GAAG;AAC3C,eAAO;AAAA,UACL,cAAc;AAAA,UACd,QAAQ;AAAA,QACV;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,MAAM,MAAM;AACpB,YAAM,QAAQ,MAAM;AAAA,QAClB,CAAC,MACC,EAAE,SAAS,WACV,EAAuC,cAAc,YACrD,EAAuC,QAAQ;AAAA,MACpD;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,eAAe,MAAM;AAAA,MACvB,IACA;AAAA,IACN;AAAA,EACF;AACF;AAEA,SAAS,oBAAoB,SAAkC,QAAyB;AACtF,MAAI,WAAW,wBAAwB,YAAY,QAAQ,kBAAkB,EAAE,SAAS;AACtF,WAAO;AACT,SAAO,gBAAgB,OAAO,EAAE,KAAK,CAAC,SAAS,KAAK,WAAW,MAAM;AACvE;AAEA,SAAS,iBAAiB,SAA2C;AACnE,MAAI,YAAY,QAAQ,aAAa,EAAE,SAAS,EAAG,QAAO;AAC1D,SAAO,gBAAgB,OAAO,EAAE;AAAA,IAC9B,CAAC,SAAS,MAAM,QAAQ,KAAK,aAAa,KAAK,KAAK,cAAc,SAAS;AAAA,EAC7E;AACF;AAEA,SAAS,gBAAgB,SAAkE;AACzF,SAAO;AAAA,IACL,GAAG,QAAQ,QAAQ,OAAO;AAAA,IAC1B,GAAG,QAAQ,QAAQ,eAAe;AAAA,IAClC,GAAG,QAAQ,QAAQ,KAAK;AAAA,EAC1B;AACF;AAEA,SAAS,QAAQ,OAAgD;AAC/D,MAAI,CAAC,MAAM,QAAQ,KAAK,EAAG,QAAO,CAAC;AACnC,SAAO,MAAM;AAAA,IACX,CAAC,SACC,QAAQ,IAAI,KAAK,OAAO,SAAS,YAAY,CAAC,MAAM,QAAQ,IAAI;AAAA,EACpE;AACF;AAEA,SAAS,YAAY,OAA0B;AAC7C,SAAO,MAAM,QAAQ,KAAK,IACtB,MAAM,OAAO,CAAC,SAAyB,OAAO,SAAS,QAAQ,IAC/D,CAAC;AACP;AAGO,SAAS,gBACd,KACA,QAAuB,eACA;AACvB,MAAI,IAAI,IAAI,SAAS,SAAS,SAAS,IAAI,IAAI,WAAW,aAAa;AACrE,WAAO,EAAE,cAAc,WAAW,QAAQ,qDAAqD;AAAA,EACjG;AACA,aAAW,QAAQ,OAAO;AACxB,UAAM,MAAM,KAAK,MAAM,GAAG;AAC1B,QAAI,IAAK,QAAO;AAAA,EAClB;AACA,SAAO,EAAE,cAAc,WAAW,QAAQ,sDAAsD;AAClG;;;ACtaA,eAAsB,mBACpB,OACA,UAA8D,CAAC,GAChC;AAC/B,QAAM,QAAQ,QAAQ,SAAS;AAC/B,QAAM,UAAU,QAAQ,kBAAkB;AAC1C,QAAM,OAAO,MAAM,MAAM,SAAS;AAGlC,QAAM,WAAW,oBAAI,IAAyB;AAC9C,MAAI,gBAAgB;AAEpB,aAAW,OAAO,MAAM;AACtB,QAAI,IAAI,WAAW,eAAe,IAAI,SAAS,SAAS,MAAO;AAC/D;AACA,UAAM,QAAQ,MAAM,MAAM,MAAM,EAAE,OAAO,IAAI,MAAM,CAAC;AACpD,UAAM,SAAS,MAAM,MAAM,OAAO,EAAE,OAAO,IAAI,MAAM,CAAC;AACtD,UAAM,MAAM,gBAAgB,EAAE,KAAK,OAAO,OAAO,GAAG,KAAK;AAEzD,QAAI;AACJ,QAAI;AACJ,QAAI;AACJ,QAAI,IAAI,eAAe;AACrB,YAAM,OAAO,MAAM,KAAK,CAAC,MAAM,EAAE,WAAW,IAAI,aAAa;AAC7D,UAAI,MAAM,SAAS,QAAQ;AACzB,mBAAW,KAAK;AAChB,oBAAY,QAAQ,KAAK,IAAI,EAAE,MAAM,GAAG,EAAE;AAAA,MAC5C,WAAW,MAAM,SAAS,SAAS;AACjC,oBAAY,KAAK;AAAA,MACnB;AAAA,IACF;AAEA,QAAI,CAAC,UAAU;AACb,YAAM,KAAK,MAAM,UAAU,OAAO,IAAI,KAAK;AAC3C,YAAM,UAAU,GAAG,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO,EAAE,IAAI;AAC3D,UAAI,SAAS;AACX,mBAAW,QAAQ;AACnB,oBAAY,QAAQ,QAAQ,IAAI,EAAE,MAAM,GAAG,EAAE;AAAA,MAC/C;AAAA,IACF;AAIA,QAAI,CAAC,WAAW;AACd,YAAM,QAAQ,MAAM,KAAK,CAAC,MAAM,EAAE,SAAS,WAAW,OAAO,EAAE,cAAc,QAAQ;AACrF,UAAI,OAAO,SAAS,QAAS,aAAY,MAAM;AAAA,IACjD;AAEA,UAAM,MAAM,GAAG,IAAI,YAAY,IAAI,YAAY,EAAE,IAAI,aAAa,EAAE,IAAI,aAAa,EAAE;AACvF,QAAI,UAAU,SAAS,IAAI,GAAG;AAC9B,QAAI,CAAC,SAAS;AACZ,gBAAU;AAAA,QACR,cAAc,IAAI;AAAA,QAClB;AAAA,QACA;AAAA,QACA;AAAA,QACA,UAAU;AAAA,QACV,aAAa,CAAC;AAAA,QACd,cAAc,IAAI;AAAA,QAClB,cAAc,kBAAkB,KAAK,KAAK,IAAI;AAAA,MAChD;AACA,eAAS,IAAI,KAAK,OAAO;AAAA,IAC3B;AACA,YAAQ;AACR,QAAI,CAAC,QAAQ,YAAY,SAAS,IAAI,UAAU,EAAG,SAAQ,YAAY,KAAK,IAAI,UAAU;AAAA,EAC5F;AAEA,QAAM,MAAM,CAAC,GAAG,SAAS,OAAO,CAAC,EAC9B,OAAO,CAAC,MAAM,EAAE,YAAY,OAAO,EACnC,KAAK,CAAC,GAAG,MAAM,EAAE,WAAW,EAAE,QAAQ;AAEzC,SAAO,EAAE,UAAU,KAAK,eAAe,WAAW,KAAK,OAAO;AAChE;AAEA,SAAS,kBAAkB,OAAmC;AAC5D,QAAM,UAAU,MAAM,KAAK,CAAC,MAAM,EAAE,WAAW,OAAO;AACtD,SAAO,SAAS;AAClB;;;AC9EA,eAAsB,sBACpB,OACA,OACA,UAA0B,CAAC,GACF;AACzB,QAAM,QAAQ,MAAM,UAAU,OAAO,KAAK;AAC1C,MAAI,MAAM,WAAW,GAAG;AACtB,WAAO,EAAE,OAAO,YAAY,GAAG,QAAQ,CAAC,GAAG,WAAW,GAAG,eAAe,GAAG,WAAW,EAAE;AAAA,EAC1F;AAEA,QAAM,SAAoC,CAAC;AAC3C,MAAI,cAAc;AAClB,MAAI,kBAAkB;AACtB,QAAM,cAAc,CAAC,GAAG,KAAK,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS;AACvE,QAAM,iBAAiB,oBAAI,IAAY;AAGvC,aAAW,KAAK,aAAa;AAC3B,UAAM,OAAQ,OAAO,EAAE,QAAQ,MAAM,EAAE,OAAO,GAAG,QAAQ,GAAG,cAAc,GAAG,YAAY,EAAE;AAC3F,SAAK,SAAS;AACd,QAAI,EAAE,WAAW,SAAS;AACxB,WAAK,UAAU;AACf,qBAAe;AAAA,IACjB;AACA,QAAI,OAAO,EAAE,cAAc,SAAU,MAAK,gBAAgB,EAAE;AAC5D,UAAM,MAAM,GAAG,EAAE,QAAQ,IAAI,QAAQ,EAAE,IAAI,CAAC;AAC5C,QAAI,eAAe,IAAI,GAAG,GAAG;AAC3B,WAAK,cAAc;AACnB,yBAAmB;AAAA,IACrB;AACA,mBAAe,IAAI,GAAG;AAAA,EACxB;AAEA,aAAW,QAAQ,OAAO,OAAO,MAAM,GAAG;AACxC,SAAK,eAAe,KAAK,QAAQ,IAAI,KAAK,eAAe,KAAK,QAAQ;AAAA,EACxE;AAGA,MAAI,qBAAqB;AACzB,MAAI,kBAAkB;AACtB,aAAW,CAAC,EAAE,GAAG,KAAK,QAAQ,aAAa,CAAC,MAAM,EAAE,QAAQ,GAAG;AAC7D,aAAS,IAAI,GAAG,IAAI,IAAI,QAAQ,KAAK;AACnC,UAAI,IAAI,CAAC,EAAG,WAAW,QAAS;AAChC,4BAAsB;AACtB,UAAI,IAAI,IAAI,CAAC,EAAG,oBAAmB;AAAA,IACrC;AAAA,EACF;AACA,QAAM,YAAY,qBAAqB,IAAI,kBAAkB,qBAAqB;AAElF,MAAI;AACJ,MAAI,QAAQ,iBAAiB;AAC3B,UAAM,UAAU,YAAY,OAAO,CAAC,MAAM,EAAE,UAAU,QAAQ,eAAgB;AAC9E,QAAI,QAAQ,SAAS,GAAG;AACtB,0BACE,QAAQ,OAAO,CAAC,MAAM,QAAQ,gBAAiB,EAAE,MAAM,CAAC,EAAE,SAAS,QAAQ;AAAA,IAC/E;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA,YAAY,YAAY;AAAA,IACxB;AAAA,IACA,WAAW,cAAc,YAAY;AAAA,IACrC,eAAe,kBAAkB,YAAY;AAAA,IAC7C;AAAA,IACA;AAAA,EACF;AACF;;;ACxCO,SAAS,kBACd,SACA,UAA2B,CAAC,GACZ;AAChB,QAAM,kBAAkB,QAAQ,mBAAmB;AACnD,QAAM,QAAQ,QAAQ,SAAS;AAC/B,QAAM,cAAc,QAAQ,uBAAuB;AAEnD,QAAM,UAA2B,QAAQ,IAAI,CAAC,MAAM;AAClD,QAAI,EAAE,SAAS,SAAS,KAAK,EAAE,UAAU,SAAS,GAAG;AACnD,YAAM,IAAI,MAAM,yDAAoD,EAAE,MAAM,GAAG;AAAA,IACjF;AACA,UAAM,QAAQ,KAAK,EAAE,QAAQ;AAC7B,UAAM,QAAQ,KAAK,EAAE,SAAS;AAC9B,UAAM,QAAQ,QAAQ;AACtB,UAAM,IAAI,QAAQ,EAAE,UAAU,EAAE,SAAS;AACzC,UAAM,EAAE,GAAG,IAAI,EAAE,IAAI,YAAY,EAAE,UAAU,EAAE,SAAS;AAKxD,UAAM,cAAc,IAAI,EAAE,QAAQ;AAClC,UAAM,eAAe,IAAI,EAAE,SAAS;AACpC,UAAM,iBAAiB,cAAc,KAAK,IAAI,KAAK,IAAI,KAAK,GAAG,IAAI,KAAK;AACxE,UAAM,kBAAkB,eAAe,KAAK,IAAI,KAAK,IAAI,KAAK,GAAG,IAAI,KAAK;AAC1E,UAAM,SAAS,kBAAkB;AACjC,UAAM,cAAc,KAAK,IAAI,aAAa,YAAY;AAEtD,QAAI;AACJ,QAAI,CAAC,QAAQ;AACX,gBAAU;AAAA,IACZ,WAAW,IAAI,SAAS,KAAK,IAAI,CAAC,KAAK,iBAAiB;AACtD,YAAM,oBAAoB,EAAE,iBAAiB,QAAQ,IAAI,QAAQ;AACjE,gBAAU,oBAAoB,aAAa;AAAA,IAC7C,OAAO;AACL,gBAAU;AAAA,IACZ;AAEA,WAAO;AAAA,MACL,QAAQ,EAAE;AAAA,MACV,cAAc;AAAA,MACd,eAAe;AAAA,MACf;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,QAAQ;AAAA,MACR;AAAA,MACA,KAAK;AAAA,MACL;AAAA,IACF;AAAA,EACF,CAAC;AAED,SAAO;AAAA,IACL;AAAA,IACA,eAAe,QAAQ,KAAK,CAAC,MAAM,EAAE,YAAY,WAAW;AAAA,IAC5D,aAAa,QAAQ,KAAK,CAAC,MAAM,EAAE,YAAY,UAAU;AAAA,EAC3D;AACF;AAEA,SAAS,KAAK,IAAsB;AAClC,SAAO,GAAG,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,GAAG;AAC5C;AAGO,SAAS,IAAI,IAAsB;AACxC,MAAI,GAAG,WAAW,EAAG,QAAO;AAC5B,QAAM,SAAS,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC3C,QAAM,IAAI,CAAC,MAAc;AACvB,UAAM,MAAM,KAAK,OAAO,SAAS;AACjC,UAAM,KAAK,KAAK,MAAM,GAAG;AACzB,UAAM,KAAK,KAAK,KAAK,GAAG;AACxB,WAAO,OAAO,EAAE,KAAM,OAAO,EAAE,IAAK,OAAO,EAAE,MAAO,MAAM;AAAA,EAC5D;AACA,SAAO,EAAE,IAAI,IAAI,EAAE,IAAI;AACzB;AAOO,SAAS,YAAY,GAAa,GAAmD;AAC1F,MAAI,EAAE,SAAS,KAAK,EAAE,SAAS,EAAG,QAAO,EAAE,GAAG,GAAG,IAAI,GAAG,GAAG,EAAE;AAC7D,QAAM,KAAK,KAAK,CAAC;AACjB,QAAM,KAAK,KAAK,CAAC;AACjB,QAAM,KAAK,SAAS,GAAG,EAAE;AACzB,QAAM,KAAK,SAAS,GAAG,EAAE;AACzB,QAAM,YAAY,KAAK,EAAE,SAAS,KAAK,EAAE;AACzC,MAAI,cAAc,EAAG,QAAO,EAAE,GAAG,OAAO,KAAK,IAAI,UAAU,IAAI,GAAG,GAAG,OAAO,KAAK,IAAI,EAAE;AACvF,QAAM,KAAK,KAAK,MAAM,KAAK,KAAK,SAAS;AACzC,QAAM,KACH,YAAY,cACX,KAAK,EAAE,WAAW,KAAK,EAAE,SAAS,MAAM,KAAK,EAAE,WAAW,KAAK,EAAE,SAAS;AAC9E,QAAM,IAAI,KAAK,IAAI,YAAY,KAAK,IAAI,CAAC,GAAG,EAAE;AAC9C,SAAO,EAAE,GAAG,IAAI,EAAE;AACpB;AAEA,SAAS,SAAS,IAAc,GAAmB;AACjD,SAAO,GAAG,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,MAAM,GAAG,CAAC,KAAK,GAAG,SAAS;AACrE;AAGA,SAAS,YAAY,GAAW,IAAoB;AAClD,MAAI,MAAM,EAAG,QAAO;AACpB,MAAI,KAAK,IAAK,QAAO,UAAU,CAAC;AAChC,QAAM,IAAI,MAAM,KAAK,IAAI;AACzB,QAAM,KAAK,eAAe,GAAG,KAAK,GAAG,GAAG;AACxC,SAAO,KAAK,IAAI,IAAI,MAAM,KAAK,MAAM;AACvC;AAEA,SAAS,eAAe,GAAW,GAAW,GAAmB;AAC/D,MAAI,KAAK,EAAG,QAAO;AACnB,MAAI,KAAK,EAAG,QAAO;AACnB,QAAM,SAAS,QAAQ,CAAC,IAAI,QAAQ,CAAC,IAAI,QAAQ,IAAI,CAAC;AACtD,QAAM,QAAQ,KAAK,IAAI,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,CAAC,IAAI,IAAI,MAAM,IAAI;AACzE,MAAI,IAAI;AACR,MAAI,IAAI,KAAM,IAAI,KAAK,KAAM,IAAI;AACjC,MAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,MAAI,IAAI;AACR,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,KAAK,KAAK,KAAK;AAC7B,UAAM,KAAK,IAAI;AACf,QAAI,MAAO,KAAK,IAAI,KAAK,MAAO,IAAI,KAAK,MAAM,IAAI;AACnD,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,SAAK,IAAI;AACT,UAAM,GAAG,IAAI,MAAM,IAAI,IAAI,KAAK,OAAO,IAAI,OAAO,IAAI,KAAK;AAC3D,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,UAAM,QAAQ,IAAI;AAClB,SAAK;AACL,QAAI,KAAK,IAAI,QAAQ,CAAC,IAAI,KAAM;AAAA,EAClC;AACA,SAAO,QAAQ;AACjB;AAEA,SAAS,QAAQ,GAAmB;AAClC,QAAM,QAAQ;AAAA,IACZ;AAAA,IAAqB;AAAA,IAAmB;AAAA,IAAqB;AAAA,IAC7D;AAAA,IAAqB;AAAA,IAAoB;AAAA,IAAsB;AAAA,IAC/D;AAAA,EACF;AACA,MAAI,IAAI,IAAK,QAAO,KAAK,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,QAAQ,IAAI,CAAC;AAC7E,OAAK;AACL,MAAI,IAAI,MAAM,CAAC;AACf,WAAS,IAAI,GAAG,IAAI,GAAG,IAAK,MAAK,MAAM,CAAC,KAAM,IAAI;AAClD,QAAM,IAAI,IAAI;AACd,SAAO,MAAM,KAAK,IAAI,IAAI,KAAK,EAAE,KAAK,IAAI,OAAO,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC;AAC/E;AAEA,SAAS,UAAU,GAAmB;AACpC,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,IAAI;AACV,QAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,QAAM,OAAO,KAAK,IAAI,CAAC;AACvB,QAAM,IAAI,KAAK,IAAI,IAAI;AACvB,QAAM,IAAI,QAAQ,KAAK,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,KAAK,IAAK,CAAC,OAAO,OAAQ,CAAC;AAC9F,SAAO,OAAO,IAAI,OAAO;AAC3B;","names":[]}