@tangle-network/agent-eval 0.23.1 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +212 -79
  3. package/dist/baseline-4R5deP0N.d.ts +108 -0
  4. package/dist/benchmarks/index.d.ts +3 -2
  5. package/dist/benchmarks/index.js +1 -1
  6. package/dist/builder-eval/index.d.ts +249 -0
  7. package/dist/builder-eval/index.js +391 -0
  8. package/dist/builder-eval/index.js.map +1 -0
  9. package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
  10. package/dist/chunk-2A5XJB43.js.map +1 -0
  11. package/dist/chunk-47X6LRCE.js +76 -0
  12. package/dist/chunk-47X6LRCE.js.map +1 -0
  13. package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
  14. package/dist/chunk-4F5DQN55.js.map +1 -0
  15. package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
  16. package/dist/chunk-4S4BM3QQ.js.map +1 -0
  17. package/dist/chunk-5BKGXME7.js +65 -0
  18. package/dist/chunk-5BKGXME7.js.map +1 -0
  19. package/dist/{chunk-6KQG5HAH.js → chunk-5LBB5B3Z.js} +376 -72
  20. package/dist/chunk-5LBB5B3Z.js.map +1 -0
  21. package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
  22. package/dist/chunk-6QDKWHLS.js.map +1 -0
  23. package/dist/{chunk-VQQSPGSM.js → chunk-EDUKQ5AM.js} +247 -189
  24. package/dist/chunk-EDUKQ5AM.js.map +1 -0
  25. package/dist/chunk-I4MBDTY5.js +272 -0
  26. package/dist/chunk-I4MBDTY5.js.map +1 -0
  27. package/dist/chunk-JLZQWFV3.js +618 -0
  28. package/dist/chunk-JLZQWFV3.js.map +1 -0
  29. package/dist/chunk-K2TPS5LB.js +569 -0
  30. package/dist/chunk-K2TPS5LB.js.map +1 -0
  31. package/dist/chunk-KKHDIONI.js +414 -0
  32. package/dist/chunk-KKHDIONI.js.map +1 -0
  33. package/dist/chunk-KMPRBJK4.js +74 -0
  34. package/dist/chunk-KMPRBJK4.js.map +1 -0
  35. package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
  36. package/dist/chunk-KTGTIOFD.js.map +1 -0
  37. package/dist/chunk-LSH4MMOZ.js +838 -0
  38. package/dist/chunk-LSH4MMOZ.js.map +1 -0
  39. package/dist/chunk-NG236HPC.js +57 -0
  40. package/dist/chunk-NG236HPC.js.map +1 -0
  41. package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
  42. package/dist/chunk-NLMNWKVM.js.map +1 -0
  43. package/dist/chunk-NU65VQ7M.js +99 -0
  44. package/dist/chunk-NU65VQ7M.js.map +1 -0
  45. package/dist/chunk-OWLAAMME.js +250 -0
  46. package/dist/chunk-OWLAAMME.js.map +1 -0
  47. package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
  48. package/dist/chunk-PC4UYEBM.js.map +1 -0
  49. package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
  50. package/dist/chunk-RAF443UI.js.map +1 -0
  51. package/dist/chunk-RZTMDUO7.js +49 -0
  52. package/dist/chunk-RZTMDUO7.js.map +1 -0
  53. package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
  54. package/dist/chunk-SESZDQPX.js.map +1 -0
  55. package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
  56. package/dist/chunk-TVVP3ZZQ.js.map +1 -0
  57. package/dist/chunk-WWYCWKUM.js +196 -0
  58. package/dist/chunk-WWYCWKUM.js.map +1 -0
  59. package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
  60. package/dist/chunk-YRZ4M5GS.js.map +1 -0
  61. package/dist/chunk-ZN274SWR.js +613 -0
  62. package/dist/chunk-ZN274SWR.js.map +1 -0
  63. package/dist/cli.js +10 -6
  64. package/dist/cli.js.map +1 -1
  65. package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
  66. package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
  67. package/dist/control.d.ts +8 -6
  68. package/dist/control.js +10 -7
  69. package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
  70. package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
  71. package/dist/errors-BZ9sTdz7.d.ts +70 -0
  72. package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
  73. package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
  74. package/dist/governance/index.d.ts +5 -0
  75. package/dist/governance/index.js +18 -0
  76. package/dist/governance/index.js.map +1 -0
  77. package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
  78. package/dist/index-Oj9fAPPN.d.ts +270 -0
  79. package/dist/index.d.ts +2018 -3003
  80. package/dist/index.js +7443 -9102
  81. package/dist/index.js.map +1 -1
  82. package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
  83. package/dist/knowledge/index.d.ts +102 -0
  84. package/dist/knowledge/index.js +18 -0
  85. package/dist/knowledge/index.js.map +1 -0
  86. package/dist/meta-eval/index.d.ts +99 -0
  87. package/dist/meta-eval/index.js +324 -0
  88. package/dist/meta-eval/index.js.map +1 -0
  89. package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
  90. package/dist/openapi.json +491 -1
  91. package/dist/optimization.d.ts +11 -8
  92. package/dist/optimization.js +11 -9
  93. package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
  94. package/dist/pipelines/index.d.ts +172 -0
  95. package/dist/pipelines/index.js +345 -0
  96. package/dist/pipelines/index.js.map +1 -0
  97. package/dist/prm/index.d.ts +99 -0
  98. package/dist/prm/index.js +222 -0
  99. package/dist/prm/index.js.map +1 -0
  100. package/dist/query-DODUYdPg.d.ts +30 -0
  101. package/dist/release-report-BNgMdqPF.d.ts +292 -0
  102. package/dist/replay-BL96gCEP.d.ts +226 -0
  103. package/dist/reporting.d.ts +10 -295
  104. package/dist/reporting.js +10 -6
  105. package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-BPT8x_NT.d.ts} +148 -146
  106. package/dist/rl.d.ts +1762 -8
  107. package/dist/rl.js +2035 -58
  108. package/dist/rl.js.map +1 -1
  109. package/dist/rubric-D5tjHNJQ.d.ts +72 -0
  110. package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
  111. package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
  112. package/dist/sequential-Dgz1n51-.d.ts +139 -0
  113. package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
  114. package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-C7VPYEj2.d.ts} +3 -76
  115. package/dist/telemetry/file.js +4 -1
  116. package/dist/telemetry/file.js.map +1 -1
  117. package/dist/telemetry/index.js +57 -57
  118. package/dist/telemetry/index.js.map +1 -1
  119. package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
  120. package/dist/traces.d.ts +142 -387
  121. package/dist/traces.js +1302 -40
  122. package/dist/traces.js.map +1 -1
  123. package/dist/trajectory-CnoBo-JY.d.ts +32 -0
  124. package/dist/wire/index.d.ts +369 -25
  125. package/dist/wire/index.js +22 -3
  126. package/package.json +44 -18
  127. package/dist/chunk-42I2QC2L.js.map +0 -1
  128. package/dist/chunk-5IIQKMD5.js.map +0 -1
  129. package/dist/chunk-6KQG5HAH.js.map +0 -1
  130. package/dist/chunk-6M774GY6.js.map +0 -1
  131. package/dist/chunk-7EAUOUQS.js.map +0 -1
  132. package/dist/chunk-AXHNWLIX.js.map +0 -1
  133. package/dist/chunk-EXGR4XEM.js.map +0 -1
  134. package/dist/chunk-IOXMGMHQ.js.map +0 -1
  135. package/dist/chunk-KAO3Q65R.js.map +0 -1
  136. package/dist/chunk-LZKIOBG2.js +0 -2026
  137. package/dist/chunk-LZKIOBG2.js.map +0 -1
  138. package/dist/chunk-QBW3YBTR.js.map +0 -1
  139. package/dist/chunk-QUKKGHTZ.js.map +0 -1
  140. package/dist/chunk-SQQLHODJ.js.map +0 -1
  141. package/dist/chunk-V5QSWN7L.js +0 -1310
  142. package/dist/chunk-V5QSWN7L.js.map +0 -1
  143. package/dist/chunk-VQQSPGSM.js.map +0 -1
  144. package/dist/chunk-XPHOZPOM.js +0 -1947
  145. package/dist/chunk-XPHOZPOM.js.map +0 -1
  146. package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
  147. package/dist/index-ekBXweiQ.d.ts +0 -1894
  148. package/dist/sequential-DgU2mFsE.d.ts +0 -304
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/wire/schemas.ts","../src/wire/rubrics.ts","../src/wire/handlers.ts","../src/wire/openapi.ts","../src/wire/rpc.ts","../src/wire/server.ts"],"sourcesContent":["/**\n * Wire-protocol schemas.\n *\n * These Zod schemas are the contract between the agent-eval runtime and\n * any non-TypeScript client (Python, Rust, Go, …). They get rendered to\n * OpenAPI by `wire/openapi.ts` and code-generators consume that spec to\n * produce typed clients in other languages.\n *\n * Rule: if it's not in this file, it isn't on the wire. Keep names and\n * shapes self-explanatory — every field has a `.describe()` so the\n * generated docs are useful without reading the source.\n */\nimport { extendZodWithOpenApi } from '@asteasolutions/zod-to-openapi'\nimport { z } from 'zod'\n\nextendZodWithOpenApi(z)\n\n// ── Building blocks ─────────────────────────────────────────────────\n\nexport const RubricDimensionSchema = z\n .object({\n id: z\n .string()\n .min(1)\n .describe('Short stable id like \"buyer_quality\" — used as the key in scoring output.'),\n description: z\n .string()\n .min(1)\n .describe('One-line plain-English meaning. Read by humans reviewing low scores.'),\n weight: z\n .number()\n .min(0)\n .default(1)\n .describe('Relative weight in the composite score. Default 1; 0 disables.'),\n min: z.number().default(0).describe('Lower bound of valid score for this dimension.'),\n max: z.number().default(1).describe('Upper bound of valid score for this dimension.'),\n })\n .openapi('RubricDimension')\n\nexport const FailureModeSchema = z\n .object({\n id: z.string().min(1).describe('Short stable id like \"ai-cadence\" — used in detection lists.'),\n description: z.string().min(1).describe('Plain-English description of the failure pattern.'),\n })\n .openapi('FailureMode')\n\n// ── Rubric ──────────────────────────────────────────────────────────\n\nexport const RubricSchema = z\n .object({\n name: z\n .string()\n .min(1)\n .describe('Stable name like \"anti-slop\" — used by clients to invoke this rubric.'),\n description: z\n .string()\n .min(1)\n .describe('What this rubric measures. Shown in /v1/rubrics listing.'),\n systemPrompt: z\n .string()\n .min(1)\n .describe(\n 'Instructs the judging LLM. Should explain the persona (e.g. \"senior engineer reviewing voice\"), what to score on, and what to return.',\n ),\n dimensions: z\n .array(RubricDimensionSchema)\n .min(1)\n .describe('Scoring axes. The composite score is a weighted sum of these.'),\n failureModes: z\n .array(FailureModeSchema)\n .default([])\n .describe('Patterns to detect; each detected mode appears in the result.failureModes list.'),\n wins: z\n .array(FailureModeSchema)\n .default([])\n .describe('Positive patterns; each detected one appears in the result.wins list.'),\n })\n .openapi('Rubric')\n\n// ── Judge call ──────────────────────────────────────────────────────\n\nexport const JudgeRequestSchema = z\n .object({\n rubricName: z\n .string()\n .optional()\n .describe('Use a built-in rubric by name. Mutually exclusive with `rubric`.'),\n rubric: RubricSchema.optional().describe(\n 'Inline rubric definition. Mutually exclusive with `rubricName`.',\n ),\n content: z\n .string()\n .min(1)\n .describe('The text being judged — a tweet, a blog post, a code snippet, anything stringly.'),\n context: z\n .record(z.string(), z.unknown())\n .optional()\n .describe(\n 'Free-form metadata for the rubric to use — analytics, source URL, author, etc. Surfaced to the LLM.',\n ),\n model: z\n .string()\n .optional()\n .describe('Override the judge model (default routes via tcloud). e.g. \"claude-opus-4-7\".'),\n })\n .refine((v) => Boolean(v.rubricName) !== Boolean(v.rubric), {\n message: 'Provide exactly one of `rubricName` or `rubric`.',\n })\n .openapi('JudgeRequest')\n\nexport const JudgeResultSchema = z\n .object({\n composite: z\n .number()\n .min(0)\n .max(1)\n .describe('Weighted combination of dimension scores in 0..1. The single number to gate on.'),\n dimensions: z\n .record(z.string(), z.number())\n .describe('Per-dimension score, keyed by RubricDimension.id.'),\n failureModes: z\n .array(z.string())\n .default([])\n .describe('Failure-mode ids detected in the content (subset of rubric.failureModes ids).'),\n wins: z\n .array(z.string())\n .default([])\n .describe('Win ids detected in the content (subset of rubric.wins ids).'),\n rationale: z\n .string()\n .describe('Plain-English explanation of the score. Surfaced to the human reviewer.'),\n rubricVersion: z\n .string()\n .describe(\n 'Stable hash of the rubric used. Scores are only comparable across runs when this matches.',\n ),\n model: z.string().describe('Model that produced the judgement, for reproducibility.'),\n durationMs: z.number().int().nonnegative().describe('End-to-end wall time for this call.'),\n })\n .openapi('JudgeResult')\n\n// ── Rubric listing ──────────────────────────────────────────────────\n\nexport const RubricInfoSchema = z\n .object({\n name: z.string().describe('Pass this to /v1/judge as `rubricName`.'),\n description: z.string().describe('What this rubric measures.'),\n dimensions: z\n .array(z.object({ id: z.string(), description: z.string(), weight: z.number() }))\n .describe('The scoring axes this rubric uses, with weights.'),\n failureModes: z.array(z.string()).default([]).describe('Failure-mode ids this rubric detects.'),\n rubricVersion: z.string().describe('Stable hash — match this to compare scores across runs.'),\n })\n .openapi('RubricInfo')\n\nexport const ListRubricsResponseSchema = z\n .object({\n rubrics: z.array(RubricInfoSchema),\n })\n .openapi('ListRubricsResponse')\n\n// ── Version / health ────────────────────────────────────────────────\n\nexport const VersionResponseSchema = z\n .object({\n package: z.string().describe('Package name (always \"@tangle-network/agent-eval\").'),\n version: z.string().describe('Semver of the running server. Match your client to this.'),\n wireVersion: z\n .string()\n .describe(\n 'Wire-protocol semver. Bumps separately from package version when the schema changes.',\n ),\n apiSurface: z.array(z.string()).describe('List of supported method names.'),\n })\n .openapi('VersionResponse')\n\nexport const HealthResponseSchema = z\n .object({\n status: z.literal('ok'),\n uptimeSec: z.number(),\n })\n .openapi('HealthResponse')\n\n// ── Ingestion: production traces + user feedback (0.25.0) ───────────\n\n/**\n * Minimal `TraceEvent` shape that the production runtime emits.\n * Matches `trace/schema.ts` `TraceEvent` but is duplicated here as a\n * wire schema so non-TypeScript clients can validate without depending\n * on internal types.\n */\nexport const TraceEventSchema = z\n .object({\n eventId: z.string().min(1).describe('Stable id for the event. Use ULID or UUID.'),\n runId: z.string().min(1).describe('Run this event belongs to.'),\n spanId: z.string().optional().describe('Span that emitted the event, if any.'),\n kind: z\n .enum([\n 'log',\n 'error',\n 'budget_decrement',\n 'budget_breach',\n 'state_mutation',\n 'policy_violation',\n 'redaction_applied',\n 'custom',\n ])\n .describe('Coarse event category — matches the TraceSchema v1 EventKind enum.'),\n timestamp: z\n .number()\n .int()\n .nonnegative()\n .describe('Unix millis. Must be monotonically non-decreasing within a span.'),\n payload: z\n .record(z.string(), z.unknown())\n .describe('Free-form payload — the runtime owns the shape.'),\n })\n .openapi('TraceEvent')\n\nexport const TracesIngestRequestSchema = z\n .object({\n events: z\n .array(TraceEventSchema)\n .min(1)\n .max(10_000)\n .describe('Batch of events. Max 10k per call — bigger streams should be chunked.'),\n })\n .openapi('TracesIngestRequest')\n\nexport const TracesIngestResponseSchema = z\n .object({\n accepted: z.number().int().nonnegative().describe('Number of events persisted.'),\n rejected: z\n .number()\n .int()\n .nonnegative()\n .describe('Number of events the store refused — see `errors[]` for reasons.'),\n errors: z\n .array(\n z.object({\n eventId: z.string().describe('Event id this error applies to.'),\n message: z.string().describe('Why the event was rejected.'),\n }),\n )\n .default([]),\n })\n .openapi('TracesIngestResponse')\n\nexport const FeedbackLabelSchema = z\n .object({\n id: z.string().optional(),\n source: z.enum(['user', 'judge', 'environment', 'metric', 'policy', 'system']),\n kind: z.enum([\n 'approve',\n 'reject',\n 'select',\n 'edit',\n 'rank',\n 'rate',\n 'comment',\n 'metric_outcome',\n 'policy_block',\n 'revision_request',\n ]),\n value: z.unknown(),\n reason: z.string().optional(),\n severity: z.enum(['info', 'warning', 'error', 'critical']).optional(),\n createdAt: z.string().describe('ISO-8601 UTC.'),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .openapi('FeedbackLabel')\n\nexport const FeedbackAttemptSchema = z\n .object({\n id: z.string().min(1),\n stepIndex: z.number().int().nonnegative(),\n artifactType: z.enum([\n 'text',\n 'code',\n 'plan',\n 'research',\n 'action',\n 'ui',\n 'decision',\n 'data',\n 'other',\n ]),\n artifact: z.unknown(),\n options: z.array(z.unknown()).optional(),\n proposedAction: z\n .object({\n type: z.string(),\n risk: z.enum(['low', 'medium', 'high']).optional(),\n costUsd: z.number().optional(),\n externalSideEffect: z.boolean().optional(),\n requiresApproval: z.boolean().optional(),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .optional(),\n feedback: z.array(FeedbackLabelSchema).optional(),\n createdAt: z.string(),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .openapi('FeedbackAttempt')\n\nexport const FeedbackTrajectorySchema = z\n .object({\n id: z.string().min(1).describe('Stable id; idempotency key for the trajectory.'),\n projectId: z.string().optional(),\n scenarioId: z.string().optional(),\n task: z.object({\n intent: z.string().min(1),\n context: z.unknown().optional(),\n }),\n attempts: z.array(FeedbackAttemptSchema).default([]),\n labels: z.array(FeedbackLabelSchema).default([]),\n outcome: z\n .object({\n success: z.boolean().optional(),\n score: z.number().optional(),\n metrics: z.record(z.string(), z.number()).optional(),\n costUsd: z.number().optional(),\n detail: z.string().optional(),\n observedAt: z.string().optional(),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .optional(),\n split: z.enum(['train', 'dev', 'test', 'holdout']).optional(),\n tags: z.record(z.string(), z.string()).optional(),\n createdAt: z.string().describe('ISO-8601 UTC.'),\n updatedAt: z.string().optional(),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .openapi('FeedbackTrajectory')\n\nexport const FeedbackIngestResponseSchema = z\n .object({\n id: z.string().describe('Trajectory id that was persisted.'),\n persisted: z.boolean().describe('True when the trajectory was saved (idempotent on id).'),\n })\n .openapi('FeedbackIngestResponse')\n\nexport type TraceEvent = z.infer<typeof TraceEventSchema>\nexport type TracesIngestRequest = z.infer<typeof TracesIngestRequestSchema>\nexport type TracesIngestResponse = z.infer<typeof TracesIngestResponseSchema>\nexport type FeedbackTrajectory = z.infer<typeof FeedbackTrajectorySchema>\nexport type FeedbackIngestResponse = z.infer<typeof FeedbackIngestResponseSchema>\n\n// ── Errors ──────────────────────────────────────────────────────────\n\nexport const ErrorResponseSchema = z\n .object({\n error: z\n .object({\n code: z\n .string()\n .describe(\n 'Machine-readable code: \"validation_error\", \"rubric_not_found\", \"judge_error\".',\n ),\n message: z.string().describe('Human-readable message.'),\n details: z.unknown().optional().describe('Optional structured detail.'),\n })\n .describe('Errors are always wrapped in this shape across all endpoints.'),\n })\n .openapi('ErrorResponse')\n\n// ── Type exports for callers in the same package ────────────────────\n\nexport type RubricDimension = z.infer<typeof RubricDimensionSchema>\nexport type FailureMode = z.infer<typeof FailureModeSchema>\nexport type Rubric = z.infer<typeof RubricSchema>\nexport type JudgeRequest = z.infer<typeof JudgeRequestSchema>\nexport type JudgeResult = z.infer<typeof JudgeResultSchema>\nexport type RubricInfo = z.infer<typeof RubricInfoSchema>\nexport type ListRubricsResponse = z.infer<typeof ListRubricsResponseSchema>\nexport type VersionResponse = z.infer<typeof VersionResponseSchema>\nexport type ErrorResponse = z.infer<typeof ErrorResponseSchema>\n\n// ── Wire-protocol version ───────────────────────────────────────────\n\n/**\n * Bump on any breaking change to a request/response schema.\n * Non-breaking (additive) changes don't require a bump.\n */\nexport const WIRE_VERSION = '1.0.0'\n\n/**\n * Stable hash of a rubric. Used to make scores comparable across runs:\n * if the rubricVersion matches, the rubric was identical.\n */\nexport function hashRubric(rubric: Rubric): string {\n const stable = stableStringify(rubric)\n let h = 5381\n for (let i = 0; i < stable.length; i++) {\n h = (h * 33) ^ stable.charCodeAt(i)\n }\n // Unsigned 32-bit hex, prefixed with rubric name + version slot\n return `${rubric.name}@${(h >>> 0).toString(16).padStart(8, '0')}`\n}\n\nfunction stableStringify(value: unknown): string {\n if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(',')}]`\n if (value && typeof value === 'object') {\n const entries = Object.entries(value as Record<string, unknown>)\n .sort(([a], [b]) => a.localeCompare(b))\n .map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`)\n return `{${entries.join(',')}}`\n }\n return JSON.stringify(value)\n}\n","/**\n * Built-in rubrics shipped with agent-eval.\n *\n * A rubric is a set of scoring axes plus a system prompt that tells the\n * judging LLM how to grade against those axes. Built-in rubrics are\n * curated for use cases that recur across Tangle projects — call them\n * by name from any client.\n *\n * Adding a rubric:\n * 1. Define the Rubric object below with a clear `description` and\n * named `dimensions`.\n * 2. Register it in `BUILTIN_RUBRICS` at the bottom.\n * 3. Add a test in `tests/wire/rubrics.test.ts`.\n *\n * Custom rubrics: callers pass `rubric` inline to /v1/judge instead of\n * `rubricName` — see schemas.ts.\n */\nimport type { Rubric } from './schemas'\nimport { hashRubric } from './schemas'\n\n// ── anti-slop ───────────────────────────────────────────────────────\n// Voice/style judge tuned for technical-buyer audiences. Used by the\n// Postiz autoresearch loop and any content-quality gate.\n\nconst ANTI_SLOP: Rubric = {\n name: 'anti-slop',\n description:\n 'Voice and signal quality for content aimed at senior engineers. Catches AI cadence, marketing tone, and engagement-bait shapes.',\n systemPrompt: `You are evaluating a piece of content written for senior engineers and technical founders.\n\nYou score three things:\n- buyer_quality (0..1): would a senior engineer in the target ICP find this worth their attention? High = specific, earned, technically interesting. Low = generic, hyped, off-target.\n- voice (0..1): does it read like a person who built the thing, or like AI/marketing copy?\n- signal (0..1): does it contain a non-obvious detail, constraint, or claim a reader couldn't get from the public docs?\n\nDetect failure modes (return ids matching):\n- ai-cadence: rule-of-three openings, em-dash flourish, \"Let me explain\", \"Here's the thing\", AI rhythm\n- marketing-tone: \"We're excited to announce\", \"thrilled\", \"delighted\", \"game-changer\", buzzword stack\n- vague-claim: technical claim without a specific component, file, or measurement\n- no-hook: opening doesn't earn attention from the target reader\n- engagement-bait: \"agree?\", \"thoughts?\", listicles, controversy-fishing, hook-detail-pitch\n- off-icp: content shape would attract motivational/grift/hype audiences instead of buyers\n- stale-claim: repeats a positioning line we've used many times this month\n\nDetect wins (return ids matching):\n- specific-component: names a real file, component, or measurement\n- earned-detail: shares a non-obvious detail not derivable from public docs\n- constraint-articulated: names a real tradeoff and the side chosen\n- honest-failure: describes a real failure mode and what was done about it\n\nReturn ONLY JSON matching the response schema. Be conservative — most content has 0-1 wins and 1-2 failure modes, not many of each.`,\n dimensions: [\n {\n id: 'buyer_quality',\n description: 'Would the target buyer find this worth attention?',\n weight: 0.5,\n min: 0,\n max: 1,\n },\n {\n id: 'voice',\n description: 'Does it sound like a builder, not AI or marketing?',\n weight: 0.3,\n min: 0,\n max: 1,\n },\n {\n id: 'signal',\n description: 'Non-obvious detail, constraint, or claim?',\n weight: 0.2,\n min: 0,\n max: 1,\n },\n ],\n failureModes: [\n { id: 'ai-cadence', description: 'AI-rhythm openings and transitions' },\n { id: 'marketing-tone', description: 'Buzzwords, hype, corporate-PR voice' },\n { id: 'vague-claim', description: 'Technical claim without specifics' },\n { id: 'no-hook', description: 'Opening fails to earn attention' },\n { id: 'engagement-bait', description: 'Listicle/controversy/agree-pattern' },\n { id: 'off-icp', description: 'Voice attracts the wrong audience' },\n { id: 'stale-claim', description: 'Reuses an over-used positioning line' },\n ],\n wins: [\n { id: 'specific-component', description: 'Names a real file/component/number' },\n { id: 'earned-detail', description: 'Detail not in public docs' },\n { id: 'constraint-articulated', description: 'Names a real tradeoff' },\n { id: 'honest-failure', description: 'Describes a real failure honestly' },\n ],\n}\n\n// ── Registry ────────────────────────────────────────────────────────\n\nexport const BUILTIN_RUBRICS: Record<string, Rubric> = {\n 'anti-slop': ANTI_SLOP,\n}\n\n/** Get a built-in rubric by name, or undefined. */\nexport function getBuiltinRubric(name: string): Rubric | undefined {\n return BUILTIN_RUBRICS[name]\n}\n\n/** List built-in rubrics with their stable versions. */\nexport function listBuiltinRubrics() {\n return Object.values(BUILTIN_RUBRICS).map((r) => ({\n name: r.name,\n description: r.description,\n dimensions: r.dimensions.map((d) => ({\n id: d.id,\n description: d.description,\n weight: d.weight,\n })),\n failureModes: r.failureModes.map((f) => f.id),\n rubricVersion: hashRubric(r),\n }))\n}\n","/**\n * Pure handler functions — the \"business logic\" behind every wire-protocol\n * method. The HTTP server (`server.ts`) and the stdio RPC (`rpc.ts`) both\n * call these. Tests call these directly without spinning a server.\n *\n * Each handler:\n * - Takes a parsed request (already Zod-validated by the transport).\n * - Returns a result that matches the response schema.\n * - Throws `WireError` for caller-fixable errors (404, 400, 422).\n * - Lets unexpected errors bubble — the transport maps them to 500.\n */\nimport type { FeedbackTrajectoryStore } from '../feedback-trajectory'\nimport { callLlmJson } from '../llm-client'\nimport type { TraceEvent as InternalTraceEvent } from '../trace/schema'\nimport type { TraceStore } from '../trace/store'\nimport { getBuiltinRubric, listBuiltinRubrics } from './rubrics'\nimport {\n type FeedbackIngestResponse,\n hashRubric,\n type JudgeRequest,\n type JudgeResult,\n type ListRubricsResponse,\n type Rubric,\n type TracesIngestRequest,\n type TracesIngestResponse,\n type VersionResponse,\n WIRE_VERSION,\n type FeedbackTrajectory as WireFeedbackTrajectory,\n} from './schemas'\n\n/** Caller-fixable error. The transport renders this to 4xx + ErrorResponse. */\nexport class WireError extends Error {\n constructor(\n public readonly code: string,\n message: string,\n public readonly status: number = 400,\n public readonly details?: unknown,\n ) {\n super(message)\n this.name = 'WireError'\n }\n}\n\n// ── judge ───────────────────────────────────────────────────────────\n\n/** The JSON schema we ask the judging LLM to fill in. */\nfunction judgeOutputSchema(rubric: Rubric) {\n return {\n name: 'JudgeOutput',\n schema: {\n type: 'object',\n additionalProperties: false,\n properties: {\n dimensions: {\n type: 'object',\n additionalProperties: false,\n properties: Object.fromEntries(\n rubric.dimensions.map((d) => [\n d.id,\n { type: 'number', minimum: d.min, maximum: d.max },\n ]),\n ),\n required: rubric.dimensions.map((d) => d.id),\n },\n failureModes: {\n type: 'array',\n items: { type: 'string', enum: rubric.failureModes.map((f) => f.id) },\n },\n wins: {\n type: 'array',\n items: { type: 'string', enum: rubric.wins.map((w) => w.id) },\n },\n rationale: { type: 'string' },\n },\n required: ['dimensions', 'rationale'],\n } as Record<string, unknown>,\n }\n}\n\ninterface JudgeOutput {\n dimensions: Record<string, number>\n failureModes?: string[]\n wins?: string[]\n rationale: string\n}\n\nfunction validateJudgeOutput(value: unknown, rubric: Rubric): JudgeOutput {\n if (!value || typeof value !== 'object') {\n throw new WireError('judge_error', 'Judge returned malformed output.', 500, value)\n }\n const raw = value as Record<string, unknown>\n const rawDimensions = raw.dimensions\n if (!rawDimensions || typeof rawDimensions !== 'object' || Array.isArray(rawDimensions)) {\n throw new WireError('judge_error', 'Judge returned malformed dimensions.', 500, value)\n }\n\n const dimensions: Record<string, number> = {}\n const dimensionRecord = rawDimensions as Record<string, unknown>\n for (const dim of rubric.dimensions) {\n const score = dimensionRecord[dim.id]\n if (\n typeof score !== 'number' ||\n !Number.isFinite(score) ||\n score < dim.min ||\n score > dim.max\n ) {\n throw new WireError(\n 'judge_error',\n `Judge returned invalid score for dimension \"${dim.id}\".`,\n 500,\n value,\n )\n }\n dimensions[dim.id] = score\n }\n\n const allowedFailures = new Set(rubric.failureModes.map((mode) => mode.id))\n const allowedWins = new Set(rubric.wins.map((win) => win.id))\n const failureModes = validateIdArray(raw.failureModes, allowedFailures, 'failureModes', value)\n const wins = validateIdArray(raw.wins, allowedWins, 'wins', value)\n if (typeof raw.rationale !== 'string' || raw.rationale.trim().length === 0) {\n throw new WireError('judge_error', 'Judge returned missing rationale.', 500, value)\n }\n\n return { dimensions, failureModes, wins, rationale: raw.rationale }\n}\n\nfunction validateIdArray(\n raw: unknown,\n allowed: Set<string>,\n field: 'failureModes' | 'wins',\n original: unknown,\n): string[] {\n if (raw === undefined) return []\n if (!Array.isArray(raw)) {\n throw new WireError('judge_error', `Judge returned non-array ${field}.`, 500, original)\n }\n const out: string[] = []\n for (const item of raw) {\n if (typeof item !== 'string' || !allowed.has(item)) {\n throw new WireError(\n 'judge_error',\n `Judge returned unknown ${field} id \"${String(item)}\".`,\n 500,\n original,\n )\n }\n out.push(item)\n }\n return out\n}\n\nfunction compositeScore(dimensions: Record<string, number>, rubric: Rubric): number {\n let weighted = 0\n let totalWeight = 0\n for (const dim of rubric.dimensions) {\n const raw = dimensions[dim.id] ?? 0\n const range = dim.max - dim.min || 1\n const normalized = Math.max(0, Math.min(1, (raw - dim.min) / range))\n weighted += normalized * dim.weight\n totalWeight += dim.weight\n }\n return totalWeight > 0 ? weighted / totalWeight : 0\n}\n\nfunction buildJudgePrompt(content: string, context: unknown): string {\n const ctx = context && Object.keys(context as object).length ? JSON.stringify(context) : ''\n return [\n `CONTENT TO JUDGE:`,\n content,\n '',\n ctx ? `CONTEXT (metadata, analytics, etc.):` : '',\n ctx ? ctx : '',\n ]\n .filter(Boolean)\n .join('\\n')\n}\n\nconst DEFAULT_JUDGE_MODEL = 'claude-sonnet-4-6'\n\nexport async function handleJudge(req: JudgeRequest): Promise<JudgeResult> {\n // Resolve rubric\n let rubric: Rubric\n if (req.rubricName) {\n const found = getBuiltinRubric(req.rubricName)\n if (!found) {\n throw new WireError('rubric_not_found', `No built-in rubric named \"${req.rubricName}\".`, 404)\n }\n rubric = found\n } else if (req.rubric) {\n rubric = req.rubric\n } else {\n // refine() in the schema should already have caught this — defense in depth\n throw new WireError('validation_error', 'Provide either `rubricName` or `rubric`.', 422)\n }\n\n const startedAt = Date.now()\n const model = req.model ?? DEFAULT_JUDGE_MODEL\n\n const { value, result } = await callLlmJson<JudgeOutput>({\n model,\n messages: [\n { role: 'system', content: rubric.systemPrompt },\n { role: 'user', content: buildJudgePrompt(req.content, req.context) },\n ],\n jsonSchema: judgeOutputSchema(rubric),\n temperature: 0.0,\n timeoutMs: 60_000,\n })\n\n const output = validateJudgeOutput(value, rubric)\n\n const composite = compositeScore(output.dimensions, rubric)\n const durationMs = Date.now() - startedAt\n\n return {\n composite,\n dimensions: output.dimensions,\n failureModes: output.failureModes ?? [],\n wins: output.wins ?? [],\n rationale: output.rationale,\n rubricVersion: hashRubric(rubric),\n model: result.model,\n durationMs,\n }\n}\n\n// ── listRubrics ─────────────────────────────────────────────────────\n\nexport function handleListRubrics(): ListRubricsResponse {\n return { rubrics: listBuiltinRubrics() }\n}\n\n// ── version ─────────────────────────────────────────────────────────\n\nimport { readFileSync } from 'node:fs'\nimport { dirname, resolve } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nlet CACHED_VERSION: string | undefined\n\nfunction readPackageVersion(): string {\n if (CACHED_VERSION) return CACHED_VERSION\n // Walk up from this file looking for the nearest package.json.\n // In dist/ this is dist/.., in src/wire/ this is ../../package.json.\n const here = dirname(fileURLToPath(import.meta.url))\n const candidates = [\n resolve(here, '..', '..', 'package.json'), // src/wire → repo root\n resolve(here, '..', 'package.json'), // dist → repo root\n ]\n for (const path of candidates) {\n try {\n const pkg = JSON.parse(readFileSync(path, 'utf-8')) as { version?: string }\n if (pkg.version) {\n CACHED_VERSION = pkg.version\n return pkg.version\n }\n } catch {\n // try next\n }\n }\n return '0.0.0-unknown'\n}\n\nexport function handleVersion(): VersionResponse {\n return {\n package: '@tangle-network/agent-eval',\n version: readPackageVersion(),\n wireVersion: WIRE_VERSION,\n apiSurface: ['judge', 'listRubrics', 'version', 'feedback.ingest', 'traces.ingest'],\n }\n}\n\n// ── Ingestion handlers (0.25.0) ─────────────────────────────────────\n\n/**\n * Pluggable stores the wire layer routes ingestion writes into. Both\n * are optional — when omitted, the corresponding endpoint returns 503.\n *\n * Production deployments wire a `FileSystemTraceStore` and\n * `FileSystemFeedbackTrajectoryStore` here. Tests substitute in-memory\n * stores.\n */\nexport interface IngestionStores {\n traceStore?: TraceStore\n feedbackStore?: FeedbackTrajectoryStore\n}\n\n/**\n * `POST /v1/traces/ingest` — accept a batch of `TraceEvent`s from the\n * production runtime. Best-effort: each event is appended independently;\n * one bad event does not poison the batch.\n *\n * Idempotency: the underlying store is append-only; consumers retrying\n * the same payload will get duplicate events. Consumers should\n * de-duplicate by `eventId` downstream — production traces frequently\n * land via at-least-once buses (Kafka, SQS) where dedup is unavoidable.\n */\nexport async function handleTracesIngest(\n req: TracesIngestRequest,\n stores: IngestionStores,\n): Promise<TracesIngestResponse> {\n if (!stores.traceStore) {\n throw new WireError(\n 'service_unavailable',\n 'No trace store configured on this server. Pass `traceStore` to `createApp`.',\n 503,\n )\n }\n const errors: Array<{ eventId: string; message: string }> = []\n let accepted = 0\n for (const event of req.events) {\n try {\n // The wire `TraceEvent` is structurally identical to the internal one.\n await stores.traceStore.appendEvent(event as InternalTraceEvent)\n accepted++\n } catch (err) {\n errors.push({\n eventId: event.eventId,\n message: err instanceof Error ? err.message : String(err),\n })\n }\n }\n return { accepted, rejected: errors.length, errors }\n}\n\n/**\n * `POST /v1/feedback` — accept a single `FeedbackTrajectory` from the\n * production runtime. Idempotent on `id`: re-posting the same trajectory\n * replaces the prior record.\n */\nexport async function handleFeedbackIngest(\n req: WireFeedbackTrajectory,\n stores: IngestionStores,\n): Promise<FeedbackIngestResponse> {\n if (!stores.feedbackStore) {\n throw new WireError(\n 'service_unavailable',\n 'No feedback store configured on this server. Pass `feedbackStore` to `createApp`.',\n 503,\n )\n }\n // The wire `FeedbackTrajectory` aligns 1:1 with the internal type;\n // cast through `unknown` since the wire schema is a Zod-inferred\n // structural type with optional fields the internal store consumes.\n await stores.feedbackStore.save(req as unknown as Parameters<FeedbackTrajectoryStore['save']>[0])\n return { id: req.id, persisted: true }\n}\n","/**\n * Build an OpenAPI spec from the wire schemas.\n *\n * The spec is the contract that other-language clients (Python, Rust,\n * Go, …) generate from. There is no hand-written client — clients are\n * derived artifacts of this file plus `schemas.ts`.\n *\n * Run `pnpm openapi` (defined in package.json) to write the spec to\n * `dist/openapi.json`. CI uses that file to regenerate the Python\n * client and gate the dual-publish workflow.\n */\nimport { OpenAPIRegistry, OpenApiGeneratorV31 } from '@asteasolutions/zod-to-openapi'\nimport type { OpenAPIObject } from 'openapi3-ts/oas31'\n\nimport {\n ErrorResponseSchema,\n FeedbackIngestResponseSchema,\n FeedbackTrajectorySchema,\n HealthResponseSchema,\n JudgeRequestSchema,\n JudgeResultSchema,\n ListRubricsResponseSchema,\n TracesIngestRequestSchema,\n TracesIngestResponseSchema,\n VersionResponseSchema,\n WIRE_VERSION,\n} from './schemas'\n\nexport function buildOpenApi(packageVersion: string): OpenAPIObject {\n const registry = new OpenAPIRegistry()\n\n // Components — each schema becomes a $ref-able component\n registry.register('JudgeRequest', JudgeRequestSchema)\n registry.register('JudgeResult', JudgeResultSchema)\n registry.register('ListRubricsResponse', ListRubricsResponseSchema)\n registry.register('VersionResponse', VersionResponseSchema)\n registry.register('HealthResponse', HealthResponseSchema)\n registry.register('ErrorResponse', ErrorResponseSchema)\n registry.register('TracesIngestRequest', TracesIngestRequestSchema)\n registry.register('TracesIngestResponse', TracesIngestResponseSchema)\n registry.register('FeedbackTrajectory', FeedbackTrajectorySchema)\n registry.register('FeedbackIngestResponse', FeedbackIngestResponseSchema)\n\n // Routes\n registry.registerPath({\n method: 'post',\n path: '/v1/judge',\n summary: 'Score a piece of content against a rubric',\n description:\n 'Runs the judging LLM with the named (or inline) rubric and returns dimension scores, detected failure modes, wins, and a composite score in 0..1.',\n request: {\n body: {\n content: {\n 'application/json': { schema: JudgeRequestSchema },\n },\n },\n },\n responses: {\n 200: {\n description: 'Successful judgement',\n content: { 'application/json': { schema: JudgeResultSchema } },\n },\n 400: {\n description: 'Validation error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 404: {\n description: 'Rubric not found',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 500: {\n description: 'Judge error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'get',\n path: '/v1/rubrics',\n summary: 'List built-in rubrics',\n description:\n 'Returns every rubric registered server-side, with their dimensions and stable rubricVersion hash.',\n responses: {\n 200: {\n description: 'Listing',\n content: { 'application/json': { schema: ListRubricsResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'get',\n path: '/v1/version',\n summary: 'Server and wire-protocol version',\n description: 'Match your client version to `version`; check `wireVersion` for compatibility.',\n responses: {\n 200: {\n description: 'Version info',\n content: { 'application/json': { schema: VersionResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'get',\n path: '/healthz',\n summary: 'Liveness check',\n responses: {\n 200: {\n description: 'OK',\n content: { 'application/json': { schema: HealthResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'post',\n path: '/v1/traces/ingest',\n summary: 'Ingest a batch of production TraceEvents',\n description:\n 'Append a batch of TraceEvents to the configured TraceStore. Accepts application/json ({events:[...]}) or application/x-ndjson (one event per line). Returns counts of accepted + rejected events.',\n request: {\n body: {\n content: {\n 'application/json': { schema: TracesIngestRequestSchema },\n 'application/x-ndjson': { schema: TracesIngestRequestSchema },\n },\n },\n },\n responses: {\n 200: {\n description: 'Ingestion summary',\n content: { 'application/json': { schema: TracesIngestResponseSchema } },\n },\n 400: {\n description: 'Validation error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 401: {\n description: 'Unauthorized (when bearer auth is configured)',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 503: {\n description: 'No trace store configured',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'post',\n path: '/v1/feedback',\n summary: 'Ingest a FeedbackTrajectory from production',\n description:\n 'Persist a single FeedbackTrajectory. Idempotent on trajectory.id — re-posting replaces the prior record. Used by production runtimes to forward user 👍/👎/edits into the eval substrate.',\n request: {\n body: {\n content: {\n 'application/json': { schema: FeedbackTrajectorySchema },\n },\n },\n },\n responses: {\n 200: {\n description: 'Persisted',\n content: { 'application/json': { schema: FeedbackIngestResponseSchema } },\n },\n 400: {\n description: 'Validation error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 401: {\n description: 'Unauthorized (when bearer auth is configured)',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 503: {\n description: 'No feedback store configured',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n },\n })\n\n const generator = new OpenApiGeneratorV31(registry.definitions)\n const doc = generator.generateDocument({\n openapi: '3.1.0',\n info: {\n title: '@tangle-network/agent-eval — wire protocol',\n version: packageVersion,\n description: `HTTP and stdio RPC interface to agent-eval. The TypeScript runtime is the source of truth; this spec is the contract that cross-language clients (Python, Rust, Go) generate from.\n\nWire-protocol version: ${WIRE_VERSION}. Bumps on breaking changes to request/response schemas.`,\n contact: { name: 'Tangle Network', url: 'https://github.com/tangle-network/agent-eval' },\n license: { name: 'MIT' },\n },\n servers: [{ url: 'http://localhost:5005', description: 'Local agent-eval serve' }],\n })\n const rubricRef = { $ref: '#/components/schemas/Rubric' } as const\n const commonJudgeFields = {\n content: { type: 'string', minLength: 1 },\n context: { type: 'object', additionalProperties: true },\n model: { type: 'string' },\n } as const\n doc.components ??= {}\n doc.components.schemas ??= {}\n doc.components.schemas.JudgeRequest = {\n oneOf: [\n {\n type: 'object',\n additionalProperties: false,\n required: ['rubricName', 'content'],\n properties: {\n rubricName: { type: 'string', minLength: 1 },\n ...commonJudgeFields,\n },\n },\n {\n type: 'object',\n additionalProperties: false,\n required: ['rubric', 'content'],\n properties: {\n rubric: rubricRef,\n ...commonJudgeFields,\n },\n },\n ],\n description: 'Judge request. Provide exactly one of rubricName or rubric.',\n }\n return doc\n}\n","/**\n * stdio RPC transport.\n *\n * For batch / cron use without a running server. The Python client falls\n * back to this when no server is reachable.\n *\n * Protocol (line-delimited JSON over stdin/stdout):\n * IN: one JSON object on stdin: {\"method\":\"judge\",\"params\":{...}}\n * OUT: one JSON object on stdout: {\"result\":{...}} or {\"error\":{...}}\n *\n * One request per process invocation. To pipeline many calls, the client\n * writes JSONL to stdin and reads JSONL from stdout — see batch mode below.\n */\nimport { handleJudge, handleListRubrics, handleVersion, WireError } from './handlers'\nimport { JudgeRequestSchema } from './schemas'\n\ninterface RpcRequest {\n method: 'judge' | 'listRubrics' | 'version'\n params?: unknown\n}\n\ninterface RpcSuccess {\n result: unknown\n}\n\ninterface RpcError {\n error: { code: string; message: string; details?: unknown }\n}\n\nexport async function dispatchRpc(req: RpcRequest): Promise<RpcSuccess | RpcError> {\n try {\n switch (req.method) {\n case 'judge': {\n const parsed = JudgeRequestSchema.safeParse(req.params)\n if (!parsed.success) {\n return {\n error: {\n code: 'validation_error',\n message: 'params did not match JudgeRequest schema.',\n details: parsed.error.issues,\n },\n }\n }\n return { result: await handleJudge(parsed.data) }\n }\n case 'listRubrics':\n return { result: handleListRubrics() }\n case 'version':\n return { result: handleVersion() }\n default:\n return {\n error: {\n code: 'unknown_method',\n message: `No such method: ${(req as { method: string }).method}`,\n },\n }\n }\n } catch (err) {\n if (err instanceof WireError) {\n return { error: { code: err.code, message: err.message, details: err.details } }\n }\n const message = err instanceof Error ? err.message : String(err)\n return { error: { code: 'internal_error', message } }\n }\n}\n\n// ── stdin/stdout driver ─────────────────────────────────────────────\n\nasync function readAll(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = []\n for await (const chunk of stream) {\n chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as string))\n }\n return Buffer.concat(chunks).toString('utf-8')\n}\n\n/** Read one JSON request from stdin, write one JSON response to stdout. */\nexport async function runRpcOnce(method?: string): Promise<number> {\n const raw = await readAll(process.stdin)\n let req: RpcRequest\n try {\n const body = JSON.parse(raw)\n req = method ? { method: method as RpcRequest['method'], params: body } : (body as RpcRequest)\n } catch (err) {\n process.stdout.write(\n `${JSON.stringify({\n error: {\n code: 'parse_error',\n message: `stdin was not valid JSON: ${err instanceof Error ? err.message : String(err)}`,\n },\n })}\\n`,\n )\n return 1\n }\n const out = await dispatchRpc(req)\n process.stdout.write(`${JSON.stringify(out)}\\n`)\n return 'error' in out ? 1 : 0\n}\n\n/** Read JSONL requests from stdin, write JSONL responses to stdout. */\nexport async function runRpcBatch(method?: string): Promise<number> {\n const raw = await readAll(process.stdin)\n const lines = raw.split('\\n').filter((l) => l.trim().length > 0)\n let exitCode = 0\n for (const line of lines) {\n let req: RpcRequest\n try {\n const body = JSON.parse(line)\n req = method ? { method: method as RpcRequest['method'], params: body } : (body as RpcRequest)\n } catch (err) {\n process.stdout.write(\n `${JSON.stringify({\n error: {\n code: 'parse_error',\n message: `line was not valid JSON: ${err instanceof Error ? err.message : String(err)}`,\n },\n })}\\n`,\n )\n exitCode = 1\n continue\n }\n const out = await dispatchRpc(req)\n process.stdout.write(`${JSON.stringify(out)}\\n`)\n if ('error' in out) exitCode = 1\n }\n return exitCode\n}\n","/**\n * HTTP transport for the wire protocol.\n *\n * Hono + @hono/node-server. Every endpoint:\n * 1. Validates the request against its Zod schema.\n * 2. Calls the matching handler in `handlers.ts`.\n * 3. Renders 4xx for `WireError` with structured body, 500 for unexpected.\n *\n * The server holds optional `IngestionStores` (passed to `createApp`)\n * to receive production traces and user feedback. With no stores wired,\n * the ingestion endpoints return 503 — read endpoints (`/v1/judge`,\n * `/v1/rubrics`, `/v1/version`) remain fully functional.\n *\n * Run via `agent-eval serve --port 5005`.\n */\nimport { type ServerType, serve } from '@hono/node-server'\nimport { Hono } from 'hono'\nimport { cors } from 'hono/cors'\n\nimport {\n handleFeedbackIngest,\n handleJudge,\n handleListRubrics,\n handleTracesIngest,\n handleVersion,\n type IngestionStores,\n WireError,\n} from './handlers'\nimport { buildOpenApi } from './openapi'\nimport { FeedbackTrajectorySchema, JudgeRequestSchema, TracesIngestRequestSchema } from './schemas'\n\nconst STARTED_AT = Date.now()\n\nexport interface CreateAppOptions {\n /** Stores wired to the ingestion endpoints. */\n stores?: IngestionStores\n /**\n * Bearer-token auth. When provided, every endpoint EXCEPT `/healthz`\n * and `/v1/version` requires `Authorization: Bearer <token>`. The\n * token may be a static string OR a function for time-bounded /\n * rotating tokens.\n *\n * Recommended for any server that accepts ingestion writes from the\n * public internet. Read-only deployments may omit it.\n */\n auth?: {\n bearer: string | ((token: string) => boolean | Promise<boolean>)\n }\n}\n\nconst AUTH_EXEMPT_PATHS = new Set(['/healthz', '/v1/version', '/openapi.json'])\n\nexport function createApp(opts: CreateAppOptions = {}) {\n const app = new Hono()\n\n app.use('*', cors())\n\n // Bearer-token middleware (only attached when configured).\n if (opts.auth) {\n const verify = opts.auth.bearer\n app.use('*', async (c, next) => {\n const path = new URL(c.req.url).pathname\n if (AUTH_EXEMPT_PATHS.has(path)) return next()\n const raw = c.req.header('authorization') ?? ''\n const match = raw.match(/^Bearer\\s+(.+)$/i)\n if (!match) {\n throw new WireError('unauthorized', 'Missing or malformed Authorization header.', 401)\n }\n const token = match[1] as string\n const ok = typeof verify === 'string' ? token === verify : await verify(token)\n if (!ok) {\n throw new WireError('unauthorized', 'Invalid bearer token.', 401)\n }\n return next()\n })\n }\n\n app.onError((err, c) => {\n if (err instanceof WireError) {\n const status = err.status as 400 | 401 | 404 | 422 | 500 | 503\n return c.json(\n { error: { code: err.code, message: err.message, details: err.details } },\n status,\n )\n }\n // Unexpected — log and return generic 500 without leaking internals.\n console.error('[agent-eval] unhandled error:', err)\n return c.json({ error: { code: 'internal_error', message: 'Internal server error.' } }, 500)\n })\n\n // ── Health ──\n app.get('/healthz', (c) =>\n c.json({ status: 'ok' as const, uptimeSec: (Date.now() - STARTED_AT) / 1000 }),\n )\n\n // ── Version ──\n app.get('/v1/version', (c) => c.json(handleVersion()))\n\n // ── Rubrics ──\n app.get('/v1/rubrics', (c) => c.json(handleListRubrics()))\n\n // ── Judge ──\n app.post('/v1/judge', async (c) => {\n const raw = await c.req.json().catch(() => null)\n if (raw == null) {\n throw new WireError('validation_error', 'Request body must be JSON.', 400)\n }\n const parsed = JudgeRequestSchema.safeParse(raw)\n if (!parsed.success) {\n throw new WireError(\n 'validation_error',\n 'Request did not match JudgeRequest schema.',\n 400,\n parsed.error.issues,\n )\n }\n const result = await handleJudge(parsed.data)\n return c.json(result)\n })\n\n // ── Traces ingest (NDJSON-friendly: accepts either {events:[...]} or NDJSON) ──\n app.post('/v1/traces/ingest', async (c) => {\n const contentType = c.req.header('content-type') ?? ''\n let payload: unknown\n if (contentType.includes('application/x-ndjson')) {\n const text = await c.req.text()\n const events = text\n .split('\\n')\n .map((line) => line.trim())\n .filter((line) => line.length > 0)\n .map((line) => {\n try {\n return JSON.parse(line)\n } catch {\n throw new WireError(\n 'validation_error',\n 'NDJSON line did not parse as JSON.',\n 400,\n line.slice(0, 200),\n )\n }\n })\n payload = { events }\n } else {\n payload = await c.req.json().catch(() => null)\n }\n if (payload == null) {\n throw new WireError('validation_error', 'Request body must be JSON or NDJSON.', 400)\n }\n const parsed = TracesIngestRequestSchema.safeParse(payload)\n if (!parsed.success) {\n throw new WireError(\n 'validation_error',\n 'Request did not match TracesIngestRequest schema.',\n 400,\n parsed.error.issues,\n )\n }\n const result = await handleTracesIngest(parsed.data, opts.stores ?? {})\n return c.json(result)\n })\n\n // ── Feedback ingest ──\n app.post('/v1/feedback', async (c) => {\n const raw = await c.req.json().catch(() => null)\n if (raw == null) {\n throw new WireError('validation_error', 'Request body must be JSON.', 400)\n }\n const parsed = FeedbackTrajectorySchema.safeParse(raw)\n if (!parsed.success) {\n throw new WireError(\n 'validation_error',\n 'Request did not match FeedbackTrajectory schema.',\n 400,\n parsed.error.issues,\n )\n }\n const result = await handleFeedbackIngest(parsed.data, opts.stores ?? {})\n return c.json(result)\n })\n\n // ── OpenAPI spec ──\n app.get('/openapi.json', (c) => c.json(buildOpenApi(handleVersion().version)))\n\n return app\n}\n\nexport interface ServeOptions extends CreateAppOptions {\n /** Default 5005. */\n port?: number\n /** Default '127.0.0.1'. Set to '0.0.0.0' to listen on all interfaces. */\n host?: string\n}\n\nexport function startServer(opts: ServeOptions = {}): ServerType {\n const app = createApp(opts)\n const port = opts.port ?? 5005\n const host = opts.host ?? '127.0.0.1'\n return serve({ fetch: app.fetch, port, hostname: host }, ({ address, port: actualPort }) => {\n // eslint-disable-next-line no-console\n console.log(`[agent-eval] serving on http://${address}:${actualPort}`)\n })\n}\n"],"mappings":";;;;;AAYA,SAAS,4BAA4B;AACrC,SAAS,SAAS;AAElB,qBAAqB,CAAC;AAIf,IAAM,wBAAwB,EAClC,OAAO;AAAA,EACN,IAAI,EACD,OAAO,EACP,IAAI,CAAC,EACL,SAAS,gFAA2E;AAAA,EACvF,aAAa,EACV,OAAO,EACP,IAAI,CAAC,EACL,SAAS,sEAAsE;AAAA,EAClF,QAAQ,EACL,OAAO,EACP,IAAI,CAAC,EACL,QAAQ,CAAC,EACT,SAAS,gEAAgE;AAAA,EAC5E,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS,gDAAgD;AAAA,EACpF,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS,gDAAgD;AACtF,CAAC,EACA,QAAQ,iBAAiB;AAErB,IAAM,oBAAoB,EAC9B,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mEAA8D;AAAA,EAC7F,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mDAAmD;AAC7F,CAAC,EACA,QAAQ,aAAa;AAIjB,IAAM,eAAe,EACzB,OAAO;AAAA,EACN,MAAM,EACH,OAAO,EACP,IAAI,CAAC,EACL,SAAS,4EAAuE;AAAA,EACnF,aAAa,EACV,OAAO,EACP,IAAI,CAAC,EACL,SAAS,0DAA0D;AAAA,EACtE,cAAc,EACX,OAAO,EACP,IAAI,CAAC,EACL;AAAA,IACC;AAAA,EACF;AAAA,EACF,YAAY,EACT,MAAM,qBAAqB,EAC3B,IAAI,CAAC,EACL,SAAS,+DAA+D;AAAA,EAC3E,cAAc,EACX,MAAM,iBAAiB,EACvB,QAAQ,CAAC,CAAC,EACV,SAAS,iFAAiF;AAAA,EAC7F,MAAM,EACH,MAAM,iBAAiB,EACvB,QAAQ,CAAC,CAAC,EACV,SAAS,uEAAuE;AACrF,CAAC,EACA,QAAQ,QAAQ;AAIZ,IAAM,qBAAqB,EAC/B,OAAO;AAAA,EACN,YAAY,EACT,OAAO,EACP,SAAS,EACT,SAAS,kEAAkE;AAAA,EAC9E,QAAQ,aAAa,SAAS,EAAE;AAAA,IAC9B;AAAA,EACF;AAAA,EACA,SAAS,EACN,OAAO,EACP,IAAI,CAAC,EACL,SAAS,uFAAkF;AAAA,EAC9F,SAAS,EACN,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAC9B,SAAS,EACT;AAAA,IACC;AAAA,EACF;AAAA,EACF,OAAO,EACJ,OAAO,EACP,SAAS,EACT,SAAS,+EAA+E;AAC7F,CAAC,EACA,OAAO,CAAC,MAAM,QAAQ,EAAE,UAAU,MAAM,QAAQ,EAAE,MAAM,GAAG;AAAA,EAC1D,SAAS;AACX,CAAC,EACA,QAAQ,cAAc;AAElB,IAAM,oBAAoB,EAC9B,OAAO;AAAA,EACN,WAAW,EACR,OAAO,EACP,IAAI,CAAC,EACL,IAAI,CAAC,EACL,SAAS,iFAAiF;AAAA,EAC7F,YAAY,EACT,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAC7B,SAAS,mDAAmD;AAAA,EAC/D,cAAc,EACX,MAAM,EAAE,OAAO,CAAC,EAChB,QAAQ,CAAC,CAAC,EACV,SAAS,+EAA+E;AAAA,EAC3F,MAAM,EACH,MAAM,EAAE,OAAO,CAAC,EAChB,QAAQ,CAAC,CAAC,EACV,SAAS,8DAA8D;AAAA,EAC1E,WAAW,EACR,OAAO,EACP,SAAS,yEAAyE;AAAA,EACrF,eAAe,EACZ,OAAO,EACP;AAAA,IACC;AAAA,EACF;AAAA,EACF,OAAO,EAAE,OAAO,EAAE,SAAS,yDAAyD;AAAA,EACpF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,qCAAqC;AAC3F,CAAC,EACA,QAAQ,aAAa;AAIjB,IAAM,mBAAmB,EAC7B,OAAO;AAAA,EACN,MAAM,EAAE,OAAO,EAAE,SAAS,yCAAyC;AAAA,EACnE,aAAa,EAAE,OAAO,EAAE,SAAS,4BAA4B;AAAA,EAC7D,YAAY,EACT,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,GAAG,aAAa,EAAE,OAAO,GAAG,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC,EAC/E,SAAS,kDAAkD;AAAA,EAC9D,cAAc,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,uCAAuC;AAAA,EAC9F,eAAe,EAAE,OAAO,EAAE,SAAS,8DAAyD;AAC9F,CAAC,EACA,QAAQ,YAAY;AAEhB,IAAM,4BAA4B,EACtC,OAAO;AAAA,EACN,SAAS,EAAE,MAAM,gBAAgB;AACnC,CAAC,EACA,QAAQ,qBAAqB;AAIzB,IAAM,wBAAwB,EAClC,OAAO;AAAA,EACN,SAAS,EAAE,OAAO,EAAE,SAAS,qDAAqD;AAAA,EAClF,SAAS,EAAE,OAAO,EAAE,SAAS,0DAA0D;AAAA,EACvF,aAAa,EACV,OAAO,EACP;AAAA,IACC;AAAA,EACF;AAAA,EACF,YAAY,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,SAAS,iCAAiC;AAC5E,CAAC,EACA,QAAQ,iBAAiB;AAErB,IAAM,uBAAuB,EACjC,OAAO;AAAA,EACN,QAAQ,EAAE,QAAQ,IAAI;AAAA,EACtB,WAAW,EAAE,OAAO;AACtB,CAAC,EACA,QAAQ,gBAAgB;AAUpB,IAAM,mBAAmB,EAC7B,OAAO;AAAA,EACN,SAAS,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4CAA4C;AAAA,EAChF,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4BAA4B;AAAA,EAC9D,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,sCAAsC;AAAA,EAC7E,MAAM,EACH,KAAK;AAAA,IACJ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC,EACA,SAAS,yEAAoE;AAAA,EAChF,WAAW,EACR,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,kEAAkE;AAAA,EAC9E,SAAS,EACN,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAC9B,SAAS,sDAAiD;AAC/D,CAAC,EACA,QAAQ,YAAY;AAEhB,IAAM,4BAA4B,EACtC,OAAO;AAAA,EACN,QAAQ,EACL,MAAM,gBAAgB,EACtB,IAAI,CAAC,EACL,IAAI,GAAM,EACV,SAAS,4EAAuE;AACrF,CAAC,EACA,QAAQ,qBAAqB;AAEzB,IAAM,6BAA6B,EACvC,OAAO;AAAA,EACN,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,EAC/E,UAAU,EACP,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,uEAAkE;AAAA,EAC9E,QAAQ,EACL;AAAA,IACC,EAAE,OAAO;AAAA,MACP,SAAS,EAAE,OAAO,EAAE,SAAS,iCAAiC;AAAA,MAC9D,SAAS,EAAE,OAAO,EAAE,SAAS,6BAA6B;AAAA,IAC5D,CAAC;AAAA,EACH,EACC,QAAQ,CAAC,CAAC;AACf,CAAC,EACA,QAAQ,sBAAsB;AAE1B,IAAM,sBAAsB,EAChC,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,SAAS;AAAA,EACxB,QAAQ,EAAE,KAAK,CAAC,QAAQ,SAAS,eAAe,UAAU,UAAU,QAAQ,CAAC;AAAA,EAC7E,MAAM,EAAE,KAAK;AAAA,IACX;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AAAA,EACD,OAAO,EAAE,QAAQ;AAAA,EACjB,QAAQ,EAAE,OAAO,EAAE,SAAS;AAAA,EAC5B,UAAU,EAAE,KAAK,CAAC,QAAQ,WAAW,SAAS,UAAU,CAAC,EAAE,SAAS;AAAA,EACpE,WAAW,EAAE,OAAO,EAAE,SAAS,eAAe;AAAA,EAC9C,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AACvD,CAAC,EACA,QAAQ,eAAe;AAEnB,IAAM,wBAAwB,EAClC,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACpB,WAAW,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACxC,cAAc,EAAE,KAAK;AAAA,IACnB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AAAA,EACD,UAAU,EAAE,QAAQ;AAAA,EACpB,SAAS,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACvC,gBAAgB,EACb,OAAO;AAAA,IACN,MAAM,EAAE,OAAO;AAAA,IACf,MAAM,EAAE,KAAK,CAAC,OAAO,UAAU,MAAM,CAAC,EAAE,SAAS;AAAA,IACjD,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,IAC7B,oBAAoB,EAAE,QAAQ,EAAE,SAAS;AAAA,IACzC,kBAAkB,EAAE,QAAQ,EAAE,SAAS;AAAA,IACvC,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACvD,CAAC,EACA,SAAS;AAAA,EACZ,UAAU,EAAE,MAAM,mBAAmB,EAAE,SAAS;AAAA,EAChD,WAAW,EAAE,OAAO;AAAA,EACpB,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AACvD,CAAC,EACA,QAAQ,iBAAiB;AAErB,IAAM,2BAA2B,EACrC,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,gDAAgD;AAAA,EAC/E,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,EAChC,MAAM,EAAE,OAAO;AAAA,IACb,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,IACxB,SAAS,EAAE,QAAQ,EAAE,SAAS;AAAA,EAChC,CAAC;AAAA,EACD,UAAU,EAAE,MAAM,qBAAqB,EAAE,QAAQ,CAAC,CAAC;AAAA,EACnD,QAAQ,EAAE,MAAM,mBAAmB,EAAE,QAAQ,CAAC,CAAC;AAAA,EAC/C,SAAS,EACN,OAAO;AAAA,IACN,SAAS,EAAE,QAAQ,EAAE,SAAS;AAAA,IAC9B,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,IAC3B,SAAS,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAAE,SAAS;AAAA,IACnD,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,IAC7B,QAAQ,EAAE,OAAO,EAAE,SAAS;AAAA,IAC5B,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,IAChC,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACvD,CAAC,EACA,SAAS;AAAA,EACZ,OAAO,EAAE,KAAK,CAAC,SAAS,OAAO,QAAQ,SAAS,CAAC,EAAE,SAAS;AAAA,EAC5D,MAAM,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAAE,SAAS;AAAA,EAChD,WAAW,EAAE,OAAO,EAAE,SAAS,eAAe;AAAA,EAC9C,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AACvD,CAAC,EACA,QAAQ,oBAAoB;AAExB,IAAM,+BAA+B,EACzC,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,SAAS,mCAAmC;AAAA,EAC3D,WAAW,EAAE,QAAQ,EAAE,SAAS,wDAAwD;AAC1F,CAAC,EACA,QAAQ,wBAAwB;AAU5B,IAAM,sBAAsB,EAChC,OAAO;AAAA,EACN,OAAO,EACJ,OAAO;AAAA,IACN,MAAM,EACH,OAAO,EACP;AAAA,MACC;AAAA,IACF;AAAA,IACF,SAAS,EAAE,OAAO,EAAE,SAAS,yBAAyB;AAAA,IACtD,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS,6BAA6B;AAAA,EACxE,CAAC,EACA,SAAS,+DAA+D;AAC7E,CAAC,EACA,QAAQ,eAAe;AAoBnB,IAAM,eAAe;AAMrB,SAAS,WAAW,QAAwB;AACjD,QAAM,SAAS,gBAAgB,MAAM;AACrC,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,QAAK,IAAI,KAAM,OAAO,WAAW,CAAC;AAAA,EACpC;AAEA,SAAO,GAAG,OAAO,IAAI,KAAK,MAAM,GAAG,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC;AAClE;AAEA,SAAS,gBAAgB,OAAwB;AAC/C,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,IAAI,MAAM,IAAI,CAAC,SAAS,gBAAgB,IAAI,CAAC,EAAE,KAAK,GAAG,CAAC;AACzF,MAAI,SAAS,OAAO,UAAU,UAAU;AACtC,UAAM,UAAU,OAAO,QAAQ,KAAgC,EAC5D,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC,EACrC,IAAI,CAAC,CAAC,KAAK,IAAI,MAAM,GAAG,KAAK,UAAU,GAAG,CAAC,IAAI,gBAAgB,IAAI,CAAC,EAAE;AACzE,WAAO,IAAI,QAAQ,KAAK,GAAG,CAAC;AAAA,EAC9B;AACA,SAAO,KAAK,UAAU,KAAK;AAC7B;;;ACjYA,IAAM,YAAoB;AAAA,EACxB,MAAM;AAAA,EACN,aACE;AAAA,EACF,cAAc;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAuBd,YAAY;AAAA,IACV;AAAA,MACE,IAAI;AAAA,MACJ,aAAa;AAAA,MACb,QAAQ;AAAA,MACR,KAAK;AAAA,MACL,KAAK;AAAA,IACP;AAAA,IACA;AAAA,MACE,IAAI;AAAA,MACJ,aAAa;AAAA,MACb,QAAQ;AAAA,MACR,KAAK;AAAA,MACL,KAAK;AAAA,IACP;AAAA,IACA;AAAA,MACE,IAAI;AAAA,MACJ,aAAa;AAAA,MACb,QAAQ;AAAA,MACR,KAAK;AAAA,MACL,KAAK;AAAA,IACP;AAAA,EACF;AAAA,EACA,cAAc;AAAA,IACZ,EAAE,IAAI,cAAc,aAAa,qCAAqC;AAAA,IACtE,EAAE,IAAI,kBAAkB,aAAa,sCAAsC;AAAA,IAC3E,EAAE,IAAI,eAAe,aAAa,oCAAoC;AAAA,IACtE,EAAE,IAAI,WAAW,aAAa,kCAAkC;AAAA,IAChE,EAAE,IAAI,mBAAmB,aAAa,qCAAqC;AAAA,IAC3E,EAAE,IAAI,WAAW,aAAa,oCAAoC;AAAA,IAClE,EAAE,IAAI,eAAe,aAAa,uCAAuC;AAAA,EAC3E;AAAA,EACA,MAAM;AAAA,IACJ,EAAE,IAAI,sBAAsB,aAAa,qCAAqC;AAAA,IAC9E,EAAE,IAAI,iBAAiB,aAAa,4BAA4B;AAAA,IAChE,EAAE,IAAI,0BAA0B,aAAa,wBAAwB;AAAA,IACrE,EAAE,IAAI,kBAAkB,aAAa,oCAAoC;AAAA,EAC3E;AACF;AAIO,IAAM,kBAA0C;AAAA,EACrD,aAAa;AACf;AAGO,SAAS,iBAAiB,MAAkC;AACjE,SAAO,gBAAgB,IAAI;AAC7B;AAGO,SAAS,qBAAqB;AACnC,SAAO,OAAO,OAAO,eAAe,EAAE,IAAI,CAAC,OAAO;AAAA,IAChD,MAAM,EAAE;AAAA,IACR,aAAa,EAAE;AAAA,IACf,YAAY,EAAE,WAAW,IAAI,CAAC,OAAO;AAAA,MACnC,IAAI,EAAE;AAAA,MACN,aAAa,EAAE;AAAA,MACf,QAAQ,EAAE;AAAA,IACZ,EAAE;AAAA,IACF,cAAc,EAAE,aAAa,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,IAC5C,eAAe,WAAW,CAAC;AAAA,EAC7B,EAAE;AACJ;;;ACwHA,SAAS,oBAAoB;AAC7B,SAAS,SAAS,eAAe;AACjC,SAAS,qBAAqB;AA9MvB,IAAM,YAAN,cAAwB,MAAM;AAAA,EACnC,YACkB,MAChB,SACgB,SAAiB,KACjB,SAChB;AACA,UAAM,OAAO;AALG;AAEA;AACA;AAGhB,SAAK,OAAO;AAAA,EACd;AAAA,EAPkB;AAAA,EAEA;AAAA,EACA;AAKpB;AAKA,SAAS,kBAAkB,QAAgB;AACzC,SAAO;AAAA,IACL,MAAM;AAAA,IACN,QAAQ;AAAA,MACN,MAAM;AAAA,MACN,sBAAsB;AAAA,MACtB,YAAY;AAAA,QACV,YAAY;AAAA,UACV,MAAM;AAAA,UACN,sBAAsB;AAAA,UACtB,YAAY,OAAO;AAAA,YACjB,OAAO,WAAW,IAAI,CAAC,MAAM;AAAA,cAC3B,EAAE;AAAA,cACF,EAAE,MAAM,UAAU,SAAS,EAAE,KAAK,SAAS,EAAE,IAAI;AAAA,YACnD,CAAC;AAAA,UACH;AAAA,UACA,UAAU,OAAO,WAAW,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,QAC7C;AAAA,QACA,cAAc;AAAA,UACZ,MAAM;AAAA,UACN,OAAO,EAAE,MAAM,UAAU,MAAM,OAAO,aAAa,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE;AAAA,QACtE;AAAA,QACA,MAAM;AAAA,UACJ,MAAM;AAAA,UACN,OAAO,EAAE,MAAM,UAAU,MAAM,OAAO,KAAK,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE;AAAA,QAC9D;AAAA,QACA,WAAW,EAAE,MAAM,SAAS;AAAA,MAC9B;AAAA,MACA,UAAU,CAAC,cAAc,WAAW;AAAA,IACtC;AAAA,EACF;AACF;AASA,SAAS,oBAAoB,OAAgB,QAA6B;AACxE,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACvC,UAAM,IAAI,UAAU,eAAe,oCAAoC,KAAK,KAAK;AAAA,EACnF;AACA,QAAM,MAAM;AACZ,QAAM,gBAAgB,IAAI;AAC1B,MAAI,CAAC,iBAAiB,OAAO,kBAAkB,YAAY,MAAM,QAAQ,aAAa,GAAG;AACvF,UAAM,IAAI,UAAU,eAAe,wCAAwC,KAAK,KAAK;AAAA,EACvF;AAEA,QAAM,aAAqC,CAAC;AAC5C,QAAM,kBAAkB;AACxB,aAAW,OAAO,OAAO,YAAY;AACnC,UAAM,QAAQ,gBAAgB,IAAI,EAAE;AACpC,QACE,OAAO,UAAU,YACjB,CAAC,OAAO,SAAS,KAAK,KACtB,QAAQ,IAAI,OACZ,QAAQ,IAAI,KACZ;AACA,YAAM,IAAI;AAAA,QACR;AAAA,QACA,+CAA+C,IAAI,EAAE;AAAA,QACrD;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,eAAW,IAAI,EAAE,IAAI;AAAA,EACvB;AAEA,QAAM,kBAAkB,IAAI,IAAI,OAAO,aAAa,IAAI,CAAC,SAAS,KAAK,EAAE,CAAC;AAC1E,QAAM,cAAc,IAAI,IAAI,OAAO,KAAK,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;AAC5D,QAAM,eAAe,gBAAgB,IAAI,cAAc,iBAAiB,gBAAgB,KAAK;AAC7F,QAAM,OAAO,gBAAgB,IAAI,MAAM,aAAa,QAAQ,KAAK;AACjE,MAAI,OAAO,IAAI,cAAc,YAAY,IAAI,UAAU,KAAK,EAAE,WAAW,GAAG;AAC1E,UAAM,IAAI,UAAU,eAAe,qCAAqC,KAAK,KAAK;AAAA,EACpF;AAEA,SAAO,EAAE,YAAY,cAAc,MAAM,WAAW,IAAI,UAAU;AACpE;AAEA,SAAS,gBACP,KACA,SACA,OACA,UACU;AACV,MAAI,QAAQ,OAAW,QAAO,CAAC;AAC/B,MAAI,CAAC,MAAM,QAAQ,GAAG,GAAG;AACvB,UAAM,IAAI,UAAU,eAAe,4BAA4B,KAAK,KAAK,KAAK,QAAQ;AAAA,EACxF;AACA,QAAM,MAAgB,CAAC;AACvB,aAAW,QAAQ,KAAK;AACtB,QAAI,OAAO,SAAS,YAAY,CAAC,QAAQ,IAAI,IAAI,GAAG;AAClD,YAAM,IAAI;AAAA,QACR;AAAA,QACA,0BAA0B,KAAK,QAAQ,OAAO,IAAI,CAAC;AAAA,QACnD;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,QAAI,KAAK,IAAI;AAAA,EACf;AACA,SAAO;AACT;AAEA,SAAS,eAAe,YAAoC,QAAwB;AAClF,MAAI,WAAW;AACf,MAAI,cAAc;AAClB,aAAW,OAAO,OAAO,YAAY;AACnC,UAAM,MAAM,WAAW,IAAI,EAAE,KAAK;AAClC,UAAM,QAAQ,IAAI,MAAM,IAAI,OAAO;AACnC,UAAM,aAAa,KAAK,IAAI,GAAG,KAAK,IAAI,IAAI,MAAM,IAAI,OAAO,KAAK,CAAC;AACnE,gBAAY,aAAa,IAAI;AAC7B,mBAAe,IAAI;AAAA,EACrB;AACA,SAAO,cAAc,IAAI,WAAW,cAAc;AACpD;AAEA,SAAS,iBAAiB,SAAiB,SAA0B;AACnE,QAAM,MAAM,WAAW,OAAO,KAAK,OAAiB,EAAE,SAAS,KAAK,UAAU,OAAO,IAAI;AACzF,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA,MAAM,yCAAyC;AAAA,IAC/C,MAAM,MAAM;AAAA,EACd,EACG,OAAO,OAAO,EACd,KAAK,IAAI;AACd;AAEA,IAAM,sBAAsB;AAE5B,eAAsB,YAAY,KAAyC;AAEzE,MAAI;AACJ,MAAI,IAAI,YAAY;AAClB,UAAM,QAAQ,iBAAiB,IAAI,UAAU;AAC7C,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,UAAU,oBAAoB,6BAA6B,IAAI,UAAU,MAAM,GAAG;AAAA,IAC9F;AACA,aAAS;AAAA,EACX,WAAW,IAAI,QAAQ;AACrB,aAAS,IAAI;AAAA,EACf,OAAO;AAEL,UAAM,IAAI,UAAU,oBAAoB,4CAA4C,GAAG;AAAA,EACzF;AAEA,QAAM,YAAY,KAAK,IAAI;AAC3B,QAAM,QAAQ,IAAI,SAAS;AAE3B,QAAM,EAAE,OAAO,OAAO,IAAI,MAAM,YAAyB;AAAA,IACvD;AAAA,IACA,UAAU;AAAA,MACR,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa;AAAA,MAC/C,EAAE,MAAM,QAAQ,SAAS,iBAAiB,IAAI,SAAS,IAAI,OAAO,EAAE;AAAA,IACtE;AAAA,IACA,YAAY,kBAAkB,MAAM;AAAA,IACpC,aAAa;AAAA,IACb,WAAW;AAAA,EACb,CAAC;AAED,QAAM,SAAS,oBAAoB,OAAO,MAAM;AAEhD,QAAM,YAAY,eAAe,OAAO,YAAY,MAAM;AAC1D,QAAM,aAAa,KAAK,IAAI,IAAI;AAEhC,SAAO;AAAA,IACL;AAAA,IACA,YAAY,OAAO;AAAA,IACnB,cAAc,OAAO,gBAAgB,CAAC;AAAA,IACtC,MAAM,OAAO,QAAQ,CAAC;AAAA,IACtB,WAAW,OAAO;AAAA,IAClB,eAAe,WAAW,MAAM;AAAA,IAChC,OAAO,OAAO;AAAA,IACd;AAAA,EACF;AACF;AAIO,SAAS,oBAAyC;AACvD,SAAO,EAAE,SAAS,mBAAmB,EAAE;AACzC;AAQA,IAAI;AAEJ,SAAS,qBAA6B;AACpC,MAAI,eAAgB,QAAO;AAG3B,QAAM,OAAO,QAAQ,cAAc,YAAY,GAAG,CAAC;AACnD,QAAM,aAAa;AAAA,IACjB,QAAQ,MAAM,MAAM,MAAM,cAAc;AAAA;AAAA,IACxC,QAAQ,MAAM,MAAM,cAAc;AAAA;AAAA,EACpC;AACA,aAAW,QAAQ,YAAY;AAC7B,QAAI;AACF,YAAM,MAAM,KAAK,MAAM,aAAa,MAAM,OAAO,CAAC;AAClD,UAAI,IAAI,SAAS;AACf,yBAAiB,IAAI;AACrB,eAAO,IAAI;AAAA,MACb;AAAA,IACF,QAAQ;AAAA,IAER;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,gBAAiC;AAC/C,SAAO;AAAA,IACL,SAAS;AAAA,IACT,SAAS,mBAAmB;AAAA,IAC5B,aAAa;AAAA,IACb,YAAY,CAAC,SAAS,eAAe,WAAW,mBAAmB,eAAe;AAAA,EACpF;AACF;AA2BA,eAAsB,mBACpB,KACA,QAC+B;AAC/B,MAAI,CAAC,OAAO,YAAY;AACtB,UAAM,IAAI;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACA,QAAM,SAAsD,CAAC;AAC7D,MAAI,WAAW;AACf,aAAW,SAAS,IAAI,QAAQ;AAC9B,QAAI;AAEF,YAAM,OAAO,WAAW,YAAY,KAA2B;AAC/D;AAAA,IACF,SAAS,KAAK;AACZ,aAAO,KAAK;AAAA,QACV,SAAS,MAAM;AAAA,QACf,SAAS,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MAC1D,CAAC;AAAA,IACH;AAAA,EACF;AACA,SAAO,EAAE,UAAU,UAAU,OAAO,QAAQ,OAAO;AACrD;AAOA,eAAsB,qBACpB,KACA,QACiC;AACjC,MAAI,CAAC,OAAO,eAAe;AACzB,UAAM,IAAI;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAIA,QAAM,OAAO,cAAc,KAAK,GAAgE;AAChG,SAAO,EAAE,IAAI,IAAI,IAAI,WAAW,KAAK;AACvC;;;AChVA,SAAS,iBAAiB,2BAA2B;AAiB9C,SAAS,aAAa,gBAAuC;AAClE,QAAM,WAAW,IAAI,gBAAgB;AAGrC,WAAS,SAAS,gBAAgB,kBAAkB;AACpD,WAAS,SAAS,eAAe,iBAAiB;AAClD,WAAS,SAAS,uBAAuB,yBAAyB;AAClE,WAAS,SAAS,mBAAmB,qBAAqB;AAC1D,WAAS,SAAS,kBAAkB,oBAAoB;AACxD,WAAS,SAAS,iBAAiB,mBAAmB;AACtD,WAAS,SAAS,uBAAuB,yBAAyB;AAClE,WAAS,SAAS,wBAAwB,0BAA0B;AACpE,WAAS,SAAS,sBAAsB,wBAAwB;AAChE,WAAS,SAAS,0BAA0B,4BAA4B;AAGxE,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,SAAS;AAAA,UACP,oBAAoB,EAAE,QAAQ,mBAAmB;AAAA,QACnD;AAAA,MACF;AAAA,IACF;AAAA,IACA,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,kBAAkB,EAAE;AAAA,MAC/D;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,0BAA0B,EAAE;AAAA,MACvE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aAAa;AAAA,IACb,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,sBAAsB,EAAE;AAAA,MACnE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,qBAAqB,EAAE;AAAA,MAClE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,SAAS;AAAA,UACP,oBAAoB,EAAE,QAAQ,0BAA0B;AAAA,UACxD,wBAAwB,EAAE,QAAQ,0BAA0B;AAAA,QAC9D;AAAA,MACF;AAAA,IACF;AAAA,IACA,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,2BAA2B,EAAE;AAAA,MACxE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,SAAS;AAAA,UACP,oBAAoB,EAAE,QAAQ,yBAAyB;AAAA,QACzD;AAAA,MACF;AAAA,IACF;AAAA,IACA,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,6BAA6B,EAAE;AAAA,MAC1E;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,IACF;AAAA,EACF,CAAC;AAED,QAAM,YAAY,IAAI,oBAAoB,SAAS,WAAW;AAC9D,QAAM,MAAM,UAAU,iBAAiB;AAAA,IACrC,SAAS;AAAA,IACT,MAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,aAAa;AAAA;AAAA,yBAEM,YAAY;AAAA,MAC/B,SAAS,EAAE,MAAM,kBAAkB,KAAK,+CAA+C;AAAA,MACvF,SAAS,EAAE,MAAM,MAAM;AAAA,IACzB;AAAA,IACA,SAAS,CAAC,EAAE,KAAK,yBAAyB,aAAa,yBAAyB,CAAC;AAAA,EACnF,CAAC;AACD,QAAM,YAAY,EAAE,MAAM,8BAA8B;AACxD,QAAM,oBAAoB;AAAA,IACxB,SAAS,EAAE,MAAM,UAAU,WAAW,EAAE;AAAA,IACxC,SAAS,EAAE,MAAM,UAAU,sBAAsB,KAAK;AAAA,IACtD,OAAO,EAAE,MAAM,SAAS;AAAA,EAC1B;AACA,MAAI,eAAe,CAAC;AACpB,MAAI,WAAW,YAAY,CAAC;AAC5B,MAAI,WAAW,QAAQ,eAAe;AAAA,IACpC,OAAO;AAAA,MACL;AAAA,QACE,MAAM;AAAA,QACN,sBAAsB;AAAA,QACtB,UAAU,CAAC,cAAc,SAAS;AAAA,QAClC,YAAY;AAAA,UACV,YAAY,EAAE,MAAM,UAAU,WAAW,EAAE;AAAA,UAC3C,GAAG;AAAA,QACL;AAAA,MACF;AAAA,MACA;AAAA,QACE,MAAM;AAAA,QACN,sBAAsB;AAAA,QACtB,UAAU,CAAC,UAAU,SAAS;AAAA,QAC9B,YAAY;AAAA,UACV,QAAQ;AAAA,UACR,GAAG;AAAA,QACL;AAAA,MACF;AAAA,IACF;AAAA,IACA,aAAa;AAAA,EACf;AACA,SAAO;AACT;;;ACxMA,eAAsB,YAAY,KAAiD;AACjF,MAAI;AACF,YAAQ,IAAI,QAAQ;AAAA,MAClB,KAAK,SAAS;AACZ,cAAM,SAAS,mBAAmB,UAAU,IAAI,MAAM;AACtD,YAAI,CAAC,OAAO,SAAS;AACnB,iBAAO;AAAA,YACL,OAAO;AAAA,cACL,MAAM;AAAA,cACN,SAAS;AAAA,cACT,SAAS,OAAO,MAAM;AAAA,YACxB;AAAA,UACF;AAAA,QACF;AACA,eAAO,EAAE,QAAQ,MAAM,YAAY,OAAO,IAAI,EAAE;AAAA,MAClD;AAAA,MACA,KAAK;AACH,eAAO,EAAE,QAAQ,kBAAkB,EAAE;AAAA,MACvC,KAAK;AACH,eAAO,EAAE,QAAQ,cAAc,EAAE;AAAA,MACnC;AACE,eAAO;AAAA,UACL,OAAO;AAAA,YACL,MAAM;AAAA,YACN,SAAS,mBAAoB,IAA2B,MAAM;AAAA,UAChE;AAAA,QACF;AAAA,IACJ;AAAA,EACF,SAAS,KAAK;AACZ,QAAI,eAAe,WAAW;AAC5B,aAAO,EAAE,OAAO,EAAE,MAAM,IAAI,MAAM,SAAS,IAAI,SAAS,SAAS,IAAI,QAAQ,EAAE;AAAA,IACjF;AACA,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,EAAE,OAAO,EAAE,MAAM,kBAAkB,QAAQ,EAAE;AAAA,EACtD;AACF;AAIA,eAAe,QAAQ,QAAgD;AACrE,QAAM,SAAmB,CAAC;AAC1B,mBAAiB,SAAS,QAAQ;AAChC,WAAO,KAAK,OAAO,SAAS,KAAK,IAAI,QAAQ,OAAO,KAAK,KAAe,CAAC;AAAA,EAC3E;AACA,SAAO,OAAO,OAAO,MAAM,EAAE,SAAS,OAAO;AAC/C;AAGA,eAAsB,WAAW,QAAkC;AACjE,QAAM,MAAM,MAAM,QAAQ,QAAQ,KAAK;AACvC,MAAI;AACJ,MAAI;AACF,UAAM,OAAO,KAAK,MAAM,GAAG;AAC3B,UAAM,SAAS,EAAE,QAAwC,QAAQ,KAAK,IAAK;AAAA,EAC7E,SAAS,KAAK;AACZ,YAAQ,OAAO;AAAA,MACb,GAAG,KAAK,UAAU;AAAA,QAChB,OAAO;AAAA,UACL,MAAM;AAAA,UACN,SAAS,6BAA6B,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,QACxF;AAAA,MACF,CAAC,CAAC;AAAA;AAAA,IACJ;AACA,WAAO;AAAA,EACT;AACA,QAAM,MAAM,MAAM,YAAY,GAAG;AACjC,UAAQ,OAAO,MAAM,GAAG,KAAK,UAAU,GAAG,CAAC;AAAA,CAAI;AAC/C,SAAO,WAAW,MAAM,IAAI;AAC9B;AAGA,eAAsB,YAAY,QAAkC;AAClE,QAAM,MAAM,MAAM,QAAQ,QAAQ,KAAK;AACvC,QAAM,QAAQ,IAAI,MAAM,IAAI,EAAE,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC;AAC/D,MAAI,WAAW;AACf,aAAW,QAAQ,OAAO;AACxB,QAAI;AACJ,QAAI;AACF,YAAM,OAAO,KAAK,MAAM,IAAI;AAC5B,YAAM,SAAS,EAAE,QAAwC,QAAQ,KAAK,IAAK;AAAA,IAC7E,SAAS,KAAK;AACZ,cAAQ,OAAO;AAAA,QACb,GAAG,KAAK,UAAU;AAAA,UAChB,OAAO;AAAA,YACL,MAAM;AAAA,YACN,SAAS,4BAA4B,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,UACvF;AAAA,QACF,CAAC,CAAC;AAAA;AAAA,MACJ;AACA,iBAAW;AACX;AAAA,IACF;AACA,UAAM,MAAM,MAAM,YAAY,GAAG;AACjC,YAAQ,OAAO,MAAM,GAAG,KAAK,UAAU,GAAG,CAAC;AAAA,CAAI;AAC/C,QAAI,WAAW,IAAK,YAAW;AAAA,EACjC;AACA,SAAO;AACT;;;AC/GA,SAA0B,aAAa;AACvC,SAAS,YAAY;AACrB,SAAS,YAAY;AAcrB,IAAM,aAAa,KAAK,IAAI;AAmB5B,IAAM,oBAAoB,oBAAI,IAAI,CAAC,YAAY,eAAe,eAAe,CAAC;AAEvE,SAAS,UAAU,OAAyB,CAAC,GAAG;AACrD,QAAM,MAAM,IAAI,KAAK;AAErB,MAAI,IAAI,KAAK,KAAK,CAAC;AAGnB,MAAI,KAAK,MAAM;AACb,UAAM,SAAS,KAAK,KAAK;AACzB,QAAI,IAAI,KAAK,OAAO,GAAG,SAAS;AAC9B,YAAM,OAAO,IAAI,IAAI,EAAE,IAAI,GAAG,EAAE;AAChC,UAAI,kBAAkB,IAAI,IAAI,EAAG,QAAO,KAAK;AAC7C,YAAM,MAAM,EAAE,IAAI,OAAO,eAAe,KAAK;AAC7C,YAAM,QAAQ,IAAI,MAAM,kBAAkB;AAC1C,UAAI,CAAC,OAAO;AACV,cAAM,IAAI,UAAU,gBAAgB,8CAA8C,GAAG;AAAA,MACvF;AACA,YAAM,QAAQ,MAAM,CAAC;AACrB,YAAM,KAAK,OAAO,WAAW,WAAW,UAAU,SAAS,MAAM,OAAO,KAAK;AAC7E,UAAI,CAAC,IAAI;AACP,cAAM,IAAI,UAAU,gBAAgB,yBAAyB,GAAG;AAAA,MAClE;AACA,aAAO,KAAK;AAAA,IACd,CAAC;AAAA,EACH;AAEA,MAAI,QAAQ,CAAC,KAAK,MAAM;AACtB,QAAI,eAAe,WAAW;AAC5B,YAAM,SAAS,IAAI;AACnB,aAAO,EAAE;AAAA,QACP,EAAE,OAAO,EAAE,MAAM,IAAI,MAAM,SAAS,IAAI,SAAS,SAAS,IAAI,QAAQ,EAAE;AAAA,QACxE;AAAA,MACF;AAAA,IACF;AAEA,YAAQ,MAAM,iCAAiC,GAAG;AAClD,WAAO,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,kBAAkB,SAAS,yBAAyB,EAAE,GAAG,GAAG;AAAA,EAC7F,CAAC;AAGD,MAAI;AAAA,IAAI;AAAA,IAAY,CAAC,MACnB,EAAE,KAAK,EAAE,QAAQ,MAAe,YAAY,KAAK,IAAI,IAAI,cAAc,IAAK,CAAC;AAAA,EAC/E;AAGA,MAAI,IAAI,eAAe,CAAC,MAAM,EAAE,KAAK,cAAc,CAAC,CAAC;AAGrD,MAAI,IAAI,eAAe,CAAC,MAAM,EAAE,KAAK,kBAAkB,CAAC,CAAC;AAGzD,MAAI,KAAK,aAAa,OAAO,MAAM;AACjC,UAAM,MAAM,MAAM,EAAE,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI;AAC/C,QAAI,OAAO,MAAM;AACf,YAAM,IAAI,UAAU,oBAAoB,8BAA8B,GAAG;AAAA,IAC3E;AACA,UAAM,SAAS,mBAAmB,UAAU,GAAG;AAC/C,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,QACA;AAAA,QACA,OAAO,MAAM;AAAA,MACf;AAAA,IACF;AACA,UAAM,SAAS,MAAM,YAAY,OAAO,IAAI;AAC5C,WAAO,EAAE,KAAK,MAAM;AAAA,EACtB,CAAC;AAGD,MAAI,KAAK,qBAAqB,OAAO,MAAM;AACzC,UAAM,cAAc,EAAE,IAAI,OAAO,cAAc,KAAK;AACpD,QAAI;AACJ,QAAI,YAAY,SAAS,sBAAsB,GAAG;AAChD,YAAM,OAAO,MAAM,EAAE,IAAI,KAAK;AAC9B,YAAM,SAAS,KACZ,MAAM,IAAI,EACV,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,CAAC,SAAS,KAAK,SAAS,CAAC,EAChC,IAAI,CAAC,SAAS;AACb,YAAI;AACF,iBAAO,KAAK,MAAM,IAAI;AAAA,QACxB,QAAQ;AACN,gBAAM,IAAI;AAAA,YACR;AAAA,YACA;AAAA,YACA;AAAA,YACA,KAAK,MAAM,GAAG,GAAG;AAAA,UACnB;AAAA,QACF;AAAA,MACF,CAAC;AACH,gBAAU,EAAE,OAAO;AAAA,IACrB,OAAO;AACL,gBAAU,MAAM,EAAE,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI;AAAA,IAC/C;AACA,QAAI,WAAW,MAAM;AACnB,YAAM,IAAI,UAAU,oBAAoB,wCAAwC,GAAG;AAAA,IACrF;AACA,UAAM,SAAS,0BAA0B,UAAU,OAAO;AAC1D,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,QACA;AAAA,QACA,OAAO,MAAM;AAAA,MACf;AAAA,IACF;AACA,UAAM,SAAS,MAAM,mBAAmB,OAAO,MAAM,KAAK,UAAU,CAAC,CAAC;AACtE,WAAO,EAAE,KAAK,MAAM;AAAA,EACtB,CAAC;AAGD,MAAI,KAAK,gBAAgB,OAAO,MAAM;AACpC,UAAM,MAAM,MAAM,EAAE,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI;AAC/C,QAAI,OAAO,MAAM;AACf,YAAM,IAAI,UAAU,oBAAoB,8BAA8B,GAAG;AAAA,IAC3E;AACA,UAAM,SAAS,yBAAyB,UAAU,GAAG;AACrD,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,QACA;AAAA,QACA,OAAO,MAAM;AAAA,MACf;AAAA,IACF;AACA,UAAM,SAAS,MAAM,qBAAqB,OAAO,MAAM,KAAK,UAAU,CAAC,CAAC;AACxE,WAAO,EAAE,KAAK,MAAM;AAAA,EACtB,CAAC;AAGD,MAAI,IAAI,iBAAiB,CAAC,MAAM,EAAE,KAAK,aAAa,cAAc,EAAE,OAAO,CAAC,CAAC;AAE7E,SAAO;AACT;AASO,SAAS,YAAY,OAAqB,CAAC,GAAe;AAC/D,QAAM,MAAM,UAAU,IAAI;AAC1B,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,OAAO,KAAK,QAAQ;AAC1B,SAAO,MAAM,EAAE,OAAO,IAAI,OAAO,MAAM,UAAU,KAAK,GAAG,CAAC,EAAE,SAAS,MAAM,WAAW,MAAM;AAE1F,YAAQ,IAAI,kCAAkC,OAAO,IAAI,UAAU,EAAE;AAAA,EACvE,CAAC;AACH;","names":[]}
@@ -10,6 +10,17 @@ __export(benchmarks_exports, {
10
10
  routing: () => routing_exports
11
11
  });
12
12
 
13
+ // src/benchmarks/routing/index.ts
14
+ var routing_exports = {};
15
+ __export(routing_exports, {
16
+ ROUTING_DATASET: () => ROUTING_DATASET,
17
+ RoutingAdapter: () => RoutingAdapter,
18
+ assignSplit: () => assignSplit,
19
+ evaluate: () => evaluate,
20
+ extractRouteTokens: () => extractRouteTokens,
21
+ loadDataset: () => loadDataset
22
+ });
23
+
13
24
  // src/benchmarks/types.ts
14
25
  function fnv1a32(input) {
15
26
  let h = 2166136261;
@@ -28,17 +39,6 @@ function deterministicSplit(itemId, seed = BENCHMARK_SPLIT_SEED) {
28
39
  return "holdout";
29
40
  }
30
41
 
31
- // src/benchmarks/routing/index.ts
32
- var routing_exports = {};
33
- __export(routing_exports, {
34
- ROUTING_DATASET: () => ROUTING_DATASET,
35
- RoutingAdapter: () => RoutingAdapter,
36
- assignSplit: () => assignSplit,
37
- evaluate: () => evaluate,
38
- extractRouteTokens: () => extractRouteTokens,
39
- loadDataset: () => loadDataset
40
- });
41
-
42
42
  // src/benchmarks/routing/dataset.ts
43
43
  var ROUTING_DATASET = [
44
44
  {
@@ -174,11 +174,15 @@ var ROUTING_DATASET = [
174
174
  // src/benchmarks/routing/index.ts
175
175
  var RoutingAdapter = class {
176
176
  async loadDataset(split) {
177
- return ROUTING_DATASET.map((item) => ({ id: item.id, payload: item })).filter((it) => assignSplitImpl(it.id) === split);
177
+ return ROUTING_DATASET.map((item) => ({ id: item.id, payload: item })).filter(
178
+ (it) => assignSplitImpl(it.id) === split
179
+ );
178
180
  }
179
181
  async evaluate(item, response) {
180
182
  const tokens = extractRouteTokens(response);
181
- const correct = new Set([item.payload.route, ...item.payload.synonyms].map((s) => s.toLowerCase()));
183
+ const correct = new Set(
184
+ [item.payload.route, ...item.payload.synonyms].map((s) => s.toLowerCase())
185
+ );
182
186
  const hardNeg = new Set(item.payload.hardNegatives.map((s) => s.toLowerCase()));
183
187
  const firstMatch = tokens.find((t) => correct.has(t.toLowerCase())) ?? null;
184
188
  const firstHardNeg = tokens.find((t) => hardNeg.has(t.toLowerCase())) ?? null;
@@ -216,4 +220,4 @@ export {
216
220
  routing_exports,
217
221
  benchmarks_exports
218
222
  };
219
- //# sourceMappingURL=chunk-42I2QC2L.js.map
223
+ //# sourceMappingURL=chunk-6QDKWHLS.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/benchmarks/index.ts","../src/benchmarks/routing/index.ts","../src/benchmarks/types.ts","../src/benchmarks/routing/dataset.ts"],"sourcesContent":["/**\n * Reference benchmark wrappers — entry point.\n *\n * Core surface (exported here):\n * - The `BenchmarkAdapter` contract.\n * - `deterministicSplit` + `BENCHMARK_SPLIT_SEED` for split assignment.\n * - `routing` — synthetic 16-task router benchmark. The only novel\n * benchmark we built; ships in the package.\n *\n * Example wrappers (under `examples/benchmarks/`, NOT in the bundle):\n * - `gsm8k` — exact-match math reasoning (HF mirror, dataset\n * not bundled).\n * - `swebench-lite` — 30-instance SWE-Bench subset via an external\n * grader command.\n *\n * The example wrappers are reference implementations of `BenchmarkAdapter`.\n * Read them, copy them, adapt them. They're intentionally not in the main\n * entry — every team will configure them differently.\n */\n\nexport * as routing from './routing/index'\nexport type {\n BenchmarkAdapter,\n BenchmarkDatasetItem,\n BenchmarkEvaluation,\n} from './types'\nexport { BENCHMARK_SPLIT_SEED, deterministicSplit } from './types'\n","/**\n * Routing benchmark — synthetic, dependency-free, ships in the\n * package. 16 cross-category items in `dataset.ts`. See\n * `routing/README.md` for the format.\n *\n * `evaluate` does case-insensitive exact match against the canonical\n * route plus declared synonyms. The first valid route token in the\n * response wins; everything else is ignored. Wrong answers also\n * report whether they hit a hard negative — useful when triaging\n * \"always picks the popular route\" failure modes.\n */\n\nimport type { RunSplitTag } from '../../run-record'\nimport type { BenchmarkAdapter, BenchmarkDatasetItem, BenchmarkEvaluation } from '../types'\nimport { deterministicSplit } from '../types'\nimport { ROUTING_DATASET, type RoutingItem } from './dataset'\n\nexport type { RoutingItem }\nexport type RoutingPayload = RoutingItem\nexport type RoutingDatasetItem = BenchmarkDatasetItem<RoutingPayload>\n\nclass RoutingAdapter implements BenchmarkAdapter<RoutingDatasetItem, RoutingPayload> {\n async loadDataset(split: RunSplitTag): Promise<RoutingDatasetItem[]> {\n return ROUTING_DATASET.map((item) => ({ id: item.id, payload: item })).filter(\n (it) => assignSplitImpl(it.id) === split,\n )\n }\n\n async evaluate(item: RoutingDatasetItem, response: string): Promise<BenchmarkEvaluation> {\n const tokens = extractRouteTokens(response)\n const correct = new Set<string>(\n [item.payload.route, ...item.payload.synonyms].map((s) => s.toLowerCase()),\n )\n const hardNeg = new Set<string>(item.payload.hardNegatives.map((s) => s.toLowerCase()))\n const firstMatch = tokens.find((t) => correct.has(t.toLowerCase())) ?? null\n const firstHardNeg = tokens.find((t) => hardNeg.has(t.toLowerCase())) ?? null\n const score = firstMatch ? 1 : 0\n return {\n score,\n raw: {\n firstToken: tokens[0] ?? null,\n matchedRoute: firstMatch,\n hitHardNegative: Boolean(firstHardNeg),\n hardNegativeRoute: firstHardNeg,\n category: item.payload.category,\n },\n }\n }\n\n assignSplit(itemId: string): RunSplitTag {\n return assignSplitImpl(itemId)\n }\n}\n\nfunction assignSplitImpl(itemId: string): RunSplitTag {\n return deterministicSplit(`routing::${itemId}`)\n}\n\n/**\n * Pull route-shaped tokens out of a model response. Routes look like\n * `category.action` (`fs.write`, `chat.reply`). Bare alphanumerics\n * are not routes, but `category.action` patterns are robust to most\n * model wrappers (JSON output, prose explanations, code fences).\n */\nexport function extractRouteTokens(response: string): string[] {\n const matches = response.match(/[a-z][a-z0-9_]*\\.[a-z][a-z0-9_]*/gi)\n return matches ?? []\n}\n\nconst adapter = new RoutingAdapter()\n\nexport const loadDataset = adapter.loadDataset.bind(adapter)\nexport const evaluate = adapter.evaluate.bind(adapter)\nexport const assignSplit = adapter.assignSplit.bind(adapter)\nexport { ROUTING_DATASET, RoutingAdapter }\n","/**\n * Shared types for the reference benchmark wrappers under\n * `src/benchmarks/`. Each wrapper exports the three functions in\n * `BenchmarkAdapter` plus its own typed `DatasetItem` shape.\n */\n\nimport type { RunSplitTag } from '../run-record'\n\nexport interface BenchmarkDatasetItem<TPayload = unknown> {\n /** Stable dataset-local item id (used for split assignment + paper\n * references). Unique within a benchmark. */\n id: string\n /** Free-form payload. Each benchmark defines its own shape. */\n payload: TPayload\n}\n\nexport interface BenchmarkEvaluation {\n /** [0, 1] score for the response on this item. Exact-match\n * benchmarks use 0/1; partial-credit benchmarks may return\n * fractional values. */\n score: number\n /** Optional bag of raw scoring signals — e.g. parsed numeric\n * answer, regex match, judge sub-scores. */\n raw: Record<string, unknown>\n}\n\n/** Common signature implemented by every adapter under `src/benchmarks/*`. */\n// `TPayload` is the per-item payload type; `_TItem` is preserved for\n// downstream type-narrowing extensions (a richer `BenchmarkDatasetItem`\n// subclass that adds e.g. provenance metadata) but is intentionally\n// unused here. `noUnusedLocals` requires the leading underscore.\nexport interface BenchmarkAdapter<_TItem = unknown, TPayload = unknown> {\n /** Load the dataset for the given split. May hit the network on\n * first call but should be cache-friendly. Adapters that don't\n * ship the dataset itself MUST throw a clearly-marked error\n * pointing the caller at the loader script. */\n loadDataset(split: RunSplitTag): Promise<BenchmarkDatasetItem<TPayload>[]>\n /** Score a single response. Pure with respect to the inputs. */\n evaluate(item: BenchmarkDatasetItem<TPayload>, response: string): Promise<BenchmarkEvaluation>\n /** Deterministic split assignment via item id hashing. The\n * fraction of items in each split is implementation-defined but\n * MUST be stable across processes and platforms. */\n assignSplit(itemId: string): RunSplitTag\n}\n\n// ── Deterministic split assignment ───────────────────────────────────\n\n/**\n * 32-bit FNV-1a hash. Stable, allocation-free, deterministic across\n * runtimes. We use it to assign items to splits rather than depending\n * on a polyfilled crypto.subtle path.\n */\nfunction fnv1a32(input: string): number {\n let h = 0x811c9dc5\n for (let i = 0; i < input.length; i++) {\n h ^= input.charCodeAt(i) & 0xff\n h = (h + ((h << 1) + (h << 4) + (h << 7) + (h << 8) + (h << 24))) >>> 0\n }\n return h >>> 0\n}\n\n/** Split-assignment seed shared across all benchmarks. Bumping this\n * value reshuffles every split — do NOT do that lightly. */\nexport const BENCHMARK_SPLIT_SEED = 'agent-eval-v1'\n\n/**\n * Assign an item id to one of `'search' | 'dev' | 'holdout'` using a\n * stable 32-bit hash of `${seed}::${id}`. Default proportions:\n *\n * search: 60% (optimization-readable)\n * dev: 20% (held-out for tuning, leak-on-purpose during dev)\n * holdout:20% (paper-grade held-out, gated reads)\n */\nexport function deterministicSplit(\n itemId: string,\n seed: string = BENCHMARK_SPLIT_SEED,\n): RunSplitTag {\n const h = fnv1a32(`${seed}::${itemId}`)\n const pos = h / 0x100000000\n if (pos < 0.6) return 'search'\n if (pos < 0.8) return 'dev'\n return 'holdout'\n}\n","/**\n * Synthetic routing dataset. 16 tasks across 4 categories. Used as a\n * deterministic, dependency-free benchmark for any router that maps a\n * natural-language request to one of a fixed set of route labels.\n *\n * Format (see `routing/README.md` for prose):\n *\n * {\n * id: stable per-task ID (matches across processes).\n * category: one of the four route labels.\n * prompt: the user-facing request the router must classify.\n * route: the ground-truth route the router should pick.\n * synonyms: other strings that count as a correct answer.\n * hardNegatives:close-but-wrong route labels — used to detect the\n * \"always picks the popular route\" failure mode.\n * }\n *\n * The four categories are intentionally cross-domain (file ops,\n * math, search, conversation) so a router that collapses to one\n * category is easy to spot.\n */\n\nexport interface RoutingItem {\n id: string\n category: 'file' | 'math' | 'search' | 'chat'\n prompt: string\n /** Canonical correct route label. */\n route: string\n /** Alternate route labels that also count as correct. */\n synonyms: string[]\n /** Wrong-but-tempting route labels (for analysis, not grading). */\n hardNegatives: string[]\n}\n\nexport const ROUTING_DATASET: RoutingItem[] = [\n {\n id: 'file_001',\n category: 'file',\n prompt: 'Save the meeting notes to /tmp/notes-2025-04.md as markdown.',\n route: 'fs.write',\n synonyms: ['filesystem.write', 'write_file'],\n hardNegatives: ['fs.read', 'chat.reply'],\n },\n {\n id: 'file_002',\n category: 'file',\n prompt: 'Read the contents of /etc/hosts and summarize the entries.',\n route: 'fs.read',\n synonyms: ['filesystem.read', 'read_file'],\n hardNegatives: ['fs.write', 'search.web'],\n },\n {\n id: 'file_003',\n category: 'file',\n prompt: 'List every Python file under src/ recursively.',\n route: 'fs.list',\n synonyms: ['filesystem.list', 'list_files'],\n hardNegatives: ['fs.read', 'search.code'],\n },\n {\n id: 'file_004',\n category: 'file',\n prompt: 'Delete the cached build at .turbo/cache.',\n route: 'fs.delete',\n synonyms: ['filesystem.delete', 'remove_file'],\n hardNegatives: ['fs.write', 'fs.list'],\n },\n {\n id: 'math_001',\n category: 'math',\n prompt: 'What is the integral of 3x^2 + 2x from 0 to 5?',\n route: 'math.integral',\n synonyms: ['calculator.integral', 'math.solve'],\n hardNegatives: ['math.derivative', 'chat.reply'],\n },\n {\n id: 'math_002',\n category: 'math',\n prompt: 'Compute the derivative of sin(x) * cos(x).',\n route: 'math.derivative',\n synonyms: ['calculator.derivative', 'math.solve'],\n hardNegatives: ['math.integral', 'math.algebra'],\n },\n {\n id: 'math_003',\n category: 'math',\n prompt: 'Solve 2x + 7 = 19 for x.',\n route: 'math.algebra',\n synonyms: ['calculator.algebra', 'math.solve'],\n hardNegatives: ['math.derivative', 'math.integral'],\n },\n {\n id: 'math_004',\n category: 'math',\n prompt: 'What is the prime factorization of 360?',\n route: 'math.numbertheory',\n synonyms: ['calculator.factor', 'math.solve'],\n hardNegatives: ['math.algebra', 'search.web'],\n },\n {\n id: 'search_001',\n category: 'search',\n prompt: 'Find recent papers on agent prompt optimization with held-out promotion gates.',\n route: 'search.web',\n synonyms: ['web.search', 'search.papers'],\n hardNegatives: ['search.code', 'chat.reply'],\n },\n {\n id: 'search_002',\n category: 'search',\n prompt: 'Search the codebase for every call site of `runProposeReview`.',\n route: 'search.code',\n synonyms: ['code.search', 'grep'],\n hardNegatives: ['search.web', 'fs.read'],\n },\n {\n id: 'search_003',\n category: 'search',\n prompt: 'What is the latest release of the Tangle network on GitHub?',\n route: 'search.web',\n synonyms: ['web.search', 'github.releases'],\n hardNegatives: ['search.code', 'chat.reply'],\n },\n {\n id: 'search_004',\n category: 'search',\n prompt: 'Find all TODO comments in the agent-eval src tree.',\n route: 'search.code',\n synonyms: ['code.search', 'grep'],\n hardNegatives: ['search.web', 'fs.list'],\n },\n {\n id: 'chat_001',\n category: 'chat',\n prompt: 'Hi there, how are you doing today?',\n route: 'chat.reply',\n synonyms: ['conversation.reply'],\n hardNegatives: ['search.web', 'fs.read'],\n },\n {\n id: 'chat_002',\n category: 'chat',\n prompt: 'Please explain the difference between an LLM and a foundation model.',\n route: 'chat.reply',\n synonyms: ['conversation.reply', 'qa.answer'],\n hardNegatives: ['search.web', 'math.algebra'],\n },\n {\n id: 'chat_003',\n category: 'chat',\n prompt: 'Tell me a short joke about distributed systems.',\n route: 'chat.reply',\n synonyms: ['conversation.reply'],\n hardNegatives: ['search.web', 'fs.read'],\n },\n {\n id: 'chat_004',\n category: 'chat',\n prompt: 'Acknowledge my last message with a thumbs up.',\n route: 'chat.reply',\n synonyms: ['conversation.reply', 'react'],\n hardNegatives: ['fs.write', 'search.web'],\n },\n]\n"],"mappings":";;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACoDA,SAAS,QAAQ,OAAuB;AACtC,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,SAAK,MAAM,WAAW,CAAC,IAAI;AAC3B,QAAK,MAAM,KAAK,MAAM,KAAK,MAAM,KAAK,MAAM,KAAK,MAAM,KAAK,SAAU;AAAA,EACxE;AACA,SAAO,MAAM;AACf;AAIO,IAAM,uBAAuB;AAU7B,SAAS,mBACd,QACA,OAAe,sBACF;AACb,QAAM,IAAI,QAAQ,GAAG,IAAI,KAAK,MAAM,EAAE;AACtC,QAAM,MAAM,IAAI;AAChB,MAAI,MAAM,IAAK,QAAO;AACtB,MAAI,MAAM,IAAK,QAAO;AACtB,SAAO;AACT;;;AChDO,IAAM,kBAAiC;AAAA,EAC5C;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,oBAAoB,YAAY;AAAA,IAC3C,eAAe,CAAC,WAAW,YAAY;AAAA,EACzC;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,mBAAmB,WAAW;AAAA,IACzC,eAAe,CAAC,YAAY,YAAY;AAAA,EAC1C;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,mBAAmB,YAAY;AAAA,IAC1C,eAAe,CAAC,WAAW,aAAa;AAAA,EAC1C;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,qBAAqB,aAAa;AAAA,IAC7C,eAAe,CAAC,YAAY,SAAS;AAAA,EACvC;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,uBAAuB,YAAY;AAAA,IAC9C,eAAe,CAAC,mBAAmB,YAAY;AAAA,EACjD;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,yBAAyB,YAAY;AAAA,IAChD,eAAe,CAAC,iBAAiB,cAAc;AAAA,EACjD;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,sBAAsB,YAAY;AAAA,IAC7C,eAAe,CAAC,mBAAmB,eAAe;AAAA,EACpD;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,qBAAqB,YAAY;AAAA,IAC5C,eAAe,CAAC,gBAAgB,YAAY;AAAA,EAC9C;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,cAAc,eAAe;AAAA,IACxC,eAAe,CAAC,eAAe,YAAY;AAAA,EAC7C;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,eAAe,MAAM;AAAA,IAChC,eAAe,CAAC,cAAc,SAAS;AAAA,EACzC;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,cAAc,iBAAiB;AAAA,IAC1C,eAAe,CAAC,eAAe,YAAY;AAAA,EAC7C;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,eAAe,MAAM;AAAA,IAChC,eAAe,CAAC,cAAc,SAAS;AAAA,EACzC;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,oBAAoB;AAAA,IAC/B,eAAe,CAAC,cAAc,SAAS;AAAA,EACzC;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,sBAAsB,WAAW;AAAA,IAC5C,eAAe,CAAC,cAAc,cAAc;AAAA,EAC9C;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,oBAAoB;AAAA,IAC/B,eAAe,CAAC,cAAc,SAAS;AAAA,EACzC;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,UAAU;AAAA,IACV,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU,CAAC,sBAAsB,OAAO;AAAA,IACxC,eAAe,CAAC,YAAY,YAAY;AAAA,EAC1C;AACF;;;AF9IA,IAAM,iBAAN,MAAqF;AAAA,EACnF,MAAM,YAAY,OAAmD;AACnE,WAAO,gBAAgB,IAAI,CAAC,UAAU,EAAE,IAAI,KAAK,IAAI,SAAS,KAAK,EAAE,EAAE;AAAA,MACrE,CAAC,OAAO,gBAAgB,GAAG,EAAE,MAAM;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,MAA0B,UAAgD;AACvF,UAAM,SAAS,mBAAmB,QAAQ;AAC1C,UAAM,UAAU,IAAI;AAAA,MAClB,CAAC,KAAK,QAAQ,OAAO,GAAG,KAAK,QAAQ,QAAQ,EAAE,IAAI,CAAC,MAAM,EAAE,YAAY,CAAC;AAAA,IAC3E;AACA,UAAM,UAAU,IAAI,IAAY,KAAK,QAAQ,cAAc,IAAI,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;AACtF,UAAM,aAAa,OAAO,KAAK,CAAC,MAAM,QAAQ,IAAI,EAAE,YAAY,CAAC,CAAC,KAAK;AACvE,UAAM,eAAe,OAAO,KAAK,CAAC,MAAM,QAAQ,IAAI,EAAE,YAAY,CAAC,CAAC,KAAK;AACzE,UAAM,QAAQ,aAAa,IAAI;AAC/B,WAAO;AAAA,MACL;AAAA,MACA,KAAK;AAAA,QACH,YAAY,OAAO,CAAC,KAAK;AAAA,QACzB,cAAc;AAAA,QACd,iBAAiB,QAAQ,YAAY;AAAA,QACrC,mBAAmB;AAAA,QACnB,UAAU,KAAK,QAAQ;AAAA,MACzB;AAAA,IACF;AAAA,EACF;AAAA,EAEA,YAAY,QAA6B;AACvC,WAAO,gBAAgB,MAAM;AAAA,EAC/B;AACF;AAEA,SAAS,gBAAgB,QAA6B;AACpD,SAAO,mBAAmB,YAAY,MAAM,EAAE;AAChD;AAQO,SAAS,mBAAmB,UAA4B;AAC7D,QAAM,UAAU,SAAS,MAAM,oCAAoC;AACnE,SAAO,WAAW,CAAC;AACrB;AAEA,IAAM,UAAU,IAAI,eAAe;AAE5B,IAAM,cAAc,QAAQ,YAAY,KAAK,OAAO;AACpD,IAAM,WAAW,QAAQ,SAAS,KAAK,OAAO;AAC9C,IAAM,cAAc,QAAQ,YAAY,KAAK,OAAO;","names":[]}