@tangle-network/agent-eval 0.25.0 → 0.27.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +145 -0
- package/README.md +5 -5
- package/dist/builder-eval/index.js +1 -1
- package/dist/{chunk-WWYCWKUM.js → chunk-3CKU6VGU.js} +2 -2
- package/dist/{chunk-K2TPS5LB.js → chunk-4U4BKCXK.js} +2 -2
- package/dist/chunk-4U4BKCXK.js.map +1 -0
- package/dist/{chunk-2A5XJB43.js → chunk-5AKPEK5L.js} +3 -3
- package/dist/chunk-5AKPEK5L.js.map +1 -0
- package/dist/{chunk-RAF443UI.js → chunk-DBIGN5MJ.js} +2 -2
- package/dist/{chunk-JLZQWFV3.js → chunk-K33INZHH.js} +2 -2
- package/dist/chunk-K33INZHH.js.map +1 -0
- package/dist/{chunk-NU65VQ7M.js → chunk-MAZ26DC7.js} +1 -1
- package/dist/chunk-MAZ26DC7.js.map +1 -0
- package/dist/{chunk-LSH4MMOZ.js → chunk-NCRFYPS3.js} +1 -1
- package/dist/chunk-NCRFYPS3.js.map +1 -0
- package/dist/{chunk-ZN274SWR.js → chunk-PALJO75S.js} +2 -2
- package/dist/{chunk-OWLAAMME.js → chunk-QHF6EQKK.js} +3 -2
- package/dist/chunk-QHF6EQKK.js.map +1 -0
- package/dist/chunk-R5UQJNKC.js +722 -0
- package/dist/chunk-R5UQJNKC.js.map +1 -0
- package/dist/{chunk-SESZDQPX.js → chunk-RUI6SIHY.js} +3 -3
- package/dist/chunk-RUI6SIHY.js.map +1 -0
- package/dist/{chunk-EDUKQ5AM.js → chunk-SZSBQUIJ.js} +2 -2
- package/dist/chunk-SZSBQUIJ.js.map +1 -0
- package/dist/{chunk-4F5DQN55.js → chunk-VSMTAMNK.js} +1 -1
- package/dist/chunk-VSMTAMNK.js.map +1 -0
- package/dist/{chunk-5LBB5B3Z.js → chunk-XFZCM5Z3.js} +1 -1
- package/dist/chunk-XFZCM5Z3.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/{control-CBShYYA6.d.ts → control-BT4qnXiS.d.ts} +2 -2
- package/dist/{control-runtime-BuJHoLg0.d.ts → control-runtime-BZ_lVLYW.d.ts} +1 -0
- package/dist/control.d.ts +3 -3
- package/dist/control.js +2 -2
- package/dist/{failure-cluster-C2EGSDiT.d.ts → failure-cluster-Cw65_5FY.d.ts} +1 -2
- package/dist/{feedback-trajectory-DfFdrraJ.d.ts → feedback-trajectory-D1aGKusy.d.ts} +1 -1
- package/dist/governance/index.d.ts +1 -1
- package/dist/{index-Oj9fAPPN.d.ts → index-BhLlu-qO.d.ts} +63 -2
- package/dist/index.d.ts +279 -72
- package/dist/index.js +222 -136
- package/dist/index.js.map +1 -1
- package/dist/knowledge/index.d.ts +1 -1
- package/dist/knowledge/index.js +2 -2
- package/dist/{multi-layer-verifier-LkP3LVKj.d.ts → multi-layer-verifier-U-c8ge1k.d.ts} +1 -1
- package/dist/openapi.json +1 -1
- package/dist/optimization.d.ts +5 -5
- package/dist/optimization.js +5 -5
- package/dist/pipelines/index.d.ts +1 -1
- package/dist/pipelines/index.js +2 -2
- package/dist/{release-report-BNgMdqPF.d.ts → release-report-CCQqnK46.d.ts} +1 -1
- package/dist/{replay-BL96gCEP.d.ts → replay-D7z0J43-.d.ts} +4 -5
- package/dist/reporting.d.ts +4 -4
- package/dist/reporting.js +5 -5
- package/dist/{researcher-BPT8x_NT.d.ts → researcher-G81CWc0q.d.ts} +9 -10
- package/dist/rl.d.ts +26 -44
- package/dist/rl.js +5 -5
- package/dist/rl.js.map +1 -1
- package/dist/{sequential-Dgz1n51-.d.ts → sequential-5iSVfzl2.d.ts} +2 -2
- package/dist/{summary-report-C7VPYEj2.d.ts → summary-report-Dl4akLKX.d.ts} +13 -1
- package/dist/traces.d.ts +1 -1
- package/dist/traces.js +2 -2
- package/dist/wire/index.d.ts +2 -2
- package/dist/wire/index.js +1 -1
- package/docs/concepts.md +11 -0
- package/docs/research-report-methodology.md +4 -4
- package/docs/three-package-architecture.md +12 -24
- package/package.json +1 -1
- package/dist/chunk-2A5XJB43.js.map +0 -1
- package/dist/chunk-4F5DQN55.js.map +0 -1
- package/dist/chunk-5LBB5B3Z.js.map +0 -1
- package/dist/chunk-EDUKQ5AM.js.map +0 -1
- package/dist/chunk-I4MBDTY5.js +0 -272
- package/dist/chunk-I4MBDTY5.js.map +0 -1
- package/dist/chunk-JLZQWFV3.js.map +0 -1
- package/dist/chunk-K2TPS5LB.js.map +0 -1
- package/dist/chunk-LSH4MMOZ.js.map +0 -1
- package/dist/chunk-NU65VQ7M.js.map +0 -1
- package/dist/chunk-OWLAAMME.js.map +0 -1
- package/dist/chunk-SESZDQPX.js.map +0 -1
- /package/dist/{chunk-WWYCWKUM.js.map → chunk-3CKU6VGU.js.map} +0 -0
- /package/dist/{chunk-RAF443UI.js.map → chunk-DBIGN5MJ.js.map} +0 -0
- /package/dist/{chunk-ZN274SWR.js.map → chunk-PALJO75S.js.map} +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/wire/schemas.ts","../src/wire/rubrics.ts","../src/wire/handlers.ts","../src/wire/openapi.ts","../src/wire/rpc.ts","../src/wire/server.ts"],"sourcesContent":["/**\n * Wire-protocol schemas.\n *\n * These Zod schemas are the contract between the agent-eval runtime and\n * any non-TypeScript client (Python, Rust, Go, …). They get rendered to\n * OpenAPI by `wire/openapi.ts` and code-generators consume that spec to\n * produce typed clients in other languages.\n *\n * Rule: if it's not in this file, it isn't on the wire. Keep names and\n * shapes self-explanatory — every field has a `.describe()` so the\n * generated docs are useful without reading the source.\n */\nimport { extendZodWithOpenApi } from '@asteasolutions/zod-to-openapi'\nimport { z } from 'zod'\n\nextendZodWithOpenApi(z)\n\n// ── Building blocks ─────────────────────────────────────────────────\n\nexport const RubricDimensionSchema = z\n .object({\n id: z\n .string()\n .min(1)\n .describe('Short stable id like \"buyer_quality\" — used as the key in scoring output.'),\n description: z\n .string()\n .min(1)\n .describe('One-line plain-English meaning. Read by humans reviewing low scores.'),\n weight: z\n .number()\n .min(0)\n .default(1)\n .describe('Relative weight in the composite score. Default 1; 0 disables.'),\n min: z.number().default(0).describe('Lower bound of valid score for this dimension.'),\n max: z.number().default(1).describe('Upper bound of valid score for this dimension.'),\n })\n .openapi('RubricDimension')\n\nexport const FailureModeSchema = z\n .object({\n id: z.string().min(1).describe('Short stable id like \"ai-cadence\" — used in detection lists.'),\n description: z.string().min(1).describe('Plain-English description of the failure pattern.'),\n })\n .openapi('FailureMode')\n\n// ── Rubric ──────────────────────────────────────────────────────────\n\nexport const RubricSchema = z\n .object({\n name: z\n .string()\n .min(1)\n .describe('Stable name like \"anti-slop\" — used by clients to invoke this rubric.'),\n description: z\n .string()\n .min(1)\n .describe('What this rubric measures. Shown in /v1/rubrics listing.'),\n systemPrompt: z\n .string()\n .min(1)\n .describe(\n 'Instructs the judging LLM. Should explain the persona (e.g. \"senior engineer reviewing voice\"), what to score on, and what to return.',\n ),\n dimensions: z\n .array(RubricDimensionSchema)\n .min(1)\n .describe('Scoring axes. The composite score is a weighted sum of these.'),\n failureModes: z\n .array(FailureModeSchema)\n .default([])\n .describe('Patterns to detect; each detected mode appears in the result.failureModes list.'),\n wins: z\n .array(FailureModeSchema)\n .default([])\n .describe('Positive patterns; each detected one appears in the result.wins list.'),\n })\n .openapi('Rubric')\n\n// ── Judge call ──────────────────────────────────────────────────────\n\nexport const JudgeRequestSchema = z\n .object({\n rubricName: z\n .string()\n .optional()\n .describe('Use a built-in rubric by name. Mutually exclusive with `rubric`.'),\n rubric: RubricSchema.optional().describe(\n 'Inline rubric definition. Mutually exclusive with `rubricName`.',\n ),\n content: z\n .string()\n .min(1)\n .describe('The text being judged — a tweet, a blog post, a code snippet, anything stringly.'),\n context: z\n .record(z.string(), z.unknown())\n .optional()\n .describe(\n 'Free-form metadata for the rubric to use — analytics, source URL, author, etc. Surfaced to the LLM.',\n ),\n model: z\n .string()\n .optional()\n .describe('Override the judge model (default routes via tcloud). e.g. \"claude-opus-4-7\".'),\n })\n .refine((v) => Boolean(v.rubricName) !== Boolean(v.rubric), {\n message: 'Provide exactly one of `rubricName` or `rubric`.',\n })\n .openapi('JudgeRequest')\n\nexport const JudgeResultSchema = z\n .object({\n composite: z\n .number()\n .min(0)\n .max(1)\n .describe('Weighted combination of dimension scores in 0..1. The single number to gate on.'),\n dimensions: z\n .record(z.string(), z.number())\n .describe('Per-dimension score, keyed by RubricDimension.id.'),\n failureModes: z\n .array(z.string())\n .default([])\n .describe('Failure-mode ids detected in the content (subset of rubric.failureModes ids).'),\n wins: z\n .array(z.string())\n .default([])\n .describe('Win ids detected in the content (subset of rubric.wins ids).'),\n rationale: z\n .string()\n .describe('Plain-English explanation of the score. Surfaced to the human reviewer.'),\n rubricVersion: z\n .string()\n .describe(\n 'Stable hash of the rubric used. Scores are only comparable across runs when this matches.',\n ),\n model: z.string().describe('Model that produced the judgement, for reproducibility.'),\n durationMs: z.number().int().nonnegative().describe('End-to-end wall time for this call.'),\n })\n .openapi('JudgeResult')\n\n// ── Rubric listing ──────────────────────────────────────────────────\n\nexport const RubricInfoSchema = z\n .object({\n name: z.string().describe('Pass this to /v1/judge as `rubricName`.'),\n description: z.string().describe('What this rubric measures.'),\n dimensions: z\n .array(z.object({ id: z.string(), description: z.string(), weight: z.number() }))\n .describe('The scoring axes this rubric uses, with weights.'),\n failureModes: z.array(z.string()).default([]).describe('Failure-mode ids this rubric detects.'),\n rubricVersion: z.string().describe('Stable hash — match this to compare scores across runs.'),\n })\n .openapi('RubricInfo')\n\nexport const ListRubricsResponseSchema = z\n .object({\n rubrics: z.array(RubricInfoSchema),\n })\n .openapi('ListRubricsResponse')\n\n// ── Version / health ────────────────────────────────────────────────\n\nexport const VersionResponseSchema = z\n .object({\n package: z.string().describe('Package name (always \"@tangle-network/agent-eval\").'),\n version: z.string().describe('Semver of the running server. Match your client to this.'),\n wireVersion: z\n .string()\n .describe(\n 'Wire-protocol semver. Bumps separately from package version when the schema changes.',\n ),\n apiSurface: z.array(z.string()).describe('List of supported method names.'),\n })\n .openapi('VersionResponse')\n\nexport const HealthResponseSchema = z\n .object({\n status: z.literal('ok'),\n uptimeSec: z.number(),\n })\n .openapi('HealthResponse')\n\n// ── Ingestion: production traces + user feedback ────────────────────\n\n/**\n * Minimal `TraceEvent` shape that the production runtime emits.\n * Matches `trace/schema.ts` `TraceEvent` but is duplicated here as a\n * wire schema so non-TypeScript clients can validate without depending\n * on internal types.\n */\nexport const TraceEventSchema = z\n .object({\n eventId: z.string().min(1).describe('Stable id for the event. Use ULID or UUID.'),\n runId: z.string().min(1).describe('Run this event belongs to.'),\n spanId: z.string().optional().describe('Span that emitted the event, if any.'),\n kind: z\n .enum([\n 'log',\n 'error',\n 'budget_decrement',\n 'budget_breach',\n 'state_mutation',\n 'policy_violation',\n 'redaction_applied',\n 'custom',\n ])\n .describe('Coarse event category — matches the TraceSchema v1 EventKind enum.'),\n timestamp: z\n .number()\n .int()\n .nonnegative()\n .describe('Unix millis. Must be monotonically non-decreasing within a span.'),\n payload: z\n .record(z.string(), z.unknown())\n .describe('Free-form payload — the runtime owns the shape.'),\n })\n .openapi('TraceEvent')\n\nexport const TracesIngestRequestSchema = z\n .object({\n events: z\n .array(TraceEventSchema)\n .min(1)\n .max(10_000)\n .describe('Batch of events. Max 10k per call — bigger streams should be chunked.'),\n })\n .openapi('TracesIngestRequest')\n\nexport const TracesIngestResponseSchema = z\n .object({\n accepted: z.number().int().nonnegative().describe('Number of events persisted.'),\n rejected: z\n .number()\n .int()\n .nonnegative()\n .describe('Number of events the store refused — see `errors[]` for reasons.'),\n errors: z\n .array(\n z.object({\n eventId: z.string().describe('Event id this error applies to.'),\n message: z.string().describe('Why the event was rejected.'),\n }),\n )\n .default([]),\n })\n .openapi('TracesIngestResponse')\n\nexport const FeedbackLabelSchema = z\n .object({\n id: z.string().optional(),\n source: z.enum(['user', 'judge', 'environment', 'metric', 'policy', 'system']),\n kind: z.enum([\n 'approve',\n 'reject',\n 'select',\n 'edit',\n 'rank',\n 'rate',\n 'comment',\n 'metric_outcome',\n 'policy_block',\n 'revision_request',\n ]),\n value: z.unknown(),\n reason: z.string().optional(),\n severity: z.enum(['info', 'warning', 'error', 'critical']).optional(),\n createdAt: z.string().describe('ISO-8601 UTC.'),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .openapi('FeedbackLabel')\n\nexport const FeedbackAttemptSchema = z\n .object({\n id: z.string().min(1),\n stepIndex: z.number().int().nonnegative(),\n artifactType: z.enum([\n 'text',\n 'code',\n 'plan',\n 'research',\n 'action',\n 'ui',\n 'decision',\n 'data',\n 'other',\n ]),\n artifact: z.unknown(),\n options: z.array(z.unknown()).optional(),\n proposedAction: z\n .object({\n type: z.string(),\n risk: z.enum(['low', 'medium', 'high']).optional(),\n costUsd: z.number().optional(),\n externalSideEffect: z.boolean().optional(),\n requiresApproval: z.boolean().optional(),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .optional(),\n feedback: z.array(FeedbackLabelSchema).optional(),\n createdAt: z.string(),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .openapi('FeedbackAttempt')\n\nexport const FeedbackTrajectorySchema = z\n .object({\n id: z.string().min(1).describe('Stable id; idempotency key for the trajectory.'),\n projectId: z.string().optional(),\n scenarioId: z.string().optional(),\n task: z.object({\n intent: z.string().min(1),\n context: z.unknown().optional(),\n }),\n attempts: z.array(FeedbackAttemptSchema).default([]),\n labels: z.array(FeedbackLabelSchema).default([]),\n outcome: z\n .object({\n success: z.boolean().optional(),\n score: z.number().optional(),\n metrics: z.record(z.string(), z.number()).optional(),\n costUsd: z.number().optional(),\n detail: z.string().optional(),\n observedAt: z.string().optional(),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .optional(),\n split: z.enum(['train', 'dev', 'test', 'holdout']).optional(),\n tags: z.record(z.string(), z.string()).optional(),\n createdAt: z.string().describe('ISO-8601 UTC.'),\n updatedAt: z.string().optional(),\n metadata: z.record(z.string(), z.unknown()).optional(),\n })\n .openapi('FeedbackTrajectory')\n\nexport const FeedbackIngestResponseSchema = z\n .object({\n id: z.string().describe('Trajectory id that was persisted.'),\n persisted: z.boolean().describe('True when the trajectory was saved (idempotent on id).'),\n })\n .openapi('FeedbackIngestResponse')\n\nexport type TraceEvent = z.infer<typeof TraceEventSchema>\nexport type TracesIngestRequest = z.infer<typeof TracesIngestRequestSchema>\nexport type TracesIngestResponse = z.infer<typeof TracesIngestResponseSchema>\nexport type FeedbackTrajectory = z.infer<typeof FeedbackTrajectorySchema>\nexport type FeedbackIngestResponse = z.infer<typeof FeedbackIngestResponseSchema>\n\n// ── Errors ──────────────────────────────────────────────────────────\n\nexport const ErrorResponseSchema = z\n .object({\n error: z\n .object({\n code: z\n .string()\n .describe(\n 'Machine-readable code: \"validation_error\", \"rubric_not_found\", \"judge_error\".',\n ),\n message: z.string().describe('Human-readable message.'),\n details: z.unknown().optional().describe('Optional structured detail.'),\n })\n .describe('Errors are always wrapped in this shape across all endpoints.'),\n })\n .openapi('ErrorResponse')\n\n// ── Type exports for callers in the same package ────────────────────\n\nexport type RubricDimension = z.infer<typeof RubricDimensionSchema>\nexport type FailureMode = z.infer<typeof FailureModeSchema>\nexport type Rubric = z.infer<typeof RubricSchema>\nexport type JudgeRequest = z.infer<typeof JudgeRequestSchema>\nexport type JudgeResult = z.infer<typeof JudgeResultSchema>\nexport type RubricInfo = z.infer<typeof RubricInfoSchema>\nexport type ListRubricsResponse = z.infer<typeof ListRubricsResponseSchema>\nexport type VersionResponse = z.infer<typeof VersionResponseSchema>\nexport type ErrorResponse = z.infer<typeof ErrorResponseSchema>\n\n// ── Wire-protocol version ───────────────────────────────────────────\n\n/**\n * Bump on any breaking change to a request/response schema.\n * Non-breaking (additive) changes don't require a bump.\n */\nexport const WIRE_VERSION = '1.0.0'\n\n/**\n * Stable hash of a rubric. Used to make scores comparable across runs:\n * if the rubricVersion matches, the rubric was identical.\n */\nexport function hashRubric(rubric: Rubric): string {\n const stable = stableStringify(rubric)\n let h = 5381\n for (let i = 0; i < stable.length; i++) {\n h = (h * 33) ^ stable.charCodeAt(i)\n }\n // Unsigned 32-bit hex, prefixed with rubric name + version slot\n return `${rubric.name}@${(h >>> 0).toString(16).padStart(8, '0')}`\n}\n\nfunction stableStringify(value: unknown): string {\n if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(',')}]`\n if (value && typeof value === 'object') {\n const entries = Object.entries(value as Record<string, unknown>)\n .sort(([a], [b]) => a.localeCompare(b))\n .map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`)\n return `{${entries.join(',')}}`\n }\n return JSON.stringify(value)\n}\n","/**\n * Built-in rubrics shipped with agent-eval.\n *\n * A rubric is a set of scoring axes plus a system prompt that tells the\n * judging LLM how to grade against those axes. Built-in rubrics are\n * curated for use cases that recur across Tangle projects — call them\n * by name from any client.\n *\n * Adding a rubric:\n * 1. Define the Rubric object below with a clear `description` and\n * named `dimensions`.\n * 2. Register it in `BUILTIN_RUBRICS` at the bottom.\n * 3. Add a test in `tests/wire/rubrics.test.ts`.\n *\n * Custom rubrics: callers pass `rubric` inline to /v1/judge instead of\n * `rubricName` — see schemas.ts.\n */\nimport type { Rubric } from './schemas'\nimport { hashRubric } from './schemas'\n\n// ── anti-slop ───────────────────────────────────────────────────────\n// Voice/style judge tuned for technical-buyer audiences. Used by the\n// Postiz autoresearch loop and any content-quality gate.\n\nconst ANTI_SLOP: Rubric = {\n name: 'anti-slop',\n description:\n 'Voice and signal quality for content aimed at senior engineers. Catches AI cadence, marketing tone, and engagement-bait shapes.',\n systemPrompt: `You are evaluating a piece of content written for senior engineers and technical founders.\n\nYou score three things:\n- buyer_quality (0..1): would a senior engineer in the target ICP find this worth their attention? High = specific, earned, technically interesting. Low = generic, hyped, off-target.\n- voice (0..1): does it read like a person who built the thing, or like AI/marketing copy?\n- signal (0..1): does it contain a non-obvious detail, constraint, or claim a reader couldn't get from the public docs?\n\nDetect failure modes (return ids matching):\n- ai-cadence: rule-of-three openings, em-dash flourish, \"Let me explain\", \"Here's the thing\", AI rhythm\n- marketing-tone: \"We're excited to announce\", \"thrilled\", \"delighted\", \"game-changer\", buzzword stack\n- vague-claim: technical claim without a specific component, file, or measurement\n- no-hook: opening doesn't earn attention from the target reader\n- engagement-bait: \"agree?\", \"thoughts?\", listicles, controversy-fishing, hook-detail-pitch\n- off-icp: content shape would attract motivational/grift/hype audiences instead of buyers\n- stale-claim: repeats a positioning line we've used many times this month\n\nDetect wins (return ids matching):\n- specific-component: names a real file, component, or measurement\n- earned-detail: shares a non-obvious detail not derivable from public docs\n- constraint-articulated: names a real tradeoff and the side chosen\n- honest-failure: describes a real failure mode and what was done about it\n\nReturn ONLY JSON matching the response schema. Be conservative — most content has 0-1 wins and 1-2 failure modes, not many of each.`,\n dimensions: [\n {\n id: 'buyer_quality',\n description: 'Would the target buyer find this worth attention?',\n weight: 0.5,\n min: 0,\n max: 1,\n },\n {\n id: 'voice',\n description: 'Does it sound like a builder, not AI or marketing?',\n weight: 0.3,\n min: 0,\n max: 1,\n },\n {\n id: 'signal',\n description: 'Non-obvious detail, constraint, or claim?',\n weight: 0.2,\n min: 0,\n max: 1,\n },\n ],\n failureModes: [\n { id: 'ai-cadence', description: 'AI-rhythm openings and transitions' },\n { id: 'marketing-tone', description: 'Buzzwords, hype, corporate-PR voice' },\n { id: 'vague-claim', description: 'Technical claim without specifics' },\n { id: 'no-hook', description: 'Opening fails to earn attention' },\n { id: 'engagement-bait', description: 'Listicle/controversy/agree-pattern' },\n { id: 'off-icp', description: 'Voice attracts the wrong audience' },\n { id: 'stale-claim', description: 'Reuses an over-used positioning line' },\n ],\n wins: [\n { id: 'specific-component', description: 'Names a real file/component/number' },\n { id: 'earned-detail', description: 'Detail not in public docs' },\n { id: 'constraint-articulated', description: 'Names a real tradeoff' },\n { id: 'honest-failure', description: 'Describes a real failure honestly' },\n ],\n}\n\n// ── Registry ────────────────────────────────────────────────────────\n\nexport const BUILTIN_RUBRICS: Record<string, Rubric> = {\n 'anti-slop': ANTI_SLOP,\n}\n\n/** Get a built-in rubric by name, or undefined. */\nexport function getBuiltinRubric(name: string): Rubric | undefined {\n return BUILTIN_RUBRICS[name]\n}\n\n/** List built-in rubrics with their stable versions. */\nexport function listBuiltinRubrics() {\n return Object.values(BUILTIN_RUBRICS).map((r) => ({\n name: r.name,\n description: r.description,\n dimensions: r.dimensions.map((d) => ({\n id: d.id,\n description: d.description,\n weight: d.weight,\n })),\n failureModes: r.failureModes.map((f) => f.id),\n rubricVersion: hashRubric(r),\n }))\n}\n","/**\n * Pure handler functions — the \"business logic\" behind every wire-protocol\n * method. The HTTP server (`server.ts`) and the stdio RPC (`rpc.ts`) both\n * call these. Tests call these directly without spinning a server.\n *\n * Each handler:\n * - Takes a parsed request (already Zod-validated by the transport).\n * - Returns a result that matches the response schema.\n * - Throws `WireError` for caller-fixable errors (404, 400, 422).\n * - Lets unexpected errors bubble — the transport maps them to 500.\n */\nimport type { FeedbackTrajectoryStore } from '../feedback-trajectory'\nimport { callLlmJson } from '../llm-client'\nimport type { TraceEvent as InternalTraceEvent } from '../trace/schema'\nimport type { TraceStore } from '../trace/store'\nimport { getBuiltinRubric, listBuiltinRubrics } from './rubrics'\nimport {\n type FeedbackIngestResponse,\n hashRubric,\n type JudgeRequest,\n type JudgeResult,\n type ListRubricsResponse,\n type Rubric,\n type TracesIngestRequest,\n type TracesIngestResponse,\n type VersionResponse,\n WIRE_VERSION,\n type FeedbackTrajectory as WireFeedbackTrajectory,\n} from './schemas'\n\n/** Caller-fixable error. The transport renders this to 4xx + ErrorResponse. */\nexport class WireError extends Error {\n constructor(\n public readonly code: string,\n message: string,\n public readonly status: number = 400,\n public readonly details?: unknown,\n ) {\n super(message)\n this.name = 'WireError'\n }\n}\n\n// ── judge ───────────────────────────────────────────────────────────\n\n/** The JSON schema we ask the judging LLM to fill in. */\nfunction judgeOutputSchema(rubric: Rubric) {\n return {\n name: 'JudgeOutput',\n schema: {\n type: 'object',\n additionalProperties: false,\n properties: {\n dimensions: {\n type: 'object',\n additionalProperties: false,\n properties: Object.fromEntries(\n rubric.dimensions.map((d) => [\n d.id,\n { type: 'number', minimum: d.min, maximum: d.max },\n ]),\n ),\n required: rubric.dimensions.map((d) => d.id),\n },\n failureModes: {\n type: 'array',\n items: { type: 'string', enum: rubric.failureModes.map((f) => f.id) },\n },\n wins: {\n type: 'array',\n items: { type: 'string', enum: rubric.wins.map((w) => w.id) },\n },\n rationale: { type: 'string' },\n },\n required: ['dimensions', 'rationale'],\n } as Record<string, unknown>,\n }\n}\n\ninterface JudgeOutput {\n dimensions: Record<string, number>\n failureModes?: string[]\n wins?: string[]\n rationale: string\n}\n\nfunction validateJudgeOutput(value: unknown, rubric: Rubric): JudgeOutput {\n if (!value || typeof value !== 'object') {\n throw new WireError('judge_error', 'Judge returned malformed output.', 500, value)\n }\n const raw = value as Record<string, unknown>\n const rawDimensions = raw.dimensions\n if (!rawDimensions || typeof rawDimensions !== 'object' || Array.isArray(rawDimensions)) {\n throw new WireError('judge_error', 'Judge returned malformed dimensions.', 500, value)\n }\n\n const dimensions: Record<string, number> = {}\n const dimensionRecord = rawDimensions as Record<string, unknown>\n for (const dim of rubric.dimensions) {\n const score = dimensionRecord[dim.id]\n if (\n typeof score !== 'number' ||\n !Number.isFinite(score) ||\n score < dim.min ||\n score > dim.max\n ) {\n throw new WireError(\n 'judge_error',\n `Judge returned invalid score for dimension \"${dim.id}\".`,\n 500,\n value,\n )\n }\n dimensions[dim.id] = score\n }\n\n const allowedFailures = new Set(rubric.failureModes.map((mode) => mode.id))\n const allowedWins = new Set(rubric.wins.map((win) => win.id))\n const failureModes = validateIdArray(raw.failureModes, allowedFailures, 'failureModes', value)\n const wins = validateIdArray(raw.wins, allowedWins, 'wins', value)\n if (typeof raw.rationale !== 'string' || raw.rationale.trim().length === 0) {\n throw new WireError('judge_error', 'Judge returned missing rationale.', 500, value)\n }\n\n return { dimensions, failureModes, wins, rationale: raw.rationale }\n}\n\nfunction validateIdArray(\n raw: unknown,\n allowed: Set<string>,\n field: 'failureModes' | 'wins',\n original: unknown,\n): string[] {\n if (raw === undefined) return []\n if (!Array.isArray(raw)) {\n throw new WireError('judge_error', `Judge returned non-array ${field}.`, 500, original)\n }\n const out: string[] = []\n for (const item of raw) {\n if (typeof item !== 'string' || !allowed.has(item)) {\n throw new WireError(\n 'judge_error',\n `Judge returned unknown ${field} id \"${String(item)}\".`,\n 500,\n original,\n )\n }\n out.push(item)\n }\n return out\n}\n\nfunction compositeScore(dimensions: Record<string, number>, rubric: Rubric): number {\n let weighted = 0\n let totalWeight = 0\n for (const dim of rubric.dimensions) {\n const raw = dimensions[dim.id] ?? 0\n const range = dim.max - dim.min || 1\n const normalized = Math.max(0, Math.min(1, (raw - dim.min) / range))\n weighted += normalized * dim.weight\n totalWeight += dim.weight\n }\n return totalWeight > 0 ? weighted / totalWeight : 0\n}\n\nfunction buildJudgePrompt(content: string, context: unknown): string {\n const ctx = context && Object.keys(context as object).length ? JSON.stringify(context) : ''\n return [\n `CONTENT TO JUDGE:`,\n content,\n '',\n ctx ? `CONTEXT (metadata, analytics, etc.):` : '',\n ctx ? ctx : '',\n ]\n .filter(Boolean)\n .join('\\n')\n}\n\nconst DEFAULT_JUDGE_MODEL = 'claude-sonnet-4-6'\n\nexport async function handleJudge(req: JudgeRequest): Promise<JudgeResult> {\n // Resolve rubric\n let rubric: Rubric\n if (req.rubricName) {\n const found = getBuiltinRubric(req.rubricName)\n if (!found) {\n throw new WireError('rubric_not_found', `No built-in rubric named \"${req.rubricName}\".`, 404)\n }\n rubric = found\n } else if (req.rubric) {\n rubric = req.rubric\n } else {\n // refine() in the schema should already have caught this — defense in depth\n throw new WireError('validation_error', 'Provide either `rubricName` or `rubric`.', 422)\n }\n\n const startedAt = Date.now()\n const model = req.model ?? DEFAULT_JUDGE_MODEL\n\n const { value, result } = await callLlmJson<JudgeOutput>({\n model,\n messages: [\n { role: 'system', content: rubric.systemPrompt },\n { role: 'user', content: buildJudgePrompt(req.content, req.context) },\n ],\n jsonSchema: judgeOutputSchema(rubric),\n temperature: 0.0,\n timeoutMs: 60_000,\n })\n\n const output = validateJudgeOutput(value, rubric)\n\n const composite = compositeScore(output.dimensions, rubric)\n const durationMs = Date.now() - startedAt\n\n return {\n composite,\n dimensions: output.dimensions,\n failureModes: output.failureModes ?? [],\n wins: output.wins ?? [],\n rationale: output.rationale,\n rubricVersion: hashRubric(rubric),\n model: result.model,\n durationMs,\n }\n}\n\n// ── listRubrics ─────────────────────────────────────────────────────\n\nexport function handleListRubrics(): ListRubricsResponse {\n return { rubrics: listBuiltinRubrics() }\n}\n\n// ── version ─────────────────────────────────────────────────────────\n\nimport { readFileSync } from 'node:fs'\nimport { dirname, resolve } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nlet CACHED_VERSION: string | undefined\n\nfunction readPackageVersion(): string {\n if (CACHED_VERSION) return CACHED_VERSION\n // Walk up from this file looking for the nearest package.json.\n // In dist/ this is dist/.., in src/wire/ this is ../../package.json.\n const here = dirname(fileURLToPath(import.meta.url))\n const candidates = [\n resolve(here, '..', '..', 'package.json'), // src/wire → repo root\n resolve(here, '..', 'package.json'), // dist → repo root\n ]\n for (const path of candidates) {\n try {\n const pkg = JSON.parse(readFileSync(path, 'utf-8')) as { version?: string }\n if (pkg.version) {\n CACHED_VERSION = pkg.version\n return pkg.version\n }\n } catch {\n // try next\n }\n }\n return '0.0.0-unknown'\n}\n\nexport function handleVersion(): VersionResponse {\n return {\n package: '@tangle-network/agent-eval',\n version: readPackageVersion(),\n wireVersion: WIRE_VERSION,\n apiSurface: ['judge', 'listRubrics', 'version', 'feedback.ingest', 'traces.ingest'],\n }\n}\n\n// ── Ingestion handlers ───────────────────────────────────────────────\n\n/**\n * Pluggable stores the wire layer routes ingestion writes into. Both\n * are optional — when omitted, the corresponding endpoint returns 503.\n *\n * Production deployments wire a `FileSystemTraceStore` and\n * `FileSystemFeedbackTrajectoryStore` here. Tests substitute in-memory\n * stores.\n */\nexport interface IngestionStores {\n traceStore?: TraceStore\n feedbackStore?: FeedbackTrajectoryStore\n}\n\n/**\n * `POST /v1/traces/ingest` — accept a batch of `TraceEvent`s from the\n * production runtime. Best-effort: each event is appended independently;\n * one bad event does not poison the batch.\n *\n * Idempotency: the underlying store is append-only; consumers retrying\n * the same payload will get duplicate events. Consumers should\n * de-duplicate by `eventId` downstream — production traces frequently\n * land via at-least-once buses (Kafka, SQS) where dedup is unavoidable.\n */\nexport async function handleTracesIngest(\n req: TracesIngestRequest,\n stores: IngestionStores,\n): Promise<TracesIngestResponse> {\n if (!stores.traceStore) {\n throw new WireError(\n 'service_unavailable',\n 'No trace store configured on this server. Pass `traceStore` to `createApp`.',\n 503,\n )\n }\n const errors: Array<{ eventId: string; message: string }> = []\n let accepted = 0\n for (const event of req.events) {\n try {\n // The wire `TraceEvent` is structurally identical to the internal one.\n await stores.traceStore.appendEvent(event as InternalTraceEvent)\n accepted++\n } catch (err) {\n errors.push({\n eventId: event.eventId,\n message: err instanceof Error ? err.message : String(err),\n })\n }\n }\n return { accepted, rejected: errors.length, errors }\n}\n\n/**\n * `POST /v1/feedback` — accept a single `FeedbackTrajectory` from the\n * production runtime. Idempotent on `id`: re-posting the same trajectory\n * replaces the prior record.\n */\nexport async function handleFeedbackIngest(\n req: WireFeedbackTrajectory,\n stores: IngestionStores,\n): Promise<FeedbackIngestResponse> {\n if (!stores.feedbackStore) {\n throw new WireError(\n 'service_unavailable',\n 'No feedback store configured on this server. Pass `feedbackStore` to `createApp`.',\n 503,\n )\n }\n // The wire `FeedbackTrajectory` aligns 1:1 with the internal type;\n // cast through `unknown` since the wire schema is a Zod-inferred\n // structural type with optional fields the internal store consumes.\n await stores.feedbackStore.save(req as unknown as Parameters<FeedbackTrajectoryStore['save']>[0])\n return { id: req.id, persisted: true }\n}\n","/**\n * Build an OpenAPI spec from the wire schemas.\n *\n * The spec is the contract that other-language clients (Python, Rust,\n * Go, …) generate from. There is no hand-written client — clients are\n * derived artifacts of this file plus `schemas.ts`.\n *\n * Run `pnpm openapi` (defined in package.json) to write the spec to\n * `dist/openapi.json`. CI uses that file to regenerate the Python\n * client and gate the dual-publish workflow.\n */\nimport { OpenAPIRegistry, OpenApiGeneratorV31 } from '@asteasolutions/zod-to-openapi'\nimport type { OpenAPIObject } from 'openapi3-ts/oas31'\n\nimport {\n ErrorResponseSchema,\n FeedbackIngestResponseSchema,\n FeedbackTrajectorySchema,\n HealthResponseSchema,\n JudgeRequestSchema,\n JudgeResultSchema,\n ListRubricsResponseSchema,\n TracesIngestRequestSchema,\n TracesIngestResponseSchema,\n VersionResponseSchema,\n WIRE_VERSION,\n} from './schemas'\n\nexport function buildOpenApi(packageVersion: string): OpenAPIObject {\n const registry = new OpenAPIRegistry()\n\n // Components — each schema becomes a $ref-able component\n registry.register('JudgeRequest', JudgeRequestSchema)\n registry.register('JudgeResult', JudgeResultSchema)\n registry.register('ListRubricsResponse', ListRubricsResponseSchema)\n registry.register('VersionResponse', VersionResponseSchema)\n registry.register('HealthResponse', HealthResponseSchema)\n registry.register('ErrorResponse', ErrorResponseSchema)\n registry.register('TracesIngestRequest', TracesIngestRequestSchema)\n registry.register('TracesIngestResponse', TracesIngestResponseSchema)\n registry.register('FeedbackTrajectory', FeedbackTrajectorySchema)\n registry.register('FeedbackIngestResponse', FeedbackIngestResponseSchema)\n\n // Routes\n registry.registerPath({\n method: 'post',\n path: '/v1/judge',\n summary: 'Score a piece of content against a rubric',\n description:\n 'Runs the judging LLM with the named (or inline) rubric and returns dimension scores, detected failure modes, wins, and a composite score in 0..1.',\n request: {\n body: {\n content: {\n 'application/json': { schema: JudgeRequestSchema },\n },\n },\n },\n responses: {\n 200: {\n description: 'Successful judgement',\n content: { 'application/json': { schema: JudgeResultSchema } },\n },\n 400: {\n description: 'Validation error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 404: {\n description: 'Rubric not found',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 500: {\n description: 'Judge error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'get',\n path: '/v1/rubrics',\n summary: 'List built-in rubrics',\n description:\n 'Returns every rubric registered server-side, with their dimensions and stable rubricVersion hash.',\n responses: {\n 200: {\n description: 'Listing',\n content: { 'application/json': { schema: ListRubricsResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'get',\n path: '/v1/version',\n summary: 'Server and wire-protocol version',\n description: 'Match your client version to `version`; check `wireVersion` for compatibility.',\n responses: {\n 200: {\n description: 'Version info',\n content: { 'application/json': { schema: VersionResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'get',\n path: '/healthz',\n summary: 'Liveness check',\n responses: {\n 200: {\n description: 'OK',\n content: { 'application/json': { schema: HealthResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'post',\n path: '/v1/traces/ingest',\n summary: 'Ingest a batch of production TraceEvents',\n description:\n 'Append a batch of TraceEvents to the configured TraceStore. Accepts application/json ({events:[...]}) or application/x-ndjson (one event per line). Returns counts of accepted + rejected events.',\n request: {\n body: {\n content: {\n 'application/json': { schema: TracesIngestRequestSchema },\n 'application/x-ndjson': { schema: TracesIngestRequestSchema },\n },\n },\n },\n responses: {\n 200: {\n description: 'Ingestion summary',\n content: { 'application/json': { schema: TracesIngestResponseSchema } },\n },\n 400: {\n description: 'Validation error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 401: {\n description: 'Unauthorized (when bearer auth is configured)',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 503: {\n description: 'No trace store configured',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'post',\n path: '/v1/feedback',\n summary: 'Ingest a FeedbackTrajectory from production',\n description:\n 'Persist a single FeedbackTrajectory. Idempotent on trajectory.id — re-posting replaces the prior record. Used by production runtimes to forward user 👍/👎/edits into the eval substrate.',\n request: {\n body: {\n content: {\n 'application/json': { schema: FeedbackTrajectorySchema },\n },\n },\n },\n responses: {\n 200: {\n description: 'Persisted',\n content: { 'application/json': { schema: FeedbackIngestResponseSchema } },\n },\n 400: {\n description: 'Validation error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 401: {\n description: 'Unauthorized (when bearer auth is configured)',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 503: {\n description: 'No feedback store configured',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n },\n })\n\n const generator = new OpenApiGeneratorV31(registry.definitions)\n const doc = generator.generateDocument({\n openapi: '3.1.0',\n info: {\n title: '@tangle-network/agent-eval — wire protocol',\n version: packageVersion,\n description: `HTTP and stdio RPC interface to agent-eval. The TypeScript runtime is the source of truth; this spec is the contract that cross-language clients (Python, Rust, Go) generate from.\n\nWire-protocol version: ${WIRE_VERSION}. Bumps on breaking changes to request/response schemas.`,\n contact: { name: 'Tangle Network', url: 'https://github.com/tangle-network/agent-eval' },\n license: { name: 'MIT' },\n },\n servers: [{ url: 'http://localhost:5005', description: 'Local agent-eval serve' }],\n })\n const rubricRef = { $ref: '#/components/schemas/Rubric' } as const\n const commonJudgeFields = {\n content: { type: 'string', minLength: 1 },\n context: { type: 'object', additionalProperties: true },\n model: { type: 'string' },\n } as const\n doc.components ??= {}\n doc.components.schemas ??= {}\n doc.components.schemas.JudgeRequest = {\n oneOf: [\n {\n type: 'object',\n additionalProperties: false,\n required: ['rubricName', 'content'],\n properties: {\n rubricName: { type: 'string', minLength: 1 },\n ...commonJudgeFields,\n },\n },\n {\n type: 'object',\n additionalProperties: false,\n required: ['rubric', 'content'],\n properties: {\n rubric: rubricRef,\n ...commonJudgeFields,\n },\n },\n ],\n description: 'Judge request. Provide exactly one of rubricName or rubric.',\n }\n return doc\n}\n","/**\n * stdio RPC transport.\n *\n * For batch / cron use without a running server. The Python client falls\n * back to this when no server is reachable.\n *\n * Protocol (line-delimited JSON over stdin/stdout):\n * IN: one JSON object on stdin: {\"method\":\"judge\",\"params\":{...}}\n * OUT: one JSON object on stdout: {\"result\":{...}} or {\"error\":{...}}\n *\n * One request per process invocation. To pipeline many calls, the client\n * writes JSONL to stdin and reads JSONL from stdout — see batch mode below.\n */\nimport { handleJudge, handleListRubrics, handleVersion, WireError } from './handlers'\nimport { JudgeRequestSchema } from './schemas'\n\ninterface RpcRequest {\n method: 'judge' | 'listRubrics' | 'version'\n params?: unknown\n}\n\ninterface RpcSuccess {\n result: unknown\n}\n\ninterface RpcError {\n error: { code: string; message: string; details?: unknown }\n}\n\nexport async function dispatchRpc(req: RpcRequest): Promise<RpcSuccess | RpcError> {\n try {\n switch (req.method) {\n case 'judge': {\n const parsed = JudgeRequestSchema.safeParse(req.params)\n if (!parsed.success) {\n return {\n error: {\n code: 'validation_error',\n message: 'params did not match JudgeRequest schema.',\n details: parsed.error.issues,\n },\n }\n }\n return { result: await handleJudge(parsed.data) }\n }\n case 'listRubrics':\n return { result: handleListRubrics() }\n case 'version':\n return { result: handleVersion() }\n default:\n return {\n error: {\n code: 'unknown_method',\n message: `No such method: ${(req as { method: string }).method}`,\n },\n }\n }\n } catch (err) {\n if (err instanceof WireError) {\n return { error: { code: err.code, message: err.message, details: err.details } }\n }\n const message = err instanceof Error ? err.message : String(err)\n return { error: { code: 'internal_error', message } }\n }\n}\n\n// ── stdin/stdout driver ─────────────────────────────────────────────\n\nasync function readAll(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = []\n for await (const chunk of stream) {\n chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as string))\n }\n return Buffer.concat(chunks).toString('utf-8')\n}\n\n/** Read one JSON request from stdin, write one JSON response to stdout. */\nexport async function runRpcOnce(method?: string): Promise<number> {\n const raw = await readAll(process.stdin)\n let req: RpcRequest\n try {\n const body = JSON.parse(raw)\n req = method ? { method: method as RpcRequest['method'], params: body } : (body as RpcRequest)\n } catch (err) {\n process.stdout.write(\n `${JSON.stringify({\n error: {\n code: 'parse_error',\n message: `stdin was not valid JSON: ${err instanceof Error ? err.message : String(err)}`,\n },\n })}\\n`,\n )\n return 1\n }\n const out = await dispatchRpc(req)\n process.stdout.write(`${JSON.stringify(out)}\\n`)\n return 'error' in out ? 1 : 0\n}\n\n/** Read JSONL requests from stdin, write JSONL responses to stdout. */\nexport async function runRpcBatch(method?: string): Promise<number> {\n const raw = await readAll(process.stdin)\n const lines = raw.split('\\n').filter((l) => l.trim().length > 0)\n let exitCode = 0\n for (const line of lines) {\n let req: RpcRequest\n try {\n const body = JSON.parse(line)\n req = method ? { method: method as RpcRequest['method'], params: body } : (body as RpcRequest)\n } catch (err) {\n process.stdout.write(\n `${JSON.stringify({\n error: {\n code: 'parse_error',\n message: `line was not valid JSON: ${err instanceof Error ? err.message : String(err)}`,\n },\n })}\\n`,\n )\n exitCode = 1\n continue\n }\n const out = await dispatchRpc(req)\n process.stdout.write(`${JSON.stringify(out)}\\n`)\n if ('error' in out) exitCode = 1\n }\n return exitCode\n}\n","/**\n * HTTP transport for the wire protocol.\n *\n * Hono + @hono/node-server. Every endpoint:\n * 1. Validates the request against its Zod schema.\n * 2. Calls the matching handler in `handlers.ts`.\n * 3. Renders 4xx for `WireError` with structured body, 500 for unexpected.\n *\n * The server holds optional `IngestionStores` (passed to `createApp`)\n * to receive production traces and user feedback. With no stores wired,\n * the ingestion endpoints return 503 — read endpoints (`/v1/judge`,\n * `/v1/rubrics`, `/v1/version`) remain fully functional.\n *\n * Run via `agent-eval serve --port 5005`.\n */\nimport { type ServerType, serve } from '@hono/node-server'\nimport { Hono } from 'hono'\nimport { cors } from 'hono/cors'\n\nimport {\n handleFeedbackIngest,\n handleJudge,\n handleListRubrics,\n handleTracesIngest,\n handleVersion,\n type IngestionStores,\n WireError,\n} from './handlers'\nimport { buildOpenApi } from './openapi'\nimport { FeedbackTrajectorySchema, JudgeRequestSchema, TracesIngestRequestSchema } from './schemas'\n\nconst STARTED_AT = Date.now()\n\nexport interface CreateAppOptions {\n /** Stores wired to the ingestion endpoints. */\n stores?: IngestionStores\n /**\n * Bearer-token auth. When provided, every endpoint EXCEPT `/healthz`\n * and `/v1/version` requires `Authorization: Bearer <token>`. The\n * token may be a static string OR a function for time-bounded /\n * rotating tokens.\n *\n * Recommended for any server that accepts ingestion writes from the\n * public internet. Read-only deployments may omit it.\n */\n auth?: {\n bearer: string | ((token: string) => boolean | Promise<boolean>)\n }\n}\n\nconst AUTH_EXEMPT_PATHS = new Set(['/healthz', '/v1/version', '/openapi.json'])\n\nexport function createApp(opts: CreateAppOptions = {}) {\n const app = new Hono()\n\n app.use('*', cors())\n\n // Bearer-token middleware (only attached when configured).\n if (opts.auth) {\n const verify = opts.auth.bearer\n app.use('*', async (c, next) => {\n const path = new URL(c.req.url).pathname\n if (AUTH_EXEMPT_PATHS.has(path)) return next()\n const raw = c.req.header('authorization') ?? ''\n const match = raw.match(/^Bearer\\s+(.+)$/i)\n if (!match) {\n throw new WireError('unauthorized', 'Missing or malformed Authorization header.', 401)\n }\n const token = match[1] as string\n const ok = typeof verify === 'string' ? token === verify : await verify(token)\n if (!ok) {\n throw new WireError('unauthorized', 'Invalid bearer token.', 401)\n }\n return next()\n })\n }\n\n app.onError((err, c) => {\n if (err instanceof WireError) {\n const status = err.status as 400 | 401 | 404 | 422 | 500 | 503\n return c.json(\n { error: { code: err.code, message: err.message, details: err.details } },\n status,\n )\n }\n // Unexpected — log and return generic 500 without leaking internals.\n console.error('[agent-eval] unhandled error:', err)\n return c.json({ error: { code: 'internal_error', message: 'Internal server error.' } }, 500)\n })\n\n // ── Health ──\n app.get('/healthz', (c) =>\n c.json({ status: 'ok' as const, uptimeSec: (Date.now() - STARTED_AT) / 1000 }),\n )\n\n // ── Version ──\n app.get('/v1/version', (c) => c.json(handleVersion()))\n\n // ── Rubrics ──\n app.get('/v1/rubrics', (c) => c.json(handleListRubrics()))\n\n // ── Judge ──\n app.post('/v1/judge', async (c) => {\n const raw = await c.req.json().catch(() => null)\n if (raw == null) {\n throw new WireError('validation_error', 'Request body must be JSON.', 400)\n }\n const parsed = JudgeRequestSchema.safeParse(raw)\n if (!parsed.success) {\n throw new WireError(\n 'validation_error',\n 'Request did not match JudgeRequest schema.',\n 400,\n parsed.error.issues,\n )\n }\n const result = await handleJudge(parsed.data)\n return c.json(result)\n })\n\n // ── Traces ingest (NDJSON-friendly: accepts either {events:[...]} or NDJSON) ──\n app.post('/v1/traces/ingest', async (c) => {\n const contentType = c.req.header('content-type') ?? ''\n let payload: unknown\n if (contentType.includes('application/x-ndjson')) {\n const text = await c.req.text()\n const events = text\n .split('\\n')\n .map((line) => line.trim())\n .filter((line) => line.length > 0)\n .map((line) => {\n try {\n return JSON.parse(line)\n } catch {\n throw new WireError(\n 'validation_error',\n 'NDJSON line did not parse as JSON.',\n 400,\n line.slice(0, 200),\n )\n }\n })\n payload = { events }\n } else {\n payload = await c.req.json().catch(() => null)\n }\n if (payload == null) {\n throw new WireError('validation_error', 'Request body must be JSON or NDJSON.', 400)\n }\n const parsed = TracesIngestRequestSchema.safeParse(payload)\n if (!parsed.success) {\n throw new WireError(\n 'validation_error',\n 'Request did not match TracesIngestRequest schema.',\n 400,\n parsed.error.issues,\n )\n }\n const result = await handleTracesIngest(parsed.data, opts.stores ?? {})\n return c.json(result)\n })\n\n // ── Feedback ingest ──\n app.post('/v1/feedback', async (c) => {\n const raw = await c.req.json().catch(() => null)\n if (raw == null) {\n throw new WireError('validation_error', 'Request body must be JSON.', 400)\n }\n const parsed = FeedbackTrajectorySchema.safeParse(raw)\n if (!parsed.success) {\n throw new WireError(\n 'validation_error',\n 'Request did not match FeedbackTrajectory schema.',\n 400,\n parsed.error.issues,\n )\n }\n const result = await handleFeedbackIngest(parsed.data, opts.stores ?? {})\n return c.json(result)\n })\n\n // ── OpenAPI spec ──\n app.get('/openapi.json', (c) => c.json(buildOpenApi(handleVersion().version)))\n\n return app\n}\n\nexport interface ServeOptions extends CreateAppOptions {\n /** Default 5005. */\n port?: number\n /** Default '127.0.0.1'. Set to '0.0.0.0' to listen on all interfaces. */\n host?: string\n}\n\nexport function startServer(opts: ServeOptions = {}): ServerType {\n const app = createApp(opts)\n const port = opts.port ?? 5005\n const host = opts.host ?? '127.0.0.1'\n return serve({ fetch: app.fetch, port, hostname: host }, ({ address, port: actualPort }) => {\n // eslint-disable-next-line no-console\n console.log(`[agent-eval] serving on http://${address}:${actualPort}`)\n })\n}\n"],"mappings":";;;;;AAYA,SAAS,4BAA4B;AACrC,SAAS,SAAS;AAElB,qBAAqB,CAAC;AAIf,IAAM,wBAAwB,EAClC,OAAO;AAAA,EACN,IAAI,EACD,OAAO,EACP,IAAI,CAAC,EACL,SAAS,gFAA2E;AAAA,EACvF,aAAa,EACV,OAAO,EACP,IAAI,CAAC,EACL,SAAS,sEAAsE;AAAA,EAClF,QAAQ,EACL,OAAO,EACP,IAAI,CAAC,EACL,QAAQ,CAAC,EACT,SAAS,gEAAgE;AAAA,EAC5E,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS,gDAAgD;AAAA,EACpF,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS,gDAAgD;AACtF,CAAC,EACA,QAAQ,iBAAiB;AAErB,IAAM,oBAAoB,EAC9B,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mEAA8D;AAAA,EAC7F,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mDAAmD;AAC7F,CAAC,EACA,QAAQ,aAAa;AAIjB,IAAM,eAAe,EACzB,OAAO;AAAA,EACN,MAAM,EACH,OAAO,EACP,IAAI,CAAC,EACL,SAAS,4EAAuE;AAAA,EACnF,aAAa,EACV,OAAO,EACP,IAAI,CAAC,EACL,SAAS,0DAA0D;AAAA,EACtE,cAAc,EACX,OAAO,EACP,IAAI,CAAC,EACL;AAAA,IACC;AAAA,EACF;AAAA,EACF,YAAY,EACT,MAAM,qBAAqB,EAC3B,IAAI,CAAC,EACL,SAAS,+DAA+D;AAAA,EAC3E,cAAc,EACX,MAAM,iBAAiB,EACvB,QAAQ,CAAC,CAAC,EACV,SAAS,iFAAiF;AAAA,EAC7F,MAAM,EACH,MAAM,iBAAiB,EACvB,QAAQ,CAAC,CAAC,EACV,SAAS,uEAAuE;AACrF,CAAC,EACA,QAAQ,QAAQ;AAIZ,IAAM,qBAAqB,EAC/B,OAAO;AAAA,EACN,YAAY,EACT,OAAO,EACP,SAAS,EACT,SAAS,kEAAkE;AAAA,EAC9E,QAAQ,aAAa,SAAS,EAAE;AAAA,IAC9B;AAAA,EACF;AAAA,EACA,SAAS,EACN,OAAO,EACP,IAAI,CAAC,EACL,SAAS,uFAAkF;AAAA,EAC9F,SAAS,EACN,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAC9B,SAAS,EACT;AAAA,IACC;AAAA,EACF;AAAA,EACF,OAAO,EACJ,OAAO,EACP,SAAS,EACT,SAAS,+EAA+E;AAC7F,CAAC,EACA,OAAO,CAAC,MAAM,QAAQ,EAAE,UAAU,MAAM,QAAQ,EAAE,MAAM,GAAG;AAAA,EAC1D,SAAS;AACX,CAAC,EACA,QAAQ,cAAc;AAElB,IAAM,oBAAoB,EAC9B,OAAO;AAAA,EACN,WAAW,EACR,OAAO,EACP,IAAI,CAAC,EACL,IAAI,CAAC,EACL,SAAS,iFAAiF;AAAA,EAC7F,YAAY,EACT,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAC7B,SAAS,mDAAmD;AAAA,EAC/D,cAAc,EACX,MAAM,EAAE,OAAO,CAAC,EAChB,QAAQ,CAAC,CAAC,EACV,SAAS,+EAA+E;AAAA,EAC3F,MAAM,EACH,MAAM,EAAE,OAAO,CAAC,EAChB,QAAQ,CAAC,CAAC,EACV,SAAS,8DAA8D;AAAA,EAC1E,WAAW,EACR,OAAO,EACP,SAAS,yEAAyE;AAAA,EACrF,eAAe,EACZ,OAAO,EACP;AAAA,IACC;AAAA,EACF;AAAA,EACF,OAAO,EAAE,OAAO,EAAE,SAAS,yDAAyD;AAAA,EACpF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,qCAAqC;AAC3F,CAAC,EACA,QAAQ,aAAa;AAIjB,IAAM,mBAAmB,EAC7B,OAAO;AAAA,EACN,MAAM,EAAE,OAAO,EAAE,SAAS,yCAAyC;AAAA,EACnE,aAAa,EAAE,OAAO,EAAE,SAAS,4BAA4B;AAAA,EAC7D,YAAY,EACT,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,GAAG,aAAa,EAAE,OAAO,GAAG,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC,EAC/E,SAAS,kDAAkD;AAAA,EAC9D,cAAc,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,uCAAuC;AAAA,EAC9F,eAAe,EAAE,OAAO,EAAE,SAAS,8DAAyD;AAC9F,CAAC,EACA,QAAQ,YAAY;AAEhB,IAAM,4BAA4B,EACtC,OAAO;AAAA,EACN,SAAS,EAAE,MAAM,gBAAgB;AACnC,CAAC,EACA,QAAQ,qBAAqB;AAIzB,IAAM,wBAAwB,EAClC,OAAO;AAAA,EACN,SAAS,EAAE,OAAO,EAAE,SAAS,qDAAqD;AAAA,EAClF,SAAS,EAAE,OAAO,EAAE,SAAS,0DAA0D;AAAA,EACvF,aAAa,EACV,OAAO,EACP;AAAA,IACC;AAAA,EACF;AAAA,EACF,YAAY,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,SAAS,iCAAiC;AAC5E,CAAC,EACA,QAAQ,iBAAiB;AAErB,IAAM,uBAAuB,EACjC,OAAO;AAAA,EACN,QAAQ,EAAE,QAAQ,IAAI;AAAA,EACtB,WAAW,EAAE,OAAO;AACtB,CAAC,EACA,QAAQ,gBAAgB;AAUpB,IAAM,mBAAmB,EAC7B,OAAO;AAAA,EACN,SAAS,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4CAA4C;AAAA,EAChF,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4BAA4B;AAAA,EAC9D,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,sCAAsC;AAAA,EAC7E,MAAM,EACH,KAAK;AAAA,IACJ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC,EACA,SAAS,yEAAoE;AAAA,EAChF,WAAW,EACR,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,kEAAkE;AAAA,EAC9E,SAAS,EACN,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAC9B,SAAS,sDAAiD;AAC/D,CAAC,EACA,QAAQ,YAAY;AAEhB,IAAM,4BAA4B,EACtC,OAAO;AAAA,EACN,QAAQ,EACL,MAAM,gBAAgB,EACtB,IAAI,CAAC,EACL,IAAI,GAAM,EACV,SAAS,4EAAuE;AACrF,CAAC,EACA,QAAQ,qBAAqB;AAEzB,IAAM,6BAA6B,EACvC,OAAO;AAAA,EACN,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,EAC/E,UAAU,EACP,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,uEAAkE;AAAA,EAC9E,QAAQ,EACL;AAAA,IACC,EAAE,OAAO;AAAA,MACP,SAAS,EAAE,OAAO,EAAE,SAAS,iCAAiC;AAAA,MAC9D,SAAS,EAAE,OAAO,EAAE,SAAS,6BAA6B;AAAA,IAC5D,CAAC;AAAA,EACH,EACC,QAAQ,CAAC,CAAC;AACf,CAAC,EACA,QAAQ,sBAAsB;AAE1B,IAAM,sBAAsB,EAChC,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,SAAS;AAAA,EACxB,QAAQ,EAAE,KAAK,CAAC,QAAQ,SAAS,eAAe,UAAU,UAAU,QAAQ,CAAC;AAAA,EAC7E,MAAM,EAAE,KAAK;AAAA,IACX;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AAAA,EACD,OAAO,EAAE,QAAQ;AAAA,EACjB,QAAQ,EAAE,OAAO,EAAE,SAAS;AAAA,EAC5B,UAAU,EAAE,KAAK,CAAC,QAAQ,WAAW,SAAS,UAAU,CAAC,EAAE,SAAS;AAAA,EACpE,WAAW,EAAE,OAAO,EAAE,SAAS,eAAe;AAAA,EAC9C,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AACvD,CAAC,EACA,QAAQ,eAAe;AAEnB,IAAM,wBAAwB,EAClC,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACpB,WAAW,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACxC,cAAc,EAAE,KAAK;AAAA,IACnB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AAAA,EACD,UAAU,EAAE,QAAQ;AAAA,EACpB,SAAS,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACvC,gBAAgB,EACb,OAAO;AAAA,IACN,MAAM,EAAE,OAAO;AAAA,IACf,MAAM,EAAE,KAAK,CAAC,OAAO,UAAU,MAAM,CAAC,EAAE,SAAS;AAAA,IACjD,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,IAC7B,oBAAoB,EAAE,QAAQ,EAAE,SAAS;AAAA,IACzC,kBAAkB,EAAE,QAAQ,EAAE,SAAS;AAAA,IACvC,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACvD,CAAC,EACA,SAAS;AAAA,EACZ,UAAU,EAAE,MAAM,mBAAmB,EAAE,SAAS;AAAA,EAChD,WAAW,EAAE,OAAO;AAAA,EACpB,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AACvD,CAAC,EACA,QAAQ,iBAAiB;AAErB,IAAM,2BAA2B,EACrC,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,gDAAgD;AAAA,EAC/E,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,EAChC,MAAM,EAAE,OAAO;AAAA,IACb,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,IACxB,SAAS,EAAE,QAAQ,EAAE,SAAS;AAAA,EAChC,CAAC;AAAA,EACD,UAAU,EAAE,MAAM,qBAAqB,EAAE,QAAQ,CAAC,CAAC;AAAA,EACnD,QAAQ,EAAE,MAAM,mBAAmB,EAAE,QAAQ,CAAC,CAAC;AAAA,EAC/C,SAAS,EACN,OAAO;AAAA,IACN,SAAS,EAAE,QAAQ,EAAE,SAAS;AAAA,IAC9B,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,IAC3B,SAAS,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAAE,SAAS;AAAA,IACnD,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,IAC7B,QAAQ,EAAE,OAAO,EAAE,SAAS;AAAA,IAC5B,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,IAChC,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACvD,CAAC,EACA,SAAS;AAAA,EACZ,OAAO,EAAE,KAAK,CAAC,SAAS,OAAO,QAAQ,SAAS,CAAC,EAAE,SAAS;AAAA,EAC5D,MAAM,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAAE,SAAS;AAAA,EAChD,WAAW,EAAE,OAAO,EAAE,SAAS,eAAe;AAAA,EAC9C,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AACvD,CAAC,EACA,QAAQ,oBAAoB;AAExB,IAAM,+BAA+B,EACzC,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,SAAS,mCAAmC;AAAA,EAC3D,WAAW,EAAE,QAAQ,EAAE,SAAS,wDAAwD;AAC1F,CAAC,EACA,QAAQ,wBAAwB;AAU5B,IAAM,sBAAsB,EAChC,OAAO;AAAA,EACN,OAAO,EACJ,OAAO;AAAA,IACN,MAAM,EACH,OAAO,EACP;AAAA,MACC;AAAA,IACF;AAAA,IACF,SAAS,EAAE,OAAO,EAAE,SAAS,yBAAyB;AAAA,IACtD,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS,6BAA6B;AAAA,EACxE,CAAC,EACA,SAAS,+DAA+D;AAC7E,CAAC,EACA,QAAQ,eAAe;AAoBnB,IAAM,eAAe;AAMrB,SAAS,WAAW,QAAwB;AACjD,QAAM,SAAS,gBAAgB,MAAM;AACrC,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,QAAK,IAAI,KAAM,OAAO,WAAW,CAAC;AAAA,EACpC;AAEA,SAAO,GAAG,OAAO,IAAI,KAAK,MAAM,GAAG,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC;AAClE;AAEA,SAAS,gBAAgB,OAAwB;AAC/C,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,IAAI,MAAM,IAAI,CAAC,SAAS,gBAAgB,IAAI,CAAC,EAAE,KAAK,GAAG,CAAC;AACzF,MAAI,SAAS,OAAO,UAAU,UAAU;AACtC,UAAM,UAAU,OAAO,QAAQ,KAAgC,EAC5D,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC,EACrC,IAAI,CAAC,CAAC,KAAK,IAAI,MAAM,GAAG,KAAK,UAAU,GAAG,CAAC,IAAI,gBAAgB,IAAI,CAAC,EAAE;AACzE,WAAO,IAAI,QAAQ,KAAK,GAAG,CAAC;AAAA,EAC9B;AACA,SAAO,KAAK,UAAU,KAAK;AAC7B;;;ACjYA,IAAM,YAAoB;AAAA,EACxB,MAAM;AAAA,EACN,aACE;AAAA,EACF,cAAc;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAuBd,YAAY;AAAA,IACV;AAAA,MACE,IAAI;AAAA,MACJ,aAAa;AAAA,MACb,QAAQ;AAAA,MACR,KAAK;AAAA,MACL,KAAK;AAAA,IACP;AAAA,IACA;AAAA,MACE,IAAI;AAAA,MACJ,aAAa;AAAA,MACb,QAAQ;AAAA,MACR,KAAK;AAAA,MACL,KAAK;AAAA,IACP;AAAA,IACA;AAAA,MACE,IAAI;AAAA,MACJ,aAAa;AAAA,MACb,QAAQ;AAAA,MACR,KAAK;AAAA,MACL,KAAK;AAAA,IACP;AAAA,EACF;AAAA,EACA,cAAc;AAAA,IACZ,EAAE,IAAI,cAAc,aAAa,qCAAqC;AAAA,IACtE,EAAE,IAAI,kBAAkB,aAAa,sCAAsC;AAAA,IAC3E,EAAE,IAAI,eAAe,aAAa,oCAAoC;AAAA,IACtE,EAAE,IAAI,WAAW,aAAa,kCAAkC;AAAA,IAChE,EAAE,IAAI,mBAAmB,aAAa,qCAAqC;AAAA,IAC3E,EAAE,IAAI,WAAW,aAAa,oCAAoC;AAAA,IAClE,EAAE,IAAI,eAAe,aAAa,uCAAuC;AAAA,EAC3E;AAAA,EACA,MAAM;AAAA,IACJ,EAAE,IAAI,sBAAsB,aAAa,qCAAqC;AAAA,IAC9E,EAAE,IAAI,iBAAiB,aAAa,4BAA4B;AAAA,IAChE,EAAE,IAAI,0BAA0B,aAAa,wBAAwB;AAAA,IACrE,EAAE,IAAI,kBAAkB,aAAa,oCAAoC;AAAA,EAC3E;AACF;AAIO,IAAM,kBAA0C;AAAA,EACrD,aAAa;AACf;AAGO,SAAS,iBAAiB,MAAkC;AACjE,SAAO,gBAAgB,IAAI;AAC7B;AAGO,SAAS,qBAAqB;AACnC,SAAO,OAAO,OAAO,eAAe,EAAE,IAAI,CAAC,OAAO;AAAA,IAChD,MAAM,EAAE;AAAA,IACR,aAAa,EAAE;AAAA,IACf,YAAY,EAAE,WAAW,IAAI,CAAC,OAAO;AAAA,MACnC,IAAI,EAAE;AAAA,MACN,aAAa,EAAE;AAAA,MACf,QAAQ,EAAE;AAAA,IACZ,EAAE;AAAA,IACF,cAAc,EAAE,aAAa,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,IAC5C,eAAe,WAAW,CAAC;AAAA,EAC7B,EAAE;AACJ;;;ACwHA,SAAS,oBAAoB;AAC7B,SAAS,SAAS,eAAe;AACjC,SAAS,qBAAqB;AA9MvB,IAAM,YAAN,cAAwB,MAAM;AAAA,EACnC,YACkB,MAChB,SACgB,SAAiB,KACjB,SAChB;AACA,UAAM,OAAO;AALG;AAEA;AACA;AAGhB,SAAK,OAAO;AAAA,EACd;AAAA,EAPkB;AAAA,EAEA;AAAA,EACA;AAKpB;AAKA,SAAS,kBAAkB,QAAgB;AACzC,SAAO;AAAA,IACL,MAAM;AAAA,IACN,QAAQ;AAAA,MACN,MAAM;AAAA,MACN,sBAAsB;AAAA,MACtB,YAAY;AAAA,QACV,YAAY;AAAA,UACV,MAAM;AAAA,UACN,sBAAsB;AAAA,UACtB,YAAY,OAAO;AAAA,YACjB,OAAO,WAAW,IAAI,CAAC,MAAM;AAAA,cAC3B,EAAE;AAAA,cACF,EAAE,MAAM,UAAU,SAAS,EAAE,KAAK,SAAS,EAAE,IAAI;AAAA,YACnD,CAAC;AAAA,UACH;AAAA,UACA,UAAU,OAAO,WAAW,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,QAC7C;AAAA,QACA,cAAc;AAAA,UACZ,MAAM;AAAA,UACN,OAAO,EAAE,MAAM,UAAU,MAAM,OAAO,aAAa,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE;AAAA,QACtE;AAAA,QACA,MAAM;AAAA,UACJ,MAAM;AAAA,UACN,OAAO,EAAE,MAAM,UAAU,MAAM,OAAO,KAAK,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE;AAAA,QAC9D;AAAA,QACA,WAAW,EAAE,MAAM,SAAS;AAAA,MAC9B;AAAA,MACA,UAAU,CAAC,cAAc,WAAW;AAAA,IACtC;AAAA,EACF;AACF;AASA,SAAS,oBAAoB,OAAgB,QAA6B;AACxE,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACvC,UAAM,IAAI,UAAU,eAAe,oCAAoC,KAAK,KAAK;AAAA,EACnF;AACA,QAAM,MAAM;AACZ,QAAM,gBAAgB,IAAI;AAC1B,MAAI,CAAC,iBAAiB,OAAO,kBAAkB,YAAY,MAAM,QAAQ,aAAa,GAAG;AACvF,UAAM,IAAI,UAAU,eAAe,wCAAwC,KAAK,KAAK;AAAA,EACvF;AAEA,QAAM,aAAqC,CAAC;AAC5C,QAAM,kBAAkB;AACxB,aAAW,OAAO,OAAO,YAAY;AACnC,UAAM,QAAQ,gBAAgB,IAAI,EAAE;AACpC,QACE,OAAO,UAAU,YACjB,CAAC,OAAO,SAAS,KAAK,KACtB,QAAQ,IAAI,OACZ,QAAQ,IAAI,KACZ;AACA,YAAM,IAAI;AAAA,QACR;AAAA,QACA,+CAA+C,IAAI,EAAE;AAAA,QACrD;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,eAAW,IAAI,EAAE,IAAI;AAAA,EACvB;AAEA,QAAM,kBAAkB,IAAI,IAAI,OAAO,aAAa,IAAI,CAAC,SAAS,KAAK,EAAE,CAAC;AAC1E,QAAM,cAAc,IAAI,IAAI,OAAO,KAAK,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;AAC5D,QAAM,eAAe,gBAAgB,IAAI,cAAc,iBAAiB,gBAAgB,KAAK;AAC7F,QAAM,OAAO,gBAAgB,IAAI,MAAM,aAAa,QAAQ,KAAK;AACjE,MAAI,OAAO,IAAI,cAAc,YAAY,IAAI,UAAU,KAAK,EAAE,WAAW,GAAG;AAC1E,UAAM,IAAI,UAAU,eAAe,qCAAqC,KAAK,KAAK;AAAA,EACpF;AAEA,SAAO,EAAE,YAAY,cAAc,MAAM,WAAW,IAAI,UAAU;AACpE;AAEA,SAAS,gBACP,KACA,SACA,OACA,UACU;AACV,MAAI,QAAQ,OAAW,QAAO,CAAC;AAC/B,MAAI,CAAC,MAAM,QAAQ,GAAG,GAAG;AACvB,UAAM,IAAI,UAAU,eAAe,4BAA4B,KAAK,KAAK,KAAK,QAAQ;AAAA,EACxF;AACA,QAAM,MAAgB,CAAC;AACvB,aAAW,QAAQ,KAAK;AACtB,QAAI,OAAO,SAAS,YAAY,CAAC,QAAQ,IAAI,IAAI,GAAG;AAClD,YAAM,IAAI;AAAA,QACR;AAAA,QACA,0BAA0B,KAAK,QAAQ,OAAO,IAAI,CAAC;AAAA,QACnD;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,QAAI,KAAK,IAAI;AAAA,EACf;AACA,SAAO;AACT;AAEA,SAAS,eAAe,YAAoC,QAAwB;AAClF,MAAI,WAAW;AACf,MAAI,cAAc;AAClB,aAAW,OAAO,OAAO,YAAY;AACnC,UAAM,MAAM,WAAW,IAAI,EAAE,KAAK;AAClC,UAAM,QAAQ,IAAI,MAAM,IAAI,OAAO;AACnC,UAAM,aAAa,KAAK,IAAI,GAAG,KAAK,IAAI,IAAI,MAAM,IAAI,OAAO,KAAK,CAAC;AACnE,gBAAY,aAAa,IAAI;AAC7B,mBAAe,IAAI;AAAA,EACrB;AACA,SAAO,cAAc,IAAI,WAAW,cAAc;AACpD;AAEA,SAAS,iBAAiB,SAAiB,SAA0B;AACnE,QAAM,MAAM,WAAW,OAAO,KAAK,OAAiB,EAAE,SAAS,KAAK,UAAU,OAAO,IAAI;AACzF,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA,MAAM,yCAAyC;AAAA,IAC/C,MAAM,MAAM;AAAA,EACd,EACG,OAAO,OAAO,EACd,KAAK,IAAI;AACd;AAEA,IAAM,sBAAsB;AAE5B,eAAsB,YAAY,KAAyC;AAEzE,MAAI;AACJ,MAAI,IAAI,YAAY;AAClB,UAAM,QAAQ,iBAAiB,IAAI,UAAU;AAC7C,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,UAAU,oBAAoB,6BAA6B,IAAI,UAAU,MAAM,GAAG;AAAA,IAC9F;AACA,aAAS;AAAA,EACX,WAAW,IAAI,QAAQ;AACrB,aAAS,IAAI;AAAA,EACf,OAAO;AAEL,UAAM,IAAI,UAAU,oBAAoB,4CAA4C,GAAG;AAAA,EACzF;AAEA,QAAM,YAAY,KAAK,IAAI;AAC3B,QAAM,QAAQ,IAAI,SAAS;AAE3B,QAAM,EAAE,OAAO,OAAO,IAAI,MAAM,YAAyB;AAAA,IACvD;AAAA,IACA,UAAU;AAAA,MACR,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa;AAAA,MAC/C,EAAE,MAAM,QAAQ,SAAS,iBAAiB,IAAI,SAAS,IAAI,OAAO,EAAE;AAAA,IACtE;AAAA,IACA,YAAY,kBAAkB,MAAM;AAAA,IACpC,aAAa;AAAA,IACb,WAAW;AAAA,EACb,CAAC;AAED,QAAM,SAAS,oBAAoB,OAAO,MAAM;AAEhD,QAAM,YAAY,eAAe,OAAO,YAAY,MAAM;AAC1D,QAAM,aAAa,KAAK,IAAI,IAAI;AAEhC,SAAO;AAAA,IACL;AAAA,IACA,YAAY,OAAO;AAAA,IACnB,cAAc,OAAO,gBAAgB,CAAC;AAAA,IACtC,MAAM,OAAO,QAAQ,CAAC;AAAA,IACtB,WAAW,OAAO;AAAA,IAClB,eAAe,WAAW,MAAM;AAAA,IAChC,OAAO,OAAO;AAAA,IACd;AAAA,EACF;AACF;AAIO,SAAS,oBAAyC;AACvD,SAAO,EAAE,SAAS,mBAAmB,EAAE;AACzC;AAQA,IAAI;AAEJ,SAAS,qBAA6B;AACpC,MAAI,eAAgB,QAAO;AAG3B,QAAM,OAAO,QAAQ,cAAc,YAAY,GAAG,CAAC;AACnD,QAAM,aAAa;AAAA,IACjB,QAAQ,MAAM,MAAM,MAAM,cAAc;AAAA;AAAA,IACxC,QAAQ,MAAM,MAAM,cAAc;AAAA;AAAA,EACpC;AACA,aAAW,QAAQ,YAAY;AAC7B,QAAI;AACF,YAAM,MAAM,KAAK,MAAM,aAAa,MAAM,OAAO,CAAC;AAClD,UAAI,IAAI,SAAS;AACf,yBAAiB,IAAI;AACrB,eAAO,IAAI;AAAA,MACb;AAAA,IACF,QAAQ;AAAA,IAER;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,gBAAiC;AAC/C,SAAO;AAAA,IACL,SAAS;AAAA,IACT,SAAS,mBAAmB;AAAA,IAC5B,aAAa;AAAA,IACb,YAAY,CAAC,SAAS,eAAe,WAAW,mBAAmB,eAAe;AAAA,EACpF;AACF;AA2BA,eAAsB,mBACpB,KACA,QAC+B;AAC/B,MAAI,CAAC,OAAO,YAAY;AACtB,UAAM,IAAI;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACA,QAAM,SAAsD,CAAC;AAC7D,MAAI,WAAW;AACf,aAAW,SAAS,IAAI,QAAQ;AAC9B,QAAI;AAEF,YAAM,OAAO,WAAW,YAAY,KAA2B;AAC/D;AAAA,IACF,SAAS,KAAK;AACZ,aAAO,KAAK;AAAA,QACV,SAAS,MAAM;AAAA,QACf,SAAS,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MAC1D,CAAC;AAAA,IACH;AAAA,EACF;AACA,SAAO,EAAE,UAAU,UAAU,OAAO,QAAQ,OAAO;AACrD;AAOA,eAAsB,qBACpB,KACA,QACiC;AACjC,MAAI,CAAC,OAAO,eAAe;AACzB,UAAM,IAAI;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAIA,QAAM,OAAO,cAAc,KAAK,GAAgE;AAChG,SAAO,EAAE,IAAI,IAAI,IAAI,WAAW,KAAK;AACvC;;;AChVA,SAAS,iBAAiB,2BAA2B;AAiB9C,SAAS,aAAa,gBAAuC;AAClE,QAAM,WAAW,IAAI,gBAAgB;AAGrC,WAAS,SAAS,gBAAgB,kBAAkB;AACpD,WAAS,SAAS,eAAe,iBAAiB;AAClD,WAAS,SAAS,uBAAuB,yBAAyB;AAClE,WAAS,SAAS,mBAAmB,qBAAqB;AAC1D,WAAS,SAAS,kBAAkB,oBAAoB;AACxD,WAAS,SAAS,iBAAiB,mBAAmB;AACtD,WAAS,SAAS,uBAAuB,yBAAyB;AAClE,WAAS,SAAS,wBAAwB,0BAA0B;AACpE,WAAS,SAAS,sBAAsB,wBAAwB;AAChE,WAAS,SAAS,0BAA0B,4BAA4B;AAGxE,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,SAAS;AAAA,UACP,oBAAoB,EAAE,QAAQ,mBAAmB;AAAA,QACnD;AAAA,MACF;AAAA,IACF;AAAA,IACA,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,kBAAkB,EAAE;AAAA,MAC/D;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,0BAA0B,EAAE;AAAA,MACvE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aAAa;AAAA,IACb,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,sBAAsB,EAAE;AAAA,MACnE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,qBAAqB,EAAE;AAAA,MAClE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,SAAS;AAAA,UACP,oBAAoB,EAAE,QAAQ,0BAA0B;AAAA,UACxD,wBAAwB,EAAE,QAAQ,0BAA0B;AAAA,QAC9D;AAAA,MACF;AAAA,IACF;AAAA,IACA,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,2BAA2B,EAAE;AAAA,MACxE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,SAAS;AAAA,UACP,oBAAoB,EAAE,QAAQ,yBAAyB;AAAA,QACzD;AAAA,MACF;AAAA,IACF;AAAA,IACA,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,6BAA6B,EAAE;AAAA,MAC1E;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,IACF;AAAA,EACF,CAAC;AAED,QAAM,YAAY,IAAI,oBAAoB,SAAS,WAAW;AAC9D,QAAM,MAAM,UAAU,iBAAiB;AAAA,IACrC,SAAS;AAAA,IACT,MAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,aAAa;AAAA;AAAA,yBAEM,YAAY;AAAA,MAC/B,SAAS,EAAE,MAAM,kBAAkB,KAAK,+CAA+C;AAAA,MACvF,SAAS,EAAE,MAAM,MAAM;AAAA,IACzB;AAAA,IACA,SAAS,CAAC,EAAE,KAAK,yBAAyB,aAAa,yBAAyB,CAAC;AAAA,EACnF,CAAC;AACD,QAAM,YAAY,EAAE,MAAM,8BAA8B;AACxD,QAAM,oBAAoB;AAAA,IACxB,SAAS,EAAE,MAAM,UAAU,WAAW,EAAE;AAAA,IACxC,SAAS,EAAE,MAAM,UAAU,sBAAsB,KAAK;AAAA,IACtD,OAAO,EAAE,MAAM,SAAS;AAAA,EAC1B;AACA,MAAI,eAAe,CAAC;AACpB,MAAI,WAAW,YAAY,CAAC;AAC5B,MAAI,WAAW,QAAQ,eAAe;AAAA,IACpC,OAAO;AAAA,MACL;AAAA,QACE,MAAM;AAAA,QACN,sBAAsB;AAAA,QACtB,UAAU,CAAC,cAAc,SAAS;AAAA,QAClC,YAAY;AAAA,UACV,YAAY,EAAE,MAAM,UAAU,WAAW,EAAE;AAAA,UAC3C,GAAG;AAAA,QACL;AAAA,MACF;AAAA,MACA;AAAA,QACE,MAAM;AAAA,QACN,sBAAsB;AAAA,QACtB,UAAU,CAAC,UAAU,SAAS;AAAA,QAC9B,YAAY;AAAA,UACV,QAAQ;AAAA,UACR,GAAG;AAAA,QACL;AAAA,MACF;AAAA,IACF;AAAA,IACA,aAAa;AAAA,EACf;AACA,SAAO;AACT;;;ACxMA,eAAsB,YAAY,KAAiD;AACjF,MAAI;AACF,YAAQ,IAAI,QAAQ;AAAA,MAClB,KAAK,SAAS;AACZ,cAAM,SAAS,mBAAmB,UAAU,IAAI,MAAM;AACtD,YAAI,CAAC,OAAO,SAAS;AACnB,iBAAO;AAAA,YACL,OAAO;AAAA,cACL,MAAM;AAAA,cACN,SAAS;AAAA,cACT,SAAS,OAAO,MAAM;AAAA,YACxB;AAAA,UACF;AAAA,QACF;AACA,eAAO,EAAE,QAAQ,MAAM,YAAY,OAAO,IAAI,EAAE;AAAA,MAClD;AAAA,MACA,KAAK;AACH,eAAO,EAAE,QAAQ,kBAAkB,EAAE;AAAA,MACvC,KAAK;AACH,eAAO,EAAE,QAAQ,cAAc,EAAE;AAAA,MACnC;AACE,eAAO;AAAA,UACL,OAAO;AAAA,YACL,MAAM;AAAA,YACN,SAAS,mBAAoB,IAA2B,MAAM;AAAA,UAChE;AAAA,QACF;AAAA,IACJ;AAAA,EACF,SAAS,KAAK;AACZ,QAAI,eAAe,WAAW;AAC5B,aAAO,EAAE,OAAO,EAAE,MAAM,IAAI,MAAM,SAAS,IAAI,SAAS,SAAS,IAAI,QAAQ,EAAE;AAAA,IACjF;AACA,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,EAAE,OAAO,EAAE,MAAM,kBAAkB,QAAQ,EAAE;AAAA,EACtD;AACF;AAIA,eAAe,QAAQ,QAAgD;AACrE,QAAM,SAAmB,CAAC;AAC1B,mBAAiB,SAAS,QAAQ;AAChC,WAAO,KAAK,OAAO,SAAS,KAAK,IAAI,QAAQ,OAAO,KAAK,KAAe,CAAC;AAAA,EAC3E;AACA,SAAO,OAAO,OAAO,MAAM,EAAE,SAAS,OAAO;AAC/C;AAGA,eAAsB,WAAW,QAAkC;AACjE,QAAM,MAAM,MAAM,QAAQ,QAAQ,KAAK;AACvC,MAAI;AACJ,MAAI;AACF,UAAM,OAAO,KAAK,MAAM,GAAG;AAC3B,UAAM,SAAS,EAAE,QAAwC,QAAQ,KAAK,IAAK;AAAA,EAC7E,SAAS,KAAK;AACZ,YAAQ,OAAO;AAAA,MACb,GAAG,KAAK,UAAU;AAAA,QAChB,OAAO;AAAA,UACL,MAAM;AAAA,UACN,SAAS,6BAA6B,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,QACxF;AAAA,MACF,CAAC,CAAC;AAAA;AAAA,IACJ;AACA,WAAO;AAAA,EACT;AACA,QAAM,MAAM,MAAM,YAAY,GAAG;AACjC,UAAQ,OAAO,MAAM,GAAG,KAAK,UAAU,GAAG,CAAC;AAAA,CAAI;AAC/C,SAAO,WAAW,MAAM,IAAI;AAC9B;AAGA,eAAsB,YAAY,QAAkC;AAClE,QAAM,MAAM,MAAM,QAAQ,QAAQ,KAAK;AACvC,QAAM,QAAQ,IAAI,MAAM,IAAI,EAAE,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC;AAC/D,MAAI,WAAW;AACf,aAAW,QAAQ,OAAO;AACxB,QAAI;AACJ,QAAI;AACF,YAAM,OAAO,KAAK,MAAM,IAAI;AAC5B,YAAM,SAAS,EAAE,QAAwC,QAAQ,KAAK,IAAK;AAAA,IAC7E,SAAS,KAAK;AACZ,cAAQ,OAAO;AAAA,QACb,GAAG,KAAK,UAAU;AAAA,UAChB,OAAO;AAAA,YACL,MAAM;AAAA,YACN,SAAS,4BAA4B,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,UACvF;AAAA,QACF,CAAC,CAAC;AAAA;AAAA,MACJ;AACA,iBAAW;AACX;AAAA,IACF;AACA,UAAM,MAAM,MAAM,YAAY,GAAG;AACjC,YAAQ,OAAO,MAAM,GAAG,KAAK,UAAU,GAAG,CAAC;AAAA,CAAI;AAC/C,QAAI,WAAW,IAAK,YAAW;AAAA,EACjC;AACA,SAAO;AACT;;;AC/GA,SAA0B,aAAa;AACvC,SAAS,YAAY;AACrB,SAAS,YAAY;AAcrB,IAAM,aAAa,KAAK,IAAI;AAmB5B,IAAM,oBAAoB,oBAAI,IAAI,CAAC,YAAY,eAAe,eAAe,CAAC;AAEvE,SAAS,UAAU,OAAyB,CAAC,GAAG;AACrD,QAAM,MAAM,IAAI,KAAK;AAErB,MAAI,IAAI,KAAK,KAAK,CAAC;AAGnB,MAAI,KAAK,MAAM;AACb,UAAM,SAAS,KAAK,KAAK;AACzB,QAAI,IAAI,KAAK,OAAO,GAAG,SAAS;AAC9B,YAAM,OAAO,IAAI,IAAI,EAAE,IAAI,GAAG,EAAE;AAChC,UAAI,kBAAkB,IAAI,IAAI,EAAG,QAAO,KAAK;AAC7C,YAAM,MAAM,EAAE,IAAI,OAAO,eAAe,KAAK;AAC7C,YAAM,QAAQ,IAAI,MAAM,kBAAkB;AAC1C,UAAI,CAAC,OAAO;AACV,cAAM,IAAI,UAAU,gBAAgB,8CAA8C,GAAG;AAAA,MACvF;AACA,YAAM,QAAQ,MAAM,CAAC;AACrB,YAAM,KAAK,OAAO,WAAW,WAAW,UAAU,SAAS,MAAM,OAAO,KAAK;AAC7E,UAAI,CAAC,IAAI;AACP,cAAM,IAAI,UAAU,gBAAgB,yBAAyB,GAAG;AAAA,MAClE;AACA,aAAO,KAAK;AAAA,IACd,CAAC;AAAA,EACH;AAEA,MAAI,QAAQ,CAAC,KAAK,MAAM;AACtB,QAAI,eAAe,WAAW;AAC5B,YAAM,SAAS,IAAI;AACnB,aAAO,EAAE;AAAA,QACP,EAAE,OAAO,EAAE,MAAM,IAAI,MAAM,SAAS,IAAI,SAAS,SAAS,IAAI,QAAQ,EAAE;AAAA,QACxE;AAAA,MACF;AAAA,IACF;AAEA,YAAQ,MAAM,iCAAiC,GAAG;AAClD,WAAO,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,kBAAkB,SAAS,yBAAyB,EAAE,GAAG,GAAG;AAAA,EAC7F,CAAC;AAGD,MAAI;AAAA,IAAI;AAAA,IAAY,CAAC,MACnB,EAAE,KAAK,EAAE,QAAQ,MAAe,YAAY,KAAK,IAAI,IAAI,cAAc,IAAK,CAAC;AAAA,EAC/E;AAGA,MAAI,IAAI,eAAe,CAAC,MAAM,EAAE,KAAK,cAAc,CAAC,CAAC;AAGrD,MAAI,IAAI,eAAe,CAAC,MAAM,EAAE,KAAK,kBAAkB,CAAC,CAAC;AAGzD,MAAI,KAAK,aAAa,OAAO,MAAM;AACjC,UAAM,MAAM,MAAM,EAAE,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI;AAC/C,QAAI,OAAO,MAAM;AACf,YAAM,IAAI,UAAU,oBAAoB,8BAA8B,GAAG;AAAA,IAC3E;AACA,UAAM,SAAS,mBAAmB,UAAU,GAAG;AAC/C,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,QACA;AAAA,QACA,OAAO,MAAM;AAAA,MACf;AAAA,IACF;AACA,UAAM,SAAS,MAAM,YAAY,OAAO,IAAI;AAC5C,WAAO,EAAE,KAAK,MAAM;AAAA,EACtB,CAAC;AAGD,MAAI,KAAK,qBAAqB,OAAO,MAAM;AACzC,UAAM,cAAc,EAAE,IAAI,OAAO,cAAc,KAAK;AACpD,QAAI;AACJ,QAAI,YAAY,SAAS,sBAAsB,GAAG;AAChD,YAAM,OAAO,MAAM,EAAE,IAAI,KAAK;AAC9B,YAAM,SAAS,KACZ,MAAM,IAAI,EACV,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,CAAC,SAAS,KAAK,SAAS,CAAC,EAChC,IAAI,CAAC,SAAS;AACb,YAAI;AACF,iBAAO,KAAK,MAAM,IAAI;AAAA,QACxB,QAAQ;AACN,gBAAM,IAAI;AAAA,YACR;AAAA,YACA;AAAA,YACA;AAAA,YACA,KAAK,MAAM,GAAG,GAAG;AAAA,UACnB;AAAA,QACF;AAAA,MACF,CAAC;AACH,gBAAU,EAAE,OAAO;AAAA,IACrB,OAAO;AACL,gBAAU,MAAM,EAAE,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI;AAAA,IAC/C;AACA,QAAI,WAAW,MAAM;AACnB,YAAM,IAAI,UAAU,oBAAoB,wCAAwC,GAAG;AAAA,IACrF;AACA,UAAM,SAAS,0BAA0B,UAAU,OAAO;AAC1D,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,QACA;AAAA,QACA,OAAO,MAAM;AAAA,MACf;AAAA,IACF;AACA,UAAM,SAAS,MAAM,mBAAmB,OAAO,MAAM,KAAK,UAAU,CAAC,CAAC;AACtE,WAAO,EAAE,KAAK,MAAM;AAAA,EACtB,CAAC;AAGD,MAAI,KAAK,gBAAgB,OAAO,MAAM;AACpC,UAAM,MAAM,MAAM,EAAE,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI;AAC/C,QAAI,OAAO,MAAM;AACf,YAAM,IAAI,UAAU,oBAAoB,8BAA8B,GAAG;AAAA,IAC3E;AACA,UAAM,SAAS,yBAAyB,UAAU,GAAG;AACrD,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,QACA;AAAA,QACA,OAAO,MAAM;AAAA,MACf;AAAA,IACF;AACA,UAAM,SAAS,MAAM,qBAAqB,OAAO,MAAM,KAAK,UAAU,CAAC,CAAC;AACxE,WAAO,EAAE,KAAK,MAAM;AAAA,EACtB,CAAC;AAGD,MAAI,IAAI,iBAAiB,CAAC,MAAM,EAAE,KAAK,aAAa,cAAc,EAAE,OAAO,CAAC,CAAC;AAE7E,SAAO;AACT;AASO,SAAS,YAAY,OAAqB,CAAC,GAAe;AAC/D,QAAM,MAAM,UAAU,IAAI;AAC1B,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,OAAO,KAAK,QAAQ;AAC1B,SAAO,MAAM,EAAE,OAAO,IAAI,OAAO,MAAM,UAAU,KAAK,GAAG,CAAC,EAAE,SAAS,MAAM,WAAW,MAAM;AAE1F,YAAQ,IAAI,kCAAkC,OAAO,IAAI,UAAU,EAAE;AAAA,EACvE,CAAC;AACH;","names":[]}
|
package/dist/cli.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { b as FeedbackLabel, p as ProposedSideEffect } from './feedback-trajectory-
|
|
2
|
-
import { C as ControlEvalResult, a as ControlRunResult, h as ControlRuntimeConfig } from './control-runtime-
|
|
1
|
+
import { b as FeedbackLabel, p as ProposedSideEffect } from './feedback-trajectory-D1aGKusy.js';
|
|
2
|
+
import { C as ControlEvalResult, a as ControlRunResult, h as ControlRuntimeConfig } from './control-runtime-BZ_lVLYW.js';
|
|
3
3
|
import { T as TraceEmitter } from './emitter-DP_cSSiw.js';
|
|
4
4
|
import { T as TraceStore, F as FailureClass } from './store-Db2Bv8Cf.js';
|
|
5
5
|
import { a as RunSplitTag, b as RunTokenUsage, R as RunRecord } from './run-record-CqzahIbx.js';
|
|
@@ -120,6 +120,7 @@ interface ControlRunResult<TState, TAction, TActionResult, TEval extends Control
|
|
|
120
120
|
finalEvals: TEval[];
|
|
121
121
|
wallMs: number;
|
|
122
122
|
spentCostUsd: number;
|
|
123
|
+
/** null when the run executed without a TraceEmitter wired (no run record was persisted). */
|
|
123
124
|
runId: string | null;
|
|
124
125
|
failureClass?: FailureClass;
|
|
125
126
|
runtimeErrors: ControlRuntimeError[];
|
package/dist/control.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-
|
|
2
|
-
export { c as ControlActionFailureMode, d as ControlActionOutcome, e as ControlBudget, f as ControlContext, g as ControlDecision, C as ControlEvalResult, a as ControlRunResult, h as ControlRuntimeConfig, i as ControlRuntimeError, j as ControlSeverity, b as ControlStep, k as ControlStopPolicies, S as StopDecision, l as allCriticalPassed, o as objectiveEval, r as runAgentControlLoop, s as stopOnNoProgress, m as stopOnRepeatedAction, n as subjectiveEval } from './control-runtime-
|
|
3
|
-
import './feedback-trajectory-
|
|
1
|
+
export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-BT4qnXiS.js';
|
|
2
|
+
export { c as ControlActionFailureMode, d as ControlActionOutcome, e as ControlBudget, f as ControlContext, g as ControlDecision, C as ControlEvalResult, a as ControlRunResult, h as ControlRuntimeConfig, i as ControlRuntimeError, j as ControlSeverity, b as ControlStep, k as ControlStopPolicies, S as StopDecision, l as allCriticalPassed, o as objectiveEval, r as runAgentControlLoop, s as stopOnNoProgress, m as stopOnRepeatedAction, n as subjectiveEval } from './control-runtime-BZ_lVLYW.js';
|
|
3
|
+
import './feedback-trajectory-D1aGKusy.js';
|
|
4
4
|
import './dataset-CiK_3LDr.js';
|
|
5
5
|
import './errors-BZ9sTdz7.js';
|
|
6
6
|
import './emitter-DP_cSSiw.js';
|
package/dist/control.js
CHANGED
|
@@ -4,7 +4,7 @@ import {
|
|
|
4
4
|
runProposeReview,
|
|
5
5
|
runProposeReviewAsControlLoop,
|
|
6
6
|
scoreFromEvals
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-PALJO75S.js";
|
|
8
8
|
import {
|
|
9
9
|
allCriticalPassed,
|
|
10
10
|
objectiveEval,
|
|
@@ -12,7 +12,7 @@ import {
|
|
|
12
12
|
stopOnNoProgress,
|
|
13
13
|
stopOnRepeatedAction,
|
|
14
14
|
subjectiveEval
|
|
15
|
-
} from "./chunk-
|
|
15
|
+
} from "./chunk-NCRFYPS3.js";
|
|
16
16
|
import "./chunk-NLMNWKVM.js";
|
|
17
17
|
import "./chunk-TVVP3ZZQ.js";
|
|
18
18
|
import "./chunk-NG236HPC.js";
|
|
@@ -54,8 +54,7 @@ interface FailureCluster {
|
|
|
54
54
|
* Source dimension when the trigger was a judge span (e.g. `'format'`,
|
|
55
55
|
* `'safety'`, `'correctness'`). Lets cross-template aggregators
|
|
56
56
|
* group failures by the dimension that fired without overloading
|
|
57
|
-
* `argPrefix`. Optional —
|
|
58
|
-
* deserialize cleanly.
|
|
57
|
+
* `argPrefix`. Optional — clusters without this field deserialize cleanly.
|
|
59
58
|
*/
|
|
60
59
|
dimension?: string;
|
|
61
60
|
runCount: number;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { C as ControlEvalResult, a as ControlRunResult, b as ControlStep } from './control-runtime-
|
|
1
|
+
import { C as ControlEvalResult, a as ControlRunResult, b as ControlStep } from './control-runtime-BZ_lVLYW.js';
|
|
2
2
|
import { D as DatasetSplit, a as DatasetScenario } from './dataset-CiK_3LDr.js';
|
|
3
3
|
|
|
4
4
|
type FeedbackArtifactType = 'text' | 'code' | 'plan' | 'research' | 'action' | 'ui' | 'decision' | 'data' | 'other';
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { E as EuRiskClass,
|
|
1
|
+
export { E as EuRiskClass, e as GovernanceContext, f as GovernanceFinding, g as GovernanceReport, U as UseCaseSignals, n as classifyEuAiRisk, p as euAiActReport, q as nistAiRmfReport, u as renderMarkdown, x as soc2Report, y as summarize } from '../index-BhLlu-qO.js';
|
|
2
2
|
import '../dataset-CiK_3LDr.js';
|
|
3
3
|
import '../errors-BZ9sTdz7.js';
|
|
4
4
|
import '../outcome-store-D6KWmYvj.js';
|
|
@@ -9,8 +9,15 @@ import { T as TraceStore } from './store-Db2Bv8Cf.js';
|
|
|
9
9
|
* 1. Build a golden set: {itemId, humanScore}[].
|
|
10
10
|
* 2. Run candidate judges; each produces {itemId, score}.
|
|
11
11
|
* 3. `calibrateJudge(golden, candidate)` reports κ + Pearson + MAE.
|
|
12
|
-
* 4.
|
|
12
|
+
* 4. `calibrateJudgeContinuous(golden, candidate)` adds quadratic-weighted
|
|
13
|
+
* κ over the un-rounded [0,1] scores plus ICC(2,1), Pearson, Spearman,
|
|
14
|
+
* and bootstrap CIs — use this for fine-grained judges where rounding
|
|
15
|
+
* to int discards information (e.g. 0.78 vs 0.81 both round to 1 and
|
|
16
|
+
* look "perfectly agreed" to integer κ).
|
|
17
|
+
* 5. Run bias probes (positional, verbosity, self-preference) to
|
|
13
18
|
* detect systematic score inflation.
|
|
19
|
+
* 6. For N≥2 judges on the same items, `continuousAgreement(scores)`
|
|
20
|
+
* reports ICC(2,1) + κ_w + Pearson + Spearman with bootstrap CIs.
|
|
14
21
|
*
|
|
15
22
|
* Returns actionable diagnostics, not a single number. Consumers then
|
|
16
23
|
* decide whether to trust the judge, retrain it, or add a tie-breaker.
|
|
@@ -81,6 +88,60 @@ declare function selfPreference(samples: Array<{
|
|
|
81
88
|
score: number;
|
|
82
89
|
inFamily: boolean;
|
|
83
90
|
}>): SelfPreferenceResult;
|
|
91
|
+
interface ContinuousAgreement {
|
|
92
|
+
/** Cohen's κ_w with quadratic weights, computed on raw [0,1] scores. */
|
|
93
|
+
weightedKappa: number;
|
|
94
|
+
/** ICC(2,1): two-way random effects, absolute agreement, single rater. */
|
|
95
|
+
icc: number;
|
|
96
|
+
/** Pearson product-moment correlation (averaged over rater pairs if N>2). */
|
|
97
|
+
pearson: number;
|
|
98
|
+
/** Spearman rank correlation (averaged over rater pairs if N>2). */
|
|
99
|
+
spearman: number;
|
|
100
|
+
/** 95% bootstrap percentile CIs over items. */
|
|
101
|
+
ci: {
|
|
102
|
+
icc: [number, number];
|
|
103
|
+
weightedKappa: [number, number];
|
|
104
|
+
};
|
|
105
|
+
/** Number of complete items (no NaN across raters). */
|
|
106
|
+
n: number;
|
|
107
|
+
/** Number of raters. */
|
|
108
|
+
raters: number;
|
|
109
|
+
}
|
|
110
|
+
interface ContinuousAgreementOptions {
|
|
111
|
+
/** Bootstrap iterations. Default 1000. Set to 0 to skip CIs (CI = [NaN, NaN]). */
|
|
112
|
+
bootstrap?: number;
|
|
113
|
+
/** κ weighting scheme. Default 'quadratic'. */
|
|
114
|
+
weights?: 'linear' | 'quadratic';
|
|
115
|
+
/** PRNG seed for reproducible bootstrap. Default 0xC0FFEE. */
|
|
116
|
+
seed?: number;
|
|
117
|
+
/** Confidence level for percentile CI. Default 0.95. */
|
|
118
|
+
ciLevel?: number;
|
|
119
|
+
}
|
|
120
|
+
/**
|
|
121
|
+
* Inter-rater agreement on continuous (typically [0,1]) scores.
|
|
122
|
+
*
|
|
123
|
+
* `scores` has shape [n_items][n_raters]. Rows with any non-finite entry
|
|
124
|
+
* are dropped. Returns NaN metrics if fewer than 2 raters or 2 complete
|
|
125
|
+
* items remain.
|
|
126
|
+
*/
|
|
127
|
+
declare function continuousAgreement(scores: number[][], opts?: ContinuousAgreementOptions): ContinuousAgreement;
|
|
128
|
+
interface ContinuousCalibrationResult extends CalibrationResult {
|
|
129
|
+
/** Cohen's κ_w computed on raw (un-rounded) scores. */
|
|
130
|
+
weightedKappaContinuous: number;
|
|
131
|
+
/** ICC(2,1) treating golden + candidate as two raters. */
|
|
132
|
+
icc: number;
|
|
133
|
+
spearman: number;
|
|
134
|
+
ci: {
|
|
135
|
+
icc: [number, number];
|
|
136
|
+
weightedKappa: [number, number];
|
|
137
|
+
};
|
|
138
|
+
}
|
|
139
|
+
/**
|
|
140
|
+
* Drop-in superset of `calibrateJudge` that adds continuous-value
|
|
141
|
+
* agreement metrics. The old fields (n, pearson, kappa, mae, worstItems)
|
|
142
|
+
* are preserved unchanged so existing callers continue to work.
|
|
143
|
+
*/
|
|
144
|
+
declare function calibrateJudgeContinuous(golden: GoldenItem[], candidate: CandidateScore[], opts?: ContinuousAgreementOptions): ContinuousCalibrationResult;
|
|
84
145
|
|
|
85
146
|
/**
|
|
86
147
|
* Red-team battery — adversarial scenario corpus with per-category
|
|
@@ -267,4 +328,4 @@ declare function nistAiRmfReport(ctx: GovernanceContext): Promise<GovernanceRepo
|
|
|
267
328
|
|
|
268
329
|
declare function soc2Report(ctx: GovernanceContext): Promise<GovernanceReport>;
|
|
269
330
|
|
|
270
|
-
export { type
|
|
331
|
+
export { verbosityBias as A, type ContinuousAgreementOptions as C, DEFAULT_RED_TEAM_CORPUS as D, type EuRiskClass as E, type GoldenItem as G, type PositionalBiasResult as P, type RedTeamCase as R, type SelfPreferenceResult as S, type UseCaseSignals as U, type VerbosityBiasResult as V, type ContinuousAgreement as a, type CalibrationResult as b, type CandidateScore as c, type ContinuousCalibrationResult as d, type GovernanceContext as e, type GovernanceFinding as f, type GovernanceReport as g, type RedTeamCategory as h, type RedTeamFinding as i, type RedTeamPayload as j, type RedTeamReport as k, calibrateJudge as l, calibrateJudgeContinuous as m, classifyEuAiRisk as n, continuousAgreement as o, euAiActReport as p, nistAiRmfReport as q, positionalBias as r, redTeamDataset as s, redTeamReport as t, renderMarkdown as u, scoreRedTeamOutput as v, selfPreference as w, soc2Report as x, summarize as y, toolNamesForRun as z };
|