@tangle-network/agent-eval 0.23.1 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +212 -79
  3. package/dist/baseline-4R5deP0N.d.ts +108 -0
  4. package/dist/benchmarks/index.d.ts +3 -2
  5. package/dist/benchmarks/index.js +1 -1
  6. package/dist/builder-eval/index.d.ts +249 -0
  7. package/dist/builder-eval/index.js +391 -0
  8. package/dist/builder-eval/index.js.map +1 -0
  9. package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
  10. package/dist/chunk-2A5XJB43.js.map +1 -0
  11. package/dist/chunk-47X6LRCE.js +76 -0
  12. package/dist/chunk-47X6LRCE.js.map +1 -0
  13. package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
  14. package/dist/chunk-4F5DQN55.js.map +1 -0
  15. package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
  16. package/dist/chunk-4S4BM3QQ.js.map +1 -0
  17. package/dist/chunk-5BKGXME7.js +65 -0
  18. package/dist/chunk-5BKGXME7.js.map +1 -0
  19. package/dist/{chunk-6KQG5HAH.js → chunk-5LBB5B3Z.js} +376 -72
  20. package/dist/chunk-5LBB5B3Z.js.map +1 -0
  21. package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
  22. package/dist/chunk-6QDKWHLS.js.map +1 -0
  23. package/dist/{chunk-VQQSPGSM.js → chunk-EDUKQ5AM.js} +247 -189
  24. package/dist/chunk-EDUKQ5AM.js.map +1 -0
  25. package/dist/chunk-I4MBDTY5.js +272 -0
  26. package/dist/chunk-I4MBDTY5.js.map +1 -0
  27. package/dist/chunk-JLZQWFV3.js +618 -0
  28. package/dist/chunk-JLZQWFV3.js.map +1 -0
  29. package/dist/chunk-K2TPS5LB.js +569 -0
  30. package/dist/chunk-K2TPS5LB.js.map +1 -0
  31. package/dist/chunk-KKHDIONI.js +414 -0
  32. package/dist/chunk-KKHDIONI.js.map +1 -0
  33. package/dist/chunk-KMPRBJK4.js +74 -0
  34. package/dist/chunk-KMPRBJK4.js.map +1 -0
  35. package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
  36. package/dist/chunk-KTGTIOFD.js.map +1 -0
  37. package/dist/chunk-LSH4MMOZ.js +838 -0
  38. package/dist/chunk-LSH4MMOZ.js.map +1 -0
  39. package/dist/chunk-NG236HPC.js +57 -0
  40. package/dist/chunk-NG236HPC.js.map +1 -0
  41. package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
  42. package/dist/chunk-NLMNWKVM.js.map +1 -0
  43. package/dist/chunk-NU65VQ7M.js +99 -0
  44. package/dist/chunk-NU65VQ7M.js.map +1 -0
  45. package/dist/chunk-OWLAAMME.js +250 -0
  46. package/dist/chunk-OWLAAMME.js.map +1 -0
  47. package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
  48. package/dist/chunk-PC4UYEBM.js.map +1 -0
  49. package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
  50. package/dist/chunk-RAF443UI.js.map +1 -0
  51. package/dist/chunk-RZTMDUO7.js +49 -0
  52. package/dist/chunk-RZTMDUO7.js.map +1 -0
  53. package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
  54. package/dist/chunk-SESZDQPX.js.map +1 -0
  55. package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
  56. package/dist/chunk-TVVP3ZZQ.js.map +1 -0
  57. package/dist/chunk-WWYCWKUM.js +196 -0
  58. package/dist/chunk-WWYCWKUM.js.map +1 -0
  59. package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
  60. package/dist/chunk-YRZ4M5GS.js.map +1 -0
  61. package/dist/chunk-ZN274SWR.js +613 -0
  62. package/dist/chunk-ZN274SWR.js.map +1 -0
  63. package/dist/cli.js +10 -6
  64. package/dist/cli.js.map +1 -1
  65. package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
  66. package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
  67. package/dist/control.d.ts +8 -6
  68. package/dist/control.js +10 -7
  69. package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
  70. package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
  71. package/dist/errors-BZ9sTdz7.d.ts +70 -0
  72. package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
  73. package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
  74. package/dist/governance/index.d.ts +5 -0
  75. package/dist/governance/index.js +18 -0
  76. package/dist/governance/index.js.map +1 -0
  77. package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
  78. package/dist/index-Oj9fAPPN.d.ts +270 -0
  79. package/dist/index.d.ts +2018 -3003
  80. package/dist/index.js +7443 -9102
  81. package/dist/index.js.map +1 -1
  82. package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
  83. package/dist/knowledge/index.d.ts +102 -0
  84. package/dist/knowledge/index.js +18 -0
  85. package/dist/knowledge/index.js.map +1 -0
  86. package/dist/meta-eval/index.d.ts +99 -0
  87. package/dist/meta-eval/index.js +324 -0
  88. package/dist/meta-eval/index.js.map +1 -0
  89. package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
  90. package/dist/openapi.json +491 -1
  91. package/dist/optimization.d.ts +11 -8
  92. package/dist/optimization.js +11 -9
  93. package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
  94. package/dist/pipelines/index.d.ts +172 -0
  95. package/dist/pipelines/index.js +345 -0
  96. package/dist/pipelines/index.js.map +1 -0
  97. package/dist/prm/index.d.ts +99 -0
  98. package/dist/prm/index.js +222 -0
  99. package/dist/prm/index.js.map +1 -0
  100. package/dist/query-DODUYdPg.d.ts +30 -0
  101. package/dist/release-report-BNgMdqPF.d.ts +292 -0
  102. package/dist/replay-BL96gCEP.d.ts +226 -0
  103. package/dist/reporting.d.ts +10 -295
  104. package/dist/reporting.js +10 -6
  105. package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-BPT8x_NT.d.ts} +148 -146
  106. package/dist/rl.d.ts +1762 -8
  107. package/dist/rl.js +2035 -58
  108. package/dist/rl.js.map +1 -1
  109. package/dist/rubric-D5tjHNJQ.d.ts +72 -0
  110. package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
  111. package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
  112. package/dist/sequential-Dgz1n51-.d.ts +139 -0
  113. package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
  114. package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-C7VPYEj2.d.ts} +3 -76
  115. package/dist/telemetry/file.js +4 -1
  116. package/dist/telemetry/file.js.map +1 -1
  117. package/dist/telemetry/index.js +57 -57
  118. package/dist/telemetry/index.js.map +1 -1
  119. package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
  120. package/dist/traces.d.ts +142 -387
  121. package/dist/traces.js +1302 -40
  122. package/dist/traces.js.map +1 -1
  123. package/dist/trajectory-CnoBo-JY.d.ts +32 -0
  124. package/dist/wire/index.d.ts +369 -25
  125. package/dist/wire/index.js +22 -3
  126. package/package.json +44 -18
  127. package/dist/chunk-42I2QC2L.js.map +0 -1
  128. package/dist/chunk-5IIQKMD5.js.map +0 -1
  129. package/dist/chunk-6KQG5HAH.js.map +0 -1
  130. package/dist/chunk-6M774GY6.js.map +0 -1
  131. package/dist/chunk-7EAUOUQS.js.map +0 -1
  132. package/dist/chunk-AXHNWLIX.js.map +0 -1
  133. package/dist/chunk-EXGR4XEM.js.map +0 -1
  134. package/dist/chunk-IOXMGMHQ.js.map +0 -1
  135. package/dist/chunk-KAO3Q65R.js.map +0 -1
  136. package/dist/chunk-LZKIOBG2.js +0 -2026
  137. package/dist/chunk-LZKIOBG2.js.map +0 -1
  138. package/dist/chunk-QBW3YBTR.js.map +0 -1
  139. package/dist/chunk-QUKKGHTZ.js.map +0 -1
  140. package/dist/chunk-SQQLHODJ.js.map +0 -1
  141. package/dist/chunk-V5QSWN7L.js +0 -1310
  142. package/dist/chunk-V5QSWN7L.js.map +0 -1
  143. package/dist/chunk-VQQSPGSM.js.map +0 -1
  144. package/dist/chunk-XPHOZPOM.js +0 -1947
  145. package/dist/chunk-XPHOZPOM.js.map +0 -1
  146. package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
  147. package/dist/index-ekBXweiQ.d.ts +0 -1894
  148. package/dist/sequential-DgU2mFsE.d.ts +0 -304
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/trace/emitter.ts"],"sourcesContent":["/**\n * TraceEmitter — hierarchical span builder that auto-parents using an\n * internal stack. One emitter per Run; emitters do NOT share state.\n *\n * Convenience methods (`llm`, `tool`, `retrieval`, `judge`, `sandbox`)\n * return a `SpanHandle` with `.end()` / `.fail()` so callers don't\n * have to thread spanIds manually. For async workflows that can't use\n * the stack (e.g. fan-out parallel calls), pass `parentSpanId`\n * explicitly.\n */\n\nimport type {\n Artifact,\n BudgetLedgerEntry,\n EventKind,\n JudgeSpan,\n LlmSpan,\n Message,\n RetrievalSpan,\n Run,\n RunOutcome,\n SandboxSpan,\n Span,\n SpanKind,\n ToolSpan,\n TraceEvent,\n} from './schema'\nimport type { TraceStore } from './store'\n\nexport interface SpanHandle<S extends Span = Span> {\n span: S\n end(patch?: Partial<S>): Promise<void>\n fail(error: string | Error, patch?: Partial<S>): Promise<void>\n}\n\nexport interface RunCompleteHookContext {\n runId: string\n emitter: TraceEmitter\n store: TraceStore\n /** Outcome the caller passed to `endRun` (undefined for `abortRun`). */\n outcome?: RunOutcome\n /** Final run status. */\n status: 'completed' | 'failed' | 'aborted'\n}\n\nexport type RunCompleteHook = (ctx: RunCompleteHookContext) => Promise<void> | void\n\nexport interface TraceEmitterOptions {\n runId?: string\n /** Inject a clock for deterministic tests. */\n now?: () => number\n /** Inject an id generator for deterministic tests. */\n id?: () => string\n /**\n * Hooks fired after `endRun` / `abortRun` writes the final run state.\n * Designed for trace-analyst auto-execution, integrity assertions, and\n * outbound notifications. Hooks run sequentially in the order supplied.\n *\n * By default a hook that throws is swallowed and logged as a `note` event\n * on the run — auto-orchestration must not crash the underlying flow.\n * Set `hookErrors: 'throw'` to propagate.\n */\n onRunComplete?: RunCompleteHook[]\n /** `'swallow'` (default) | `'throw'`. */\n hookErrors?: 'swallow' | 'throw'\n}\n\nexport class TraceEmitter {\n private store: TraceStore\n private stack: string[] = []\n private _runId: string\n private now: () => number\n private id: () => string\n private hooks: RunCompleteHook[]\n private hookErrors: 'swallow' | 'throw'\n\n constructor(store: TraceStore, options: TraceEmitterOptions = {}) {\n this.store = store\n this.now = options.now ?? (() => Date.now())\n this.id = options.id ?? (() => cryptoRandomId())\n this._runId = options.runId ?? this.id()\n this.hooks = options.onRunComplete ?? []\n this.hookErrors = options.hookErrors ?? 'swallow'\n }\n\n get runId(): string { return this._runId }\n\n get traceStore(): TraceStore { return this.store }\n\n /** Append a hook after construction (e.g. attach the trace analyst). */\n addRunCompleteHook(hook: RunCompleteHook): void {\n this.hooks.push(hook)\n }\n\n // ── Run lifecycle ──────────────────────────────────────────────────\n\n /**\n * Begin a Run.\n *\n * `scenarioId` is required on the persisted Run shape — every Run downstream\n * gets a non-empty scenarioId so filters and aggregations stay simple — but\n * the INPUT here accepts it as optional. When omitted, startRun substitutes\n * a sensible default (`run.layer ?? run.tags?.['kind'] ?? 'runtime'`) so\n * runtime / operator / meta-eval runs that have no curated-scenario corpus\n * to anchor to don't have to invent placeholder strings at the call site.\n */\n async startRun(\n run: Omit<Run, 'runId' | 'scenarioId' | 'startedAt' | 'status'> & { scenarioId?: string },\n ): Promise<Run> {\n const scenarioId =\n run.scenarioId ??\n run.layer ??\n run.tags?.['kind'] ??\n 'runtime'\n const full: Run = {\n ...run,\n scenarioId,\n runId: this._runId,\n startedAt: this.now(),\n status: 'running',\n }\n await this.store.appendRun(full)\n return full\n }\n\n async endRun(outcome?: RunOutcome): Promise<void> {\n const status: 'completed' | 'failed' = outcome?.pass === false ? 'failed' : 'completed'\n await this.store.updateRun(this._runId, { endedAt: this.now(), status, outcome })\n await this.runHooks({ runId: this._runId, emitter: this, store: this.store, outcome, status })\n }\n\n async abortRun(reason: string): Promise<void> {\n const outcome = { pass: false, notes: reason }\n await this.store.updateRun(this._runId, {\n endedAt: this.now(),\n status: 'aborted',\n outcome,\n })\n await this.runHooks({ runId: this._runId, emitter: this, store: this.store, outcome, status: 'aborted' })\n }\n\n private async runHooks(ctx: RunCompleteHookContext): Promise<void> {\n for (const hook of this.hooks) {\n try {\n await hook(ctx)\n } catch (err) {\n if (this.hookErrors === 'throw') throw err\n try {\n await this.store.appendEvent({\n eventId: this.id(),\n runId: this._runId,\n kind: 'log',\n timestamp: this.now(),\n payload: {\n source: 'run_complete_hook',\n error: err instanceof Error ? err.message : String(err),\n },\n })\n } catch {\n // best-effort\n }\n }\n }\n }\n\n // ── Generic span ───────────────────────────────────────────────────\n\n async span<S extends Span = Span>(init: {\n kind: SpanKind\n name: string\n parentSpanId?: string\n attributes?: Record<string, unknown>\n } & Partial<Omit<S, 'spanId' | 'runId' | 'startedAt' | 'kind' | 'name'>>): Promise<SpanHandle<S>> {\n const spanId = this.id()\n const parent = init.parentSpanId ?? this.stack[this.stack.length - 1]\n const span = {\n spanId,\n parentSpanId: parent,\n runId: this._runId,\n startedAt: this.now(),\n ...init,\n } as unknown as S\n await this.store.appendSpan(span)\n this.stack.push(spanId)\n return this.handle<S>(span)\n }\n\n private handle<S extends Span>(span: S): SpanHandle<S> {\n return {\n span,\n end: async (patch?: Partial<S>) => {\n const endedAt = this.now()\n await this.store.updateSpan(span.spanId, { endedAt, status: 'ok', ...patch } as Partial<Span>)\n this.pop(span.spanId)\n },\n fail: async (error: string | Error, patch?: Partial<S>) => {\n const endedAt = this.now()\n const errStr = error instanceof Error ? error.message : error\n await this.store.updateSpan(span.spanId, {\n endedAt,\n status: 'error',\n error: errStr,\n ...patch,\n } as Partial<Span>)\n this.pop(span.spanId)\n },\n }\n }\n\n private pop(spanId: string): void {\n const idx = this.stack.lastIndexOf(spanId)\n if (idx >= 0) this.stack.splice(idx, 1)\n }\n\n // ── Typed span conveniences ────────────────────────────────────────\n\n llm(init: Omit<LlmSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'>): Promise<SpanHandle<LlmSpan>> {\n return this.span<LlmSpan>({ kind: 'llm', ...init })\n }\n\n tool(init: Omit<ToolSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'>): Promise<SpanHandle<ToolSpan>> {\n return this.span<ToolSpan>({ kind: 'tool', ...init })\n }\n\n retrieval(init: Omit<RetrievalSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'>): Promise<SpanHandle<RetrievalSpan>> {\n return this.span<RetrievalSpan>({ kind: 'retrieval', ...init })\n }\n\n async recordJudge(verdict: Omit<JudgeSpan, 'spanId' | 'runId' | 'kind' | 'startedAt' | 'endedAt'>): Promise<JudgeSpan> {\n const spanId = this.id()\n const now = this.now()\n const full: JudgeSpan = {\n spanId,\n runId: this._runId,\n kind: 'judge',\n startedAt: now,\n endedAt: now,\n status: 'ok',\n ...verdict,\n }\n await this.store.appendSpan(full)\n return full\n }\n\n sandbox(init: Omit<SandboxSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'>): Promise<SpanHandle<SandboxSpan>> {\n return this.span<SandboxSpan>({ kind: 'sandbox', ...init })\n }\n\n // ── Events ─────────────────────────────────────────────────────────\n\n async emit(event: { kind: EventKind; spanId?: string; payload?: Record<string, unknown> }): Promise<TraceEvent> {\n const full: TraceEvent = {\n eventId: this.id(),\n runId: this._runId,\n spanId: event.spanId ?? this.stack[this.stack.length - 1],\n kind: event.kind,\n timestamp: this.now(),\n payload: event.payload ?? {},\n }\n await this.store.appendEvent(full)\n return full\n }\n\n // ── Budget ledger ──────────────────────────────────────────────────\n\n async recordBudget(entry: Omit<BudgetLedgerEntry, 'runId' | 'timestamp'> & { timestamp?: number }): Promise<BudgetLedgerEntry> {\n const full: BudgetLedgerEntry = {\n runId: this._runId,\n timestamp: entry.timestamp ?? this.now(),\n dimension: entry.dimension,\n limit: entry.limit,\n consumed: entry.consumed,\n remaining: entry.remaining,\n breached: entry.breached,\n spanId: entry.spanId ?? this.stack[this.stack.length - 1],\n }\n await this.store.appendBudgetEntry(full)\n if (full.breached) {\n await this.emit({\n kind: 'budget_breach',\n spanId: full.spanId,\n payload: { dimension: full.dimension, limit: full.limit, consumed: full.consumed },\n })\n }\n return full\n }\n\n // ── Artifacts ──────────────────────────────────────────────────────\n\n async recordArtifact(artifact: Omit<Artifact, 'artifactId' | 'runId'>): Promise<Artifact> {\n const full: Artifact = { artifactId: this.id(), runId: this._runId, ...artifact }\n await this.store.appendArtifact(full)\n return full\n }\n\n // ── Nested composition ─────────────────────────────────────────────\n\n /**\n * Runs `fn` inside a span; auto-ends on success, auto-fails on throw.\n * Returns the fn's return value. Use this for the 95% case.\n */\n async within<T>(\n init: Parameters<TraceEmitter['span']>[0],\n fn: (handle: SpanHandle) => Promise<T>,\n ): Promise<T> {\n const handle = await this.span(init)\n try {\n const result = await fn(handle)\n await handle.end()\n return result\n } catch (err) {\n await handle.fail(err instanceof Error ? err : String(err))\n throw err\n }\n }\n}\n\n// Helpers -------------------------------------------------------------\n\nfunction cryptoRandomId(): string {\n if (typeof globalThis.crypto?.randomUUID === 'function') return globalThis.crypto.randomUUID()\n return `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`\n}\n\n/** Helper to build an LLM span handle args object from a provider-shaped response. */\nexport function llmSpanFromProvider(args: {\n name?: string\n model: string\n messages: Message[]\n output: string\n usage?: { inputTokens?: number; outputTokens?: number; cachedTokens?: number; reasoningTokens?: number }\n costUsd?: number\n finishReason?: string\n}): Omit<LlmSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'> {\n return {\n name: args.name ?? args.model,\n model: args.model,\n messages: args.messages,\n output: args.output,\n inputTokens: args.usage?.inputTokens,\n outputTokens: args.usage?.outputTokens,\n cachedTokens: args.usage?.cachedTokens,\n reasoningTokens: args.usage?.reasoningTokens,\n costUsd: args.costUsd,\n finishReason: args.finishReason,\n }\n}\n"],"mappings":";AAmEO,IAAM,eAAN,MAAmB;AAAA,EAChB;AAAA,EACA,QAAkB,CAAC;AAAA,EACnB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,OAAmB,UAA+B,CAAC,GAAG;AAChE,SAAK,QAAQ;AACb,SAAK,MAAM,QAAQ,QAAQ,MAAM,KAAK,IAAI;AAC1C,SAAK,KAAK,QAAQ,OAAO,MAAM,eAAe;AAC9C,SAAK,SAAS,QAAQ,SAAS,KAAK,GAAG;AACvC,SAAK,QAAQ,QAAQ,iBAAiB,CAAC;AACvC,SAAK,aAAa,QAAQ,cAAc;AAAA,EAC1C;AAAA,EAEA,IAAI,QAAgB;AAAE,WAAO,KAAK;AAAA,EAAO;AAAA,EAEzC,IAAI,aAAyB;AAAE,WAAO,KAAK;AAAA,EAAM;AAAA;AAAA,EAGjD,mBAAmB,MAA6B;AAC9C,SAAK,MAAM,KAAK,IAAI;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAcA,MAAM,SACJ,KACc;AACd,UAAM,aACJ,IAAI,cACJ,IAAI,SACJ,IAAI,OAAO,MAAM,KACjB;AACF,UAAM,OAAY;AAAA,MAChB,GAAG;AAAA,MACH;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,WAAW,KAAK,IAAI;AAAA,MACpB,QAAQ;AAAA,IACV;AACA,UAAM,KAAK,MAAM,UAAU,IAAI;AAC/B,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,OAAO,SAAqC;AAChD,UAAM,SAAiC,SAAS,SAAS,QAAQ,WAAW;AAC5E,UAAM,KAAK,MAAM,UAAU,KAAK,QAAQ,EAAE,SAAS,KAAK,IAAI,GAAG,QAAQ,QAAQ,CAAC;AAChF,UAAM,KAAK,SAAS,EAAE,OAAO,KAAK,QAAQ,SAAS,MAAM,OAAO,KAAK,OAAO,SAAS,OAAO,CAAC;AAAA,EAC/F;AAAA,EAEA,MAAM,SAAS,QAA+B;AAC5C,UAAM,UAAU,EAAE,MAAM,OAAO,OAAO,OAAO;AAC7C,UAAM,KAAK,MAAM,UAAU,KAAK,QAAQ;AAAA,MACtC,SAAS,KAAK,IAAI;AAAA,MAClB,QAAQ;AAAA,MACR;AAAA,IACF,CAAC;AACD,UAAM,KAAK,SAAS,EAAE,OAAO,KAAK,QAAQ,SAAS,MAAM,OAAO,KAAK,OAAO,SAAS,QAAQ,UAAU,CAAC;AAAA,EAC1G;AAAA,EAEA,MAAc,SAAS,KAA4C;AACjE,eAAW,QAAQ,KAAK,OAAO;AAC7B,UAAI;AACF,cAAM,KAAK,GAAG;AAAA,MAChB,SAAS,KAAK;AACZ,YAAI,KAAK,eAAe,QAAS,OAAM;AACvC,YAAI;AACF,gBAAM,KAAK,MAAM,YAAY;AAAA,YAC3B,SAAS,KAAK,GAAG;AAAA,YACjB,OAAO,KAAK;AAAA,YACZ,MAAM;AAAA,YACN,WAAW,KAAK,IAAI;AAAA,YACpB,SAAS;AAAA,cACP,QAAQ;AAAA,cACR,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,YACxD;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AAAA,QAER;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAIA,MAAM,KAA4B,MAKgE;AAChG,UAAM,SAAS,KAAK,GAAG;AACvB,UAAM,SAAS,KAAK,gBAAgB,KAAK,MAAM,KAAK,MAAM,SAAS,CAAC;AACpE,UAAM,OAAO;AAAA,MACX;AAAA,MACA,cAAc;AAAA,MACd,OAAO,KAAK;AAAA,MACZ,WAAW,KAAK,IAAI;AAAA,MACpB,GAAG;AAAA,IACL;AACA,UAAM,KAAK,MAAM,WAAW,IAAI;AAChC,SAAK,MAAM,KAAK,MAAM;AACtB,WAAO,KAAK,OAAU,IAAI;AAAA,EAC5B;AAAA,EAEQ,OAAuB,MAAwB;AACrD,WAAO;AAAA,MACL;AAAA,MACA,KAAK,OAAO,UAAuB;AACjC,cAAM,UAAU,KAAK,IAAI;AACzB,cAAM,KAAK,MAAM,WAAW,KAAK,QAAQ,EAAE,SAAS,QAAQ,MAAM,GAAG,MAAM,CAAkB;AAC7F,aAAK,IAAI,KAAK,MAAM;AAAA,MACtB;AAAA,MACA,MAAM,OAAO,OAAuB,UAAuB;AACzD,cAAM,UAAU,KAAK,IAAI;AACzB,cAAM,SAAS,iBAAiB,QAAQ,MAAM,UAAU;AACxD,cAAM,KAAK,MAAM,WAAW,KAAK,QAAQ;AAAA,UACvC;AAAA,UACA,QAAQ;AAAA,UACR,OAAO;AAAA,UACP,GAAG;AAAA,QACL,CAAkB;AAClB,aAAK,IAAI,KAAK,MAAM;AAAA,MACtB;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,IAAI,QAAsB;AAChC,UAAM,MAAM,KAAK,MAAM,YAAY,MAAM;AACzC,QAAI,OAAO,EAAG,MAAK,MAAM,OAAO,KAAK,CAAC;AAAA,EACxC;AAAA;AAAA,EAIA,IAAI,MAA8F;AAChG,WAAO,KAAK,KAAc,EAAE,MAAM,OAAO,GAAG,KAAK,CAAC;AAAA,EACpD;AAAA,EAEA,KAAK,MAAgG;AACnG,WAAO,KAAK,KAAe,EAAE,MAAM,QAAQ,GAAG,KAAK,CAAC;AAAA,EACtD;AAAA,EAEA,UAAU,MAA0G;AAClH,WAAO,KAAK,KAAoB,EAAE,MAAM,aAAa,GAAG,KAAK,CAAC;AAAA,EAChE;AAAA,EAEA,MAAM,YAAY,SAAqG;AACrH,UAAM,SAAS,KAAK,GAAG;AACvB,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,OAAkB;AAAA,MACtB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,MAAM;AAAA,MACN,WAAW;AAAA,MACX,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,GAAG;AAAA,IACL;AACA,UAAM,KAAK,MAAM,WAAW,IAAI;AAChC,WAAO;AAAA,EACT;AAAA,EAEA,QAAQ,MAAsG;AAC5G,WAAO,KAAK,KAAkB,EAAE,MAAM,WAAW,GAAG,KAAK,CAAC;AAAA,EAC5D;AAAA;AAAA,EAIA,MAAM,KAAK,OAAqG;AAC9G,UAAM,OAAmB;AAAA,MACvB,SAAS,KAAK,GAAG;AAAA,MACjB,OAAO,KAAK;AAAA,MACZ,QAAQ,MAAM,UAAU,KAAK,MAAM,KAAK,MAAM,SAAS,CAAC;AAAA,MACxD,MAAM,MAAM;AAAA,MACZ,WAAW,KAAK,IAAI;AAAA,MACpB,SAAS,MAAM,WAAW,CAAC;AAAA,IAC7B;AACA,UAAM,KAAK,MAAM,YAAY,IAAI;AACjC,WAAO;AAAA,EACT;AAAA;AAAA,EAIA,MAAM,aAAa,OAA4G;AAC7H,UAAM,OAA0B;AAAA,MAC9B,OAAO,KAAK;AAAA,MACZ,WAAW,MAAM,aAAa,KAAK,IAAI;AAAA,MACvC,WAAW,MAAM;AAAA,MACjB,OAAO,MAAM;AAAA,MACb,UAAU,MAAM;AAAA,MAChB,WAAW,MAAM;AAAA,MACjB,UAAU,MAAM;AAAA,MAChB,QAAQ,MAAM,UAAU,KAAK,MAAM,KAAK,MAAM,SAAS,CAAC;AAAA,IAC1D;AACA,UAAM,KAAK,MAAM,kBAAkB,IAAI;AACvC,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,KAAK;AAAA,QACd,MAAM;AAAA,QACN,QAAQ,KAAK;AAAA,QACb,SAAS,EAAE,WAAW,KAAK,WAAW,OAAO,KAAK,OAAO,UAAU,KAAK,SAAS;AAAA,MACnF,CAAC;AAAA,IACH;AACA,WAAO;AAAA,EACT;AAAA;AAAA,EAIA,MAAM,eAAe,UAAqE;AACxF,UAAM,OAAiB,EAAE,YAAY,KAAK,GAAG,GAAG,OAAO,KAAK,QAAQ,GAAG,SAAS;AAChF,UAAM,KAAK,MAAM,eAAe,IAAI;AACpC,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,OACJ,MACA,IACY;AACZ,UAAM,SAAS,MAAM,KAAK,KAAK,IAAI;AACnC,QAAI;AACF,YAAM,SAAS,MAAM,GAAG,MAAM;AAC9B,YAAM,OAAO,IAAI;AACjB,aAAO;AAAA,IACT,SAAS,KAAK;AACZ,YAAM,OAAO,KAAK,eAAe,QAAQ,MAAM,OAAO,GAAG,CAAC;AAC1D,YAAM;AAAA,IACR;AAAA,EACF;AACF;AAIA,SAAS,iBAAyB;AAChC,MAAI,OAAO,WAAW,QAAQ,eAAe,WAAY,QAAO,WAAW,OAAO,WAAW;AAC7F,SAAO,GAAG,KAAK,IAAI,EAAE,SAAS,EAAE,CAAC,IAAI,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,GAAG,EAAE,CAAC;AAC9E;AAGO,SAAS,oBAAoB,MAQyB;AAC3D,SAAO;AAAA,IACL,MAAM,KAAK,QAAQ,KAAK;AAAA,IACxB,OAAO,KAAK;AAAA,IACZ,UAAU,KAAK;AAAA,IACf,QAAQ,KAAK;AAAA,IACb,aAAa,KAAK,OAAO;AAAA,IACzB,cAAc,KAAK,OAAO;AAAA,IAC1B,cAAc,KAAK,OAAO;AAAA,IAC1B,iBAAiB,KAAK,OAAO;AAAA,IAC7B,SAAS,KAAK;AAAA,IACd,cAAc,KAAK;AAAA,EACrB;AACF;","names":[]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/wire/schemas.ts","../src/wire/rubrics.ts","../src/wire/handlers.ts","../src/wire/openapi.ts","../src/wire/server.ts","../src/wire/rpc.ts"],"sourcesContent":["/**\n * Wire-protocol schemas.\n *\n * These Zod schemas are the contract between the agent-eval runtime and\n * any non-TypeScript client (Python, Rust, Go, …). They get rendered to\n * OpenAPI by `wire/openapi.ts` and code-generators consume that spec to\n * produce typed clients in other languages.\n *\n * Rule: if it's not in this file, it isn't on the wire. Keep names and\n * shapes self-explanatory — every field has a `.describe()` so the\n * generated docs are useful without reading the source.\n */\nimport { extendZodWithOpenApi } from '@asteasolutions/zod-to-openapi'\nimport { z } from 'zod'\n\nextendZodWithOpenApi(z)\n\n// ── Building blocks ─────────────────────────────────────────────────\n\nexport const RubricDimensionSchema = z\n .object({\n id: z\n .string()\n .min(1)\n .describe('Short stable id like \"buyer_quality\" — used as the key in scoring output.'),\n description: z\n .string()\n .min(1)\n .describe('One-line plain-English meaning. Read by humans reviewing low scores.'),\n weight: z\n .number()\n .min(0)\n .default(1)\n .describe('Relative weight in the composite score. Default 1; 0 disables.'),\n min: z.number().default(0).describe('Lower bound of valid score for this dimension.'),\n max: z.number().default(1).describe('Upper bound of valid score for this dimension.'),\n })\n .openapi('RubricDimension')\n\nexport const FailureModeSchema = z\n .object({\n id: z.string().min(1).describe('Short stable id like \"ai-cadence\" — used in detection lists.'),\n description: z.string().min(1).describe('Plain-English description of the failure pattern.'),\n })\n .openapi('FailureMode')\n\n// ── Rubric ──────────────────────────────────────────────────────────\n\nexport const RubricSchema = z\n .object({\n name: z\n .string()\n .min(1)\n .describe('Stable name like \"anti-slop\" — used by clients to invoke this rubric.'),\n description: z\n .string()\n .min(1)\n .describe('What this rubric measures. Shown in /v1/rubrics listing.'),\n systemPrompt: z\n .string()\n .min(1)\n .describe(\n 'Instructs the judging LLM. Should explain the persona (e.g. \"senior engineer reviewing voice\"), what to score on, and what to return.',\n ),\n dimensions: z\n .array(RubricDimensionSchema)\n .min(1)\n .describe('Scoring axes. The composite score is a weighted sum of these.'),\n failureModes: z\n .array(FailureModeSchema)\n .default([])\n .describe('Patterns to detect; each detected mode appears in the result.failureModes list.'),\n wins: z\n .array(FailureModeSchema)\n .default([])\n .describe('Positive patterns; each detected one appears in the result.wins list.'),\n })\n .openapi('Rubric')\n\n// ── Judge call ──────────────────────────────────────────────────────\n\nexport const JudgeRequestSchema = z\n .object({\n rubricName: z\n .string()\n .optional()\n .describe('Use a built-in rubric by name. Mutually exclusive with `rubric`.'),\n rubric: RubricSchema.optional().describe(\n 'Inline rubric definition. Mutually exclusive with `rubricName`.',\n ),\n content: z\n .string()\n .min(1)\n .describe('The text being judged — a tweet, a blog post, a code snippet, anything stringly.'),\n context: z\n .record(z.string(), z.unknown())\n .optional()\n .describe(\n 'Free-form metadata for the rubric to use — analytics, source URL, author, etc. Surfaced to the LLM.',\n ),\n model: z\n .string()\n .optional()\n .describe('Override the judge model (default routes via tcloud). e.g. \"claude-opus-4-7\".'),\n })\n .refine((v) => Boolean(v.rubricName) !== Boolean(v.rubric), {\n message: 'Provide exactly one of `rubricName` or `rubric`.',\n })\n .openapi('JudgeRequest')\n\nexport const JudgeResultSchema = z\n .object({\n composite: z\n .number()\n .min(0)\n .max(1)\n .describe('Weighted combination of dimension scores in 0..1. The single number to gate on.'),\n dimensions: z\n .record(z.string(), z.number())\n .describe('Per-dimension score, keyed by RubricDimension.id.'),\n failureModes: z\n .array(z.string())\n .default([])\n .describe('Failure-mode ids detected in the content (subset of rubric.failureModes ids).'),\n wins: z\n .array(z.string())\n .default([])\n .describe('Win ids detected in the content (subset of rubric.wins ids).'),\n rationale: z\n .string()\n .describe('Plain-English explanation of the score. Surfaced to the human reviewer.'),\n rubricVersion: z\n .string()\n .describe(\n 'Stable hash of the rubric used. Scores are only comparable across runs when this matches.',\n ),\n model: z.string().describe('Model that produced the judgement, for reproducibility.'),\n durationMs: z.number().int().nonnegative().describe('End-to-end wall time for this call.'),\n })\n .openapi('JudgeResult')\n\n// ── Rubric listing ──────────────────────────────────────────────────\n\nexport const RubricInfoSchema = z\n .object({\n name: z.string().describe('Pass this to /v1/judge as `rubricName`.'),\n description: z.string().describe('What this rubric measures.'),\n dimensions: z\n .array(z.object({ id: z.string(), description: z.string(), weight: z.number() }))\n .describe('The scoring axes this rubric uses, with weights.'),\n failureModes: z.array(z.string()).default([]).describe('Failure-mode ids this rubric detects.'),\n rubricVersion: z.string().describe('Stable hash — match this to compare scores across runs.'),\n })\n .openapi('RubricInfo')\n\nexport const ListRubricsResponseSchema = z\n .object({\n rubrics: z.array(RubricInfoSchema),\n })\n .openapi('ListRubricsResponse')\n\n// ── Version / health ────────────────────────────────────────────────\n\nexport const VersionResponseSchema = z\n .object({\n package: z.string().describe('Package name (always \"@tangle-network/agent-eval\").'),\n version: z.string().describe('Semver of the running server. Match your client to this.'),\n wireVersion: z\n .string()\n .describe(\n 'Wire-protocol semver. Bumps separately from package version when the schema changes.',\n ),\n apiSurface: z.array(z.string()).describe('List of supported method names.'),\n })\n .openapi('VersionResponse')\n\nexport const HealthResponseSchema = z\n .object({\n status: z.literal('ok'),\n uptimeSec: z.number(),\n })\n .openapi('HealthResponse')\n\n// ── Errors ──────────────────────────────────────────────────────────\n\nexport const ErrorResponseSchema = z\n .object({\n error: z\n .object({\n code: z\n .string()\n .describe('Machine-readable code: \"validation_error\", \"rubric_not_found\", \"judge_error\".'),\n message: z.string().describe('Human-readable message.'),\n details: z.unknown().optional().describe('Optional structured detail.'),\n })\n .describe('Errors are always wrapped in this shape across all endpoints.'),\n })\n .openapi('ErrorResponse')\n\n// ── Type exports for callers in the same package ────────────────────\n\nexport type RubricDimension = z.infer<typeof RubricDimensionSchema>\nexport type FailureMode = z.infer<typeof FailureModeSchema>\nexport type Rubric = z.infer<typeof RubricSchema>\nexport type JudgeRequest = z.infer<typeof JudgeRequestSchema>\nexport type JudgeResult = z.infer<typeof JudgeResultSchema>\nexport type RubricInfo = z.infer<typeof RubricInfoSchema>\nexport type ListRubricsResponse = z.infer<typeof ListRubricsResponseSchema>\nexport type VersionResponse = z.infer<typeof VersionResponseSchema>\nexport type ErrorResponse = z.infer<typeof ErrorResponseSchema>\n\n// ── Wire-protocol version ───────────────────────────────────────────\n\n/**\n * Bump on any breaking change to a request/response schema.\n * Non-breaking (additive) changes don't require a bump.\n */\nexport const WIRE_VERSION = '1.0.0'\n\n/**\n * Stable hash of a rubric. Used to make scores comparable across runs:\n * if the rubricVersion matches, the rubric was identical.\n */\nexport function hashRubric(rubric: Rubric): string {\n const stable = stableStringify(rubric)\n let h = 5381\n for (let i = 0; i < stable.length; i++) {\n h = (h * 33) ^ stable.charCodeAt(i)\n }\n // Unsigned 32-bit hex, prefixed with rubric name + version slot\n return `${rubric.name}@${(h >>> 0).toString(16).padStart(8, '0')}`\n}\n\nfunction stableStringify(value: unknown): string {\n if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(',')}]`\n if (value && typeof value === 'object') {\n const entries = Object.entries(value as Record<string, unknown>)\n .sort(([a], [b]) => a.localeCompare(b))\n .map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`)\n return `{${entries.join(',')}}`\n }\n return JSON.stringify(value)\n}\n","/**\n * Built-in rubrics shipped with agent-eval.\n *\n * A rubric is a set of scoring axes plus a system prompt that tells the\n * judging LLM how to grade against those axes. Built-in rubrics are\n * curated for use cases that recur across Tangle projects — call them\n * by name from any client.\n *\n * Adding a rubric:\n * 1. Define the Rubric object below with a clear `description` and\n * named `dimensions`.\n * 2. Register it in `BUILTIN_RUBRICS` at the bottom.\n * 3. Add a test in `tests/wire/rubrics.test.ts`.\n *\n * Custom rubrics: callers pass `rubric` inline to /v1/judge instead of\n * `rubricName` — see schemas.ts.\n */\nimport type { Rubric } from './schemas'\nimport { hashRubric } from './schemas'\n\n// ── anti-slop ───────────────────────────────────────────────────────\n// Voice/style judge tuned for technical-buyer audiences. Used by the\n// Postiz autoresearch loop and any content-quality gate.\n\nconst ANTI_SLOP: Rubric = {\n name: 'anti-slop',\n description:\n 'Voice and signal quality for content aimed at senior engineers. Catches AI cadence, marketing tone, and engagement-bait shapes.',\n systemPrompt: `You are evaluating a piece of content written for senior engineers and technical founders.\n\nYou score three things:\n- buyer_quality (0..1): would a senior engineer in the target ICP find this worth their attention? High = specific, earned, technically interesting. Low = generic, hyped, off-target.\n- voice (0..1): does it read like a person who built the thing, or like AI/marketing copy?\n- signal (0..1): does it contain a non-obvious detail, constraint, or claim a reader couldn't get from the public docs?\n\nDetect failure modes (return ids matching):\n- ai-cadence: rule-of-three openings, em-dash flourish, \"Let me explain\", \"Here's the thing\", AI rhythm\n- marketing-tone: \"We're excited to announce\", \"thrilled\", \"delighted\", \"game-changer\", buzzword stack\n- vague-claim: technical claim without a specific component, file, or measurement\n- no-hook: opening doesn't earn attention from the target reader\n- engagement-bait: \"agree?\", \"thoughts?\", listicles, controversy-fishing, hook-detail-pitch\n- off-icp: content shape would attract motivational/grift/hype audiences instead of buyers\n- stale-claim: repeats a positioning line we've used many times this month\n\nDetect wins (return ids matching):\n- specific-component: names a real file, component, or measurement\n- earned-detail: shares a non-obvious detail not derivable from public docs\n- constraint-articulated: names a real tradeoff and the side chosen\n- honest-failure: describes a real failure mode and what was done about it\n\nReturn ONLY JSON matching the response schema. Be conservative — most content has 0-1 wins and 1-2 failure modes, not many of each.`,\n dimensions: [\n {\n id: 'buyer_quality',\n description: 'Would the target buyer find this worth attention?',\n weight: 0.5,\n min: 0,\n max: 1,\n },\n {\n id: 'voice',\n description: 'Does it sound like a builder, not AI or marketing?',\n weight: 0.3,\n min: 0,\n max: 1,\n },\n {\n id: 'signal',\n description: 'Non-obvious detail, constraint, or claim?',\n weight: 0.2,\n min: 0,\n max: 1,\n },\n ],\n failureModes: [\n { id: 'ai-cadence', description: 'AI-rhythm openings and transitions' },\n { id: 'marketing-tone', description: 'Buzzwords, hype, corporate-PR voice' },\n { id: 'vague-claim', description: 'Technical claim without specifics' },\n { id: 'no-hook', description: 'Opening fails to earn attention' },\n { id: 'engagement-bait', description: 'Listicle/controversy/agree-pattern' },\n { id: 'off-icp', description: 'Voice attracts the wrong audience' },\n { id: 'stale-claim', description: 'Reuses an over-used positioning line' },\n ],\n wins: [\n { id: 'specific-component', description: 'Names a real file/component/number' },\n { id: 'earned-detail', description: 'Detail not in public docs' },\n { id: 'constraint-articulated', description: 'Names a real tradeoff' },\n { id: 'honest-failure', description: 'Describes a real failure honestly' },\n ],\n}\n\n// ── Registry ────────────────────────────────────────────────────────\n\nexport const BUILTIN_RUBRICS: Record<string, Rubric> = {\n 'anti-slop': ANTI_SLOP,\n}\n\n/** Get a built-in rubric by name, or undefined. */\nexport function getBuiltinRubric(name: string): Rubric | undefined {\n return BUILTIN_RUBRICS[name]\n}\n\n/** List built-in rubrics with their stable versions. */\nexport function listBuiltinRubrics() {\n return Object.values(BUILTIN_RUBRICS).map((r) => ({\n name: r.name,\n description: r.description,\n dimensions: r.dimensions.map((d) => ({\n id: d.id,\n description: d.description,\n weight: d.weight,\n })),\n failureModes: r.failureModes.map((f) => f.id),\n rubricVersion: hashRubric(r),\n }))\n}\n","/**\n * Pure handler functions — the \"business logic\" behind every wire-protocol\n * method. The HTTP server (`server.ts`) and the stdio RPC (`rpc.ts`) both\n * call these. Tests call these directly without spinning a server.\n *\n * Each handler:\n * - Takes a parsed request (already Zod-validated by the transport).\n * - Returns a result that matches the response schema.\n * - Throws `WireError` for caller-fixable errors (404, 400, 422).\n * - Lets unexpected errors bubble — the transport maps them to 500.\n */\nimport { callLlmJson } from '../llm-client'\nimport { getBuiltinRubric, listBuiltinRubrics } from './rubrics'\nimport {\n hashRubric,\n WIRE_VERSION,\n type JudgeRequest,\n type JudgeResult,\n type ListRubricsResponse,\n type Rubric,\n type VersionResponse,\n} from './schemas'\n\n/** Caller-fixable error. The transport renders this to 4xx + ErrorResponse. */\nexport class WireError extends Error {\n constructor(\n public readonly code: string,\n message: string,\n public readonly status: number = 400,\n public readonly details?: unknown,\n ) {\n super(message)\n this.name = 'WireError'\n }\n}\n\n// ── judge ───────────────────────────────────────────────────────────\n\n/** The JSON schema we ask the judging LLM to fill in. */\nfunction judgeOutputSchema(rubric: Rubric) {\n return {\n name: 'JudgeOutput',\n schema: {\n type: 'object',\n additionalProperties: false,\n properties: {\n dimensions: {\n type: 'object',\n additionalProperties: false,\n properties: Object.fromEntries(\n rubric.dimensions.map((d) => [\n d.id,\n { type: 'number', minimum: d.min, maximum: d.max },\n ]),\n ),\n required: rubric.dimensions.map((d) => d.id),\n },\n failureModes: {\n type: 'array',\n items: { type: 'string', enum: rubric.failureModes.map((f) => f.id) },\n },\n wins: {\n type: 'array',\n items: { type: 'string', enum: rubric.wins.map((w) => w.id) },\n },\n rationale: { type: 'string' },\n },\n required: ['dimensions', 'rationale'],\n } as Record<string, unknown>,\n }\n}\n\ninterface JudgeOutput {\n dimensions: Record<string, number>\n failureModes?: string[]\n wins?: string[]\n rationale: string\n}\n\nfunction validateJudgeOutput(value: unknown, rubric: Rubric): JudgeOutput {\n if (!value || typeof value !== 'object') {\n throw new WireError('judge_error', 'Judge returned malformed output.', 500, value)\n }\n const raw = value as Record<string, unknown>\n const rawDimensions = raw.dimensions\n if (!rawDimensions || typeof rawDimensions !== 'object' || Array.isArray(rawDimensions)) {\n throw new WireError('judge_error', 'Judge returned malformed dimensions.', 500, value)\n }\n\n const dimensions: Record<string, number> = {}\n const dimensionRecord = rawDimensions as Record<string, unknown>\n for (const dim of rubric.dimensions) {\n const score = dimensionRecord[dim.id]\n if (typeof score !== 'number' || !Number.isFinite(score) || score < dim.min || score > dim.max) {\n throw new WireError('judge_error', `Judge returned invalid score for dimension \"${dim.id}\".`, 500, value)\n }\n dimensions[dim.id] = score\n }\n\n const allowedFailures = new Set(rubric.failureModes.map((mode) => mode.id))\n const allowedWins = new Set(rubric.wins.map((win) => win.id))\n const failureModes = validateIdArray(raw.failureModes, allowedFailures, 'failureModes', value)\n const wins = validateIdArray(raw.wins, allowedWins, 'wins', value)\n if (typeof raw.rationale !== 'string' || raw.rationale.trim().length === 0) {\n throw new WireError('judge_error', 'Judge returned missing rationale.', 500, value)\n }\n\n return { dimensions, failureModes, wins, rationale: raw.rationale }\n}\n\nfunction validateIdArray(\n raw: unknown,\n allowed: Set<string>,\n field: 'failureModes' | 'wins',\n original: unknown,\n): string[] {\n if (raw === undefined) return []\n if (!Array.isArray(raw)) {\n throw new WireError('judge_error', `Judge returned non-array ${field}.`, 500, original)\n }\n const out: string[] = []\n for (const item of raw) {\n if (typeof item !== 'string' || !allowed.has(item)) {\n throw new WireError('judge_error', `Judge returned unknown ${field} id \"${String(item)}\".`, 500, original)\n }\n out.push(item)\n }\n return out\n}\n\nfunction compositeScore(dimensions: Record<string, number>, rubric: Rubric): number {\n let weighted = 0\n let totalWeight = 0\n for (const dim of rubric.dimensions) {\n const raw = dimensions[dim.id] ?? 0\n const range = dim.max - dim.min || 1\n const normalized = Math.max(0, Math.min(1, (raw - dim.min) / range))\n weighted += normalized * dim.weight\n totalWeight += dim.weight\n }\n return totalWeight > 0 ? weighted / totalWeight : 0\n}\n\nfunction buildJudgePrompt(content: string, context: unknown): string {\n const ctx = context && Object.keys(context as object).length ? JSON.stringify(context) : ''\n return [\n `CONTENT TO JUDGE:`,\n content,\n '',\n ctx ? `CONTEXT (metadata, analytics, etc.):` : '',\n ctx ? ctx : '',\n ]\n .filter(Boolean)\n .join('\\n')\n}\n\nconst DEFAULT_JUDGE_MODEL = 'claude-sonnet-4-6'\n\nexport async function handleJudge(req: JudgeRequest): Promise<JudgeResult> {\n // Resolve rubric\n let rubric: Rubric\n if (req.rubricName) {\n const found = getBuiltinRubric(req.rubricName)\n if (!found) {\n throw new WireError('rubric_not_found', `No built-in rubric named \"${req.rubricName}\".`, 404)\n }\n rubric = found\n } else if (req.rubric) {\n rubric = req.rubric\n } else {\n // refine() in the schema should already have caught this — defense in depth\n throw new WireError('validation_error', 'Provide either `rubricName` or `rubric`.', 422)\n }\n\n const startedAt = Date.now()\n const model = req.model ?? DEFAULT_JUDGE_MODEL\n\n const { value, result } = await callLlmJson<JudgeOutput>({\n model,\n messages: [\n { role: 'system', content: rubric.systemPrompt },\n { role: 'user', content: buildJudgePrompt(req.content, req.context) },\n ],\n jsonSchema: judgeOutputSchema(rubric),\n temperature: 0.0,\n timeoutMs: 60_000,\n })\n\n const output = validateJudgeOutput(value, rubric)\n\n const composite = compositeScore(output.dimensions, rubric)\n const durationMs = Date.now() - startedAt\n\n return {\n composite,\n dimensions: output.dimensions,\n failureModes: output.failureModes ?? [],\n wins: output.wins ?? [],\n rationale: output.rationale,\n rubricVersion: hashRubric(rubric),\n model: result.model,\n durationMs,\n }\n}\n\n// ── listRubrics ─────────────────────────────────────────────────────\n\nexport function handleListRubrics(): ListRubricsResponse {\n return { rubrics: listBuiltinRubrics() }\n}\n\n// ── version ─────────────────────────────────────────────────────────\n\nimport { readFileSync } from 'node:fs'\nimport { dirname, resolve } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nlet CACHED_VERSION: string | undefined\n\nfunction readPackageVersion(): string {\n if (CACHED_VERSION) return CACHED_VERSION\n // Walk up from this file looking for the nearest package.json.\n // In dist/ this is dist/.., in src/wire/ this is ../../package.json.\n const here = dirname(fileURLToPath(import.meta.url))\n const candidates = [\n resolve(here, '..', '..', 'package.json'), // src/wire → repo root\n resolve(here, '..', 'package.json'), // dist → repo root\n ]\n for (const path of candidates) {\n try {\n const pkg = JSON.parse(readFileSync(path, 'utf-8')) as { version?: string }\n if (pkg.version) {\n CACHED_VERSION = pkg.version\n return pkg.version\n }\n } catch {\n // try next\n }\n }\n return '0.0.0-unknown'\n}\n\nexport function handleVersion(): VersionResponse {\n return {\n package: '@tangle-network/agent-eval',\n version: readPackageVersion(),\n wireVersion: WIRE_VERSION,\n apiSurface: ['judge', 'listRubrics', 'version'],\n }\n}\n","/**\n * Build an OpenAPI spec from the wire schemas.\n *\n * The spec is the contract that other-language clients (Python, Rust,\n * Go, …) generate from. There is no hand-written client — clients are\n * derived artifacts of this file plus `schemas.ts`.\n *\n * Run `pnpm openapi` (defined in package.json) to write the spec to\n * `dist/openapi.json`. CI uses that file to regenerate the Python\n * client and gate the dual-publish workflow.\n */\nimport { OpenApiGeneratorV31, OpenAPIRegistry } from '@asteasolutions/zod-to-openapi'\nimport type { OpenAPIObject } from 'openapi3-ts/oas31'\n\nimport {\n ErrorResponseSchema,\n HealthResponseSchema,\n JudgeRequestSchema,\n JudgeResultSchema,\n ListRubricsResponseSchema,\n VersionResponseSchema,\n WIRE_VERSION,\n} from './schemas'\n\nexport function buildOpenApi(packageVersion: string): OpenAPIObject {\n const registry = new OpenAPIRegistry()\n\n // Components — each schema becomes a $ref-able component\n registry.register('JudgeRequest', JudgeRequestSchema)\n registry.register('JudgeResult', JudgeResultSchema)\n registry.register('ListRubricsResponse', ListRubricsResponseSchema)\n registry.register('VersionResponse', VersionResponseSchema)\n registry.register('HealthResponse', HealthResponseSchema)\n registry.register('ErrorResponse', ErrorResponseSchema)\n\n // Routes\n registry.registerPath({\n method: 'post',\n path: '/v1/judge',\n summary: 'Score a piece of content against a rubric',\n description:\n 'Runs the judging LLM with the named (or inline) rubric and returns dimension scores, detected failure modes, wins, and a composite score in 0..1.',\n request: {\n body: {\n content: {\n 'application/json': { schema: JudgeRequestSchema },\n },\n },\n },\n responses: {\n 200: {\n description: 'Successful judgement',\n content: { 'application/json': { schema: JudgeResultSchema } },\n },\n 400: {\n description: 'Validation error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 404: {\n description: 'Rubric not found',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n 500: {\n description: 'Judge error',\n content: { 'application/json': { schema: ErrorResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'get',\n path: '/v1/rubrics',\n summary: 'List built-in rubrics',\n description:\n 'Returns every rubric registered server-side, with their dimensions and stable rubricVersion hash.',\n responses: {\n 200: {\n description: 'Listing',\n content: { 'application/json': { schema: ListRubricsResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'get',\n path: '/v1/version',\n summary: 'Server and wire-protocol version',\n description: 'Match your client version to `version`; check `wireVersion` for compatibility.',\n responses: {\n 200: {\n description: 'Version info',\n content: { 'application/json': { schema: VersionResponseSchema } },\n },\n },\n })\n\n registry.registerPath({\n method: 'get',\n path: '/healthz',\n summary: 'Liveness check',\n responses: {\n 200: {\n description: 'OK',\n content: { 'application/json': { schema: HealthResponseSchema } },\n },\n },\n })\n\n const generator = new OpenApiGeneratorV31(registry.definitions)\n const doc = generator.generateDocument({\n openapi: '3.1.0',\n info: {\n title: '@tangle-network/agent-eval — wire protocol',\n version: packageVersion,\n description: `HTTP and stdio RPC interface to agent-eval. The TypeScript runtime is the source of truth; this spec is the contract that cross-language clients (Python, Rust, Go) generate from.\n\nWire-protocol version: ${WIRE_VERSION}. Bumps on breaking changes to request/response schemas.`,\n contact: { name: 'Tangle Network', url: 'https://github.com/tangle-network/agent-eval' },\n license: { name: 'MIT' },\n },\n servers: [{ url: 'http://localhost:5005', description: 'Local agent-eval serve' }],\n })\n const rubricRef = { $ref: '#/components/schemas/Rubric' } as const\n const commonJudgeFields = {\n content: { type: 'string', minLength: 1 },\n context: { type: 'object', additionalProperties: true },\n model: { type: 'string' },\n } as const\n doc.components ??= {}\n doc.components.schemas ??= {}\n doc.components.schemas.JudgeRequest = {\n oneOf: [\n {\n type: 'object',\n additionalProperties: false,\n required: ['rubricName', 'content'],\n properties: {\n rubricName: { type: 'string', minLength: 1 },\n ...commonJudgeFields,\n },\n },\n {\n type: 'object',\n additionalProperties: false,\n required: ['rubric', 'content'],\n properties: {\n rubric: rubricRef,\n ...commonJudgeFields,\n },\n },\n ],\n description: 'Judge request. Provide exactly one of rubricName or rubric.',\n }\n return doc\n}\n","/**\n * HTTP transport for the wire protocol.\n *\n * Hono + @hono/node-server. Every endpoint:\n * 1. Validates the request against its Zod schema.\n * 2. Calls the matching handler in `handlers.ts`.\n * 3. Renders 4xx for `WireError` with structured body, 500 for unexpected.\n *\n * The server has no internal state besides the handler imports — restart\n * costs nothing. Run via `agent-eval serve --port 5005`.\n */\nimport { serve, type ServerType } from '@hono/node-server'\nimport { Hono } from 'hono'\nimport { cors } from 'hono/cors'\n\nimport {\n handleJudge,\n handleListRubrics,\n handleVersion,\n WireError,\n} from './handlers'\nimport { buildOpenApi } from './openapi'\nimport { JudgeRequestSchema } from './schemas'\n\nconst STARTED_AT = Date.now()\n\nexport function createApp() {\n const app = new Hono()\n\n app.use('*', cors())\n\n app.onError((err, c) => {\n if (err instanceof WireError) {\n return c.json(\n { error: { code: err.code, message: err.message, details: err.details } },\n err.status as 400 | 404 | 422 | 500,\n )\n }\n // Unexpected — log and return generic 500 without leaking internals.\n console.error('[agent-eval] unhandled error:', err)\n return c.json(\n { error: { code: 'internal_error', message: 'Internal server error.' } },\n 500,\n )\n })\n\n // ── Health ──\n app.get('/healthz', (c) =>\n c.json({ status: 'ok' as const, uptimeSec: (Date.now() - STARTED_AT) / 1000 }),\n )\n\n // ── Version ──\n app.get('/v1/version', (c) => c.json(handleVersion()))\n\n // ── Rubrics ──\n app.get('/v1/rubrics', (c) => c.json(handleListRubrics()))\n\n // ── Judge ──\n app.post('/v1/judge', async (c) => {\n const raw = await c.req.json().catch(() => null)\n if (raw == null) {\n throw new WireError('validation_error', 'Request body must be JSON.', 400)\n }\n const parsed = JudgeRequestSchema.safeParse(raw)\n if (!parsed.success) {\n throw new WireError(\n 'validation_error',\n 'Request did not match JudgeRequest schema.',\n 400,\n parsed.error.issues,\n )\n }\n const result = await handleJudge(parsed.data)\n return c.json(result)\n })\n\n // ── OpenAPI spec ──\n app.get('/openapi.json', (c) => c.json(buildOpenApi(handleVersion().version)))\n\n return app\n}\n\nexport interface ServeOptions {\n /** Default 5005. */\n port?: number\n /** Default '127.0.0.1'. Set to '0.0.0.0' to listen on all interfaces. */\n host?: string\n}\n\nexport function startServer(opts: ServeOptions = {}): ServerType {\n const app = createApp()\n const port = opts.port ?? 5005\n const host = opts.host ?? '127.0.0.1'\n return serve({ fetch: app.fetch, port, hostname: host }, ({ address, port: actualPort }) => {\n // eslint-disable-next-line no-console\n console.log(`[agent-eval] serving on http://${address}:${actualPort}`)\n })\n}\n","/**\n * stdio RPC transport.\n *\n * For batch / cron use without a running server. The Python client falls\n * back to this when no server is reachable.\n *\n * Protocol (line-delimited JSON over stdin/stdout):\n * IN: one JSON object on stdin: {\"method\":\"judge\",\"params\":{...}}\n * OUT: one JSON object on stdout: {\"result\":{...}} or {\"error\":{...}}\n *\n * One request per process invocation. To pipeline many calls, the client\n * writes JSONL to stdin and reads JSONL from stdout — see batch mode below.\n */\nimport { handleJudge, handleListRubrics, handleVersion, WireError } from './handlers'\nimport { JudgeRequestSchema } from './schemas'\n\ninterface RpcRequest {\n method: 'judge' | 'listRubrics' | 'version'\n params?: unknown\n}\n\ninterface RpcSuccess {\n result: unknown\n}\n\ninterface RpcError {\n error: { code: string; message: string; details?: unknown }\n}\n\nexport async function dispatchRpc(req: RpcRequest): Promise<RpcSuccess | RpcError> {\n try {\n switch (req.method) {\n case 'judge': {\n const parsed = JudgeRequestSchema.safeParse(req.params)\n if (!parsed.success) {\n return {\n error: {\n code: 'validation_error',\n message: 'params did not match JudgeRequest schema.',\n details: parsed.error.issues,\n },\n }\n }\n return { result: await handleJudge(parsed.data) }\n }\n case 'listRubrics':\n return { result: handleListRubrics() }\n case 'version':\n return { result: handleVersion() }\n default:\n return {\n error: {\n code: 'unknown_method',\n message: `No such method: ${(req as { method: string }).method}`,\n },\n }\n }\n } catch (err) {\n if (err instanceof WireError) {\n return { error: { code: err.code, message: err.message, details: err.details } }\n }\n const message = err instanceof Error ? err.message : String(err)\n return { error: { code: 'internal_error', message } }\n }\n}\n\n// ── stdin/stdout driver ─────────────────────────────────────────────\n\nasync function readAll(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = []\n for await (const chunk of stream) {\n chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk as string))\n }\n return Buffer.concat(chunks).toString('utf-8')\n}\n\n/** Read one JSON request from stdin, write one JSON response to stdout. */\nexport async function runRpcOnce(method?: string): Promise<number> {\n const raw = await readAll(process.stdin)\n let req: RpcRequest\n try {\n const body = JSON.parse(raw)\n req = method ? { method: method as RpcRequest['method'], params: body } : (body as RpcRequest)\n } catch (err) {\n process.stdout.write(\n JSON.stringify({\n error: {\n code: 'parse_error',\n message: `stdin was not valid JSON: ${err instanceof Error ? err.message : String(err)}`,\n },\n }) + '\\n',\n )\n return 1\n }\n const out = await dispatchRpc(req)\n process.stdout.write(JSON.stringify(out) + '\\n')\n return 'error' in out ? 1 : 0\n}\n\n/** Read JSONL requests from stdin, write JSONL responses to stdout. */\nexport async function runRpcBatch(method?: string): Promise<number> {\n const raw = await readAll(process.stdin)\n const lines = raw.split('\\n').filter((l) => l.trim().length > 0)\n let exitCode = 0\n for (const line of lines) {\n let req: RpcRequest\n try {\n const body = JSON.parse(line)\n req = method ? { method: method as RpcRequest['method'], params: body } : (body as RpcRequest)\n } catch (err) {\n process.stdout.write(\n JSON.stringify({\n error: {\n code: 'parse_error',\n message: `line was not valid JSON: ${err instanceof Error ? err.message : String(err)}`,\n },\n }) + '\\n',\n )\n exitCode = 1\n continue\n }\n const out = await dispatchRpc(req)\n process.stdout.write(JSON.stringify(out) + '\\n')\n if ('error' in out) exitCode = 1\n }\n return exitCode\n}\n"],"mappings":";;;;;AAYA,SAAS,4BAA4B;AACrC,SAAS,SAAS;AAElB,qBAAqB,CAAC;AAIf,IAAM,wBAAwB,EAClC,OAAO;AAAA,EACN,IAAI,EACD,OAAO,EACP,IAAI,CAAC,EACL,SAAS,gFAA2E;AAAA,EACvF,aAAa,EACV,OAAO,EACP,IAAI,CAAC,EACL,SAAS,sEAAsE;AAAA,EAClF,QAAQ,EACL,OAAO,EACP,IAAI,CAAC,EACL,QAAQ,CAAC,EACT,SAAS,gEAAgE;AAAA,EAC5E,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS,gDAAgD;AAAA,EACpF,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS,gDAAgD;AACtF,CAAC,EACA,QAAQ,iBAAiB;AAErB,IAAM,oBAAoB,EAC9B,OAAO;AAAA,EACN,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mEAA8D;AAAA,EAC7F,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mDAAmD;AAC7F,CAAC,EACA,QAAQ,aAAa;AAIjB,IAAM,eAAe,EACzB,OAAO;AAAA,EACN,MAAM,EACH,OAAO,EACP,IAAI,CAAC,EACL,SAAS,4EAAuE;AAAA,EACnF,aAAa,EACV,OAAO,EACP,IAAI,CAAC,EACL,SAAS,0DAA0D;AAAA,EACtE,cAAc,EACX,OAAO,EACP,IAAI,CAAC,EACL;AAAA,IACC;AAAA,EACF;AAAA,EACF,YAAY,EACT,MAAM,qBAAqB,EAC3B,IAAI,CAAC,EACL,SAAS,+DAA+D;AAAA,EAC3E,cAAc,EACX,MAAM,iBAAiB,EACvB,QAAQ,CAAC,CAAC,EACV,SAAS,iFAAiF;AAAA,EAC7F,MAAM,EACH,MAAM,iBAAiB,EACvB,QAAQ,CAAC,CAAC,EACV,SAAS,uEAAuE;AACrF,CAAC,EACA,QAAQ,QAAQ;AAIZ,IAAM,qBAAqB,EAC/B,OAAO;AAAA,EACN,YAAY,EACT,OAAO,EACP,SAAS,EACT,SAAS,kEAAkE;AAAA,EAC9E,QAAQ,aAAa,SAAS,EAAE;AAAA,IAC9B;AAAA,EACF;AAAA,EACA,SAAS,EACN,OAAO,EACP,IAAI,CAAC,EACL,SAAS,uFAAkF;AAAA,EAC9F,SAAS,EACN,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAC9B,SAAS,EACT;AAAA,IACC;AAAA,EACF;AAAA,EACF,OAAO,EACJ,OAAO,EACP,SAAS,EACT,SAAS,+EAA+E;AAC7F,CAAC,EACA,OAAO,CAAC,MAAM,QAAQ,EAAE,UAAU,MAAM,QAAQ,EAAE,MAAM,GAAG;AAAA,EAC1D,SAAS;AACX,CAAC,EACA,QAAQ,cAAc;AAElB,IAAM,oBAAoB,EAC9B,OAAO;AAAA,EACN,WAAW,EACR,OAAO,EACP,IAAI,CAAC,EACL,IAAI,CAAC,EACL,SAAS,iFAAiF;AAAA,EAC7F,YAAY,EACT,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAC7B,SAAS,mDAAmD;AAAA,EAC/D,cAAc,EACX,MAAM,EAAE,OAAO,CAAC,EAChB,QAAQ,CAAC,CAAC,EACV,SAAS,+EAA+E;AAAA,EAC3F,MAAM,EACH,MAAM,EAAE,OAAO,CAAC,EAChB,QAAQ,CAAC,CAAC,EACV,SAAS,8DAA8D;AAAA,EAC1E,WAAW,EACR,OAAO,EACP,SAAS,yEAAyE;AAAA,EACrF,eAAe,EACZ,OAAO,EACP;AAAA,IACC;AAAA,EACF;AAAA,EACF,OAAO,EAAE,OAAO,EAAE,SAAS,yDAAyD;AAAA,EACpF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,qCAAqC;AAC3F,CAAC,EACA,QAAQ,aAAa;AAIjB,IAAM,mBAAmB,EAC7B,OAAO;AAAA,EACN,MAAM,EAAE,OAAO,EAAE,SAAS,yCAAyC;AAAA,EACnE,aAAa,EAAE,OAAO,EAAE,SAAS,4BAA4B;AAAA,EAC7D,YAAY,EACT,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,GAAG,aAAa,EAAE,OAAO,GAAG,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC,EAC/E,SAAS,kDAAkD;AAAA,EAC9D,cAAc,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,uCAAuC;AAAA,EAC9F,eAAe,EAAE,OAAO,EAAE,SAAS,8DAAyD;AAC9F,CAAC,EACA,QAAQ,YAAY;AAEhB,IAAM,4BAA4B,EACtC,OAAO;AAAA,EACN,SAAS,EAAE,MAAM,gBAAgB;AACnC,CAAC,EACA,QAAQ,qBAAqB;AAIzB,IAAM,wBAAwB,EAClC,OAAO;AAAA,EACN,SAAS,EAAE,OAAO,EAAE,SAAS,qDAAqD;AAAA,EAClF,SAAS,EAAE,OAAO,EAAE,SAAS,0DAA0D;AAAA,EACvF,aAAa,EACV,OAAO,EACP;AAAA,IACC;AAAA,EACF;AAAA,EACF,YAAY,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,SAAS,iCAAiC;AAC5E,CAAC,EACA,QAAQ,iBAAiB;AAErB,IAAM,uBAAuB,EACjC,OAAO;AAAA,EACN,QAAQ,EAAE,QAAQ,IAAI;AAAA,EACtB,WAAW,EAAE,OAAO;AACtB,CAAC,EACA,QAAQ,gBAAgB;AAIpB,IAAM,sBAAsB,EAChC,OAAO;AAAA,EACN,OAAO,EACJ,OAAO;AAAA,IACN,MAAM,EACH,OAAO,EACP,SAAS,+EAA+E;AAAA,IAC3F,SAAS,EAAE,OAAO,EAAE,SAAS,yBAAyB;AAAA,IACtD,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS,6BAA6B;AAAA,EACxE,CAAC,EACA,SAAS,+DAA+D;AAC7E,CAAC,EACA,QAAQ,eAAe;AAoBnB,IAAM,eAAe;AAMrB,SAAS,WAAW,QAAwB;AACjD,QAAM,SAAS,gBAAgB,MAAM;AACrC,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,QAAK,IAAI,KAAM,OAAO,WAAW,CAAC;AAAA,EACpC;AAEA,SAAO,GAAG,OAAO,IAAI,KAAK,MAAM,GAAG,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC;AAClE;AAEA,SAAS,gBAAgB,OAAwB;AAC/C,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,IAAI,MAAM,IAAI,CAAC,SAAS,gBAAgB,IAAI,CAAC,EAAE,KAAK,GAAG,CAAC;AACzF,MAAI,SAAS,OAAO,UAAU,UAAU;AACtC,UAAM,UAAU,OAAO,QAAQ,KAAgC,EAC5D,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC,EACrC,IAAI,CAAC,CAAC,KAAK,IAAI,MAAM,GAAG,KAAK,UAAU,GAAG,CAAC,IAAI,gBAAgB,IAAI,CAAC,EAAE;AACzE,WAAO,IAAI,QAAQ,KAAK,GAAG,CAAC;AAAA,EAC9B;AACA,SAAO,KAAK,UAAU,KAAK;AAC7B;;;AC1NA,IAAM,YAAoB;AAAA,EACxB,MAAM;AAAA,EACN,aACE;AAAA,EACF,cAAc;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAuBd,YAAY;AAAA,IACV;AAAA,MACE,IAAI;AAAA,MACJ,aAAa;AAAA,MACb,QAAQ;AAAA,MACR,KAAK;AAAA,MACL,KAAK;AAAA,IACP;AAAA,IACA;AAAA,MACE,IAAI;AAAA,MACJ,aAAa;AAAA,MACb,QAAQ;AAAA,MACR,KAAK;AAAA,MACL,KAAK;AAAA,IACP;AAAA,IACA;AAAA,MACE,IAAI;AAAA,MACJ,aAAa;AAAA,MACb,QAAQ;AAAA,MACR,KAAK;AAAA,MACL,KAAK;AAAA,IACP;AAAA,EACF;AAAA,EACA,cAAc;AAAA,IACZ,EAAE,IAAI,cAAc,aAAa,qCAAqC;AAAA,IACtE,EAAE,IAAI,kBAAkB,aAAa,sCAAsC;AAAA,IAC3E,EAAE,IAAI,eAAe,aAAa,oCAAoC;AAAA,IACtE,EAAE,IAAI,WAAW,aAAa,kCAAkC;AAAA,IAChE,EAAE,IAAI,mBAAmB,aAAa,qCAAqC;AAAA,IAC3E,EAAE,IAAI,WAAW,aAAa,oCAAoC;AAAA,IAClE,EAAE,IAAI,eAAe,aAAa,uCAAuC;AAAA,EAC3E;AAAA,EACA,MAAM;AAAA,IACJ,EAAE,IAAI,sBAAsB,aAAa,qCAAqC;AAAA,IAC9E,EAAE,IAAI,iBAAiB,aAAa,4BAA4B;AAAA,IAChE,EAAE,IAAI,0BAA0B,aAAa,wBAAwB;AAAA,IACrE,EAAE,IAAI,kBAAkB,aAAa,oCAAoC;AAAA,EAC3E;AACF;AAIO,IAAM,kBAA0C;AAAA,EACrD,aAAa;AACf;AAGO,SAAS,iBAAiB,MAAkC;AACjE,SAAO,gBAAgB,IAAI;AAC7B;AAGO,SAAS,qBAAqB;AACnC,SAAO,OAAO,OAAO,eAAe,EAAE,IAAI,CAAC,OAAO;AAAA,IAChD,MAAM,EAAE;AAAA,IACR,aAAa,EAAE;AAAA,IACf,YAAY,EAAE,WAAW,IAAI,CAAC,OAAO;AAAA,MACnC,IAAI,EAAE;AAAA,MACN,aAAa,EAAE;AAAA,MACf,QAAQ,EAAE;AAAA,IACZ,EAAE;AAAA,IACF,cAAc,EAAE,aAAa,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,IAC5C,eAAe,WAAW,CAAC;AAAA,EAC7B,EAAE;AACJ;;;ACkGA,SAAS,oBAAoB;AAC7B,SAAS,SAAS,eAAe;AACjC,SAAS,qBAAqB;AA/LvB,IAAM,YAAN,cAAwB,MAAM;AAAA,EACnC,YACkB,MAChB,SACgB,SAAiB,KACjB,SAChB;AACA,UAAM,OAAO;AALG;AAEA;AACA;AAGhB,SAAK,OAAO;AAAA,EACd;AAAA,EAPkB;AAAA,EAEA;AAAA,EACA;AAKpB;AAKA,SAAS,kBAAkB,QAAgB;AACzC,SAAO;AAAA,IACL,MAAM;AAAA,IACN,QAAQ;AAAA,MACN,MAAM;AAAA,MACN,sBAAsB;AAAA,MACtB,YAAY;AAAA,QACV,YAAY;AAAA,UACV,MAAM;AAAA,UACN,sBAAsB;AAAA,UACtB,YAAY,OAAO;AAAA,YACjB,OAAO,WAAW,IAAI,CAAC,MAAM;AAAA,cAC3B,EAAE;AAAA,cACF,EAAE,MAAM,UAAU,SAAS,EAAE,KAAK,SAAS,EAAE,IAAI;AAAA,YACnD,CAAC;AAAA,UACH;AAAA,UACA,UAAU,OAAO,WAAW,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,QAC7C;AAAA,QACA,cAAc;AAAA,UACZ,MAAM;AAAA,UACN,OAAO,EAAE,MAAM,UAAU,MAAM,OAAO,aAAa,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE;AAAA,QACtE;AAAA,QACA,MAAM;AAAA,UACJ,MAAM;AAAA,UACN,OAAO,EAAE,MAAM,UAAU,MAAM,OAAO,KAAK,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE;AAAA,QAC9D;AAAA,QACA,WAAW,EAAE,MAAM,SAAS;AAAA,MAC9B;AAAA,MACA,UAAU,CAAC,cAAc,WAAW;AAAA,IACtC;AAAA,EACF;AACF;AASA,SAAS,oBAAoB,OAAgB,QAA6B;AACxE,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACvC,UAAM,IAAI,UAAU,eAAe,oCAAoC,KAAK,KAAK;AAAA,EACnF;AACA,QAAM,MAAM;AACZ,QAAM,gBAAgB,IAAI;AAC1B,MAAI,CAAC,iBAAiB,OAAO,kBAAkB,YAAY,MAAM,QAAQ,aAAa,GAAG;AACvF,UAAM,IAAI,UAAU,eAAe,wCAAwC,KAAK,KAAK;AAAA,EACvF;AAEA,QAAM,aAAqC,CAAC;AAC5C,QAAM,kBAAkB;AACxB,aAAW,OAAO,OAAO,YAAY;AACnC,UAAM,QAAQ,gBAAgB,IAAI,EAAE;AACpC,QAAI,OAAO,UAAU,YAAY,CAAC,OAAO,SAAS,KAAK,KAAK,QAAQ,IAAI,OAAO,QAAQ,IAAI,KAAK;AAC9F,YAAM,IAAI,UAAU,eAAe,+CAA+C,IAAI,EAAE,MAAM,KAAK,KAAK;AAAA,IAC1G;AACA,eAAW,IAAI,EAAE,IAAI;AAAA,EACvB;AAEA,QAAM,kBAAkB,IAAI,IAAI,OAAO,aAAa,IAAI,CAAC,SAAS,KAAK,EAAE,CAAC;AAC1E,QAAM,cAAc,IAAI,IAAI,OAAO,KAAK,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;AAC5D,QAAM,eAAe,gBAAgB,IAAI,cAAc,iBAAiB,gBAAgB,KAAK;AAC7F,QAAM,OAAO,gBAAgB,IAAI,MAAM,aAAa,QAAQ,KAAK;AACjE,MAAI,OAAO,IAAI,cAAc,YAAY,IAAI,UAAU,KAAK,EAAE,WAAW,GAAG;AAC1E,UAAM,IAAI,UAAU,eAAe,qCAAqC,KAAK,KAAK;AAAA,EACpF;AAEA,SAAO,EAAE,YAAY,cAAc,MAAM,WAAW,IAAI,UAAU;AACpE;AAEA,SAAS,gBACP,KACA,SACA,OACA,UACU;AACV,MAAI,QAAQ,OAAW,QAAO,CAAC;AAC/B,MAAI,CAAC,MAAM,QAAQ,GAAG,GAAG;AACvB,UAAM,IAAI,UAAU,eAAe,4BAA4B,KAAK,KAAK,KAAK,QAAQ;AAAA,EACxF;AACA,QAAM,MAAgB,CAAC;AACvB,aAAW,QAAQ,KAAK;AACtB,QAAI,OAAO,SAAS,YAAY,CAAC,QAAQ,IAAI,IAAI,GAAG;AAClD,YAAM,IAAI,UAAU,eAAe,0BAA0B,KAAK,QAAQ,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ;AAAA,IAC3G;AACA,QAAI,KAAK,IAAI;AAAA,EACf;AACA,SAAO;AACT;AAEA,SAAS,eAAe,YAAoC,QAAwB;AAClF,MAAI,WAAW;AACf,MAAI,cAAc;AAClB,aAAW,OAAO,OAAO,YAAY;AACnC,UAAM,MAAM,WAAW,IAAI,EAAE,KAAK;AAClC,UAAM,QAAQ,IAAI,MAAM,IAAI,OAAO;AACnC,UAAM,aAAa,KAAK,IAAI,GAAG,KAAK,IAAI,IAAI,MAAM,IAAI,OAAO,KAAK,CAAC;AACnE,gBAAY,aAAa,IAAI;AAC7B,mBAAe,IAAI;AAAA,EACrB;AACA,SAAO,cAAc,IAAI,WAAW,cAAc;AACpD;AAEA,SAAS,iBAAiB,SAAiB,SAA0B;AACnE,QAAM,MAAM,WAAW,OAAO,KAAK,OAAiB,EAAE,SAAS,KAAK,UAAU,OAAO,IAAI;AACzF,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA,MAAM,yCAAyC;AAAA,IAC/C,MAAM,MAAM;AAAA,EACd,EACG,OAAO,OAAO,EACd,KAAK,IAAI;AACd;AAEA,IAAM,sBAAsB;AAE5B,eAAsB,YAAY,KAAyC;AAEzE,MAAI;AACJ,MAAI,IAAI,YAAY;AAClB,UAAM,QAAQ,iBAAiB,IAAI,UAAU;AAC7C,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,UAAU,oBAAoB,6BAA6B,IAAI,UAAU,MAAM,GAAG;AAAA,IAC9F;AACA,aAAS;AAAA,EACX,WAAW,IAAI,QAAQ;AACrB,aAAS,IAAI;AAAA,EACf,OAAO;AAEL,UAAM,IAAI,UAAU,oBAAoB,4CAA4C,GAAG;AAAA,EACzF;AAEA,QAAM,YAAY,KAAK,IAAI;AAC3B,QAAM,QAAQ,IAAI,SAAS;AAE3B,QAAM,EAAE,OAAO,OAAO,IAAI,MAAM,YAAyB;AAAA,IACvD;AAAA,IACA,UAAU;AAAA,MACR,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa;AAAA,MAC/C,EAAE,MAAM,QAAQ,SAAS,iBAAiB,IAAI,SAAS,IAAI,OAAO,EAAE;AAAA,IACtE;AAAA,IACA,YAAY,kBAAkB,MAAM;AAAA,IACpC,aAAa;AAAA,IACb,WAAW;AAAA,EACb,CAAC;AAED,QAAM,SAAS,oBAAoB,OAAO,MAAM;AAEhD,QAAM,YAAY,eAAe,OAAO,YAAY,MAAM;AAC1D,QAAM,aAAa,KAAK,IAAI,IAAI;AAEhC,SAAO;AAAA,IACL;AAAA,IACA,YAAY,OAAO;AAAA,IACnB,cAAc,OAAO,gBAAgB,CAAC;AAAA,IACtC,MAAM,OAAO,QAAQ,CAAC;AAAA,IACtB,WAAW,OAAO;AAAA,IAClB,eAAe,WAAW,MAAM;AAAA,IAChC,OAAO,OAAO;AAAA,IACd;AAAA,EACF;AACF;AAIO,SAAS,oBAAyC;AACvD,SAAO,EAAE,SAAS,mBAAmB,EAAE;AACzC;AAQA,IAAI;AAEJ,SAAS,qBAA6B;AACpC,MAAI,eAAgB,QAAO;AAG3B,QAAM,OAAO,QAAQ,cAAc,YAAY,GAAG,CAAC;AACnD,QAAM,aAAa;AAAA,IACjB,QAAQ,MAAM,MAAM,MAAM,cAAc;AAAA;AAAA,IACxC,QAAQ,MAAM,MAAM,cAAc;AAAA;AAAA,EACpC;AACA,aAAW,QAAQ,YAAY;AAC7B,QAAI;AACF,YAAM,MAAM,KAAK,MAAM,aAAa,MAAM,OAAO,CAAC;AAClD,UAAI,IAAI,SAAS;AACf,yBAAiB,IAAI;AACrB,eAAO,IAAI;AAAA,MACb;AAAA,IACF,QAAQ;AAAA,IAER;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,gBAAiC;AAC/C,SAAO;AAAA,IACL,SAAS;AAAA,IACT,SAAS,mBAAmB;AAAA,IAC5B,aAAa;AAAA,IACb,YAAY,CAAC,SAAS,eAAe,SAAS;AAAA,EAChD;AACF;;;AC9OA,SAAS,qBAAqB,uBAAuB;AAa9C,SAAS,aAAa,gBAAuC;AAClE,QAAM,WAAW,IAAI,gBAAgB;AAGrC,WAAS,SAAS,gBAAgB,kBAAkB;AACpD,WAAS,SAAS,eAAe,iBAAiB;AAClD,WAAS,SAAS,uBAAuB,yBAAyB;AAClE,WAAS,SAAS,mBAAmB,qBAAqB;AAC1D,WAAS,SAAS,kBAAkB,oBAAoB;AACxD,WAAS,SAAS,iBAAiB,mBAAmB;AAGtD,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,SAAS;AAAA,UACP,oBAAoB,EAAE,QAAQ,mBAAmB;AAAA,QACnD;AAAA,MACF;AAAA,IACF;AAAA,IACA,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,kBAAkB,EAAE;AAAA,MAC/D;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,MACA,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,oBAAoB,EAAE;AAAA,MACjE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aACE;AAAA,IACF,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,0BAA0B,EAAE;AAAA,MACvE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,aAAa;AAAA,IACb,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,sBAAsB,EAAE;AAAA,MACnE;AAAA,IACF;AAAA,EACF,CAAC;AAED,WAAS,aAAa;AAAA,IACpB,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,SAAS;AAAA,IACT,WAAW;AAAA,MACT,KAAK;AAAA,QACH,aAAa;AAAA,QACb,SAAS,EAAE,oBAAoB,EAAE,QAAQ,qBAAqB,EAAE;AAAA,MAClE;AAAA,IACF;AAAA,EACF,CAAC;AAED,QAAM,YAAY,IAAI,oBAAoB,SAAS,WAAW;AAC9D,QAAM,MAAM,UAAU,iBAAiB;AAAA,IACrC,SAAS;AAAA,IACT,MAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,aAAa;AAAA;AAAA,yBAEM,YAAY;AAAA,MAC/B,SAAS,EAAE,MAAM,kBAAkB,KAAK,+CAA+C;AAAA,MACvF,SAAS,EAAE,MAAM,MAAM;AAAA,IACzB;AAAA,IACA,SAAS,CAAC,EAAE,KAAK,yBAAyB,aAAa,yBAAyB,CAAC;AAAA,EACnF,CAAC;AACD,QAAM,YAAY,EAAE,MAAM,8BAA8B;AACxD,QAAM,oBAAoB;AAAA,IACxB,SAAS,EAAE,MAAM,UAAU,WAAW,EAAE;AAAA,IACxC,SAAS,EAAE,MAAM,UAAU,sBAAsB,KAAK;AAAA,IACtD,OAAO,EAAE,MAAM,SAAS;AAAA,EAC1B;AACA,MAAI,eAAe,CAAC;AACpB,MAAI,WAAW,YAAY,CAAC;AAC5B,MAAI,WAAW,QAAQ,eAAe;AAAA,IACpC,OAAO;AAAA,MACL;AAAA,QACE,MAAM;AAAA,QACN,sBAAsB;AAAA,QACtB,UAAU,CAAC,cAAc,SAAS;AAAA,QAClC,YAAY;AAAA,UACV,YAAY,EAAE,MAAM,UAAU,WAAW,EAAE;AAAA,UAC3C,GAAG;AAAA,QACL;AAAA,MACF;AAAA,MACA;AAAA,QACE,MAAM;AAAA,QACN,sBAAsB;AAAA,QACtB,UAAU,CAAC,UAAU,SAAS;AAAA,QAC9B,YAAY;AAAA,UACV,QAAQ;AAAA,UACR,GAAG;AAAA,QACL;AAAA,MACF;AAAA,IACF;AAAA,IACA,aAAa;AAAA,EACf;AACA,SAAO;AACT;;;AC/IA,SAAS,aAA8B;AACvC,SAAS,YAAY;AACrB,SAAS,YAAY;AAWrB,IAAM,aAAa,KAAK,IAAI;AAErB,SAAS,YAAY;AAC1B,QAAM,MAAM,IAAI,KAAK;AAErB,MAAI,IAAI,KAAK,KAAK,CAAC;AAEnB,MAAI,QAAQ,CAAC,KAAK,MAAM;AACtB,QAAI,eAAe,WAAW;AAC5B,aAAO,EAAE;AAAA,QACP,EAAE,OAAO,EAAE,MAAM,IAAI,MAAM,SAAS,IAAI,SAAS,SAAS,IAAI,QAAQ,EAAE;AAAA,QACxE,IAAI;AAAA,MACN;AAAA,IACF;AAEA,YAAQ,MAAM,iCAAiC,GAAG;AAClD,WAAO,EAAE;AAAA,MACP,EAAE,OAAO,EAAE,MAAM,kBAAkB,SAAS,yBAAyB,EAAE;AAAA,MACvE;AAAA,IACF;AAAA,EACF,CAAC;AAGD,MAAI;AAAA,IAAI;AAAA,IAAY,CAAC,MACnB,EAAE,KAAK,EAAE,QAAQ,MAAe,YAAY,KAAK,IAAI,IAAI,cAAc,IAAK,CAAC;AAAA,EAC/E;AAGA,MAAI,IAAI,eAAe,CAAC,MAAM,EAAE,KAAK,cAAc,CAAC,CAAC;AAGrD,MAAI,IAAI,eAAe,CAAC,MAAM,EAAE,KAAK,kBAAkB,CAAC,CAAC;AAGzD,MAAI,KAAK,aAAa,OAAO,MAAM;AACjC,UAAM,MAAM,MAAM,EAAE,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI;AAC/C,QAAI,OAAO,MAAM;AACf,YAAM,IAAI,UAAU,oBAAoB,8BAA8B,GAAG;AAAA,IAC3E;AACA,UAAM,SAAS,mBAAmB,UAAU,GAAG;AAC/C,QAAI,CAAC,OAAO,SAAS;AACnB,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,QACA;AAAA,QACA,OAAO,MAAM;AAAA,MACf;AAAA,IACF;AACA,UAAM,SAAS,MAAM,YAAY,OAAO,IAAI;AAC5C,WAAO,EAAE,KAAK,MAAM;AAAA,EACtB,CAAC;AAGD,MAAI,IAAI,iBAAiB,CAAC,MAAM,EAAE,KAAK,aAAa,cAAc,EAAE,OAAO,CAAC,CAAC;AAE7E,SAAO;AACT;AASO,SAAS,YAAY,OAAqB,CAAC,GAAe;AAC/D,QAAM,MAAM,UAAU;AACtB,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,OAAO,KAAK,QAAQ;AAC1B,SAAO,MAAM,EAAE,OAAO,IAAI,OAAO,MAAM,UAAU,KAAK,GAAG,CAAC,EAAE,SAAS,MAAM,WAAW,MAAM;AAE1F,YAAQ,IAAI,kCAAkC,OAAO,IAAI,UAAU,EAAE;AAAA,EACvE,CAAC;AACH;;;ACpEA,eAAsB,YAAY,KAAiD;AACjF,MAAI;AACF,YAAQ,IAAI,QAAQ;AAAA,MAClB,KAAK,SAAS;AACZ,cAAM,SAAS,mBAAmB,UAAU,IAAI,MAAM;AACtD,YAAI,CAAC,OAAO,SAAS;AACnB,iBAAO;AAAA,YACL,OAAO;AAAA,cACL,MAAM;AAAA,cACN,SAAS;AAAA,cACT,SAAS,OAAO,MAAM;AAAA,YACxB;AAAA,UACF;AAAA,QACF;AACA,eAAO,EAAE,QAAQ,MAAM,YAAY,OAAO,IAAI,EAAE;AAAA,MAClD;AAAA,MACA,KAAK;AACH,eAAO,EAAE,QAAQ,kBAAkB,EAAE;AAAA,MACvC,KAAK;AACH,eAAO,EAAE,QAAQ,cAAc,EAAE;AAAA,MACnC;AACE,eAAO;AAAA,UACL,OAAO;AAAA,YACL,MAAM;AAAA,YACN,SAAS,mBAAoB,IAA2B,MAAM;AAAA,UAChE;AAAA,QACF;AAAA,IACJ;AAAA,EACF,SAAS,KAAK;AACZ,QAAI,eAAe,WAAW;AAC5B,aAAO,EAAE,OAAO,EAAE,MAAM,IAAI,MAAM,SAAS,IAAI,SAAS,SAAS,IAAI,QAAQ,EAAE;AAAA,IACjF;AACA,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,EAAE,OAAO,EAAE,MAAM,kBAAkB,QAAQ,EAAE;AAAA,EACtD;AACF;AAIA,eAAe,QAAQ,QAAgD;AACrE,QAAM,SAAmB,CAAC;AAC1B,mBAAiB,SAAS,QAAQ;AAChC,WAAO,KAAK,OAAO,SAAS,KAAK,IAAI,QAAQ,OAAO,KAAK,KAAe,CAAC;AAAA,EAC3E;AACA,SAAO,OAAO,OAAO,MAAM,EAAE,SAAS,OAAO;AAC/C;AAGA,eAAsB,WAAW,QAAkC;AACjE,QAAM,MAAM,MAAM,QAAQ,QAAQ,KAAK;AACvC,MAAI;AACJ,MAAI;AACF,UAAM,OAAO,KAAK,MAAM,GAAG;AAC3B,UAAM,SAAS,EAAE,QAAwC,QAAQ,KAAK,IAAK;AAAA,EAC7E,SAAS,KAAK;AACZ,YAAQ,OAAO;AAAA,MACb,KAAK,UAAU;AAAA,QACb,OAAO;AAAA,UACL,MAAM;AAAA,UACN,SAAS,6BAA6B,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,QACxF;AAAA,MACF,CAAC,IAAI;AAAA,IACP;AACA,WAAO;AAAA,EACT;AACA,QAAM,MAAM,MAAM,YAAY,GAAG;AACjC,UAAQ,OAAO,MAAM,KAAK,UAAU,GAAG,IAAI,IAAI;AAC/C,SAAO,WAAW,MAAM,IAAI;AAC9B;AAGA,eAAsB,YAAY,QAAkC;AAClE,QAAM,MAAM,MAAM,QAAQ,QAAQ,KAAK;AACvC,QAAM,QAAQ,IAAI,MAAM,IAAI,EAAE,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC;AAC/D,MAAI,WAAW;AACf,aAAW,QAAQ,OAAO;AACxB,QAAI;AACJ,QAAI;AACF,YAAM,OAAO,KAAK,MAAM,IAAI;AAC5B,YAAM,SAAS,EAAE,QAAwC,QAAQ,KAAK,IAAK;AAAA,IAC7E,SAAS,KAAK;AACZ,cAAQ,OAAO;AAAA,QACb,KAAK,UAAU;AAAA,UACb,OAAO;AAAA,YACL,MAAM;AAAA,YACN,SAAS,4BAA4B,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,UACvF;AAAA,QACF,CAAC,IAAI;AAAA,MACP;AACA,iBAAW;AACX;AAAA,IACF;AACA,UAAM,MAAM,MAAM,YAAY,GAAG;AACjC,YAAQ,OAAO,MAAM,KAAK,UAAU,GAAG,IAAI,IAAI;AAC/C,QAAI,WAAW,IAAK,YAAW;AAAA,EACjC;AACA,SAAO;AACT;","names":[]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/pre-registration.ts"],"sourcesContent":["/**\n * Pre-registered hypotheses — declare what you're testing BEFORE the\n * run, check it AFTER. Prevents p-hacking, optional stopping, and the\n * \"we ran until it looked good\" failure mode.\n *\n * Manifest is a plain JSON-friendly object. Sign it with a content hash\n * + timestamp; the registered record becomes immutable. Post-run,\n * evaluate the manifest against observed results — the library refuses\n * to let you re-interpret a different metric as the declared one.\n */\n\nexport interface HypothesisManifest {\n id: string\n /** Human prose — goes into the audit trail. */\n hypothesis: string\n /** Metric the hypothesis claims to move. */\n metric: string\n /** 'increase' = candidate should score higher than baseline; 'decrease' = lower. */\n direction: 'increase' | 'decrease'\n /** Minimum effect size to count (same units as the metric). */\n minEffect: number\n /** Alpha threshold. */\n alpha: number\n /** Target statistical power at which sample size was pre-computed. */\n power: number\n /** Declared N per arm before running. */\n preRegisteredN: number\n /** ISO8601 timestamp the manifest was registered. */\n registeredAt: string\n /** Optional identifiers to tie into the trace corpus. */\n baselineLabel?: string\n candidateLabel?: string\n}\n\n/**\n * Identifier for the hashing scheme used to produce `contentHash`.\n *\n * `'sha256-content'` — sha256 hex over the canonicalized manifest with\n * the `contentHash` and `algo` fields stripped. This is what\n * `signManifest` produces today.\n *\n * Held as a string union so future schemes can be added without\n * breaking parsers; legacy SignedManifest values written before this\n * field existed will deserialize cleanly because the field is optional.\n */\nexport type SignedManifestAlgo = 'sha256-content'\n\nexport interface SignedManifest extends HypothesisManifest {\n /** sha256 hex of canonicalized manifest (everything except contentHash and algo). */\n contentHash: string\n /**\n * Algorithm string describing how `contentHash` was produced.\n *\n * Optional on the type so legacy serialized manifests (pre-`algo`)\n * still parse, but ALWAYS populated by {@link signManifest}.\n * Consumers that want to enforce a known algorithm should reject\n * manifests where this field is missing or unrecognized.\n */\n algo?: SignedManifestAlgo\n}\n\nexport interface HypothesisResult {\n manifest: SignedManifest\n observedN: number\n observedEffect: number\n observedPValue: number\n /** True iff the observed effect hits the pre-declared direction with\n * magnitude ≥ minEffect AND p < alpha. */\n confirmed: boolean\n /** Enumerated reasons the hypothesis was rejected (each a machine-tag). */\n rejectionReasons: Array<'wrong_direction' | 'effect_too_small' | 'not_significant' | 'undersampled'>\n notes?: string\n}\n\n/**\n * Deterministic JSON canonicalization — sort object keys recursively.\n *\n * Two semantically-equal objects produce byte-identical canonicalized output;\n * this is what makes a content-hash stable across encoders, key insertion\n * orders, and runtime versions. Exported for any consumer that needs the same\n * canonicalization guarantee outside the manifest-signing path (e.g., signing\n * an artifact bundle, hashing a dataset version, etc.).\n */\nexport function canonicalize(v: unknown): unknown {\n if (v === null || typeof v !== 'object') return v\n if (Array.isArray(v)) return v.map(canonicalize)\n const keys = Object.keys(v as Record<string, unknown>).sort()\n const out: Record<string, unknown> = {}\n for (const k of keys) out[k] = canonicalize((v as Record<string, unknown>)[k])\n return out\n}\n\n/**\n * SHA-256 hex (full 64 chars) over the canonicalized JSON encoding of `obj`.\n *\n * The same primitive `signManifest` and `verifyManifest` are built on, exposed\n * directly so consumers signing arbitrary structured content (artifact bundles,\n * production packets, dataset manifests, etc.) don't have to re-derive\n * canonicalize+sha256 from scratch.\n *\n * Stable across:\n * - object key insertion order (canonicalization sorts keys recursively)\n * - encoder choice (UTF-8 via TextEncoder, fixed)\n * - runtime (uses the Web Crypto subtle digest, present in Node ≥18 and browsers)\n *\n * Naming note: `hashJson` rather than `hashContent` because `hashContent` is\n * already taken in `prompt-registry.ts` for the truncated 12-char prompt-id\n * helper, which has different semantics (string input, short return). Both\n * coexist; `hashJson` is the right name when you mean \"canonicalize then hash.\"\n *\n * @example\n * const hash = await hashJson({ id: '1', kind: 'spec' })\n * // 'a3f1...' (64 hex chars)\n */\nexport async function hashJson<T>(obj: T): Promise<string> {\n const canonical = canonicalize(obj)\n const bytes = new TextEncoder().encode(JSON.stringify(canonical))\n const digest = await globalThis.crypto.subtle.digest('SHA-256', bytes)\n return Array.from(new Uint8Array(digest))\n .map((b) => b.toString(16).padStart(2, '0'))\n .join('')\n}\n\n/**\n * Sign a manifest with a SHA-256 content hash.\n *\n * The hash covers the canonicalized manifest with the `contentHash`\n * and `algo` fields stripped; this lets verifiers re-sign the rest and\n * compare. Returned manifest always carries `algo: 'sha256-content'`\n * so downstream consumers can identify the scheme; legacy serialized\n * manifests without `algo` still verify because it is stripped before\n * hashing on both sides.\n */\nexport async function signManifest(m: HypothesisManifest): Promise<SignedManifest> {\n const hash = await hashJson(m)\n return { ...m, contentHash: hash, algo: 'sha256-content' }\n}\n\n/**\n * Verify that a signed manifest has not been tampered with.\n *\n * Strips `contentHash` and `algo` before re-signing so legacy manifests\n * (written before `algo` was emitted) verify identically to current\n * ones.\n */\nexport async function verifyManifest(m: SignedManifest): Promise<boolean> {\n const { contentHash, algo: _algo, ...rest } = m\n void _algo\n const resigned = await signManifest(rest)\n return resigned.contentHash === contentHash\n}\n\n/**\n * Evaluate a pre-registered hypothesis against observed results.\n * Mechanical — no re-interpretation permitted.\n */\nexport async function evaluateHypothesis(\n manifest: SignedManifest,\n observed: { n: number; effect: number; pValue: number },\n): Promise<HypothesisResult> {\n if (!(await verifyManifest(manifest))) {\n throw new Error('evaluateHypothesis: manifest content hash mismatch (tampered)')\n }\n const reasons: HypothesisResult['rejectionReasons'] = []\n const directionOk =\n manifest.direction === 'increase' ? observed.effect > 0 : observed.effect < 0\n if (!directionOk) reasons.push('wrong_direction')\n if (Math.abs(observed.effect) < manifest.minEffect) reasons.push('effect_too_small')\n if (observed.pValue >= manifest.alpha) reasons.push('not_significant')\n if (observed.n < manifest.preRegisteredN) reasons.push('undersampled')\n return {\n manifest,\n observedN: observed.n,\n observedEffect: observed.effect,\n observedPValue: observed.pValue,\n confirmed: reasons.length === 0,\n rejectionReasons: reasons,\n }\n}\n"],"mappings":";AAmFO,SAAS,aAAa,GAAqB;AAChD,MAAI,MAAM,QAAQ,OAAO,MAAM,SAAU,QAAO;AAChD,MAAI,MAAM,QAAQ,CAAC,EAAG,QAAO,EAAE,IAAI,YAAY;AAC/C,QAAM,OAAO,OAAO,KAAK,CAA4B,EAAE,KAAK;AAC5D,QAAM,MAA+B,CAAC;AACtC,aAAW,KAAK,KAAM,KAAI,CAAC,IAAI,aAAc,EAA8B,CAAC,CAAC;AAC7E,SAAO;AACT;AAwBA,eAAsB,SAAY,KAAyB;AACzD,QAAM,YAAY,aAAa,GAAG;AAClC,QAAM,QAAQ,IAAI,YAAY,EAAE,OAAO,KAAK,UAAU,SAAS,CAAC;AAChE,QAAM,SAAS,MAAM,WAAW,OAAO,OAAO,OAAO,WAAW,KAAK;AACrE,SAAO,MAAM,KAAK,IAAI,WAAW,MAAM,CAAC,EACrC,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,EAC1C,KAAK,EAAE;AACZ;AAYA,eAAsB,aAAa,GAAgD;AACjF,QAAM,OAAO,MAAM,SAAS,CAAC;AAC7B,SAAO,EAAE,GAAG,GAAG,aAAa,MAAM,MAAM,iBAAiB;AAC3D;AASA,eAAsB,eAAe,GAAqC;AACxE,QAAM,EAAE,aAAa,MAAM,OAAO,GAAG,KAAK,IAAI;AAC9C,OAAK;AACL,QAAM,WAAW,MAAM,aAAa,IAAI;AACxC,SAAO,SAAS,gBAAgB;AAClC;AAMA,eAAsB,mBACpB,UACA,UAC2B;AAC3B,MAAI,CAAE,MAAM,eAAe,QAAQ,GAAI;AACrC,UAAM,IAAI,MAAM,+DAA+D;AAAA,EACjF;AACA,QAAM,UAAgD,CAAC;AACvD,QAAM,cACJ,SAAS,cAAc,aAAa,SAAS,SAAS,IAAI,SAAS,SAAS;AAC9E,MAAI,CAAC,YAAa,SAAQ,KAAK,iBAAiB;AAChD,MAAI,KAAK,IAAI,SAAS,MAAM,IAAI,SAAS,UAAW,SAAQ,KAAK,kBAAkB;AACnF,MAAI,SAAS,UAAU,SAAS,MAAO,SAAQ,KAAK,iBAAiB;AACrE,MAAI,SAAS,IAAI,SAAS,eAAgB,SAAQ,KAAK,cAAc;AACrE,SAAO;AAAA,IACL;AAAA,IACA,WAAW,SAAS;AAAA,IACpB,gBAAgB,SAAS;AAAA,IACzB,gBAAgB,SAAS;AAAA,IACzB,WAAW,QAAQ,WAAW;AAAA,IAC9B,kBAAkB;AAAA,EACpB;AACF;","names":[]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/release-confidence.ts","../src/release-report.ts","../src/promotion-gate.ts"],"sourcesContent":["/**\n * Release confidence gate.\n *\n * This is the production-facing composition layer over the lower-level\n * primitives:\n * - Dataset manifests prove corpus/version coverage.\n * - RunRecord rows prove reproducible search/holdout outcomes.\n * - Multi-shot trace evidence carries turn counts and ASI diagnostics.\n * - HeldOutGate decisions remain the paired promotion authority.\n *\n * The gate is intentionally pure and conservative. Missing declared evidence\n * fails closed instead of being treated as a neutral zero.\n */\n\nimport type { DatasetManifest, DatasetScenario, DatasetSplit } from './dataset'\nimport type { GateDecision } from './held-out-gate'\nimport type { ActionableSideInfo, MultiShotTrialResult } from './multi-shot-optimization'\nimport type { RunRecord, RunSplitTag } from './run-record'\n\nexport type ReleaseConfidenceStatus = 'pass' | 'warn' | 'fail'\nexport type ReleaseConfidenceAxisName =\n | 'corpus'\n | 'quality'\n | 'generalization'\n | 'diagnostics'\n | 'efficiency'\n\nexport interface ReleaseTraceEvidence {\n scenarioId: string\n candidateId?: string\n split?: RunSplitTag\n score?: number\n ok?: boolean\n turnCount?: number\n costUsd?: number\n durationMs?: number\n failureMode?: string\n asi?: ActionableSideInfo[]\n metadata?: Record<string, unknown>\n}\n\nexport interface ReleaseConfidenceThresholds {\n /** Require a Dataset manifest or explicit scenarios. Default true. */\n requireCorpus?: boolean\n minScenarioCount?: number\n minSearchRuns?: number\n minHoldoutRuns?: number\n /** Require at least one holdout scenario/run. Default true. */\n requireHoldout?: boolean\n minPassRate?: number\n minMeanScore?: number\n /** Search mean may exceed holdout mean by at most this much. */\n maxOverfitGap?: number\n maxMeanCostUsd?: number\n maxP95WallMs?: number\n /** Low-score/failed rows must carry ASI. Default true. */\n requireAsiForFailures?: boolean\n /** Score below this is considered a failure for ASI coverage. Default 0.5. */\n failureScoreThreshold?: number\n}\n\nexport interface ReleaseConfidenceInput {\n target: string\n candidateId?: string\n baselineId?: string\n dataset?: DatasetManifest\n scenarios?: readonly DatasetScenario[]\n runs?: readonly RunRecord[]\n traces?: readonly ReleaseTraceEvidence[]\n gateDecision?: GateDecision | null\n thresholds?: ReleaseConfidenceThresholds\n}\n\nexport interface ReleaseConfidenceAxis {\n name: ReleaseConfidenceAxisName\n status: ReleaseConfidenceStatus\n score: number\n detail: string\n}\n\nexport interface ReleaseConfidenceIssue {\n axis: ReleaseConfidenceAxisName\n severity: 'critical' | 'warning'\n code: string\n detail: string\n}\n\nexport interface ReleaseConfidenceMetrics {\n scenarioCount: number\n searchRuns: number\n holdoutRuns: number\n passRate: number\n meanScore: number\n searchMeanScore: number\n holdoutMeanScore: number\n overfitGap: number\n meanCostUsd: number\n p95WallMs: number\n failedRows: number\n failuresWithAsi: number\n singleShotTraces: number\n multiShotTraces: number\n splitCounts: Record<DatasetSplit, number>\n domainCounts: Record<string, number>\n failureModeCounts: Record<string, number>\n responsibleSurfaceCounts: Record<string, number>\n}\n\nexport interface ReleaseConfidenceScorecard {\n target: string\n candidateId: string | null\n baselineId: string | null\n status: ReleaseConfidenceStatus\n promote: boolean\n axes: ReleaseConfidenceAxis[]\n issues: ReleaseConfidenceIssue[]\n metrics: ReleaseConfidenceMetrics\n dataset: DatasetManifest | null\n gateDecision: GateDecision | null\n summary: string\n}\n\nconst DEFAULT_THRESHOLDS: Required<ReleaseConfidenceThresholds> = {\n requireCorpus: true,\n minScenarioCount: 1,\n minSearchRuns: 1,\n minHoldoutRuns: 1,\n requireHoldout: true,\n minPassRate: 0.8,\n minMeanScore: 0.7,\n maxOverfitGap: 0.15,\n maxMeanCostUsd: Number.POSITIVE_INFINITY,\n maxP95WallMs: Number.POSITIVE_INFINITY,\n requireAsiForFailures: true,\n failureScoreThreshold: 0.5,\n}\n\nexport function releaseTraceEvidenceFromMultiShotTrials(\n trials: readonly MultiShotTrialResult[],\n): ReleaseTraceEvidence[] {\n return trials.map((trial) => ({\n scenarioId: trial.scenarioId,\n candidateId: trial.variantId,\n split: trial.split === 'holdout' ? 'holdout' : trial.split === 'dev' ? 'dev' : 'search',\n score: trial.score,\n ok: trial.ok,\n turnCount: Array.isArray(trial.trace?.turns) ? trial.trace.turns.length : undefined,\n costUsd: trial.cost,\n durationMs: trial.durationMs,\n failureMode: trial.error ? 'runtime_error' : undefined,\n asi: trial.asi,\n metadata: trial.metadata,\n }))\n}\n\nexport function evaluateReleaseConfidence(input: ReleaseConfidenceInput): ReleaseConfidenceScorecard {\n const thresholds = { ...DEFAULT_THRESHOLDS, ...input.thresholds }\n const candidateId = input.candidateId ?? null\n const runs = filterCandidate(input.runs ?? [], candidateId, input.baselineId)\n const traces = filterTraceCandidate(input.traces ?? [], candidateId, input.baselineId)\n const scenarios = input.scenarios ?? []\n const scenarioCount = input.dataset?.scenarioCount ?? scenarios.length\n const splitCounts = input.dataset?.splitCounts ?? countScenarioSplits(scenarios)\n const searchScores = scoresFor(runs, 'search')\n const holdoutScores = scoresFor(runs, 'holdout')\n const allScores = [...searchScores, ...holdoutScores]\n const traceScores = traces.map((t) => t.score).filter(isFiniteNumber)\n const scoreUniverse = allScores.length > 0 ? allScores : traceScores\n const searchRuns = runs.filter((r) => r.splitTag === 'search').length\n const holdoutRuns = runs.filter((r) => r.splitTag === 'holdout').length\n const searchMeanScore = mean(searchScores)\n const holdoutMeanScore = mean(holdoutScores)\n const metrics: ReleaseConfidenceMetrics = {\n scenarioCount,\n searchRuns,\n holdoutRuns,\n passRate: passRate(runs, traces, thresholds.failureScoreThreshold),\n meanScore: mean(scoreUniverse),\n searchMeanScore,\n holdoutMeanScore,\n overfitGap: safeDiff(searchMeanScore, holdoutMeanScore),\n meanCostUsd: mean([...runs.map((r) => r.costUsd), ...traces.map((t) => t.costUsd).filter(isFiniteNumber)]),\n p95WallMs: percentile([...runs.map((r) => r.wallMs), ...traces.map((t) => t.durationMs).filter(isFiniteNumber)], 0.95),\n failedRows: failedRows(runs, traces, thresholds.failureScoreThreshold).length,\n failuresWithAsi: failedRows(runs, traces, thresholds.failureScoreThreshold).filter((row) => row.hasAsi).length,\n singleShotTraces: traces.filter((t) => t.turnCount === 1).length,\n multiShotTraces: traces.filter((t) => (t.turnCount ?? 0) > 1).length,\n splitCounts,\n domainCounts: countDomains(scenarios),\n failureModeCounts: countFailureModes(runs, traces, thresholds.failureScoreThreshold),\n responsibleSurfaceCounts: countResponsibleSurfaces(traces),\n }\n\n const issues: ReleaseConfidenceIssue[] = []\n checkCorpus(input, thresholds, metrics, issues)\n checkQuality(thresholds, metrics, issues)\n checkGeneralization(input.gateDecision ?? null, thresholds, metrics, issues)\n checkDiagnostics(thresholds, metrics, issues)\n checkEfficiency(thresholds, metrics, issues)\n\n const axes = buildAxes(metrics, thresholds, input.gateDecision ?? null, issues)\n const status = issues.some((i) => i.severity === 'critical') ? 'fail'\n : issues.length > 0 ? 'warn'\n : 'pass'\n\n return {\n target: input.target,\n candidateId,\n baselineId: input.baselineId ?? null,\n status,\n promote: status === 'pass' && (input.gateDecision ? input.gateDecision.promote : true),\n axes,\n issues,\n metrics,\n dataset: input.dataset ?? null,\n gateDecision: input.gateDecision ?? null,\n summary: renderSummary(input.target, status, metrics, issues),\n }\n}\n\nexport function assertReleaseConfidence(input: ReleaseConfidenceInput): ReleaseConfidenceScorecard {\n const scorecard = evaluateReleaseConfidence(input)\n if (scorecard.status === 'fail') {\n throw new Error(scorecard.summary)\n }\n return scorecard\n}\n\nfunction filterCandidate(\n runs: readonly RunRecord[],\n candidateId: string | null,\n baselineId?: string,\n): RunRecord[] {\n if (candidateId) return runs.filter((r) => r.candidateId === candidateId)\n if (baselineId) return runs.filter((r) => r.candidateId !== baselineId)\n return [...runs]\n}\n\nfunction filterTraceCandidate(\n traces: readonly ReleaseTraceEvidence[],\n candidateId: string | null,\n baselineId?: string,\n): ReleaseTraceEvidence[] {\n if (candidateId) return traces.filter((t) => t.candidateId === undefined || t.candidateId === candidateId)\n if (baselineId) return traces.filter((t) => t.candidateId === undefined || t.candidateId !== baselineId)\n return [...traces]\n}\n\nfunction checkCorpus(\n input: ReleaseConfidenceInput,\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (thresholds.requireCorpus && !input.dataset && (input.scenarios?.length ?? 0) === 0) {\n issues.push({ axis: 'corpus', severity: 'critical', code: 'missing_corpus', detail: 'No Dataset manifest or scenarios supplied.' })\n }\n if (metrics.scenarioCount < thresholds.minScenarioCount) {\n issues.push({ axis: 'corpus', severity: 'critical', code: 'few_scenarios', detail: `${metrics.scenarioCount} scenario(s) < min ${thresholds.minScenarioCount}.` })\n }\n if (thresholds.requireHoldout && metrics.splitCounts.holdout === 0) {\n issues.push({ axis: 'corpus', severity: 'critical', code: 'missing_holdout_split', detail: 'Corpus has no holdout scenarios.' })\n }\n}\n\nfunction checkQuality(\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (metrics.searchRuns < thresholds.minSearchRuns) {\n issues.push({ axis: 'quality', severity: 'critical', code: 'few_search_runs', detail: `${metrics.searchRuns} search run(s) < min ${thresholds.minSearchRuns}.` })\n }\n if (metrics.passRate < thresholds.minPassRate) {\n issues.push({ axis: 'quality', severity: 'critical', code: 'low_pass_rate', detail: `passRate ${fmt(metrics.passRate)} < ${fmt(thresholds.minPassRate)}.` })\n }\n if (metrics.meanScore < thresholds.minMeanScore) {\n issues.push({ axis: 'quality', severity: 'critical', code: 'low_mean_score', detail: `meanScore ${fmt(metrics.meanScore)} < ${fmt(thresholds.minMeanScore)}.` })\n }\n}\n\nfunction checkGeneralization(\n gateDecision: GateDecision | null,\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (thresholds.requireHoldout && metrics.holdoutRuns < thresholds.minHoldoutRuns) {\n issues.push({ axis: 'generalization', severity: 'critical', code: 'few_holdout_runs', detail: `${metrics.holdoutRuns} holdout run(s) < min ${thresholds.minHoldoutRuns}.` })\n }\n if (Number.isFinite(metrics.overfitGap) && metrics.overfitGap > thresholds.maxOverfitGap) {\n issues.push({ axis: 'generalization', severity: 'critical', code: 'overfit_gap', detail: `search-holdout gap ${fmt(metrics.overfitGap)} > ${fmt(thresholds.maxOverfitGap)}.` })\n }\n if (gateDecision && !gateDecision.promote) {\n issues.push({ axis: 'generalization', severity: 'critical', code: `gate_${gateDecision.rejectionCode ?? 'reject'}`, detail: gateDecision.reason })\n }\n}\n\nfunction checkDiagnostics(\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (!thresholds.requireAsiForFailures) return\n if (metrics.failedRows > metrics.failuresWithAsi) {\n issues.push({\n axis: 'diagnostics',\n severity: 'critical',\n code: 'missing_failure_asi',\n detail: `${metrics.failedRows - metrics.failuresWithAsi} failed row(s) have no actionable side information.`,\n })\n }\n}\n\nfunction checkEfficiency(\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (metrics.meanCostUsd > thresholds.maxMeanCostUsd) {\n issues.push({ axis: 'efficiency', severity: 'critical', code: 'cost_budget', detail: `meanCostUsd ${fmt(metrics.meanCostUsd)} > ${fmt(thresholds.maxMeanCostUsd)}.` })\n }\n if (metrics.p95WallMs > thresholds.maxP95WallMs) {\n issues.push({ axis: 'efficiency', severity: 'critical', code: 'latency_budget', detail: `p95WallMs ${fmt(metrics.p95WallMs)} > ${fmt(thresholds.maxP95WallMs)}.` })\n }\n}\n\nfunction buildAxes(\n metrics: ReleaseConfidenceMetrics,\n thresholds: Required<ReleaseConfidenceThresholds>,\n gateDecision: GateDecision | null,\n issues: ReleaseConfidenceIssue[],\n): ReleaseConfidenceAxis[] {\n return [\n axis('corpus', issues, bounded(metrics.scenarioCount / Math.max(1, thresholds.minScenarioCount)), `${metrics.scenarioCount} scenarios; holdout=${metrics.splitCounts.holdout}`),\n axis('quality', issues, Math.min(metrics.passRate, metrics.meanScore), `passRate=${fmt(metrics.passRate)} meanScore=${fmt(metrics.meanScore)}`),\n axis('generalization', issues, gateDecision && !gateDecision.promote ? 0 : gapScore(metrics.overfitGap, thresholds.maxOverfitGap), `holdoutRuns=${metrics.holdoutRuns} overfitGap=${fmt(metrics.overfitGap)}`),\n axis('diagnostics', issues, metrics.failedRows === 0 ? 1 : metrics.failuresWithAsi / metrics.failedRows, `failuresWithAsi=${metrics.failuresWithAsi}/${metrics.failedRows}`),\n axis('efficiency', issues, efficiencyScore(metrics, thresholds), `meanCostUsd=${fmt(metrics.meanCostUsd)} p95WallMs=${fmt(metrics.p95WallMs)}`),\n ]\n}\n\nfunction axis(\n name: ReleaseConfidenceAxisName,\n issues: ReleaseConfidenceIssue[],\n score: number,\n detail: string,\n): ReleaseConfidenceAxis {\n const own = issues.filter((i) => i.axis === name)\n const status = own.some((i) => i.severity === 'critical') ? 'fail'\n : own.length > 0 ? 'warn'\n : 'pass'\n return { name, status, score: bounded(score), detail }\n}\n\nfunction countScenarioSplits(scenarios: readonly DatasetScenario[]): Record<DatasetSplit, number> {\n const counts: Record<DatasetSplit, number> = { train: 0, dev: 0, test: 0, holdout: 0 }\n for (const scenario of scenarios) counts[scenario.split ?? 'train']++\n return counts\n}\n\nfunction countDomains(scenarios: readonly DatasetScenario[]): Record<string, number> {\n const out: Record<string, number> = {}\n for (const scenario of scenarios) {\n const domain = scenario.tags?.domain ?? scenario.tags?.category ?? 'uncategorized'\n out[domain] = (out[domain] ?? 0) + 1\n }\n return out\n}\n\nfunction countFailureModes(\n runs: readonly RunRecord[],\n traces: readonly ReleaseTraceEvidence[],\n threshold: number,\n): Record<string, number> {\n const out: Record<string, number> = {}\n for (const run of runs) {\n const score = run.outcome.holdoutScore ?? run.outcome.searchScore\n if (run.failureMode || (score !== undefined && score < threshold)) {\n const mode = run.failureMode ?? 'low_score'\n out[mode] = (out[mode] ?? 0) + 1\n }\n }\n for (const trace of traces) {\n if (trace.failureMode || trace.ok === false || (trace.score !== undefined && trace.score < threshold)) {\n const mode = trace.failureMode ?? (trace.ok === false ? 'not_ok' : 'low_score')\n out[mode] = (out[mode] ?? 0) + 1\n }\n }\n return out\n}\n\nfunction countResponsibleSurfaces(traces: readonly ReleaseTraceEvidence[]): Record<string, number> {\n const out: Record<string, number> = {}\n for (const trace of traces) {\n for (const asi of trace.asi ?? []) {\n const surface = asi.responsibleSurface ?? 'unknown'\n out[surface] = (out[surface] ?? 0) + 1\n }\n }\n return out\n}\n\nfunction failedRows(\n runs: readonly RunRecord[],\n traces: readonly ReleaseTraceEvidence[],\n threshold: number,\n): Array<{ hasAsi: boolean }> {\n const out: Array<{ hasAsi: boolean }> = []\n for (const run of runs) {\n const score = run.outcome.holdoutScore ?? run.outcome.searchScore\n if (run.failureMode || (score !== undefined && score < threshold)) {\n const asiMetric = run.outcome.raw.asi\n out.push({ hasAsi: typeof asiMetric === 'number' && asiMetric > 0 })\n }\n }\n for (const trace of traces) {\n if (trace.failureMode || trace.ok === false || (trace.score !== undefined && trace.score < threshold)) {\n out.push({ hasAsi: (trace.asi?.length ?? 0) > 0 })\n }\n }\n return out\n}\n\nfunction passRate(\n runs: readonly RunRecord[],\n traces: readonly ReleaseTraceEvidence[],\n threshold: number,\n): number {\n const outcomes = [\n ...runs.map((run) => {\n const score = run.outcome.holdoutScore ?? run.outcome.searchScore\n return !run.failureMode && score !== undefined && score >= threshold\n }),\n ...traces.map((trace) => trace.ok !== false && (trace.score === undefined || trace.score >= threshold)),\n ]\n if (outcomes.length === 0) return 0\n return outcomes.filter(Boolean).length / outcomes.length\n}\n\nfunction scoresFor(runs: readonly RunRecord[], split: RunSplitTag): number[] {\n return runs\n .filter((run) => run.splitTag === split)\n .map((run) => split === 'holdout' ? run.outcome.holdoutScore : run.outcome.searchScore)\n .filter(isFiniteNumber)\n}\n\nfunction mean(xs: readonly number[]): number {\n if (xs.length === 0) return Number.NaN\n return xs.reduce((sum, x) => sum + x, 0) / xs.length\n}\n\nfunction percentile(xs: readonly number[], p: number): number {\n if (xs.length === 0) return Number.NaN\n const sorted = [...xs].sort((a, b) => a - b)\n return sorted[Math.min(sorted.length - 1, Math.max(0, Math.ceil(p * sorted.length) - 1))]!\n}\n\nfunction isFiniteNumber(value: unknown): value is number {\n return typeof value === 'number' && Number.isFinite(value)\n}\n\nfunction safeDiff(a: number, b: number): number {\n if (!Number.isFinite(a) || !Number.isFinite(b)) return Number.NaN\n return a - b\n}\n\nfunction gapScore(gap: number, maxGap: number): number {\n if (!Number.isFinite(gap)) return 0\n if (maxGap <= 0) return gap <= 0 ? 1 : 0\n return bounded(1 - Math.max(0, gap) / maxGap)\n}\n\nfunction efficiencyScore(\n metrics: ReleaseConfidenceMetrics,\n thresholds: Required<ReleaseConfidenceThresholds>,\n): number {\n const cost = Number.isFinite(thresholds.maxMeanCostUsd) && Number.isFinite(metrics.meanCostUsd)\n ? bounded(thresholds.maxMeanCostUsd / Math.max(metrics.meanCostUsd, 1e-12))\n : 1\n const latency = Number.isFinite(thresholds.maxP95WallMs) && Number.isFinite(metrics.p95WallMs)\n ? bounded(thresholds.maxP95WallMs / Math.max(metrics.p95WallMs, 1e-12))\n : 1\n return Math.min(cost, latency)\n}\n\nfunction bounded(x: number): number {\n if (!Number.isFinite(x)) return 0\n return Math.max(0, Math.min(1, x))\n}\n\nfunction renderSummary(\n target: string,\n status: ReleaseConfidenceStatus,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): string {\n const prefix = `release confidence ${status}: ${target}`\n const metricText = `scenarios=${metrics.scenarioCount} searchRuns=${metrics.searchRuns} holdoutRuns=${metrics.holdoutRuns} passRate=${fmt(metrics.passRate)} meanScore=${fmt(metrics.meanScore)}`\n if (issues.length === 0) return `${prefix}; ${metricText}`\n return `${prefix}; ${metricText}; issues=${issues.map((i) => i.code).join(',')}`\n}\n\nfunction fmt(x: number): string {\n if (!Number.isFinite(x)) return String(x)\n return x.toFixed(4)\n}\n","import type { ReleaseConfidenceScorecard } from './release-confidence'\nimport { summaryTable } from './summary-report'\nimport type { RunRecord } from './run-record'\n\nexport interface RenderReleaseReportOptions {\n title?: string\n runs?: readonly RunRecord[]\n comparator?: string\n traceAnalystFindings?: readonly string[]\n nextActions?: readonly string[]\n}\n\nexport function renderReleaseReport(\n scorecard: ReleaseConfidenceScorecard,\n options: RenderReleaseReportOptions = {},\n): string {\n const title = options.title ?? `Release Report: ${scorecard.target}`\n const lines: string[] = []\n lines.push(`# ${title}`)\n lines.push('')\n lines.push(`Status: **${scorecard.status.toUpperCase()}**`)\n lines.push(`Promote: **${scorecard.promote ? 'yes' : 'no'}**`)\n if (scorecard.candidateId) lines.push(`Candidate: \\`${scorecard.candidateId}\\``)\n if (scorecard.baselineId) lines.push(`Baseline: \\`${scorecard.baselineId}\\``)\n lines.push('')\n lines.push(scorecard.summary)\n lines.push('')\n\n lines.push('## Metrics')\n lines.push('')\n lines.push('| Metric | Value |')\n lines.push('|---|---:|')\n lines.push(`| Scenarios | ${scorecard.metrics.scenarioCount} |`)\n lines.push(`| Search runs | ${scorecard.metrics.searchRuns} |`)\n lines.push(`| Holdout runs | ${scorecard.metrics.holdoutRuns} |`)\n lines.push(`| Pass rate | ${pct(scorecard.metrics.passRate)} |`)\n lines.push(`| Mean score | ${num(scorecard.metrics.meanScore)} |`)\n lines.push(`| Search mean | ${num(scorecard.metrics.searchMeanScore)} |`)\n lines.push(`| Holdout mean | ${num(scorecard.metrics.holdoutMeanScore)} |`)\n lines.push(`| Overfit gap | ${num(scorecard.metrics.overfitGap)} |`)\n lines.push(`| Mean cost | $${num(scorecard.metrics.meanCostUsd)} |`)\n lines.push(`| p95 wall time | ${Math.round(scorecard.metrics.p95WallMs)} ms |`)\n lines.push('')\n\n if (scorecard.issues.length > 0) {\n lines.push('## Issues')\n lines.push('')\n for (const issue of scorecard.issues) {\n lines.push(`- **${issue.severity}** \\`${issue.code}\\` (${issue.axis}): ${issue.detail}`)\n }\n lines.push('')\n }\n\n const surfaces = entries(scorecard.metrics.responsibleSurfaceCounts)\n if (surfaces.length > 0) {\n lines.push('## Responsible Surfaces')\n lines.push('')\n for (const [surface, count] of surfaces) lines.push(`- ${surface}: ${count}`)\n lines.push('')\n }\n\n const failures = entries(scorecard.metrics.failureModeCounts)\n if (failures.length > 0) {\n lines.push('## Failure Modes')\n lines.push('')\n for (const [mode, count] of failures) lines.push(`- ${mode}: ${count}`)\n lines.push('')\n }\n\n if (options.runs && options.runs.length > 0) {\n lines.push('## Run Summary')\n lines.push('')\n lines.push(summaryTable([...options.runs], {\n comparator: options.comparator ?? scorecard.baselineId ?? undefined,\n split: 'holdout',\n }).markdown)\n lines.push('')\n }\n\n if (options.traceAnalystFindings && options.traceAnalystFindings.length > 0) {\n lines.push('## TraceAnalyst Findings')\n lines.push('')\n for (const finding of options.traceAnalystFindings) lines.push(`- ${finding}`)\n lines.push('')\n }\n\n const nextActions = options.nextActions ?? defaultNextActions(scorecard)\n if (nextActions.length > 0) {\n lines.push('## Next Actions')\n lines.push('')\n for (const action of nextActions) lines.push(`- ${action}`)\n lines.push('')\n }\n\n return lines.join('\\n').trimEnd() + '\\n'\n}\n\nfunction defaultNextActions(scorecard: ReleaseConfidenceScorecard): string[] {\n if (scorecard.promote) return ['Promote the candidate and keep canaries enabled.']\n return scorecard.issues\n .filter((issue) => issue.severity === 'critical')\n .map((issue) => `Resolve ${issue.code}: ${issue.detail}`)\n}\n\nfunction entries(values: Record<string, number>): Array<[string, number]> {\n return Object.entries(values)\n .filter(([, count]) => count > 0)\n .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))\n}\n\nfunction pct(value: number): string {\n return Number.isFinite(value) ? `${(value * 100).toFixed(1)}%` : 'n/a'\n}\n\nfunction num(value: number): string {\n return Number.isFinite(value) ? value.toFixed(3) : 'n/a'\n}\n","/**\n * Bootstrap-CI promotion gate.\n *\n * In any iterative-improvement loop (GEPA, prompt evolution, dataset\n * curation), the question is \"did this generation actually improve, or are\n * we celebrating noise?\". With small N and noisy outcomes, point-estimate\n * deltas lie. Bootstrap confidence intervals tell the operator whether the\n * delta is real before code or prompts get promoted.\n *\n * This module is pure functions — no I/O, no model calls. Easy to unit-test\n * and to compose into any verdict gate.\n *\n * Default gate:\n * - Bootstrap mean baseline vs candidate (1k resamples).\n * - Compute the delta distribution; pass if the lower CI bound > 0.\n * - Tunable confidence (default 95%) and resample count.\n *\n * Verdict semantics intentionally match the existing `experiments.jsonl`\n * vocabulary:\n * - ADVANCE: candidate's CI lower bound > baseline mean (real win)\n * - KEEP: overlap, but candidate point estimate >= baseline (neutral)\n * - REVERT: candidate's CI upper bound < baseline mean (real regression)\n * - INCONCLUSIVE: not enough samples or CI straddles zero with no signal\n */\n\nexport type Verdict = 'ADVANCE' | 'KEEP' | 'REVERT' | 'INCONCLUSIVE'\n\nexport interface BootstrapResult {\n baselineMean: number\n candidateMean: number\n /** candidateMean - baselineMean, point estimate. */\n delta: number\n /** Lower bound of the (1 - alpha) CI on the delta. */\n ciLower: number\n /** Upper bound of the (1 - alpha) CI on the delta. */\n ciUpper: number\n /** Number of bootstrap resamples used. */\n iterations: number\n alpha: number\n verdict: Verdict\n}\n\nexport interface BootstrapOptions {\n /** Confidence level alpha (default 0.05 → 95% CI). */\n alpha?: number\n /** Number of resamples (default 1000). */\n iterations?: number\n /**\n * Minimum total samples (baseline + candidate) below which we always\n * return INCONCLUSIVE — bootstrap with too few samples is meaningless.\n * Default 6 (combined).\n */\n minTotalSamples?: number\n /** RNG seed for reproducibility. Default: Math.random. */\n seed?: number\n}\n\n/**\n * Compute the bootstrap CI on (candidateMean - baselineMean) and a verdict.\n *\n * Uses simple percentile bootstrap on the difference of resampled means.\n * That's the standard non-parametric primitive — no distributional\n * assumptions, robust to skew, easy to reason about.\n */\nexport function bootstrapCi(\n baseline: number[],\n candidate: number[],\n options: BootstrapOptions = {},\n): BootstrapResult {\n const alpha = options.alpha ?? 0.05\n const iterations = options.iterations ?? 1000\n const minTotal = options.minTotalSamples ?? 6\n const rng = mulberry32(options.seed ?? hashSeed(baseline, candidate))\n\n const baselineMean = mean(baseline)\n const candidateMean = mean(candidate)\n const delta = candidateMean - baselineMean\n\n if (baseline.length + candidate.length < minTotal || baseline.length === 0 || candidate.length === 0) {\n return {\n baselineMean,\n candidateMean,\n delta,\n ciLower: -Infinity,\n ciUpper: Infinity,\n iterations: 0,\n alpha,\n verdict: 'INCONCLUSIVE',\n }\n }\n\n const deltas: number[] = new Array(iterations)\n for (let i = 0; i < iterations; i++) {\n const bResample = resample(baseline, rng)\n const cResample = resample(candidate, rng)\n deltas[i] = mean(cResample) - mean(bResample)\n }\n deltas.sort((a, b) => a - b)\n const lowerIdx = Math.floor((alpha / 2) * iterations)\n const upperIdx = Math.floor((1 - alpha / 2) * iterations) - 1\n const ciLower = deltas[Math.max(0, lowerIdx)]!\n const ciUpper = deltas[Math.min(iterations - 1, upperIdx)]!\n\n let verdict: Verdict\n if (ciLower > 0) verdict = 'ADVANCE'\n else if (ciUpper < 0) verdict = 'REVERT'\n else if (delta >= 0) verdict = 'KEEP'\n else verdict = 'INCONCLUSIVE'\n\n return {\n baselineMean,\n candidateMean,\n delta,\n ciLower,\n ciUpper,\n iterations,\n alpha,\n verdict,\n }\n}\n\nfunction mean(xs: number[]): number {\n if (xs.length === 0) return 0\n let s = 0\n for (const x of xs) s += x\n return s / xs.length\n}\n\nfunction resample(xs: number[], rng: () => number): number[] {\n const out = new Array(xs.length)\n for (let i = 0; i < xs.length; i++) out[i] = xs[Math.floor(rng() * xs.length)]\n return out\n}\n\n/** Mulberry32 — fast deterministic PRNG. Stable across runs given the same seed. */\nfunction mulberry32(seed: number): () => number {\n let t = seed >>> 0\n return () => {\n t += 0x6d2b79f5\n let r = t\n r = Math.imul(r ^ (r >>> 15), r | 1)\n r ^= r + Math.imul(r ^ (r >>> 7), r | 61)\n return ((r ^ (r >>> 14)) >>> 0) / 4294967296\n }\n}\n\n/** Stable seed derived from the inputs — same data → same CI bounds. */\nfunction hashSeed(a: number[], b: number[]): number {\n let h = 2166136261\n for (const x of [...a, ...b]) {\n const view = new Float64Array([x])\n const bytes = new Uint8Array(view.buffer)\n for (const byte of bytes) {\n h ^= byte\n h = Math.imul(h, 16777619)\n }\n }\n return h >>> 0\n}\n\n/**\n * Judge-replay promotion gate.\n *\n * The cheap inner-loop judge that drives an evolution run is by definition\n * fast and noisy. When you're about to promote a winning variant to the\n * canonical default, you want a STRONGER judge (a more expensive model, a\n * human grader, a separately-trained reward model) to confirm the win\n * generalises beyond the inner loop.\n *\n * This helper takes raw winner + baseline outputs, scores both through the\n * stronger judge, and applies `bootstrapCi`. ADVANCE means the stronger\n * judge agrees the winner is real with the configured confidence. Doesn't\n * matter what shape your \"output\" is — pass a string, an object, anything\n * the judge can read.\n */\nexport interface JudgeReplayGateArgs<TOutput> {\n baselineOutputs: TOutput[]\n candidateOutputs: TOutput[]\n /** Stronger judge — async to allow LLM calls. Return a 0..N scalar score. */\n judge: (output: TOutput) => Promise<number> | number\n alpha?: number\n iterations?: number\n /** RNG seed for reproducibility. */\n seed?: number\n /** Maximum concurrent judge calls. Default 4. */\n judgeConcurrency?: number\n}\n\nexport async function judgeReplayGate<TOutput>(\n args: JudgeReplayGateArgs<TOutput>,\n): Promise<BootstrapResult & { baselineSamples: number; candidateSamples: number }> {\n const concurrency = args.judgeConcurrency ?? 4\n const baselineScores = await scoreAll(args.baselineOutputs, args.judge, concurrency)\n const candidateScores = await scoreAll(args.candidateOutputs, args.judge, concurrency)\n const ci = bootstrapCi(baselineScores, candidateScores, {\n ...(args.alpha !== undefined ? { alpha: args.alpha } : {}),\n ...(args.iterations !== undefined ? { iterations: args.iterations } : {}),\n ...(args.seed !== undefined ? { seed: args.seed } : {}),\n })\n return {\n ...ci,\n baselineSamples: baselineScores.length,\n candidateSamples: candidateScores.length,\n }\n}\n\nasync function scoreAll<TOutput>(\n outputs: TOutput[],\n judge: (output: TOutput) => Promise<number> | number,\n concurrency: number,\n): Promise<number[]> {\n const results: number[] = new Array(outputs.length)\n let next = 0\n async function worker(): Promise<void> {\n while (true) {\n const i = next++\n if (i >= outputs.length) return\n const v = await judge(outputs[i]!)\n results[i] = Number.isFinite(v) ? v : 0\n }\n }\n await Promise.all(Array.from({ length: Math.max(1, concurrency) }, () => worker()))\n return results\n}\n"],"mappings":";;;;;AA0HA,IAAM,qBAA4D;AAAA,EAChE,eAAe;AAAA,EACf,kBAAkB;AAAA,EAClB,eAAe;AAAA,EACf,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,eAAe;AAAA,EACf,gBAAgB,OAAO;AAAA,EACvB,cAAc,OAAO;AAAA,EACrB,uBAAuB;AAAA,EACvB,uBAAuB;AACzB;AAEO,SAAS,wCACd,QACwB;AACxB,SAAO,OAAO,IAAI,CAAC,WAAW;AAAA,IAC5B,YAAY,MAAM;AAAA,IAClB,aAAa,MAAM;AAAA,IACnB,OAAO,MAAM,UAAU,YAAY,YAAY,MAAM,UAAU,QAAQ,QAAQ;AAAA,IAC/E,OAAO,MAAM;AAAA,IACb,IAAI,MAAM;AAAA,IACV,WAAW,MAAM,QAAQ,MAAM,OAAO,KAAK,IAAI,MAAM,MAAM,MAAM,SAAS;AAAA,IAC1E,SAAS,MAAM;AAAA,IACf,YAAY,MAAM;AAAA,IAClB,aAAa,MAAM,QAAQ,kBAAkB;AAAA,IAC7C,KAAK,MAAM;AAAA,IACX,UAAU,MAAM;AAAA,EAClB,EAAE;AACJ;AAEO,SAAS,0BAA0B,OAA2D;AACnG,QAAM,aAAa,EAAE,GAAG,oBAAoB,GAAG,MAAM,WAAW;AAChE,QAAM,cAAc,MAAM,eAAe;AACzC,QAAM,OAAO,gBAAgB,MAAM,QAAQ,CAAC,GAAG,aAAa,MAAM,UAAU;AAC5E,QAAM,SAAS,qBAAqB,MAAM,UAAU,CAAC,GAAG,aAAa,MAAM,UAAU;AACrF,QAAM,YAAY,MAAM,aAAa,CAAC;AACtC,QAAM,gBAAgB,MAAM,SAAS,iBAAiB,UAAU;AAChE,QAAM,cAAc,MAAM,SAAS,eAAe,oBAAoB,SAAS;AAC/E,QAAM,eAAe,UAAU,MAAM,QAAQ;AAC7C,QAAM,gBAAgB,UAAU,MAAM,SAAS;AAC/C,QAAM,YAAY,CAAC,GAAG,cAAc,GAAG,aAAa;AACpD,QAAM,cAAc,OAAO,IAAI,CAAC,MAAM,EAAE,KAAK,EAAE,OAAO,cAAc;AACpE,QAAM,gBAAgB,UAAU,SAAS,IAAI,YAAY;AACzD,QAAM,aAAa,KAAK,OAAO,CAAC,MAAM,EAAE,aAAa,QAAQ,EAAE;AAC/D,QAAM,cAAc,KAAK,OAAO,CAAC,MAAM,EAAE,aAAa,SAAS,EAAE;AACjE,QAAM,kBAAkB,KAAK,YAAY;AACzC,QAAM,mBAAmB,KAAK,aAAa;AAC3C,QAAM,UAAoC;AAAA,IACxC;AAAA,IACA;AAAA,IACA;AAAA,IACA,UAAU,SAAS,MAAM,QAAQ,WAAW,qBAAqB;AAAA,IACjE,WAAW,KAAK,aAAa;AAAA,IAC7B;AAAA,IACA;AAAA,IACA,YAAY,SAAS,iBAAiB,gBAAgB;AAAA,IACtD,aAAa,KAAK,CAAC,GAAG,KAAK,IAAI,CAAC,MAAM,EAAE,OAAO,GAAG,GAAG,OAAO,IAAI,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,cAAc,CAAC,CAAC;AAAA,IACzG,WAAW,WAAW,CAAC,GAAG,KAAK,IAAI,CAAC,MAAM,EAAE,MAAM,GAAG,GAAG,OAAO,IAAI,CAAC,MAAM,EAAE,UAAU,EAAE,OAAO,cAAc,CAAC,GAAG,IAAI;AAAA,IACrH,YAAY,WAAW,MAAM,QAAQ,WAAW,qBAAqB,EAAE;AAAA,IACvE,iBAAiB,WAAW,MAAM,QAAQ,WAAW,qBAAqB,EAAE,OAAO,CAAC,QAAQ,IAAI,MAAM,EAAE;AAAA,IACxG,kBAAkB,OAAO,OAAO,CAAC,MAAM,EAAE,cAAc,CAAC,EAAE;AAAA,IAC1D,iBAAiB,OAAO,OAAO,CAAC,OAAO,EAAE,aAAa,KAAK,CAAC,EAAE;AAAA,IAC9D;AAAA,IACA,cAAc,aAAa,SAAS;AAAA,IACpC,mBAAmB,kBAAkB,MAAM,QAAQ,WAAW,qBAAqB;AAAA,IACnF,0BAA0B,yBAAyB,MAAM;AAAA,EAC3D;AAEA,QAAM,SAAmC,CAAC;AAC1C,cAAY,OAAO,YAAY,SAAS,MAAM;AAC9C,eAAa,YAAY,SAAS,MAAM;AACxC,sBAAoB,MAAM,gBAAgB,MAAM,YAAY,SAAS,MAAM;AAC3E,mBAAiB,YAAY,SAAS,MAAM;AAC5C,kBAAgB,YAAY,SAAS,MAAM;AAE3C,QAAM,OAAO,UAAU,SAAS,YAAY,MAAM,gBAAgB,MAAM,MAAM;AAC9E,QAAM,SAAS,OAAO,KAAK,CAAC,MAAM,EAAE,aAAa,UAAU,IAAI,SAC3D,OAAO,SAAS,IAAI,SACpB;AAEJ,SAAO;AAAA,IACL,QAAQ,MAAM;AAAA,IACd;AAAA,IACA,YAAY,MAAM,cAAc;AAAA,IAChC;AAAA,IACA,SAAS,WAAW,WAAW,MAAM,eAAe,MAAM,aAAa,UAAU;AAAA,IACjF;AAAA,IACA;AAAA,IACA;AAAA,IACA,SAAS,MAAM,WAAW;AAAA,IAC1B,cAAc,MAAM,gBAAgB;AAAA,IACpC,SAAS,cAAc,MAAM,QAAQ,QAAQ,SAAS,MAAM;AAAA,EAC9D;AACF;AAEO,SAAS,wBAAwB,OAA2D;AACjG,QAAM,YAAY,0BAA0B,KAAK;AACjD,MAAI,UAAU,WAAW,QAAQ;AAC/B,UAAM,IAAI,MAAM,UAAU,OAAO;AAAA,EACnC;AACA,SAAO;AACT;AAEA,SAAS,gBACP,MACA,aACA,YACa;AACb,MAAI,YAAa,QAAO,KAAK,OAAO,CAAC,MAAM,EAAE,gBAAgB,WAAW;AACxE,MAAI,WAAY,QAAO,KAAK,OAAO,CAAC,MAAM,EAAE,gBAAgB,UAAU;AACtE,SAAO,CAAC,GAAG,IAAI;AACjB;AAEA,SAAS,qBACP,QACA,aACA,YACwB;AACxB,MAAI,YAAa,QAAO,OAAO,OAAO,CAAC,MAAM,EAAE,gBAAgB,UAAa,EAAE,gBAAgB,WAAW;AACzG,MAAI,WAAY,QAAO,OAAO,OAAO,CAAC,MAAM,EAAE,gBAAgB,UAAa,EAAE,gBAAgB,UAAU;AACvG,SAAO,CAAC,GAAG,MAAM;AACnB;AAEA,SAAS,YACP,OACA,YACA,SACA,QACM;AACN,MAAI,WAAW,iBAAiB,CAAC,MAAM,YAAY,MAAM,WAAW,UAAU,OAAO,GAAG;AACtF,WAAO,KAAK,EAAE,MAAM,UAAU,UAAU,YAAY,MAAM,kBAAkB,QAAQ,6CAA6C,CAAC;AAAA,EACpI;AACA,MAAI,QAAQ,gBAAgB,WAAW,kBAAkB;AACvD,WAAO,KAAK,EAAE,MAAM,UAAU,UAAU,YAAY,MAAM,iBAAiB,QAAQ,GAAG,QAAQ,aAAa,sBAAsB,WAAW,gBAAgB,IAAI,CAAC;AAAA,EACnK;AACA,MAAI,WAAW,kBAAkB,QAAQ,YAAY,YAAY,GAAG;AAClE,WAAO,KAAK,EAAE,MAAM,UAAU,UAAU,YAAY,MAAM,yBAAyB,QAAQ,mCAAmC,CAAC;AAAA,EACjI;AACF;AAEA,SAAS,aACP,YACA,SACA,QACM;AACN,MAAI,QAAQ,aAAa,WAAW,eAAe;AACjD,WAAO,KAAK,EAAE,MAAM,WAAW,UAAU,YAAY,MAAM,mBAAmB,QAAQ,GAAG,QAAQ,UAAU,wBAAwB,WAAW,aAAa,IAAI,CAAC;AAAA,EAClK;AACA,MAAI,QAAQ,WAAW,WAAW,aAAa;AAC7C,WAAO,KAAK,EAAE,MAAM,WAAW,UAAU,YAAY,MAAM,iBAAiB,QAAQ,YAAY,IAAI,QAAQ,QAAQ,CAAC,MAAM,IAAI,WAAW,WAAW,CAAC,IAAI,CAAC;AAAA,EAC7J;AACA,MAAI,QAAQ,YAAY,WAAW,cAAc;AAC/C,WAAO,KAAK,EAAE,MAAM,WAAW,UAAU,YAAY,MAAM,kBAAkB,QAAQ,aAAa,IAAI,QAAQ,SAAS,CAAC,MAAM,IAAI,WAAW,YAAY,CAAC,IAAI,CAAC;AAAA,EACjK;AACF;AAEA,SAAS,oBACP,cACA,YACA,SACA,QACM;AACN,MAAI,WAAW,kBAAkB,QAAQ,cAAc,WAAW,gBAAgB;AAChF,WAAO,KAAK,EAAE,MAAM,kBAAkB,UAAU,YAAY,MAAM,oBAAoB,QAAQ,GAAG,QAAQ,WAAW,yBAAyB,WAAW,cAAc,IAAI,CAAC;AAAA,EAC7K;AACA,MAAI,OAAO,SAAS,QAAQ,UAAU,KAAK,QAAQ,aAAa,WAAW,eAAe;AACxF,WAAO,KAAK,EAAE,MAAM,kBAAkB,UAAU,YAAY,MAAM,eAAe,QAAQ,sBAAsB,IAAI,QAAQ,UAAU,CAAC,MAAM,IAAI,WAAW,aAAa,CAAC,IAAI,CAAC;AAAA,EAChL;AACA,MAAI,gBAAgB,CAAC,aAAa,SAAS;AACzC,WAAO,KAAK,EAAE,MAAM,kBAAkB,UAAU,YAAY,MAAM,QAAQ,aAAa,iBAAiB,QAAQ,IAAI,QAAQ,aAAa,OAAO,CAAC;AAAA,EACnJ;AACF;AAEA,SAAS,iBACP,YACA,SACA,QACM;AACN,MAAI,CAAC,WAAW,sBAAuB;AACvC,MAAI,QAAQ,aAAa,QAAQ,iBAAiB;AAChD,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM;AAAA,MACN,QAAQ,GAAG,QAAQ,aAAa,QAAQ,eAAe;AAAA,IACzD,CAAC;AAAA,EACH;AACF;AAEA,SAAS,gBACP,YACA,SACA,QACM;AACN,MAAI,QAAQ,cAAc,WAAW,gBAAgB;AACnD,WAAO,KAAK,EAAE,MAAM,cAAc,UAAU,YAAY,MAAM,eAAe,QAAQ,eAAe,IAAI,QAAQ,WAAW,CAAC,MAAM,IAAI,WAAW,cAAc,CAAC,IAAI,CAAC;AAAA,EACvK;AACA,MAAI,QAAQ,YAAY,WAAW,cAAc;AAC/C,WAAO,KAAK,EAAE,MAAM,cAAc,UAAU,YAAY,MAAM,kBAAkB,QAAQ,aAAa,IAAI,QAAQ,SAAS,CAAC,MAAM,IAAI,WAAW,YAAY,CAAC,IAAI,CAAC;AAAA,EACpK;AACF;AAEA,SAAS,UACP,SACA,YACA,cACA,QACyB;AACzB,SAAO;AAAA,IACL,KAAK,UAAU,QAAQ,QAAQ,QAAQ,gBAAgB,KAAK,IAAI,GAAG,WAAW,gBAAgB,CAAC,GAAG,GAAG,QAAQ,aAAa,uBAAuB,QAAQ,YAAY,OAAO,EAAE;AAAA,IAC9K,KAAK,WAAW,QAAQ,KAAK,IAAI,QAAQ,UAAU,QAAQ,SAAS,GAAG,YAAY,IAAI,QAAQ,QAAQ,CAAC,cAAc,IAAI,QAAQ,SAAS,CAAC,EAAE;AAAA,IAC9I,KAAK,kBAAkB,QAAQ,gBAAgB,CAAC,aAAa,UAAU,IAAI,SAAS,QAAQ,YAAY,WAAW,aAAa,GAAG,eAAe,QAAQ,WAAW,eAAe,IAAI,QAAQ,UAAU,CAAC,EAAE;AAAA,IAC7M,KAAK,eAAe,QAAQ,QAAQ,eAAe,IAAI,IAAI,QAAQ,kBAAkB,QAAQ,YAAY,mBAAmB,QAAQ,eAAe,IAAI,QAAQ,UAAU,EAAE;AAAA,IAC3K,KAAK,cAAc,QAAQ,gBAAgB,SAAS,UAAU,GAAG,eAAe,IAAI,QAAQ,WAAW,CAAC,cAAc,IAAI,QAAQ,SAAS,CAAC,EAAE;AAAA,EAChJ;AACF;AAEA,SAAS,KACP,MACA,QACA,OACA,QACuB;AACvB,QAAM,MAAM,OAAO,OAAO,CAAC,MAAM,EAAE,SAAS,IAAI;AAChD,QAAM,SAAS,IAAI,KAAK,CAAC,MAAM,EAAE,aAAa,UAAU,IAAI,SACxD,IAAI,SAAS,IAAI,SACjB;AACJ,SAAO,EAAE,MAAM,QAAQ,OAAO,QAAQ,KAAK,GAAG,OAAO;AACvD;AAEA,SAAS,oBAAoB,WAAqE;AAChG,QAAM,SAAuC,EAAE,OAAO,GAAG,KAAK,GAAG,MAAM,GAAG,SAAS,EAAE;AACrF,aAAW,YAAY,UAAW,QAAO,SAAS,SAAS,OAAO;AAClE,SAAO;AACT;AAEA,SAAS,aAAa,WAA+D;AACnF,QAAM,MAA8B,CAAC;AACrC,aAAW,YAAY,WAAW;AAChC,UAAM,SAAS,SAAS,MAAM,UAAU,SAAS,MAAM,YAAY;AACnE,QAAI,MAAM,KAAK,IAAI,MAAM,KAAK,KAAK;AAAA,EACrC;AACA,SAAO;AACT;AAEA,SAAS,kBACP,MACA,QACA,WACwB;AACxB,QAAM,MAA8B,CAAC;AACrC,aAAW,OAAO,MAAM;AACtB,UAAM,QAAQ,IAAI,QAAQ,gBAAgB,IAAI,QAAQ;AACtD,QAAI,IAAI,eAAgB,UAAU,UAAa,QAAQ,WAAY;AACjE,YAAM,OAAO,IAAI,eAAe;AAChC,UAAI,IAAI,KAAK,IAAI,IAAI,KAAK,KAAK;AAAA,IACjC;AAAA,EACF;AACA,aAAW,SAAS,QAAQ;AAC1B,QAAI,MAAM,eAAe,MAAM,OAAO,SAAU,MAAM,UAAU,UAAa,MAAM,QAAQ,WAAY;AACrG,YAAM,OAAO,MAAM,gBAAgB,MAAM,OAAO,QAAQ,WAAW;AACnE,UAAI,IAAI,KAAK,IAAI,IAAI,KAAK,KAAK;AAAA,IACjC;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,yBAAyB,QAAiE;AACjG,QAAM,MAA8B,CAAC;AACrC,aAAW,SAAS,QAAQ;AAC1B,eAAW,OAAO,MAAM,OAAO,CAAC,GAAG;AACjC,YAAM,UAAU,IAAI,sBAAsB;AAC1C,UAAI,OAAO,KAAK,IAAI,OAAO,KAAK,KAAK;AAAA,IACvC;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,WACP,MACA,QACA,WAC4B;AAC5B,QAAM,MAAkC,CAAC;AACzC,aAAW,OAAO,MAAM;AACtB,UAAM,QAAQ,IAAI,QAAQ,gBAAgB,IAAI,QAAQ;AACtD,QAAI,IAAI,eAAgB,UAAU,UAAa,QAAQ,WAAY;AACjE,YAAM,YAAY,IAAI,QAAQ,IAAI;AAClC,UAAI,KAAK,EAAE,QAAQ,OAAO,cAAc,YAAY,YAAY,EAAE,CAAC;AAAA,IACrE;AAAA,EACF;AACA,aAAW,SAAS,QAAQ;AAC1B,QAAI,MAAM,eAAe,MAAM,OAAO,SAAU,MAAM,UAAU,UAAa,MAAM,QAAQ,WAAY;AACrG,UAAI,KAAK,EAAE,SAAS,MAAM,KAAK,UAAU,KAAK,EAAE,CAAC;AAAA,IACnD;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,SACP,MACA,QACA,WACQ;AACR,QAAM,WAAW;AAAA,IACf,GAAG,KAAK,IAAI,CAAC,QAAQ;AACnB,YAAM,QAAQ,IAAI,QAAQ,gBAAgB,IAAI,QAAQ;AACtD,aAAO,CAAC,IAAI,eAAe,UAAU,UAAa,SAAS;AAAA,IAC7D,CAAC;AAAA,IACD,GAAG,OAAO,IAAI,CAAC,UAAU,MAAM,OAAO,UAAU,MAAM,UAAU,UAAa,MAAM,SAAS,UAAU;AAAA,EACxG;AACA,MAAI,SAAS,WAAW,EAAG,QAAO;AAClC,SAAO,SAAS,OAAO,OAAO,EAAE,SAAS,SAAS;AACpD;AAEA,SAAS,UAAU,MAA4B,OAA8B;AAC3E,SAAO,KACJ,OAAO,CAAC,QAAQ,IAAI,aAAa,KAAK,EACtC,IAAI,CAAC,QAAQ,UAAU,YAAY,IAAI,QAAQ,eAAe,IAAI,QAAQ,WAAW,EACrF,OAAO,cAAc;AAC1B;AAEA,SAAS,KAAK,IAA+B;AAC3C,MAAI,GAAG,WAAW,EAAG,QAAO,OAAO;AACnC,SAAO,GAAG,OAAO,CAAC,KAAK,MAAM,MAAM,GAAG,CAAC,IAAI,GAAG;AAChD;AAEA,SAAS,WAAW,IAAuB,GAAmB;AAC5D,MAAI,GAAG,WAAW,EAAG,QAAO,OAAO;AACnC,QAAM,SAAS,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC3C,SAAO,OAAO,KAAK,IAAI,OAAO,SAAS,GAAG,KAAK,IAAI,GAAG,KAAK,KAAK,IAAI,OAAO,MAAM,IAAI,CAAC,CAAC,CAAC;AAC1F;AAEA,SAAS,eAAe,OAAiC;AACvD,SAAO,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK;AAC3D;AAEA,SAAS,SAAS,GAAW,GAAmB;AAC9C,MAAI,CAAC,OAAO,SAAS,CAAC,KAAK,CAAC,OAAO,SAAS,CAAC,EAAG,QAAO,OAAO;AAC9D,SAAO,IAAI;AACb;AAEA,SAAS,SAAS,KAAa,QAAwB;AACrD,MAAI,CAAC,OAAO,SAAS,GAAG,EAAG,QAAO;AAClC,MAAI,UAAU,EAAG,QAAO,OAAO,IAAI,IAAI;AACvC,SAAO,QAAQ,IAAI,KAAK,IAAI,GAAG,GAAG,IAAI,MAAM;AAC9C;AAEA,SAAS,gBACP,SACA,YACQ;AACR,QAAM,OAAO,OAAO,SAAS,WAAW,cAAc,KAAK,OAAO,SAAS,QAAQ,WAAW,IAC1F,QAAQ,WAAW,iBAAiB,KAAK,IAAI,QAAQ,aAAa,KAAK,CAAC,IACxE;AACJ,QAAM,UAAU,OAAO,SAAS,WAAW,YAAY,KAAK,OAAO,SAAS,QAAQ,SAAS,IACzF,QAAQ,WAAW,eAAe,KAAK,IAAI,QAAQ,WAAW,KAAK,CAAC,IACpE;AACJ,SAAO,KAAK,IAAI,MAAM,OAAO;AAC/B;AAEA,SAAS,QAAQ,GAAmB;AAClC,MAAI,CAAC,OAAO,SAAS,CAAC,EAAG,QAAO;AAChC,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,CAAC,CAAC;AACnC;AAEA,SAAS,cACP,QACA,QACA,SACA,QACQ;AACR,QAAM,SAAS,sBAAsB,MAAM,KAAK,MAAM;AACtD,QAAM,aAAa,aAAa,QAAQ,aAAa,eAAe,QAAQ,UAAU,gBAAgB,QAAQ,WAAW,aAAa,IAAI,QAAQ,QAAQ,CAAC,cAAc,IAAI,QAAQ,SAAS,CAAC;AAC/L,MAAI,OAAO,WAAW,EAAG,QAAO,GAAG,MAAM,KAAK,UAAU;AACxD,SAAO,GAAG,MAAM,KAAK,UAAU,YAAY,OAAO,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,GAAG,CAAC;AAChF;AAEA,SAAS,IAAI,GAAmB;AAC9B,MAAI,CAAC,OAAO,SAAS,CAAC,EAAG,QAAO,OAAO,CAAC;AACxC,SAAO,EAAE,QAAQ,CAAC;AACpB;;;AC9eO,SAAS,oBACd,WACA,UAAsC,CAAC,GAC/B;AACR,QAAM,QAAQ,QAAQ,SAAS,mBAAmB,UAAU,MAAM;AAClE,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,KAAK,KAAK,EAAE;AACvB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,aAAa,UAAU,OAAO,YAAY,CAAC,IAAI;AAC1D,QAAM,KAAK,cAAc,UAAU,UAAU,QAAQ,IAAI,IAAI;AAC7D,MAAI,UAAU,YAAa,OAAM,KAAK,gBAAgB,UAAU,WAAW,IAAI;AAC/E,MAAI,UAAU,WAAY,OAAM,KAAK,eAAe,UAAU,UAAU,IAAI;AAC5E,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,UAAU,OAAO;AAC5B,QAAM,KAAK,EAAE;AAEb,QAAM,KAAK,YAAY;AACvB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,oBAAoB;AAC/B,QAAM,KAAK,YAAY;AACvB,QAAM,KAAK,iBAAiB,UAAU,QAAQ,aAAa,IAAI;AAC/D,QAAM,KAAK,mBAAmB,UAAU,QAAQ,UAAU,IAAI;AAC9D,QAAM,KAAK,oBAAoB,UAAU,QAAQ,WAAW,IAAI;AAChE,QAAM,KAAK,iBAAiB,IAAI,UAAU,QAAQ,QAAQ,CAAC,IAAI;AAC/D,QAAM,KAAK,kBAAkB,IAAI,UAAU,QAAQ,SAAS,CAAC,IAAI;AACjE,QAAM,KAAK,mBAAmB,IAAI,UAAU,QAAQ,eAAe,CAAC,IAAI;AACxE,QAAM,KAAK,oBAAoB,IAAI,UAAU,QAAQ,gBAAgB,CAAC,IAAI;AAC1E,QAAM,KAAK,mBAAmB,IAAI,UAAU,QAAQ,UAAU,CAAC,IAAI;AACnE,QAAM,KAAK,kBAAkB,IAAI,UAAU,QAAQ,WAAW,CAAC,IAAI;AACnE,QAAM,KAAK,qBAAqB,KAAK,MAAM,UAAU,QAAQ,SAAS,CAAC,OAAO;AAC9E,QAAM,KAAK,EAAE;AAEb,MAAI,UAAU,OAAO,SAAS,GAAG;AAC/B,UAAM,KAAK,WAAW;AACtB,UAAM,KAAK,EAAE;AACb,eAAW,SAAS,UAAU,QAAQ;AACpC,YAAM,KAAK,OAAO,MAAM,QAAQ,QAAQ,MAAM,IAAI,OAAO,MAAM,IAAI,MAAM,MAAM,MAAM,EAAE;AAAA,IACzF;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,WAAW,QAAQ,UAAU,QAAQ,wBAAwB;AACnE,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,yBAAyB;AACpC,UAAM,KAAK,EAAE;AACb,eAAW,CAAC,SAAS,KAAK,KAAK,SAAU,OAAM,KAAK,KAAK,OAAO,KAAK,KAAK,EAAE;AAC5E,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,WAAW,QAAQ,UAAU,QAAQ,iBAAiB;AAC5D,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,kBAAkB;AAC7B,UAAM,KAAK,EAAE;AACb,eAAW,CAAC,MAAM,KAAK,KAAK,SAAU,OAAM,KAAK,KAAK,IAAI,KAAK,KAAK,EAAE;AACtE,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,QAAQ,QAAQ,QAAQ,KAAK,SAAS,GAAG;AAC3C,UAAM,KAAK,gBAAgB;AAC3B,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,aAAa,CAAC,GAAG,QAAQ,IAAI,GAAG;AAAA,MACzC,YAAY,QAAQ,cAAc,UAAU,cAAc;AAAA,MAC1D,OAAO;AAAA,IACT,CAAC,EAAE,QAAQ;AACX,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,QAAQ,wBAAwB,QAAQ,qBAAqB,SAAS,GAAG;AAC3E,UAAM,KAAK,0BAA0B;AACrC,UAAM,KAAK,EAAE;AACb,eAAW,WAAW,QAAQ,qBAAsB,OAAM,KAAK,KAAK,OAAO,EAAE;AAC7E,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,cAAc,QAAQ,eAAe,mBAAmB,SAAS;AACvE,MAAI,YAAY,SAAS,GAAG;AAC1B,UAAM,KAAK,iBAAiB;AAC5B,UAAM,KAAK,EAAE;AACb,eAAW,UAAU,YAAa,OAAM,KAAK,KAAK,MAAM,EAAE;AAC1D,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,SAAO,MAAM,KAAK,IAAI,EAAE,QAAQ,IAAI;AACtC;AAEA,SAAS,mBAAmB,WAAiD;AAC3E,MAAI,UAAU,QAAS,QAAO,CAAC,kDAAkD;AACjF,SAAO,UAAU,OACd,OAAO,CAAC,UAAU,MAAM,aAAa,UAAU,EAC/C,IAAI,CAAC,UAAU,WAAW,MAAM,IAAI,KAAK,MAAM,MAAM,EAAE;AAC5D;AAEA,SAAS,QAAQ,QAAyD;AACxE,SAAO,OAAO,QAAQ,MAAM,EACzB,OAAO,CAAC,CAAC,EAAE,KAAK,MAAM,QAAQ,CAAC,EAC/B,KAAK,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC;AAC3D;AAEA,SAAS,IAAI,OAAuB;AAClC,SAAO,OAAO,SAAS,KAAK,IAAI,IAAI,QAAQ,KAAK,QAAQ,CAAC,CAAC,MAAM;AACnE;AAEA,SAAS,IAAI,OAAuB;AAClC,SAAO,OAAO,SAAS,KAAK,IAAI,MAAM,QAAQ,CAAC,IAAI;AACrD;;;ACpDO,SAAS,YACd,UACA,WACA,UAA4B,CAAC,GACZ;AACjB,QAAM,QAAQ,QAAQ,SAAS;AAC/B,QAAM,aAAa,QAAQ,cAAc;AACzC,QAAM,WAAW,QAAQ,mBAAmB;AAC5C,QAAM,MAAM,WAAW,QAAQ,QAAQ,SAAS,UAAU,SAAS,CAAC;AAEpE,QAAM,eAAeA,MAAK,QAAQ;AAClC,QAAM,gBAAgBA,MAAK,SAAS;AACpC,QAAM,QAAQ,gBAAgB;AAE9B,MAAI,SAAS,SAAS,UAAU,SAAS,YAAY,SAAS,WAAW,KAAK,UAAU,WAAW,GAAG;AACpG,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,SAAS;AAAA,MACT,YAAY;AAAA,MACZ;AAAA,MACA,SAAS;AAAA,IACX;AAAA,EACF;AAEA,QAAM,SAAmB,IAAI,MAAM,UAAU;AAC7C,WAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,UAAM,YAAY,SAAS,UAAU,GAAG;AACxC,UAAM,YAAY,SAAS,WAAW,GAAG;AACzC,WAAO,CAAC,IAAIA,MAAK,SAAS,IAAIA,MAAK,SAAS;AAAA,EAC9C;AACA,SAAO,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC3B,QAAM,WAAW,KAAK,MAAO,QAAQ,IAAK,UAAU;AACpD,QAAM,WAAW,KAAK,OAAO,IAAI,QAAQ,KAAK,UAAU,IAAI;AAC5D,QAAM,UAAU,OAAO,KAAK,IAAI,GAAG,QAAQ,CAAC;AAC5C,QAAM,UAAU,OAAO,KAAK,IAAI,aAAa,GAAG,QAAQ,CAAC;AAEzD,MAAI;AACJ,MAAI,UAAU,EAAG,WAAU;AAAA,WAClB,UAAU,EAAG,WAAU;AAAA,WACvB,SAAS,EAAG,WAAU;AAAA,MAC1B,WAAU;AAEf,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEA,SAASA,MAAK,IAAsB;AAClC,MAAI,GAAG,WAAW,EAAG,QAAO;AAC5B,MAAI,IAAI;AACR,aAAW,KAAK,GAAI,MAAK;AACzB,SAAO,IAAI,GAAG;AAChB;AAEA,SAAS,SAAS,IAAc,KAA6B;AAC3D,QAAM,MAAM,IAAI,MAAM,GAAG,MAAM;AAC/B,WAAS,IAAI,GAAG,IAAI,GAAG,QAAQ,IAAK,KAAI,CAAC,IAAI,GAAG,KAAK,MAAM,IAAI,IAAI,GAAG,MAAM,CAAC;AAC7E,SAAO;AACT;AAGA,SAAS,WAAW,MAA4B;AAC9C,MAAI,IAAI,SAAS;AACjB,SAAO,MAAM;AACX,SAAK;AACL,QAAI,IAAI;AACR,QAAI,KAAK,KAAK,IAAK,MAAM,IAAK,IAAI,CAAC;AACnC,SAAK,IAAI,KAAK,KAAK,IAAK,MAAM,GAAI,IAAI,EAAE;AACxC,aAAS,IAAK,MAAM,QAAS,KAAK;AAAA,EACpC;AACF;AAGA,SAAS,SAAS,GAAa,GAAqB;AAClD,MAAI,IAAI;AACR,aAAW,KAAK,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG;AAC5B,UAAM,OAAO,IAAI,aAAa,CAAC,CAAC,CAAC;AACjC,UAAM,QAAQ,IAAI,WAAW,KAAK,MAAM;AACxC,eAAW,QAAQ,OAAO;AACxB,WAAK;AACL,UAAI,KAAK,KAAK,GAAG,QAAQ;AAAA,IAC3B;AAAA,EACF;AACA,SAAO,MAAM;AACf;AA8BA,eAAsB,gBACpB,MACkF;AAClF,QAAM,cAAc,KAAK,oBAAoB;AAC7C,QAAM,iBAAiB,MAAM,SAAS,KAAK,iBAAiB,KAAK,OAAO,WAAW;AACnF,QAAM,kBAAkB,MAAM,SAAS,KAAK,kBAAkB,KAAK,OAAO,WAAW;AACrF,QAAM,KAAK,YAAY,gBAAgB,iBAAiB;AAAA,IACtD,GAAI,KAAK,UAAU,SAAY,EAAE,OAAO,KAAK,MAAM,IAAI,CAAC;AAAA,IACxD,GAAI,KAAK,eAAe,SAAY,EAAE,YAAY,KAAK,WAAW,IAAI,CAAC;AAAA,IACvE,GAAI,KAAK,SAAS,SAAY,EAAE,MAAM,KAAK,KAAK,IAAI,CAAC;AAAA,EACvD,CAAC;AACD,SAAO;AAAA,IACL,GAAG;AAAA,IACH,iBAAiB,eAAe;AAAA,IAChC,kBAAkB,gBAAgB;AAAA,EACpC;AACF;AAEA,eAAe,SACb,SACA,OACA,aACmB;AACnB,QAAM,UAAoB,IAAI,MAAM,QAAQ,MAAM;AAClD,MAAI,OAAO;AACX,iBAAe,SAAwB;AACrC,WAAO,MAAM;AACX,YAAM,IAAI;AACV,UAAI,KAAK,QAAQ,OAAQ;AACzB,YAAM,IAAI,MAAM,MAAM,QAAQ,CAAC,CAAE;AACjC,cAAQ,CAAC,IAAI,OAAO,SAAS,CAAC,IAAI,IAAI;AAAA,IACxC;AAAA,EACF;AACA,QAAM,QAAQ,IAAI,MAAM,KAAK,EAAE,QAAQ,KAAK,IAAI,GAAG,WAAW,EAAE,GAAG,MAAM,OAAO,CAAC,CAAC;AAClF,SAAO;AACT;","names":["mean"]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/meta-eval/rubric-predictive-validity.ts","../src/sequential.ts"],"sourcesContent":["/**\n * Rubric predictive validity — does our eval rubric predict deployment\n * outcomes?\n *\n * `correlationStudy` (already in this package) joins a `TraceStore` to an\n * `OutcomeStore` and computes Pearson + Spearman + bootstrap CI for each\n * (eval-metric, outcome-metric) pair. That answers \"does X correlate with\n * Y at all.\" `rubricPredictiveValidity` is the campaign-shaped wrapper\n * around it: take a sequence of `RunRecord`s (the canonical campaign\n * artifact) and a `DeploymentOutcomeStore`, join on `runId`, return a\n * ranked verdict on every rubric whose dimension scores were captured in\n * `outcome.raw`.\n *\n * The point — quoting the methodology doc — is that **without this loop\n * every rubric is faith-based**. Once it's wired, you know which rubrics\n * have earned their promotion power and which ones are decoration.\n *\n * const validity = await rubricPredictiveValidity({\n * runs: lastQuarter,\n * outcomes: shipFlagOutcomeStore,\n * outcomeMetrics: ['revenue_lift', 'retention_30d', 'csat'],\n * rubrics: ['anti_slop', 'semantic_concept', 'tool_recovery'],\n * })\n * for (const r of validity.ranked) {\n * console.log(`${r.rubric} → ${r.bestOutcome}: ρ=${r.spearman.toFixed(2)}`)\n * }\n *\n * The function is intentionally read-only. Use the verdict to deprecate\n * decorative rubrics, re-weight composite scores, or trigger a\n * recalibration sweep when predictive validity drops below a threshold.\n */\n\nimport type { RunRecord } from '../run-record'\nimport type { DeploymentOutcome, OutcomeStore } from './outcome-store'\n\nexport interface RubricPredictiveValidityInput {\n /**\n * Canonical campaign output. Each record's `outcome.raw[<rubricId>]`\n * provides the eval score; missing keys are silently skipped per pair.\n */\n runs: RunRecord[]\n outcomes: OutcomeStore\n /**\n * Outcome metric names to evaluate against. Each must appear in at\n * least one `DeploymentOutcome.metrics` keyspace; pairs with too few\n * joined samples are excluded from the result.\n */\n outcomeMetrics: string[]\n /**\n * Rubric ids to evaluate. Must appear as keys in `RunRecord.outcome.raw`.\n * If omitted, every numeric key in `outcome.raw` across the run set is\n * treated as a rubric.\n */\n rubrics?: string[]\n /** Minimum joined-sample count before a pair is reported. Default 8. */\n minSamples?: number\n /** Bootstrap resamples for CI. Default 500. */\n bootstrapResamples?: number\n /** Random seed for the bootstrap (mulberry32). Default unset (Math.random). */\n seed?: number\n /**\n * Reduction when multiple outcomes attach to one runId. Default `'latest'`\n * (most recently captured).\n */\n reduction?: 'latest' | 'mean' | 'max'\n}\n\nexport interface RubricOutcomePair {\n rubric: string\n outcome: string\n n: number\n pearson: number\n spearman: number\n ci95: { low: number; high: number }\n /**\n * Verdict bucket. `load_bearing` ≥ 0.7, `informative` ≥ 0.4,\n * `decorative` < 0.4 in absolute correlation. A negative correlation\n * with a desired outcome is also `decorative` — actively misleading\n * is worse than uninformative.\n */\n verdict: 'load_bearing' | 'informative' | 'decorative'\n}\n\nexport interface RubricRanking {\n rubric: string\n /** Outcome metric this rubric correlated best with. */\n bestOutcome: string\n spearman: number\n pearson: number\n n: number\n verdict: RubricOutcomePair['verdict']\n}\n\nexport interface RubricPredictiveValidityReport {\n pairs: RubricOutcomePair[]\n /** Per-rubric best pair, sorted descending by |spearman|. */\n ranked: RubricRanking[]\n joinedSamples: number\n skippedRuns: number\n /** Rubrics that were declared but never produced a usable score. */\n rubricsWithoutData: string[]\n}\n\nexport async function rubricPredictiveValidity(\n input: RubricPredictiveValidityInput,\n): Promise<RubricPredictiveValidityReport> {\n const minSamples = input.minSamples ?? 8\n const reduction = input.reduction ?? 'latest'\n const resamples = input.bootstrapResamples ?? 500\n const rng = makeRng(input.seed)\n\n const outcomes = await input.outcomes.list()\n const outcomesByRun = new Map<string, DeploymentOutcome[]>()\n for (const o of outcomes) {\n const arr = outcomesByRun.get(o.runId) ?? []\n arr.push(o)\n outcomesByRun.set(o.runId, arr)\n }\n\n // Discover rubrics: caller-declared OR every numeric key in outcome.raw\n // observed across runs.\n const observedRubrics = new Set<string>()\n for (const r of input.runs) {\n for (const k of Object.keys(r.outcome.raw)) observedRubrics.add(k)\n }\n const rubrics = input.rubrics ?? [...observedRubrics]\n\n // Collect aligned (x, y) pairs per (rubric, outcome).\n type Bucket = { rubric: string; outcome: string; xs: number[]; ys: number[] }\n const buckets: Bucket[] = []\n for (const r of rubrics) {\n for (const o of input.outcomeMetrics) {\n buckets.push({ rubric: r, outcome: o, xs: [], ys: [] })\n }\n }\n\n let joined = 0\n let skipped = 0\n for (const run of input.runs) {\n const os = outcomesByRun.get(run.runId)\n if (!os || os.length === 0) { skipped++; continue }\n let joinedThisRun = false\n for (const r of rubrics) {\n const x = run.outcome.raw[r]\n if (typeof x !== 'number' || !Number.isFinite(x)) continue\n for (const o of input.outcomeMetrics) {\n const values = os\n .map((row) => row.metrics[o])\n .filter((v): v is number => typeof v === 'number' && Number.isFinite(v))\n if (values.length === 0) continue\n const y = reduce(values, os, o, reduction)\n if (y === null) continue\n const bucket = buckets.find((b) => b.rubric === r && b.outcome === o)!\n bucket.xs.push(x)\n bucket.ys.push(y)\n joinedThisRun = true\n }\n }\n if (joinedThisRun) joined++\n }\n\n const pairs: RubricOutcomePair[] = []\n for (const b of buckets) {\n if (b.xs.length < minSamples) continue\n const pearson = pearsonR(b.xs, b.ys)\n const spearman = pearsonR(rankWithTies(b.xs), rankWithTies(b.ys))\n const ci = bootstrapCi(b.xs, b.ys, resamples, rng)\n const verdict: RubricOutcomePair['verdict'] =\n Math.abs(spearman) >= 0.7 ? 'load_bearing'\n : Math.abs(spearman) >= 0.4 ? 'informative'\n : 'decorative'\n pairs.push({\n rubric: b.rubric, outcome: b.outcome, n: b.xs.length,\n pearson, spearman, ci95: ci, verdict,\n })\n }\n\n const byRubric = new Map<string, RubricOutcomePair[]>()\n for (const p of pairs) {\n const arr = byRubric.get(p.rubric) ?? []\n arr.push(p)\n byRubric.set(p.rubric, arr)\n }\n const ranked: RubricRanking[] = [...byRubric.entries()]\n .map(([rubric, ps]) => {\n const best = ps.reduce((a, b) => (Math.abs(b.spearman) > Math.abs(a.spearman) ? b : a))\n return {\n rubric,\n bestOutcome: best.outcome,\n spearman: best.spearman,\n pearson: best.pearson,\n n: best.n,\n verdict: best.verdict,\n }\n })\n .sort((a, b) => Math.abs(b.spearman) - Math.abs(a.spearman))\n\n const rubricsWithoutData = rubrics.filter((r) => !byRubric.has(r))\n\n return { pairs, ranked, joinedSamples: joined, skippedRuns: skipped, rubricsWithoutData }\n}\n\n// ── Helpers ──────────────────────────────────────────────────────────────\n\nfunction reduce(\n values: number[],\n outcomes: DeploymentOutcome[],\n metric: string,\n kind: 'latest' | 'mean' | 'max',\n): number | null {\n if (values.length === 0) return null\n if (kind === 'mean') return values.reduce((s, v) => s + v, 0) / values.length\n if (kind === 'max') return Math.max(...values)\n // 'latest'\n const sorted = [...outcomes]\n .filter((o) => typeof o.metrics[metric] === 'number')\n .sort((a, b) => b.capturedAt - a.capturedAt)\n return sorted[0]?.metrics[metric] ?? null\n}\n\nfunction pearsonR(a: number[], b: number[]): number {\n if (a.length !== b.length || a.length < 2) return Number.NaN\n const ma = a.reduce((s, v) => s + v, 0) / a.length\n const mb = b.reduce((s, v) => s + v, 0) / b.length\n let num = 0, da = 0, db = 0\n for (let i = 0; i < a.length; i++) {\n const xa = a[i]! - ma\n const xb = b[i]! - mb\n num += xa * xb; da += xa * xa; db += xb * xb\n }\n if (da === 0 || db === 0) return da === 0 && db === 0 ? 1 : 0\n return num / Math.sqrt(da * db)\n}\n\nfunction rankWithTies(xs: number[]): number[] {\n const indexed = xs.map((v, i) => ({ v, i })).sort((a, b) => a.v - b.v)\n const r = new Array<number>(xs.length)\n for (let i = 0; i < indexed.length; ) {\n let j = i\n while (j + 1 < indexed.length && indexed[j + 1]!.v === indexed[i]!.v) j++\n const avg = (i + j + 2) / 2\n for (let k = i; k <= j; k++) r[indexed[k]!.i] = avg\n i = j + 1\n }\n return r\n}\n\nfunction bootstrapCi(\n xs: number[],\n ys: number[],\n iterations: number,\n rng: () => number,\n): { low: number; high: number } {\n const n = xs.length\n if (n < 3) return { low: Number.NaN, high: Number.NaN }\n const samples: number[] = []\n for (let b = 0; b < iterations; b++) {\n const rx = new Array<number>(n)\n const ry = new Array<number>(n)\n for (let i = 0; i < n; i++) {\n const idx = Math.floor(rng() * n)\n rx[i] = xs[idx]!\n ry[i] = ys[idx]!\n }\n const r = pearsonR(rx, ry)\n if (Number.isFinite(r)) samples.push(r)\n }\n samples.sort((a, b) => a - b)\n if (samples.length === 0) return { low: Number.NaN, high: Number.NaN }\n return {\n low: samples[Math.floor(0.025 * samples.length)]!,\n high: samples[Math.min(samples.length - 1, Math.floor(0.975 * samples.length))]!,\n }\n}\n\nfunction makeRng(seed?: number): () => number {\n if (seed === undefined) return Math.random\n let s = seed >>> 0\n return () => {\n s = (s + 0x6D2B79F5) >>> 0\n let t = s\n t = Math.imul(t ^ (t >>> 15), t | 1)\n t ^= t + Math.imul(t ^ (t >>> 7), t | 61)\n return ((t ^ (t >>> 14)) >>> 0) / 4294967296\n }\n}\n","/**\n * Always-valid sequential evaluation.\n *\n * `researchReport` (0.21+) assumes a single pre-specified analysis. Real\n * consumers run campaigns weekly / nightly / per-PR; each new run silently\n * inflates the false-discovery rate, because the BH-FDR guarantee was for\n * the *first* look, not the 47th. Without time-uniform inference,\n * launch-decision teams either (a) don't peek, which forfeits the cost\n * advantage of stop-when-decisive, or (b) peek and pretend they didn't,\n * which forfeits scientific validity.\n *\n * This module ships **e-value-based confidence sequences** for paired\n * bounded outcomes. The methodology is the predictable plug-in betting\n * martingale of Waudby-Smith & Ramdas (2024) — provably valid at *any*\n * stopping time. Concretely:\n *\n * For paired deltas D_1, D_2, … ∈ [-c, c] with the null H_0: E[D] ≤ 0,\n * a betting fraction λ_i is chosen using only D_{1..i-1} (predictable\n * plug-in), and the running e-value is\n *\n * E_t = ∏_{i=1}^{t} (1 + λ_i · D_i)\n *\n * E_t is a non-negative martingale under H_0 with E[E_t] ≤ 1, so by\n * Ville's inequality, P(∃ t : E_t ≥ 1/α) ≤ α — we can reject the null\n * at any time without inflating the type-I error.\n *\n * Combined with `runEvalCampaign`, every consumer running rolling\n * campaigns gains the ability to ship the moment evidence is decisive,\n * stop-early on dead-on-arrival variants, and accumulate evidence across\n * partial runs without spending the FDR budget. No new sweep is wasted.\n *\n * References:\n * - Howard, S. R., Ramdas, A., McAuliffe, J., Sekhon, J. (2021).\n * Time-uniform, nonparametric, nonasymptotic confidence sequences.\n * Annals of Statistics, 49(2), 1055–1080.\n * - Waudby-Smith, I., Ramdas, A. (2024). Estimating means of bounded\n * random variables by betting. JRSS B, 86(1), 1–27.\n */\n\nexport type SequentialDecision = 'promote_now' | 'continue' | 'reject_now' | 'equivalent'\n\nexport interface PairedEvalueOptions {\n /**\n * Bound on |delta|. Default 1 (matching most score scales). Must satisfy\n * c > 0; deltas outside [-c, c] are clipped with a warning attached to\n * the return value.\n */\n bound?: number\n /** Target Type-I error. Default 0.05. */\n alpha?: number\n /**\n * Region of Practical Equivalence on the *mean* paired delta. When\n * supplied, the verdict can return `'equivalent'` once the running\n * confidence sequence on the mean is fully contained in [low, high].\n */\n rope?: { low: number; high: number }\n /** Initial bet shrinkage (0 < scale ≤ 1). Default 0.5 — empirically robust. */\n initialBetShrinkage?: number\n}\n\nexport interface PairedEvalueStep {\n /** 1-indexed observation count. */\n t: number\n delta: number\n /** Running e-value E_t = ∏ (1 + λ_i · D_i). */\n evalue: number\n /** Time-uniform p-value at stopping time t. */\n pValue: number\n /** Lower bound of the empirical Bernstein confidence sequence at level 1-α. */\n csLow: number\n csHigh: number\n /** Verdict at this stopping time. */\n decision: SequentialDecision\n}\n\nexport interface PairedEvalueSequence {\n steps: PairedEvalueStep[]\n /** The decision at the final step. */\n finalDecision: SequentialDecision\n /** Index (1-based) at which a non-`continue` decision first fired, or null. */\n decisionFiredAt: number | null\n /** True if any deltas were clipped to [-bound, bound]. */\n clipped: boolean\n}\n\n/**\n * Run the paired e-value sequence over an in-order delta stream.\n *\n * Use for *streaming* / interim analyses: pass the deltas you have so\n * far, get the verdict at every prefix length. The decision is\n * monotone-stable in the sense that once `'reject_now'` or `'promote_now'`\n * fires, the verdict at later steps remains decisive (the e-value is a\n * non-negative martingale; once it crosses the threshold, it's crossed).\n */\nexport function pairedEvalueSequence(\n deltas: number[],\n opts: PairedEvalueOptions = {},\n): PairedEvalueSequence {\n const c = opts.bound ?? 1\n const alpha = opts.alpha ?? 0.05\n const initialShrink = opts.initialBetShrinkage ?? 0.5\n const rope = opts.rope ?? null\n if (c <= 0) throw new Error('pairedEvalueSequence: bound must be > 0')\n if (alpha <= 0 || alpha >= 1) throw new Error('pairedEvalueSequence: alpha must be in (0,1)')\n if (rope && !(Number.isFinite(rope.low) && Number.isFinite(rope.high) && rope.low <= rope.high)) {\n throw new Error('pairedEvalueSequence: rope must satisfy low ≤ high')\n }\n\n const steps: PairedEvalueStep[] = []\n let clipped = false\n let evalue = 1\n let decisionFiredAt: number | null = null\n\n // Running statistics (using only D_{1..i-1} for the bet → predictable plug-in).\n let sum = 0\n let sumSq = 0\n let count = 0\n\n for (let i = 0; i < deltas.length; i++) {\n let d = deltas[i]!\n if (d < -c || d > c) {\n d = Math.max(-c, Math.min(c, d))\n clipped = true\n }\n\n // Predictable plug-in bet (positive λ tests for E[D] > 0; we run a two-sided\n // test by tracking the symmetric e-value via |bet|).\n // λ_i ∝ mean / (variance + bound^2). Shrink early to avoid overbetting.\n const muHat = count === 0 ? 0 : sum / count\n const varHat = count === 0 ? c * c : Math.max(1e-12, sumSq / count - muHat * muHat)\n const t = i + 1\n const shrink = initialShrink * Math.min(1, count / 32) // anneal toward 1\n let lambda = (muHat / (varHat + c * c)) * shrink\n // Clip to ensure 1 + λ·D > 0 for all |D| ≤ c (so the e-value stays non-negative).\n const lambdaMax = 0.99 / c\n if (lambda > lambdaMax) lambda = lambdaMax\n if (lambda < -lambdaMax) lambda = -lambdaMax\n\n evalue = evalue * (1 + lambda * d)\n if (!Number.isFinite(evalue) || evalue < 0) evalue = 0\n\n sum += d\n sumSq += d * d\n count += 1\n\n const pValue = Math.min(1, 1 / Math.max(evalue, 1e-300))\n\n // Empirical Bernstein confidence sequence on the mean. Howard et al.\n // (2021), Theorem 4.4 with σ̂² the running sample variance and a\n // calibration constant tuned for two-sided coverage at level 1 - α.\n const cs = empiricalBernsteinCs(sum, sumSq, count, c, alpha)\n\n let decision: SequentialDecision = 'continue'\n if (rope && cs.low >= rope.low && cs.high <= rope.high) decision = 'equivalent'\n else if (evalue >= 2 / alpha && muHat > 0) decision = 'promote_now'\n else if (evalue >= 2 / alpha && muHat < 0) decision = 'reject_now'\n else if (rope && cs.high < rope.low) decision = 'reject_now'\n\n if (decision !== 'continue' && decisionFiredAt === null) decisionFiredAt = t\n\n steps.push({ t, delta: d, evalue, pValue, csLow: cs.low, csHigh: cs.high, decision })\n }\n\n const finalDecision = steps.length === 0 ? 'continue' : steps[steps.length - 1]!.decision\n return { steps, finalDecision, decisionFiredAt, clipped }\n}\n\nexport interface InterimReleaseConfidenceInput {\n /**\n * One delta series per candidate (paired deltas vs comparator). Order\n * within a series is the order the campaigns were run.\n */\n deltaSeries: Array<{ candidateId: string; deltas: number[] }>\n alpha?: number\n bound?: number\n rope?: { low: number; high: number }\n}\n\nexport interface InterimReleaseConfidence {\n candidates: Array<{\n candidateId: string\n decision: SequentialDecision\n decisionFiredAt: number | null\n finalEvalue: number\n finalPValue: number\n pairs: number\n csLow: number\n csHigh: number\n }>\n /**\n * Campaign-level recommendation: pick the strongest 'promote_now', else\n * 'continue' if any candidate is still live, else 'reject_now' if every\n * candidate is dead, else 'equivalent'.\n */\n recommendation: { decision: SequentialDecision; candidateId: string | null }\n}\n\n/**\n * Run interim sequential analyses across many candidates at once,\n * preserving the time-uniform α guarantee for each candidate's series and\n * synthesising a campaign-level recommendation. Designed to be called on\n * every campaign tick — the recommendation is anytime-valid.\n */\nexport function evaluateInterimReleaseConfidence(\n input: InterimReleaseConfidenceInput,\n): InterimReleaseConfidence {\n const candidates = input.deltaSeries.map((s) => {\n const seq = pairedEvalueSequence(s.deltas, {\n alpha: input.alpha,\n bound: input.bound,\n rope: input.rope,\n })\n const last = seq.steps[seq.steps.length - 1]\n return {\n candidateId: s.candidateId,\n decision: seq.finalDecision,\n decisionFiredAt: seq.decisionFiredAt,\n finalEvalue: last?.evalue ?? 1,\n finalPValue: last?.pValue ?? 1,\n pairs: seq.steps.length,\n csLow: last?.csLow ?? Number.NEGATIVE_INFINITY,\n csHigh: last?.csHigh ?? Number.POSITIVE_INFINITY,\n }\n })\n\n const promote = candidates.find((c) => c.decision === 'promote_now')\n if (promote) return { candidates, recommendation: { decision: 'promote_now', candidateId: promote.candidateId } }\n const live = candidates.find((c) => c.decision === 'continue')\n if (live) return { candidates, recommendation: { decision: 'continue', candidateId: null } }\n const equiv = candidates.find((c) => c.decision === 'equivalent')\n if (equiv) return { candidates, recommendation: { decision: 'equivalent', candidateId: equiv.candidateId } }\n return { candidates, recommendation: { decision: 'reject_now', candidateId: null } }\n}\n\n// ── Internals ────────────────────────────────────────────────────────────\n\n/**\n * Empirical Bernstein confidence sequence on the mean of bounded variables.\n * Adapted from Howard et al. (2021) §4.4. Provides a time-uniform CI on\n * the running mean; valid at every stopping time.\n */\nfunction empiricalBernsteinCs(\n sum: number,\n sumSq: number,\n n: number,\n bound: number,\n alpha: number,\n): { low: number; high: number } {\n if (n === 0) return { low: -bound, high: bound }\n const mean = sum / n\n const variance = Math.max(0, sumSq / n - mean * mean)\n // Iterated-log calibration constant. The 1.7 exponent matches the\n // recommended choice in Howard et al. for two-sided coverage at level\n // 1 - α with mild log-corrections; tightening further requires a\n // tuned mixture and is out of scope.\n const psi = Math.log(2 / alpha) + 1.7 * Math.log(Math.log(Math.max(Math.E, n)) + 1)\n const radius = Math.sqrt((2 * variance * psi) / n) + (3 * bound * psi) / n\n return { low: mean - radius, high: mean + radius }\n}\n"],"mappings":";AAuGA,eAAsB,yBACpB,OACyC;AACzC,QAAM,aAAa,MAAM,cAAc;AACvC,QAAM,YAAY,MAAM,aAAa;AACrC,QAAM,YAAY,MAAM,sBAAsB;AAC9C,QAAM,MAAM,QAAQ,MAAM,IAAI;AAE9B,QAAM,WAAW,MAAM,MAAM,SAAS,KAAK;AAC3C,QAAM,gBAAgB,oBAAI,IAAiC;AAC3D,aAAW,KAAK,UAAU;AACxB,UAAM,MAAM,cAAc,IAAI,EAAE,KAAK,KAAK,CAAC;AAC3C,QAAI,KAAK,CAAC;AACV,kBAAc,IAAI,EAAE,OAAO,GAAG;AAAA,EAChC;AAIA,QAAM,kBAAkB,oBAAI,IAAY;AACxC,aAAW,KAAK,MAAM,MAAM;AAC1B,eAAW,KAAK,OAAO,KAAK,EAAE,QAAQ,GAAG,EAAG,iBAAgB,IAAI,CAAC;AAAA,EACnE;AACA,QAAM,UAAU,MAAM,WAAW,CAAC,GAAG,eAAe;AAIpD,QAAM,UAAoB,CAAC;AAC3B,aAAW,KAAK,SAAS;AACvB,eAAW,KAAK,MAAM,gBAAgB;AACpC,cAAQ,KAAK,EAAE,QAAQ,GAAG,SAAS,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC;AAAA,IACxD;AAAA,EACF;AAEA,MAAI,SAAS;AACb,MAAI,UAAU;AACd,aAAW,OAAO,MAAM,MAAM;AAC5B,UAAM,KAAK,cAAc,IAAI,IAAI,KAAK;AACtC,QAAI,CAAC,MAAM,GAAG,WAAW,GAAG;AAAE;AAAW;AAAA,IAAS;AAClD,QAAI,gBAAgB;AACpB,eAAW,KAAK,SAAS;AACvB,YAAM,IAAI,IAAI,QAAQ,IAAI,CAAC;AAC3B,UAAI,OAAO,MAAM,YAAY,CAAC,OAAO,SAAS,CAAC,EAAG;AAClD,iBAAW,KAAK,MAAM,gBAAgB;AACpC,cAAM,SAAS,GACZ,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,CAAC,EAC3B,OAAO,CAAC,MAAmB,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,CAAC;AACzE,YAAI,OAAO,WAAW,EAAG;AACzB,cAAM,IAAI,OAAO,QAAQ,IAAI,GAAG,SAAS;AACzC,YAAI,MAAM,KAAM;AAChB,cAAM,SAAS,QAAQ,KAAK,CAAC,MAAM,EAAE,WAAW,KAAK,EAAE,YAAY,CAAC;AACpE,eAAO,GAAG,KAAK,CAAC;AAChB,eAAO,GAAG,KAAK,CAAC;AAChB,wBAAgB;AAAA,MAClB;AAAA,IACF;AACA,QAAI,cAAe;AAAA,EACrB;AAEA,QAAM,QAA6B,CAAC;AACpC,aAAW,KAAK,SAAS;AACvB,QAAI,EAAE,GAAG,SAAS,WAAY;AAC9B,UAAM,UAAU,SAAS,EAAE,IAAI,EAAE,EAAE;AACnC,UAAM,WAAW,SAAS,aAAa,EAAE,EAAE,GAAG,aAAa,EAAE,EAAE,CAAC;AAChE,UAAM,KAAK,YAAY,EAAE,IAAI,EAAE,IAAI,WAAW,GAAG;AACjD,UAAM,UACJ,KAAK,IAAI,QAAQ,KAAK,MAAM,iBAC1B,KAAK,IAAI,QAAQ,KAAK,MAAM,gBAC5B;AACJ,UAAM,KAAK;AAAA,MACT,QAAQ,EAAE;AAAA,MAAQ,SAAS,EAAE;AAAA,MAAS,GAAG,EAAE,GAAG;AAAA,MAC9C;AAAA,MAAS;AAAA,MAAU,MAAM;AAAA,MAAI;AAAA,IAC/B,CAAC;AAAA,EACH;AAEA,QAAM,WAAW,oBAAI,IAAiC;AACtD,aAAW,KAAK,OAAO;AACrB,UAAM,MAAM,SAAS,IAAI,EAAE,MAAM,KAAK,CAAC;AACvC,QAAI,KAAK,CAAC;AACV,aAAS,IAAI,EAAE,QAAQ,GAAG;AAAA,EAC5B;AACA,QAAM,SAA0B,CAAC,GAAG,SAAS,QAAQ,CAAC,EACnD,IAAI,CAAC,CAAC,QAAQ,EAAE,MAAM;AACrB,UAAM,OAAO,GAAG,OAAO,CAAC,GAAG,MAAO,KAAK,IAAI,EAAE,QAAQ,IAAI,KAAK,IAAI,EAAE,QAAQ,IAAI,IAAI,CAAE;AACtF,WAAO;AAAA,MACL;AAAA,MACA,aAAa,KAAK;AAAA,MAClB,UAAU,KAAK;AAAA,MACf,SAAS,KAAK;AAAA,MACd,GAAG,KAAK;AAAA,MACR,SAAS,KAAK;AAAA,IAChB;AAAA,EACF,CAAC,EACA,KAAK,CAAC,GAAG,MAAM,KAAK,IAAI,EAAE,QAAQ,IAAI,KAAK,IAAI,EAAE,QAAQ,CAAC;AAE7D,QAAM,qBAAqB,QAAQ,OAAO,CAAC,MAAM,CAAC,SAAS,IAAI,CAAC,CAAC;AAEjE,SAAO,EAAE,OAAO,QAAQ,eAAe,QAAQ,aAAa,SAAS,mBAAmB;AAC1F;AAIA,SAAS,OACP,QACA,UACA,QACA,MACe;AACf,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,MAAI,SAAS,OAAQ,QAAO,OAAO,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,OAAO;AACvE,MAAI,SAAS,MAAO,QAAO,KAAK,IAAI,GAAG,MAAM;AAE7C,QAAM,SAAS,CAAC,GAAG,QAAQ,EACxB,OAAO,CAAC,MAAM,OAAO,EAAE,QAAQ,MAAM,MAAM,QAAQ,EACnD,KAAK,CAAC,GAAG,MAAM,EAAE,aAAa,EAAE,UAAU;AAC7C,SAAO,OAAO,CAAC,GAAG,QAAQ,MAAM,KAAK;AACvC;AAEA,SAAS,SAAS,GAAa,GAAqB;AAClD,MAAI,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,EAAG,QAAO,OAAO;AACzD,QAAM,KAAK,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC5C,QAAM,KAAK,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC5C,MAAI,MAAM,GAAG,KAAK,GAAG,KAAK;AAC1B,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,UAAM,KAAK,EAAE,CAAC,IAAK;AACnB,UAAM,KAAK,EAAE,CAAC,IAAK;AACnB,WAAO,KAAK;AAAI,UAAM,KAAK;AAAI,UAAM,KAAK;AAAA,EAC5C;AACA,MAAI,OAAO,KAAK,OAAO,EAAG,QAAO,OAAO,KAAK,OAAO,IAAI,IAAI;AAC5D,SAAO,MAAM,KAAK,KAAK,KAAK,EAAE;AAChC;AAEA,SAAS,aAAa,IAAwB;AAC5C,QAAM,UAAU,GAAG,IAAI,CAAC,GAAG,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,CAAC;AACrE,QAAM,IAAI,IAAI,MAAc,GAAG,MAAM;AACrC,WAAS,IAAI,GAAG,IAAI,QAAQ,UAAU;AACpC,QAAI,IAAI;AACR,WAAO,IAAI,IAAI,QAAQ,UAAU,QAAQ,IAAI,CAAC,EAAG,MAAM,QAAQ,CAAC,EAAG,EAAG;AACtE,UAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,aAAS,IAAI,GAAG,KAAK,GAAG,IAAK,GAAE,QAAQ,CAAC,EAAG,CAAC,IAAI;AAChD,QAAI,IAAI;AAAA,EACV;AACA,SAAO;AACT;AAEA,SAAS,YACP,IACA,IACA,YACA,KAC+B;AAC/B,QAAM,IAAI,GAAG;AACb,MAAI,IAAI,EAAG,QAAO,EAAE,KAAK,OAAO,KAAK,MAAM,OAAO,IAAI;AACtD,QAAM,UAAoB,CAAC;AAC3B,WAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,UAAM,KAAK,IAAI,MAAc,CAAC;AAC9B,UAAM,KAAK,IAAI,MAAc,CAAC;AAC9B,aAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,YAAM,MAAM,KAAK,MAAM,IAAI,IAAI,CAAC;AAChC,SAAG,CAAC,IAAI,GAAG,GAAG;AACd,SAAG,CAAC,IAAI,GAAG,GAAG;AAAA,IAChB;AACA,UAAM,IAAI,SAAS,IAAI,EAAE;AACzB,QAAI,OAAO,SAAS,CAAC,EAAG,SAAQ,KAAK,CAAC;AAAA,EACxC;AACA,UAAQ,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC5B,MAAI,QAAQ,WAAW,EAAG,QAAO,EAAE,KAAK,OAAO,KAAK,MAAM,OAAO,IAAI;AACrE,SAAO;AAAA,IACL,KAAK,QAAQ,KAAK,MAAM,QAAQ,QAAQ,MAAM,CAAC;AAAA,IAC/C,MAAM,QAAQ,KAAK,IAAI,QAAQ,SAAS,GAAG,KAAK,MAAM,QAAQ,QAAQ,MAAM,CAAC,CAAC;AAAA,EAChF;AACF;AAEA,SAAS,QAAQ,MAA6B;AAC5C,MAAI,SAAS,OAAW,QAAO,KAAK;AACpC,MAAI,IAAI,SAAS;AACjB,SAAO,MAAM;AACX,QAAK,IAAI,eAAgB;AACzB,QAAI,IAAI;AACR,QAAI,KAAK,KAAK,IAAK,MAAM,IAAK,IAAI,CAAC;AACnC,SAAK,IAAI,KAAK,KAAK,IAAK,MAAM,GAAI,IAAI,EAAE;AACxC,aAAS,IAAK,MAAM,QAAS,KAAK;AAAA,EACpC;AACF;;;AC/LO,SAAS,qBACd,QACA,OAA4B,CAAC,GACP;AACtB,QAAM,IAAI,KAAK,SAAS;AACxB,QAAM,QAAQ,KAAK,SAAS;AAC5B,QAAM,gBAAgB,KAAK,uBAAuB;AAClD,QAAM,OAAO,KAAK,QAAQ;AAC1B,MAAI,KAAK,EAAG,OAAM,IAAI,MAAM,yCAAyC;AACrE,MAAI,SAAS,KAAK,SAAS,EAAG,OAAM,IAAI,MAAM,8CAA8C;AAC5F,MAAI,QAAQ,EAAE,OAAO,SAAS,KAAK,GAAG,KAAK,OAAO,SAAS,KAAK,IAAI,KAAK,KAAK,OAAO,KAAK,OAAO;AAC/F,UAAM,IAAI,MAAM,yDAAoD;AAAA,EACtE;AAEA,QAAM,QAA4B,CAAC;AACnC,MAAI,UAAU;AACd,MAAI,SAAS;AACb,MAAI,kBAAiC;AAGrC,MAAI,MAAM;AACV,MAAI,QAAQ;AACZ,MAAI,QAAQ;AAEZ,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,QAAI,IAAI,OAAO,CAAC;AAChB,QAAI,IAAI,CAAC,KAAK,IAAI,GAAG;AACnB,UAAI,KAAK,IAAI,CAAC,GAAG,KAAK,IAAI,GAAG,CAAC,CAAC;AAC/B,gBAAU;AAAA,IACZ;AAKA,UAAM,QAAQ,UAAU,IAAI,IAAI,MAAM;AACtC,UAAM,SAAS,UAAU,IAAI,IAAI,IAAI,KAAK,IAAI,OAAO,QAAQ,QAAQ,QAAQ,KAAK;AAClF,UAAM,IAAI,IAAI;AACd,UAAM,SAAS,gBAAgB,KAAK,IAAI,GAAG,QAAQ,EAAE;AACrD,QAAI,SAAU,SAAS,SAAS,IAAI,KAAM;AAE1C,UAAM,YAAY,OAAO;AACzB,QAAI,SAAS,UAAW,UAAS;AACjC,QAAI,SAAS,CAAC,UAAW,UAAS,CAAC;AAEnC,aAAS,UAAU,IAAI,SAAS;AAChC,QAAI,CAAC,OAAO,SAAS,MAAM,KAAK,SAAS,EAAG,UAAS;AAErD,WAAO;AACP,aAAS,IAAI;AACb,aAAS;AAET,UAAM,SAAS,KAAK,IAAI,GAAG,IAAI,KAAK,IAAI,QAAQ,MAAM,CAAC;AAKvD,UAAM,KAAK,qBAAqB,KAAK,OAAO,OAAO,GAAG,KAAK;AAE3D,QAAI,WAA+B;AACnC,QAAI,QAAQ,GAAG,OAAO,KAAK,OAAO,GAAG,QAAQ,KAAK,KAAM,YAAW;AAAA,aAC1D,UAAU,IAAI,SAAS,QAAQ,EAAG,YAAW;AAAA,aAC7C,UAAU,IAAI,SAAS,QAAQ,EAAG,YAAW;AAAA,aAC7C,QAAQ,GAAG,OAAO,KAAK,IAAK,YAAW;AAEhD,QAAI,aAAa,cAAc,oBAAoB,KAAM,mBAAkB;AAE3E,UAAM,KAAK,EAAE,GAAG,OAAO,GAAG,QAAQ,QAAQ,OAAO,GAAG,KAAK,QAAQ,GAAG,MAAM,SAAS,CAAC;AAAA,EACtF;AAEA,QAAM,gBAAgB,MAAM,WAAW,IAAI,aAAa,MAAM,MAAM,SAAS,CAAC,EAAG;AACjF,SAAO,EAAE,OAAO,eAAe,iBAAiB,QAAQ;AAC1D;AAsCO,SAAS,iCACd,OAC0B;AAC1B,QAAM,aAAa,MAAM,YAAY,IAAI,CAAC,MAAM;AAC9C,UAAM,MAAM,qBAAqB,EAAE,QAAQ;AAAA,MACzC,OAAO,MAAM;AAAA,MACb,OAAO,MAAM;AAAA,MACb,MAAM,MAAM;AAAA,IACd,CAAC;AACD,UAAM,OAAO,IAAI,MAAM,IAAI,MAAM,SAAS,CAAC;AAC3C,WAAO;AAAA,MACL,aAAa,EAAE;AAAA,MACf,UAAU,IAAI;AAAA,MACd,iBAAiB,IAAI;AAAA,MACrB,aAAa,MAAM,UAAU;AAAA,MAC7B,aAAa,MAAM,UAAU;AAAA,MAC7B,OAAO,IAAI,MAAM;AAAA,MACjB,OAAO,MAAM,SAAS,OAAO;AAAA,MAC7B,QAAQ,MAAM,UAAU,OAAO;AAAA,IACjC;AAAA,EACF,CAAC;AAED,QAAM,UAAU,WAAW,KAAK,CAAC,MAAM,EAAE,aAAa,aAAa;AACnE,MAAI,QAAS,QAAO,EAAE,YAAY,gBAAgB,EAAE,UAAU,eAAe,aAAa,QAAQ,YAAY,EAAE;AAChH,QAAM,OAAO,WAAW,KAAK,CAAC,MAAM,EAAE,aAAa,UAAU;AAC7D,MAAI,KAAM,QAAO,EAAE,YAAY,gBAAgB,EAAE,UAAU,YAAY,aAAa,KAAK,EAAE;AAC3F,QAAM,QAAQ,WAAW,KAAK,CAAC,MAAM,EAAE,aAAa,YAAY;AAChE,MAAI,MAAO,QAAO,EAAE,YAAY,gBAAgB,EAAE,UAAU,cAAc,aAAa,MAAM,YAAY,EAAE;AAC3G,SAAO,EAAE,YAAY,gBAAgB,EAAE,UAAU,cAAc,aAAa,KAAK,EAAE;AACrF;AASA,SAAS,qBACP,KACA,OACA,GACA,OACA,OAC+B;AAC/B,MAAI,MAAM,EAAG,QAAO,EAAE,KAAK,CAAC,OAAO,MAAM,MAAM;AAC/C,QAAM,OAAO,MAAM;AACnB,QAAM,WAAW,KAAK,IAAI,GAAG,QAAQ,IAAI,OAAO,IAAI;AAKpD,QAAM,MAAM,KAAK,IAAI,IAAI,KAAK,IAAI,MAAM,KAAK,IAAI,KAAK,IAAI,KAAK,IAAI,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC;AAClF,QAAM,SAAS,KAAK,KAAM,IAAI,WAAW,MAAO,CAAC,IAAK,IAAI,QAAQ,MAAO;AACzE,SAAO,EAAE,KAAK,OAAO,QAAQ,MAAM,OAAO,OAAO;AACnD;","names":[]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/eval-campaign.ts"],"sourcesContent":["/**\n * EvalCampaign — opinionated matrix runner that wires the four\n * capture-integrity directives by construction.\n *\n * Every consumer that ran a launch-grade benchmark before 0.22 reinvented\n * the same shape: matrix runner → for each (variant, scenario, seed) →\n * start a TraceEmitter → call LLMs → end the run → maybe analyze.\n * The bug class blueprint-agent reported (raw events not captured, route\n * silently wrong, integrity not asserted, analyst never ran) lives at the\n * integration boundary — not the agent-eval API surface. The four\n * directives in `SKILL.md § Capture integrity` are mitigations.\n *\n * `EvalCampaign` is the structural fix. Consumers don't wire the integrity\n * surface anymore; the campaign owns it. Specifically, the campaign:\n *\n * - calls `assertLlmRoute` once at preflight before any work runs\n * - constructs a per-run `TraceStore` and `RawProviderSink` via factories\n * - constructs the `TraceEmitter` with `onRunComplete: [analyst hook]`\n * - hands the runner an `LlmClientOptions` pre-wired with the sink and\n * trace context — the runner can't accidentally call an LLM without\n * capturing the raw HTTP envelope\n * - calls `assertRunCaptured` after every `endRun` and routes failures\n * through a configurable policy (`throw` / `mark_failed` / `log`)\n * - assembles per-run `RunRecord`s and runs `researchReport` at the end\n * so the campaign artifact is launch-decision-grade by default\n * - embeds the campaign fingerprint (a SHA-256 over the canonicalised\n * run set) and optional `preregistrationHash` in the report\n *\n * The runner contract is intentionally narrow: produce a `CampaignRunOutcome`\n * given a fully-wired `CampaignRunContext`. Everything orchestration-shaped\n * lives in the campaign. This is the inversion-of-control point — consumers\n * stop writing matrix runners and start writing scenario-runners.\n *\n * Out of scope for v1 (tracked in `docs/research-report-methodology.md`):\n *\n * - Distributed/cluster execution (concurrency is local async)\n * - Adaptive sampling / sequential interim looks\n * - Resume from partial state across crashes\n * - LLM-call retry beyond what `LlmClient` already does\n */\n\nimport { canonicalize, hashJson } from './pre-registration'\nimport { assertLlmRoute, type LlmClientOptions, type LlmRouteRequirements } from './llm-client'\nimport { TraceEmitter } from './trace/emitter'\nimport {\n FileSystemRawProviderSink,\n type RawProviderSink,\n} from './trace/raw-provider-sink'\nimport {\n RunIntegrityError,\n assertRunCaptured,\n type RunIntegrityExpectations,\n type RunIntegrityReport,\n} from './trace/integrity'\nimport type { RunCompleteHook } from './trace/emitter'\nimport type { TraceStore } from './trace/store'\nimport type {\n RunJudgeMetadata,\n RunOutcome,\n RunRecord,\n RunSplitTag,\n RunTokenUsage,\n} from './run-record'\nimport {\n researchReport,\n type ResearchReport,\n type ResearchReportOptions,\n} from './summary-report'\n\n// ── Public types ─────────────────────────────────────────────────────────\n\nexport interface CampaignVariant<V> {\n id: string\n payload: V\n}\n\nexport interface CampaignScenario {\n scenarioId: string\n /** Free-form metadata propagated to runs and reports. */\n tags?: Record<string, string>\n}\n\nexport interface CampaignRunContext<V> {\n /** Stable run id. The campaign generates this; the runner does not. */\n runId: string\n /** Logical experiment id (campaignId by default; overridable per-run via opts). */\n experimentId: string\n variant: V\n variantId: string\n scenarioId: string\n scenarioTags: Record<string, string>\n seed: number\n splitTag: RunSplitTag\n /**\n * The TraceEmitter for this run, with `onRunComplete` hooks pre-wired\n * (analyst auto-execution if configured, plus integrity check). The\n * runner MUST call `emitter.startRun` before doing any work and either\n * `emitter.endRun` or `emitter.abortRun` before returning.\n */\n emitter: TraceEmitter\n store: TraceStore\n rawSink: RawProviderSink\n /**\n * Pre-wired LLM client options — `rawSink` and `traceContext` are populated\n * so any `callLlm(req, ctx.llmOpts)` automatically captures raw HTTP. The\n * runner can spread additional fields if needed.\n */\n llmOpts: LlmClientOptions\n}\n\nexport interface CampaignRunOutcome {\n /** Did the run pass? Mirrors `RunOutcome.pass` semantics. */\n pass: boolean\n /** Score for the run on its split. Maps to `searchScore` or `holdoutScore`. */\n score: number\n /** Mandatory cost in USD. Use 0 + raw.cost_unknown=1 only if truly unknown. */\n costUsd: number\n tokenUsage: RunTokenUsage\n /** Snapshot model id (e.g. `claude-sonnet-4-6@2025-04-15`). */\n model: string\n /** sha256 of the effective prompt sent to the model. */\n promptHash: string\n /** sha256 of the effective config (model, temperature, tools, judges, splits). */\n configHash: string\n /** Optional extra numeric metrics to land in `outcome.raw`. */\n raw?: Record<string, number>\n /** Optional failure-taxonomy tag if the run failed. */\n failureMode?: string\n /** Optional judge metadata when a judge was used. */\n judgeMetadata?: RunJudgeMetadata\n}\n\nexport type CampaignRunner<V> = (ctx: CampaignRunContext<V>) => Promise<CampaignRunOutcome>\n\nexport type CampaignIntegrityPolicy = 'throw' | 'mark_failed' | 'log'\n\nexport interface EvalCampaignOptions<V> {\n /**\n * Stable id for the campaign. Used as the default `experimentId` on\n * every run, and folded into the campaign fingerprint.\n */\n campaignId: string\n variants: CampaignVariant<V>[]\n scenarios: CampaignScenario[]\n /** Default `[0, 1, 2]`. */\n seeds?: number[]\n /** Default `'holdout'` — the split that anchors a launch decision. */\n splitTag?: RunSplitTag\n /** Git SHA the campaign is run against. Mandatory; `RunRecord` rejects unset. */\n commitSha: string\n /**\n * LLM client config. Augmented per-run with `rawSink` and `traceContext`\n * before being passed to the runner. The campaign asserts this config\n * matches `routeRequirements` once at preflight.\n */\n llmOpts: LlmClientOptions\n /**\n * Default `{ requireExplicitBaseUrl: true, requireAuth: true }` — fail\n * loud if the campaign would silently fall back to the public router or\n * run unauthenticated. Override with an empty object to disable.\n */\n routeRequirements?: LlmRouteRequirements\n /**\n * Per-run TraceStore factory. Common shape: a fresh store per run keyed\n * on `runId`. Implementations that share a store across the campaign\n * are valid — the campaign only writes through `emitter`.\n */\n storeFactory: (params: CampaignFactoryParams) => TraceStore\n /**\n * Per-run RawProviderSink factory. Defaults to `FileSystemRawProviderSink`\n * rooted at `${workDir}/raw-events/${runId}` if `workDir` is supplied;\n * otherwise required. Forensic capture is non-negotiable in a campaign\n * run — pass `NoopRawProviderSink` explicitly if you want to opt out.\n */\n rawSinkFactory?: (params: CampaignFactoryParams) => RawProviderSink\n /**\n * Filesystem root for default `rawSinkFactory`. Ignored if\n * `rawSinkFactory` is supplied.\n */\n workDir?: string\n /**\n * Extra `onRunComplete` hooks the campaign appends (after its own\n * integrity-check hook). Pass `traceAnalystOnRunComplete(...)` here.\n */\n onRunComplete?: RunCompleteHook[]\n /**\n * Per-run integrity expectations. Defaults to:\n * `{ llmSpansMin: 1, requireRawCoverageOfLlmSpans: true, requireOutcome: true }`.\n * Override (e.g. `{ llmSpansMin: 0 }`) for runs that don't call LLMs.\n */\n integrity?: RunIntegrityExpectations\n /** Behaviour when integrity fails. Default `'mark_failed'`. */\n onIntegrityFailure?: CampaignIntegrityPolicy\n /**\n * Per-run runner. Receives a fully-wired context; produces an outcome\n * the campaign converts into a `RunRecord`.\n */\n runner: CampaignRunner<V>\n /**\n * If set, the campaign computes `researchReport` at the end. `comparator`\n * is a `variantId`. Other fields are forwarded verbatim.\n */\n report?: { comparator?: string } & Omit<ResearchReportOptions, 'comparator' | 'preregistrationHash' | 'generatedAt'>\n /**\n * Hash of a signed `HypothesisManifest` (see `pre-registration.ts`).\n * Embedded in the campaign fingerprint and the research report.\n */\n preregistrationHash?: string\n /** Local concurrency. Default `1` (sequential). */\n concurrency?: number\n /**\n * Override the time source. Tests pass a mock to make wallMs deterministic.\n */\n now?: () => number\n /** Override the runId generator. Tests pin this. */\n runId?: (params: CampaignFactoryParams) => string\n}\n\nexport interface CampaignFactoryParams {\n campaignId: string\n runId: string\n variantId: string\n scenarioId: string\n seed: number\n}\n\nexport interface FailedRun {\n runId: string\n variantId: string\n scenarioId: string\n seed: number\n reason: string\n error?: string\n}\n\nexport interface EvalCampaignResult {\n campaignId: string\n /** SHA-256 over canonicalised `(variantIds, scenarioIds, seeds, comparator, splitTag, baseUrl, provider, preregistrationHash)`. */\n campaignFingerprint: string\n preregistrationHash: string | null\n /** Successful runs only. Failed runs land in `failedRuns`. */\n runs: RunRecord[]\n /** Integrity reports for every successful run. */\n integrityReports: RunIntegrityReport[]\n failedRuns: FailedRun[]\n /** Computed when `report` is set on options. */\n report?: ResearchReport\n startedAt: string\n endedAt: string\n}\n\n// ── Implementation ───────────────────────────────────────────────────────\n\nconst DEFAULT_INTEGRITY: RunIntegrityExpectations = {\n llmSpansMin: 1,\n requireRawCoverageOfLlmSpans: true,\n requireOutcome: true,\n}\n\nconst DEFAULT_ROUTE: LlmRouteRequirements = {\n requireExplicitBaseUrl: true,\n requireAuth: true,\n}\n\nexport async function runEvalCampaign<V>(opts: EvalCampaignOptions<V>): Promise<EvalCampaignResult> {\n // ── Preflight ──────────────────────────────────────────────────────\n assertLlmRoute(opts.llmOpts, opts.routeRequirements ?? DEFAULT_ROUTE)\n\n if (opts.variants.length === 0) {\n throw new Error('runEvalCampaign: variants must be non-empty.')\n }\n if (opts.scenarios.length === 0) {\n throw new Error('runEvalCampaign: scenarios must be non-empty.')\n }\n const variantIds = new Set<string>()\n for (const v of opts.variants) {\n if (variantIds.has(v.id)) {\n throw new Error(`runEvalCampaign: duplicate variant id \"${v.id}\".`)\n }\n variantIds.add(v.id)\n }\n const scenarioIds = new Set<string>()\n for (const s of opts.scenarios) {\n if (scenarioIds.has(s.scenarioId)) {\n throw new Error(`runEvalCampaign: duplicate scenarioId \"${s.scenarioId}\".`)\n }\n scenarioIds.add(s.scenarioId)\n }\n if (opts.report?.comparator && !variantIds.has(opts.report.comparator)) {\n throw new Error(`runEvalCampaign: report.comparator \"${opts.report.comparator}\" is not a configured variantId.`)\n }\n if (!opts.commitSha) {\n throw new Error('runEvalCampaign: commitSha is required (every RunRecord needs it).')\n }\n\n const seeds = opts.seeds ?? [0, 1, 2]\n const splitTag: RunSplitTag = opts.splitTag ?? 'holdout'\n const concurrency = Math.max(1, opts.concurrency ?? 1)\n const integrity = { ...DEFAULT_INTEGRITY, ...(opts.integrity ?? {}) }\n const onIntegrityFailure: CampaignIntegrityPolicy = opts.onIntegrityFailure ?? 'mark_failed'\n const now = opts.now ?? (() => Date.now())\n const baseUrl = (opts.llmOpts.baseUrl ?? '').replace(/\\/+$/, '')\n const provider = opts.llmOpts.provider ?? null\n const preregistrationHash = opts.preregistrationHash ?? null\n\n const rawSinkFactory = opts.rawSinkFactory ?? defaultRawSinkFactory(opts.workDir)\n\n // ── Fingerprint ────────────────────────────────────────────────────\n const campaignFingerprint = await hashJson(canonicalize({\n campaignId: opts.campaignId,\n variants: opts.variants.map((v) => v.id).sort(),\n scenarios: opts.scenarios.map((s) => s.scenarioId).sort(),\n seeds: [...seeds].sort((a, b) => a - b),\n splitTag,\n comparator: opts.report?.comparator ?? null,\n baseUrl,\n provider,\n preregistrationHash,\n }))\n\n // ── Plan the matrix ────────────────────────────────────────────────\n type Cell = { variant: CampaignVariant<V>; scenario: CampaignScenario; seed: number }\n const cells: Cell[] = []\n for (const variant of opts.variants) {\n for (const scenario of opts.scenarios) {\n for (const seed of seeds) {\n cells.push({ variant, scenario, seed })\n }\n }\n }\n\n const startedAt = new Date(now()).toISOString()\n const runs: RunRecord[] = []\n const integrityReports: RunIntegrityReport[] = []\n const failedRuns: FailedRun[] = []\n\n // ── Execute (bounded-concurrency worker pool) ──────────────────────\n let cursor = 0\n async function worker(): Promise<void> {\n while (true) {\n const i = cursor++\n if (i >= cells.length) return\n const cell = cells[i]!\n try {\n const result = await runOneCell(cell)\n runs.push(result.record)\n integrityReports.push(result.integrity)\n } catch (err) {\n if (err instanceof CellExecutionError) {\n failedRuns.push(err.failed)\n if (err.integrity) integrityReports.push(err.integrity)\n } else {\n // Genuine bug — not a runner failure, not an integrity failure.\n // Surface it; don't silently mask.\n throw err\n }\n }\n }\n }\n\n async function runOneCell(cell: Cell): Promise<{ record: RunRecord; integrity: RunIntegrityReport }> {\n const runId = (opts.runId ?? defaultRunId)({\n campaignId: opts.campaignId,\n runId: '', // unused by default generator\n variantId: cell.variant.id,\n scenarioId: cell.scenario.scenarioId,\n seed: cell.seed,\n })\n const factoryParams: CampaignFactoryParams = {\n campaignId: opts.campaignId,\n runId,\n variantId: cell.variant.id,\n scenarioId: cell.scenario.scenarioId,\n seed: cell.seed,\n }\n const store = opts.storeFactory(factoryParams)\n const rawSink = rawSinkFactory(factoryParams)\n\n const emitter = new TraceEmitter(store, {\n runId,\n now: opts.now,\n onRunComplete: opts.onRunComplete,\n })\n\n const llmOpts: LlmClientOptions = {\n ...opts.llmOpts,\n rawSink,\n traceContext: { runId },\n }\n\n const ctx: CampaignRunContext<V> = {\n runId,\n experimentId: opts.campaignId,\n variant: cell.variant.payload,\n variantId: cell.variant.id,\n scenarioId: cell.scenario.scenarioId,\n scenarioTags: cell.scenario.tags ?? {},\n seed: cell.seed,\n splitTag,\n emitter,\n store,\n rawSink,\n llmOpts,\n }\n\n const wallStart = now()\n let outcome: CampaignRunOutcome\n try {\n outcome = await opts.runner(ctx)\n } catch (err) {\n const message = err instanceof Error ? err.message : String(err)\n // The runner threw mid-execution; give it a chance to have aborted.\n try {\n await emitter.abortRun(message)\n } catch {\n // Already aborted/ended; ignore.\n }\n throw new CellExecutionError({\n runId,\n variantId: cell.variant.id,\n scenarioId: cell.scenario.scenarioId,\n seed: cell.seed,\n reason: 'runner_threw',\n error: message,\n })\n }\n const wallMs = now() - wallStart\n\n const integrityReport = await assertRunCaptured(store, runId, { ...integrity, rawSink })\n if (!integrityReport.ok) {\n switch (onIntegrityFailure) {\n case 'throw':\n throw new RunIntegrityError(integrityReport)\n case 'mark_failed':\n throw new CellExecutionError(\n {\n runId,\n variantId: cell.variant.id,\n scenarioId: cell.scenario.scenarioId,\n seed: cell.seed,\n reason: 'integrity_failed',\n error: integrityReport.issues.map((i) => i.code).join(', '),\n },\n integrityReport,\n )\n case 'log':\n // Caller wants the run admitted with a flagged report; fall through.\n break\n }\n }\n\n const recordOutcome: RunOutcome = {\n raw: outcome.raw ?? {},\n }\n if (splitTag === 'holdout') recordOutcome.holdoutScore = outcome.score\n else recordOutcome.searchScore = outcome.score\n\n const record: RunRecord = {\n runId,\n experimentId: opts.campaignId,\n candidateId: cell.variant.id,\n seed: cell.seed,\n model: outcome.model,\n promptHash: outcome.promptHash,\n configHash: outcome.configHash,\n commitSha: opts.commitSha,\n wallMs,\n costUsd: outcome.costUsd,\n tokenUsage: outcome.tokenUsage,\n judgeMetadata: outcome.judgeMetadata,\n outcome: recordOutcome,\n failureMode: outcome.failureMode,\n splitTag,\n scenarioId: cell.scenario.scenarioId,\n }\n return { record, integrity: integrityReport }\n }\n\n const workers = Array.from({ length: Math.min(concurrency, cells.length) }, () => worker())\n await Promise.all(workers)\n\n // ── Optional research report ───────────────────────────────────────\n let report: ResearchReport | undefined\n if (opts.report) {\n const reportOpts: ResearchReportOptions = {\n ...opts.report,\n comparator: opts.report.comparator,\n split: splitTag === 'dev' ? 'search' : splitTag,\n generatedAt: new Date(now()).toISOString(),\n preregistrationHash: preregistrationHash ?? undefined,\n }\n report = await researchReport(runs, reportOpts)\n }\n\n const endedAt = new Date(now()).toISOString()\n\n return {\n campaignId: opts.campaignId,\n campaignFingerprint,\n preregistrationHash,\n runs,\n integrityReports,\n failedRuns,\n report,\n startedAt,\n endedAt,\n }\n}\n\n// ── Internal ─────────────────────────────────────────────────────────────\n\nclass CellExecutionError extends Error {\n readonly failed: FailedRun\n readonly integrity?: RunIntegrityReport\n constructor(failed: FailedRun, integrity?: RunIntegrityReport) {\n super(`cell ${failed.variantId}/${failed.scenarioId}@${failed.seed} failed: ${failed.reason}`)\n this.failed = failed\n this.integrity = integrity\n }\n}\n\nfunction defaultRawSinkFactory(workDir: string | undefined) {\n return (params: CampaignFactoryParams): RawProviderSink => {\n if (!workDir) {\n throw new Error(\n 'runEvalCampaign: rawSinkFactory not supplied and workDir not set. Pass either to enable raw provider capture, or pass `new NoopRawProviderSink()` via rawSinkFactory to opt out explicitly.',\n )\n }\n return new FileSystemRawProviderSink({\n dir: `${workDir}/raw-events/${params.runId}`,\n })\n }\n}\n\nfunction defaultRunId(params: CampaignFactoryParams): string {\n // Stable across re-runs: fingerprint of (campaignId, variantId, scenarioId, seed).\n // Caller can override via opts.runId for non-deterministic IDs.\n const base = `${params.campaignId}::${params.variantId}::${params.scenarioId}::${params.seed}`\n // Lightweight hex: we don't need crypto-grade here, just stability + uniqueness.\n let h1 = 0x811c9dc5\n let h2 = 0x12345678\n for (let i = 0; i < base.length; i++) {\n const c = base.charCodeAt(i)\n h1 = Math.imul(h1 ^ c, 0x01000193) >>> 0\n h2 = Math.imul(h2 ^ c, 0x9e3779b1) >>> 0\n }\n return `run-${h1.toString(16).padStart(8, '0')}${h2.toString(16).padStart(8, '0')}`\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;AA6PA,IAAM,oBAA8C;AAAA,EAClD,aAAa;AAAA,EACb,8BAA8B;AAAA,EAC9B,gBAAgB;AAClB;AAEA,IAAM,gBAAsC;AAAA,EAC1C,wBAAwB;AAAA,EACxB,aAAa;AACf;AAEA,eAAsB,gBAAmB,MAA2D;AAElG,iBAAe,KAAK,SAAS,KAAK,qBAAqB,aAAa;AAEpE,MAAI,KAAK,SAAS,WAAW,GAAG;AAC9B,UAAM,IAAI,MAAM,8CAA8C;AAAA,EAChE;AACA,MAAI,KAAK,UAAU,WAAW,GAAG;AAC/B,UAAM,IAAI,MAAM,+CAA+C;AAAA,EACjE;AACA,QAAM,aAAa,oBAAI,IAAY;AACnC,aAAW,KAAK,KAAK,UAAU;AAC7B,QAAI,WAAW,IAAI,EAAE,EAAE,GAAG;AACxB,YAAM,IAAI,MAAM,0CAA0C,EAAE,EAAE,IAAI;AAAA,IACpE;AACA,eAAW,IAAI,EAAE,EAAE;AAAA,EACrB;AACA,QAAM,cAAc,oBAAI,IAAY;AACpC,aAAW,KAAK,KAAK,WAAW;AAC9B,QAAI,YAAY,IAAI,EAAE,UAAU,GAAG;AACjC,YAAM,IAAI,MAAM,0CAA0C,EAAE,UAAU,IAAI;AAAA,IAC5E;AACA,gBAAY,IAAI,EAAE,UAAU;AAAA,EAC9B;AACA,MAAI,KAAK,QAAQ,cAAc,CAAC,WAAW,IAAI,KAAK,OAAO,UAAU,GAAG;AACtE,UAAM,IAAI,MAAM,uCAAuC,KAAK,OAAO,UAAU,kCAAkC;AAAA,EACjH;AACA,MAAI,CAAC,KAAK,WAAW;AACnB,UAAM,IAAI,MAAM,oEAAoE;AAAA,EACtF;AAEA,QAAM,QAAQ,KAAK,SAAS,CAAC,GAAG,GAAG,CAAC;AACpC,QAAM,WAAwB,KAAK,YAAY;AAC/C,QAAM,cAAc,KAAK,IAAI,GAAG,KAAK,eAAe,CAAC;AACrD,QAAM,YAAY,EAAE,GAAG,mBAAmB,GAAI,KAAK,aAAa,CAAC,EAAG;AACpE,QAAM,qBAA8C,KAAK,sBAAsB;AAC/E,QAAM,MAAM,KAAK,QAAQ,MAAM,KAAK,IAAI;AACxC,QAAM,WAAW,KAAK,QAAQ,WAAW,IAAI,QAAQ,QAAQ,EAAE;AAC/D,QAAM,WAAW,KAAK,QAAQ,YAAY;AAC1C,QAAM,sBAAsB,KAAK,uBAAuB;AAExD,QAAM,iBAAiB,KAAK,kBAAkB,sBAAsB,KAAK,OAAO;AAGhF,QAAM,sBAAsB,MAAM,SAAS,aAAa;AAAA,IACtD,YAAY,KAAK;AAAA,IACjB,UAAU,KAAK,SAAS,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE,KAAK;AAAA,IAC9C,WAAW,KAAK,UAAU,IAAI,CAAC,MAAM,EAAE,UAAU,EAAE,KAAK;AAAA,IACxD,OAAO,CAAC,GAAG,KAAK,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAAA,IACtC;AAAA,IACA,YAAY,KAAK,QAAQ,cAAc;AAAA,IACvC;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC,CAAC;AAIF,QAAM,QAAgB,CAAC;AACvB,aAAW,WAAW,KAAK,UAAU;AACnC,eAAW,YAAY,KAAK,WAAW;AACrC,iBAAW,QAAQ,OAAO;AACxB,cAAM,KAAK,EAAE,SAAS,UAAU,KAAK,CAAC;AAAA,MACxC;AAAA,IACF;AAAA,EACF;AAEA,QAAM,YAAY,IAAI,KAAK,IAAI,CAAC,EAAE,YAAY;AAC9C,QAAM,OAAoB,CAAC;AAC3B,QAAM,mBAAyC,CAAC;AAChD,QAAM,aAA0B,CAAC;AAGjC,MAAI,SAAS;AACb,iBAAe,SAAwB;AACrC,WAAO,MAAM;AACX,YAAM,IAAI;AACV,UAAI,KAAK,MAAM,OAAQ;AACvB,YAAM,OAAO,MAAM,CAAC;AACpB,UAAI;AACF,cAAM,SAAS,MAAM,WAAW,IAAI;AACpC,aAAK,KAAK,OAAO,MAAM;AACvB,yBAAiB,KAAK,OAAO,SAAS;AAAA,MACxC,SAAS,KAAK;AACZ,YAAI,eAAe,oBAAoB;AACrC,qBAAW,KAAK,IAAI,MAAM;AAC1B,cAAI,IAAI,UAAW,kBAAiB,KAAK,IAAI,SAAS;AAAA,QACxD,OAAO;AAGL,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,iBAAe,WAAW,MAA2E;AACnG,UAAM,SAAS,KAAK,SAAS,cAAc;AAAA,MACzC,YAAY,KAAK;AAAA,MACjB,OAAO;AAAA;AAAA,MACP,WAAW,KAAK,QAAQ;AAAA,MACxB,YAAY,KAAK,SAAS;AAAA,MAC1B,MAAM,KAAK;AAAA,IACb,CAAC;AACD,UAAM,gBAAuC;AAAA,MAC3C,YAAY,KAAK;AAAA,MACjB;AAAA,MACA,WAAW,KAAK,QAAQ;AAAA,MACxB,YAAY,KAAK,SAAS;AAAA,MAC1B,MAAM,KAAK;AAAA,IACb;AACA,UAAM,QAAQ,KAAK,aAAa,aAAa;AAC7C,UAAM,UAAU,eAAe,aAAa;AAE5C,UAAM,UAAU,IAAI,aAAa,OAAO;AAAA,MACtC;AAAA,MACA,KAAK,KAAK;AAAA,MACV,eAAe,KAAK;AAAA,IACtB,CAAC;AAED,UAAM,UAA4B;AAAA,MAChC,GAAG,KAAK;AAAA,MACR;AAAA,MACA,cAAc,EAAE,MAAM;AAAA,IACxB;AAEA,UAAM,MAA6B;AAAA,MACjC;AAAA,MACA,cAAc,KAAK;AAAA,MACnB,SAAS,KAAK,QAAQ;AAAA,MACtB,WAAW,KAAK,QAAQ;AAAA,MACxB,YAAY,KAAK,SAAS;AAAA,MAC1B,cAAc,KAAK,SAAS,QAAQ,CAAC;AAAA,MACrC,MAAM,KAAK;AAAA,MACX;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,UAAM,YAAY,IAAI;AACtB,QAAI;AACJ,QAAI;AACF,gBAAU,MAAM,KAAK,OAAO,GAAG;AAAA,IACjC,SAAS,KAAK;AACZ,YAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAE/D,UAAI;AACF,cAAM,QAAQ,SAAS,OAAO;AAAA,MAChC,QAAQ;AAAA,MAER;AACA,YAAM,IAAI,mBAAmB;AAAA,QAC3B;AAAA,QACA,WAAW,KAAK,QAAQ;AAAA,QACxB,YAAY,KAAK,SAAS;AAAA,QAC1B,MAAM,KAAK;AAAA,QACX,QAAQ;AAAA,QACR,OAAO;AAAA,MACT,CAAC;AAAA,IACH;AACA,UAAM,SAAS,IAAI,IAAI;AAEvB,UAAM,kBAAkB,MAAM,kBAAkB,OAAO,OAAO,EAAE,GAAG,WAAW,QAAQ,CAAC;AACvF,QAAI,CAAC,gBAAgB,IAAI;AACvB,cAAQ,oBAAoB;AAAA,QAC1B,KAAK;AACH,gBAAM,IAAI,kBAAkB,eAAe;AAAA,QAC7C,KAAK;AACH,gBAAM,IAAI;AAAA,YACR;AAAA,cACE;AAAA,cACA,WAAW,KAAK,QAAQ;AAAA,cACxB,YAAY,KAAK,SAAS;AAAA,cAC1B,MAAM,KAAK;AAAA,cACX,QAAQ;AAAA,cACR,OAAO,gBAAgB,OAAO,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,IAAI;AAAA,YAC5D;AAAA,YACA;AAAA,UACF;AAAA,QACF,KAAK;AAEH;AAAA,MACJ;AAAA,IACF;AAEA,UAAM,gBAA4B;AAAA,MAChC,KAAK,QAAQ,OAAO,CAAC;AAAA,IACvB;AACA,QAAI,aAAa,UAAW,eAAc,eAAe,QAAQ;AAAA,QAC5D,eAAc,cAAc,QAAQ;AAEzC,UAAM,SAAoB;AAAA,MACxB;AAAA,MACA,cAAc,KAAK;AAAA,MACnB,aAAa,KAAK,QAAQ;AAAA,MAC1B,MAAM,KAAK;AAAA,MACX,OAAO,QAAQ;AAAA,MACf,YAAY,QAAQ;AAAA,MACpB,YAAY,QAAQ;AAAA,MACpB,WAAW,KAAK;AAAA,MAChB;AAAA,MACA,SAAS,QAAQ;AAAA,MACjB,YAAY,QAAQ;AAAA,MACpB,eAAe,QAAQ;AAAA,MACvB,SAAS;AAAA,MACT,aAAa,QAAQ;AAAA,MACrB;AAAA,MACA,YAAY,KAAK,SAAS;AAAA,IAC5B;AACA,WAAO,EAAE,QAAQ,WAAW,gBAAgB;AAAA,EAC9C;AAEA,QAAM,UAAU,MAAM,KAAK,EAAE,QAAQ,KAAK,IAAI,aAAa,MAAM,MAAM,EAAE,GAAG,MAAM,OAAO,CAAC;AAC1F,QAAM,QAAQ,IAAI,OAAO;AAGzB,MAAI;AACJ,MAAI,KAAK,QAAQ;AACf,UAAM,aAAoC;AAAA,MACxC,GAAG,KAAK;AAAA,MACR,YAAY,KAAK,OAAO;AAAA,MACxB,OAAO,aAAa,QAAQ,WAAW;AAAA,MACvC,aAAa,IAAI,KAAK,IAAI,CAAC,EAAE,YAAY;AAAA,MACzC,qBAAqB,uBAAuB;AAAA,IAC9C;AACA,aAAS,MAAM,eAAe,MAAM,UAAU;AAAA,EAChD;AAEA,QAAM,UAAU,IAAI,KAAK,IAAI,CAAC,EAAE,YAAY;AAE5C,SAAO;AAAA,IACL,YAAY,KAAK;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAIA,IAAM,qBAAN,cAAiC,MAAM;AAAA,EAC5B;AAAA,EACA;AAAA,EACT,YAAY,QAAmB,WAAgC;AAC7D,UAAM,QAAQ,OAAO,SAAS,IAAI,OAAO,UAAU,IAAI,OAAO,IAAI,YAAY,OAAO,MAAM,EAAE;AAC7F,SAAK,SAAS;AACd,SAAK,YAAY;AAAA,EACnB;AACF;AAEA,SAAS,sBAAsB,SAA6B;AAC1D,SAAO,CAAC,WAAmD;AACzD,QAAI,CAAC,SAAS;AACZ,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,WAAO,IAAI,0BAA0B;AAAA,MACnC,KAAK,GAAG,OAAO,eAAe,OAAO,KAAK;AAAA,IAC5C,CAAC;AAAA,EACH;AACF;AAEA,SAAS,aAAa,QAAuC;AAG3D,QAAM,OAAO,GAAG,OAAO,UAAU,KAAK,OAAO,SAAS,KAAK,OAAO,UAAU,KAAK,OAAO,IAAI;AAE5F,MAAI,KAAK;AACT,MAAI,KAAK;AACT,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,IAAI,KAAK,WAAW,CAAC;AAC3B,SAAK,KAAK,KAAK,KAAK,GAAG,QAAU,MAAM;AACvC,SAAK,KAAK,KAAK,KAAK,GAAG,UAAU,MAAM;AAAA,EACzC;AACA,SAAO,OAAO,GAAG,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,GAAG,GAAG,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC;AACnF;","names":[]}