@tangle-network/agent-eval 0.23.1 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +145 -0
- package/README.md +212 -79
- package/dist/baseline-4R5deP0N.d.ts +108 -0
- package/dist/benchmarks/index.d.ts +3 -2
- package/dist/benchmarks/index.js +1 -1
- package/dist/builder-eval/index.d.ts +249 -0
- package/dist/builder-eval/index.js +391 -0
- package/dist/builder-eval/index.js.map +1 -0
- package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
- package/dist/chunk-2A5XJB43.js.map +1 -0
- package/dist/chunk-47X6LRCE.js +76 -0
- package/dist/chunk-47X6LRCE.js.map +1 -0
- package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
- package/dist/chunk-4F5DQN55.js.map +1 -0
- package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
- package/dist/chunk-4S4BM3QQ.js.map +1 -0
- package/dist/chunk-5BKGXME7.js +65 -0
- package/dist/chunk-5BKGXME7.js.map +1 -0
- package/dist/{chunk-6KQG5HAH.js → chunk-5LBB5B3Z.js} +376 -72
- package/dist/chunk-5LBB5B3Z.js.map +1 -0
- package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
- package/dist/chunk-6QDKWHLS.js.map +1 -0
- package/dist/{chunk-VQQSPGSM.js → chunk-EDUKQ5AM.js} +247 -189
- package/dist/chunk-EDUKQ5AM.js.map +1 -0
- package/dist/chunk-I4MBDTY5.js +272 -0
- package/dist/chunk-I4MBDTY5.js.map +1 -0
- package/dist/chunk-JLZQWFV3.js +618 -0
- package/dist/chunk-JLZQWFV3.js.map +1 -0
- package/dist/chunk-K2TPS5LB.js +569 -0
- package/dist/chunk-K2TPS5LB.js.map +1 -0
- package/dist/chunk-KKHDIONI.js +414 -0
- package/dist/chunk-KKHDIONI.js.map +1 -0
- package/dist/chunk-KMPRBJK4.js +74 -0
- package/dist/chunk-KMPRBJK4.js.map +1 -0
- package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
- package/dist/chunk-KTGTIOFD.js.map +1 -0
- package/dist/chunk-LSH4MMOZ.js +838 -0
- package/dist/chunk-LSH4MMOZ.js.map +1 -0
- package/dist/chunk-NG236HPC.js +57 -0
- package/dist/chunk-NG236HPC.js.map +1 -0
- package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
- package/dist/chunk-NLMNWKVM.js.map +1 -0
- package/dist/chunk-NU65VQ7M.js +99 -0
- package/dist/chunk-NU65VQ7M.js.map +1 -0
- package/dist/chunk-OWLAAMME.js +250 -0
- package/dist/chunk-OWLAAMME.js.map +1 -0
- package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
- package/dist/chunk-PC4UYEBM.js.map +1 -0
- package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
- package/dist/chunk-RAF443UI.js.map +1 -0
- package/dist/chunk-RZTMDUO7.js +49 -0
- package/dist/chunk-RZTMDUO7.js.map +1 -0
- package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
- package/dist/chunk-SESZDQPX.js.map +1 -0
- package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
- package/dist/chunk-TVVP3ZZQ.js.map +1 -0
- package/dist/chunk-WWYCWKUM.js +196 -0
- package/dist/chunk-WWYCWKUM.js.map +1 -0
- package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
- package/dist/chunk-YRZ4M5GS.js.map +1 -0
- package/dist/chunk-ZN274SWR.js +613 -0
- package/dist/chunk-ZN274SWR.js.map +1 -0
- package/dist/cli.js +10 -6
- package/dist/cli.js.map +1 -1
- package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
- package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
- package/dist/control.d.ts +8 -6
- package/dist/control.js +10 -7
- package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
- package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
- package/dist/errors-BZ9sTdz7.d.ts +70 -0
- package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
- package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
- package/dist/governance/index.d.ts +5 -0
- package/dist/governance/index.js +18 -0
- package/dist/governance/index.js.map +1 -0
- package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
- package/dist/index-Oj9fAPPN.d.ts +270 -0
- package/dist/index.d.ts +2018 -3003
- package/dist/index.js +7443 -9102
- package/dist/index.js.map +1 -1
- package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
- package/dist/knowledge/index.d.ts +102 -0
- package/dist/knowledge/index.js +18 -0
- package/dist/knowledge/index.js.map +1 -0
- package/dist/meta-eval/index.d.ts +99 -0
- package/dist/meta-eval/index.js +324 -0
- package/dist/meta-eval/index.js.map +1 -0
- package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
- package/dist/openapi.json +491 -1
- package/dist/optimization.d.ts +11 -8
- package/dist/optimization.js +11 -9
- package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
- package/dist/pipelines/index.d.ts +172 -0
- package/dist/pipelines/index.js +345 -0
- package/dist/pipelines/index.js.map +1 -0
- package/dist/prm/index.d.ts +99 -0
- package/dist/prm/index.js +222 -0
- package/dist/prm/index.js.map +1 -0
- package/dist/query-DODUYdPg.d.ts +30 -0
- package/dist/release-report-BNgMdqPF.d.ts +292 -0
- package/dist/replay-BL96gCEP.d.ts +226 -0
- package/dist/reporting.d.ts +10 -295
- package/dist/reporting.js +10 -6
- package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-BPT8x_NT.d.ts} +148 -146
- package/dist/rl.d.ts +1762 -8
- package/dist/rl.js +2035 -58
- package/dist/rl.js.map +1 -1
- package/dist/rubric-D5tjHNJQ.d.ts +72 -0
- package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
- package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
- package/dist/sequential-Dgz1n51-.d.ts +139 -0
- package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
- package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-C7VPYEj2.d.ts} +3 -76
- package/dist/telemetry/file.js +4 -1
- package/dist/telemetry/file.js.map +1 -1
- package/dist/telemetry/index.js +57 -57
- package/dist/telemetry/index.js.map +1 -1
- package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
- package/dist/traces.d.ts +142 -387
- package/dist/traces.js +1302 -40
- package/dist/traces.js.map +1 -1
- package/dist/trajectory-CnoBo-JY.d.ts +32 -0
- package/dist/wire/index.d.ts +369 -25
- package/dist/wire/index.js +22 -3
- package/package.json +44 -18
- package/dist/chunk-42I2QC2L.js.map +0 -1
- package/dist/chunk-5IIQKMD5.js.map +0 -1
- package/dist/chunk-6KQG5HAH.js.map +0 -1
- package/dist/chunk-6M774GY6.js.map +0 -1
- package/dist/chunk-7EAUOUQS.js.map +0 -1
- package/dist/chunk-AXHNWLIX.js.map +0 -1
- package/dist/chunk-EXGR4XEM.js.map +0 -1
- package/dist/chunk-IOXMGMHQ.js.map +0 -1
- package/dist/chunk-KAO3Q65R.js.map +0 -1
- package/dist/chunk-LZKIOBG2.js +0 -2026
- package/dist/chunk-LZKIOBG2.js.map +0 -1
- package/dist/chunk-QBW3YBTR.js.map +0 -1
- package/dist/chunk-QUKKGHTZ.js.map +0 -1
- package/dist/chunk-SQQLHODJ.js.map +0 -1
- package/dist/chunk-V5QSWN7L.js +0 -1310
- package/dist/chunk-V5QSWN7L.js.map +0 -1
- package/dist/chunk-VQQSPGSM.js.map +0 -1
- package/dist/chunk-XPHOZPOM.js +0 -1947
- package/dist/chunk-XPHOZPOM.js.map +0 -1
- package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
- package/dist/index-ekBXweiQ.d.ts +0 -1894
- package/dist/sequential-DgU2mFsE.d.ts +0 -304
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/control-runtime.ts"],"sourcesContent":["/**\n * Policy-based agent control runtime.\n *\n * This is the minimal reusable loop behind driver-agent patterns:\n *\n * observe state -> validate -> decide next action -> act -> observe -> ...\n *\n * It deliberately does not model named \"topologies\". Direct execution,\n * critic/revise, driver intervention, specialist calls, and human escalation\n * are all just actions chosen by the control policy.\n */\n\nimport { type SpanHandle, TraceEmitter } from './trace/emitter'\nimport type { FailureClass } from './trace/schema'\nimport type { TraceStore } from './trace/store'\n\nexport type ControlSeverity = 'info' | 'warning' | 'error' | 'critical'\nexport type ControlActionFailureMode = 'continue' | 'stop'\n\nexport interface ControlEvalResult {\n /** Stable validator or judge id. */\n id: string\n /** Whether this check passed. */\n passed: boolean\n /** Optional normalized score. 1 = best, 0 = worst. */\n score?: number\n /** Objective validators should usually be \"error\" or \"critical\" when failed. */\n severity?: ControlSeverity\n /** Human-readable result. */\n detail?: string\n /** Small evidence string or pointer. Avoid large payloads. */\n evidence?: string\n /** True when the result came from deterministic state, not LLM judgment. */\n objective?: boolean\n /** Structured details for downstream control policies and reports. */\n metadata?: Record<string, unknown>\n}\n\nexport interface ControlBudget {\n maxSteps: number\n maxWallMs?: number\n maxCostUsd?: number\n}\n\nexport interface ControlStopPolicies<TState, TAction> {\n /**\n * Stop after N consecutive steps with no state fingerprint change and\n * less than `minScoreDelta` score movement. Disabled when omitted.\n */\n maxNoProgressSteps?: number\n /**\n * Stop after the same action fingerprint is selected N consecutive\n * times. 
Disabled when omitted.\n */\n maxRepeatedActions?: number\n /** Minimum score movement that counts as progress. Default 0.001. */\n minScoreDelta?: number\n /** Override the default JSON/string fingerprint for state comparisons. */\n stateFingerprint?: (state: TState) => string\n /** Override the default JSON/string fingerprint for repeated-action checks. */\n actionFingerprint?: (action: TAction) => string\n}\n\nexport interface ControlContext<\n TState,\n TAction,\n TActionResult,\n TEval extends ControlEvalResult = ControlEvalResult,\n> {\n intent: string\n state: TState\n evals: TEval[]\n history: ControlStep<TState, TAction, TActionResult, TEval>[]\n budget: ControlBudget\n stepIndex: number\n wallMs: number\n spentCostUsd: number\n remainingCostUsd?: number\n abortSignal: AbortSignal\n emitter?: TraceEmitter\n}\n\nexport type ControlDecision<TAction> =\n | {\n type: 'continue'\n action: TAction\n reason?: string\n }\n | {\n type: 'stop'\n reason: string\n pass?: boolean\n score?: number\n }\n\nexport interface StopDecision {\n stop: boolean\n pass: boolean\n reason: string\n score?: number\n failureClass?: FailureClass\n}\n\nexport interface ControlActionOutcome<TActionResult> {\n ok: boolean\n result?: TActionResult\n error?: string\n costUsd?: number\n durationMs: number\n}\n\nexport interface ControlRuntimeError {\n phase: 'observe' | 'validate' | 'decide' | 'act' | 'stop-policy' | 'on-step' | 'trace'\n stepIndex: number\n message: string\n}\n\nexport interface ControlStep<\n TState,\n TAction,\n TActionResult,\n TEval extends ControlEvalResult = ControlEvalResult,\n> {\n index: number\n decision: ControlDecision<TAction>\n beforeState: TState\n afterState: TState\n evalsBefore: TEval[]\n evalsAfter: TEval[]\n actionOutcome?: ControlActionOutcome<TActionResult>\n startedAt: string\n endedAt: string\n}\n\nexport interface ControlRunResult<\n TState,\n TAction,\n TActionResult,\n TEval extends ControlEvalResult = ControlEvalResult,\n> {\n intent: string\n 
pass: boolean\n completed: boolean\n reason: string\n score?: number\n steps: ControlStep<TState, TAction, TActionResult, TEval>[]\n finalState: TState | undefined\n finalEvals: TEval[]\n wallMs: number\n spentCostUsd: number\n runId: string | null\n failureClass?: FailureClass\n runtimeErrors: ControlRuntimeError[]\n stoppedBy: 'policy' | 'stop-policy' | 'budget' | 'abort' | 'runtime-error'\n}\n\nexport interface ControlRuntimeConfig<\n TState,\n TAction,\n TActionResult,\n TEval extends ControlEvalResult = ControlEvalResult,\n> {\n intent: string\n budget?: Partial<ControlBudget>\n signal?: AbortSignal\n /** Defaults to `continue`: action failures are recorded, then the policy gets another chance. */\n actionFailure?: ControlActionFailureMode\n /**\n * Extract cost from an action result. Used for `maxCostUsd` budget\n * enforcement and trace budget ledger emission.\n */\n getActionCostUsd?: (ctx: {\n action: TAction\n result: TActionResult\n state: TState\n evals: TEval[]\n history: ControlStep<TState, TAction, TActionResult, TEval>[]\n }) => number | undefined\n\n /** Read typed task/product state. Prefer structured state over transcript-only context. */\n observe: (ctx: {\n history: ControlStep<TState, TAction, TActionResult, TEval>[]\n abortSignal: AbortSignal\n }) => Promise<TState> | TState\n\n /** Objective validators first, subjective judges only where objective state is insufficient. */\n validate: (ctx: {\n intent: string\n state: TState\n history: ControlStep<TState, TAction, TActionResult, TEval>[]\n abortSignal: AbortSignal\n }) => Promise<TEval[]> | TEval[]\n\n /** Choose the next control action. Can call a worker, ask user, run critic, inspect state, or stop. */\n decide: (\n ctx: ControlContext<TState, TAction, TActionResult, TEval>,\n ) => Promise<ControlDecision<TAction>> | ControlDecision<TAction>\n\n /** Execute the action selected by the policy. 
*/\n act: (\n action: TAction,\n ctx: ControlContext<TState, TAction, TActionResult, TEval>,\n ) => Promise<TActionResult> | TActionResult\n\n /** Final stopping policy. Called before decide and after each action. */\n shouldStop?: (\n ctx: ControlContext<TState, TAction, TActionResult, TEval>,\n ) => Promise<StopDecision> | StopDecision\n\n /** Optional hook for tracing or live progress updates. */\n onStep?: (step: ControlStep<TState, TAction, TActionResult, TEval>) => Promise<void> | void\n\n /** Optional generic stuck-loop policies. Custom `shouldStop` still runs first. */\n stopPolicies?: ControlStopPolicies<TState, TAction>\n\n /** Optional trace sink. Emits one run plus one span per control step. */\n store?: TraceStore\n scenarioId?: string\n projectId?: string\n variantId?: string\n}\n\nconst DEFAULT_BUDGET: ControlBudget = {\n maxSteps: 8,\n maxWallMs: 5 * 60 * 1000,\n}\n\nexport async function runAgentControlLoop<\n TState,\n TAction,\n TActionResult,\n TEval extends ControlEvalResult = ControlEvalResult,\n>(\n config: ControlRuntimeConfig<TState, TAction, TActionResult, TEval>,\n): Promise<ControlRunResult<TState, TAction, TActionResult, TEval>> {\n const budget = normalizeBudget(config.budget)\n const actionFailure = config.actionFailure ?? 'continue'\n const controller = new AbortController()\n const upstreamAbort = () => controller.abort(config.signal?.reason)\n if (config.signal) {\n if (config.signal.aborted) controller.abort(config.signal.reason)\n else config.signal.addEventListener('abort', upstreamAbort, { once: true })\n }\n\n const started = Date.now()\n const wallTimer = budget.maxWallMs\n ? setTimeout(\n () => controller.abort(new Error('control runtime wall timeout')),\n budget.maxWallMs,\n )\n : undefined\n const history: ControlStep<TState, TAction, TActionResult, TEval>[] = []\n const emitter = config.store ? 
new TraceEmitter(config.store) : undefined\n let spentCostUsd = 0\n const runtimeErrors: ControlRuntimeError[] = []\n let lastStateFingerprint: string | undefined\n let lastActionFingerprint: string | undefined\n let noProgressStreak = 0\n let repeatedActionStreak = 0\n\n try {\n if (emitter) {\n await runTrace(runtimeErrors, 0, () =>\n emitter.startRun({\n scenarioId: config.scenarioId ?? 'agent-control-loop',\n projectId: config.projectId,\n variantId: config.variantId,\n layer: 'meta',\n tags: {\n intent: config.intent.slice(0, 120),\n maxSteps: String(budget.maxSteps),\n ...(budget.maxCostUsd !== undefined ? { maxCostUsd: String(budget.maxCostUsd) } : {}),\n },\n }),\n )\n }\n\n let state: TState\n let evals: TEval[]\n try {\n state = await config.observe({ history, abortSignal: controller.signal })\n } catch (err) {\n const error = runtimeError('observe', 0, err)\n runtimeErrors.push(error)\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: false,\n reason: error.message,\n steps: history,\n finalState: undefined,\n finalEvals: [],\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? null,\n failureClass: 'unknown',\n runtimeErrors,\n stoppedBy: 'runtime-error',\n })\n }\n try {\n evals = await config.validate({\n intent: config.intent,\n state,\n history,\n abortSignal: controller.signal,\n })\n await recordEvalSpans(emitter, evals, 'initial', runtimeErrors, 0)\n } catch (err) {\n const error = runtimeError('validate', 0, err)\n runtimeErrors.push(error)\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: false,\n reason: error.message,\n steps: history,\n finalState: state,\n finalEvals: [],\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? 
null,\n failureClass: 'unknown',\n runtimeErrors,\n stoppedBy: 'runtime-error',\n })\n }\n lastStateFingerprint = fingerprintState(state, config.stopPolicies)\n\n for (let stepIndex = 0; stepIndex < budget.maxSteps; stepIndex++) {\n if (controller.signal.aborted) {\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: false,\n reason: abortReason(controller.signal),\n score: undefined,\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? null,\n failureClass: 'timeout',\n runtimeErrors,\n stoppedBy: 'abort',\n })\n }\n\n const budgetStop = budgetStopDecision(budget, spentCostUsd)\n if (budgetStop.stop) {\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: false,\n reason: budgetStop.reason,\n score: averageScore(evals),\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? null,\n failureClass: 'budget_exceeded',\n runtimeErrors,\n stoppedBy: 'budget',\n })\n }\n\n const ctx = makeContext(\n config.intent,\n state,\n evals,\n history,\n budget,\n stepIndex,\n started,\n spentCostUsd,\n controller.signal,\n emitter,\n )\n let stop: StopDecision\n try {\n stop = config.shouldStop ? await config.shouldStop(ctx) : defaultStopDecision(evals)\n } catch (err) {\n runtimeErrors.push(runtimeError('stop-policy', stepIndex, err))\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: false,\n reason: runtimeErrors[runtimeErrors.length - 1]!.message,\n score: averageScore(evals),\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? 
null,\n failureClass: 'unknown',\n runtimeErrors,\n stoppedBy: 'runtime-error',\n })\n }\n if (stop.stop) {\n return finish(emitter, {\n intent: config.intent,\n pass: stop.pass,\n completed: true,\n reason: stop.reason,\n score: stop.score,\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? null,\n failureClass: stop.failureClass,\n runtimeErrors,\n stoppedBy: 'stop-policy',\n })\n }\n\n let decision: ControlDecision<TAction>\n try {\n decision = await config.decide(ctx)\n } catch (err) {\n runtimeErrors.push(runtimeError('decide', stepIndex, err))\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: false,\n reason: runtimeErrors[runtimeErrors.length - 1]!.message,\n score: averageScore(evals),\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? null,\n failureClass: 'unknown',\n runtimeErrors,\n stoppedBy: 'runtime-error',\n })\n }\n if (decision.type === 'stop') {\n return finish(emitter, {\n intent: config.intent,\n pass: decision.pass ?? false,\n completed: true,\n reason: decision.reason,\n score: decision.score,\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? null,\n failureClass: decision.pass === false ? 'unknown' : undefined,\n runtimeErrors,\n stoppedBy: 'policy',\n })\n }\n\n const actionFingerprint = fingerprintAction(decision.action, config.stopPolicies)\n repeatedActionStreak =\n actionFingerprint === lastActionFingerprint ? 
repeatedActionStreak + 1 : 1\n lastActionFingerprint = actionFingerprint\n const repeatedActionStop = repeatedActionStopDecision(\n config.stopPolicies,\n repeatedActionStreak,\n )\n if (repeatedActionStop.stop) {\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: true,\n reason: repeatedActionStop.reason,\n score: averageScore(evals),\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? null,\n failureClass: 'tool_recovery_failure',\n runtimeErrors,\n stoppedBy: 'stop-policy',\n })\n }\n\n const beforeState = state\n const evalsBefore = evals\n const scoreBefore = averageScore(evals)\n const actionStarted = Date.now()\n const stepHandle = emitter\n ? await runTrace(runtimeErrors, stepIndex, () =>\n emitter.tool({\n name: `control-step-${stepIndex}`,\n toolName: 'agent-control-action',\n args: decision.action,\n attributes: {\n decision: decision.reason ?? 'continue',\n repeatedActionStreak,\n },\n }),\n )\n : undefined\n let actionOutcome: ControlActionOutcome<TActionResult>\n try {\n const result = await config.act(decision.action, ctx)\n const rawCostUsd = config.getActionCostUsd?.({\n action: decision.action,\n result,\n state,\n evals,\n history,\n })\n const costUsd = normalizeActionCostUsd(rawCostUsd, runtimeErrors, stepIndex)\n if (costUsd !== undefined && Number.isFinite(costUsd) && costUsd > 0) {\n spentCostUsd += costUsd\n await recordCostBudget(\n emitter,\n budget,\n spentCostUsd,\n stepHandle,\n runtimeErrors,\n stepIndex,\n )\n }\n actionOutcome = {\n ok: true,\n result,\n ...(costUsd !== undefined ? 
{ costUsd } : {}),\n durationMs: Date.now() - actionStarted,\n }\n } catch (err) {\n runtimeErrors.push(runtimeError('act', stepIndex, err))\n actionOutcome = {\n ok: false,\n error: runtimeErrors[runtimeErrors.length - 1]!.message,\n durationMs: Date.now() - actionStarted,\n }\n if (actionFailure === 'stop') {\n await runTrace(runtimeErrors, stepIndex, () =>\n stepHandle?.fail(actionOutcome.error ?? 'action failed'),\n )\n const step: ControlStep<TState, TAction, TActionResult, TEval> = {\n index: stepIndex,\n decision,\n beforeState,\n afterState: state,\n evalsBefore,\n evalsAfter: evals,\n actionOutcome,\n startedAt: new Date(actionStarted).toISOString(),\n endedAt: new Date().toISOString(),\n }\n history.push(step)\n await runOnStep(config.onStep, step, runtimeErrors)\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: false,\n reason: actionOutcome.error ?? 'action failed',\n score: averageScore(evals),\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? 
null,\n failureClass: 'unknown',\n runtimeErrors,\n stoppedBy: 'runtime-error',\n })\n }\n }\n\n try {\n state = await config.observe({ history, abortSignal: controller.signal })\n } catch (err) {\n runtimeErrors.push(runtimeError('observe', stepIndex, err))\n const step: ControlStep<TState, TAction, TActionResult, TEval> = {\n index: stepIndex,\n decision,\n beforeState,\n afterState: beforeState,\n evalsBefore,\n evalsAfter: evals,\n actionOutcome,\n startedAt: new Date(actionStarted).toISOString(),\n endedAt: new Date().toISOString(),\n }\n history.push(step)\n await runTrace(runtimeErrors, stepIndex, () =>\n stepHandle?.fail(runtimeErrors[runtimeErrors.length - 1]!.message),\n )\n await runOnStep(config.onStep, step, runtimeErrors)\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: false,\n reason: runtimeErrors[runtimeErrors.length - 1]!.message,\n score: averageScore(evals),\n steps: history,\n finalState: beforeState,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? 
null,\n failureClass: 'unknown',\n runtimeErrors,\n stoppedBy: 'runtime-error',\n })\n }\n try {\n evals = await config.validate({\n intent: config.intent,\n state,\n history,\n abortSignal: controller.signal,\n })\n await recordEvalSpans(\n emitter,\n evals,\n `step-${stepIndex}`,\n runtimeErrors,\n stepIndex,\n stepHandle?.span.spanId,\n )\n } catch (err) {\n runtimeErrors.push(runtimeError('validate', stepIndex, err))\n const step: ControlStep<TState, TAction, TActionResult, TEval> = {\n index: stepIndex,\n decision,\n beforeState,\n afterState: state,\n evalsBefore,\n evalsAfter: evals,\n actionOutcome,\n startedAt: new Date(actionStarted).toISOString(),\n endedAt: new Date().toISOString(),\n }\n history.push(step)\n await runTrace(runtimeErrors, stepIndex, () =>\n stepHandle?.fail(runtimeErrors[runtimeErrors.length - 1]!.message),\n )\n await runOnStep(config.onStep, step, runtimeErrors)\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: false,\n reason: runtimeErrors[runtimeErrors.length - 1]!.message,\n score: averageScore(evals),\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? 
null,\n failureClass: 'unknown',\n runtimeErrors,\n stoppedBy: 'runtime-error',\n })\n }\n const scoreAfter = averageScore(evals)\n const stateFingerprint = fingerprintState(state, config.stopPolicies)\n const noProgressStop = noProgressStopDecision({\n policies: config.stopPolicies,\n lastStateFingerprint,\n stateFingerprint,\n scoreBefore,\n scoreAfter,\n currentStreak: noProgressStreak,\n })\n noProgressStreak = noProgressStop.streak\n lastStateFingerprint = stateFingerprint\n\n const step: ControlStep<TState, TAction, TActionResult, TEval> = {\n index: stepIndex,\n decision,\n beforeState,\n afterState: state,\n evalsBefore,\n evalsAfter: evals,\n actionOutcome,\n startedAt: new Date(actionStarted).toISOString(),\n endedAt: new Date().toISOString(),\n }\n history.push(step)\n if (actionOutcome.ok) {\n await runTrace(runtimeErrors, stepIndex, () =>\n stepHandle?.end({\n attributes: {\n actionCostUsd: actionOutcome.costUsd ?? null,\n spentCostUsd,\n scoreBefore: scoreBefore ?? null,\n scoreAfter: scoreAfter ?? null,\n noProgressStreak,\n },\n }),\n )\n } else {\n await runTrace(runtimeErrors, stepIndex, () =>\n stepHandle?.fail(actionOutcome.error ?? 'action failed', {\n attributes: {\n spentCostUsd,\n noProgressStreak,\n },\n }),\n )\n }\n await runOnStep(config.onStep, step, runtimeErrors)\n\n if (noProgressStop.stop) {\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: true,\n reason: noProgressStop.reason,\n score: scoreAfter,\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? 
null,\n failureClass: 'tool_recovery_failure',\n runtimeErrors,\n stoppedBy: 'stop-policy',\n })\n }\n\n const postStepBudgetStop = budgetStopDecision(budget, spentCostUsd)\n if (postStepBudgetStop.stop) {\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: false,\n reason: postStepBudgetStop.reason,\n score: scoreAfter,\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? null,\n failureClass: 'budget_exceeded',\n runtimeErrors,\n stoppedBy: 'budget',\n })\n }\n\n const postStepCtx = makeContext(\n config.intent,\n state,\n evals,\n history,\n budget,\n stepIndex + 1,\n started,\n spentCostUsd,\n controller.signal,\n emitter,\n )\n let postStepStop: StopDecision\n try {\n postStepStop = config.shouldStop\n ? await config.shouldStop(postStepCtx)\n : defaultStopDecision(evals)\n } catch (err) {\n runtimeErrors.push(runtimeError('stop-policy', stepIndex + 1, err))\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: false,\n reason: runtimeErrors[runtimeErrors.length - 1]!.message,\n score: averageScore(evals),\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? null,\n failureClass: 'unknown',\n runtimeErrors,\n stoppedBy: 'runtime-error',\n })\n }\n if (postStepStop.stop) {\n return finish(emitter, {\n intent: config.intent,\n pass: postStepStop.pass,\n completed: true,\n reason: postStepStop.reason,\n score: postStepStop.score,\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? 
null,\n failureClass: postStepStop.failureClass,\n runtimeErrors,\n stoppedBy: 'stop-policy',\n })\n }\n }\n\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: false,\n reason: `budget exhausted: maxSteps=${budget.maxSteps}`,\n steps: history,\n finalState: state,\n finalEvals: evals,\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? null,\n failureClass: 'budget_exceeded',\n runtimeErrors,\n stoppedBy: 'budget',\n })\n } catch (err) {\n runtimeErrors.push(runtimeError('act', history.length, err))\n return finish(emitter, {\n intent: config.intent,\n pass: false,\n completed: false,\n reason: runtimeErrors[runtimeErrors.length - 1]!.message,\n steps: history,\n finalState: undefined,\n finalEvals: [],\n wallMs: Date.now() - started,\n spentCostUsd,\n runId: emitter?.runId ?? null,\n failureClass: 'unknown',\n runtimeErrors,\n stoppedBy: 'runtime-error',\n })\n } finally {\n if (wallTimer) clearTimeout(wallTimer)\n if (config.signal) config.signal.removeEventListener('abort', upstreamAbort)\n }\n}\n\nexport function stopOnNoProgress<TState, TAction>(\n maxNoProgressSteps: number,\n options: Omit<ControlStopPolicies<TState, TAction>, 'maxNoProgressSteps'> = {},\n): ControlStopPolicies<TState, TAction> {\n return { ...options, maxNoProgressSteps }\n}\n\nexport function stopOnRepeatedAction<TState, TAction>(\n maxRepeatedActions: number,\n options: Omit<ControlStopPolicies<TState, TAction>, 'maxRepeatedActions'> = {},\n): ControlStopPolicies<TState, TAction> {\n return { ...options, maxRepeatedActions }\n}\n\nexport function objectiveEval(input: Omit<ControlEvalResult, 'objective'>): ControlEvalResult {\n return { ...input, objective: true }\n}\n\nexport function subjectiveEval(input: Omit<ControlEvalResult, 'objective'>): ControlEvalResult {\n return { ...input, objective: false }\n}\n\nfunction normalizeBudget(input: Partial<ControlBudget> | undefined): ControlBudget {\n const raw = { ...DEFAULT_BUDGET, ...input } 
as Record<string, unknown>\n if (!Number.isInteger(raw.maxSteps) || (raw.maxSteps as number) < 1) {\n throw new RangeError(\n `ControlRuntime budget.maxSteps must be an integer >= 1, got ${String(raw.maxSteps)}`,\n )\n }\n const budget: ControlBudget = { maxSteps: raw.maxSteps as number }\n if (raw.maxWallMs !== undefined) {\n if (\n typeof raw.maxWallMs !== 'number' ||\n !Number.isFinite(raw.maxWallMs) ||\n raw.maxWallMs <= 0\n ) {\n throw new RangeError(\n `ControlRuntime budget.maxWallMs must be a positive finite number, got ${String(raw.maxWallMs)}`,\n )\n }\n budget.maxWallMs = raw.maxWallMs\n }\n if (raw.maxCostUsd !== undefined) {\n if (\n typeof raw.maxCostUsd !== 'number' ||\n !Number.isFinite(raw.maxCostUsd) ||\n raw.maxCostUsd < 0\n ) {\n throw new RangeError(\n `ControlRuntime budget.maxCostUsd must be a nonnegative finite number, got ${String(raw.maxCostUsd)}`,\n )\n }\n budget.maxCostUsd = raw.maxCostUsd\n }\n return budget\n}\n\nfunction normalizeActionCostUsd(\n costUsd: number | undefined,\n runtimeErrors: ControlRuntimeError[],\n stepIndex: number,\n): number | undefined {\n if (costUsd === undefined) return undefined\n if (!Number.isFinite(costUsd) || costUsd < 0) {\n runtimeErrors.push(\n runtimeError('act', stepIndex, new Error(`invalid action costUsd: ${String(costUsd)}`)),\n )\n return undefined\n }\n return costUsd\n}\n\nexport function allCriticalPassed(evals: ControlEvalResult[]): boolean {\n return evals.every(\n (result) => result.passed || (result.severity !== 'critical' && result.severity !== 'error'),\n )\n}\n\nfunction makeContext<TState, TAction, TActionResult, TEval extends ControlEvalResult>(\n intent: string,\n state: TState,\n evals: TEval[],\n history: ControlStep<TState, TAction, TActionResult, TEval>[],\n budget: ControlBudget,\n stepIndex: number,\n started: number,\n spentCostUsd: number,\n abortSignal: AbortSignal,\n emitter?: TraceEmitter,\n): ControlContext<TState, TAction, TActionResult, TEval> {\n return {\n intent,\n 
state,\n evals,\n history,\n budget,\n stepIndex,\n wallMs: Date.now() - started,\n spentCostUsd,\n remainingCostUsd:\n budget.maxCostUsd === undefined ? undefined : Math.max(0, budget.maxCostUsd - spentCostUsd),\n abortSignal,\n emitter,\n }\n}\n\nfunction defaultStopDecision(evals: ControlEvalResult[]): StopDecision {\n if (!evals.length) return { stop: false, pass: false, reason: 'no evals yet' }\n const pass = allCriticalPassed(evals)\n return pass\n ? { stop: true, pass: true, reason: 'all critical evals passed', score: averageScore(evals) }\n : {\n stop: false,\n pass: false,\n reason: 'critical evals still failing',\n score: averageScore(evals),\n }\n}\n\nfunction averageScore(evals: ControlEvalResult[]): number | undefined {\n const scored = evals\n .map((result) => result.score)\n .filter((score): score is number => typeof score === 'number')\n if (!scored.length) return undefined\n return Math.round((scored.reduce((sum, score) => sum + score, 0) / scored.length) * 1000) / 1000\n}\n\nfunction budgetStopDecision(\n budget: ControlBudget,\n spentCostUsd: number,\n): { stop: boolean; reason: string } {\n if (budget.maxCostUsd !== undefined && spentCostUsd >= budget.maxCostUsd) {\n return {\n stop: true,\n reason: `budget exhausted: maxCostUsd=${budget.maxCostUsd}`,\n }\n }\n return { stop: false, reason: '' }\n}\n\nasync function recordCostBudget(\n emitter: TraceEmitter | undefined,\n budget: ControlBudget,\n spentCostUsd: number,\n handle: SpanHandle | undefined,\n runtimeErrors: ControlRuntimeError[],\n stepIndex: number,\n): Promise<void> {\n if (!emitter || budget.maxCostUsd === undefined) return\n const maxCostUsd = budget.maxCostUsd\n await runTrace(runtimeErrors, stepIndex, () =>\n emitter.recordBudget({\n dimension: 'usd',\n limit: maxCostUsd,\n consumed: spentCostUsd,\n remaining: Math.max(0, maxCostUsd - spentCostUsd),\n breached: spentCostUsd >= maxCostUsd,\n spanId: handle?.span.spanId,\n }),\n )\n}\n\nasync function recordEvalSpans(\n emitter: 
TraceEmitter | undefined,\n evals: ControlEvalResult[],\n phase: string,\n runtimeErrors: ControlRuntimeError[],\n stepIndex: number,\n targetSpanId?: string,\n): Promise<void> {\n if (!emitter) return\n for (const result of evals) {\n await runTrace(runtimeErrors, stepIndex, () =>\n emitter.recordJudge({\n judgeId: result.objective ? 'objective-validator' : 'subjective-judge',\n targetSpanId: targetSpanId ?? emitter.runId,\n name: `control-eval/${result.id}`,\n dimension: result.id,\n score: typeof result.score === 'number' ? result.score : result.passed ? 1 : 0,\n rationale: result.detail,\n evidence: result.evidence,\n attributes: {\n phase,\n passed: result.passed,\n severity: result.severity,\n objective: result.objective,\n },\n }),\n )\n }\n}\n\nasync function runOnStep<TState, TAction, TActionResult, TEval extends ControlEvalResult>(\n onStep: ControlRuntimeConfig<TState, TAction, TActionResult, TEval>['onStep'] | undefined,\n step: ControlStep<TState, TAction, TActionResult, TEval>,\n runtimeErrors: ControlRuntimeError[],\n): Promise<void> {\n if (!onStep) return\n try {\n await onStep(step)\n } catch (err) {\n runtimeErrors.push(runtimeError('on-step', step.index, err))\n }\n}\n\nasync function runTrace<T>(\n runtimeErrors: ControlRuntimeError[],\n stepIndex: number,\n write: () => Promise<T | undefined> | T | undefined,\n): Promise<T | undefined> {\n try {\n return await write()\n } catch (err) {\n runtimeErrors.push(runtimeError('trace', stepIndex, err))\n return undefined\n }\n}\n\nfunction noProgressStopDecision<TState, TAction>(args: {\n policies: ControlStopPolicies<TState, TAction> | undefined\n lastStateFingerprint: string | undefined\n stateFingerprint: string\n scoreBefore: number | undefined\n scoreAfter: number | undefined\n currentStreak: number\n}): { stop: boolean; reason: string; streak: number } {\n const max = args.policies?.maxNoProgressSteps\n if (!max || max <= 0) return { stop: false, reason: '', streak: 0 }\n const minScoreDelta = 
args.policies?.minScoreDelta ?? 0.001\n const scoreDelta = Math.abs((args.scoreAfter ?? 0) - (args.scoreBefore ?? 0))\n const stateUnchanged =\n args.lastStateFingerprint !== undefined && args.lastStateFingerprint === args.stateFingerprint\n const scoreFlat = scoreDelta < minScoreDelta\n const streak = stateUnchanged && scoreFlat ? args.currentStreak + 1 : 0\n return streak >= max\n ? { stop: true, reason: `stuck: no state/score progress for ${streak} step(s)`, streak }\n : { stop: false, reason: '', streak }\n}\n\nfunction repeatedActionStopDecision<TState, TAction>(\n policies: ControlStopPolicies<TState, TAction> | undefined,\n streak: number,\n): { stop: boolean; reason: string } {\n const max = policies?.maxRepeatedActions\n if (!max || max <= 0 || streak < max) return { stop: false, reason: '' }\n return {\n stop: true,\n reason: `stuck: repeated same action for ${streak} step(s)`,\n }\n}\n\nfunction fingerprintState<TState, TAction>(\n state: TState,\n policies?: ControlStopPolicies<TState, TAction>,\n): string {\n if (policies?.stateFingerprint) return policies.stateFingerprint(state)\n return stableFingerprint(state)\n}\n\nfunction fingerprintAction<TState, TAction>(\n action: TAction,\n policies?: ControlStopPolicies<TState, TAction>,\n): string {\n if (policies?.actionFingerprint) return policies.actionFingerprint(action)\n return stableFingerprint(action)\n}\n\nfunction stableFingerprint(value: unknown): string {\n if (typeof value === 'string') return value\n if (typeof value === 'number' || typeof value === 'boolean' || value == null) return String(value)\n try {\n return JSON.stringify(sortForFingerprint(value))\n } catch {\n return String(value)\n }\n}\n\nfunction sortForFingerprint(value: unknown): unknown {\n if (Array.isArray(value)) return value.map(sortForFingerprint)\n if (!value || typeof value !== 'object') return value\n const record = value as Record<string, unknown>\n const sorted: Record<string, unknown> = {}\n for (const key of 
Object.keys(record).sort()) {\n sorted[key] = sortForFingerprint(record[key])\n }\n return sorted\n}\n\nfunction abortReason(signal: AbortSignal): string {\n const reason = signal.reason\n if (reason instanceof Error) return reason.message\n return reason ? String(reason) : 'aborted'\n}\n\nfunction runtimeError(\n phase: ControlRuntimeError['phase'],\n stepIndex: number,\n err: unknown,\n): ControlRuntimeError {\n const message = err instanceof Error ? err.message : String(err)\n return { phase, stepIndex, message }\n}\n\nasync function finish<TState, TAction, TActionResult, TEval extends ControlEvalResult>(\n emitter: TraceEmitter | undefined,\n result: ControlRunResult<TState, TAction, TActionResult, TEval>,\n): Promise<ControlRunResult<TState, TAction, TActionResult, TEval>> {\n await runTrace(result.runtimeErrors, result.steps.length, () =>\n emitter?.endRun({\n pass: result.pass,\n score: result.score ?? averageScore(result.finalEvals),\n failureClass: result.failureClass,\n notes: result.reason,\n }),\n )\n return 
result\n}\n"],"mappings":";;;;;AA8NA,IAAM,iBAAgC;AAAA,EACpC,UAAU;AAAA,EACV,WAAW,IAAI,KAAK;AACtB;AAEA,eAAsB,oBAMpB,QACkE;AAClE,QAAM,SAAS,gBAAgB,OAAO,MAAM;AAC5C,QAAM,gBAAgB,OAAO,iBAAiB;AAC9C,QAAM,aAAa,IAAI,gBAAgB;AACvC,QAAM,gBAAgB,MAAM,WAAW,MAAM,OAAO,QAAQ,MAAM;AAClE,MAAI,OAAO,QAAQ;AACjB,QAAI,OAAO,OAAO,QAAS,YAAW,MAAM,OAAO,OAAO,MAAM;AAAA,QAC3D,QAAO,OAAO,iBAAiB,SAAS,eAAe,EAAE,MAAM,KAAK,CAAC;AAAA,EAC5E;AAEA,QAAM,UAAU,KAAK,IAAI;AACzB,QAAM,YAAY,OAAO,YACrB;AAAA,IACE,MAAM,WAAW,MAAM,IAAI,MAAM,8BAA8B,CAAC;AAAA,IAChE,OAAO;AAAA,EACT,IACA;AACJ,QAAM,UAAgE,CAAC;AACvE,QAAM,UAAU,OAAO,QAAQ,IAAI,aAAa,OAAO,KAAK,IAAI;AAChE,MAAI,eAAe;AACnB,QAAM,gBAAuC,CAAC;AAC9C,MAAI;AACJ,MAAI;AACJ,MAAI,mBAAmB;AACvB,MAAI,uBAAuB;AAE3B,MAAI;AACF,QAAI,SAAS;AACX,YAAM;AAAA,QAAS;AAAA,QAAe;AAAA,QAAG,MAC/B,QAAQ,SAAS;AAAA,UACf,YAAY,OAAO,cAAc;AAAA,UACjC,WAAW,OAAO;AAAA,UAClB,WAAW,OAAO;AAAA,UAClB,OAAO;AAAA,UACP,MAAM;AAAA,YACJ,QAAQ,OAAO,OAAO,MAAM,GAAG,GAAG;AAAA,YAClC,UAAU,OAAO,OAAO,QAAQ;AAAA,YAChC,GAAI,OAAO,eAAe,SAAY,EAAE,YAAY,OAAO,OAAO,UAAU,EAAE,IAAI,CAAC;AAAA,UACrF;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI;AACJ,QAAI;AACJ,QAAI;AACF,cAAQ,MAAM,OAAO,QAAQ,EAAE,SAAS,aAAa,WAAW,OAAO,CAAC;AAAA,IAC1E,SAAS,KAAK;AACZ,YAAM,QAAQ,aAAa,WAAW,GAAG,GAAG;AAC5C,oBAAc,KAAK,KAAK;AACxB,aAAO,OAAO,SAAS;AAAA,QACrB,QAAQ,OAAO;AAAA,QACf,MAAM;AAAA,QACN,WAAW;AAAA,QACX,QAAQ,MAAM;AAAA,QACd,OAAO;AAAA,QACP,YAAY;AAAA,QACZ,YAAY,CAAC;AAAA,QACb,QAAQ,KAAK,IAAI,IAAI;AAAA,QACrB;AAAA,QACA,OAAO,SAAS,SAAS;AAAA,QACzB,cAAc;AAAA,QACd;AAAA,QACA,WAAW;AAAA,MACb,CAAC;AAAA,IACH;AACA,QAAI;AACF,cAAQ,MAAM,OAAO,SAAS;AAAA,QAC5B,QAAQ,OAAO;AAAA,QACf;AAAA,QACA;AAAA,QACA,aAAa,WAAW;AAAA,MAC1B,CAAC;AACD,YAAM,gBAAgB,SAAS,OAAO,WAAW,eAAe,CAAC;AAAA,IACnE,SAAS,KAAK;AACZ,YAAM,QAAQ,aAAa,YAAY,GAAG,GAAG;AAC7C,oBAAc,KAAK,KAAK;AACxB,aAAO,OAAO,SAAS;AAAA,QACrB,QAAQ,OAAO;AAAA,QACf,MAAM;AAAA,QACN,WAAW;AAAA,QACX,QAAQ,MAAM;AAAA,QACd,OAAO;AAAA,QACP,YAAY;AAAA,QACZ,YAAY,CAAC;AAAA,QACb,QAAQ,KAAK,IAAI,IAAI;AAAA,QACrB;AAAA,QACA,OAAO,SAAS,SAAS;AAAA,QACzB,cAAc;AAAA,QACd;AAAA,QACA,WAAW;AAAA,MACb,CAAC;AA
AA,IACH;AACA,2BAAuB,iBAAiB,OAAO,OAAO,YAAY;AAElE,aAAS,YAAY,GAAG,YAAY,OAAO,UAAU,aAAa;AAChE,UAAI,WAAW,OAAO,SAAS;AAC7B,eAAO,OAAO,SAAS;AAAA,UACrB,QAAQ,OAAO;AAAA,UACf,MAAM;AAAA,UACN,WAAW;AAAA,UACX,QAAQ,YAAY,WAAW,MAAM;AAAA,UACrC,OAAO;AAAA,UACP,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,UACrB;AAAA,UACA,OAAO,SAAS,SAAS;AAAA,UACzB,cAAc;AAAA,UACd;AAAA,UACA,WAAW;AAAA,QACb,CAAC;AAAA,MACH;AAEA,YAAM,aAAa,mBAAmB,QAAQ,YAAY;AAC1D,UAAI,WAAW,MAAM;AACnB,eAAO,OAAO,SAAS;AAAA,UACrB,QAAQ,OAAO;AAAA,UACf,MAAM;AAAA,UACN,WAAW;AAAA,UACX,QAAQ,WAAW;AAAA,UACnB,OAAO,aAAa,KAAK;AAAA,UACzB,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,UACrB;AAAA,UACA,OAAO,SAAS,SAAS;AAAA,UACzB,cAAc;AAAA,UACd;AAAA,UACA,WAAW;AAAA,QACb,CAAC;AAAA,MACH;AAEA,YAAM,MAAM;AAAA,QACV,OAAO;AAAA,QACP;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,WAAW;AAAA,QACX;AAAA,MACF;AACA,UAAI;AACJ,UAAI;AACF,eAAO,OAAO,aAAa,MAAM,OAAO,WAAW,GAAG,IAAI,oBAAoB,KAAK;AAAA,MACrF,SAAS,KAAK;AACZ,sBAAc,KAAK,aAAa,eAAe,WAAW,GAAG,CAAC;AAC9D,eAAO,OAAO,SAAS;AAAA,UACrB,QAAQ,OAAO;AAAA,UACf,MAAM;AAAA,UACN,WAAW;AAAA,UACX,QAAQ,cAAc,cAAc,SAAS,CAAC,EAAG;AAAA,UACjD,OAAO,aAAa,KAAK;AAAA,UACzB,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,UACrB;AAAA,UACA,OAAO,SAAS,SAAS;AAAA,UACzB,cAAc;AAAA,UACd;AAAA,UACA,WAAW;AAAA,QACb,CAAC;AAAA,MACH;AACA,UAAI,KAAK,MAAM;AACb,eAAO,OAAO,SAAS;AAAA,UACrB,QAAQ,OAAO;AAAA,UACf,MAAM,KAAK;AAAA,UACX,WAAW;AAAA,UACX,QAAQ,KAAK;AAAA,UACb,OAAO,KAAK;AAAA,UACZ,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,UACrB;AAAA,UACA,OAAO,SAAS,SAAS;AAAA,UACzB,cAAc,KAAK;AAAA,UACnB;AAAA,UACA,WAAW;AAAA,QACb,CAAC;AAAA,MACH;AAEA,UAAI;AACJ,UAAI;AACF,mBAAW,MAAM,OAAO,OAAO,GAAG;AAAA,MACpC,SAAS,KAAK;AACZ,sBAAc,KAAK,aAAa,UAAU,WAAW,GAAG,CAAC;AACzD,eAAO,OAAO,SAAS;AAAA,UACrB,QAAQ,OAAO;AAAA,UACf,MAAM;AAAA,UACN,WAAW;AAAA,UACX,QAAQ,cAAc,cAAc,SAAS,CAAC,EAAG;AAAA,UACjD,OAAO,aAAa,KAAK;AAAA,UACzB,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,UACrB;
AAAA,UACA,OAAO,SAAS,SAAS;AAAA,UACzB,cAAc;AAAA,UACd;AAAA,UACA,WAAW;AAAA,QACb,CAAC;AAAA,MACH;AACA,UAAI,SAAS,SAAS,QAAQ;AAC5B,eAAO,OAAO,SAAS;AAAA,UACrB,QAAQ,OAAO;AAAA,UACf,MAAM,SAAS,QAAQ;AAAA,UACvB,WAAW;AAAA,UACX,QAAQ,SAAS;AAAA,UACjB,OAAO,SAAS;AAAA,UAChB,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,UACrB;AAAA,UACA,OAAO,SAAS,SAAS;AAAA,UACzB,cAAc,SAAS,SAAS,QAAQ,YAAY;AAAA,UACpD;AAAA,UACA,WAAW;AAAA,QACb,CAAC;AAAA,MACH;AAEA,YAAM,oBAAoB,kBAAkB,SAAS,QAAQ,OAAO,YAAY;AAChF,6BACE,sBAAsB,wBAAwB,uBAAuB,IAAI;AAC3E,8BAAwB;AACxB,YAAM,qBAAqB;AAAA,QACzB,OAAO;AAAA,QACP;AAAA,MACF;AACA,UAAI,mBAAmB,MAAM;AAC3B,eAAO,OAAO,SAAS;AAAA,UACrB,QAAQ,OAAO;AAAA,UACf,MAAM;AAAA,UACN,WAAW;AAAA,UACX,QAAQ,mBAAmB;AAAA,UAC3B,OAAO,aAAa,KAAK;AAAA,UACzB,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,UACrB;AAAA,UACA,OAAO,SAAS,SAAS;AAAA,UACzB,cAAc;AAAA,UACd;AAAA,UACA,WAAW;AAAA,QACb,CAAC;AAAA,MACH;AAEA,YAAM,cAAc;AACpB,YAAM,cAAc;AACpB,YAAM,cAAc,aAAa,KAAK;AACtC,YAAM,gBAAgB,KAAK,IAAI;AAC/B,YAAM,aAAa,UACf,MAAM;AAAA,QAAS;AAAA,QAAe;AAAA,QAAW,MACvC,QAAQ,KAAK;AAAA,UACX,MAAM,gBAAgB,SAAS;AAAA,UAC/B,UAAU;AAAA,UACV,MAAM,SAAS;AAAA,UACf,YAAY;AAAA,YACV,UAAU,SAAS,UAAU;AAAA,YAC7B;AAAA,UACF;AAAA,QACF,CAAC;AAAA,MACH,IACA;AACJ,UAAI;AACJ,UAAI;AACF,cAAM,SAAS,MAAM,OAAO,IAAI,SAAS,QAAQ,GAAG;AACpD,cAAM,aAAa,OAAO,mBAAmB;AAAA,UAC3C,QAAQ,SAAS;AAAA,UACjB;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,QACF,CAAC;AACD,cAAM,UAAU,uBAAuB,YAAY,eAAe,SAAS;AAC3E,YAAI,YAAY,UAAa,OAAO,SAAS,OAAO,KAAK,UAAU,GAAG;AACpE,0BAAgB;AAChB,gBAAM;AAAA,YACJ;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,UACF;AAAA,QACF;AACA,wBAAgB;AAAA,UACd,IAAI;AAAA,UACJ;AAAA,UACA,GAAI,YAAY,SAAY,EAAE,QAAQ,IAAI,CAAC;AAAA,UAC3C,YAAY,KAAK,IAAI,IAAI;AAAA,QAC3B;AAAA,MACF,SAAS,KAAK;AACZ,sBAAc,KAAK,aAAa,OAAO,WAAW,GAAG,CAAC;AACtD,wBAAgB;AAAA,UACd,IAAI;AAAA,UACJ,OAAO,cAAc,cAAc,SAAS,CAAC,EAAG;AAAA,UAChD,YAAY,KAAK,IAAI,IAAI;AAAA,QAC3B;AACA,YAAI,kBAAkB,QAAQ;AAC5B,gBAAM;AAAA,YAAS;AAAA,YAAe;AAAA,YAAW,MACvC,YAAY,KAAK,cAAc,SAAS,eAAe;AAAA,UACzD;AACA,gBAAMA,QAA2D
;AAAA,YAC/D,OAAO;AAAA,YACP;AAAA,YACA;AAAA,YACA,YAAY;AAAA,YACZ;AAAA,YACA,YAAY;AAAA,YACZ;AAAA,YACA,WAAW,IAAI,KAAK,aAAa,EAAE,YAAY;AAAA,YAC/C,UAAS,oBAAI,KAAK,GAAE,YAAY;AAAA,UAClC;AACA,kBAAQ,KAAKA,KAAI;AACjB,gBAAM,UAAU,OAAO,QAAQA,OAAM,aAAa;AAClD,iBAAO,OAAO,SAAS;AAAA,YACrB,QAAQ,OAAO;AAAA,YACf,MAAM;AAAA,YACN,WAAW;AAAA,YACX,QAAQ,cAAc,SAAS;AAAA,YAC/B,OAAO,aAAa,KAAK;AAAA,YACzB,OAAO;AAAA,YACP,YAAY;AAAA,YACZ,YAAY;AAAA,YACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,YACrB;AAAA,YACA,OAAO,SAAS,SAAS;AAAA,YACzB,cAAc;AAAA,YACd;AAAA,YACA,WAAW;AAAA,UACb,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI;AACF,gBAAQ,MAAM,OAAO,QAAQ,EAAE,SAAS,aAAa,WAAW,OAAO,CAAC;AAAA,MAC1E,SAAS,KAAK;AACZ,sBAAc,KAAK,aAAa,WAAW,WAAW,GAAG,CAAC;AAC1D,cAAMA,QAA2D;AAAA,UAC/D,OAAO;AAAA,UACP;AAAA,UACA;AAAA,UACA,YAAY;AAAA,UACZ;AAAA,UACA,YAAY;AAAA,UACZ;AAAA,UACA,WAAW,IAAI,KAAK,aAAa,EAAE,YAAY;AAAA,UAC/C,UAAS,oBAAI,KAAK,GAAE,YAAY;AAAA,QAClC;AACA,gBAAQ,KAAKA,KAAI;AACjB,cAAM;AAAA,UAAS;AAAA,UAAe;AAAA,UAAW,MACvC,YAAY,KAAK,cAAc,cAAc,SAAS,CAAC,EAAG,OAAO;AAAA,QACnE;AACA,cAAM,UAAU,OAAO,QAAQA,OAAM,aAAa;AAClD,eAAO,OAAO,SAAS;AAAA,UACrB,QAAQ,OAAO;AAAA,UACf,MAAM;AAAA,UACN,WAAW;AAAA,UACX,QAAQ,cAAc,cAAc,SAAS,CAAC,EAAG;AAAA,UACjD,OAAO,aAAa,KAAK;AAAA,UACzB,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,UACrB;AAAA,UACA,OAAO,SAAS,SAAS;AAAA,UACzB,cAAc;AAAA,UACd;AAAA,UACA,WAAW;AAAA,QACb,CAAC;AAAA,MACH;AACA,UAAI;AACF,gBAAQ,MAAM,OAAO,SAAS;AAAA,UAC5B,QAAQ,OAAO;AAAA,UACf;AAAA,UACA;AAAA,UACA,aAAa,WAAW;AAAA,QAC1B,CAAC;AACD,cAAM;AAAA,UACJ;AAAA,UACA;AAAA,UACA,QAAQ,SAAS;AAAA,UACjB;AAAA,UACA;AAAA,UACA,YAAY,KAAK;AAAA,QACnB;AAAA,MACF,SAAS,KAAK;AACZ,sBAAc,KAAK,aAAa,YAAY,WAAW,GAAG,CAAC;AAC3D,cAAMA,QAA2D;AAAA,UAC/D,OAAO;AAAA,UACP;AAAA,UACA;AAAA,UACA,YAAY;AAAA,UACZ;AAAA,UACA,YAAY;AAAA,UACZ;AAAA,UACA,WAAW,IAAI,KAAK,aAAa,EAAE,YAAY;AAAA,UAC/C,UAAS,oBAAI,KAAK,GAAE,YAAY;AAAA,QAClC;AACA,gBAAQ,KAAKA,KAAI;AACjB,cAAM;AAAA,UAAS;AAAA,UAAe;AAAA,UAAW,MACvC,YAAY,KAAK,cAAc,cAAc,SAAS,CAAC,EAAG,OAAO;AAAA,QACnE;AACA,cAAM,UAAU,OAAO,QAAQA,OAAM,aAAa;AAClD,eAAO,OAAO,SAAS;AAAA,UACrB,QAAQ,
OAAO;AAAA,UACf,MAAM;AAAA,UACN,WAAW;AAAA,UACX,QAAQ,cAAc,cAAc,SAAS,CAAC,EAAG;AAAA,UACjD,OAAO,aAAa,KAAK;AAAA,UACzB,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,UACrB;AAAA,UACA,OAAO,SAAS,SAAS;AAAA,UACzB,cAAc;AAAA,UACd;AAAA,UACA,WAAW;AAAA,QACb,CAAC;AAAA,MACH;AACA,YAAM,aAAa,aAAa,KAAK;AACrC,YAAM,mBAAmB,iBAAiB,OAAO,OAAO,YAAY;AACpE,YAAM,iBAAiB,uBAAuB;AAAA,QAC5C,UAAU,OAAO;AAAA,QACjB;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,eAAe;AAAA,MACjB,CAAC;AACD,yBAAmB,eAAe;AAClC,6BAAuB;AAEvB,YAAM,OAA2D;AAAA,QAC/D,OAAO;AAAA,QACP;AAAA,QACA;AAAA,QACA,YAAY;AAAA,QACZ;AAAA,QACA,YAAY;AAAA,QACZ;AAAA,QACA,WAAW,IAAI,KAAK,aAAa,EAAE,YAAY;AAAA,QAC/C,UAAS,oBAAI,KAAK,GAAE,YAAY;AAAA,MAClC;AACA,cAAQ,KAAK,IAAI;AACjB,UAAI,cAAc,IAAI;AACpB,cAAM;AAAA,UAAS;AAAA,UAAe;AAAA,UAAW,MACvC,YAAY,IAAI;AAAA,YACd,YAAY;AAAA,cACV,eAAe,cAAc,WAAW;AAAA,cACxC;AAAA,cACA,aAAa,eAAe;AAAA,cAC5B,YAAY,cAAc;AAAA,cAC1B;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF,OAAO;AACL,cAAM;AAAA,UAAS;AAAA,UAAe;AAAA,UAAW,MACvC,YAAY,KAAK,cAAc,SAAS,iBAAiB;AAAA,YACvD,YAAY;AAAA,cACV;AAAA,cACA;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF;AACA,YAAM,UAAU,OAAO,QAAQ,MAAM,aAAa;AAElD,UAAI,eAAe,MAAM;AACvB,eAAO,OAAO,SAAS;AAAA,UACrB,QAAQ,OAAO;AAAA,UACf,MAAM;AAAA,UACN,WAAW;AAAA,UACX,QAAQ,eAAe;AAAA,UACvB,OAAO;AAAA,UACP,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,UACrB;AAAA,UACA,OAAO,SAAS,SAAS;AAAA,UACzB,cAAc;AAAA,UACd;AAAA,UACA,WAAW;AAAA,QACb,CAAC;AAAA,MACH;AAEA,YAAM,qBAAqB,mBAAmB,QAAQ,YAAY;AAClE,UAAI,mBAAmB,MAAM;AAC3B,eAAO,OAAO,SAAS;AAAA,UACrB,QAAQ,OAAO;AAAA,UACf,MAAM;AAAA,UACN,WAAW;AAAA,UACX,QAAQ,mBAAmB;AAAA,UAC3B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,UACrB;AAAA,UACA,OAAO,SAAS,SAAS;AAAA,UACzB,cAAc;AAAA,UACd;AAAA,UACA,WAAW;AAAA,QACb,CAAC;AAAA,MACH;AAEA,YAAM,cAAc;AAAA,QAClB,OAAO;AAAA,QACP;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,YAAY;AAAA,QACZ;AAAA,QACA;AAAA,QACA,WAAW;AAAA,QACX;AAAA,MACF;AACA,UAAI;AACJ,UAAI;AACF,uBAAe,OAAO,aAClB,MAAM,OAAO,WAAW,WAAW,IACnC,oBAAoB,KAAK;AAAA,M
AC/B,SAAS,KAAK;AACZ,sBAAc,KAAK,aAAa,eAAe,YAAY,GAAG,GAAG,CAAC;AAClE,eAAO,OAAO,SAAS;AAAA,UACrB,QAAQ,OAAO;AAAA,UACf,MAAM;AAAA,UACN,WAAW;AAAA,UACX,QAAQ,cAAc,cAAc,SAAS,CAAC,EAAG;AAAA,UACjD,OAAO,aAAa,KAAK;AAAA,UACzB,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,UACrB;AAAA,UACA,OAAO,SAAS,SAAS;AAAA,UACzB,cAAc;AAAA,UACd;AAAA,UACA,WAAW;AAAA,QACb,CAAC;AAAA,MACH;AACA,UAAI,aAAa,MAAM;AACrB,eAAO,OAAO,SAAS;AAAA,UACrB,QAAQ,OAAO;AAAA,UACf,MAAM,aAAa;AAAA,UACnB,WAAW;AAAA,UACX,QAAQ,aAAa;AAAA,UACrB,OAAO,aAAa;AAAA,UACpB,OAAO;AAAA,UACP,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,UACrB;AAAA,UACA,OAAO,SAAS,SAAS;AAAA,UACzB,cAAc,aAAa;AAAA,UAC3B;AAAA,UACA,WAAW;AAAA,QACb,CAAC;AAAA,MACH;AAAA,IACF;AAEA,WAAO,OAAO,SAAS;AAAA,MACrB,QAAQ,OAAO;AAAA,MACf,MAAM;AAAA,MACN,WAAW;AAAA,MACX,QAAQ,8BAA8B,OAAO,QAAQ;AAAA,MACrD,OAAO;AAAA,MACP,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ,QAAQ,KAAK,IAAI,IAAI;AAAA,MACrB;AAAA,MACA,OAAO,SAAS,SAAS;AAAA,MACzB,cAAc;AAAA,MACd;AAAA,MACA,WAAW;AAAA,IACb,CAAC;AAAA,EACH,SAAS,KAAK;AACZ,kBAAc,KAAK,aAAa,OAAO,QAAQ,QAAQ,GAAG,CAAC;AAC3D,WAAO,OAAO,SAAS;AAAA,MACrB,QAAQ,OAAO;AAAA,MACf,MAAM;AAAA,MACN,WAAW;AAAA,MACX,QAAQ,cAAc,cAAc,SAAS,CAAC,EAAG;AAAA,MACjD,OAAO;AAAA,MACP,YAAY;AAAA,MACZ,YAAY,CAAC;AAAA,MACb,QAAQ,KAAK,IAAI,IAAI;AAAA,MACrB;AAAA,MACA,OAAO,SAAS,SAAS;AAAA,MACzB,cAAc;AAAA,MACd;AAAA,MACA,WAAW;AAAA,IACb,CAAC;AAAA,EACH,UAAE;AACA,QAAI,UAAW,cAAa,SAAS;AACrC,QAAI,OAAO,OAAQ,QAAO,OAAO,oBAAoB,SAAS,aAAa;AAAA,EAC7E;AACF;AAEO,SAAS,iBACd,oBACA,UAA4E,CAAC,GACvC;AACtC,SAAO,EAAE,GAAG,SAAS,mBAAmB;AAC1C;AAEO,SAAS,qBACd,oBACA,UAA4E,CAAC,GACvC;AACtC,SAAO,EAAE,GAAG,SAAS,mBAAmB;AAC1C;AAEO,SAAS,cAAc,OAAgE;AAC5F,SAAO,EAAE,GAAG,OAAO,WAAW,KAAK;AACrC;AAEO,SAAS,eAAe,OAAgE;AAC7F,SAAO,EAAE,GAAG,OAAO,WAAW,MAAM;AACtC;AAEA,SAAS,gBAAgB,OAA0D;AACjF,QAAM,MAAM,EAAE,GAAG,gBAAgB,GAAG,MAAM;AAC1C,MAAI,CAAC,OAAO,UAAU,IAAI,QAAQ,KAAM,IAAI,WAAsB,GAAG;AACnE,UAAM,IAAI;AAAA,MACR,+DAA+D,OAAO,IAAI,QAAQ,CAAC;AAAA,IACrF;AAAA,EACF;AACA,QAAM,SAAwB,EAAE,UAAU,IAAI,SAAmB;AACjE,MAAI,IAAI,cAAc,QAAW;AAC/B,QACE,OAAO,IAAI,c
AAc,YACzB,CAAC,OAAO,SAAS,IAAI,SAAS,KAC9B,IAAI,aAAa,GACjB;AACA,YAAM,IAAI;AAAA,QACR,yEAAyE,OAAO,IAAI,SAAS,CAAC;AAAA,MAChG;AAAA,IACF;AACA,WAAO,YAAY,IAAI;AAAA,EACzB;AACA,MAAI,IAAI,eAAe,QAAW;AAChC,QACE,OAAO,IAAI,eAAe,YAC1B,CAAC,OAAO,SAAS,IAAI,UAAU,KAC/B,IAAI,aAAa,GACjB;AACA,YAAM,IAAI;AAAA,QACR,6EAA6E,OAAO,IAAI,UAAU,CAAC;AAAA,MACrG;AAAA,IACF;AACA,WAAO,aAAa,IAAI;AAAA,EAC1B;AACA,SAAO;AACT;AAEA,SAAS,uBACP,SACA,eACA,WACoB;AACpB,MAAI,YAAY,OAAW,QAAO;AAClC,MAAI,CAAC,OAAO,SAAS,OAAO,KAAK,UAAU,GAAG;AAC5C,kBAAc;AAAA,MACZ,aAAa,OAAO,WAAW,IAAI,MAAM,2BAA2B,OAAO,OAAO,CAAC,EAAE,CAAC;AAAA,IACxF;AACA,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAEO,SAAS,kBAAkB,OAAqC;AACrE,SAAO,MAAM;AAAA,IACX,CAAC,WAAW,OAAO,UAAW,OAAO,aAAa,cAAc,OAAO,aAAa;AAAA,EACtF;AACF;AAEA,SAAS,YACP,QACA,OACA,OACA,SACA,QACA,WACA,SACA,cACA,aACA,SACuD;AACvD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,QAAQ,KAAK,IAAI,IAAI;AAAA,IACrB;AAAA,IACA,kBACE,OAAO,eAAe,SAAY,SAAY,KAAK,IAAI,GAAG,OAAO,aAAa,YAAY;AAAA,IAC5F;AAAA,IACA;AAAA,EACF;AACF;AAEA,SAAS,oBAAoB,OAA0C;AACrE,MAAI,CAAC,MAAM,OAAQ,QAAO,EAAE,MAAM,OAAO,MAAM,OAAO,QAAQ,eAAe;AAC7E,QAAM,OAAO,kBAAkB,KAAK;AACpC,SAAO,OACH,EAAE,MAAM,MAAM,MAAM,MAAM,QAAQ,6BAA6B,OAAO,aAAa,KAAK,EAAE,IAC1F;AAAA,IACE,MAAM;AAAA,IACN,MAAM;AAAA,IACN,QAAQ;AAAA,IACR,OAAO,aAAa,KAAK;AAAA,EAC3B;AACN;AAEA,SAAS,aAAa,OAAgD;AACpE,QAAM,SAAS,MACZ,IAAI,CAAC,WAAW,OAAO,KAAK,EAC5B,OAAO,CAAC,UAA2B,OAAO,UAAU,QAAQ;AAC/D,MAAI,CAAC,OAAO,OAAQ,QAAO;AAC3B,SAAO,KAAK,MAAO,OAAO,OAAO,CAAC,KAAK,UAAU,MAAM,OAAO,CAAC,IAAI,OAAO,SAAU,GAAI,IAAI;AAC9F;AAEA,SAAS,mBACP,QACA,cACmC;AACnC,MAAI,OAAO,eAAe,UAAa,gBAAgB,OAAO,YAAY;AACxE,WAAO;AAAA,MACL,MAAM;AAAA,MACN,QAAQ,gCAAgC,OAAO,UAAU;AAAA,IAC3D;AAAA,EACF;AACA,SAAO,EAAE,MAAM,OAAO,QAAQ,GAAG;AACnC;AAEA,eAAe,iBACb,SACA,QACA,cACA,QACA,eACA,WACe;AACf,MAAI,CAAC,WAAW,OAAO,eAAe,OAAW;AACjD,QAAM,aAAa,OAAO;AAC1B,QAAM;AAAA,IAAS;AAAA,IAAe;AAAA,IAAW,MACvC,QAAQ,aAAa;AAAA,MACnB,WAAW;AAAA,MACX,OAAO;AAAA,MACP,UAAU;AAAA,MACV,WAAW,KAAK,IAAI,GAAG,aAAa,YAAY;AAAA,MAChD,UAAU,gBAAgB;AAAA,MAC1B,QAAQ,QAAQ,KAAK;AAAA
,IACvB,CAAC;AAAA,EACH;AACF;AAEA,eAAe,gBACb,SACA,OACA,OACA,eACA,WACA,cACe;AACf,MAAI,CAAC,QAAS;AACd,aAAW,UAAU,OAAO;AAC1B,UAAM;AAAA,MAAS;AAAA,MAAe;AAAA,MAAW,MACvC,QAAQ,YAAY;AAAA,QAClB,SAAS,OAAO,YAAY,wBAAwB;AAAA,QACpD,cAAc,gBAAgB,QAAQ;AAAA,QACtC,MAAM,gBAAgB,OAAO,EAAE;AAAA,QAC/B,WAAW,OAAO;AAAA,QAClB,OAAO,OAAO,OAAO,UAAU,WAAW,OAAO,QAAQ,OAAO,SAAS,IAAI;AAAA,QAC7E,WAAW,OAAO;AAAA,QAClB,UAAU,OAAO;AAAA,QACjB,YAAY;AAAA,UACV;AAAA,UACA,QAAQ,OAAO;AAAA,UACf,UAAU,OAAO;AAAA,UACjB,WAAW,OAAO;AAAA,QACpB;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AACF;AAEA,eAAe,UACb,QACA,MACA,eACe;AACf,MAAI,CAAC,OAAQ;AACb,MAAI;AACF,UAAM,OAAO,IAAI;AAAA,EACnB,SAAS,KAAK;AACZ,kBAAc,KAAK,aAAa,WAAW,KAAK,OAAO,GAAG,CAAC;AAAA,EAC7D;AACF;AAEA,eAAe,SACb,eACA,WACA,OACwB;AACxB,MAAI;AACF,WAAO,MAAM,MAAM;AAAA,EACrB,SAAS,KAAK;AACZ,kBAAc,KAAK,aAAa,SAAS,WAAW,GAAG,CAAC;AACxD,WAAO;AAAA,EACT;AACF;AAEA,SAAS,uBAAwC,MAOK;AACpD,QAAM,MAAM,KAAK,UAAU;AAC3B,MAAI,CAAC,OAAO,OAAO,EAAG,QAAO,EAAE,MAAM,OAAO,QAAQ,IAAI,QAAQ,EAAE;AAClE,QAAM,gBAAgB,KAAK,UAAU,iBAAiB;AACtD,QAAM,aAAa,KAAK,KAAK,KAAK,cAAc,MAAM,KAAK,eAAe,EAAE;AAC5E,QAAM,iBACJ,KAAK,yBAAyB,UAAa,KAAK,yBAAyB,KAAK;AAChF,QAAM,YAAY,aAAa;AAC/B,QAAM,SAAS,kBAAkB,YAAY,KAAK,gBAAgB,IAAI;AACtE,SAAO,UAAU,MACb,EAAE,MAAM,MAAM,QAAQ,sCAAsC,MAAM,YAAY,OAAO,IACrF,EAAE,MAAM,OAAO,QAAQ,IAAI,OAAO;AACxC;AAEA,SAAS,2BACP,UACA,QACmC;AACnC,QAAM,MAAM,UAAU;AACtB,MAAI,CAAC,OAAO,OAAO,KAAK,SAAS,IAAK,QAAO,EAAE,MAAM,OAAO,QAAQ,GAAG;AACvE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,QAAQ,mCAAmC,MAAM;AAAA,EACnD;AACF;AAEA,SAAS,iBACP,OACA,UACQ;AACR,MAAI,UAAU,iBAAkB,QAAO,SAAS,iBAAiB,KAAK;AACtE,SAAO,kBAAkB,KAAK;AAChC;AAEA,SAAS,kBACP,QACA,UACQ;AACR,MAAI,UAAU,kBAAmB,QAAO,SAAS,kBAAkB,MAAM;AACzE,SAAO,kBAAkB,MAAM;AACjC;AAEA,SAAS,kBAAkB,OAAwB;AACjD,MAAI,OAAO,UAAU,SAAU,QAAO;AACtC,MAAI,OAAO,UAAU,YAAY,OAAO,UAAU,aAAa,SAAS,KAAM,QAAO,OAAO,KAAK;AACjG,MAAI;AACF,WAAO,KAAK,UAAU,mBAAmB,KAAK,CAAC;AAAA,EACjD,QAAQ;AACN,WAAO,OAAO,KAAK;AAAA,EACrB;AACF;AAEA,SAAS,mBAAmB,OAAyB;AACnD,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,MAAM,IAAI,kBAAkB;AAC7D,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAChD,
QAAM,SAAS;AACf,QAAM,SAAkC,CAAC;AACzC,aAAW,OAAO,OAAO,KAAK,MAAM,EAAE,KAAK,GAAG;AAC5C,WAAO,GAAG,IAAI,mBAAmB,OAAO,GAAG,CAAC;AAAA,EAC9C;AACA,SAAO;AACT;AAEA,SAAS,YAAY,QAA6B;AAChD,QAAM,SAAS,OAAO;AACtB,MAAI,kBAAkB,MAAO,QAAO,OAAO;AAC3C,SAAO,SAAS,OAAO,MAAM,IAAI;AACnC;AAEA,SAAS,aACP,OACA,WACA,KACqB;AACrB,QAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,SAAO,EAAE,OAAO,WAAW,QAAQ;AACrC;AAEA,eAAe,OACb,SACA,QACkE;AAClE,QAAM;AAAA,IAAS,OAAO;AAAA,IAAe,OAAO,MAAM;AAAA,IAAQ,MACxD,SAAS,OAAO;AAAA,MACd,MAAM,OAAO;AAAA,MACb,OAAO,OAAO,SAAS,aAAa,OAAO,UAAU;AAAA,MACrD,cAAc,OAAO;AAAA,MACrB,OAAO,OAAO;AAAA,IAChB,CAAC;AAAA,EACH;AACA,SAAO;AACT;","names":["step"]}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
// src/errors.ts
|
|
2
|
+
var AgentEvalError = class extends Error {
|
|
3
|
+
/** Stable string code. Survives minification; safe to switch on. */
|
|
4
|
+
code;
|
|
5
|
+
constructor(code, message, options) {
|
|
6
|
+
super(message, options);
|
|
7
|
+
this.name = this.constructor.name;
|
|
8
|
+
this.code = code;
|
|
9
|
+
}
|
|
10
|
+
};
|
|
11
|
+
var ValidationError = class extends AgentEvalError {
|
|
12
|
+
constructor(message, options) {
|
|
13
|
+
super("validation", message, options);
|
|
14
|
+
}
|
|
15
|
+
};
|
|
16
|
+
var NotFoundError = class extends AgentEvalError {
|
|
17
|
+
constructor(message, options) {
|
|
18
|
+
super("not_found", message, options);
|
|
19
|
+
}
|
|
20
|
+
};
|
|
21
|
+
var ConfigError = class extends AgentEvalError {
|
|
22
|
+
constructor(message, options) {
|
|
23
|
+
super("config", message, options);
|
|
24
|
+
}
|
|
25
|
+
};
|
|
26
|
+
var CaptureIntegrityError = class extends AgentEvalError {
|
|
27
|
+
constructor(message, options) {
|
|
28
|
+
super("capture_integrity", message, options);
|
|
29
|
+
}
|
|
30
|
+
};
|
|
31
|
+
var JudgeError = class extends AgentEvalError {
|
|
32
|
+
constructor(message, options) {
|
|
33
|
+
super("judge", message, options);
|
|
34
|
+
}
|
|
35
|
+
};
|
|
36
|
+
var VerificationError = class extends AgentEvalError {
|
|
37
|
+
constructor(message, options) {
|
|
38
|
+
super("verification", message, options);
|
|
39
|
+
}
|
|
40
|
+
};
|
|
41
|
+
var ReplayError = class extends AgentEvalError {
|
|
42
|
+
constructor(message, options) {
|
|
43
|
+
super("replay", message, options);
|
|
44
|
+
}
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
export {
|
|
48
|
+
AgentEvalError,
|
|
49
|
+
ValidationError,
|
|
50
|
+
NotFoundError,
|
|
51
|
+
ConfigError,
|
|
52
|
+
CaptureIntegrityError,
|
|
53
|
+
JudgeError,
|
|
54
|
+
VerificationError,
|
|
55
|
+
ReplayError
|
|
56
|
+
};
|
|
57
|
+
//# sourceMappingURL=chunk-NG236HPC.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/errors.ts"],"sourcesContent":["/**\n * Error taxonomy for `@tangle-network/agent-eval`.\n *\n * Every error this package throws as part of its *public contract* extends\n * `AgentEvalError`. Consumers can pattern-match by `instanceof <Subclass>` or\n * by the stable string `code` carried on the base class.\n *\n * The codes are stable across minor versions; new codes can be added, but\n * existing codes never change meaning. New subclasses are non-breaking.\n *\n * Internal invariant guards (`throw new Error('this should never happen')`)\n * remain plain `Error`s on purpose — they're programmer-mistake assertions,\n * not consumer-catchable contract failures.\n */\n\nexport type AgentEvalErrorCode =\n | 'validation'\n | 'not_found'\n | 'config'\n | 'capture_integrity'\n | 'judge'\n | 'verification'\n | 'replay'\n\nexport class AgentEvalError extends Error {\n /** Stable string code. Survives minification; safe to switch on. */\n readonly code: AgentEvalErrorCode\n\n constructor(code: AgentEvalErrorCode, message: string, options?: { cause?: unknown }) {\n super(message, options)\n this.name = this.constructor.name\n this.code = code\n }\n}\n\n/** Caller passed invalid arguments (out of range, mutually-exclusive options, bad shape). */\nexport class ValidationError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('validation', message, options)\n }\n}\n\n/** A named resource (run, span, rubric, scenario, dataset row, route) does not exist. */\nexport class NotFoundError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('not_found', message, options)\n }\n}\n\n/** Configuration missing or malformed (`HOME` unset, required image not supplied, env var absent). 
*/\nexport class ConfigError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('config', message, options)\n }\n}\n\n/**\n * A run is missing the artifacts a launch-grade check requires:\n * raw HTTP capture absent, no LLM spans, route assertion failed, run-end\n * assertion tripped. Block ship on this; do not catch and move on.\n */\nexport class CaptureIntegrityError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('capture_integrity', message, options)\n }\n}\n\n/** A judge call failed in a way that's not retryable: schema parse failure, bad rubric, conflicting dimensions. */\nexport class JudgeError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('judge', message, options)\n }\n}\n\n/** A verifier signalled a hard failure (compile, test, schema) — distinct from a low judge score. */\nexport class VerificationError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('verification', message, options)\n }\n}\n\n/** Replay cache cannot satisfy a request: miss with no fallback, sink lacks list(), unsupported URL. 
*/\nexport class ReplayError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('replay', message, options)\n }\n}\n"],"mappings":";AAwBO,IAAM,iBAAN,cAA6B,MAAM;AAAA;AAAA,EAE/B;AAAA,EAET,YAAY,MAA0B,SAAiB,SAA+B;AACpF,UAAM,SAAS,OAAO;AACtB,SAAK,OAAO,KAAK,YAAY;AAC7B,SAAK,OAAO;AAAA,EACd;AACF;AAGO,IAAM,kBAAN,cAA8B,eAAe;AAAA,EAClD,YAAY,SAAiB,SAA+B;AAC1D,UAAM,cAAc,SAAS,OAAO;AAAA,EACtC;AACF;AAGO,IAAM,gBAAN,cAA4B,eAAe;AAAA,EAChD,YAAY,SAAiB,SAA+B;AAC1D,UAAM,aAAa,SAAS,OAAO;AAAA,EACrC;AACF;AAGO,IAAM,cAAN,cAA0B,eAAe;AAAA,EAC9C,YAAY,SAAiB,SAA+B;AAC1D,UAAM,UAAU,SAAS,OAAO;AAAA,EAClC;AACF;AAOO,IAAM,wBAAN,cAAoC,eAAe;AAAA,EACxD,YAAY,SAAiB,SAA+B;AAC1D,UAAM,qBAAqB,SAAS,OAAO;AAAA,EAC7C;AACF;AAGO,IAAM,aAAN,cAAyB,eAAe;AAAA,EAC7C,YAAY,SAAiB,SAA+B;AAC1D,UAAM,SAAS,SAAS,OAAO;AAAA,EACjC;AACF;AAGO,IAAM,oBAAN,cAAgC,eAAe;AAAA,EACpD,YAAY,SAAiB,SAA+B;AAC1D,UAAM,gBAAgB,SAAS,OAAO;AAAA,EACxC;AACF;AAGO,IAAM,cAAN,cAA0B,eAAe;AAAA,EAC9C,YAAY,SAAiB,SAA+B;AAC1D,UAAM,UAAU,SAAS,OAAO;AAAA,EAClC;AACF;","names":[]}
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
import {
|
|
2
|
+
ValidationError
|
|
3
|
+
} from "./chunk-NG236HPC.js";
|
|
4
|
+
|
|
1
5
|
// src/run-record.ts
|
|
2
6
|
var MANDATORY_TOP_LEVEL = [
|
|
3
7
|
"runId",
|
|
@@ -15,11 +19,10 @@ var MANDATORY_TOP_LEVEL = [
|
|
|
15
19
|
"splitTag"
|
|
16
20
|
];
|
|
17
21
|
var SPLIT_TAGS = ["search", "dev", "holdout"];
|
|
18
|
-
var RunRecordValidationError = class extends
|
|
22
|
+
var RunRecordValidationError = class extends ValidationError {
|
|
19
23
|
path;
|
|
20
24
|
constructor(message, path = "") {
|
|
21
25
|
super(path ? `${message} (at ${path})` : message);
|
|
22
|
-
this.name = "RunRecordValidationError";
|
|
23
26
|
this.path = path;
|
|
24
27
|
}
|
|
25
28
|
};
|
|
@@ -68,7 +71,10 @@ function validateRunRecord(input) {
|
|
|
68
71
|
expectString(jmRec.promptVersion, "judgeMetadata.promptVersion");
|
|
69
72
|
expectFiniteNumber(jmRec.confidence, "judgeMetadata.confidence");
|
|
70
73
|
if (typeof jmRec.fallback !== "boolean") {
|
|
71
|
-
throw new RunRecordValidationError(
|
|
74
|
+
throw new RunRecordValidationError(
|
|
75
|
+
"judgeMetadata.fallback must be boolean",
|
|
76
|
+
"judgeMetadata.fallback"
|
|
77
|
+
);
|
|
72
78
|
}
|
|
73
79
|
}
|
|
74
80
|
const out = obj.outcome;
|
|
@@ -76,8 +82,10 @@ function validateRunRecord(input) {
|
|
|
76
82
|
throw new RunRecordValidationError("outcome must be an object", "outcome");
|
|
77
83
|
}
|
|
78
84
|
const outRec = out;
|
|
79
|
-
if (outRec.searchScore !== void 0)
|
|
80
|
-
|
|
85
|
+
if (outRec.searchScore !== void 0)
|
|
86
|
+
expectFiniteNumber(outRec.searchScore, "outcome.searchScore");
|
|
87
|
+
if (outRec.holdoutScore !== void 0)
|
|
88
|
+
expectFiniteNumber(outRec.holdoutScore, "outcome.holdoutScore");
|
|
81
89
|
if (outRec.searchScore === void 0 && outRec.holdoutScore === void 0) {
|
|
82
90
|
throw new RunRecordValidationError(
|
|
83
91
|
"outcome must define searchScore or holdoutScore (or both)",
|
|
@@ -145,4 +153,4 @@ export {
|
|
|
145
153
|
parseRunRecordSafe,
|
|
146
154
|
roundTripRunRecord
|
|
147
155
|
};
|
|
148
|
-
//# sourceMappingURL=chunk-
|
|
156
|
+
//# sourceMappingURL=chunk-NLMNWKVM.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/run-record.ts"],"sourcesContent":["/**\n * Paper-grade RunRecord schema + runtime validator.\n *\n * Every run that participates in a promotion gate, paper table, or\n * researcher loop SHOULD be recorded as a `RunRecord`. The mandatory\n * fields are exactly those the paper \"Two Loops, Three Roles\" requires\n * for reproducibility: who/what/when/cost/seed/hash, plus the search vs\n * holdout split tag and either a `searchScore` or a `holdoutScore`.\n *\n * This is intentionally NOT a replacement for the rich `Run` /\n * `ProposeReviewReport` / `ScenarioResult` types already in the\n * package. Those are runtime structures with full provenance. A\n * `RunRecord` is the analysis-time projection — the JSON-friendly\n * row you'd put in a parquet file or paste into a notebook.\n *\n * Validate at the boundary:\n *\n * const rec = validateRunRecord(rawJson) // throws on missing\n * const ok = isRunRecord(rawJson) // boolean check\n * const rec = parseRunRecordSafe(rawJson) // { ok, value | error }\n *\n * The validator runs in pure TS — zod is intentionally NOT a\n * dependency. Round-trip tested in `tests/run-record.test.ts`.\n */\n\n/** Search/dev/holdout split tag. 'search' is the paper-grade alias for the\n * combined train+test pool that the optimizer is allowed to read. */\nexport type RunSplitTag = 'search' | 'dev' | 'holdout'\n\nexport interface RunTokenUsage {\n input: number\n output: number\n cached?: number\n}\n\nexport interface RunJudgeMetadata {\n model: string\n promptVersion: string\n /** [0,1] confidence the judge declared. Constant judge confidence\n * across many runs is a fallback signal (see `canary.ts`). */\n confidence: number\n /** True if the judge degraded to a fallback path (rules-only,\n * prior-call cache, etc.). The canary uses this to alert. */\n fallback: boolean\n}\n\nexport interface RunOutcome {\n /** Score on the search/optimization split. 
Optional because a\n * holdout-only evaluation only fills `holdoutScore`. */\n searchScore?: number\n /** Score on the held-out split. Optional because a search-only run\n * only fills `searchScore`. At least one must be present. */\n holdoutScore?: number\n /** Bag of any other metric the run produced — judge dimensions,\n * pass/fail counters, latency stats, etc. Numeric only — keeps\n * reporters honest. */\n raw: Record<string, number>\n}\n\n/**\n * Mandatory paper-grade fields for a single evaluation run. Optional\n * fields are extension points; mandatory fields throw if missing.\n *\n * Hash discipline:\n * - `promptHash` is the sha256 of the EFFECTIVE prompt sent to the\n * model (after any steering bundle merge).\n * - `configHash` is the sha256 of the effective run config (model,\n * temperature, tools, judges, splits). The pair (promptHash,\n * configHash) uniquely identifies an experimental cell.\n *\n * Model snapshot discipline:\n * - `model` MUST encode a snapshot version. Bare aliases like\n * `claude-sonnet-4` or `gpt-4o` are banned — they remap silently.\n * Use `claude-sonnet-4-6@2025-04-15` or `gpt-4o-2024-11-20`.\n */\nexport interface RunRecord {\n /** UUID for the run. */\n runId: string\n /** Logical experiment grouping (a treatment vs a baseline within\n * the same sweep should share `experimentId`). */\n experimentId: string\n /** Stable identifier for the candidate (variant) being run. The\n * promotion gate compares two `candidateId`s on matched items. */\n candidateId: string\n /** RNG seed for the run. Always recorded — silent re-seeding is\n * the most common cause of non-reproducible numbers. */\n seed: number\n /** Model identifier WITH snapshot version. */\n model: string\n /** sha256 of the effective prompt (post-steering). */\n promptHash: string\n /** sha256 of the effective config. */\n configHash: string\n /** Git SHA the harness was run from. */\n commitSha: string\n /** End-to-end wall-clock duration in milliseconds. 
*/\n wallMs: number\n /** Time spent queued before execution started, if known. */\n queueMs?: number\n /** Total USD cost. Mandatory — runs without a cost number are\n * unbounded by definition and must not be admitted into the gate. */\n costUsd: number\n /** Token usage breakdown. */\n tokenUsage: RunTokenUsage\n /** Judge-side metadata, if a judge was used. */\n judgeMetadata?: RunJudgeMetadata\n /** Per-split scores + raw bag. */\n outcome: RunOutcome\n /** Categorical failure tag, when the run failed and the harness\n * classified it. Free-form string; standard tags live in\n * `failure-taxonomy.ts`. */\n failureMode?: string\n /** Which split this run was drawn from. */\n splitTag: RunSplitTag\n /**\n * Stable scenario identifier the run was scored against. Optional for\n * backwards compatibility, but **strongly recommended**: every primitive\n * that pairs runs by scenario (preferences, paired stats, BT tournament)\n * keys on this. The campaign artifact populates it canonically; legacy\n * runs without it fall back to inference from `outcome.raw.scenario_id`\n * or `experimentId`.\n */\n scenarioId?: string\n}\n\n// ── Validation ───────────────────────────────────────────────────────\n\nconst MANDATORY_TOP_LEVEL = [\n 'runId',\n 'experimentId',\n 'candidateId',\n 'seed',\n 'model',\n 'promptHash',\n 'configHash',\n 'commitSha',\n 'wallMs',\n 'costUsd',\n 'tokenUsage',\n 'outcome',\n 'splitTag',\n] as const\n\nimport { ValidationError } from './errors'\n\nconst SPLIT_TAGS: ReadonlyArray<RunSplitTag> = ['search', 'dev', 'holdout']\n\nexport class RunRecordValidationError extends ValidationError {\n readonly path: string\n constructor(message: string, path = '') {\n super(path ? `${message} (at ${path})` : message)\n this.path = path\n }\n}\n\n/**\n * Strict validator. Throws `RunRecordValidationError` on the first\n * missing or wrongly-typed field. 
Returns the input cast to\n * `RunRecord` on success — the validator does not coerce.\n */\nexport function validateRunRecord(input: unknown): RunRecord {\n if (input === null || typeof input !== 'object') {\n throw new RunRecordValidationError('expected object')\n }\n const obj = input as Record<string, unknown>\n\n for (const key of MANDATORY_TOP_LEVEL) {\n if (!(key in obj)) {\n throw new RunRecordValidationError(`missing mandatory field \"${key}\"`)\n }\n }\n\n expectString(obj.runId, 'runId')\n expectString(obj.experimentId, 'experimentId')\n expectString(obj.candidateId, 'candidateId')\n expectFiniteNumber(obj.seed, 'seed')\n expectString(obj.model, 'model')\n expectString(obj.promptHash, 'promptHash')\n expectString(obj.configHash, 'configHash')\n expectString(obj.commitSha, 'commitSha')\n expectFiniteNumber(obj.wallMs, 'wallMs')\n if (obj.queueMs !== undefined) expectFiniteNumber(obj.queueMs, 'queueMs')\n expectFiniteNumber(obj.costUsd, 'costUsd')\n\n // Snapshot discipline: bare model aliases are not paper-grade.\n if (!modelHasSnapshot(obj.model as string)) {\n throw new RunRecordValidationError(\n `model \"${obj.model}\" lacks a snapshot version (use 'name@YYYY-MM-DD' or 'name-YYYYMMDD')`,\n 'model',\n )\n }\n\n // Token usage.\n const tu = obj.tokenUsage\n if (tu === null || typeof tu !== 'object') {\n throw new RunRecordValidationError('tokenUsage must be an object', 'tokenUsage')\n }\n const tuRec = tu as Record<string, unknown>\n expectFiniteNumber(tuRec.input, 'tokenUsage.input')\n expectFiniteNumber(tuRec.output, 'tokenUsage.output')\n if (tuRec.cached !== undefined) expectFiniteNumber(tuRec.cached, 'tokenUsage.cached')\n\n // Judge metadata, optional.\n if (obj.judgeMetadata !== undefined) {\n const jm = obj.judgeMetadata\n if (jm === null || typeof jm !== 'object') {\n throw new RunRecordValidationError('judgeMetadata must be an object', 'judgeMetadata')\n }\n const jmRec = jm as Record<string, unknown>\n expectString(jmRec.model, 
'judgeMetadata.model')\n expectString(jmRec.promptVersion, 'judgeMetadata.promptVersion')\n expectFiniteNumber(jmRec.confidence, 'judgeMetadata.confidence')\n if (typeof jmRec.fallback !== 'boolean') {\n throw new RunRecordValidationError(\n 'judgeMetadata.fallback must be boolean',\n 'judgeMetadata.fallback',\n )\n }\n }\n\n // Outcome.\n const out = obj.outcome\n if (out === null || typeof out !== 'object') {\n throw new RunRecordValidationError('outcome must be an object', 'outcome')\n }\n const outRec = out as Record<string, unknown>\n if (outRec.searchScore !== undefined)\n expectFiniteNumber(outRec.searchScore, 'outcome.searchScore')\n if (outRec.holdoutScore !== undefined)\n expectFiniteNumber(outRec.holdoutScore, 'outcome.holdoutScore')\n if (outRec.searchScore === undefined && outRec.holdoutScore === undefined) {\n throw new RunRecordValidationError(\n 'outcome must define searchScore or holdoutScore (or both)',\n 'outcome',\n )\n }\n const raw = outRec.raw\n if (raw === null || typeof raw !== 'object') {\n throw new RunRecordValidationError('outcome.raw must be an object', 'outcome.raw')\n }\n for (const [k, v] of Object.entries(raw as Record<string, unknown>)) {\n expectFiniteNumber(v, `outcome.raw.${k}`)\n }\n\n // Failure mode optional.\n if (obj.failureMode !== undefined) expectString(obj.failureMode, 'failureMode')\n\n // Split tag.\n if (typeof obj.splitTag !== 'string' || !SPLIT_TAGS.includes(obj.splitTag as RunSplitTag)) {\n throw new RunRecordValidationError(\n `splitTag must be one of ${SPLIT_TAGS.join(', ')}, got ${String(obj.splitTag)}`,\n 'splitTag',\n )\n }\n\n return input as RunRecord\n}\n\n/** Boolean validator — convenience for filtering arrays. */\nexport function isRunRecord(input: unknown): input is RunRecord {\n try {\n validateRunRecord(input)\n return true\n } catch {\n return false\n }\n}\n\n/** Non-throwing validator — returns a discriminated union. 
*/\nexport function parseRunRecordSafe(\n input: unknown,\n): { ok: true; value: RunRecord } | { ok: false; error: RunRecordValidationError } {\n try {\n return { ok: true, value: validateRunRecord(input) }\n } catch (e) {\n if (e instanceof RunRecordValidationError) return { ok: false, error: e }\n throw e\n }\n}\n\n/** Round-trip helper — `JSON.parse(JSON.stringify(record))` then validate. */\nexport function roundTripRunRecord(record: RunRecord): RunRecord {\n const json = JSON.stringify(record)\n return validateRunRecord(JSON.parse(json))\n}\n\n// ── Internals ────────────────────────────────────────────────────────\n\nfunction expectString(value: unknown, path: string): void {\n if (typeof value !== 'string' || value.length === 0) {\n throw new RunRecordValidationError(`expected non-empty string`, path)\n }\n}\n\nfunction expectFiniteNumber(value: unknown, path: string): void {\n if (typeof value !== 'number' || !Number.isFinite(value)) {\n throw new RunRecordValidationError(`expected finite number`, path)\n }\n}\n\n/**\n * Heuristic snapshot check. 
Accepts:\n * - `name@YYYY-MM-DD` (Anthropic style: `claude-sonnet-4-6@2025-04-15`)\n * - `name-YYYYMMDD` (OpenAI style: `gpt-4o-2024-11-20`)\n * - `name@<arbitrary-token>` (allow opaque snapshots like `@v3`)\n * - explicit `:date-...` Vertex-style tags\n *\n * Rejects bare aliases like `claude-sonnet-4` or `gpt-4o` that remap\n * silently as providers ship new snapshots.\n */\nfunction modelHasSnapshot(model: string): boolean {\n if (model.includes('@')) return true\n if (/-\\d{8}$/.test(model)) return true\n if (/-\\d{4}-\\d{2}-\\d{2}$/.test(model)) return true\n if (/:date-/.test(model)) return true\n return false\n}\n"],"mappings":";;;;;AA+HA,IAAM,sBAAsB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAIA,IAAM,aAAyC,CAAC,UAAU,OAAO,SAAS;AAEnE,IAAM,2BAAN,cAAuC,gBAAgB;AAAA,EACnD;AAAA,EACT,YAAY,SAAiB,OAAO,IAAI;AACtC,UAAM,OAAO,GAAG,OAAO,QAAQ,IAAI,MAAM,OAAO;AAChD,SAAK,OAAO;AAAA,EACd;AACF;AAOO,SAAS,kBAAkB,OAA2B;AAC3D,MAAI,UAAU,QAAQ,OAAO,UAAU,UAAU;AAC/C,UAAM,IAAI,yBAAyB,iBAAiB;AAAA,EACtD;AACA,QAAM,MAAM;AAEZ,aAAW,OAAO,qBAAqB;AACrC,QAAI,EAAE,OAAO,MAAM;AACjB,YAAM,IAAI,yBAAyB,4BAA4B,GAAG,GAAG;AAAA,IACvE;AAAA,EACF;AAEA,eAAa,IAAI,OAAO,OAAO;AAC/B,eAAa,IAAI,cAAc,cAAc;AAC7C,eAAa,IAAI,aAAa,aAAa;AAC3C,qBAAmB,IAAI,MAAM,MAAM;AACnC,eAAa,IAAI,OAAO,OAAO;AAC/B,eAAa,IAAI,YAAY,YAAY;AACzC,eAAa,IAAI,YAAY,YAAY;AACzC,eAAa,IAAI,WAAW,WAAW;AACvC,qBAAmB,IAAI,QAAQ,QAAQ;AACvC,MAAI,IAAI,YAAY,OAAW,oBAAmB,IAAI,SAAS,SAAS;AACxE,qBAAmB,IAAI,SAAS,SAAS;AAGzC,MAAI,CAAC,iBAAiB,IAAI,KAAe,GAAG;AAC1C,UAAM,IAAI;AAAA,MACR,UAAU,IAAI,KAAK;AAAA,MACnB;AAAA,IACF;AAAA,EACF;AAGA,QAAM,KAAK,IAAI;AACf,MAAI,OAAO,QAAQ,OAAO,OAAO,UAAU;AACzC,UAAM,IAAI,yBAAyB,gCAAgC,YAAY;AAAA,EACjF;AACA,QAAM,QAAQ;AACd,qBAAmB,MAAM,OAAO,kBAAkB;AAClD,qBAAmB,MAAM,QAAQ,mBAAmB;AACpD,MAAI,MAAM,WAAW,OAAW,oBAAmB,MAAM,QAAQ,mBAAmB;AAGpF,MAAI,IAAI,kBAAkB,QAAW;AACnC,UAAM,KAAK,IAAI;AACf,QAAI,OAAO,QAAQ,OAAO,OAAO,UAAU;AACzC,YAAM,IAAI,yBAAyB,mCAAmC,eAAe;AAAA,IACvF;AACA,UAAM,Q
AAQ;AACd,iBAAa,MAAM,OAAO,qBAAqB;AAC/C,iBAAa,MAAM,eAAe,6BAA6B;AAC/D,uBAAmB,MAAM,YAAY,0BAA0B;AAC/D,QAAI,OAAO,MAAM,aAAa,WAAW;AACvC,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,QAAM,MAAM,IAAI;AAChB,MAAI,QAAQ,QAAQ,OAAO,QAAQ,UAAU;AAC3C,UAAM,IAAI,yBAAyB,6BAA6B,SAAS;AAAA,EAC3E;AACA,QAAM,SAAS;AACf,MAAI,OAAO,gBAAgB;AACzB,uBAAmB,OAAO,aAAa,qBAAqB;AAC9D,MAAI,OAAO,iBAAiB;AAC1B,uBAAmB,OAAO,cAAc,sBAAsB;AAChE,MAAI,OAAO,gBAAgB,UAAa,OAAO,iBAAiB,QAAW;AACzE,UAAM,IAAI;AAAA,MACR;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACA,QAAM,MAAM,OAAO;AACnB,MAAI,QAAQ,QAAQ,OAAO,QAAQ,UAAU;AAC3C,UAAM,IAAI,yBAAyB,iCAAiC,aAAa;AAAA,EACnF;AACA,aAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,GAA8B,GAAG;AACnE,uBAAmB,GAAG,eAAe,CAAC,EAAE;AAAA,EAC1C;AAGA,MAAI,IAAI,gBAAgB,OAAW,cAAa,IAAI,aAAa,aAAa;AAG9E,MAAI,OAAO,IAAI,aAAa,YAAY,CAAC,WAAW,SAAS,IAAI,QAAuB,GAAG;AACzF,UAAM,IAAI;AAAA,MACR,2BAA2B,WAAW,KAAK,IAAI,CAAC,SAAS,OAAO,IAAI,QAAQ,CAAC;AAAA,MAC7E;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAGO,SAAS,YAAY,OAAoC;AAC9D,MAAI;AACF,sBAAkB,KAAK;AACvB,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAGO,SAAS,mBACd,OACiF;AACjF,MAAI;AACF,WAAO,EAAE,IAAI,MAAM,OAAO,kBAAkB,KAAK,EAAE;AAAA,EACrD,SAAS,GAAG;AACV,QAAI,aAAa,yBAA0B,QAAO,EAAE,IAAI,OAAO,OAAO,EAAE;AACxE,UAAM;AAAA,EACR;AACF;AAGO,SAAS,mBAAmB,QAA8B;AAC/D,QAAM,OAAO,KAAK,UAAU,MAAM;AAClC,SAAO,kBAAkB,KAAK,MAAM,IAAI,CAAC;AAC3C;AAIA,SAAS,aAAa,OAAgB,MAAoB;AACxD,MAAI,OAAO,UAAU,YAAY,MAAM,WAAW,GAAG;AACnD,UAAM,IAAI,yBAAyB,6BAA6B,IAAI;AAAA,EACtE;AACF;AAEA,SAAS,mBAAmB,OAAgB,MAAoB;AAC9D,MAAI,OAAO,UAAU,YAAY,CAAC,OAAO,SAAS,KAAK,GAAG;AACxD,UAAM,IAAI,yBAAyB,0BAA0B,IAAI;AAAA,EACnE;AACF;AAYA,SAAS,iBAAiB,OAAwB;AAChD,MAAI,MAAM,SAAS,GAAG,EAAG,QAAO;AAChC,MAAI,UAAU,KAAK,KAAK,EAAG,QAAO;AAClC,MAAI,sBAAsB,KAAK,KAAK,EAAG,QAAO;AAC9C,MAAI,SAAS,KAAK,KAAK,EAAG,QAAO;AACjC,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
// src/sequential.ts
|
|
2
|
+
/**
 * Run the paired e-value sequence over an in-order stream of deltas and
 * report a verdict at every prefix length.
 *
 * For each observation the running e-value is multiplied by
 * `1 + lambda * d`, where the bet `lambda` is computed only from the
 * observations seen before the current one (running mean and variance),
 * scaled by a shrinkage factor that ramps up over the first 32 samples,
 * and clamped to `±0.99 / bound` so the factor stays positive.
 *
 * Per-step decision, in priority order:
 *   1. `"equivalent"`  — a ROPE is given and the confidence sequence lies inside it;
 *   2. `"promote_now"` — e-value ≥ 2/alpha and the mean of earlier deltas is > 0;
 *   3. `"reject_now"`  — e-value ≥ 2/alpha and the mean of earlier deltas is < 0;
 *   4. `"reject_now"`  — a ROPE is given and the sequence's upper bound is below `rope.low`;
 *   5. `"continue"`    — otherwise.
 *
 * @param {number[]} deltas Paired deltas in arrival order. Values outside
 *   `[-bound, bound]` (including ±Infinity) are clipped and flagged via `clipped`.
 * @param {object} [opts]
 * @param {number} [opts.bound=1] Bound on |delta|; must be > 0.
 * @param {number} [opts.alpha=0.05] Target error level; must be in (0, 1).
 * @param {{low: number, high: number}} [opts.rope] Optional region of practical equivalence.
 * @param {number} [opts.initialBetShrinkage=0.5] Scale applied to the plug-in bet.
 * @returns {{steps: object[], finalDecision: string, decisionFiredAt: (number|null), clipped: boolean}}
 *   `steps[i]` holds `{ t, delta, evalue, pValue, csLow, csHigh, decision }` for the
 *   first `i + 1` deltas; `finalDecision` is the last step's decision (`"continue"`
 *   when there are no deltas); `decisionFiredAt` is the 1-based index of the first
 *   non-`"continue"` decision, or `null`.
 * @throws {Error} If `bound`, `alpha` or `rope` is invalid, or a delta is not a number / is NaN.
 */
function pairedEvalueSequence(deltas, opts = {}) {
  const c = opts.bound ?? 1;
  const alpha = opts.alpha ?? 0.05;
  const initialShrink = opts.initialBetShrinkage ?? 0.5;
  const rope = opts.rope ?? null;
  // Negated comparisons on purpose: every comparison with NaN is false, so
  // `c <= 0` / `alpha <= 0 || alpha >= 1` would let a NaN option through.
  if (!(c > 0)) throw new Error("pairedEvalueSequence: bound must be > 0");
  if (!(alpha > 0 && alpha < 1)) throw new Error("pairedEvalueSequence: alpha must be in (0,1)");
  if (rope && !(Number.isFinite(rope.low) && Number.isFinite(rope.high) && rope.low <= rope.high)) {
    throw new Error("pairedEvalueSequence: rope must satisfy low \u2264 high");
  }
  const steps = [];
  let clipped = false;
  let evalue = 1;
  let decisionFiredAt = null;
  let sum = 0;
  let sumSq = 0;
  let count = 0;
  for (let i = 0; i < deltas.length; i++) {
    let d = deltas[i];
    // A NaN (or missing) delta cannot be clipped; it would zero the e-value
    // for good and turn every later mean / confidence bound into NaN.
    if (typeof d !== "number" || Number.isNaN(d)) {
      throw new Error(`pairedEvalueSequence: delta at index ${i} is not a number`);
    }
    if (d < -c || d > c) {
      d = Math.max(-c, Math.min(c, d));
      clipped = true;
    }
    // Plug-in estimates use only observations before the current one, so the
    // bet is fixed before d is taken into account.
    const muHat = count === 0 ? 0 : sum / count;
    const varHat = count === 0 ? c * c : Math.max(1e-12, sumSq / count - muHat * muHat);
    const t = i + 1;
    // Ramp the bet size linearly over the first 32 samples.
    const shrink = initialShrink * Math.min(1, count / 32);
    let lambda = muHat / (varHat + c * c) * shrink;
    // Keep |lambda * d| < 1 so each factor (1 + lambda * d) stays positive.
    const lambdaMax = 0.99 / c;
    if (lambda > lambdaMax) lambda = lambdaMax;
    if (lambda < -lambdaMax) lambda = -lambdaMax;
    evalue = evalue * (1 + lambda * d);
    if (!Number.isFinite(evalue) || evalue < 0) evalue = 0;
    sum += d;
    sumSq += d * d;
    count += 1;
    const pValue = Math.min(1, 1 / Math.max(evalue, 1e-300));
    const cs = empiricalBernsteinCs(sum, sumSq, count, c, alpha);
    let decision = "continue";
    if (rope && cs.low >= rope.low && cs.high <= rope.high) decision = "equivalent";
    else if (evalue >= 2 / alpha && muHat > 0) decision = "promote_now";
    else if (evalue >= 2 / alpha && muHat < 0) decision = "reject_now";
    else if (rope && cs.high < rope.low) decision = "reject_now";
    if (decision !== "continue" && decisionFiredAt === null) decisionFiredAt = t;
    steps.push({ t, delta: d, evalue, pValue, csLow: cs.low, csHigh: cs.high, decision });
  }
  const finalDecision = steps.length === 0 ? "continue" : steps[steps.length - 1].decision;
  return { steps, finalDecision, decisionFiredAt, clipped };
}
|
|
51
|
+
/**
 * Evaluate every candidate's delta series with `pairedEvalueSequence` and
 * fold the per-candidate verdicts into a single recommendation.
 *
 * Recommendation priority:
 *   1. no candidates at all           → `continue` (nothing has been evaluated);
 *   2. any candidate is `promote_now` → promote the first such candidate;
 *   3. any candidate is `continue`    → `continue`;
 *   4. any candidate is `equivalent`  → `equivalent` for the first such candidate;
 *   5. otherwise                      → `reject_now`.
 *
 * @param {object} input
 * @param {{candidateId: string, deltas: number[]}[]} input.deltaSeries One in-order delta stream per candidate.
 * @param {number} [input.alpha] Forwarded to `pairedEvalueSequence`.
 * @param {number} [input.bound] Forwarded to `pairedEvalueSequence`.
 * @param {{low: number, high: number}} [input.rope] Forwarded to `pairedEvalueSequence`.
 * @returns {{candidates: object[], recommendation: {decision: string, candidateId: (string|null)}}}
 *   `candidates` holds one summary per series: final decision, first firing index,
 *   final e-value / p-value, pair count and final confidence bounds.
 */
function evaluateInterimReleaseConfidence(input) {
  const candidates = input.deltaSeries.map((s) => {
    const seq = pairedEvalueSequence(s.deltas, {
      alpha: input.alpha,
      bound: input.bound,
      rope: input.rope
    });
    // `last` is undefined for an empty delta stream; fall back to neutral values.
    const last = seq.steps[seq.steps.length - 1];
    return {
      candidateId: s.candidateId,
      decision: seq.finalDecision,
      decisionFiredAt: seq.decisionFiredAt,
      finalEvalue: last?.evalue ?? 1,
      finalPValue: last?.pValue ?? 1,
      pairs: seq.steps.length,
      csLow: last?.csLow ?? Number.NEGATIVE_INFINITY,
      csHigh: last?.csHigh ?? Number.POSITIVE_INFINITY
    };
  });
  // With no candidates every lookup below would miss and fall through to
  // "reject_now" although nothing was evaluated; report "continue" instead,
  // which is also what a zero-length delta stream yields.
  if (candidates.length === 0) {
    return { candidates, recommendation: { decision: "continue", candidateId: null } };
  }
  const promote = candidates.find((c) => c.decision === "promote_now");
  if (promote) {
    return {
      candidates,
      recommendation: { decision: "promote_now", candidateId: promote.candidateId }
    };
  }
  const live = candidates.find((c) => c.decision === "continue");
  if (live) return { candidates, recommendation: { decision: "continue", candidateId: null } };
  const equiv = candidates.find((c) => c.decision === "equivalent");
  if (equiv) {
    return {
      candidates,
      recommendation: { decision: "equivalent", candidateId: equiv.candidateId }
    };
  }
  return { candidates, recommendation: { decision: "reject_now", candidateId: null } };
}
|
|
86
|
+
/**
 * Confidence interval for the mean of `n` observations, computed from their
 * running sum and sum of squares.
 *
 * The half-width is `sqrt(2 * variance * psi / n) + 3 * bound * psi / n`,
 * where `psi = log(2 / alpha) + 1.7 * log(log(max(e, n)) + 1)`.
 *
 * @param {number} sum Sum of the observations.
 * @param {number} sumSq Sum of the squared observations.
 * @param {number} n Number of observations.
 * @param {number} bound Bound on the magnitude of a single observation.
 * @param {number} alpha Error level used in the width term.
 * @returns {{low: number, high: number}} `[-bound, bound]` when `n` is 0,
 *   otherwise `mean ± radius`.
 */
function empiricalBernsteinCs(sum, sumSq, n, bound, alpha) {
  // No data yet: only the a-priori range is known.
  if (n === 0) {
    return { low: -bound, high: bound };
  }
  const mean = sum / n;
  // Floor at 0: rounding can push the plug-in variance slightly negative.
  const variance = Math.max(0, sumSq / n - mean * mean);
  const levelTerm = Math.log(2 / alpha);
  const growthTerm = Math.log(Math.log(Math.max(Math.E, n)) + 1);
  const psi = levelTerm + 1.7 * growthTerm;
  const varianceWidth = Math.sqrt(2 * variance * psi / n);
  const rangeWidth = 3 * bound * psi / n;
  const radius = varianceWidth + rangeWidth;
  return { low: mean - radius, high: mean + radius };
}
|
|
94
|
+
|
|
95
|
+
export {
|
|
96
|
+
pairedEvalueSequence,
|
|
97
|
+
evaluateInterimReleaseConfidence
|
|
98
|
+
};
|
|
99
|
+
//# sourceMappingURL=chunk-NU65VQ7M.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/sequential.ts"],"sourcesContent":["/**\n * Always-valid sequential evaluation.\n *\n * `researchReport` (0.21+) assumes a single pre-specified analysis. Real\n * consumers run campaigns weekly / nightly / per-PR; each new run silently\n * inflates the false-discovery rate, because the BH-FDR guarantee was for\n * the *first* look, not the 47th. Without time-uniform inference,\n * launch-decision teams either (a) don't peek, which forfeits the cost\n * advantage of stop-when-decisive, or (b) peek and pretend they didn't,\n * which forfeits scientific validity.\n *\n * This module ships **e-value-based confidence sequences** for paired\n * bounded outcomes. The methodology is the predictable plug-in betting\n * martingale of Waudby-Smith & Ramdas (2024) — provably valid at *any*\n * stopping time. Concretely:\n *\n * For paired deltas D_1, D_2, … ∈ [-c, c] with the null H_0: E[D] ≤ 0,\n * a betting fraction λ_i is chosen using only D_{1..i-1} (predictable\n * plug-in), and the running e-value is\n *\n * E_t = ∏_{i=1}^{t} (1 + λ_i · D_i)\n *\n * E_t is a non-negative martingale under H_0 with E[E_t] ≤ 1, so by\n * Ville's inequality, P(∃ t : E_t ≥ 1/α) ≤ α — we can reject the null\n * at any time without inflating the type-I error.\n *\n * Combined with `runEvalCampaign`, every consumer running rolling\n * campaigns gains the ability to ship the moment evidence is decisive,\n * stop-early on dead-on-arrival variants, and accumulate evidence across\n * partial runs without spending the FDR budget. No new sweep is wasted.\n *\n * References:\n * - Howard, S. R., Ramdas, A., McAuliffe, J., Sekhon, J. (2021).\n * Time-uniform, nonparametric, nonasymptotic confidence sequences.\n * Annals of Statistics, 49(2), 1055–1080.\n * - Waudby-Smith, I., Ramdas, A. (2024). Estimating means of bounded\n * random variables by betting. 
JRSS B, 86(1), 1–27.\n */\n\nexport type SequentialDecision = 'promote_now' | 'continue' | 'reject_now' | 'equivalent'\n\nexport interface PairedEvalueOptions {\n /**\n * Bound on |delta|. Default 1 (matching most score scales). Must satisfy\n * c > 0; deltas outside [-c, c] are clipped with a warning attached to\n * the return value.\n */\n bound?: number\n /** Target Type-I error. Default 0.05. */\n alpha?: number\n /**\n * Region of Practical Equivalence on the *mean* paired delta. When\n * supplied, the verdict can return `'equivalent'` once the running\n * confidence sequence on the mean is fully contained in [low, high].\n */\n rope?: { low: number; high: number }\n /** Initial bet shrinkage (0 < scale ≤ 1). Default 0.5 — empirically robust. */\n initialBetShrinkage?: number\n}\n\nexport interface PairedEvalueStep {\n /** 1-indexed observation count. */\n t: number\n delta: number\n /** Running e-value E_t = ∏ (1 + λ_i · D_i). */\n evalue: number\n /** Time-uniform p-value at stopping time t. */\n pValue: number\n /** Lower bound of the empirical Bernstein confidence sequence at level 1-α. */\n csLow: number\n csHigh: number\n /** Verdict at this stopping time. */\n decision: SequentialDecision\n}\n\nexport interface PairedEvalueSequence {\n steps: PairedEvalueStep[]\n /** The decision at the final step. */\n finalDecision: SequentialDecision\n /** Index (1-based) at which a non-`continue` decision first fired, or null. */\n decisionFiredAt: number | null\n /** True if any deltas were clipped to [-bound, bound]. */\n clipped: boolean\n}\n\n/**\n * Run the paired e-value sequence over an in-order delta stream.\n *\n * Use for *streaming* / interim analyses: pass the deltas you have so\n * far, get the verdict at every prefix length. 
The decision is\n * monotone-stable in the sense that once `'reject_now'` or `'promote_now'`\n * fires, the verdict at later steps remains decisive (the e-value is a\n * non-negative martingale; once it crosses the threshold, it's crossed).\n */\nexport function pairedEvalueSequence(\n deltas: number[],\n opts: PairedEvalueOptions = {},\n): PairedEvalueSequence {\n const c = opts.bound ?? 1\n const alpha = opts.alpha ?? 0.05\n const initialShrink = opts.initialBetShrinkage ?? 0.5\n const rope = opts.rope ?? null\n if (c <= 0) throw new Error('pairedEvalueSequence: bound must be > 0')\n if (alpha <= 0 || alpha >= 1) throw new Error('pairedEvalueSequence: alpha must be in (0,1)')\n if (rope && !(Number.isFinite(rope.low) && Number.isFinite(rope.high) && rope.low <= rope.high)) {\n throw new Error('pairedEvalueSequence: rope must satisfy low ≤ high')\n }\n\n const steps: PairedEvalueStep[] = []\n let clipped = false\n let evalue = 1\n let decisionFiredAt: number | null = null\n\n // Running statistics (using only D_{1..i-1} for the bet → predictable plug-in).\n let sum = 0\n let sumSq = 0\n let count = 0\n\n for (let i = 0; i < deltas.length; i++) {\n let d = deltas[i]!\n if (d < -c || d > c) {\n d = Math.max(-c, Math.min(c, d))\n clipped = true\n }\n\n // Predictable plug-in bet (positive λ tests for E[D] > 0; we run a two-sided\n // test by tracking the symmetric e-value via |bet|).\n // λ_i ∝ mean / (variance + bound^2). Shrink early to avoid overbetting.\n const muHat = count === 0 ? 0 : sum / count\n const varHat = count === 0 ? 
c * c : Math.max(1e-12, sumSq / count - muHat * muHat)\n const t = i + 1\n const shrink = initialShrink * Math.min(1, count / 32) // anneal toward 1\n let lambda = (muHat / (varHat + c * c)) * shrink\n // Clip to ensure 1 + λ·D > 0 for all |D| ≤ c (so the e-value stays non-negative).\n const lambdaMax = 0.99 / c\n if (lambda > lambdaMax) lambda = lambdaMax\n if (lambda < -lambdaMax) lambda = -lambdaMax\n\n evalue = evalue * (1 + lambda * d)\n if (!Number.isFinite(evalue) || evalue < 0) evalue = 0\n\n sum += d\n sumSq += d * d\n count += 1\n\n const pValue = Math.min(1, 1 / Math.max(evalue, 1e-300))\n\n // Empirical Bernstein confidence sequence on the mean. Howard et al.\n // (2021), Theorem 4.4 with σ̂² the running sample variance and a\n // calibration constant tuned for two-sided coverage at level 1 - α.\n const cs = empiricalBernsteinCs(sum, sumSq, count, c, alpha)\n\n let decision: SequentialDecision = 'continue'\n if (rope && cs.low >= rope.low && cs.high <= rope.high) decision = 'equivalent'\n else if (evalue >= 2 / alpha && muHat > 0) decision = 'promote_now'\n else if (evalue >= 2 / alpha && muHat < 0) decision = 'reject_now'\n else if (rope && cs.high < rope.low) decision = 'reject_now'\n\n if (decision !== 'continue' && decisionFiredAt === null) decisionFiredAt = t\n\n steps.push({ t, delta: d, evalue, pValue, csLow: cs.low, csHigh: cs.high, decision })\n }\n\n const finalDecision = steps.length === 0 ? 'continue' : steps[steps.length - 1]!.decision\n return { steps, finalDecision, decisionFiredAt, clipped }\n}\n\nexport interface InterimReleaseConfidenceInput {\n /**\n * One delta series per candidate (paired deltas vs comparator). 
Order\n * within a series is the order the campaigns were run.\n */\n deltaSeries: Array<{ candidateId: string; deltas: number[] }>\n alpha?: number\n bound?: number\n rope?: { low: number; high: number }\n}\n\nexport interface InterimReleaseConfidence {\n candidates: Array<{\n candidateId: string\n decision: SequentialDecision\n decisionFiredAt: number | null\n finalEvalue: number\n finalPValue: number\n pairs: number\n csLow: number\n csHigh: number\n }>\n /**\n * Campaign-level recommendation: pick the strongest 'promote_now', else\n * 'continue' if any candidate is still live, else 'reject_now' if every\n * candidate is dead, else 'equivalent'.\n */\n recommendation: { decision: SequentialDecision; candidateId: string | null }\n}\n\n/**\n * Run interim sequential analyses across many candidates at once,\n * preserving the time-uniform α guarantee for each candidate's series and\n * synthesising a campaign-level recommendation. Designed to be called on\n * every campaign tick — the recommendation is anytime-valid.\n */\nexport function evaluateInterimReleaseConfidence(\n input: InterimReleaseConfidenceInput,\n): InterimReleaseConfidence {\n const candidates = input.deltaSeries.map((s) => {\n const seq = pairedEvalueSequence(s.deltas, {\n alpha: input.alpha,\n bound: input.bound,\n rope: input.rope,\n })\n const last = seq.steps[seq.steps.length - 1]\n return {\n candidateId: s.candidateId,\n decision: seq.finalDecision,\n decisionFiredAt: seq.decisionFiredAt,\n finalEvalue: last?.evalue ?? 1,\n finalPValue: last?.pValue ?? 1,\n pairs: seq.steps.length,\n csLow: last?.csLow ?? Number.NEGATIVE_INFINITY,\n csHigh: last?.csHigh ?? 
Number.POSITIVE_INFINITY,\n }\n })\n\n const promote = candidates.find((c) => c.decision === 'promote_now')\n if (promote)\n return {\n candidates,\n recommendation: { decision: 'promote_now', candidateId: promote.candidateId },\n }\n const live = candidates.find((c) => c.decision === 'continue')\n if (live) return { candidates, recommendation: { decision: 'continue', candidateId: null } }\n const equiv = candidates.find((c) => c.decision === 'equivalent')\n if (equiv)\n return {\n candidates,\n recommendation: { decision: 'equivalent', candidateId: equiv.candidateId },\n }\n return { candidates, recommendation: { decision: 'reject_now', candidateId: null } }\n}\n\n// ── Internals ────────────────────────────────────────────────────────────\n\n/**\n * Empirical Bernstein confidence sequence on the mean of bounded variables.\n * Adapted from Howard et al. (2021) §4.4. Provides a time-uniform CI on\n * the running mean; valid at every stopping time.\n */\nfunction empiricalBernsteinCs(\n sum: number,\n sumSq: number,\n n: number,\n bound: number,\n alpha: number,\n): { low: number; high: number } {\n if (n === 0) return { low: -bound, high: bound }\n const mean = sum / n\n const variance = Math.max(0, sumSq / n - mean * mean)\n // Iterated-log calibration constant. The 1.7 exponent matches the\n // recommended choice in Howard et al. 
for two-sided coverage at level\n // 1 - α with mild log-corrections; tightening further requires a\n // tuned mixture and is out of scope.\n const psi = Math.log(2 / alpha) + 1.7 * Math.log(Math.log(Math.max(Math.E, n)) + 1)\n const radius = Math.sqrt((2 * variance * psi) / n) + (3 * bound * psi) / n\n return { low: mean - radius, high: mean + radius }\n}\n"],"mappings":";AA8FO,SAAS,qBACd,QACA,OAA4B,CAAC,GACP;AACtB,QAAM,IAAI,KAAK,SAAS;AACxB,QAAM,QAAQ,KAAK,SAAS;AAC5B,QAAM,gBAAgB,KAAK,uBAAuB;AAClD,QAAM,OAAO,KAAK,QAAQ;AAC1B,MAAI,KAAK,EAAG,OAAM,IAAI,MAAM,yCAAyC;AACrE,MAAI,SAAS,KAAK,SAAS,EAAG,OAAM,IAAI,MAAM,8CAA8C;AAC5F,MAAI,QAAQ,EAAE,OAAO,SAAS,KAAK,GAAG,KAAK,OAAO,SAAS,KAAK,IAAI,KAAK,KAAK,OAAO,KAAK,OAAO;AAC/F,UAAM,IAAI,MAAM,yDAAoD;AAAA,EACtE;AAEA,QAAM,QAA4B,CAAC;AACnC,MAAI,UAAU;AACd,MAAI,SAAS;AACb,MAAI,kBAAiC;AAGrC,MAAI,MAAM;AACV,MAAI,QAAQ;AACZ,MAAI,QAAQ;AAEZ,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,QAAI,IAAI,OAAO,CAAC;AAChB,QAAI,IAAI,CAAC,KAAK,IAAI,GAAG;AACnB,UAAI,KAAK,IAAI,CAAC,GAAG,KAAK,IAAI,GAAG,CAAC,CAAC;AAC/B,gBAAU;AAAA,IACZ;AAKA,UAAM,QAAQ,UAAU,IAAI,IAAI,MAAM;AACtC,UAAM,SAAS,UAAU,IAAI,IAAI,IAAI,KAAK,IAAI,OAAO,QAAQ,QAAQ,QAAQ,KAAK;AAClF,UAAM,IAAI,IAAI;AACd,UAAM,SAAS,gBAAgB,KAAK,IAAI,GAAG,QAAQ,EAAE;AACrD,QAAI,SAAU,SAAS,SAAS,IAAI,KAAM;AAE1C,UAAM,YAAY,OAAO;AACzB,QAAI,SAAS,UAAW,UAAS;AACjC,QAAI,SAAS,CAAC,UAAW,UAAS,CAAC;AAEnC,aAAS,UAAU,IAAI,SAAS;AAChC,QAAI,CAAC,OAAO,SAAS,MAAM,KAAK,SAAS,EAAG,UAAS;AAErD,WAAO;AACP,aAAS,IAAI;AACb,aAAS;AAET,UAAM,SAAS,KAAK,IAAI,GAAG,IAAI,KAAK,IAAI,QAAQ,MAAM,CAAC;AAKvD,UAAM,KAAK,qBAAqB,KAAK,OAAO,OAAO,GAAG,KAAK;AAE3D,QAAI,WAA+B;AACnC,QAAI,QAAQ,GAAG,OAAO,KAAK,OAAO,GAAG,QAAQ,KAAK,KAAM,YAAW;AAAA,aAC1D,UAAU,IAAI,SAAS,QAAQ,EAAG,YAAW;AAAA,aAC7C,UAAU,IAAI,SAAS,QAAQ,EAAG,YAAW;AAAA,aAC7C,QAAQ,GAAG,OAAO,KAAK,IAAK,YAAW;AAEhD,QAAI,aAAa,cAAc,oBAAoB,KAAM,mBAAkB;AAE3E,UAAM,KAAK,EAAE,GAAG,OAAO,GAAG,QAAQ,QAAQ,OAAO,GAAG,KAAK,QAAQ,GAAG,MAAM,SAAS,CAAC;AAAA,EACtF;AAEA,QAAM,gBAAgB,MAAM,WAAW,IAAI,aAAa,MAAM,MAAM,SAAS,CAAC,EAAG;AACjF,SAAO,EAAE,OAAO,eAAe,iBAAiB,QAAQ;AAC1D
;AAsCO,SAAS,iCACd,OAC0B;AAC1B,QAAM,aAAa,MAAM,YAAY,IAAI,CAAC,MAAM;AAC9C,UAAM,MAAM,qBAAqB,EAAE,QAAQ;AAAA,MACzC,OAAO,MAAM;AAAA,MACb,OAAO,MAAM;AAAA,MACb,MAAM,MAAM;AAAA,IACd,CAAC;AACD,UAAM,OAAO,IAAI,MAAM,IAAI,MAAM,SAAS,CAAC;AAC3C,WAAO;AAAA,MACL,aAAa,EAAE;AAAA,MACf,UAAU,IAAI;AAAA,MACd,iBAAiB,IAAI;AAAA,MACrB,aAAa,MAAM,UAAU;AAAA,MAC7B,aAAa,MAAM,UAAU;AAAA,MAC7B,OAAO,IAAI,MAAM;AAAA,MACjB,OAAO,MAAM,SAAS,OAAO;AAAA,MAC7B,QAAQ,MAAM,UAAU,OAAO;AAAA,IACjC;AAAA,EACF,CAAC;AAED,QAAM,UAAU,WAAW,KAAK,CAAC,MAAM,EAAE,aAAa,aAAa;AACnE,MAAI;AACF,WAAO;AAAA,MACL;AAAA,MACA,gBAAgB,EAAE,UAAU,eAAe,aAAa,QAAQ,YAAY;AAAA,IAC9E;AACF,QAAM,OAAO,WAAW,KAAK,CAAC,MAAM,EAAE,aAAa,UAAU;AAC7D,MAAI,KAAM,QAAO,EAAE,YAAY,gBAAgB,EAAE,UAAU,YAAY,aAAa,KAAK,EAAE;AAC3F,QAAM,QAAQ,WAAW,KAAK,CAAC,MAAM,EAAE,aAAa,YAAY;AAChE,MAAI;AACF,WAAO;AAAA,MACL;AAAA,MACA,gBAAgB,EAAE,UAAU,cAAc,aAAa,MAAM,YAAY;AAAA,IAC3E;AACF,SAAO,EAAE,YAAY,gBAAgB,EAAE,UAAU,cAAc,aAAa,KAAK,EAAE;AACrF;AASA,SAAS,qBACP,KACA,OACA,GACA,OACA,OAC+B;AAC/B,MAAI,MAAM,EAAG,QAAO,EAAE,KAAK,CAAC,OAAO,MAAM,MAAM;AAC/C,QAAM,OAAO,MAAM;AACnB,QAAM,WAAW,KAAK,IAAI,GAAG,QAAQ,IAAI,OAAO,IAAI;AAKpD,QAAM,MAAM,KAAK,IAAI,IAAI,KAAK,IAAI,MAAM,KAAK,IAAI,KAAK,IAAI,KAAK,IAAI,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC;AAClF,QAAM,SAAS,KAAK,KAAM,IAAI,WAAW,MAAO,CAAC,IAAK,IAAI,QAAQ,MAAO;AACzE,SAAO,EAAE,KAAK,OAAO,QAAQ,MAAM,OAAO,OAAO;AACnD;","names":[]}
|