@tangle-network/agent-eval 0.23.1 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +212 -79
  3. package/dist/baseline-4R5deP0N.d.ts +108 -0
  4. package/dist/benchmarks/index.d.ts +3 -2
  5. package/dist/benchmarks/index.js +1 -1
  6. package/dist/builder-eval/index.d.ts +249 -0
  7. package/dist/builder-eval/index.js +391 -0
  8. package/dist/builder-eval/index.js.map +1 -0
  9. package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
  10. package/dist/chunk-2A5XJB43.js.map +1 -0
  11. package/dist/chunk-47X6LRCE.js +76 -0
  12. package/dist/chunk-47X6LRCE.js.map +1 -0
  13. package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
  14. package/dist/chunk-4F5DQN55.js.map +1 -0
  15. package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
  16. package/dist/chunk-4S4BM3QQ.js.map +1 -0
  17. package/dist/chunk-5BKGXME7.js +65 -0
  18. package/dist/chunk-5BKGXME7.js.map +1 -0
  19. package/dist/{chunk-6KQG5HAH.js → chunk-5LBB5B3Z.js} +376 -72
  20. package/dist/chunk-5LBB5B3Z.js.map +1 -0
  21. package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
  22. package/dist/chunk-6QDKWHLS.js.map +1 -0
  23. package/dist/{chunk-VQQSPGSM.js → chunk-EDUKQ5AM.js} +247 -189
  24. package/dist/chunk-EDUKQ5AM.js.map +1 -0
  25. package/dist/chunk-I4MBDTY5.js +272 -0
  26. package/dist/chunk-I4MBDTY5.js.map +1 -0
  27. package/dist/chunk-JLZQWFV3.js +618 -0
  28. package/dist/chunk-JLZQWFV3.js.map +1 -0
  29. package/dist/chunk-K2TPS5LB.js +569 -0
  30. package/dist/chunk-K2TPS5LB.js.map +1 -0
  31. package/dist/chunk-KKHDIONI.js +414 -0
  32. package/dist/chunk-KKHDIONI.js.map +1 -0
  33. package/dist/chunk-KMPRBJK4.js +74 -0
  34. package/dist/chunk-KMPRBJK4.js.map +1 -0
  35. package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
  36. package/dist/chunk-KTGTIOFD.js.map +1 -0
  37. package/dist/chunk-LSH4MMOZ.js +838 -0
  38. package/dist/chunk-LSH4MMOZ.js.map +1 -0
  39. package/dist/chunk-NG236HPC.js +57 -0
  40. package/dist/chunk-NG236HPC.js.map +1 -0
  41. package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
  42. package/dist/chunk-NLMNWKVM.js.map +1 -0
  43. package/dist/chunk-NU65VQ7M.js +99 -0
  44. package/dist/chunk-NU65VQ7M.js.map +1 -0
  45. package/dist/chunk-OWLAAMME.js +250 -0
  46. package/dist/chunk-OWLAAMME.js.map +1 -0
  47. package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
  48. package/dist/chunk-PC4UYEBM.js.map +1 -0
  49. package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
  50. package/dist/chunk-RAF443UI.js.map +1 -0
  51. package/dist/chunk-RZTMDUO7.js +49 -0
  52. package/dist/chunk-RZTMDUO7.js.map +1 -0
  53. package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
  54. package/dist/chunk-SESZDQPX.js.map +1 -0
  55. package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
  56. package/dist/chunk-TVVP3ZZQ.js.map +1 -0
  57. package/dist/chunk-WWYCWKUM.js +196 -0
  58. package/dist/chunk-WWYCWKUM.js.map +1 -0
  59. package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
  60. package/dist/chunk-YRZ4M5GS.js.map +1 -0
  61. package/dist/chunk-ZN274SWR.js +613 -0
  62. package/dist/chunk-ZN274SWR.js.map +1 -0
  63. package/dist/cli.js +10 -6
  64. package/dist/cli.js.map +1 -1
  65. package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
  66. package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
  67. package/dist/control.d.ts +8 -6
  68. package/dist/control.js +10 -7
  69. package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
  70. package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
  71. package/dist/errors-BZ9sTdz7.d.ts +70 -0
  72. package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
  73. package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
  74. package/dist/governance/index.d.ts +5 -0
  75. package/dist/governance/index.js +18 -0
  76. package/dist/governance/index.js.map +1 -0
  77. package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
  78. package/dist/index-Oj9fAPPN.d.ts +270 -0
  79. package/dist/index.d.ts +2018 -3003
  80. package/dist/index.js +7443 -9102
  81. package/dist/index.js.map +1 -1
  82. package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
  83. package/dist/knowledge/index.d.ts +102 -0
  84. package/dist/knowledge/index.js +18 -0
  85. package/dist/knowledge/index.js.map +1 -0
  86. package/dist/meta-eval/index.d.ts +99 -0
  87. package/dist/meta-eval/index.js +324 -0
  88. package/dist/meta-eval/index.js.map +1 -0
  89. package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
  90. package/dist/openapi.json +491 -1
  91. package/dist/optimization.d.ts +11 -8
  92. package/dist/optimization.js +11 -9
  93. package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
  94. package/dist/pipelines/index.d.ts +172 -0
  95. package/dist/pipelines/index.js +345 -0
  96. package/dist/pipelines/index.js.map +1 -0
  97. package/dist/prm/index.d.ts +99 -0
  98. package/dist/prm/index.js +222 -0
  99. package/dist/prm/index.js.map +1 -0
  100. package/dist/query-DODUYdPg.d.ts +30 -0
  101. package/dist/release-report-BNgMdqPF.d.ts +292 -0
  102. package/dist/replay-BL96gCEP.d.ts +226 -0
  103. package/dist/reporting.d.ts +10 -295
  104. package/dist/reporting.js +10 -6
  105. package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-BPT8x_NT.d.ts} +148 -146
  106. package/dist/rl.d.ts +1762 -8
  107. package/dist/rl.js +2035 -58
  108. package/dist/rl.js.map +1 -1
  109. package/dist/rubric-D5tjHNJQ.d.ts +72 -0
  110. package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
  111. package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
  112. package/dist/sequential-Dgz1n51-.d.ts +139 -0
  113. package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
  114. package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-C7VPYEj2.d.ts} +3 -76
  115. package/dist/telemetry/file.js +4 -1
  116. package/dist/telemetry/file.js.map +1 -1
  117. package/dist/telemetry/index.js +57 -57
  118. package/dist/telemetry/index.js.map +1 -1
  119. package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
  120. package/dist/traces.d.ts +142 -387
  121. package/dist/traces.js +1302 -40
  122. package/dist/traces.js.map +1 -1
  123. package/dist/trajectory-CnoBo-JY.d.ts +32 -0
  124. package/dist/wire/index.d.ts +369 -25
  125. package/dist/wire/index.js +22 -3
  126. package/package.json +44 -18
  127. package/dist/chunk-42I2QC2L.js.map +0 -1
  128. package/dist/chunk-5IIQKMD5.js.map +0 -1
  129. package/dist/chunk-6KQG5HAH.js.map +0 -1
  130. package/dist/chunk-6M774GY6.js.map +0 -1
  131. package/dist/chunk-7EAUOUQS.js.map +0 -1
  132. package/dist/chunk-AXHNWLIX.js.map +0 -1
  133. package/dist/chunk-EXGR4XEM.js.map +0 -1
  134. package/dist/chunk-IOXMGMHQ.js.map +0 -1
  135. package/dist/chunk-KAO3Q65R.js.map +0 -1
  136. package/dist/chunk-LZKIOBG2.js +0 -2026
  137. package/dist/chunk-LZKIOBG2.js.map +0 -1
  138. package/dist/chunk-QBW3YBTR.js.map +0 -1
  139. package/dist/chunk-QUKKGHTZ.js.map +0 -1
  140. package/dist/chunk-SQQLHODJ.js.map +0 -1
  141. package/dist/chunk-V5QSWN7L.js +0 -1310
  142. package/dist/chunk-V5QSWN7L.js.map +0 -1
  143. package/dist/chunk-VQQSPGSM.js.map +0 -1
  144. package/dist/chunk-XPHOZPOM.js +0 -1947
  145. package/dist/chunk-XPHOZPOM.js.map +0 -1
  146. package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
  147. package/dist/index-ekBXweiQ.d.ts +0 -1894
  148. package/dist/sequential-DgU2mFsE.d.ts +0 -304
@@ -1,23 +1,23 @@
1
1
  import {
2
2
  assertLlmRoute
3
- } from "./chunk-KAO3Q65R.js";
3
+ } from "./chunk-4S4BM3QQ.js";
4
4
  import {
5
5
  researchReport
6
- } from "./chunk-IOXMGMHQ.js";
6
+ } from "./chunk-2A5XJB43.js";
7
7
  import {
8
8
  RunIntegrityError,
9
9
  assertRunCaptured
10
- } from "./chunk-QUKKGHTZ.js";
10
+ } from "./chunk-KTGTIOFD.js";
11
11
  import {
12
12
  FileSystemRawProviderSink
13
- } from "./chunk-SQQLHODJ.js";
13
+ } from "./chunk-PC4UYEBM.js";
14
14
  import {
15
15
  TraceEmitter
16
- } from "./chunk-5IIQKMD5.js";
16
+ } from "./chunk-TVVP3ZZQ.js";
17
17
  import {
18
18
  canonicalize,
19
19
  hashJson
20
- } from "./chunk-6M774GY6.js";
20
+ } from "./chunk-4F5DQN55.js";
21
21
 
22
22
  // src/eval-campaign.ts
23
23
  var DEFAULT_INTEGRITY = {
@@ -52,7 +52,9 @@ async function runEvalCampaign(opts) {
52
52
  scenarioIds.add(s.scenarioId);
53
53
  }
54
54
  if (opts.report?.comparator && !variantIds.has(opts.report.comparator)) {
55
- throw new Error(`runEvalCampaign: report.comparator "${opts.report.comparator}" is not a configured variantId.`);
55
+ throw new Error(
56
+ `runEvalCampaign: report.comparator "${opts.report.comparator}" is not a configured variantId.`
57
+ );
56
58
  }
57
59
  if (!opts.commitSha) {
58
60
  throw new Error("runEvalCampaign: commitSha is required (every RunRecord needs it).");
@@ -67,17 +69,19 @@ async function runEvalCampaign(opts) {
67
69
  const provider = opts.llmOpts.provider ?? null;
68
70
  const preregistrationHash = opts.preregistrationHash ?? null;
69
71
  const rawSinkFactory = opts.rawSinkFactory ?? defaultRawSinkFactory(opts.workDir);
70
- const campaignFingerprint = await hashJson(canonicalize({
71
- campaignId: opts.campaignId,
72
- variants: opts.variants.map((v) => v.id).sort(),
73
- scenarios: opts.scenarios.map((s) => s.scenarioId).sort(),
74
- seeds: [...seeds].sort((a, b) => a - b),
75
- splitTag,
76
- comparator: opts.report?.comparator ?? null,
77
- baseUrl,
78
- provider,
79
- preregistrationHash
80
- }));
72
+ const campaignFingerprint = await hashJson(
73
+ canonicalize({
74
+ campaignId: opts.campaignId,
75
+ variants: opts.variants.map((v) => v.id).sort(),
76
+ scenarios: opts.scenarios.map((s) => s.scenarioId).sort(),
77
+ seeds: [...seeds].sort((a, b) => a - b),
78
+ splitTag,
79
+ comparator: opts.report?.comparator ?? null,
80
+ baseUrl,
81
+ provider,
82
+ preregistrationHash
83
+ })
84
+ );
81
85
  const cells = [];
82
86
  for (const variant of opts.variants) {
83
87
  for (const scenario of opts.scenarios) {
@@ -280,4 +284,4 @@ function defaultRunId(params) {
280
284
  export {
281
285
  runEvalCampaign
282
286
  };
283
- //# sourceMappingURL=chunk-EXGR4XEM.js.map
287
+ //# sourceMappingURL=chunk-SESZDQPX.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/eval-campaign.ts"],"sourcesContent":["/**\n * EvalCampaign — opinionated matrix runner that wires the four\n * capture-integrity directives by construction.\n *\n * Every consumer that ran a launch-grade benchmark before 0.22 reinvented\n * the same shape: matrix runner → for each (variant, scenario, seed) →\n * start a TraceEmitter → call LLMs → end the run → maybe analyze.\n * The bug class blueprint-agent reported (raw events not captured, route\n * silently wrong, integrity not asserted, analyst never ran) lives at the\n * integration boundary — not the agent-eval API surface. The four\n * directives in `SKILL.md § Capture integrity` are mitigations.\n *\n * `EvalCampaign` is the structural fix. Consumers don't wire the integrity\n * surface anymore; the campaign owns it. Specifically, the campaign:\n *\n * - calls `assertLlmRoute` once at preflight before any work runs\n * - constructs a per-run `TraceStore` and `RawProviderSink` via factories\n * - constructs the `TraceEmitter` with `onRunComplete: [analyst hook]`\n * - hands the runner an `LlmClientOptions` pre-wired with the sink and\n * trace context — the runner can't accidentally call an LLM without\n * capturing the raw HTTP envelope\n * - calls `assertRunCaptured` after every `endRun` and routes failures\n * through a configurable policy (`throw` / `mark_failed` / `log`)\n * - assembles per-run `RunRecord`s and runs `researchReport` at the end\n * so the campaign artifact is launch-decision-grade by default\n * - embeds the campaign fingerprint (a SHA-256 over the canonicalised\n * run set) and optional `preregistrationHash` in the report\n *\n * The runner contract is intentionally narrow: produce a `CampaignRunOutcome`\n * given a fully-wired `CampaignRunContext`. Everything orchestration-shaped\n * lives in the campaign. This is the inversion-of-control point — consumers\n * stop writing matrix runners and start writing scenario-runners.\n *\n * Out of scope for v1 (tracked in `docs/research-report-methodology.md`):\n *\n * - Distributed/cluster execution (concurrency is local async)\n * - Adaptive sampling / sequential interim looks\n * - Resume from partial state across crashes\n * - LLM-call retry beyond what `LlmClient` already does\n */\n\nimport { assertLlmRoute, type LlmClientOptions, type LlmRouteRequirements } from './llm-client'\nimport { canonicalize, hashJson } from './pre-registration'\nimport type {\n RunJudgeMetadata,\n RunOutcome,\n RunRecord,\n RunSplitTag,\n RunTokenUsage,\n} from './run-record'\nimport { type ResearchReport, type ResearchReportOptions, researchReport } from './summary-report'\nimport type { RunCompleteHook } from './trace/emitter'\nimport { TraceEmitter } from './trace/emitter'\nimport {\n assertRunCaptured,\n RunIntegrityError,\n type RunIntegrityExpectations,\n type RunIntegrityReport,\n} from './trace/integrity'\nimport { FileSystemRawProviderSink, type RawProviderSink } from './trace/raw-provider-sink'\nimport type { TraceStore } from './trace/store'\n\n// ── Public types ─────────────────────────────────────────────────────────\n\nexport interface CampaignVariant<V> {\n id: string\n payload: V\n}\n\nexport interface CampaignScenario {\n scenarioId: string\n /** Free-form metadata propagated to runs and reports. */\n tags?: Record<string, string>\n}\n\nexport interface CampaignRunContext<V> {\n /** Stable run id. The campaign generates this; the runner does not. */\n runId: string\n /** Logical experiment id (campaignId by default; overridable per-run via opts). */\n experimentId: string\n variant: V\n variantId: string\n scenarioId: string\n scenarioTags: Record<string, string>\n seed: number\n splitTag: RunSplitTag\n /**\n * The TraceEmitter for this run, with `onRunComplete` hooks pre-wired\n * (analyst auto-execution if configured, plus integrity check). The\n * runner MUST call `emitter.startRun` before doing any work and either\n * `emitter.endRun` or `emitter.abortRun` before returning.\n */\n emitter: TraceEmitter\n store: TraceStore\n rawSink: RawProviderSink\n /**\n * Pre-wired LLM client options — `rawSink` and `traceContext` are populated\n * so any `callLlm(req, ctx.llmOpts)` automatically captures raw HTTP. The\n * runner can spread additional fields if needed.\n */\n llmOpts: LlmClientOptions\n}\n\nexport interface CampaignRunOutcome {\n /** Did the run pass? Mirrors `RunOutcome.pass` semantics. */\n pass: boolean\n /** Score for the run on its split. Maps to `searchScore` or `holdoutScore`. */\n score: number\n /** Mandatory cost in USD. Use 0 + raw.cost_unknown=1 only if truly unknown. */\n costUsd: number\n tokenUsage: RunTokenUsage\n /** Snapshot model id (e.g. `claude-sonnet-4-6@2025-04-15`). */\n model: string\n /** sha256 of the effective prompt sent to the model. */\n promptHash: string\n /** sha256 of the effective config (model, temperature, tools, judges, splits). */\n configHash: string\n /** Optional extra numeric metrics to land in `outcome.raw`. */\n raw?: Record<string, number>\n /** Optional failure-taxonomy tag if the run failed. */\n failureMode?: string\n /** Optional judge metadata when a judge was used. */\n judgeMetadata?: RunJudgeMetadata\n}\n\nexport type CampaignRunner<V> = (ctx: CampaignRunContext<V>) => Promise<CampaignRunOutcome>\n\nexport type CampaignIntegrityPolicy = 'throw' | 'mark_failed' | 'log'\n\nexport interface EvalCampaignOptions<V> {\n /**\n * Stable id for the campaign. Used as the default `experimentId` on\n * every run, and folded into the campaign fingerprint.\n */\n campaignId: string\n variants: CampaignVariant<V>[]\n scenarios: CampaignScenario[]\n /** Default `[0, 1, 2]`. */\n seeds?: number[]\n /** Default `'holdout'` — the split that anchors a launch decision. */\n splitTag?: RunSplitTag\n /** Git SHA the campaign is run against. Mandatory; `RunRecord` rejects unset. */\n commitSha: string\n /**\n * LLM client config. Augmented per-run with `rawSink` and `traceContext`\n * before being passed to the runner. The campaign asserts this config\n * matches `routeRequirements` once at preflight.\n */\n llmOpts: LlmClientOptions\n /**\n * Default `{ requireExplicitBaseUrl: true, requireAuth: true }` — fail\n * loud if the campaign would silently fall back to the public router or\n * run unauthenticated. Override with an empty object to disable.\n */\n routeRequirements?: LlmRouteRequirements\n /**\n * Per-run TraceStore factory. Common shape: a fresh store per run keyed\n * on `runId`. Implementations that share a store across the campaign\n * are valid — the campaign only writes through `emitter`.\n */\n storeFactory: (params: CampaignFactoryParams) => TraceStore\n /**\n * Per-run RawProviderSink factory. Defaults to `FileSystemRawProviderSink`\n * rooted at `${workDir}/raw-events/${runId}` if `workDir` is supplied;\n * otherwise required. Forensic capture is non-negotiable in a campaign\n * run — pass `NoopRawProviderSink` explicitly if you want to opt out.\n */\n rawSinkFactory?: (params: CampaignFactoryParams) => RawProviderSink\n /**\n * Filesystem root for default `rawSinkFactory`. Ignored if\n * `rawSinkFactory` is supplied.\n */\n workDir?: string\n /**\n * Extra `onRunComplete` hooks the campaign appends (after its own\n * integrity-check hook). Pass `traceAnalystOnRunComplete(...)` here.\n */\n onRunComplete?: RunCompleteHook[]\n /**\n * Per-run integrity expectations. Defaults to:\n * `{ llmSpansMin: 1, requireRawCoverageOfLlmSpans: true, requireOutcome: true }`.\n * Override (e.g. `{ llmSpansMin: 0 }`) for runs that don't call LLMs.\n */\n integrity?: RunIntegrityExpectations\n /** Behaviour when integrity fails. Default `'mark_failed'`. */\n onIntegrityFailure?: CampaignIntegrityPolicy\n /**\n * Per-run runner. Receives a fully-wired context; produces an outcome\n * the campaign converts into a `RunRecord`.\n */\n runner: CampaignRunner<V>\n /**\n * If set, the campaign computes `researchReport` at the end. `comparator`\n * is a `variantId`. Other fields are forwarded verbatim.\n */\n report?: { comparator?: string } & Omit<\n ResearchReportOptions,\n 'comparator' | 'preregistrationHash' | 'generatedAt'\n >\n /**\n * Hash of a signed `HypothesisManifest` (see `pre-registration.ts`).\n * Embedded in the campaign fingerprint and the research report.\n */\n preregistrationHash?: string\n /** Local concurrency. Default `1` (sequential). */\n concurrency?: number\n /**\n * Override the time source. Tests pass a mock to make wallMs deterministic.\n */\n now?: () => number\n /** Override the runId generator. Tests pin this. */\n runId?: (params: CampaignFactoryParams) => string\n}\n\nexport interface CampaignFactoryParams {\n campaignId: string\n runId: string\n variantId: string\n scenarioId: string\n seed: number\n}\n\nexport interface FailedRun {\n runId: string\n variantId: string\n scenarioId: string\n seed: number\n reason: string\n error?: string\n}\n\nexport interface EvalCampaignResult {\n campaignId: string\n /** SHA-256 over canonicalised `(variantIds, scenarioIds, seeds, comparator, splitTag, baseUrl, provider, preregistrationHash)`. */\n campaignFingerprint: string\n preregistrationHash: string | null\n /** Successful runs only. Failed runs land in `failedRuns`. */\n runs: RunRecord[]\n /** Integrity reports for every successful run. */\n integrityReports: RunIntegrityReport[]\n failedRuns: FailedRun[]\n /** Computed when `report` is set on options. */\n report?: ResearchReport\n startedAt: string\n endedAt: string\n}\n\n// ── Implementation ───────────────────────────────────────────────────────\n\nconst DEFAULT_INTEGRITY: RunIntegrityExpectations = {\n llmSpansMin: 1,\n requireRawCoverageOfLlmSpans: true,\n requireOutcome: true,\n}\n\nconst DEFAULT_ROUTE: LlmRouteRequirements = {\n requireExplicitBaseUrl: true,\n requireAuth: true,\n}\n\nexport async function runEvalCampaign<V>(\n opts: EvalCampaignOptions<V>,\n): Promise<EvalCampaignResult> {\n // ── Preflight ──────────────────────────────────────────────────────\n assertLlmRoute(opts.llmOpts, opts.routeRequirements ?? DEFAULT_ROUTE)\n\n if (opts.variants.length === 0) {\n throw new Error('runEvalCampaign: variants must be non-empty.')\n }\n if (opts.scenarios.length === 0) {\n throw new Error('runEvalCampaign: scenarios must be non-empty.')\n }\n const variantIds = new Set<string>()\n for (const v of opts.variants) {\n if (variantIds.has(v.id)) {\n throw new Error(`runEvalCampaign: duplicate variant id \"${v.id}\".`)\n }\n variantIds.add(v.id)\n }\n const scenarioIds = new Set<string>()\n for (const s of opts.scenarios) {\n if (scenarioIds.has(s.scenarioId)) {\n throw new Error(`runEvalCampaign: duplicate scenarioId \"${s.scenarioId}\".`)\n }\n scenarioIds.add(s.scenarioId)\n }\n if (opts.report?.comparator && !variantIds.has(opts.report.comparator)) {\n throw new Error(\n `runEvalCampaign: report.comparator \"${opts.report.comparator}\" is not a configured variantId.`,\n )\n }\n if (!opts.commitSha) {\n throw new Error('runEvalCampaign: commitSha is required (every RunRecord needs it).')\n }\n\n const seeds = opts.seeds ?? [0, 1, 2]\n const splitTag: RunSplitTag = opts.splitTag ?? 'holdout'\n const concurrency = Math.max(1, opts.concurrency ?? 1)\n const integrity = { ...DEFAULT_INTEGRITY, ...(opts.integrity ?? {}) }\n const onIntegrityFailure: CampaignIntegrityPolicy = opts.onIntegrityFailure ?? 'mark_failed'\n const now = opts.now ?? (() => Date.now())\n const baseUrl = (opts.llmOpts.baseUrl ?? '').replace(/\\/+$/, '')\n const provider = opts.llmOpts.provider ?? null\n const preregistrationHash = opts.preregistrationHash ?? null\n\n const rawSinkFactory = opts.rawSinkFactory ?? defaultRawSinkFactory(opts.workDir)\n\n // ── Fingerprint ────────────────────────────────────────────────────\n const campaignFingerprint = await hashJson(\n canonicalize({\n campaignId: opts.campaignId,\n variants: opts.variants.map((v) => v.id).sort(),\n scenarios: opts.scenarios.map((s) => s.scenarioId).sort(),\n seeds: [...seeds].sort((a, b) => a - b),\n splitTag,\n comparator: opts.report?.comparator ?? null,\n baseUrl,\n provider,\n preregistrationHash,\n }),\n )\n\n // ── Plan the matrix ────────────────────────────────────────────────\n type Cell = { variant: CampaignVariant<V>; scenario: CampaignScenario; seed: number }\n const cells: Cell[] = []\n for (const variant of opts.variants) {\n for (const scenario of opts.scenarios) {\n for (const seed of seeds) {\n cells.push({ variant, scenario, seed })\n }\n }\n }\n\n const startedAt = new Date(now()).toISOString()\n const runs: RunRecord[] = []\n const integrityReports: RunIntegrityReport[] = []\n const failedRuns: FailedRun[] = []\n\n // ── Execute (bounded-concurrency worker pool) ──────────────────────\n let cursor = 0\n async function worker(): Promise<void> {\n while (true) {\n const i = cursor++\n if (i >= cells.length) return\n const cell = cells[i]!\n try {\n const result = await runOneCell(cell)\n runs.push(result.record)\n integrityReports.push(result.integrity)\n } catch (err) {\n if (err instanceof CellExecutionError) {\n failedRuns.push(err.failed)\n if (err.integrity) integrityReports.push(err.integrity)\n } else {\n // Genuine bug — not a runner failure, not an integrity failure.\n // Surface it; don't silently mask.\n throw err\n }\n }\n }\n }\n\n async function runOneCell(\n cell: Cell,\n ): Promise<{ record: RunRecord; integrity: RunIntegrityReport }> {\n const runId = (opts.runId ?? defaultRunId)({\n campaignId: opts.campaignId,\n runId: '', // unused by default generator\n variantId: cell.variant.id,\n scenarioId: cell.scenario.scenarioId,\n seed: cell.seed,\n })\n const factoryParams: CampaignFactoryParams = {\n campaignId: opts.campaignId,\n runId,\n variantId: cell.variant.id,\n scenarioId: cell.scenario.scenarioId,\n seed: cell.seed,\n }\n const store = opts.storeFactory(factoryParams)\n const rawSink = rawSinkFactory(factoryParams)\n\n const emitter = new TraceEmitter(store, {\n runId,\n now: opts.now,\n onRunComplete: opts.onRunComplete,\n })\n\n const llmOpts: LlmClientOptions = {\n ...opts.llmOpts,\n rawSink,\n traceContext: { runId },\n }\n\n const ctx: CampaignRunContext<V> = {\n runId,\n experimentId: opts.campaignId,\n variant: cell.variant.payload,\n variantId: cell.variant.id,\n scenarioId: cell.scenario.scenarioId,\n scenarioTags: cell.scenario.tags ?? {},\n seed: cell.seed,\n splitTag,\n emitter,\n store,\n rawSink,\n llmOpts,\n }\n\n const wallStart = now()\n let outcome: CampaignRunOutcome\n try {\n outcome = await opts.runner(ctx)\n } catch (err) {\n const message = err instanceof Error ? err.message : String(err)\n // The runner threw mid-execution; give it a chance to have aborted.\n try {\n await emitter.abortRun(message)\n } catch {\n // Already aborted/ended; ignore.\n }\n throw new CellExecutionError({\n runId,\n variantId: cell.variant.id,\n scenarioId: cell.scenario.scenarioId,\n seed: cell.seed,\n reason: 'runner_threw',\n error: message,\n })\n }\n const wallMs = now() - wallStart\n\n const integrityReport = await assertRunCaptured(store, runId, { ...integrity, rawSink })\n if (!integrityReport.ok) {\n switch (onIntegrityFailure) {\n case 'throw':\n throw new RunIntegrityError(integrityReport)\n case 'mark_failed':\n throw new CellExecutionError(\n {\n runId,\n variantId: cell.variant.id,\n scenarioId: cell.scenario.scenarioId,\n seed: cell.seed,\n reason: 'integrity_failed',\n error: integrityReport.issues.map((i) => i.code).join(', '),\n },\n integrityReport,\n )\n case 'log':\n // Caller wants the run admitted with a flagged report; fall through.\n break\n }\n }\n\n const recordOutcome: RunOutcome = {\n raw: outcome.raw ?? {},\n }\n if (splitTag === 'holdout') recordOutcome.holdoutScore = outcome.score\n else recordOutcome.searchScore = outcome.score\n\n const record: RunRecord = {\n runId,\n experimentId: opts.campaignId,\n candidateId: cell.variant.id,\n seed: cell.seed,\n model: outcome.model,\n promptHash: outcome.promptHash,\n configHash: outcome.configHash,\n commitSha: opts.commitSha,\n wallMs,\n costUsd: outcome.costUsd,\n tokenUsage: outcome.tokenUsage,\n judgeMetadata: outcome.judgeMetadata,\n outcome: recordOutcome,\n failureMode: outcome.failureMode,\n splitTag,\n scenarioId: cell.scenario.scenarioId,\n }\n return { record, integrity: integrityReport }\n }\n\n const workers = Array.from({ length: Math.min(concurrency, cells.length) }, () => worker())\n await Promise.all(workers)\n\n // ── Optional research report ───────────────────────────────────────\n let report: ResearchReport | undefined\n if (opts.report) {\n const reportOpts: ResearchReportOptions = {\n ...opts.report,\n comparator: opts.report.comparator,\n split: splitTag === 'dev' ? 'search' : splitTag,\n generatedAt: new Date(now()).toISOString(),\n preregistrationHash: preregistrationHash ?? undefined,\n }\n report = await researchReport(runs, reportOpts)\n }\n\n const endedAt = new Date(now()).toISOString()\n\n return {\n campaignId: opts.campaignId,\n campaignFingerprint,\n preregistrationHash,\n runs,\n integrityReports,\n failedRuns,\n report,\n startedAt,\n endedAt,\n }\n}\n\n// ── Internal ─────────────────────────────────────────────────────────────\n\nclass CellExecutionError extends Error {\n readonly failed: FailedRun\n readonly integrity?: RunIntegrityReport\n constructor(failed: FailedRun, integrity?: RunIntegrityReport) {\n super(`cell ${failed.variantId}/${failed.scenarioId}@${failed.seed} failed: ${failed.reason}`)\n this.failed = failed\n this.integrity = integrity\n }\n}\n\nfunction defaultRawSinkFactory(workDir: string | undefined) {\n return (params: CampaignFactoryParams): RawProviderSink => {\n if (!workDir) {\n throw new Error(\n 'runEvalCampaign: rawSinkFactory not supplied and workDir not set. Pass either to enable raw provider capture, or pass `new NoopRawProviderSink()` via rawSinkFactory to opt out explicitly.',\n )\n }\n return new FileSystemRawProviderSink({\n dir: `${workDir}/raw-events/${params.runId}`,\n })\n }\n}\n\nfunction defaultRunId(params: CampaignFactoryParams): string {\n // Stable across re-runs: fingerprint of (campaignId, variantId, scenarioId, seed).\n // Caller can override via opts.runId for non-deterministic IDs.\n const base = `${params.campaignId}::${params.variantId}::${params.scenarioId}::${params.seed}`\n // Lightweight hex: we don't need crypto-grade here, just stability + uniqueness.\n let h1 = 0x811c9dc5\n let h2 = 0x12345678\n for (let i = 0; i < base.length; i++) {\n const c = base.charCodeAt(i)\n h1 = Math.imul(h1 ^ c, 0x01000193) >>> 0\n h2 = Math.imul(h2 ^ c, 0x9e3779b1) >>> 0\n }\n return `run-${h1.toString(16).padStart(8, '0')}${h2.toString(16).padStart(8, '0')}`\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;AAyPA,IAAM,oBAA8C;AAAA,EAClD,aAAa;AAAA,EACb,8BAA8B;AAAA,EAC9B,gBAAgB;AAClB;AAEA,IAAM,gBAAsC;AAAA,EAC1C,wBAAwB;AAAA,EACxB,aAAa;AACf;AAEA,eAAsB,gBACpB,MAC6B;AAE7B,iBAAe,KAAK,SAAS,KAAK,qBAAqB,aAAa;AAEpE,MAAI,KAAK,SAAS,WAAW,GAAG;AAC9B,UAAM,IAAI,MAAM,8CAA8C;AAAA,EAChE;AACA,MAAI,KAAK,UAAU,WAAW,GAAG;AAC/B,UAAM,IAAI,MAAM,+CAA+C;AAAA,EACjE;AACA,QAAM,aAAa,oBAAI,IAAY;AACnC,aAAW,KAAK,KAAK,UAAU;AAC7B,QAAI,WAAW,IAAI,EAAE,EAAE,GAAG;AACxB,YAAM,IAAI,MAAM,0CAA0C,EAAE,EAAE,IAAI;AAAA,IACpE;AACA,eAAW,IAAI,EAAE,EAAE;AAAA,EACrB;AACA,QAAM,cAAc,oBAAI,IAAY;AACpC,aAAW,KAAK,KAAK,WAAW;AAC9B,QAAI,YAAY,IAAI,EAAE,UAAU,GAAG;AACjC,YAAM,IAAI,MAAM,0CAA0C,EAAE,UAAU,IAAI;AAAA,IAC5E;AACA,gBAAY,IAAI,EAAE,UAAU;AAAA,EAC9B;AACA,MAAI,KAAK,QAAQ,cAAc,CAAC,WAAW,IAAI,KAAK,OAAO,UAAU,GAAG;AACtE,UAAM,IAAI;AAAA,MACR,uCAAuC,KAAK,OAAO,UAAU;AAAA,IAC/D;AAAA,EACF;AACA,MAAI,CAAC,KAAK,WAAW;AACnB,UAAM,IAAI,MAAM,oEAAoE;AAAA,EACtF;AAEA,QAAM,QAAQ,KAAK,SAAS,CAAC,GAAG,GAAG,CAAC;AACpC,QAAM,WAAwB,KAAK,YAAY;AAC/C,QAAM,cAAc,KAAK,IAAI,GAAG,KAAK,eAAe,CAAC;AACrD,QAAM,YAAY,EAAE,GAAG,mBAAmB,GAAI,KAAK,aAAa,CAAC,EAAG;AACpE,QAAM,qBAA8C,KAAK,sBAAsB;AAC/E,QAAM,MAAM,KAAK,QAAQ,MAAM,KAAK,IAAI;AACxC,QAAM,WAAW,KAAK,QAAQ,WAAW,IAAI,QAAQ,QAAQ,EAAE;AAC/D,QAAM,WAAW,KAAK,QAAQ,YAAY;AAC1C,QAAM,sBAAsB,KAAK,uBAAuB;AAExD,QAAM,iBAAiB,KAAK,kBAAkB,sBAAsB,KAAK,OAAO;AAGhF,QAAM,sBAAsB,MAAM;AAAA,IAChC,aAAa;AAAA,MACX,YAAY,KAAK;AAAA,MACjB,UAAU,KAAK,SAAS,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE,KAAK;AAAA,MAC9C,WAAW,KAAK,UAAU,IAAI,CAAC,MAAM,EAAE,UAAU,EAAE,KAAK;AAAA,MACxD,OAAO,CAAC,GAAG,KAAK,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAAA,MACtC;AAAA,MACA,YAAY,KAAK,QAAQ,cAAc;AAAA,MACvC;AAAA,MACA;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAIA,QAAM,QAAgB,CAAC;AACvB,aAAW,WAAW,KAAK,UAAU;AACnC,eAAW,YAAY,KAAK,WAAW;AACrC,iBAAW,QAAQ,OAAO;AACxB,cAAM,KAAK,EAAE,SAAS,UAAU,KAAK,CAAC;AAAA,MACxC;AAAA,IACF;AAAA,EACF;AAEA,QAAM,YAAY,IAAI,KAAK,IAAI,CAAC,EAAE,YAAY;AAC9C,QAAM,OAAoB,CAAC;AAC3B,QAAM,mBAAyC,CAAC;AAChD,QAAM,aAA0B,CAAC;AAGjC,MAAI,SAAS;AACb,iBAAe,SAAwB;AACrC,WAAO,MAAM;AACX,YAAM,IAAI;AACV,UAAI,KAAK,MAAM,OAAQ;AACvB,YAAM,OAAO,MAAM,CAAC;AACpB,UAAI;AACF,cAAM,SAAS,MAAM,WAAW,IAAI;AACpC,aAAK,KAAK,OAAO,MAAM;AACvB,yBAAiB,KAAK,OAAO,SAAS;AAAA,MACxC,SAAS,KAAK;AACZ,YAAI,eAAe,oBAAoB;AACrC,qBAAW,KAAK,IAAI,MAAM;AAC1B,cAAI,IAAI,UAAW,kBAAiB,KAAK,IAAI,SAAS;AAAA,QACxD,OAAO;AAGL,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,iBAAe,WACb,MAC+D;AAC/D,UAAM,SAAS,KAAK,SAAS,cAAc;AAAA,MACzC,YAAY,KAAK;AAAA,MACjB,OAAO;AAAA;AAAA,MACP,WAAW,KAAK,QAAQ;AAAA,MACxB,YAAY,KAAK,SAAS;AAAA,MAC1B,MAAM,KAAK;AAAA,IACb,CAAC;AACD,UAAM,gBAAuC;AAAA,MAC3C,YAAY,KAAK;AAAA,MACjB;AAAA,MACA,WAAW,KAAK,QAAQ;AAAA,MACxB,YAAY,KAAK,SAAS;AAAA,MAC1B,MAAM,KAAK;AAAA,IACb;AACA,UAAM,QAAQ,KAAK,aAAa,aAAa;AAC7C,UAAM,UAAU,eAAe,aAAa;AAE5C,UAAM,UAAU,IAAI,aAAa,OAAO;AAAA,MACtC;AAAA,MACA,KAAK,KAAK;AAAA,MACV,eAAe,KAAK;AAAA,IACtB,CAAC;AAED,UAAM,UAA4B;AAAA,MAChC,GAAG,KAAK;AAAA,MACR;AAAA,MACA,cAAc,EAAE,MAAM;AAAA,IACxB;AAEA,UAAM,MAA6B;AAAA,MACjC;AAAA,MACA,cAAc,KAAK;AAAA,MACnB,SAAS,KAAK,QAAQ;AAAA,MACtB,WAAW,KAAK,QAAQ;AAAA,MACxB,YAAY,KAAK,SAAS;AAAA,MAC1B,cAAc,KAAK,SAAS,QAAQ,CAAC;AAAA,MACrC,MAAM,KAAK;AAAA,MACX;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,UAAM,YAAY,IAAI;AACtB,QAAI;AACJ,QAAI;AACF,gBAAU,MAAM,KAAK,OAAO,GAAG;AAAA,IACjC,SAAS,KAAK;AACZ,YAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAE/D,UAAI;AACF,cAAM,QAAQ,SAAS,OAAO;AAAA,MAChC,QAAQ;AAAA,MAER;AACA,YAAM,IAAI,mBAAmB;AAAA,QAC3B;AAAA,QACA,WAAW,KAAK,QAAQ;AAAA,QACxB,YAAY,KAAK,SAAS;AAAA,QAC1B,MAAM,KAAK;AAAA,QACX,QAAQ;AAAA,QACR,OAAO;AAAA,MACT,CAAC;AAAA,IACH;AACA,UAAM,SAAS,IAAI,IAAI;AAEvB,UAAM,kBAAkB,MAAM,kBAAkB,OAAO,OAAO,EAAE,GAAG,WAAW,QAAQ,CAAC;AACvF,QAAI,CAAC,gBAAgB,IAAI;AACvB,cAAQ,oBAAoB;AAAA,QAC1B,KAAK;AACH,gBAAM,IAAI,kBAAkB,eAAe;AAAA,QAC7C,KAAK;AACH,gBAAM,IAAI;AAAA,YACR;AAAA,cACE;AAAA,cACA,WAAW,KAAK,QAAQ;AAAA,cACxB,YAAY,KAAK,SAAS;AAAA,cAC1B,MAAM,KAAK;AAAA,cACX,QAAQ;AAAA,cACR,OAAO,gBAAgB,OAAO,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,IAAI;AAAA,YAC5D;AAAA,YACA;AAAA,UACF;AAAA,QACF,KAAK;AAEH;AAAA,MACJ;AAAA,IACF;AAEA,UAAM,gBAA4B;AAAA,MAChC,KAAK,QAAQ,OAAO,CAAC;AAAA,IACvB;AACA,QAAI,aAAa,UAAW,eAAc,eAAe,QAAQ;AAAA,QAC5D,eAAc,cAAc,QAAQ;AAEzC,UAAM,SAAoB;AAAA,MACxB;AAAA,MACA,cAAc,KAAK;AAAA,MACnB,aAAa,KAAK,QAAQ;AAAA,MAC1B,MAAM,KAAK;AAAA,MACX,OAAO,QAAQ;AAAA,MACf,YAAY,QAAQ;AAAA,MACpB,YAAY,QAAQ;AAAA,MACpB,WAAW,KAAK;AAAA,MAChB;AAAA,MACA,SAAS,QAAQ;AAAA,MACjB,YAAY,QAAQ;AAAA,MACpB,eAAe,QAAQ;AAAA,MACvB,SAAS;AAAA,MACT,aAAa,QAAQ;AAAA,MACrB;AAAA,MACA,YAAY,KAAK,SAAS;AAAA,IAC5B;AACA,WAAO,EAAE,QAAQ,WAAW,gBAAgB;AAAA,EAC9C;AAEA,QAAM,UAAU,MAAM,KAAK,EAAE,QAAQ,KAAK,IAAI,aAAa,MAAM,MAAM,EAAE,GAAG,MAAM,OAAO,CAAC;AAC1F,QAAM,QAAQ,IAAI,OAAO;AAGzB,MAAI;AACJ,MAAI,KAAK,QAAQ;AACf,UAAM,aAAoC;AAAA,MACxC,GAAG,KAAK;AAAA,MACR,YAAY,KAAK,OAAO;AAAA,MACxB,OAAO,aAAa,QAAQ,WAAW;AAAA,MACvC,aAAa,IAAI,KAAK,IAAI,CAAC,EAAE,YAAY;AAAA,MACzC,qBAAqB,uBAAuB;AAAA,IAC9C;AACA,aAAS,MAAM,eAAe,MAAM,UAAU;AAAA,EAChD;AAEA,QAAM,UAAU,IAAI,KAAK,IAAI,CAAC,EAAE,YAAY;AAE5C,SAAO;AAAA,IACL,YAAY,KAAK;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAIA,IAAM,qBAAN,cAAiC,MAAM;AAAA,EAC5B;AAAA,EACA;AAAA,EACT,YAAY,QAAmB,WAAgC;AAC7D,UAAM,QAAQ,OAAO,SAAS,IAAI,OAAO,UAAU,IAAI,OAAO,IAAI,YAAY,OAAO,MAAM,EAAE;AAC7F,SAAK,SAAS;AACd,SAAK,YAAY;AAAA,EACnB;AACF;AAEA,SAAS,sBAAsB,SAA6B;AAC1D,SAAO,CAAC,WAAmD;AACzD,QAAI,CAAC,SAAS;AACZ,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,WAAO,IAAI,0BAA0B;AAAA,MACnC,KAAK,GAAG,OAAO,eAAe,OAAO,KAAK;AAAA,IAC5C,CAAC;AAAA,EACH;AACF;AAEA,SAAS,aAAa,QAAuC;AAG3D,QAAM,OAAO,GAAG,OAAO,UAAU,KAAK,OAAO,SAAS,KAAK,OAAO,UAAU,KAAK,OAAO,IAAI;AAE5F,MAAI,KAAK;AACT,MAAI,KAAK;AACT,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,IAAI,KAAK,WAAW,CAAC;AAC3B,SAAK,KAAK,KAAK,KAAK,GAAG,QAAU,MAAM;AACvC,SAAK,KAAK,KAAK,KAAK,GAAG,UAAU,MAAM;AAAA,EACzC;AACA,SAAO,OAAO,GAAG,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,GAAG,GAAG,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC;AACnF;","names":[]}
@@ -37,7 +37,7 @@ var TraceEmitter = class {
37
37
  * to anchor to don't have to invent placeholder strings at the call site.
38
38
  */
39
39
  async startRun(run) {
40
- const scenarioId = run.scenarioId ?? run.layer ?? run.tags?.["kind"] ?? "runtime";
40
+ const scenarioId = run.scenarioId ?? run.layer ?? run.tags?.kind ?? "runtime";
41
41
  const full = {
42
42
  ...run,
43
43
  scenarioId,
@@ -60,7 +60,13 @@ var TraceEmitter = class {
60
60
  status: "aborted",
61
61
  outcome
62
62
  });
63
- await this.runHooks({ runId: this._runId, emitter: this, store: this.store, outcome, status: "aborted" });
63
+ await this.runHooks({
64
+ runId: this._runId,
65
+ emitter: this,
66
+ store: this.store,
67
+ outcome,
68
+ status: "aborted"
69
+ });
64
70
  }
65
71
  async runHooks(ctx) {
66
72
  for (const hook of this.hooks) {
@@ -104,7 +110,11 @@ var TraceEmitter = class {
104
110
  span,
105
111
  end: async (patch) => {
106
112
  const endedAt = this.now();
107
- await this.store.updateSpan(span.spanId, { endedAt, status: "ok", ...patch });
113
+ await this.store.updateSpan(span.spanId, {
114
+ endedAt,
115
+ status: "ok",
116
+ ...patch
117
+ });
108
118
  this.pop(span.spanId);
109
119
  },
110
120
  fail: async (error, patch) => {
@@ -233,4 +243,4 @@ export {
233
243
  TraceEmitter,
234
244
  llmSpanFromProvider
235
245
  };
236
- //# sourceMappingURL=chunk-5IIQKMD5.js.map
246
+ //# sourceMappingURL=chunk-TVVP3ZZQ.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/trace/emitter.ts"],"sourcesContent":["/**\n * TraceEmitter — hierarchical span builder that auto-parents using an\n * internal stack. One emitter per Run; emitters do NOT share state.\n *\n * Convenience methods (`llm`, `tool`, `retrieval`, `judge`, `sandbox`)\n * return a `SpanHandle` with `.end()` / `.fail()` so callers don't\n * have to thread spanIds manually. For async workflows that can't use\n * the stack (e.g. fan-out parallel calls), pass `parentSpanId`\n * explicitly.\n */\n\nimport type {\n Artifact,\n BudgetLedgerEntry,\n EventKind,\n JudgeSpan,\n LlmSpan,\n Message,\n RetrievalSpan,\n Run,\n RunOutcome,\n SandboxSpan,\n Span,\n SpanKind,\n ToolSpan,\n TraceEvent,\n} from './schema'\nimport type { TraceStore } from './store'\n\nexport interface SpanHandle<S extends Span = Span> {\n span: S\n end(patch?: Partial<S>): Promise<void>\n fail(error: string | Error, patch?: Partial<S>): Promise<void>\n}\n\nexport interface RunCompleteHookContext {\n runId: string\n emitter: TraceEmitter\n store: TraceStore\n /** Outcome the caller passed to `endRun` (undefined for `abortRun`). */\n outcome?: RunOutcome\n /** Final run status. */\n status: 'completed' | 'failed' | 'aborted'\n}\n\nexport type RunCompleteHook = (ctx: RunCompleteHookContext) => Promise<void> | void\n\nexport interface TraceEmitterOptions {\n runId?: string\n /** Inject a clock for deterministic tests. */\n now?: () => number\n /** Inject an id generator for deterministic tests. */\n id?: () => string\n /**\n * Hooks fired after `endRun` / `abortRun` writes the final run state.\n * Designed for trace-analyst auto-execution, integrity assertions, and\n * outbound notifications. Hooks run sequentially in the order supplied.\n *\n * By default a hook that throws is swallowed and logged as a `note` event\n * on the run — auto-orchestration must not crash the underlying flow.\n * Set `hookErrors: 'throw'` to propagate.\n */\n onRunComplete?: RunCompleteHook[]\n /** `'swallow'` (default) | `'throw'`. */\n hookErrors?: 'swallow' | 'throw'\n}\n\nexport class TraceEmitter {\n private store: TraceStore\n private stack: string[] = []\n private _runId: string\n private now: () => number\n private id: () => string\n private hooks: RunCompleteHook[]\n private hookErrors: 'swallow' | 'throw'\n\n constructor(store: TraceStore, options: TraceEmitterOptions = {}) {\n this.store = store\n this.now = options.now ?? (() => Date.now())\n this.id = options.id ?? (() => cryptoRandomId())\n this._runId = options.runId ?? this.id()\n this.hooks = options.onRunComplete ?? []\n this.hookErrors = options.hookErrors ?? 'swallow'\n }\n\n get runId(): string {\n return this._runId\n }\n\n get traceStore(): TraceStore {\n return this.store\n }\n\n /** Append a hook after construction (e.g. attach the trace analyst). */\n addRunCompleteHook(hook: RunCompleteHook): void {\n this.hooks.push(hook)\n }\n\n // ── Run lifecycle ──────────────────────────────────────────────────\n\n /**\n * Begin a Run.\n *\n * `scenarioId` is required on the persisted Run shape — every Run downstream\n * gets a non-empty scenarioId so filters and aggregations stay simple — but\n * the INPUT here accepts it as optional. When omitted, startRun substitutes\n * a sensible default (`run.layer ?? run.tags?.['kind'] ?? 'runtime'`) so\n * runtime / operator / meta-eval runs that have no curated-scenario corpus\n * to anchor to don't have to invent placeholder strings at the call site.\n */\n async startRun(\n run: Omit<Run, 'runId' | 'scenarioId' | 'startedAt' | 'status'> & { scenarioId?: string },\n ): Promise<Run> {\n const scenarioId = run.scenarioId ?? run.layer ?? run.tags?.kind ?? 'runtime'\n const full: Run = {\n ...run,\n scenarioId,\n runId: this._runId,\n startedAt: this.now(),\n status: 'running',\n }\n await this.store.appendRun(full)\n return full\n }\n\n async endRun(outcome?: RunOutcome): Promise<void> {\n const status: 'completed' | 'failed' = outcome?.pass === false ? 'failed' : 'completed'\n await this.store.updateRun(this._runId, { endedAt: this.now(), status, outcome })\n await this.runHooks({ runId: this._runId, emitter: this, store: this.store, outcome, status })\n }\n\n async abortRun(reason: string): Promise<void> {\n const outcome = { pass: false, notes: reason }\n await this.store.updateRun(this._runId, {\n endedAt: this.now(),\n status: 'aborted',\n outcome,\n })\n await this.runHooks({\n runId: this._runId,\n emitter: this,\n store: this.store,\n outcome,\n status: 'aborted',\n })\n }\n\n private async runHooks(ctx: RunCompleteHookContext): Promise<void> {\n for (const hook of this.hooks) {\n try {\n await hook(ctx)\n } catch (err) {\n if (this.hookErrors === 'throw') throw err\n try {\n await this.store.appendEvent({\n eventId: this.id(),\n runId: this._runId,\n kind: 'log',\n timestamp: this.now(),\n payload: {\n source: 'run_complete_hook',\n error: err instanceof Error ? err.message : String(err),\n },\n })\n } catch {\n // best-effort\n }\n }\n }\n }\n\n // ── Generic span ───────────────────────────────────────────────────\n\n async span<S extends Span = Span>(\n init: {\n kind: SpanKind\n name: string\n parentSpanId?: string\n attributes?: Record<string, unknown>\n } & Partial<Omit<S, 'spanId' | 'runId' | 'startedAt' | 'kind' | 'name'>>,\n ): Promise<SpanHandle<S>> {\n const spanId = this.id()\n const parent = init.parentSpanId ?? this.stack[this.stack.length - 1]\n const span = {\n spanId,\n parentSpanId: parent,\n runId: this._runId,\n startedAt: this.now(),\n ...init,\n } as unknown as S\n await this.store.appendSpan(span)\n this.stack.push(spanId)\n return this.handle<S>(span)\n }\n\n private handle<S extends Span>(span: S): SpanHandle<S> {\n return {\n span,\n end: async (patch?: Partial<S>) => {\n const endedAt = this.now()\n await this.store.updateSpan(span.spanId, {\n endedAt,\n status: 'ok',\n ...patch,\n } as Partial<Span>)\n this.pop(span.spanId)\n },\n fail: async (error: string | Error, patch?: Partial<S>) => {\n const endedAt = this.now()\n const errStr = error instanceof Error ? error.message : error\n await this.store.updateSpan(span.spanId, {\n endedAt,\n status: 'error',\n error: errStr,\n ...patch,\n } as Partial<Span>)\n this.pop(span.spanId)\n },\n }\n }\n\n private pop(spanId: string): void {\n const idx = this.stack.lastIndexOf(spanId)\n if (idx >= 0) this.stack.splice(idx, 1)\n }\n\n // ── Typed span conveniences ────────────────────────────────────────\n\n llm(\n init: Omit<LlmSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'>,\n ): Promise<SpanHandle<LlmSpan>> {\n return this.span<LlmSpan>({ kind: 'llm', ...init })\n }\n\n tool(\n init: Omit<ToolSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'>,\n ): Promise<SpanHandle<ToolSpan>> {\n return this.span<ToolSpan>({ kind: 'tool', ...init })\n }\n\n retrieval(\n init: Omit<RetrievalSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'>,\n ): Promise<SpanHandle<RetrievalSpan>> {\n return this.span<RetrievalSpan>({ kind: 'retrieval', ...init })\n }\n\n async recordJudge(\n verdict: Omit<JudgeSpan, 'spanId' | 'runId' | 'kind' | 'startedAt' | 'endedAt'>,\n ): Promise<JudgeSpan> {\n const spanId = this.id()\n const now = this.now()\n const full: JudgeSpan = {\n spanId,\n runId: this._runId,\n kind: 'judge',\n startedAt: now,\n endedAt: now,\n status: 'ok',\n ...verdict,\n }\n await this.store.appendSpan(full)\n return full\n }\n\n sandbox(\n init: Omit<SandboxSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'>,\n ): Promise<SpanHandle<SandboxSpan>> {\n return this.span<SandboxSpan>({ kind: 'sandbox', ...init })\n }\n\n // ── Events ─────────────────────────────────────────────────────────\n\n async emit(event: {\n kind: EventKind\n spanId?: string\n payload?: Record<string, unknown>\n }): Promise<TraceEvent> {\n const full: TraceEvent = {\n eventId: this.id(),\n runId: this._runId,\n spanId: event.spanId ?? this.stack[this.stack.length - 1],\n kind: event.kind,\n timestamp: this.now(),\n payload: event.payload ?? {},\n }\n await this.store.appendEvent(full)\n return full\n }\n\n // ── Budget ledger ──────────────────────────────────────────────────\n\n async recordBudget(\n entry: Omit<BudgetLedgerEntry, 'runId' | 'timestamp'> & { timestamp?: number },\n ): Promise<BudgetLedgerEntry> {\n const full: BudgetLedgerEntry = {\n runId: this._runId,\n timestamp: entry.timestamp ?? this.now(),\n dimension: entry.dimension,\n limit: entry.limit,\n consumed: entry.consumed,\n remaining: entry.remaining,\n breached: entry.breached,\n spanId: entry.spanId ?? this.stack[this.stack.length - 1],\n }\n await this.store.appendBudgetEntry(full)\n if (full.breached) {\n await this.emit({\n kind: 'budget_breach',\n spanId: full.spanId,\n payload: { dimension: full.dimension, limit: full.limit, consumed: full.consumed },\n })\n }\n return full\n }\n\n // ── Artifacts ──────────────────────────────────────────────────────\n\n async recordArtifact(artifact: Omit<Artifact, 'artifactId' | 'runId'>): Promise<Artifact> {\n const full: Artifact = { artifactId: this.id(), runId: this._runId, ...artifact }\n await this.store.appendArtifact(full)\n return full\n }\n\n // ── Nested composition ─────────────────────────────────────────────\n\n /**\n * Runs `fn` inside a span; auto-ends on success, auto-fails on throw.\n * Returns the fn's return value. Use this for the 95% case.\n */\n async within<T>(\n init: Parameters<TraceEmitter['span']>[0],\n fn: (handle: SpanHandle) => Promise<T>,\n ): Promise<T> {\n const handle = await this.span(init)\n try {\n const result = await fn(handle)\n await handle.end()\n return result\n } catch (err) {\n await handle.fail(err instanceof Error ? err : String(err))\n throw err\n }\n }\n}\n\n// Helpers -------------------------------------------------------------\n\nfunction cryptoRandomId(): string {\n if (typeof globalThis.crypto?.randomUUID === 'function') return globalThis.crypto.randomUUID()\n return `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`\n}\n\n/** Helper to build an LLM span handle args object from a provider-shaped response. */\nexport function llmSpanFromProvider(args: {\n name?: string\n model: string\n messages: Message[]\n output: string\n usage?: {\n inputTokens?: number\n outputTokens?: number\n cachedTokens?: number\n reasoningTokens?: number\n }\n costUsd?: number\n finishReason?: string\n}): Omit<LlmSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'> {\n return {\n name: args.name ?? args.model,\n model: args.model,\n messages: args.messages,\n output: args.output,\n inputTokens: args.usage?.inputTokens,\n outputTokens: args.usage?.outputTokens,\n cachedTokens: args.usage?.cachedTokens,\n reasoningTokens: args.usage?.reasoningTokens,\n costUsd: args.costUsd,\n finishReason: args.finishReason,\n }\n}\n"],"mappings":";AAmEO,IAAM,eAAN,MAAmB;AAAA,EAChB;AAAA,EACA,QAAkB,CAAC;AAAA,EACnB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,OAAmB,UAA+B,CAAC,GAAG;AAChE,SAAK,QAAQ;AACb,SAAK,MAAM,QAAQ,QAAQ,MAAM,KAAK,IAAI;AAC1C,SAAK,KAAK,QAAQ,OAAO,MAAM,eAAe;AAC9C,SAAK,SAAS,QAAQ,SAAS,KAAK,GAAG;AACvC,SAAK,QAAQ,QAAQ,iBAAiB,CAAC;AACvC,SAAK,aAAa,QAAQ,cAAc;AAAA,EAC1C;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAyB;AAC3B,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,mBAAmB,MAA6B;AAC9C,SAAK,MAAM,KAAK,IAAI;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAcA,MAAM,SACJ,KACc;AACd,UAAM,aAAa,IAAI,cAAc,IAAI,SAAS,IAAI,MAAM,QAAQ;AACpE,UAAM,OAAY;AAAA,MAChB,GAAG;AAAA,MACH;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,WAAW,KAAK,IAAI;AAAA,MACpB,QAAQ;AAAA,IACV;AACA,UAAM,KAAK,MAAM,UAAU,IAAI;AAC/B,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,OAAO,SAAqC;AAChD,UAAM,SAAiC,SAAS,SAAS,QAAQ,WAAW;AAC5E,UAAM,KAAK,MAAM,UAAU,KAAK,QAAQ,EAAE,SAAS,KAAK,IAAI,GAAG,QAAQ,QAAQ,CAAC;AAChF,UAAM,KAAK,SAAS,EAAE,OAAO,KAAK,QAAQ,SAAS,MAAM,OAAO,KAAK,OAAO,SAAS,OAAO,CAAC;AAAA,EAC/F;AAAA,EAEA,MAAM,SAAS,QAA+B;AAC5C,UAAM,UAAU,EAAE,MAAM,OAAO,OAAO,OAAO;AAC7C,UAAM,KAAK,MAAM,UAAU,KAAK,QAAQ;AAAA,MACtC,SAAS,KAAK,IAAI;AAAA,MAClB,QAAQ;AAAA,MACR;AAAA,IACF,CAAC;AACD,UAAM,KAAK,SAAS;AAAA,MAClB,OAAO,KAAK;AAAA,MACZ,SAAS;AAAA,MACT,OAAO,KAAK;AAAA,MACZ;AAAA,MACA,QAAQ;AAAA,IACV,CAAC;AAAA,EACH;AAAA,EAEA,MAAc,SAAS,KAA4C;AACjE,eAAW,QAAQ,KAAK,OAAO;AAC7B,UAAI;AACF,cAAM,KAAK,GAAG;AAAA,MAChB,SAAS,KAAK;AACZ,YAAI,KAAK,eAAe,QAAS,OAAM;AACvC,YAAI;AACF,gBAAM,KAAK,MAAM,YAAY;AAAA,YAC3B,SAAS,KAAK,GAAG;AAAA,YACjB,OAAO,KAAK;AAAA,YACZ,MAAM;AAAA,YACN,WAAW,KAAK,IAAI;AAAA,YACpB,SAAS;AAAA,cACP,QAAQ;AAAA,cACR,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,YACxD;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AAAA,QAER;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAIA,MAAM,KACJ,MAMwB;AACxB,UAAM,SAAS,KAAK,GAAG;AACvB,UAAM,SAAS,KAAK,gBAAgB,KAAK,MAAM,KAAK,MAAM,SAAS,CAAC;AACpE,UAAM,OAAO;AAAA,MACX;AAAA,MACA,cAAc;AAAA,MACd,OAAO,KAAK;AAAA,MACZ,WAAW,KAAK,IAAI;AAAA,MACpB,GAAG;AAAA,IACL;AACA,UAAM,KAAK,MAAM,WAAW,IAAI;AAChC,SAAK,MAAM,KAAK,MAAM;AACtB,WAAO,KAAK,OAAU,IAAI;AAAA,EAC5B;AAAA,EAEQ,OAAuB,MAAwB;AACrD,WAAO;AAAA,MACL;AAAA,MACA,KAAK,OAAO,UAAuB;AACjC,cAAM,UAAU,KAAK,IAAI;AACzB,cAAM,KAAK,MAAM,WAAW,KAAK,QAAQ;AAAA,UACvC;AAAA,UACA,QAAQ;AAAA,UACR,GAAG;AAAA,QACL,CAAkB;AAClB,aAAK,IAAI,KAAK,MAAM;AAAA,MACtB;AAAA,MACA,MAAM,OAAO,OAAuB,UAAuB;AACzD,cAAM,UAAU,KAAK,IAAI;AACzB,cAAM,SAAS,iBAAiB,QAAQ,MAAM,UAAU;AACxD,cAAM,KAAK,MAAM,WAAW,KAAK,QAAQ;AAAA,UACvC;AAAA,UACA,QAAQ;AAAA,UACR,OAAO;AAAA,UACP,GAAG;AAAA,QACL,CAAkB;AAClB,aAAK,IAAI,KAAK,MAAM;AAAA,MACtB;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,IAAI,QAAsB;AAChC,UAAM,MAAM,KAAK,MAAM,YAAY,MAAM;AACzC,QAAI,OAAO,EAAG,MAAK,MAAM,OAAO,KAAK,CAAC;AAAA,EACxC;AAAA;AAAA,EAIA,IACE,MAC8B;AAC9B,WAAO,KAAK,KAAc,EAAE,MAAM,OAAO,GAAG,KAAK,CAAC;AAAA,EACpD;AAAA,EAEA,KACE,MAC+B;AAC/B,WAAO,KAAK,KAAe,EAAE,MAAM,QAAQ,GAAG,KAAK,CAAC;AAAA,EACtD;AAAA,EAEA,UACE,MACoC;AACpC,WAAO,KAAK,KAAoB,EAAE,MAAM,aAAa,GAAG,KAAK,CAAC;AAAA,EAChE;AAAA,EAEA,MAAM,YACJ,SACoB;AACpB,UAAM,SAAS,KAAK,GAAG;AACvB,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,OAAkB;AAAA,MACtB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,MAAM;AAAA,MACN,WAAW;AAAA,MACX,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,GAAG;AAAA,IACL;AACA,UAAM,KAAK,MAAM,WAAW,IAAI;AAChC,WAAO;AAAA,EACT;AAAA,EAEA,QACE,MACkC;AAClC,WAAO,KAAK,KAAkB,EAAE,MAAM,WAAW,GAAG,KAAK,CAAC;AAAA,EAC5D;AAAA;AAAA,EAIA,MAAM,KAAK,OAIa;AACtB,UAAM,OAAmB;AAAA,MACvB,SAAS,KAAK,GAAG;AAAA,MACjB,OAAO,KAAK;AAAA,MACZ,QAAQ,MAAM,UAAU,KAAK,MAAM,KAAK,MAAM,SAAS,CAAC;AAAA,MACxD,MAAM,MAAM;AAAA,MACZ,WAAW,KAAK,IAAI;AAAA,MACpB,SAAS,MAAM,WAAW,CAAC;AAAA,IAC7B;AACA,UAAM,KAAK,MAAM,YAAY,IAAI;AACjC,WAAO;AAAA,EACT;AAAA;AAAA,EAIA,MAAM,aACJ,OAC4B;AAC5B,UAAM,OAA0B;AAAA,MAC9B,OAAO,KAAK;AAAA,MACZ,WAAW,MAAM,aAAa,KAAK,IAAI;AAAA,MACvC,WAAW,MAAM;AAAA,MACjB,OAAO,MAAM;AAAA,MACb,UAAU,MAAM;AAAA,MAChB,WAAW,MAAM;AAAA,MACjB,UAAU,MAAM;AAAA,MAChB,QAAQ,MAAM,UAAU,KAAK,MAAM,KAAK,MAAM,SAAS,CAAC;AAAA,IAC1D;AACA,UAAM,KAAK,MAAM,kBAAkB,IAAI;AACvC,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,KAAK;AAAA,QACd,MAAM;AAAA,QACN,QAAQ,KAAK;AAAA,QACb,SAAS,EAAE,WAAW,KAAK,WAAW,OAAO,KAAK,OAAO,UAAU,KAAK,SAAS;AAAA,MACnF,CAAC;AAAA,IACH;AACA,WAAO;AAAA,EACT;AAAA;AAAA,EAIA,MAAM,eAAe,UAAqE;AACxF,UAAM,OAAiB,EAAE,YAAY,KAAK,GAAG,GAAG,OAAO,KAAK,QAAQ,GAAG,SAAS;AAChF,UAAM,KAAK,MAAM,eAAe,IAAI;AACpC,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,OACJ,MACA,IACY;AACZ,UAAM,SAAS,MAAM,KAAK,KAAK,IAAI;AACnC,QAAI;AACF,YAAM,SAAS,MAAM,GAAG,MAAM;AAC9B,YAAM,OAAO,IAAI;AACjB,aAAO;AAAA,IACT,SAAS,KAAK;AACZ,YAAM,OAAO,KAAK,eAAe,QAAQ,MAAM,OAAO,GAAG,CAAC;AAC1D,YAAM;AAAA,IACR;AAAA,EACF;AACF;AAIA,SAAS,iBAAyB;AAChC,MAAI,OAAO,WAAW,QAAQ,eAAe,WAAY,QAAO,WAAW,OAAO,WAAW;AAC7F,SAAO,GAAG,KAAK,IAAI,EAAE,SAAS,EAAE,CAAC,IAAI,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,GAAG,EAAE,CAAC;AAC9E;AAGO,SAAS,oBAAoB,MAayB;AAC3D,SAAO;AAAA,IACL,MAAM,KAAK,QAAQ,KAAK;AAAA,IACxB,OAAO,KAAK;AAAA,IACZ,UAAU,KAAK;AAAA,IACf,QAAQ,KAAK;AAAA,IACb,aAAa,KAAK,OAAO;AAAA,IACzB,cAAc,KAAK,OAAO;AAAA,IAC1B,cAAc,KAAK,OAAO;AAAA,IAC1B,iBAAiB,KAAK,OAAO;AAAA,IAC7B,SAAS,KAAK;AAAA,IACd,cAAc,KAAK;AAAA,EACrB;AACF;","names":[]}
@@ -0,0 +1,196 @@
1
+ import {
2
+ objectiveEval
3
+ } from "./chunk-LSH4MMOZ.js";
4
+
5
+ // src/knowledge/readiness.ts
6
+ function scoreKnowledgeReadiness(options) {
7
+ const now = options.now ?? /* @__PURE__ */ new Date();
8
+ const requirements = options.requirements.map(normalizeRequirement);
9
+ const missing = requirements.filter((requirement) => isRequirementMissing(requirement, now));
10
+ const blockingMissingRequirements = missing.filter(isBlockingGap);
11
+ const nonBlockingGaps = missing.filter((requirement) => !isBlockingGap(requirement));
12
+ const readinessScore = weightedReadinessAt(requirements, now);
13
+ const bundle = {
14
+ taskId: options.taskId,
15
+ requirements,
16
+ evidenceIds: unique([
17
+ ...options.evidenceIds ?? [],
18
+ ...requirements.flatMap((r) => r.evidenceIds)
19
+ ]),
20
+ claimIds: unique(options.claimIds ?? []),
21
+ wikiPageIds: unique(options.wikiPageIds ?? []),
22
+ userAnswers: options.userAnswers ?? {},
23
+ missing,
24
+ readinessScore,
25
+ metadata: options.metadata
26
+ };
27
+ const recommendedAction = chooseRecommendedAction(blockingMissingRequirements, nonBlockingGaps);
28
+ const severity = blockingMissingRequirements.length > 0 ? "critical" : nonBlockingGaps.some((gap) => gap.importance === "high") ? "warning" : "info";
29
+ const reason = blockingMissingRequirements.length > 0 ? `${blockingMissingRequirements.length} blocking knowledge requirement(s) are missing.` : nonBlockingGaps.length > 0 ? `${nonBlockingGaps.length} non-blocking knowledge gap(s) remain.` : "All declared knowledge requirements are ready.";
30
+ return {
31
+ taskId: options.taskId,
32
+ readinessScore,
33
+ blockingMissingRequirements,
34
+ nonBlockingGaps,
35
+ recommendedAction,
36
+ bundle,
37
+ severity,
38
+ reason
39
+ };
40
+ }
41
+ function blockingKnowledgeEval(report, options = {}) {
42
+ const minimumScore = options.minimumScore ?? 0.7;
43
+ const passed = report.blockingMissingRequirements.length === 0 && report.readinessScore >= minimumScore;
44
+ if (options.emitter) {
45
+ void options.emitter.emit({
46
+ kind: "custom",
47
+ payload: knowledgeReadinessTracePayload(report, { passed, minimumScore })
48
+ }).catch(() => void 0);
49
+ }
50
+ return objectiveEval({
51
+ id: options.id ?? "knowledge-ready",
52
+ passed,
53
+ score: report.readinessScore,
54
+ severity: passed ? "info" : report.severity,
55
+ detail: report.reason,
56
+ evidence: report.blockingMissingRequirements.map((r) => r.id).join(", ") || void 0,
57
+ metadata: { knowledgeReadiness: report }
58
+ });
59
+ }
60
+ function knowledgeReadinessTracePayload(report, options = {}) {
61
+ return {
62
+ kind: "readiness_scored",
63
+ taskId: report.taskId,
64
+ passed: options.passed ?? report.blockingMissingRequirements.length === 0,
65
+ readinessScore: report.readinessScore,
66
+ minimumScore: options.minimumScore,
67
+ blockingRequirementIds: report.blockingMissingRequirements.map((r) => r.id),
68
+ nonBlockingRequirementIds: report.nonBlockingGaps.map((r) => r.id),
69
+ recommendedAction: report.recommendedAction,
70
+ severity: report.severity,
71
+ reason: report.reason
72
+ };
73
+ }
74
+ function userQuestionsForKnowledgeGaps(gaps) {
75
+ return gaps.filter((gap) => gap.acquisitionMode === "ask_user" || gap.fallbackPolicy === "ask").map((gap) => ({
76
+ id: `question_${gap.id}`,
77
+ question: `Please provide: ${gap.description}`,
78
+ reason: `Required for ${gap.requiredFor.join(", ") || "the task"}.`,
79
+ requirementId: gap.id,
80
+ importance: gap.importance,
81
+ answerType: gap.sensitivity === "secret" ? "credential" : "free_text",
82
+ impactIfUnknown: impactFor(gap)
83
+ }));
84
+ }
85
+ function acquisitionPlansForKnowledgeGaps(gaps) {
86
+ const byMode = /* @__PURE__ */ new Map();
87
+ for (const gap of gaps) {
88
+ const mode = planMode(gap.acquisitionMode);
89
+ if (!mode) continue;
90
+ const bucket = byMode.get(mode) ?? [];
91
+ bucket.push(gap);
92
+ byMode.set(mode, bucket);
93
+ }
94
+ return [...byMode.entries()].map(([mode, requirements]) => ({
95
+ id: `acquire_${mode}`,
96
+ requirementIds: requirements.map((r) => r.id),
97
+ mode,
98
+ description: descriptionForPlan(mode, requirements),
99
+ priority: maxImportance(requirements.map((r) => r.importance)),
100
+ questions: mode === "ask_user" ? userQuestionsForKnowledgeGaps(requirements) : void 0
101
+ }));
102
+ }
103
+ function normalizeRequirement(requirement) {
104
+ return {
105
+ ...requirement,
106
+ confidenceNeeded: clamp01(requirement.confidenceNeeded),
107
+ currentConfidence: clamp01(requirement.currentConfidence),
108
+ evidenceIds: unique(requirement.evidenceIds)
109
+ };
110
+ }
111
+ function weightedReadinessAt(requirements, now) {
112
+ if (requirements.length === 0) return 1;
113
+ let weightSum = 0;
114
+ let scoreSum = 0;
115
+ for (const requirement of requirements) {
116
+ const weight = importanceWeight(requirement.importance);
117
+ const score = isExpired(requirement, now) ? 0 : requirement.confidenceNeeded <= 0 ? 1 : Math.min(1, requirement.currentConfidence / requirement.confidenceNeeded);
118
+ weightSum += weight;
119
+ scoreSum += weight * score;
120
+ }
121
+ return clamp01(scoreSum / weightSum);
122
+ }
123
+ function isRequirementMissing(requirement, now) {
124
+ return isExpired(requirement, now) || requirement.currentConfidence < requirement.confidenceNeeded;
125
+ }
126
+ function isExpired(requirement, now) {
127
+ if (!requirement.validUntil) return false;
128
+ const deadline = Date.parse(requirement.validUntil);
129
+ if (!Number.isFinite(deadline)) return true;
130
+ return deadline <= now.getTime();
131
+ }
132
+ function isBlockingGap(requirement) {
133
+ return requirement.importance === "blocking" || requirement.fallbackPolicy === "block" || requirement.sensitivity === "secret";
134
+ }
135
+ function chooseRecommendedAction(blocking, nonBlocking) {
136
+ const gaps = blocking.length > 0 ? blocking : nonBlocking;
137
+ if (gaps.length === 0) return "run_agent";
138
+ if (gaps.some((gap) => gap.acquisitionMode === "ask_user" || gap.fallbackPolicy === "ask"))
139
+ return "ask_user";
140
+ if (gaps.some((gap) => gap.acquisitionMode === "query_connector")) return "query_connectors";
141
+ if (gaps.some(
142
+ (gap) => gap.acquisitionMode === "inspect_repo" || gap.acquisitionMode === "run_command"
143
+ ))
144
+ return "inspect_repo";
145
+ if (gaps.some((gap) => gap.acquisitionMode === "search_web")) return "collect_web_data";
146
+ if (gaps.some((gap) => gap.acquisitionMode === "not_available")) return "abort_or_rescope";
147
+ if (nonBlocking.some((gap) => gap.importance === "high")) return "build_domain_wiki";
148
+ return "continue_with_caveat";
149
+ }
150
+ function planMode(mode) {
151
+ if (mode === "infer_low_confidence" || mode === "not_available") return null;
152
+ return mode;
153
+ }
154
+ function descriptionForPlan(mode, requirements) {
155
+ const labels = requirements.map((r) => r.description).join("; ");
156
+ if (mode === "ask_user") return `Ask the user for: ${labels}`;
157
+ if (mode === "search_web") return `Search web or documentation sources for: ${labels}`;
158
+ if (mode === "query_connector") return `Query configured connectors for: ${labels}`;
159
+ if (mode === "inspect_repo") return `Inspect repository context for: ${labels}`;
160
+ if (mode === "run_command") return `Run local commands to collect: ${labels}`;
161
+ return `Build domain wiki evidence for: ${labels}`;
162
+ }
163
+ function impactFor(requirement) {
164
+ if (requirement.fallbackPolicy === "block") return "The agent should not run until this is known.";
165
+ if (requirement.fallbackPolicy === "continue_with_caveat")
166
+ return "The agent may continue, but must disclose uncertainty.";
167
+ if (requirement.fallbackPolicy === "use_default")
168
+ return "The agent will use the configured default if skipped.";
169
+ return "The agent should ask before continuing.";
170
+ }
171
+ function maxImportance(values) {
172
+ const order = ["blocking", "high", "medium", "low"];
173
+ return order.find((value) => values.includes(value)) ?? "low";
174
+ }
175
+ function importanceWeight(importance) {
176
+ if (importance === "blocking") return 8;
177
+ if (importance === "high") return 4;
178
+ if (importance === "medium") return 2;
179
+ return 1;
180
+ }
181
+ function clamp01(value) {
182
+ if (!Number.isFinite(value)) return 0;
183
+ return Math.max(0, Math.min(1, value));
184
+ }
185
+ function unique(items) {
186
+ return [...new Set(items)];
187
+ }
188
+
189
+ export {
190
+ scoreKnowledgeReadiness,
191
+ blockingKnowledgeEval,
192
+ knowledgeReadinessTracePayload,
193
+ userQuestionsForKnowledgeGaps,
194
+ acquisitionPlansForKnowledgeGaps
195
+ };
196
+ //# sourceMappingURL=chunk-WWYCWKUM.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/knowledge/readiness.ts"],"sourcesContent":["import { type ControlEvalResult, objectiveEval } from '../control-runtime'\nimport type { TraceEmitter } from '../trace/emitter'\nimport type {\n DataAcquisitionPlan,\n KnowledgeAcquisitionMode,\n KnowledgeBundle,\n KnowledgeImportance,\n KnowledgeReadinessReport,\n KnowledgeRecommendedAction,\n KnowledgeRequirement,\n UserQuestion,\n} from './types'\n\nexport interface ScoreKnowledgeReadinessOptions {\n taskId: string\n requirements: KnowledgeRequirement[]\n evidenceIds?: string[]\n claimIds?: string[]\n wikiPageIds?: string[]\n userAnswers?: Record<string, string>\n metadata?: Record<string, unknown>\n now?: Date\n}\n\nexport function scoreKnowledgeReadiness(\n options: ScoreKnowledgeReadinessOptions,\n): KnowledgeReadinessReport {\n const now = options.now ?? new Date()\n const requirements = options.requirements.map(normalizeRequirement)\n const missing = requirements.filter((requirement) => isRequirementMissing(requirement, now))\n const blockingMissingRequirements = missing.filter(isBlockingGap)\n const nonBlockingGaps = missing.filter((requirement) => !isBlockingGap(requirement))\n const readinessScore = weightedReadinessAt(requirements, now)\n const bundle: KnowledgeBundle = {\n taskId: options.taskId,\n requirements,\n evidenceIds: unique([\n ...(options.evidenceIds ?? []),\n ...requirements.flatMap((r) => r.evidenceIds),\n ]),\n claimIds: unique(options.claimIds ?? []),\n wikiPageIds: unique(options.wikiPageIds ?? []),\n userAnswers: options.userAnswers ?? {},\n missing,\n readinessScore,\n metadata: options.metadata,\n }\n const recommendedAction = chooseRecommendedAction(blockingMissingRequirements, nonBlockingGaps)\n const severity =\n blockingMissingRequirements.length > 0\n ? 'critical'\n : nonBlockingGaps.some((gap) => gap.importance === 'high')\n ? 'warning'\n : 'info'\n const reason =\n blockingMissingRequirements.length > 0\n ? `${blockingMissingRequirements.length} blocking knowledge requirement(s) are missing.`\n : nonBlockingGaps.length > 0\n ? `${nonBlockingGaps.length} non-blocking knowledge gap(s) remain.`\n : 'All declared knowledge requirements are ready.'\n\n return {\n taskId: options.taskId,\n readinessScore,\n blockingMissingRequirements,\n nonBlockingGaps,\n recommendedAction,\n bundle,\n severity,\n reason,\n }\n}\n\nexport function blockingKnowledgeEval(\n report: KnowledgeReadinessReport,\n options: { id?: string; minimumScore?: number; emitter?: TraceEmitter } = {},\n): ControlEvalResult {\n const minimumScore = options.minimumScore ?? 0.7\n const passed =\n report.blockingMissingRequirements.length === 0 && report.readinessScore >= minimumScore\n if (options.emitter) {\n void options.emitter\n .emit({\n kind: 'custom',\n payload: knowledgeReadinessTracePayload(report, { passed, minimumScore }),\n })\n .catch(() => undefined)\n }\n return objectiveEval({\n id: options.id ?? 'knowledge-ready',\n passed,\n score: report.readinessScore,\n severity: passed ? 'info' : report.severity,\n detail: report.reason,\n evidence: report.blockingMissingRequirements.map((r) => r.id).join(', ') || undefined,\n metadata: { knowledgeReadiness: report },\n })\n}\n\nexport function knowledgeReadinessTracePayload(\n report: KnowledgeReadinessReport,\n options: { passed?: boolean; minimumScore?: number } = {},\n): Record<string, unknown> {\n return {\n kind: 'readiness_scored',\n taskId: report.taskId,\n passed: options.passed ?? report.blockingMissingRequirements.length === 0,\n readinessScore: report.readinessScore,\n minimumScore: options.minimumScore,\n blockingRequirementIds: report.blockingMissingRequirements.map((r) => r.id),\n nonBlockingRequirementIds: report.nonBlockingGaps.map((r) => r.id),\n recommendedAction: report.recommendedAction,\n severity: report.severity,\n reason: report.reason,\n }\n}\n\nexport function userQuestionsForKnowledgeGaps(gaps: KnowledgeRequirement[]): UserQuestion[] {\n return gaps\n .filter((gap) => gap.acquisitionMode === 'ask_user' || gap.fallbackPolicy === 'ask')\n .map((gap) => ({\n id: `question_${gap.id}`,\n question: `Please provide: ${gap.description}`,\n reason: `Required for ${gap.requiredFor.join(', ') || 'the task'}.`,\n requirementId: gap.id,\n importance: gap.importance,\n answerType: gap.sensitivity === 'secret' ? 'credential' : 'free_text',\n impactIfUnknown: impactFor(gap),\n }))\n}\n\nexport function acquisitionPlansForKnowledgeGaps(\n gaps: KnowledgeRequirement[],\n): DataAcquisitionPlan[] {\n const byMode = new Map<string, KnowledgeRequirement[]>()\n for (const gap of gaps) {\n const mode = planMode(gap.acquisitionMode)\n if (!mode) continue\n const bucket = byMode.get(mode) ?? []\n bucket.push(gap)\n byMode.set(mode, bucket)\n }\n return [...byMode.entries()].map(([mode, requirements]) => ({\n id: `acquire_${mode}`,\n requirementIds: requirements.map((r) => r.id),\n mode: mode as DataAcquisitionPlan['mode'],\n description: descriptionForPlan(mode as DataAcquisitionPlan['mode'], requirements),\n priority: maxImportance(requirements.map((r) => r.importance)),\n questions: mode === 'ask_user' ? userQuestionsForKnowledgeGaps(requirements) : undefined,\n }))\n}\n\nfunction normalizeRequirement(requirement: KnowledgeRequirement): KnowledgeRequirement {\n return {\n ...requirement,\n confidenceNeeded: clamp01(requirement.confidenceNeeded),\n currentConfidence: clamp01(requirement.currentConfidence),\n evidenceIds: unique(requirement.evidenceIds),\n }\n}\n\nfunction weightedReadinessAt(requirements: KnowledgeRequirement[], now: Date): number {\n if (requirements.length === 0) return 1\n let weightSum = 0\n let scoreSum = 0\n for (const requirement of requirements) {\n const weight = importanceWeight(requirement.importance)\n const score = isExpired(requirement, now)\n ? 0\n : requirement.confidenceNeeded <= 0\n ? 1\n : Math.min(1, requirement.currentConfidence / requirement.confidenceNeeded)\n weightSum += weight\n scoreSum += weight * score\n }\n return clamp01(scoreSum / weightSum)\n}\n\nfunction isRequirementMissing(requirement: KnowledgeRequirement, now: Date): boolean {\n return isExpired(requirement, now) || requirement.currentConfidence < requirement.confidenceNeeded\n}\n\nfunction isExpired(requirement: KnowledgeRequirement, now: Date): boolean {\n if (!requirement.validUntil) return false\n const deadline = Date.parse(requirement.validUntil)\n if (!Number.isFinite(deadline)) return true\n return deadline <= now.getTime()\n}\n\nfunction isBlockingGap(requirement: KnowledgeRequirement): boolean {\n return (\n requirement.importance === 'blocking' ||\n requirement.fallbackPolicy === 'block' ||\n requirement.sensitivity === 'secret'\n )\n}\n\nfunction chooseRecommendedAction(\n blocking: KnowledgeRequirement[],\n nonBlocking: KnowledgeRequirement[],\n): KnowledgeRecommendedAction {\n const gaps = blocking.length > 0 ? blocking : nonBlocking\n if (gaps.length === 0) return 'run_agent'\n if (gaps.some((gap) => gap.acquisitionMode === 'ask_user' || gap.fallbackPolicy === 'ask'))\n return 'ask_user'\n if (gaps.some((gap) => gap.acquisitionMode === 'query_connector')) return 'query_connectors'\n if (\n gaps.some(\n (gap) => gap.acquisitionMode === 'inspect_repo' || gap.acquisitionMode === 'run_command',\n )\n )\n return 'inspect_repo'\n if (gaps.some((gap) => gap.acquisitionMode === 'search_web')) return 'collect_web_data'\n if (gaps.some((gap) => gap.acquisitionMode === 'not_available')) return 'abort_or_rescope'\n if (nonBlocking.some((gap) => gap.importance === 'high')) return 'build_domain_wiki'\n return 'continue_with_caveat'\n}\n\nfunction planMode(mode: KnowledgeAcquisitionMode): DataAcquisitionPlan['mode'] | null {\n if (mode === 'infer_low_confidence' || mode === 'not_available') return null\n return mode\n}\n\nfunction descriptionForPlan(\n mode: DataAcquisitionPlan['mode'],\n requirements: KnowledgeRequirement[],\n): string {\n const labels = requirements.map((r) => r.description).join('; ')\n if (mode === 'ask_user') return `Ask the user for: ${labels}`\n if (mode === 'search_web') return `Search web or documentation sources for: ${labels}`\n if (mode === 'query_connector') return `Query configured connectors for: ${labels}`\n if (mode === 'inspect_repo') return `Inspect repository context for: ${labels}`\n if (mode === 'run_command') return `Run local commands to collect: ${labels}`\n return `Build domain wiki evidence for: ${labels}`\n}\n\nfunction impactFor(requirement: KnowledgeRequirement): string {\n if (requirement.fallbackPolicy === 'block') return 'The agent should not run until this is known.'\n if (requirement.fallbackPolicy === 'continue_with_caveat')\n return 'The agent may continue, but must disclose uncertainty.'\n if (requirement.fallbackPolicy === 'use_default')\n return 'The agent will use the configured default if skipped.'\n return 'The agent should ask before continuing.'\n}\n\nfunction maxImportance(values: KnowledgeImportance[]): KnowledgeImportance {\n const order: KnowledgeImportance[] = ['blocking', 'high', 'medium', 'low']\n return order.find((value) => values.includes(value)) ?? 'low'\n}\n\nfunction importanceWeight(importance: KnowledgeImportance): number {\n if (importance === 'blocking') return 8\n if (importance === 'high') return 4\n if (importance === 'medium') return 2\n return 1\n}\n\nfunction clamp01(value: number): number {\n if (!Number.isFinite(value)) return 0\n return Math.max(0, Math.min(1, value))\n}\n\nfunction unique<T>(items: T[]): T[] {\n return [...new Set(items)]\n}\n"],"mappings":";;;;;AAwBO,SAAS,wBACd,SAC0B;AAC1B,QAAM,MAAM,QAAQ,OAAO,oBAAI,KAAK;AACpC,QAAM,eAAe,QAAQ,aAAa,IAAI,oBAAoB;AAClE,QAAM,UAAU,aAAa,OAAO,CAAC,gBAAgB,qBAAqB,aAAa,GAAG,CAAC;AAC3F,QAAM,8BAA8B,QAAQ,OAAO,aAAa;AAChE,QAAM,kBAAkB,QAAQ,OAAO,CAAC,gBAAgB,CAAC,cAAc,WAAW,CAAC;AACnF,QAAM,iBAAiB,oBAAoB,cAAc,GAAG;AAC5D,QAAM,SAA0B;AAAA,IAC9B,QAAQ,QAAQ;AAAA,IAChB;AAAA,IACA,aAAa,OAAO;AAAA,MAClB,GAAI,QAAQ,eAAe,CAAC;AAAA,MAC5B,GAAG,aAAa,QAAQ,CAAC,MAAM,EAAE,WAAW;AAAA,IAC9C,CAAC;AAAA,IACD,UAAU,OAAO,QAAQ,YAAY,CAAC,CAAC;AAAA,IACvC,aAAa,OAAO,QAAQ,eAAe,CAAC,CAAC;AAAA,IAC7C,aAAa,QAAQ,eAAe,CAAC;AAAA,IACrC;AAAA,IACA;AAAA,IACA,UAAU,QAAQ;AAAA,EACpB;AACA,QAAM,oBAAoB,wBAAwB,6BAA6B,eAAe;AAC9F,QAAM,WACJ,4BAA4B,SAAS,IACjC,aACA,gBAAgB,KAAK,CAAC,QAAQ,IAAI,eAAe,MAAM,IACrD,YACA;AACR,QAAM,SACJ,4BAA4B,SAAS,IACjC,GAAG,4BAA4B,MAAM,oDACrC,gBAAgB,SAAS,IACvB,GAAG,gBAAgB,MAAM,2CACzB;AAER,SAAO;AAAA,IACL,QAAQ,QAAQ;AAAA,IAChB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEO,SAAS,sBACd,QACA,UAA0E,CAAC,GACxD;AACnB,QAAM,eAAe,QAAQ,gBAAgB;AAC7C,QAAM,SACJ,OAAO,4BAA4B,WAAW,KAAK,OAAO,kBAAkB;AAC9E,MAAI,QAAQ,SAAS;AACnB,SAAK,QAAQ,QACV,KAAK;AAAA,MACJ,MAAM;AAAA,MACN,SAAS,+BAA+B,QAAQ,EAAE,QAAQ,aAAa,CAAC;AAAA,IAC1E,CAAC,EACA,MAAM,MAAM,MAAS;AAAA,EAC1B;AACA,SAAO,cAAc;AAAA,IACnB,IAAI,QAAQ,MAAM;AAAA,IAClB;AAAA,IACA,OAAO,OAAO;AAAA,IACd,UAAU,SAAS,SAAS,OAAO;AAAA,IACnC,QAAQ,OAAO;AAAA,IACf,UAAU,OAAO,4BAA4B,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE,KAAK,IAAI,KAAK;AAAA,IAC5E,UAAU,EAAE,oBAAoB,OAAO;AAAA,EACzC,CAAC;AACH;AAEO,SAAS,+BACd,QACA,UAAuD,CAAC,GAC/B;AACzB,SAAO;AAAA,IACL,MAAM;AAAA,IACN,QAAQ,OAAO;AAAA,IACf,QAAQ,QAAQ,UAAU,OAAO,4BAA4B,WAAW;AAAA,IACxE,gBAAgB,OAAO;AAAA,IACvB,cAAc,QAAQ;AAAA,IACtB,wBAAwB,OAAO,4BAA4B,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,IAC1E,2BAA2B,OAAO,gBAAgB,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,IACjE,mBAAmB,OAAO;AAAA,IAC1B,UAAU,OAAO;AAAA,IACjB,QAAQ,OAAO;AAAA,EACjB;AACF;AAEO,SAAS,8BAA8B,MAA8C;AAC1F,SAAO,KACJ,OAAO,CAAC,QAAQ,IAAI,oBAAoB,cAAc,IAAI,mBAAmB,KAAK,EAClF,IAAI,CAAC,SAAS;AAAA,IACb,IAAI,YAAY,IAAI,EAAE;AAAA,IACtB,UAAU,mBAAmB,IAAI,WAAW;AAAA,IAC5C,QAAQ,gBAAgB,IAAI,YAAY,KAAK,IAAI,KAAK,UAAU;AAAA,IAChE,eAAe,IAAI;AAAA,IACnB,YAAY,IAAI;AAAA,IAChB,YAAY,IAAI,gBAAgB,WAAW,eAAe;AAAA,IAC1D,iBAAiB,UAAU,GAAG;AAAA,EAChC,EAAE;AACN;AAEO,SAAS,iCACd,MACuB;AACvB,QAAM,SAAS,oBAAI,IAAoC;AACvD,aAAW,OAAO,MAAM;AACtB,UAAM,OAAO,SAAS,IAAI,eAAe;AACzC,QAAI,CAAC,KAAM;AACX,UAAM,SAAS,OAAO,IAAI,IAAI,KAAK,CAAC;AACpC,WAAO,KAAK,GAAG;AACf,WAAO,IAAI,MAAM,MAAM;AAAA,EACzB;AACA,SAAO,CAAC,GAAG,OAAO,QAAQ,CAAC,EAAE,IAAI,CAAC,CAAC,MAAM,YAAY,OAAO;AAAA,IAC1D,IAAI,WAAW,IAAI;AAAA,IACnB,gBAAgB,aAAa,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,IAC5C;AAAA,IACA,aAAa,mBAAmB,MAAqC,YAAY;AAAA,IACjF,UAAU,cAAc,aAAa,IAAI,CAAC,MAAM,EAAE,UAAU,CAAC;AAAA,IAC7D,WAAW,SAAS,aAAa,8BAA8B,YAAY,IAAI;AAAA,EACjF,EAAE;AACJ;AAEA,SAAS,qBAAqB,aAAyD;AACrF,SAAO;AAAA,IACL,GAAG;AAAA,IACH,kBAAkB,QAAQ,YAAY,gBAAgB;AAAA,IACtD,mBAAmB,QAAQ,YAAY,iBAAiB;AAAA,IACxD,aAAa,OAAO,YAAY,WAAW;AAAA,EAC7C;AACF;AAEA,SAAS,oBAAoB,cAAsC,KAAmB;AACpF,MAAI,aAAa,WAAW,EAAG,QAAO;AACtC,MAAI,YAAY;AAChB,MAAI,WAAW;AACf,aAAW,eAAe,cAAc;AACtC,UAAM,SAAS,iBAAiB,YAAY,UAAU;AACtD,UAAM,QAAQ,UAAU,aAAa,GAAG,IACpC,IACA,YAAY,oBAAoB,IAC9B,IACA,KAAK,IAAI,GAAG,YAAY,oBAAoB,YAAY,gBAAgB;AAC9E,iBAAa;AACb,gBAAY,SAAS;AAAA,EACvB;AACA,SAAO,QAAQ,WAAW,SAAS;AACrC;AAEA,SAAS,qBAAqB,aAAmC,KAAoB;AACnF,SAAO,UAAU,aAAa,GAAG,KAAK,YAAY,oBAAoB,YAAY;AACpF;AAEA,SAAS,UAAU,aAAmC,KAAoB;AACxE,MAAI,CAAC,YAAY,WAAY,QAAO;AACpC,QAAM,WAAW,KAAK,MAAM,YAAY,UAAU;AAClD,MAAI,CAAC,OAAO,SAAS,QAAQ,EAAG,QAAO;AACvC,SAAO,YAAY,IAAI,QAAQ;AACjC;AAEA,SAAS,cAAc,aAA4C;AACjE,SACE,YAAY,eAAe,cAC3B,YAAY,mBAAmB,WAC/B,YAAY,gBAAgB;AAEhC;AAEA,SAAS,wBACP,UACA,aAC4B;AAC5B,QAAM,OAAO,SAAS,SAAS,IAAI,WAAW;AAC9C,MAAI,KAAK,WAAW,EAAG,QAAO;AAC9B,MAAI,KAAK,KAAK,CAAC,QAAQ,IAAI,oBAAoB,cAAc,IAAI,mBAAmB,KAAK;AACvF,WAAO;AACT,MAAI,KAAK,KAAK,CAAC,QAAQ,IAAI,oBAAoB,iBAAiB,EAAG,QAAO;AAC1E,MACE,KAAK;AAAA,IACH,CAAC,QAAQ,IAAI,oBAAoB,kBAAkB,IAAI,oBAAoB;AAAA,EAC7E;AAEA,WAAO;AACT,MAAI,KAAK,KAAK,CAAC,QAAQ,IAAI,oBAAoB,YAAY,EAAG,QAAO;AACrE,MAAI,KAAK,KAAK,CAAC,QAAQ,IAAI,oBAAoB,eAAe,EAAG,QAAO;AACxE,MAAI,YAAY,KAAK,CAAC,QAAQ,IAAI,eAAe,MAAM,EAAG,QAAO;AACjE,SAAO;AACT;AAEA,SAAS,SAAS,MAAoE;AACpF,MAAI,SAAS,0BAA0B,SAAS,gBAAiB,QAAO;AACxE,SAAO;AACT;AAEA,SAAS,mBACP,MACA,cACQ;AACR,QAAM,SAAS,aAAa,IAAI,CAAC,MAAM,EAAE,WAAW,EAAE,KAAK,IAAI;AAC/D,MAAI,SAAS,WAAY,QAAO,qBAAqB,MAAM;AAC3D,MAAI,SAAS,aAAc,QAAO,4CAA4C,MAAM;AACpF,MAAI,SAAS,kBAAmB,QAAO,oCAAoC,MAAM;AACjF,MAAI,SAAS,eAAgB,QAAO,mCAAmC,MAAM;AAC7E,MAAI,SAAS,cAAe,QAAO,kCAAkC,MAAM;AAC3E,SAAO,mCAAmC,MAAM;AAClD;AAEA,SAAS,UAAU,aAA2C;AAC5D,MAAI,YAAY,mBAAmB,QAAS,QAAO;AACnD,MAAI,YAAY,mBAAmB;AACjC,WAAO;AACT,MAAI,YAAY,mBAAmB;AACjC,WAAO;AACT,SAAO;AACT;AAEA,SAAS,cAAc,QAAoD;AACzE,QAAM,QAA+B,CAAC,YAAY,QAAQ,UAAU,KAAK;AACzE,SAAO,MAAM,KAAK,CAAC,UAAU,OAAO,SAAS,KAAK,CAAC,KAAK;AAC1D;AAEA,SAAS,iBAAiB,YAAyC;AACjE,MAAI,eAAe,WAAY,QAAO;AACtC,MAAI,eAAe,OAAQ,QAAO;AAClC,MAAI,eAAe,SAAU,QAAO;AACpC,SAAO;AACT;AAEA,SAAS,QAAQ,OAAuB;AACtC,MAAI,CAAC,OAAO,SAAS,KAAK,EAAG,QAAO;AACpC,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAEA,SAAS,OAAU,OAAiB;AAClC,SAAO,CAAC,GAAG,IAAI,IAAI,KAAK,CAAC;AAC3B;","names":[]}
@@ -152,95 +152,7 @@ function makeRng(seed) {
152
152
  };
153
153
  }
154
154
 
155
- // src/sequential.ts
156
- function pairedEvalueSequence(deltas, opts = {}) {
157
- const c = opts.bound ?? 1;
158
- const alpha = opts.alpha ?? 0.05;
159
- const initialShrink = opts.initialBetShrinkage ?? 0.5;
160
- const rope = opts.rope ?? null;
161
- if (c <= 0) throw new Error("pairedEvalueSequence: bound must be > 0");
162
- if (alpha <= 0 || alpha >= 1) throw new Error("pairedEvalueSequence: alpha must be in (0,1)");
163
- if (rope && !(Number.isFinite(rope.low) && Number.isFinite(rope.high) && rope.low <= rope.high)) {
164
- throw new Error("pairedEvalueSequence: rope must satisfy low \u2264 high");
165
- }
166
- const steps = [];
167
- let clipped = false;
168
- let evalue = 1;
169
- let decisionFiredAt = null;
170
- let sum = 0;
171
- let sumSq = 0;
172
- let count = 0;
173
- for (let i = 0; i < deltas.length; i++) {
174
- let d = deltas[i];
175
- if (d < -c || d > c) {
176
- d = Math.max(-c, Math.min(c, d));
177
- clipped = true;
178
- }
179
- const muHat = count === 0 ? 0 : sum / count;
180
- const varHat = count === 0 ? c * c : Math.max(1e-12, sumSq / count - muHat * muHat);
181
- const t = i + 1;
182
- const shrink = initialShrink * Math.min(1, count / 32);
183
- let lambda = muHat / (varHat + c * c) * shrink;
184
- const lambdaMax = 0.99 / c;
185
- if (lambda > lambdaMax) lambda = lambdaMax;
186
- if (lambda < -lambdaMax) lambda = -lambdaMax;
187
- evalue = evalue * (1 + lambda * d);
188
- if (!Number.isFinite(evalue) || evalue < 0) evalue = 0;
189
- sum += d;
190
- sumSq += d * d;
191
- count += 1;
192
- const pValue = Math.min(1, 1 / Math.max(evalue, 1e-300));
193
- const cs = empiricalBernsteinCs(sum, sumSq, count, c, alpha);
194
- let decision = "continue";
195
- if (rope && cs.low >= rope.low && cs.high <= rope.high) decision = "equivalent";
196
- else if (evalue >= 2 / alpha && muHat > 0) decision = "promote_now";
197
- else if (evalue >= 2 / alpha && muHat < 0) decision = "reject_now";
198
- else if (rope && cs.high < rope.low) decision = "reject_now";
199
- if (decision !== "continue" && decisionFiredAt === null) decisionFiredAt = t;
200
- steps.push({ t, delta: d, evalue, pValue, csLow: cs.low, csHigh: cs.high, decision });
201
- }
202
- const finalDecision = steps.length === 0 ? "continue" : steps[steps.length - 1].decision;
203
- return { steps, finalDecision, decisionFiredAt, clipped };
204
- }
205
- function evaluateInterimReleaseConfidence(input) {
206
- const candidates = input.deltaSeries.map((s) => {
207
- const seq = pairedEvalueSequence(s.deltas, {
208
- alpha: input.alpha,
209
- bound: input.bound,
210
- rope: input.rope
211
- });
212
- const last = seq.steps[seq.steps.length - 1];
213
- return {
214
- candidateId: s.candidateId,
215
- decision: seq.finalDecision,
216
- decisionFiredAt: seq.decisionFiredAt,
217
- finalEvalue: last?.evalue ?? 1,
218
- finalPValue: last?.pValue ?? 1,
219
- pairs: seq.steps.length,
220
- csLow: last?.csLow ?? Number.NEGATIVE_INFINITY,
221
- csHigh: last?.csHigh ?? Number.POSITIVE_INFINITY
222
- };
223
- });
224
- const promote = candidates.find((c) => c.decision === "promote_now");
225
- if (promote) return { candidates, recommendation: { decision: "promote_now", candidateId: promote.candidateId } };
226
- const live = candidates.find((c) => c.decision === "continue");
227
- if (live) return { candidates, recommendation: { decision: "continue", candidateId: null } };
228
- const equiv = candidates.find((c) => c.decision === "equivalent");
229
- if (equiv) return { candidates, recommendation: { decision: "equivalent", candidateId: equiv.candidateId } };
230
- return { candidates, recommendation: { decision: "reject_now", candidateId: null } };
231
- }
232
- function empiricalBernsteinCs(sum, sumSq, n, bound, alpha) {
233
- if (n === 0) return { low: -bound, high: bound };
234
- const mean = sum / n;
235
- const variance = Math.max(0, sumSq / n - mean * mean);
236
- const psi = Math.log(2 / alpha) + 1.7 * Math.log(Math.log(Math.max(Math.E, n)) + 1);
237
- const radius = Math.sqrt(2 * variance * psi / n) + 3 * bound * psi / n;
238
- return { low: mean - radius, high: mean + radius };
239
- }
240
-
241
155
  export {
242
- rubricPredictiveValidity,
243
- pairedEvalueSequence,
244
- evaluateInterimReleaseConfidence
156
+ rubricPredictiveValidity
245
157
  };
246
- //# sourceMappingURL=chunk-AXHNWLIX.js.map
158
+ //# sourceMappingURL=chunk-YRZ4M5GS.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/meta-eval/rubric-predictive-validity.ts"],"sourcesContent":["/**\n * Rubric predictive validity — does our eval rubric predict deployment\n * outcomes?\n *\n * `correlationStudy` (already in this package) joins a `TraceStore` to an\n * `OutcomeStore` and computes Pearson + Spearman + bootstrap CI for each\n * (eval-metric, outcome-metric) pair. That answers \"does X correlate with\n * Y at all.\" `rubricPredictiveValidity` is the campaign-shaped wrapper\n * around it: take a sequence of `RunRecord`s (the canonical campaign\n * artifact) and a `DeploymentOutcomeStore`, join on `runId`, return a\n * ranked verdict on every rubric whose dimension scores were captured in\n * `outcome.raw`.\n *\n * The point — quoting the methodology doc — is that **without this loop\n * every rubric is faith-based**. Once it's wired, you know which rubrics\n * have earned their promotion power and which ones are decoration.\n *\n * const validity = await rubricPredictiveValidity({\n * runs: lastQuarter,\n * outcomes: shipFlagOutcomeStore,\n * outcomeMetrics: ['revenue_lift', 'retention_30d', 'csat'],\n * rubrics: ['anti_slop', 'semantic_concept', 'tool_recovery'],\n * })\n * for (const r of validity.ranked) {\n * console.log(`${r.rubric} → ${r.bestOutcome}: ρ=${r.spearman.toFixed(2)}`)\n * }\n *\n * The function is intentionally read-only. Use the verdict to deprecate\n * decorative rubrics, re-weight composite scores, or trigger a\n * recalibration sweep when predictive validity drops below a threshold.\n */\n\nimport type { RunRecord } from '../run-record'\nimport type { DeploymentOutcome, OutcomeStore } from './outcome-store'\n\nexport interface RubricPredictiveValidityInput {\n /**\n * Canonical campaign output. Each record's `outcome.raw[<rubricId>]`\n * provides the eval score; missing keys are silently skipped per pair.\n */\n runs: RunRecord[]\n outcomes: OutcomeStore\n /**\n * Outcome metric names to evaluate against. Each must appear in at\n * least one `DeploymentOutcome.metrics` keyspace; pairs with too few\n * joined samples are excluded from the result.\n */\n outcomeMetrics: string[]\n /**\n * Rubric ids to evaluate. Must appear as keys in `RunRecord.outcome.raw`.\n * If omitted, every numeric key in `outcome.raw` across the run set is\n * treated as a rubric.\n */\n rubrics?: string[]\n /** Minimum joined-sample count before a pair is reported. Default 8. */\n minSamples?: number\n /** Bootstrap resamples for CI. Default 500. */\n bootstrapResamples?: number\n /** Random seed for the bootstrap (mulberry32). Default unset (Math.random). */\n seed?: number\n /**\n * Reduction when multiple outcomes attach to one runId. Default `'latest'`\n * (most recently captured).\n */\n reduction?: 'latest' | 'mean' | 'max'\n}\n\nexport interface RubricOutcomePair {\n rubric: string\n outcome: string\n n: number\n pearson: number\n spearman: number\n ci95: { low: number; high: number }\n /**\n * Verdict bucket. `load_bearing` ≥ 0.7, `informative` ≥ 0.4,\n * `decorative` < 0.4 in absolute correlation. A negative correlation\n * with a desired outcome is also `decorative` — actively misleading\n * is worse than uninformative.\n */\n verdict: 'load_bearing' | 'informative' | 'decorative'\n}\n\nexport interface RubricRanking {\n rubric: string\n /** Outcome metric this rubric correlated best with. */\n bestOutcome: string\n spearman: number\n pearson: number\n n: number\n verdict: RubricOutcomePair['verdict']\n}\n\nexport interface RubricPredictiveValidityReport {\n pairs: RubricOutcomePair[]\n /** Per-rubric best pair, sorted descending by |spearman|. */\n ranked: RubricRanking[]\n joinedSamples: number\n skippedRuns: number\n /** Rubrics that were declared but never produced a usable score. */\n rubricsWithoutData: string[]\n}\n\nexport async function rubricPredictiveValidity(\n input: RubricPredictiveValidityInput,\n): Promise<RubricPredictiveValidityReport> {\n const minSamples = input.minSamples ?? 8\n const reduction = input.reduction ?? 'latest'\n const resamples = input.bootstrapResamples ?? 500\n const rng = makeRng(input.seed)\n\n const outcomes = await input.outcomes.list()\n const outcomesByRun = new Map<string, DeploymentOutcome[]>()\n for (const o of outcomes) {\n const arr = outcomesByRun.get(o.runId) ?? []\n arr.push(o)\n outcomesByRun.set(o.runId, arr)\n }\n\n // Discover rubrics: caller-declared OR every numeric key in outcome.raw\n // observed across runs.\n const observedRubrics = new Set<string>()\n for (const r of input.runs) {\n for (const k of Object.keys(r.outcome.raw)) observedRubrics.add(k)\n }\n const rubrics = input.rubrics ?? [...observedRubrics]\n\n // Collect aligned (x, y) pairs per (rubric, outcome).\n type Bucket = { rubric: string; outcome: string; xs: number[]; ys: number[] }\n const buckets: Bucket[] = []\n for (const r of rubrics) {\n for (const o of input.outcomeMetrics) {\n buckets.push({ rubric: r, outcome: o, xs: [], ys: [] })\n }\n }\n\n let joined = 0\n let skipped = 0\n for (const run of input.runs) {\n const os = outcomesByRun.get(run.runId)\n if (!os || os.length === 0) {\n skipped++\n continue\n }\n let joinedThisRun = false\n for (const r of rubrics) {\n const x = run.outcome.raw[r]\n if (typeof x !== 'number' || !Number.isFinite(x)) continue\n for (const o of input.outcomeMetrics) {\n const values = os\n .map((row) => row.metrics[o])\n .filter((v): v is number => typeof v === 'number' && Number.isFinite(v))\n if (values.length === 0) continue\n const y = reduce(values, os, o, reduction)\n if (y === null) continue\n const bucket = buckets.find((b) => b.rubric === r && b.outcome === o)!\n bucket.xs.push(x)\n bucket.ys.push(y)\n joinedThisRun = true\n }\n }\n if (joinedThisRun) joined++\n }\n\n const pairs: RubricOutcomePair[] = []\n for (const b of buckets) {\n if (b.xs.length < minSamples) continue\n const pearson = pearsonR(b.xs, b.ys)\n const spearman = pearsonR(rankWithTies(b.xs), rankWithTies(b.ys))\n const ci = bootstrapCi(b.xs, b.ys, resamples, rng)\n const verdict: RubricOutcomePair['verdict'] =\n Math.abs(spearman) >= 0.7\n ? 'load_bearing'\n : Math.abs(spearman) >= 0.4\n ? 'informative'\n : 'decorative'\n pairs.push({\n rubric: b.rubric,\n outcome: b.outcome,\n n: b.xs.length,\n pearson,\n spearman,\n ci95: ci,\n verdict,\n })\n }\n\n const byRubric = new Map<string, RubricOutcomePair[]>()\n for (const p of pairs) {\n const arr = byRubric.get(p.rubric) ?? []\n arr.push(p)\n byRubric.set(p.rubric, arr)\n }\n const ranked: RubricRanking[] = [...byRubric.entries()]\n .map(([rubric, ps]) => {\n const best = ps.reduce((a, b) => (Math.abs(b.spearman) > Math.abs(a.spearman) ? b : a))\n return {\n rubric,\n bestOutcome: best.outcome,\n spearman: best.spearman,\n pearson: best.pearson,\n n: best.n,\n verdict: best.verdict,\n }\n })\n .sort((a, b) => Math.abs(b.spearman) - Math.abs(a.spearman))\n\n const rubricsWithoutData = rubrics.filter((r) => !byRubric.has(r))\n\n return { pairs, ranked, joinedSamples: joined, skippedRuns: skipped, rubricsWithoutData }\n}\n\n// ── Helpers ──────────────────────────────────────────────────────────────\n\nfunction reduce(\n values: number[],\n outcomes: DeploymentOutcome[],\n metric: string,\n kind: 'latest' | 'mean' | 'max',\n): number | null {\n if (values.length === 0) return null\n if (kind === 'mean') return values.reduce((s, v) => s + v, 0) / values.length\n if (kind === 'max') return Math.max(...values)\n // 'latest'\n const sorted = [...outcomes]\n .filter((o) => typeof o.metrics[metric] === 'number')\n .sort((a, b) => b.capturedAt - a.capturedAt)\n return sorted[0]?.metrics[metric] ?? null\n}\n\nfunction pearsonR(a: number[], b: number[]): number {\n if (a.length !== b.length || a.length < 2) return Number.NaN\n const ma = a.reduce((s, v) => s + v, 0) / a.length\n const mb = b.reduce((s, v) => s + v, 0) / b.length\n let num = 0,\n da = 0,\n db = 0\n for (let i = 0; i < a.length; i++) {\n const xa = a[i]! - ma\n const xb = b[i]! - mb\n num += xa * xb\n da += xa * xa\n db += xb * xb\n }\n if (da === 0 || db === 0) return da === 0 && db === 0 ? 1 : 0\n return num / Math.sqrt(da * db)\n}\n\nfunction rankWithTies(xs: number[]): number[] {\n const indexed = xs.map((v, i) => ({ v, i })).sort((a, b) => a.v - b.v)\n const r = new Array<number>(xs.length)\n for (let i = 0; i < indexed.length; ) {\n let j = i\n while (j + 1 < indexed.length && indexed[j + 1]!.v === indexed[i]!.v) j++\n const avg = (i + j + 2) / 2\n for (let k = i; k <= j; k++) r[indexed[k]!.i] = avg\n i = j + 1\n }\n return r\n}\n\nfunction bootstrapCi(\n xs: number[],\n ys: number[],\n iterations: number,\n rng: () => number,\n): { low: number; high: number } {\n const n = xs.length\n if (n < 3) return { low: Number.NaN, high: Number.NaN }\n const samples: number[] = []\n for (let b = 0; b < iterations; b++) {\n const rx = new Array<number>(n)\n const ry = new Array<number>(n)\n for (let i = 0; i < n; i++) {\n const idx = Math.floor(rng() * n)\n rx[i] = xs[idx]!\n ry[i] = ys[idx]!\n }\n const r = pearsonR(rx, ry)\n if (Number.isFinite(r)) samples.push(r)\n }\n samples.sort((a, b) => a - b)\n if (samples.length === 0) return { low: Number.NaN, high: Number.NaN }\n return {\n low: samples[Math.floor(0.025 * samples.length)]!,\n high: samples[Math.min(samples.length - 1, Math.floor(0.975 * samples.length))]!,\n }\n}\n\nfunction makeRng(seed?: number): () => number {\n if (seed === undefined) return Math.random\n let s = seed >>> 0\n return () => {\n s = (s + 0x6d2b79f5) >>> 0\n let t = s\n t = Math.imul(t ^ (t >>> 15), t | 1)\n t ^= t + Math.imul(t ^ (t >>> 7), t | 61)\n return ((t ^ (t >>> 14)) >>> 0) / 4294967296\n }\n}\n"],"mappings":";AAuGA,eAAsB,yBACpB,OACyC;AACzC,QAAM,aAAa,MAAM,cAAc;AACvC,QAAM,YAAY,MAAM,aAAa;AACrC,QAAM,YAAY,MAAM,sBAAsB;AAC9C,QAAM,MAAM,QAAQ,MAAM,IAAI;AAE9B,QAAM,WAAW,MAAM,MAAM,SAAS,KAAK;AAC3C,QAAM,gBAAgB,oBAAI,IAAiC;AAC3D,aAAW,KAAK,UAAU;AACxB,UAAM,MAAM,cAAc,IAAI,EAAE,KAAK,KAAK,CAAC;AAC3C,QAAI,KAAK,CAAC;AACV,kBAAc,IAAI,EAAE,OAAO,GAAG;AAAA,EAChC;AAIA,QAAM,kBAAkB,oBAAI,IAAY;AACxC,aAAW,KAAK,MAAM,MAAM;AAC1B,eAAW,KAAK,OAAO,KAAK,EAAE,QAAQ,GAAG,EAAG,iBAAgB,IAAI,CAAC;AAAA,EACnE;AACA,QAAM,UAAU,MAAM,WAAW,CAAC,GAAG,eAAe;AAIpD,QAAM,UAAoB,CAAC;AAC3B,aAAW,KAAK,SAAS;AACvB,eAAW,KAAK,MAAM,gBAAgB;AACpC,cAAQ,KAAK,EAAE,QAAQ,GAAG,SAAS,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC;AAAA,IACxD;AAAA,EACF;AAEA,MAAI,SAAS;AACb,MAAI,UAAU;AACd,aAAW,OAAO,MAAM,MAAM;AAC5B,UAAM,KAAK,cAAc,IAAI,IAAI,KAAK;AACtC,QAAI,CAAC,MAAM,GAAG,WAAW,GAAG;AAC1B;AACA;AAAA,IACF;AACA,QAAI,gBAAgB;AACpB,eAAW,KAAK,SAAS;AACvB,YAAM,IAAI,IAAI,QAAQ,IAAI,CAAC;AAC3B,UAAI,OAAO,MAAM,YAAY,CAAC,OAAO,SAAS,CAAC,EAAG;AAClD,iBAAW,KAAK,MAAM,gBAAgB;AACpC,cAAM,SAAS,GACZ,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,CAAC,EAC3B,OAAO,CAAC,MAAmB,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,CAAC;AACzE,YAAI,OAAO,WAAW,EAAG;AACzB,cAAM,IAAI,OAAO,QAAQ,IAAI,GAAG,SAAS;AACzC,YAAI,MAAM,KAAM;AAChB,cAAM,SAAS,QAAQ,KAAK,CAAC,MAAM,EAAE,WAAW,KAAK,EAAE,YAAY,CAAC;AACpE,eAAO,GAAG,KAAK,CAAC;AAChB,eAAO,GAAG,KAAK,CAAC;AAChB,wBAAgB;AAAA,MAClB;AAAA,IACF;AACA,QAAI,cAAe;AAAA,EACrB;AAEA,QAAM,QAA6B,CAAC;AACpC,aAAW,KAAK,SAAS;AACvB,QAAI,EAAE,GAAG,SAAS,WAAY;AAC9B,UAAM,UAAU,SAAS,EAAE,IAAI,EAAE,EAAE;AACnC,UAAM,WAAW,SAAS,aAAa,EAAE,EAAE,GAAG,aAAa,EAAE,EAAE,CAAC;AAChE,UAAM,KAAK,YAAY,EAAE,IAAI,EAAE,IAAI,WAAW,GAAG;AACjD,UAAM,UACJ,KAAK,IAAI,QAAQ,KAAK,MAClB,iBACA,KAAK,IAAI,QAAQ,KAAK,MACpB,gBACA;AACR,UAAM,KAAK;AAAA,MACT,QAAQ,EAAE;AAAA,MACV,SAAS,EAAE;AAAA,MACX,GAAG,EAAE,GAAG;AAAA,MACR;AAAA,MACA;AAAA,MACA,MAAM;AAAA,MACN;AAAA,IACF,CAAC;AAAA,EACH;AAEA,QAAM,WAAW,oBAAI,IAAiC;AACtD,aAAW,KAAK,OAAO;AACrB,UAAM,MAAM,SAAS,IAAI,EAAE,MAAM,KAAK,CAAC;AACvC,QAAI,KAAK,CAAC;AACV,aAAS,IAAI,EAAE,QAAQ,GAAG;AAAA,EAC5B;AACA,QAAM,SAA0B,CAAC,GAAG,SAAS,QAAQ,CAAC,EACnD,IAAI,CAAC,CAAC,QAAQ,EAAE,MAAM;AACrB,UAAM,OAAO,GAAG,OAAO,CAAC,GAAG,MAAO,KAAK,IAAI,EAAE,QAAQ,IAAI,KAAK,IAAI,EAAE,QAAQ,IAAI,IAAI,CAAE;AACtF,WAAO;AAAA,MACL;AAAA,MACA,aAAa,KAAK;AAAA,MAClB,UAAU,KAAK;AAAA,MACf,SAAS,KAAK;AAAA,MACd,GAAG,KAAK;AAAA,MACR,SAAS,KAAK;AAAA,IAChB;AAAA,EACF,CAAC,EACA,KAAK,CAAC,GAAG,MAAM,KAAK,IAAI,EAAE,QAAQ,IAAI,KAAK,IAAI,EAAE,QAAQ,CAAC;AAE7D,QAAM,qBAAqB,QAAQ,OAAO,CAAC,MAAM,CAAC,SAAS,IAAI,CAAC,CAAC;AAEjE,SAAO,EAAE,OAAO,QAAQ,eAAe,QAAQ,aAAa,SAAS,mBAAmB;AAC1F;AAIA,SAAS,OACP,QACA,UACA,QACA,MACe;AACf,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,MAAI,SAAS,OAAQ,QAAO,OAAO,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,OAAO;AACvE,MAAI,SAAS,MAAO,QAAO,KAAK,IAAI,GAAG,MAAM;AAE7C,QAAM,SAAS,CAAC,GAAG,QAAQ,EACxB,OAAO,CAAC,MAAM,OAAO,EAAE,QAAQ,MAAM,MAAM,QAAQ,EACnD,KAAK,CAAC,GAAG,MAAM,EAAE,aAAa,EAAE,UAAU;AAC7C,SAAO,OAAO,CAAC,GAAG,QAAQ,MAAM,KAAK;AACvC;AAEA,SAAS,SAAS,GAAa,GAAqB;AAClD,MAAI,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,EAAG,QAAO,OAAO;AACzD,QAAM,KAAK,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC5C,QAAM,KAAK,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC5C,MAAI,MAAM,GACR,KAAK,GACL,KAAK;AACP,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,UAAM,KAAK,EAAE,CAAC,IAAK;AACnB,UAAM,KAAK,EAAE,CAAC,IAAK;AACnB,WAAO,KAAK;AACZ,UAAM,KAAK;AACX,UAAM,KAAK;AAAA,EACb;AACA,MAAI,OAAO,KAAK,OAAO,EAAG,QAAO,OAAO,KAAK,OAAO,IAAI,IAAI;AAC5D,SAAO,MAAM,KAAK,KAAK,KAAK,EAAE;AAChC;AAEA,SAAS,aAAa,IAAwB;AAC5C,QAAM,UAAU,GAAG,IAAI,CAAC,GAAG,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,CAAC;AACrE,QAAM,IAAI,IAAI,MAAc,GAAG,MAAM;AACrC,WAAS,IAAI,GAAG,IAAI,QAAQ,UAAU;AACpC,QAAI,IAAI;AACR,WAAO,IAAI,IAAI,QAAQ,UAAU,QAAQ,IAAI,CAAC,EAAG,MAAM,QAAQ,CAAC,EAAG,EAAG;AACtE,UAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,aAAS,IAAI,GAAG,KAAK,GAAG,IAAK,GAAE,QAAQ,CAAC,EAAG,CAAC,IAAI;AAChD,QAAI,IAAI;AAAA,EACV;AACA,SAAO;AACT;AAEA,SAAS,YACP,IACA,IACA,YACA,KAC+B;AAC/B,QAAM,IAAI,GAAG;AACb,MAAI,IAAI,EAAG,QAAO,EAAE,KAAK,OAAO,KAAK,MAAM,OAAO,IAAI;AACtD,QAAM,UAAoB,CAAC;AAC3B,WAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,UAAM,KAAK,IAAI,MAAc,CAAC;AAC9B,UAAM,KAAK,IAAI,MAAc,CAAC;AAC9B,aAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,YAAM,MAAM,KAAK,MAAM,IAAI,IAAI,CAAC;AAChC,SAAG,CAAC,IAAI,GAAG,GAAG;AACd,SAAG,CAAC,IAAI,GAAG,GAAG;AAAA,IAChB;AACA,UAAM,IAAI,SAAS,IAAI,EAAE;AACzB,QAAI,OAAO,SAAS,CAAC,EAAG,SAAQ,KAAK,CAAC;AAAA,EACxC;AACA,UAAQ,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC5B,MAAI,QAAQ,WAAW,EAAG,QAAO,EAAE,KAAK,OAAO,KAAK,MAAM,OAAO,IAAI;AACrE,SAAO;AAAA,IACL,KAAK,QAAQ,KAAK,MAAM,QAAQ,QAAQ,MAAM,CAAC;AAAA,IAC/C,MAAM,QAAQ,KAAK,IAAI,QAAQ,SAAS,GAAG,KAAK,MAAM,QAAQ,QAAQ,MAAM,CAAC,CAAC;AAAA,EAChF;AACF;AAEA,SAAS,QAAQ,MAA6B;AAC5C,MAAI,SAAS,OAAW,QAAO,KAAK;AACpC,MAAI,IAAI,SAAS;AACjB,SAAO,MAAM;AACX,QAAK,IAAI,eAAgB;AACzB,QAAI,IAAI;AACR,QAAI,KAAK,KAAK,IAAK,MAAM,IAAK,IAAI,CAAC;AACnC,SAAK,IAAI,KAAK,KAAK,IAAK,MAAM,GAAI,IAAI,EAAE;AACxC,aAAS,IAAK,MAAM,QAAS,KAAK;AAAA,EACpC;AACF;","names":[]}