@tangle-network/agent-eval 0.23.1 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +212 -79
  3. package/dist/baseline-4R5deP0N.d.ts +108 -0
  4. package/dist/benchmarks/index.d.ts +3 -2
  5. package/dist/benchmarks/index.js +1 -1
  6. package/dist/builder-eval/index.d.ts +249 -0
  7. package/dist/builder-eval/index.js +391 -0
  8. package/dist/builder-eval/index.js.map +1 -0
  9. package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
  10. package/dist/chunk-2A5XJB43.js.map +1 -0
  11. package/dist/chunk-47X6LRCE.js +76 -0
  12. package/dist/chunk-47X6LRCE.js.map +1 -0
  13. package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
  14. package/dist/chunk-4F5DQN55.js.map +1 -0
  15. package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
  16. package/dist/chunk-4S4BM3QQ.js.map +1 -0
  17. package/dist/chunk-5BKGXME7.js +65 -0
  18. package/dist/chunk-5BKGXME7.js.map +1 -0
  19. package/dist/{chunk-6KQG5HAH.js → chunk-5LBB5B3Z.js} +376 -72
  20. package/dist/chunk-5LBB5B3Z.js.map +1 -0
  21. package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
  22. package/dist/chunk-6QDKWHLS.js.map +1 -0
  23. package/dist/{chunk-VQQSPGSM.js → chunk-EDUKQ5AM.js} +247 -189
  24. package/dist/chunk-EDUKQ5AM.js.map +1 -0
  25. package/dist/chunk-I4MBDTY5.js +272 -0
  26. package/dist/chunk-I4MBDTY5.js.map +1 -0
  27. package/dist/chunk-JLZQWFV3.js +618 -0
  28. package/dist/chunk-JLZQWFV3.js.map +1 -0
  29. package/dist/chunk-K2TPS5LB.js +569 -0
  30. package/dist/chunk-K2TPS5LB.js.map +1 -0
  31. package/dist/chunk-KKHDIONI.js +414 -0
  32. package/dist/chunk-KKHDIONI.js.map +1 -0
  33. package/dist/chunk-KMPRBJK4.js +74 -0
  34. package/dist/chunk-KMPRBJK4.js.map +1 -0
  35. package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
  36. package/dist/chunk-KTGTIOFD.js.map +1 -0
  37. package/dist/chunk-LSH4MMOZ.js +838 -0
  38. package/dist/chunk-LSH4MMOZ.js.map +1 -0
  39. package/dist/chunk-NG236HPC.js +57 -0
  40. package/dist/chunk-NG236HPC.js.map +1 -0
  41. package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
  42. package/dist/chunk-NLMNWKVM.js.map +1 -0
  43. package/dist/chunk-NU65VQ7M.js +99 -0
  44. package/dist/chunk-NU65VQ7M.js.map +1 -0
  45. package/dist/chunk-OWLAAMME.js +250 -0
  46. package/dist/chunk-OWLAAMME.js.map +1 -0
  47. package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
  48. package/dist/chunk-PC4UYEBM.js.map +1 -0
  49. package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
  50. package/dist/chunk-RAF443UI.js.map +1 -0
  51. package/dist/chunk-RZTMDUO7.js +49 -0
  52. package/dist/chunk-RZTMDUO7.js.map +1 -0
  53. package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
  54. package/dist/chunk-SESZDQPX.js.map +1 -0
  55. package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
  56. package/dist/chunk-TVVP3ZZQ.js.map +1 -0
  57. package/dist/chunk-WWYCWKUM.js +196 -0
  58. package/dist/chunk-WWYCWKUM.js.map +1 -0
  59. package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
  60. package/dist/chunk-YRZ4M5GS.js.map +1 -0
  61. package/dist/chunk-ZN274SWR.js +613 -0
  62. package/dist/chunk-ZN274SWR.js.map +1 -0
  63. package/dist/cli.js +10 -6
  64. package/dist/cli.js.map +1 -1
  65. package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
  66. package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
  67. package/dist/control.d.ts +8 -6
  68. package/dist/control.js +10 -7
  69. package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
  70. package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
  71. package/dist/errors-BZ9sTdz7.d.ts +70 -0
  72. package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
  73. package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
  74. package/dist/governance/index.d.ts +5 -0
  75. package/dist/governance/index.js +18 -0
  76. package/dist/governance/index.js.map +1 -0
  77. package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
  78. package/dist/index-Oj9fAPPN.d.ts +270 -0
  79. package/dist/index.d.ts +2018 -3003
  80. package/dist/index.js +7443 -9102
  81. package/dist/index.js.map +1 -1
  82. package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
  83. package/dist/knowledge/index.d.ts +102 -0
  84. package/dist/knowledge/index.js +18 -0
  85. package/dist/knowledge/index.js.map +1 -0
  86. package/dist/meta-eval/index.d.ts +99 -0
  87. package/dist/meta-eval/index.js +324 -0
  88. package/dist/meta-eval/index.js.map +1 -0
  89. package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
  90. package/dist/openapi.json +491 -1
  91. package/dist/optimization.d.ts +11 -8
  92. package/dist/optimization.js +11 -9
  93. package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
  94. package/dist/pipelines/index.d.ts +172 -0
  95. package/dist/pipelines/index.js +345 -0
  96. package/dist/pipelines/index.js.map +1 -0
  97. package/dist/prm/index.d.ts +99 -0
  98. package/dist/prm/index.js +222 -0
  99. package/dist/prm/index.js.map +1 -0
  100. package/dist/query-DODUYdPg.d.ts +30 -0
  101. package/dist/release-report-BNgMdqPF.d.ts +292 -0
  102. package/dist/replay-BL96gCEP.d.ts +226 -0
  103. package/dist/reporting.d.ts +10 -295
  104. package/dist/reporting.js +10 -6
  105. package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-BPT8x_NT.d.ts} +148 -146
  106. package/dist/rl.d.ts +1762 -8
  107. package/dist/rl.js +2035 -58
  108. package/dist/rl.js.map +1 -1
  109. package/dist/rubric-D5tjHNJQ.d.ts +72 -0
  110. package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
  111. package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
  112. package/dist/sequential-Dgz1n51-.d.ts +139 -0
  113. package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
  114. package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-C7VPYEj2.d.ts} +3 -76
  115. package/dist/telemetry/file.js +4 -1
  116. package/dist/telemetry/file.js.map +1 -1
  117. package/dist/telemetry/index.js +57 -57
  118. package/dist/telemetry/index.js.map +1 -1
  119. package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
  120. package/dist/traces.d.ts +142 -387
  121. package/dist/traces.js +1302 -40
  122. package/dist/traces.js.map +1 -1
  123. package/dist/trajectory-CnoBo-JY.d.ts +32 -0
  124. package/dist/wire/index.d.ts +369 -25
  125. package/dist/wire/index.js +22 -3
  126. package/package.json +44 -18
  127. package/dist/chunk-42I2QC2L.js.map +0 -1
  128. package/dist/chunk-5IIQKMD5.js.map +0 -1
  129. package/dist/chunk-6KQG5HAH.js.map +0 -1
  130. package/dist/chunk-6M774GY6.js.map +0 -1
  131. package/dist/chunk-7EAUOUQS.js.map +0 -1
  132. package/dist/chunk-AXHNWLIX.js.map +0 -1
  133. package/dist/chunk-EXGR4XEM.js.map +0 -1
  134. package/dist/chunk-IOXMGMHQ.js.map +0 -1
  135. package/dist/chunk-KAO3Q65R.js.map +0 -1
  136. package/dist/chunk-LZKIOBG2.js +0 -2026
  137. package/dist/chunk-LZKIOBG2.js.map +0 -1
  138. package/dist/chunk-QBW3YBTR.js.map +0 -1
  139. package/dist/chunk-QUKKGHTZ.js.map +0 -1
  140. package/dist/chunk-SQQLHODJ.js.map +0 -1
  141. package/dist/chunk-V5QSWN7L.js +0 -1310
  142. package/dist/chunk-V5QSWN7L.js.map +0 -1
  143. package/dist/chunk-VQQSPGSM.js.map +0 -1
  144. package/dist/chunk-XPHOZPOM.js +0 -1947
  145. package/dist/chunk-XPHOZPOM.js.map +0 -1
  146. package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
  147. package/dist/index-ekBXweiQ.d.ts +0 -1894
  148. package/dist/sequential-DgU2mFsE.d.ts +0 -304
@@ -0,0 +1,74 @@
1
+ import {
2
+ buildTrajectory
3
+ } from "./chunk-RZTMDUO7.js";
4
+ import {
5
+ isLlmSpan,
6
+ isToolSpan
7
+ } from "./chunk-5BKGXME7.js";
8
+
9
+ // src/prm/training-export.ts
10
/**
 * Flattens PRM-graded traces into per-step training samples.
 *
 * For each graded trace the run's trajectory is loaded via `buildTrajectory`.
 * Every graded step whose `spanId` is present in that trajectory produces one
 * sample holding the grade (`rubricId`, `score`, `rationale`, `evidence`), the
 * graded step itself, and the trajectory steps immediately preceding it
 * (at most `contextWindow` of them) rendered as turns. Graded steps whose span
 * is not part of the trajectory are skipped.
 *
 * @param store - Trace store, passed through to `buildTrajectory`.
 * @param graded - Iterable of graded traces, each with `runId` and `steps`.
 * @param options - Optional settings.
 * @param options.contextWindow - Maximum number of preceding steps to include
 *   per sample. Defaults to 5 when null/undefined.
 * @returns Promise of the samples, in the order the graded steps were given.
 */
async function exportTrainingData(store, graded, options = {}) {
  // Named `contextWindow` rather than `window` to avoid shadowing the browser global.
  const contextWindow = options.contextWindow ?? 5;
  const samples = [];
  for (const trace of graded) {
    const { steps } = await buildTrajectory(store, trace.runId);
    // Index the trajectory once per run so each graded step is an O(1) lookup
    // instead of an indexOf scan. On duplicate spanIds the later step wins.
    const indexBySpanId = new Map(steps.map((step, index) => [step.span.spanId, index]));
    for (const gradedStep of trace.steps) {
      const idx = indexBySpanId.get(gradedStep.spanId);
      if (idx === undefined) continue;
      const { span } = steps[idx];
      const priorSpans = steps
        .slice(Math.max(0, idx - contextWindow), idx)
        .map((step) => step.span);
      samples.push({
        runId: trace.runId,
        spanId: gradedStep.spanId,
        rubricId: gradedStep.rubricId,
        score: gradedStep.score,
        context: {
          // spanToTurn returns null for spans it cannot render as a turn; drop those.
          priorTurns: priorSpans.map(spanToTurn).filter((turn) => turn !== null),
          step: { kind: span.kind, text: spanToText(span) }
        },
        rationale: gradedStep.rationale,
        evidence: gradedStep.evidence
      });
    }
  }
  return samples;
}
37
/**
 * Serializes samples as NDJSON: one `JSON.stringify`-ed record per line, each
 * line terminated by "\n".
 *
 * An empty `samples` array yields an empty string. Terminating each record
 * individually (instead of joining and appending one newline) avoids emitting
 * a lone blank line, which strict NDJSON readers reject as an invalid record.
 *
 * @param samples - Array of JSON-serializable samples.
 * @returns The NDJSON text; "" when there are no samples.
 */
function toNdjson(samples) {
  return samples.map((sample) => `${JSON.stringify(sample)}\n`).join("");
}
41
/**
 * Renders a span as a `{ role, content }` turn.
 *
 * - Spans accepted by `isLlmSpan` become an "assistant" turn whose content is
 *   `span.output`, or, when that is null/undefined, the span's messages joined
 *   as "role: content" lines.
 * - Spans accepted by `isToolSpan` become a "tool" turn of the form
 *   "toolName(args) → result".
 * - Any other span yields `null`.
 *
 * @param span - The span to render.
 * @returns The turn object, or `null` when the span is neither kind.
 */
function spanToTurn(span) {
  const llm = isLlmSpan(span);
  if (!llm && !isToolSpan(span)) {
    return null;
  }
  if (llm) {
    let content = span.output;
    if (content === null || content === undefined) {
      // No recorded output: fall back to a transcript of the span's messages.
      const transcript = span.messages.map((message) => `${message.role}: ${message.content}`);
      content = transcript.join("\n");
    }
    return { role: "assistant", content };
  }
  const call = `${span.toolName}(${safeStringify(span.args)})`;
  return { role: "tool", content: `${call} \u2192 ${safeStringify(span.result)}` };
}
54
/**
 * Produces the text of a single step.
 *
 * Spans accepted by `isLlmSpan` yield `span.output` (or "" when it is
 * null/undefined); spans accepted by `isToolSpan` yield
 * "toolName(args) → result"; every other span yields `span.name`.
 *
 * @param span - The span to describe.
 * @returns The step text.
 */
function spanToText(span) {
  if (isLlmSpan(span)) {
    const { output } = span;
    return output ?? "";
  }
  if (!isToolSpan(span)) {
    return span.name;
  }
  const call = `${span.toolName}(${safeStringify(span.args)})`;
  return `${call} \u2192 ${safeStringify(span.result)}`;
}
60
/**
 * Converts any value to a string without throwing on values JSON cannot encode.
 *
 * - `null` / `undefined` become "".
 * - Strings are returned unchanged (not JSON-quoted).
 * - Everything else is passed to `JSON.stringify`; when that cannot produce
 *   text, `String(v)` is used instead.
 *
 * @param v - Any value.
 * @returns A string representation of `v`.
 */
function safeStringify(v) {
  if (v === null || v === undefined) return "";
  if (typeof v === "string") return v;
  try {
    // JSON.stringify returns undefined (not a string) for functions and
    // symbols; fall back so the result is always a string.
    return JSON.stringify(v) ?? String(v);
  } catch {
    // JSON.stringify throws on circular structures and BigInt values.
    return String(v);
  }
}
69
+
70
+ export {
71
+ exportTrainingData,
72
+ toNdjson
73
+ };
74
+ //# sourceMappingURL=chunk-KMPRBJK4.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/prm/training-export.ts"],"sourcesContent":["/**\n * Export PRM-graded traces as training data for downstream reward-model\n * fine-tuning. Canonical format is NDJSON of\n * `{ trajectory_text, step_index, rubric, score }` so a small model can\n * learn to predict step rewards from step context.\n *\n * The framework doesn't train the model — we emit the data; callers\n * plug it into their preferred trainer (TRL, Unsloth, custom).\n */\n\nimport type { LlmSpan, Span } from '../trace/schema'\nimport { isLlmSpan, isToolSpan } from '../trace/schema'\nimport type { TraceStore } from '../trace/store'\nimport { buildTrajectory } from '../trajectory'\nimport type { PrmGradedTrace } from './rubric'\n\nexport interface PrmTrainingSample {\n runId: string\n spanId: string\n rubricId: string\n score: number\n /** Serialized step context — step + surrounding conversation. */\n context: {\n priorTurns: Array<{ role: string; content: string }>\n step: { kind: Span['kind']; text: string }\n }\n /** Optional evidence + rationale for auditability. */\n rationale?: string\n evidence?: string\n}\n\nexport async function exportTrainingData(\n store: TraceStore,\n graded: PrmGradedTrace[],\n options: { contextWindow?: number } = {},\n): Promise<PrmTrainingSample[]> {\n const window = options.contextWindow ?? 
5\n const out: PrmTrainingSample[] = []\n for (const g of graded) {\n const trajectory = await buildTrajectory(store, g.runId)\n const spanById = new Map(trajectory.steps.map((s) => [s.span.spanId, s]))\n for (const gs of g.steps) {\n const node = spanById.get(gs.spanId)\n if (!node) continue\n const idx = trajectory.steps.indexOf(node)\n const priorSpans = trajectory.steps.slice(Math.max(0, idx - window), idx).map((s) => s.span)\n out.push({\n runId: g.runId,\n spanId: gs.spanId,\n rubricId: gs.rubricId,\n score: gs.score,\n context: {\n priorTurns: priorSpans\n .map(spanToTurn)\n .filter((t): t is { role: string; content: string } => t !== null),\n step: { kind: node.span.kind, text: spanToText(node.span) },\n },\n rationale: gs.rationale,\n evidence: gs.evidence,\n })\n }\n }\n return out\n}\n\n/** NDJSON serialization — write to file or stream directly to a trainer. */\nexport function toNdjson(samples: PrmTrainingSample[]): string {\n return `${samples.map((s) => JSON.stringify(s)).join('\\n')}\\n`\n}\n\nfunction spanToTurn(span: Span): { role: string; content: string } | null {\n if (isLlmSpan(span)) {\n const text = span.output ?? span.messages.map((m) => `${m.role}: ${m.content}`).join('\\n')\n return { role: 'assistant', content: text }\n }\n if (isToolSpan(span)) {\n return {\n role: 'tool',\n content: `${span.toolName}(${safeStringify(span.args)}) → ${safeStringify(span.result)}`,\n }\n }\n return null\n}\n\nfunction spanToText(span: Span): string {\n if (isLlmSpan(span)) return (span as LlmSpan).output ?? 
''\n if (isToolSpan(span))\n return `${span.toolName}(${safeStringify(span.args)}) → ${safeStringify(span.result)}`\n return span.name\n}\n\nfunction safeStringify(v: unknown): string {\n if (v === null || v === undefined) return ''\n if (typeof v === 'string') return v\n try {\n return JSON.stringify(v)\n } catch {\n return String(v)\n }\n}\n"],"mappings":";;;;;;;;;AA+BA,eAAsB,mBACpB,OACA,QACA,UAAsC,CAAC,GACT;AAC9B,QAAM,SAAS,QAAQ,iBAAiB;AACxC,QAAM,MAA2B,CAAC;AAClC,aAAW,KAAK,QAAQ;AACtB,UAAM,aAAa,MAAM,gBAAgB,OAAO,EAAE,KAAK;AACvD,UAAM,WAAW,IAAI,IAAI,WAAW,MAAM,IAAI,CAAC,MAAM,CAAC,EAAE,KAAK,QAAQ,CAAC,CAAC,CAAC;AACxE,eAAW,MAAM,EAAE,OAAO;AACxB,YAAM,OAAO,SAAS,IAAI,GAAG,MAAM;AACnC,UAAI,CAAC,KAAM;AACX,YAAM,MAAM,WAAW,MAAM,QAAQ,IAAI;AACzC,YAAM,aAAa,WAAW,MAAM,MAAM,KAAK,IAAI,GAAG,MAAM,MAAM,GAAG,GAAG,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI;AAC3F,UAAI,KAAK;AAAA,QACP,OAAO,EAAE;AAAA,QACT,QAAQ,GAAG;AAAA,QACX,UAAU,GAAG;AAAA,QACb,OAAO,GAAG;AAAA,QACV,SAAS;AAAA,UACP,YAAY,WACT,IAAI,UAAU,EACd,OAAO,CAAC,MAA8C,MAAM,IAAI;AAAA,UACnE,MAAM,EAAE,MAAM,KAAK,KAAK,MAAM,MAAM,WAAW,KAAK,IAAI,EAAE;AAAA,QAC5D;AAAA,QACA,WAAW,GAAG;AAAA,QACd,UAAU,GAAG;AAAA,MACf,CAAC;AAAA,IACH;AAAA,EACF;AACA,SAAO;AACT;AAGO,SAAS,SAAS,SAAsC;AAC7D,SAAO,GAAG,QAAQ,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC;AAAA;AAC5D;AAEA,SAAS,WAAW,MAAsD;AACxE,MAAI,UAAU,IAAI,GAAG;AACnB,UAAM,OAAO,KAAK,UAAU,KAAK,SAAS,IAAI,CAAC,MAAM,GAAG,EAAE,IAAI,KAAK,EAAE,OAAO,EAAE,EAAE,KAAK,IAAI;AACzF,WAAO,EAAE,MAAM,aAAa,SAAS,KAAK;AAAA,EAC5C;AACA,MAAI,WAAW,IAAI,GAAG;AACpB,WAAO;AAAA,MACL,MAAM;AAAA,MACN,SAAS,GAAG,KAAK,QAAQ,IAAI,cAAc,KAAK,IAAI,CAAC,YAAO,cAAc,KAAK,MAAM,CAAC;AAAA,IACxF;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,WAAW,MAAoB;AACtC,MAAI,UAAU,IAAI,EAAG,QAAQ,KAAiB,UAAU;AACxD,MAAI,WAAW,IAAI;AACjB,WAAO,GAAG,KAAK,QAAQ,IAAI,cAAc,KAAK,IAAI,CAAC,YAAO,cAAc,KAAK,MAAM,CAAC;AACtF,SAAO,KAAK;AACd;AAEA,SAAS,cAAc,GAAoB;AACzC,MAAI,MAAM,QAAQ,MAAM,OAAW,QAAO;AAC1C,MAAI,OAAO,MAAM,SAAU,QAAO;AAClC,MAAI;AACF,WAAO,KAAK,UAAU,CAAC;AAAA,EACzB,QAAQ;AACN,WAAO,OAAO,CAAC;AAAA,EACjB;AACF;","nam
es":[]}
@@ -1,11 +1,14 @@
1
+ import {
2
+ CaptureIntegrityError
3
+ } from "./chunk-NG236HPC.js";
4
+
1
5
  // src/trace/integrity.ts
2
- var RunIntegrityError = class extends Error {
6
+ var RunIntegrityError = class extends CaptureIntegrityError {
3
7
  constructor(report) {
4
8
  super(
5
9
  `Run ${report.runId} failed integrity check: ${report.issues.map((i) => i.code).join(", ")}`
6
10
  );
7
11
  this.report = report;
8
- this.name = "RunIntegrityError";
9
12
  }
10
13
  report;
11
14
  };
@@ -118,4 +121,4 @@ export {
118
121
  assertRunCaptured,
119
122
  throwIfRunIncomplete
120
123
  };
121
- //# sourceMappingURL=chunk-QUKKGHTZ.js.map
124
+ //# sourceMappingURL=chunk-KTGTIOFD.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/trace/integrity.ts"],"sourcesContent":["/**\n * Run-completion integrity check — at end of run, verify the expected event\n * types were actually captured. The point is the launch-review failure mode:\n * a run *appears* successful but the raw provider events were never written,\n * so a downstream reviewer can't reconstruct what happened.\n *\n * Pattern:\n *\n * const report = await assertRunCaptured(store, runId, {\n * llmSpansMin: 1,\n * judgeSpansMin: 1,\n * rawSink: providerSink, // must have ≥ 1 event for this run\n * requireRawCoverageOfLlmSpans: true, // every llm span has matching raw events\n * })\n * if (!report.ok) throwIfRunIncomplete(report) // or mark run failed and continue\n *\n * The function is read-only on the store and returns a structured report;\n * the caller chooses the failure mode (throw, mark run failed, log warning).\n * `throwIfRunIncomplete` is the convenient strict mode.\n */\n\nimport { CaptureIntegrityError } from '../errors'\nimport type { RawProviderSink } from './raw-provider-sink'\nimport type { TraceStore } from './store'\n\nexport interface RunIntegrityExpectations {\n /** Minimum LLM span count. Default 0 (no requirement). */\n llmSpansMin?: number\n /** Minimum judge span count. Default 0. */\n judgeSpansMin?: number\n /** Minimum tool span count. Default 0. */\n toolSpansMin?: number\n /**\n * Raw provider sink to consult for capture verification. When present,\n * the check requires at least one raw event for the run.\n */\n rawSink?: RawProviderSink\n /** Minimum raw provider event count. Default 0; ignored when `rawSink` absent. */\n rawProviderEventsMin?: number\n /**\n * Every LLM span must have at least one matching raw `request` event\n * (matched by spanId). Catches the common bug where the structured span\n * was emitted but the raw HTTP capture was wired to a different sink.\n */\n requireRawCoverageOfLlmSpans?: boolean\n /** Run outcome must be set (not null/undefined). 
Default false. */\n requireOutcome?: boolean\n}\n\nexport type RunIntegrityIssueCode =\n | 'no_run'\n | 'missing_llm_spans'\n | 'missing_judge_spans'\n | 'missing_tool_spans'\n | 'missing_raw_events'\n | 'no_raw_sink'\n | 'orphan_llm_span'\n | 'missing_outcome'\n\nexport interface RunIntegrityIssue {\n code: RunIntegrityIssueCode\n message: string\n detail?: Record<string, unknown>\n}\n\nexport interface RunIntegrityReport {\n ok: boolean\n runId: string\n llmSpanCount: number\n judgeSpanCount: number\n toolSpanCount: number\n rawProviderEventCount: number\n /**\n * Coverage of LLM spans by raw provider events keyed on spanId.\n * `total` is the number of LLM spans; `covered` is the count with at\n * least one matching `request` raw event.\n */\n rawSpanCoverage: { covered: number; total: number }\n issues: RunIntegrityIssue[]\n}\n\nexport class RunIntegrityError extends CaptureIntegrityError {\n constructor(public readonly report: RunIntegrityReport) {\n super(\n `Run ${report.runId} failed integrity check: ${report.issues.map((i) => i.code).join(', ')}`,\n )\n }\n}\n\nexport async function assertRunCaptured(\n store: TraceStore,\n runId: string,\n expectations: RunIntegrityExpectations = {},\n): Promise<RunIntegrityReport> {\n const issues: RunIntegrityIssue[] = []\n const run = await store.getRun(runId)\n if (!run) {\n return {\n ok: false,\n runId,\n llmSpanCount: 0,\n judgeSpanCount: 0,\n toolSpanCount: 0,\n rawProviderEventCount: 0,\n rawSpanCoverage: { covered: 0, total: 0 },\n issues: [{ code: 'no_run', message: `Run ${runId} not found in store.` }],\n }\n }\n\n const spans = await store.spans({ runId })\n const llmSpans = spans.filter((s) => s.kind === 'llm')\n const judgeSpans = spans.filter((s) => s.kind === 'judge')\n const toolSpans = spans.filter((s) => s.kind === 'tool')\n\n const llmMin = expectations.llmSpansMin ?? 0\n const judgeMin = expectations.judgeSpansMin ?? 0\n const toolMin = expectations.toolSpansMin ?? 
0\n\n if (llmSpans.length < llmMin) {\n issues.push({\n code: 'missing_llm_spans',\n message: `Expected ≥ ${llmMin} LLM spans, found ${llmSpans.length}.`,\n detail: { expected: llmMin, found: llmSpans.length },\n })\n }\n if (judgeSpans.length < judgeMin) {\n issues.push({\n code: 'missing_judge_spans',\n message: `Expected ≥ ${judgeMin} judge spans, found ${judgeSpans.length}.`,\n detail: { expected: judgeMin, found: judgeSpans.length },\n })\n }\n if (toolSpans.length < toolMin) {\n issues.push({\n code: 'missing_tool_spans',\n message: `Expected ≥ ${toolMin} tool spans, found ${toolSpans.length}.`,\n detail: { expected: toolMin, found: toolSpans.length },\n })\n }\n\n let rawEventCount = 0\n let coverage = { covered: 0, total: llmSpans.length }\n\n if (expectations.rawSink) {\n if (!expectations.rawSink.list) {\n issues.push({\n code: 'no_raw_sink',\n message: 'Provided rawSink does not implement list(); cannot verify capture.',\n })\n } else {\n const events = await expectations.rawSink.list({ runId })\n rawEventCount = events.length\n const rawMin = expectations.rawProviderEventsMin ?? 
1\n if (rawEventCount < rawMin) {\n issues.push({\n code: 'missing_raw_events',\n message: `Expected ≥ ${rawMin} raw provider events, found ${rawEventCount}.`,\n detail: { expected: rawMin, found: rawEventCount },\n })\n }\n if (expectations.requireRawCoverageOfLlmSpans) {\n const requestEventsBySpan = new Set(\n events.filter((e) => e.direction === 'request' && e.spanId).map((e) => e.spanId!),\n )\n const orphaned = llmSpans.filter((s) => !requestEventsBySpan.has(s.spanId))\n coverage = { covered: llmSpans.length - orphaned.length, total: llmSpans.length }\n if (orphaned.length > 0) {\n issues.push({\n code: 'orphan_llm_span',\n message: `${orphaned.length} LLM span(s) have no matching raw provider request event.`,\n detail: { orphanedSpanIds: orphaned.map((s) => s.spanId) },\n })\n }\n }\n }\n } else if (expectations.requireRawCoverageOfLlmSpans || expectations.rawProviderEventsMin) {\n issues.push({\n code: 'no_raw_sink',\n message: 'Raw coverage required but no rawSink supplied to the integrity check.',\n })\n }\n\n if (expectations.requireOutcome && (run.outcome === undefined || run.outcome === null)) {\n issues.push({\n code: 'missing_outcome',\n message: `Run ${runId} has no outcome recorded.`,\n })\n }\n\n return {\n ok: issues.length === 0,\n runId,\n llmSpanCount: llmSpans.length,\n judgeSpanCount: judgeSpans.length,\n toolSpanCount: toolSpans.length,\n rawProviderEventCount: rawEventCount,\n rawSpanCoverage: coverage,\n issues,\n }\n}\n\n/** Strict mode: throws `RunIntegrityError` when the report isn't ok. 
*/\nexport function throwIfRunIncomplete(report: RunIntegrityReport): void {\n if (!report.ok) throw new RunIntegrityError(report)\n}\n"],"mappings":";;;;;AAiFO,IAAM,oBAAN,cAAgC,sBAAsB;AAAA,EAC3D,YAA4B,QAA4B;AACtD;AAAA,MACE,OAAO,OAAO,KAAK,4BAA4B,OAAO,OAAO,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,IAC5F;AAH0B;AAAA,EAI5B;AAAA,EAJ4B;AAK9B;AAEA,eAAsB,kBACpB,OACA,OACA,eAAyC,CAAC,GACb;AAC7B,QAAM,SAA8B,CAAC;AACrC,QAAM,MAAM,MAAM,MAAM,OAAO,KAAK;AACpC,MAAI,CAAC,KAAK;AACR,WAAO;AAAA,MACL,IAAI;AAAA,MACJ;AAAA,MACA,cAAc;AAAA,MACd,gBAAgB;AAAA,MAChB,eAAe;AAAA,MACf,uBAAuB;AAAA,MACvB,iBAAiB,EAAE,SAAS,GAAG,OAAO,EAAE;AAAA,MACxC,QAAQ,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,KAAK,uBAAuB,CAAC;AAAA,IAC1E;AAAA,EACF;AAEA,QAAM,QAAQ,MAAM,MAAM,MAAM,EAAE,MAAM,CAAC;AACzC,QAAM,WAAW,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,KAAK;AACrD,QAAM,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,OAAO;AACzD,QAAM,YAAY,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,MAAM;AAEvD,QAAM,SAAS,aAAa,eAAe;AAC3C,QAAM,WAAW,aAAa,iBAAiB;AAC/C,QAAM,UAAU,aAAa,gBAAgB;AAE7C,MAAI,SAAS,SAAS,QAAQ;AAC5B,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS,mBAAc,MAAM,qBAAqB,SAAS,MAAM;AAAA,MACjE,QAAQ,EAAE,UAAU,QAAQ,OAAO,SAAS,OAAO;AAAA,IACrD,CAAC;AAAA,EACH;AACA,MAAI,WAAW,SAAS,UAAU;AAChC,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS,mBAAc,QAAQ,uBAAuB,WAAW,MAAM;AAAA,MACvE,QAAQ,EAAE,UAAU,UAAU,OAAO,WAAW,OAAO;AAAA,IACzD,CAAC;AAAA,EACH;AACA,MAAI,UAAU,SAAS,SAAS;AAC9B,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS,mBAAc,OAAO,sBAAsB,UAAU,MAAM;AAAA,MACpE,QAAQ,EAAE,UAAU,SAAS,OAAO,UAAU,OAAO;AAAA,IACvD,CAAC;AAAA,EACH;AAEA,MAAI,gBAAgB;AACpB,MAAI,WAAW,EAAE,SAAS,GAAG,OAAO,SAAS,OAAO;AAEpD,MAAI,aAAa,SAAS;AACxB,QAAI,CAAC,aAAa,QAAQ,MAAM;AAC9B,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,SAAS;AAAA,MACX,CAAC;AAAA,IACH,OAAO;AACL,YAAM,SAAS,MAAM,aAAa,QAAQ,KAAK,EAAE,MAAM,CAAC;AACxD,sBAAgB,OAAO;AACvB,YAAM,SAAS,aAAa,wBAAwB;AACpD,UAAI,gBAAgB,QAAQ;AAC1B,eAAO,KAAK;AAAA,UACV,MAAM;AAAA,UACN,SAAS,mBAAc,MAAM,+BAA+B,aAAa;AAAA,UACzE,QAAQ,EAAE,UAAU,QAAQ,OAAO,cAAc;AAAA,QACnD,CAAC;AAAA,MACH;AACA,UAAI,aAAa,8BAA8B;AAC7C,cAAM,sBAAsB,IAAI;AAAA,UAC9B,OAAO,OAA
O,CAAC,MAAM,EAAE,cAAc,aAAa,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,MAAO;AAAA,QAClF;AACA,cAAM,WAAW,SAAS,OAAO,CAAC,MAAM,CAAC,oBAAoB,IAAI,EAAE,MAAM,CAAC;AAC1E,mBAAW,EAAE,SAAS,SAAS,SAAS,SAAS,QAAQ,OAAO,SAAS,OAAO;AAChF,YAAI,SAAS,SAAS,GAAG;AACvB,iBAAO,KAAK;AAAA,YACV,MAAM;AAAA,YACN,SAAS,GAAG,SAAS,MAAM;AAAA,YAC3B,QAAQ,EAAE,iBAAiB,SAAS,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE;AAAA,UAC3D,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF,WAAW,aAAa,gCAAgC,aAAa,sBAAsB;AACzF,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AAEA,MAAI,aAAa,mBAAmB,IAAI,YAAY,UAAa,IAAI,YAAY,OAAO;AACtF,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS,OAAO,KAAK;AAAA,IACvB,CAAC;AAAA,EACH;AAEA,SAAO;AAAA,IACL,IAAI,OAAO,WAAW;AAAA,IACtB;AAAA,IACA,cAAc,SAAS;AAAA,IACvB,gBAAgB,WAAW;AAAA,IAC3B,eAAe,UAAU;AAAA,IACzB,uBAAuB;AAAA,IACvB,iBAAiB;AAAA,IACjB;AAAA,EACF;AACF;AAGO,SAAS,qBAAqB,QAAkC;AACrE,MAAI,CAAC,OAAO,GAAI,OAAM,IAAI,kBAAkB,MAAM;AACpD;","names":[]}