@tangle-network/agent-eval 0.21.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/CHANGELOG.md +236 -1
  2. package/README.md +17 -3
  3. package/dist/benchmarks/index.d.ts +2 -2
  4. package/dist/{chunk-WOK2RTWG.js → chunk-4W4NCYM2.js} +134 -109
  5. package/dist/chunk-4W4NCYM2.js.map +1 -0
  6. package/dist/{chunk-WOPGKVN4.js → chunk-6KQG5HAH.js} +2 -2
  7. package/dist/chunk-6M774GY6.js +53 -0
  8. package/dist/chunk-6M774GY6.js.map +1 -0
  9. package/dist/chunk-7EAUOUQS.js +495 -0
  10. package/dist/chunk-7EAUOUQS.js.map +1 -0
  11. package/dist/chunk-AXHNWLIX.js +246 -0
  12. package/dist/chunk-AXHNWLIX.js.map +1 -0
  13. package/dist/chunk-EXGR4XEM.js +283 -0
  14. package/dist/chunk-EXGR4XEM.js.map +1 -0
  15. package/dist/{chunk-3IX6QTB7.js → chunk-IOXMGMHQ.js} +418 -541
  16. package/dist/chunk-IOXMGMHQ.js.map +1 -0
  17. package/dist/{chunk-3GN6U53I.js → chunk-KAO3Q65R.js} +2 -2
  18. package/dist/chunk-LZKIOBG2.js +2026 -0
  19. package/dist/chunk-LZKIOBG2.js.map +1 -0
  20. package/dist/{chunk-YUFXO3TU.js → chunk-QBW3YBTR.js} +1 -1
  21. package/dist/chunk-QBW3YBTR.js.map +1 -0
  22. package/dist/chunk-QUKKGHTZ.js +121 -0
  23. package/dist/chunk-QUKKGHTZ.js.map +1 -0
  24. package/dist/{chunk-SNUHRBDL.js → chunk-SQQLHODJ.js} +10 -1
  25. package/dist/{chunk-SNUHRBDL.js.map → chunk-SQQLHODJ.js.map} +1 -1
  26. package/dist/{chunk-ARZ6BEV6.js → chunk-V5QSWN7L.js} +2 -2
  27. package/dist/{chunk-HRZELXCR.js → chunk-VQQSPGSM.js} +3 -3
  28. package/dist/cli.js +3 -3
  29. package/dist/{control-cxwMOAsy.d.ts → control-DvkH87qJ.d.ts} +2 -2
  30. package/dist/control.d.ts +3 -3
  31. package/dist/control.js +2 -2
  32. package/dist/eval-campaign-Ds5QljIh.d.ts +573 -0
  33. package/dist/{feedback-trajectory-CB0A32o3.d.ts → feedback-trajectory-c43WGtTX.d.ts} +1 -1
  34. package/dist/{index-c5saLbKD.d.ts → index-DDTlbHEK.d.ts} +1 -1
  35. package/dist/index-ekBXweiQ.d.ts +1894 -0
  36. package/dist/index.d.ts +20 -430
  37. package/dist/index.js +154 -34
  38. package/dist/index.js.map +1 -1
  39. package/dist/integrity-Cr5YodSY.d.ts +210 -0
  40. package/dist/openapi.json +1 -1
  41. package/dist/optimization.d.ts +7 -145
  42. package/dist/optimization.js +12 -3
  43. package/dist/reporting.d.ts +294 -4
  44. package/dist/reporting.js +18 -9
  45. package/dist/rl.d.ts +8 -0
  46. package/dist/rl.js +113 -0
  47. package/dist/rl.js.map +1 -0
  48. package/dist/{run-record-CX_jcAyr.d.ts → run-record-DNiOMBrZ.d.ts} +10 -1
  49. package/dist/sequential-DgU2mFsE.d.ts +304 -0
  50. package/dist/{multi-shot-optimization-Bvtz294B.d.ts → summary-report-Ce1r4EYo.d.ts} +382 -2
  51. package/dist/traces.d.ts +101 -181
  52. package/dist/traces.js +19 -8
  53. package/dist/wire/index.js +3 -3
  54. package/docs/auto-research-loop-end-to-end.md +186 -0
  55. package/docs/research-report-methodology.md +19 -4
  56. package/docs/three-package-architecture.md +180 -0
  57. package/docs/wire-protocol.md +1 -1
  58. package/package.json +7 -2
  59. package/dist/chunk-3IX6QTB7.js.map +0 -1
  60. package/dist/chunk-KRR4VMH7.js +0 -423
  61. package/dist/chunk-KRR4VMH7.js.map +0 -1
  62. package/dist/chunk-WOK2RTWG.js.map +0 -1
  63. package/dist/chunk-YUFXO3TU.js.map +0 -1
  64. package/dist/reporting-Da2ihlcM.d.ts +0 -672
  65. /package/dist/{chunk-WOPGKVN4.js.map → chunk-6KQG5HAH.js.map} +0 -0
  66. /package/dist/{chunk-3GN6U53I.js.map → chunk-KAO3Q65R.js.map} +0 -0
  67. /package/dist/{chunk-ARZ6BEV6.js.map → chunk-V5QSWN7L.js.map} +0 -0
  68. /package/dist/{chunk-HRZELXCR.js.map → chunk-VQQSPGSM.js.map} +0 -0
@@ -0,0 +1,121 @@
1
+ // src/trace/integrity.ts
2
+ var RunIntegrityError = class extends Error {
3
+ constructor(report) {
4
+ super(
5
+ `Run ${report.runId} failed integrity check: ${report.issues.map((i) => i.code).join(", ")}`
6
+ );
7
+ this.report = report;
8
+ this.name = "RunIntegrityError";
9
+ }
10
+ report;
11
+ };
12
+ async function assertRunCaptured(store, runId, expectations = {}) {
13
+ const issues = [];
14
+ const run = await store.getRun(runId);
15
+ if (!run) {
16
+ return {
17
+ ok: false,
18
+ runId,
19
+ llmSpanCount: 0,
20
+ judgeSpanCount: 0,
21
+ toolSpanCount: 0,
22
+ rawProviderEventCount: 0,
23
+ rawSpanCoverage: { covered: 0, total: 0 },
24
+ issues: [{ code: "no_run", message: `Run ${runId} not found in store.` }]
25
+ };
26
+ }
27
+ const spans = await store.spans({ runId });
28
+ const llmSpans = spans.filter((s) => s.kind === "llm");
29
+ const judgeSpans = spans.filter((s) => s.kind === "judge");
30
+ const toolSpans = spans.filter((s) => s.kind === "tool");
31
+ const llmMin = expectations.llmSpansMin ?? 0;
32
+ const judgeMin = expectations.judgeSpansMin ?? 0;
33
+ const toolMin = expectations.toolSpansMin ?? 0;
34
+ if (llmSpans.length < llmMin) {
35
+ issues.push({
36
+ code: "missing_llm_spans",
37
+ message: `Expected \u2265 ${llmMin} LLM spans, found ${llmSpans.length}.`,
38
+ detail: { expected: llmMin, found: llmSpans.length }
39
+ });
40
+ }
41
+ if (judgeSpans.length < judgeMin) {
42
+ issues.push({
43
+ code: "missing_judge_spans",
44
+ message: `Expected \u2265 ${judgeMin} judge spans, found ${judgeSpans.length}.`,
45
+ detail: { expected: judgeMin, found: judgeSpans.length }
46
+ });
47
+ }
48
+ if (toolSpans.length < toolMin) {
49
+ issues.push({
50
+ code: "missing_tool_spans",
51
+ message: `Expected \u2265 ${toolMin} tool spans, found ${toolSpans.length}.`,
52
+ detail: { expected: toolMin, found: toolSpans.length }
53
+ });
54
+ }
55
+ let rawEventCount = 0;
56
+ let coverage = { covered: 0, total: llmSpans.length };
57
+ if (expectations.rawSink) {
58
+ if (!expectations.rawSink.list) {
59
+ issues.push({
60
+ code: "no_raw_sink",
61
+ message: "Provided rawSink does not implement list(); cannot verify capture."
62
+ });
63
+ } else {
64
+ const events = await expectations.rawSink.list({ runId });
65
+ rawEventCount = events.length;
66
+ const rawMin = expectations.rawProviderEventsMin ?? 1;
67
+ if (rawEventCount < rawMin) {
68
+ issues.push({
69
+ code: "missing_raw_events",
70
+ message: `Expected \u2265 ${rawMin} raw provider events, found ${rawEventCount}.`,
71
+ detail: { expected: rawMin, found: rawEventCount }
72
+ });
73
+ }
74
+ if (expectations.requireRawCoverageOfLlmSpans) {
75
+ const requestEventsBySpan = new Set(
76
+ events.filter((e) => e.direction === "request" && e.spanId).map((e) => e.spanId)
77
+ );
78
+ const orphaned = llmSpans.filter((s) => !requestEventsBySpan.has(s.spanId));
79
+ coverage = { covered: llmSpans.length - orphaned.length, total: llmSpans.length };
80
+ if (orphaned.length > 0) {
81
+ issues.push({
82
+ code: "orphan_llm_span",
83
+ message: `${orphaned.length} LLM span(s) have no matching raw provider request event.`,
84
+ detail: { orphanedSpanIds: orphaned.map((s) => s.spanId) }
85
+ });
86
+ }
87
+ }
88
+ }
89
+ } else if (expectations.requireRawCoverageOfLlmSpans || expectations.rawProviderEventsMin) {
90
+ issues.push({
91
+ code: "no_raw_sink",
92
+ message: "Raw coverage required but no rawSink supplied to the integrity check."
93
+ });
94
+ }
95
+ if (expectations.requireOutcome && (run.outcome === void 0 || run.outcome === null)) {
96
+ issues.push({
97
+ code: "missing_outcome",
98
+ message: `Run ${runId} has no outcome recorded.`
99
+ });
100
+ }
101
+ return {
102
+ ok: issues.length === 0,
103
+ runId,
104
+ llmSpanCount: llmSpans.length,
105
+ judgeSpanCount: judgeSpans.length,
106
+ toolSpanCount: toolSpans.length,
107
+ rawProviderEventCount: rawEventCount,
108
+ rawSpanCoverage: coverage,
109
+ issues
110
+ };
111
+ }
112
+ function throwIfRunIncomplete(report) {
113
+ if (!report.ok) throw new RunIntegrityError(report);
114
+ }
115
+
116
+ export {
117
+ RunIntegrityError,
118
+ assertRunCaptured,
119
+ throwIfRunIncomplete
120
+ };
121
+ //# sourceMappingURL=chunk-QUKKGHTZ.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/trace/integrity.ts"],"sourcesContent":["/**\n * Run-completion integrity check — at end of run, verify the expected event\n * types were actually captured. The point is the launch-review failure mode:\n * a run *appears* successful but the raw provider events were never written,\n * so a downstream reviewer can't reconstruct what happened.\n *\n * Pattern:\n *\n * const report = await assertRunCaptured(store, runId, {\n * llmSpansMin: 1,\n * judgeSpansMin: 1,\n * rawSink: providerSink, // must have ≥ 1 event for this run\n * requireRawCoverageOfLlmSpans: true, // every llm span has matching raw events\n * })\n * if (!report.ok) throwIfRunIncomplete(report) // or mark run failed and continue\n *\n * The function is read-only on the store and returns a structured report;\n * the caller chooses the failure mode (throw, mark run failed, log warning).\n * `throwIfRunIncomplete` is the convenient strict mode.\n */\n\nimport type { TraceStore } from './store'\nimport type { RawProviderSink } from './raw-provider-sink'\n\nexport interface RunIntegrityExpectations {\n /** Minimum LLM span count. Default 0 (no requirement). */\n llmSpansMin?: number\n /** Minimum judge span count. Default 0. */\n judgeSpansMin?: number\n /** Minimum tool span count. Default 0. */\n toolSpansMin?: number\n /**\n * Raw provider sink to consult for capture verification. When present,\n * the check requires at least one raw event for the run.\n */\n rawSink?: RawProviderSink\n /** Minimum raw provider event count. Default 0; ignored when `rawSink` absent. */\n rawProviderEventsMin?: number\n /**\n * Every LLM span must have at least one matching raw `request` event\n * (matched by spanId). Catches the common bug where the structured span\n * was emitted but the raw HTTP capture was wired to a different sink.\n */\n requireRawCoverageOfLlmSpans?: boolean\n /** Run outcome must be set (not null/undefined). Default false. */\n requireOutcome?: boolean\n}\n\nexport type RunIntegrityIssueCode =\n | 'no_run'\n | 'missing_llm_spans'\n | 'missing_judge_spans'\n | 'missing_tool_spans'\n | 'missing_raw_events'\n | 'no_raw_sink'\n | 'orphan_llm_span'\n | 'missing_outcome'\n\nexport interface RunIntegrityIssue {\n code: RunIntegrityIssueCode\n message: string\n detail?: Record<string, unknown>\n}\n\nexport interface RunIntegrityReport {\n ok: boolean\n runId: string\n llmSpanCount: number\n judgeSpanCount: number\n toolSpanCount: number\n rawProviderEventCount: number\n /**\n * Coverage of LLM spans by raw provider events keyed on spanId.\n * `total` is the number of LLM spans; `covered` is the count with at\n * least one matching `request` raw event.\n */\n rawSpanCoverage: { covered: number; total: number }\n issues: RunIntegrityIssue[]\n}\n\nexport class RunIntegrityError extends Error {\n constructor(public readonly report: RunIntegrityReport) {\n super(\n `Run ${report.runId} failed integrity check: ${report.issues.map((i) => i.code).join(', ')}`,\n )\n this.name = 'RunIntegrityError'\n }\n}\n\nexport async function assertRunCaptured(\n store: TraceStore,\n runId: string,\n expectations: RunIntegrityExpectations = {},\n): Promise<RunIntegrityReport> {\n const issues: RunIntegrityIssue[] = []\n const run = await store.getRun(runId)\n if (!run) {\n return {\n ok: false,\n runId,\n llmSpanCount: 0,\n judgeSpanCount: 0,\n toolSpanCount: 0,\n rawProviderEventCount: 0,\n rawSpanCoverage: { covered: 0, total: 0 },\n issues: [{ code: 'no_run', message: `Run ${runId} not found in store.` }],\n }\n }\n\n const spans = await store.spans({ runId })\n const llmSpans = spans.filter((s) => s.kind === 'llm')\n const judgeSpans = spans.filter((s) => s.kind === 'judge')\n const toolSpans = spans.filter((s) => s.kind === 'tool')\n\n const llmMin = expectations.llmSpansMin ?? 0\n const judgeMin = expectations.judgeSpansMin ?? 0\n const toolMin = expectations.toolSpansMin ?? 0\n\n if (llmSpans.length < llmMin) {\n issues.push({\n code: 'missing_llm_spans',\n message: `Expected ≥ ${llmMin} LLM spans, found ${llmSpans.length}.`,\n detail: { expected: llmMin, found: llmSpans.length },\n })\n }\n if (judgeSpans.length < judgeMin) {\n issues.push({\n code: 'missing_judge_spans',\n message: `Expected ≥ ${judgeMin} judge spans, found ${judgeSpans.length}.`,\n detail: { expected: judgeMin, found: judgeSpans.length },\n })\n }\n if (toolSpans.length < toolMin) {\n issues.push({\n code: 'missing_tool_spans',\n message: `Expected ≥ ${toolMin} tool spans, found ${toolSpans.length}.`,\n detail: { expected: toolMin, found: toolSpans.length },\n })\n }\n\n let rawEventCount = 0\n let coverage = { covered: 0, total: llmSpans.length }\n\n if (expectations.rawSink) {\n if (!expectations.rawSink.list) {\n issues.push({\n code: 'no_raw_sink',\n message: 'Provided rawSink does not implement list(); cannot verify capture.',\n })\n } else {\n const events = await expectations.rawSink.list({ runId })\n rawEventCount = events.length\n const rawMin = expectations.rawProviderEventsMin ?? 1\n if (rawEventCount < rawMin) {\n issues.push({\n code: 'missing_raw_events',\n message: `Expected ≥ ${rawMin} raw provider events, found ${rawEventCount}.`,\n detail: { expected: rawMin, found: rawEventCount },\n })\n }\n if (expectations.requireRawCoverageOfLlmSpans) {\n const requestEventsBySpan = new Set(\n events.filter((e) => e.direction === 'request' && e.spanId).map((e) => e.spanId!),\n )\n const orphaned = llmSpans.filter((s) => !requestEventsBySpan.has(s.spanId))\n coverage = { covered: llmSpans.length - orphaned.length, total: llmSpans.length }\n if (orphaned.length > 0) {\n issues.push({\n code: 'orphan_llm_span',\n message: `${orphaned.length} LLM span(s) have no matching raw provider request event.`,\n detail: { orphanedSpanIds: orphaned.map((s) => s.spanId) },\n })\n }\n }\n }\n } else if (expectations.requireRawCoverageOfLlmSpans || expectations.rawProviderEventsMin) {\n issues.push({\n code: 'no_raw_sink',\n message: 'Raw coverage required but no rawSink supplied to the integrity check.',\n })\n }\n\n if (expectations.requireOutcome && (run.outcome === undefined || run.outcome === null)) {\n issues.push({\n code: 'missing_outcome',\n message: `Run ${runId} has no outcome recorded.`,\n })\n }\n\n return {\n ok: issues.length === 0,\n runId,\n llmSpanCount: llmSpans.length,\n judgeSpanCount: judgeSpans.length,\n toolSpanCount: toolSpans.length,\n rawProviderEventCount: rawEventCount,\n rawSpanCoverage: coverage,\n issues,\n }\n}\n\n/** Strict mode: throws `RunIntegrityError` when the report isn't ok. */\nexport function throwIfRunIncomplete(report: RunIntegrityReport): void {\n if (!report.ok) throw new RunIntegrityError(report)\n}\n"],"mappings":";AAgFO,IAAM,oBAAN,cAAgC,MAAM;AAAA,EAC3C,YAA4B,QAA4B;AACtD;AAAA,MACE,OAAO,OAAO,KAAK,4BAA4B,OAAO,OAAO,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,IAC5F;AAH0B;AAI1B,SAAK,OAAO;AAAA,EACd;AAAA,EAL4B;AAM9B;AAEA,eAAsB,kBACpB,OACA,OACA,eAAyC,CAAC,GACb;AAC7B,QAAM,SAA8B,CAAC;AACrC,QAAM,MAAM,MAAM,MAAM,OAAO,KAAK;AACpC,MAAI,CAAC,KAAK;AACR,WAAO;AAAA,MACL,IAAI;AAAA,MACJ;AAAA,MACA,cAAc;AAAA,MACd,gBAAgB;AAAA,MAChB,eAAe;AAAA,MACf,uBAAuB;AAAA,MACvB,iBAAiB,EAAE,SAAS,GAAG,OAAO,EAAE;AAAA,MACxC,QAAQ,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,KAAK,uBAAuB,CAAC;AAAA,IAC1E;AAAA,EACF;AAEA,QAAM,QAAQ,MAAM,MAAM,MAAM,EAAE,MAAM,CAAC;AACzC,QAAM,WAAW,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,KAAK;AACrD,QAAM,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,OAAO;AACzD,QAAM,YAAY,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,MAAM;AAEvD,QAAM,SAAS,aAAa,eAAe;AAC3C,QAAM,WAAW,aAAa,iBAAiB;AAC/C,QAAM,UAAU,aAAa,gBAAgB;AAE7C,MAAI,SAAS,SAAS,QAAQ;AAC5B,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS,mBAAc,MAAM,qBAAqB,SAAS,MAAM;AAAA,MACjE,QAAQ,EAAE,UAAU,QAAQ,OAAO,SAAS,OAAO;AAAA,IACrD,CAAC;AAAA,EACH;AACA,MAAI,WAAW,SAAS,UAAU;AAChC,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS,mBAAc,QAAQ,uBAAuB,WAAW,MAAM;AAAA,MACvE,QAAQ,EAAE,UAAU,UAAU,OAAO,WAAW,OAAO;AAAA,IACzD,CAAC;AAAA,EACH;AACA,MAAI,UAAU,SAAS,SAAS;AAC9B,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS,mBAAc,OAAO,sBAAsB,UAAU,MAAM;AAAA,MACpE,QAAQ,EAAE,UAAU,SAAS,OAAO,UAAU,OAAO;AAAA,IACvD,CAAC;AAAA,EACH;AAEA,MAAI,gBAAgB;AACpB,MAAI,WAAW,EAAE,SAAS,GAAG,OAAO,SAAS,OAAO;AAEpD,MAAI,aAAa,SAAS;AACxB,QAAI,CAAC,aAAa,QAAQ,MAAM;AAC9B,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,SAAS;AAAA,MACX,CAAC;AAAA,IACH,OAAO;AACL,YAAM,SAAS,MAAM,aAAa,QAAQ,KAAK,EAAE,MAAM,CAAC;AACxD,sBAAgB,OAAO;AACvB,YAAM,SAAS,aAAa,wBAAwB;AACpD,UAAI,gBAAgB,QAAQ;AAC1B,eAAO,KAAK;AAAA,UACV,MAAM;AAAA,UACN,SAAS,mBAAc,MAAM,+BAA+B,aAAa;AAAA,UACzE,QAAQ,EAAE,UAAU,QAAQ,OAAO,cAAc;AAAA,QACnD,CAAC;AAAA,MACH;AACA,UAAI,aAAa,8BAA8B;AAC7C,cAAM,sBAAsB,IAAI;AAAA,UAC9B,OAAO,OAAO,CAAC,MAAM,EAAE,cAAc,aAAa,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,MAAO;AAAA,QAClF;AACA,cAAM,WAAW,SAAS,OAAO,CAAC,MAAM,CAAC,oBAAoB,IAAI,EAAE,MAAM,CAAC;AAC1E,mBAAW,EAAE,SAAS,SAAS,SAAS,SAAS,QAAQ,OAAO,SAAS,OAAO;AAChF,YAAI,SAAS,SAAS,GAAG;AACvB,iBAAO,KAAK;AAAA,YACV,MAAM;AAAA,YACN,SAAS,GAAG,SAAS,MAAM;AAAA,YAC3B,QAAQ,EAAE,iBAAiB,SAAS,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE;AAAA,UAC3D,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF,WAAW,aAAa,gCAAgC,aAAa,sBAAsB;AACzF,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AAEA,MAAI,aAAa,mBAAmB,IAAI,YAAY,UAAa,IAAI,YAAY,OAAO;AACtF,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,SAAS,OAAO,KAAK;AAAA,IACvB,CAAC;AAAA,EACH;AAEA,SAAO;AAAA,IACL,IAAI,OAAO,WAAW;AAAA,IACtB;AAAA,IACA,cAAc,SAAS;AAAA,IACvB,gBAAgB,WAAW;AAAA,IAC3B,eAAe,UAAU;AAAA,IACzB,uBAAuB;AAAA,IACvB,iBAAiB;AAAA,IACjB;AAAA,EACF;AACF;AAGO,SAAS,qBAAqB,QAAkC;AACrE,MAAI,CAAC,OAAO,GAAI,OAAM,IAAI,kBAAkB,MAAM;AACpD;","names":[]}
@@ -67,6 +67,15 @@ var InMemoryRawProviderSink = class {
67
67
  var NoopRawProviderSink = class {
68
68
  async record() {
69
69
  }
70
+ /**
71
+ * Returns an empty array. Implemented so `assertRunCaptured` does not
72
+ * trip the `no_raw_sink` issue when a caller explicitly opts out of
73
+ * capture by passing this sink — opt-out is a deliberate choice, not a
74
+ * misconfiguration.
75
+ */
76
+ async list() {
77
+ return [];
78
+ }
70
79
  };
71
80
  var FileSystemRawProviderSink = class {
72
81
  dir;
@@ -151,4 +160,4 @@ export {
151
160
  FileSystemRawProviderSink,
152
161
  providerFromBaseUrl
153
162
  };
154
- //# sourceMappingURL=chunk-SNUHRBDL.js.map
163
+ //# sourceMappingURL=chunk-SQQLHODJ.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/trace/raw-provider-sink.ts"],"sourcesContent":["/**\n * RawProviderSink — first-class persistence for the actual HTTP-level\n * request/response bodies of every LLM provider call.\n *\n * Why this is a separate sink from the structured `LlmSpan`:\n *\n * - `LlmSpan` records the *intent* — model name, messages, output text,\n * usage. It's what dashboards read; it's NOT enough for forensics.\n * - When a downstream consumer reports \"the verifier used the wrong route\"\n * or \"tokens look right but reasoning was missing,\" the only way to\n * answer is the raw HTTP body. Span fields can lie (a proxy can echo\n * a different `model` value than what actually answered); the raw\n * response is ground truth.\n *\n * Default behaviour: opt-in. Pass `rawSink` to `LlmClientOptions` (or the\n * matrix runner / BuilderSession sets it up automatically) and every\n * request, response, and error is recorded — including retries, with the\n * attempt index attached so a flaky call's full event chain is recoverable.\n *\n * Redaction is enforced at sink time. The default redactor strips\n * `Authorization`, `X-Api-Key`, `X-Auth-Token`, `Cookie` headers and any\n * payload field whose key matches `apiKey | api_key | bearer | password |\n * secret | token` (case-insensitive). Override via the sink constructor or\n * the per-call `redactor`. The `redactedFields` array on the persisted\n * event lets a reviewer see what was stripped without exposing the values.\n */\n\nimport { promises as fs } from 'node:fs'\nimport * as path from 'node:path'\n\nexport type RawProviderDirection = 'request' | 'response' | 'error'\n\nexport interface RawProviderEvent {\n /** Stable id. Generated by the sink if omitted. */\n eventId: string\n /** Trace context populated by `LlmClient` when the call is wrapped in a span. */\n runId?: string\n spanId?: string\n /**\n * Logical provider name. Free-form so callers can use whatever id matches\n * their topology (`'openai'`, `'anthropic'`, `'tangle-router'`, …). When\n * omitted, derived from `baseUrl` in `LlmClientOptions`.\n */\n provider: string\n model: string\n /** Endpoint path, e.g. `'/v1/chat/completions'`. */\n endpoint: string\n /** Base URL used for the call (already-normalised — no trailing slash). */\n baseUrl: string\n /** 0-indexed retry attempt. The first attempt is 0; a retried call gets 1, 2, … */\n attemptIndex: number\n direction: RawProviderDirection\n /** Unix ms. */\n timestamp: number\n /** Wall-clock duration of the call leg. Set on `response` and `error` events; null on `request`. */\n durationMs?: number\n statusCode?: number\n requestHeaders?: Record<string, string>\n requestBody?: unknown\n responseHeaders?: Record<string, string>\n responseBody?: unknown\n /** Set on `direction: 'error'` events. */\n errorMessage?: string\n /** Field paths the redactor stripped from this event ('header:Authorization', 'body.apiKey', …). */\n redactedFields: string[]\n}\n\nexport interface RawProviderSinkFilter {\n runId?: string\n spanId?: string\n direction?: RawProviderDirection\n attemptIndex?: number\n}\n\nexport interface RawProviderSink {\n record(event: RawProviderEvent): Promise<void>\n /** Optional listing — implementations that durably persist (file, db) should support this. */\n list?(filter?: RawProviderSinkFilter): Promise<RawProviderEvent[]>\n /** Optional teardown for backed implementations. */\n close?(): Promise<void>\n}\n\nexport type ProviderRedactor = (event: RawProviderEvent) => RawProviderEvent\n\nconst REDACTED_HEADER_NAMES = new Set([\n 'authorization',\n 'x-api-key',\n 'x-auth-token',\n 'cookie',\n 'set-cookie',\n 'proxy-authorization',\n])\n\nconst REDACTED_BODY_KEY = /^(api[_-]?key|bearer|password|secret|token|access[_-]?token|refresh[_-]?token)$/i\n\n/**\n * Default redactor — strips well-known auth headers and any body field whose\n * key matches the credential pattern. Records every redacted path on\n * `event.redactedFields` so a downstream reviewer can see what was removed.\n */\nexport function defaultProviderRedactor(event: RawProviderEvent): RawProviderEvent {\n const redactedFields: string[] = [...(event.redactedFields ?? [])]\n const requestHeaders = redactHeaders(event.requestHeaders, 'request', redactedFields)\n const responseHeaders = redactHeaders(event.responseHeaders, 'response', redactedFields)\n const requestBody = redactBody(event.requestBody, 'requestBody', redactedFields)\n const responseBody = redactBody(event.responseBody, 'responseBody', redactedFields)\n return { ...event, requestHeaders, responseHeaders, requestBody, responseBody, redactedFields }\n}\n\nfunction redactHeaders(\n headers: Record<string, string> | undefined,\n prefix: 'request' | 'response',\n redactedFields: string[],\n): Record<string, string> | undefined {\n if (!headers) return headers\n const out: Record<string, string> = {}\n for (const [k, v] of Object.entries(headers)) {\n if (REDACTED_HEADER_NAMES.has(k.toLowerCase())) {\n redactedFields.push(`${prefix}Headers.${k}`)\n continue\n }\n out[k] = v\n }\n return out\n}\n\nfunction redactBody(\n value: unknown,\n pathStr: string,\n redactedFields: string[],\n): unknown {\n if (value == null) return value\n if (Array.isArray(value)) return value.map((v, i) => redactBody(v, `${pathStr}[${i}]`, redactedFields))\n if (typeof value === 'object') {\n const out: Record<string, unknown> = {}\n for (const [k, v] of Object.entries(value as Record<string, unknown>)) {\n if (REDACTED_BODY_KEY.test(k)) {\n redactedFields.push(`${pathStr}.${k}`)\n continue\n }\n out[k] = redactBody(v, `${pathStr}.${k}`, redactedFields)\n }\n return out\n }\n return value\n}\n\n// ── In-memory ────────────────────────────────────────────────────────────\n\nexport interface InMemoryRawProviderSinkOptions {\n redactor?: ProviderRedactor\n}\n\nexport class InMemoryRawProviderSink implements RawProviderSink {\n private events: RawProviderEvent[] = []\n private redactor: ProviderRedactor\n\n constructor(opts: InMemoryRawProviderSinkOptions = {}) {\n this.redactor = opts.redactor ?? defaultProviderRedactor\n }\n\n async record(event: RawProviderEvent): Promise<void> {\n this.events.push(this.redactor({ ...event, redactedFields: event.redactedFields ?? [] }))\n }\n\n async list(filter: RawProviderSinkFilter = {}): Promise<RawProviderEvent[]> {\n return this.events.filter((e) =>\n (filter.runId === undefined || e.runId === filter.runId) &&\n (filter.spanId === undefined || e.spanId === filter.spanId) &&\n (filter.direction === undefined || e.direction === filter.direction) &&\n (filter.attemptIndex === undefined || e.attemptIndex === filter.attemptIndex),\n )\n }\n\n size(): number { return this.events.length }\n}\n\nexport class NoopRawProviderSink implements RawProviderSink {\n async record(): Promise<void> { /* no-op */ }\n}\n\n// ── Filesystem (NDJSON) ──────────────────────────────────────────────────\n\nexport interface FileSystemRawProviderSinkOptions {\n /** Directory the NDJSON file is written into. Created if missing. */\n dir: string\n /** File name; default `'raw-provider-events.ndjson'`. */\n fileName?: string\n /** Bytes after which the writer rolls over to a new file (default 32 MiB). */\n rollAtBytes?: number\n redactor?: ProviderRedactor\n}\n\nexport class FileSystemRawProviderSink implements RawProviderSink {\n private dir: string\n private fileName: string\n private rollAtBytes: number\n private redactor: ProviderRedactor\n private bytesWritten = 0\n private rollIndex = 0\n private initPromise: Promise<void> | null = null\n\n constructor(opts: FileSystemRawProviderSinkOptions) {\n this.dir = opts.dir\n this.fileName = opts.fileName ?? 'raw-provider-events.ndjson'\n this.rollAtBytes = opts.rollAtBytes ?? 32 * 1024 * 1024\n this.redactor = opts.redactor ?? defaultProviderRedactor\n }\n\n private async ensureInit(): Promise<void> {\n if (!this.initPromise) {\n this.initPromise = fs.mkdir(this.dir, { recursive: true }).then(() => undefined)\n }\n await this.initPromise\n }\n\n private currentPath(): string {\n if (this.rollIndex === 0) return path.join(this.dir, this.fileName)\n return path.join(this.dir, `${this.fileName}.${this.rollIndex}`)\n }\n\n async record(event: RawProviderEvent): Promise<void> {\n await this.ensureInit()\n const redacted = this.redactor({ ...event, redactedFields: event.redactedFields ?? [] })\n const line = JSON.stringify(redacted) + '\\n'\n if (this.bytesWritten + line.length > this.rollAtBytes && this.bytesWritten > 0) {\n this.rollIndex += 1\n this.bytesWritten = 0\n }\n await fs.appendFile(this.currentPath(), line, 'utf8')\n this.bytesWritten += line.length\n }\n\n async list(filter: RawProviderSinkFilter = {}): Promise<RawProviderEvent[]> {\n await this.ensureInit()\n const out: RawProviderEvent[] = []\n for (let i = 0; i <= this.rollIndex; i++) {\n const file = i === 0\n ? path.join(this.dir, this.fileName)\n : path.join(this.dir, `${this.fileName}.${i}`)\n let body: string\n try {\n body = await fs.readFile(file, 'utf8')\n } catch (err) {\n if ((err as NodeJS.ErrnoException).code === 'ENOENT') continue\n throw err\n }\n for (const line of body.split('\\n')) {\n if (!line) continue\n const event = JSON.parse(line) as RawProviderEvent\n if (filter.runId !== undefined && event.runId !== filter.runId) continue\n if (filter.spanId !== undefined && event.spanId !== filter.spanId) continue\n if (filter.direction !== undefined && event.direction !== filter.direction) continue\n if (filter.attemptIndex !== undefined && event.attemptIndex !== filter.attemptIndex) continue\n out.push(event)\n }\n }\n return out\n }\n}\n\n// ── Helpers ──────────────────────────────────────────────────────────────\n\n/**\n * Best-effort provider id from a base URL. Falls back to the URL host when\n * none of the well-known patterns match.\n */\nexport function providerFromBaseUrl(baseUrl: string): string {\n const lower = baseUrl.toLowerCase()\n if (lower.includes('api.openai.com')) return 'openai'\n if (lower.includes('api.anthropic.com')) return 'anthropic'\n if (lower.includes('generativelanguage.googleapis.com')) return 'google'\n if (lower.includes('api.together.ai') || lower.includes('api.together.xyz')) return 'together'\n if (lower.includes('api.deepseek.com')) return 'deepseek'\n if (lower.includes('router.tangle.tools')) return 'tangle-router'\n if (lower.includes('api.litellm') || lower.includes('litellm')) return 'litellm'\n try {\n return new URL(baseUrl).host\n } catch {\n return baseUrl\n }\n}\n"],"mappings":";AA2BA,SAAS,YAAY,UAAU;AAC/B,YAAY,UAAU;AAwDtB,IAAM,wBAAwB,oBAAI,IAAI;AAAA,EACpC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,IAAM,oBAAoB;AAOnB,SAAS,wBAAwB,OAA2C;AACjF,QAAM,iBAA2B,CAAC,GAAI,MAAM,kBAAkB,CAAC,CAAE;AACjE,QAAM,iBAAiB,cAAc,MAAM,gBAAgB,WAAW,cAAc;AACpF,QAAM,kBAAkB,cAAc,MAAM,iBAAiB,YAAY,cAAc;AACvF,QAAM,cAAc,WAAW,MAAM,aAAa,eAAe,cAAc;AAC/E,QAAM,eAAe,WAAW,MAAM,cAAc,gBAAgB,cAAc;AAClF,SAAO,EAAE,GAAG,OAAO,gBAAgB,iBAAiB,aAAa,cAAc,eAAe;AAChG;AAEA,SAAS,cACP,SACA,QACA,gBACoC;AACpC,MAAI,CAAC,QAAS,QAAO;AACrB,QAAM,MAA8B,CAAC;AACrC,aAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,OAAO,GAAG;AAC5C,QAAI,sBAAsB,IAAI,EAAE,YAAY,CAAC,GAAG;AAC9C,qBAAe,KAAK,GAAG,MAAM,WAAW,CAAC,EAAE;AAC3C;AAAA,IACF;AACA,QAAI,CAAC,IAAI;AAAA,EACX;AACA,SAAO;AACT;AAEA,SAAS,WACP,OACA,SACA,gBACS;AACT,MAAI,SAAS,KAAM,QAAO;AAC1B,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,MAAM,IAAI,CAAC,GAAG,MAAM,WAAW,GAAG,GAAG,OAAO,IAAI,CAAC,KAAK,cAAc,CAAC;AACtG,MAAI,OAAO,UAAU,UAAU;AAC7B,UAAM,MAA+B,CAAC;AACtC,eAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,KAAgC,GAAG;AACrE,UAAI,kBAAkB,KAAK,CAAC,GAAG;AAC7B,uBAAe,KAAK,GAAG,OAAO,IAAI,CAAC,EAAE;AACrC;AAAA,MACF;AACA,UAAI,CAAC,IAAI,WAAW,GAAG,GAAG,OAAO,IAAI,CAAC,IAAI,cAAc;AAAA,IAC1D;AACA,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAQO,IAAM,0BAAN,MAAyD;AAAA,EACtD,SAA6B,CAAC;AAAA,EAC9B;AAAA,EAER,YAAY,OAAuC,CAAC,GAAG;AACrD,SAAK,WAAW,KAAK,YAAY;AAAA,EACnC;AAAA,EAEA,MAAM,OAAO,OAAwC;AACnD,SAAK,OAAO,KAAK,KAAK,SAAS,EAAE,GAAG,OAAO,gBAAgB,MAAM,kBAAkB,CAAC,EAAE,CAAC,CAAC;AAAA,EAC1F;AAAA,EAEA,MAAM,KAAK,SAAgC,CAAC,GAAgC;AAC1E,WAAO,KAAK,OAAO;AAAA,MAAO,CAAC,OACxB,OAAO,UAAU,UAAa,EAAE,UAAU,OAAO,WACjD,OAAO,WAAW,UAAa,EAAE,WAAW,OAAO,YACnD,OAAO,cAAc,UAAa,EAAE,cAAc,OAAO,eACzD,OAAO,iBAAiB,UAAa,EAAE,iBAAiB,OAAO;AAAA,IAClE;AAAA,EACF;AAAA,EAEA,OAAe;AAAE,WAAO,KAAK,OAAO;AAAA,EAAO;AAC7C;AAEO,IAAM,sBAAN,MAAqD;AAAA,EAC1D,MAAM,SAAwB;AAAA,EAAc;AAC9C;AAcO,IAAM,4BAAN,MAA2D;AAAA,EACxD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,YAAY;AAAA,EACZ,cAAoC;AAAA,EAE5C,YAAY,MAAwC;AAClD,SAAK,MAAM,KAAK;AAChB,SAAK,WAAW,KAAK,YAAY;AACjC,SAAK,cAAc,KAAK,eAAe,KAAK,OAAO;AACnD,SAAK,WAAW,KAAK,YAAY;AAAA,EACnC;AAAA,EAEA,MAAc,aAA4B;AACxC,QAAI,CAAC,KAAK,aAAa;AACrB,WAAK,cAAc,GAAG,MAAM,KAAK,KAAK,EAAE,WAAW,KAAK,CAAC,EAAE,KAAK,MAAM,MAAS;AAAA,IACjF;AACA,UAAM,KAAK;AAAA,EACb;AAAA,EAEQ,cAAsB;AAC5B,QAAI,KAAK,cAAc,EAAG,QAAY,UAAK,KAAK,KAAK,KAAK,QAAQ;AAClE,WAAY,UAAK,KAAK,KAAK,GAAG,KAAK,QAAQ,IAAI,KAAK,SAAS,EAAE;AAAA,EACjE;AAAA,EAEA,MAAM,OAAO,OAAwC;AACnD,UAAM,KAAK,WAAW;AACtB,UAAM,WAAW,KAAK,SAAS,EAAE,GAAG,OAAO,gBAAgB,MAAM,kBAAkB,CAAC,EAAE,CAAC;AACvF,UAAM,OAAO,KAAK,UAAU,QAAQ,IAAI;AACxC,QAAI,KAAK,eAAe,KAAK,SAAS,KAAK,eAAe,KAAK,eAAe,GAAG;AAC/E,WAAK,aAAa;AAClB,WAAK,eAAe;AAAA,IACtB;AACA,UAAM,GAAG,WAAW,KAAK,YAAY,GAAG,MAAM,MAAM;AACpD,SAAK,gBAAgB,KAAK;AAAA,EAC5B;AAAA,EAEA,MAAM,KAAK,SAAgC,CAAC,GAAgC;AAC1E,UAAM,KAAK,WAAW;AACtB,UAAM,MAA0B,CAAC;AACjC,aAAS,IAAI,GAAG,KAAK,KAAK,WAAW,KAAK;AACxC,YAAM,OAAO,MAAM,IACV,UAAK,KAAK,KAAK,KAAK,QAAQ,IAC5B,UAAK,KAAK,KAAK,GAAG,KAAK,QAAQ,IAAI,CAAC,EAAE;AAC/C,UAAI;AACJ,UAAI;AACF,eAAO,MAAM,GAAG,SAAS,MAAM,MAAM;AAAA,MACvC,SAAS,KAAK;AACZ,YAAK,IAA8B,SAAS,SAAU;AACtD,cAAM;AAAA,MACR;AACA,iBAAW,QAAQ,KAAK,MAAM,IAAI,GAAG;AACnC,YAAI,CAAC,KAAM;AACX,cAAM,QAAQ,KAAK,MAAM,IAAI;AAC7B,YAAI,OAAO,UAAU,UAAa,MAAM,UAAU,OAAO,MAAO;AAChE,YAAI,OAAO,WAAW,UAAa,MAAM,WAAW,OAAO,OAAQ;AACnE,YAAI,OAAO,cAAc,UAAa,MAAM,cAAc,OAAO,UAAW;AAC5E,YAAI,OAAO,iBAAiB,UAAa,MAAM,iBAAiB,OAAO,aAAc;AACrF,YAAI,KAAK,KAAK;AAAA,MAChB;AAAA,IACF;AACA,WAAO;AAAA,EACT;AACF;AAQO,SAAS,oBAAoB,SAAyB;AAC3D,QAAM,QAAQ,QAAQ,YAAY;AAClC,MAAI,MAAM,SAAS,gBAAgB,EAAG,QAAO;AAC7C,MAAI,MAAM,SAAS,mBAAmB,EAAG,QAAO;AAChD,MAAI,MAAM,SAAS,mCAAmC,EAAG,QAAO;AAChE,MAAI,MAAM,SAAS,iBAAiB,KAAK,MAAM,SAAS,kBAAkB,EAAG,QAAO;AACpF,MAAI,MAAM,SAAS,kBAAkB,EAAG,QAAO;AAC/C,MAAI,MAAM,SAAS,qBAAqB,EAAG,QAAO;AAClD,MAAI,MAAM,SAAS,aAAa,KAAK,MAAM,SAAS,SAAS,EAAG,QAAO;AACvE,MAAI;AACF,WAAO,IAAI,IAAI,OAAO,EAAE;AAAA,EAC1B,QAAQ;AACN,WAAO;AAAA,EACT;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/trace/raw-provider-sink.ts"],"sourcesContent":["/**\n * RawProviderSink — first-class persistence for the actual HTTP-level\n * request/response bodies of every LLM provider call.\n *\n * Why this is a separate sink from the structured `LlmSpan`:\n *\n * - `LlmSpan` records the *intent* — model name, messages, output text,\n * usage. It's what dashboards read; it's NOT enough for forensics.\n * - When a downstream consumer reports \"the verifier used the wrong route\"\n * or \"tokens look right but reasoning was missing,\" the only way to\n * answer is the raw HTTP body. Span fields can lie (a proxy can echo\n * a different `model` value than what actually answered); the raw\n * response is ground truth.\n *\n * Default behaviour: opt-in. Pass `rawSink` to `LlmClientOptions` (or the\n * matrix runner / BuilderSession sets it up automatically) and every\n * request, response, and error is recorded — including retries, with the\n * attempt index attached so a flaky call's full event chain is recoverable.\n *\n * Redaction is enforced at sink time. The default redactor strips\n * `Authorization`, `X-Api-Key`, `X-Auth-Token`, `Cookie` headers and any\n * payload field whose key matches `apiKey | api_key | bearer | password |\n * secret | token` (case-insensitive). Override via the sink constructor or\n * the per-call `redactor`. The `redactedFields` array on the persisted\n * event lets a reviewer see what was stripped without exposing the values.\n */\n\nimport { promises as fs } from 'node:fs'\nimport * as path from 'node:path'\n\nexport type RawProviderDirection = 'request' | 'response' | 'error'\n\nexport interface RawProviderEvent {\n /** Stable id. Generated by the sink if omitted. */\n eventId: string\n /** Trace context populated by `LlmClient` when the call is wrapped in a span. */\n runId?: string\n spanId?: string\n /**\n * Logical provider name. Free-form so callers can use whatever id matches\n * their topology (`'openai'`, `'anthropic'`, `'tangle-router'`, …). When\n * omitted, derived from `baseUrl` in `LlmClientOptions`.\n */\n provider: string\n model: string\n /** Endpoint path, e.g. `'/v1/chat/completions'`. */\n endpoint: string\n /** Base URL used for the call (already-normalised — no trailing slash). */\n baseUrl: string\n /** 0-indexed retry attempt. The first attempt is 0; a retried call gets 1, 2, … */\n attemptIndex: number\n direction: RawProviderDirection\n /** Unix ms. */\n timestamp: number\n /** Wall-clock duration of the call leg. Set on `response` and `error` events; null on `request`. */\n durationMs?: number\n statusCode?: number\n requestHeaders?: Record<string, string>\n requestBody?: unknown\n responseHeaders?: Record<string, string>\n responseBody?: unknown\n /** Set on `direction: 'error'` events. */\n errorMessage?: string\n /** Field paths the redactor stripped from this event ('header:Authorization', 'body.apiKey', …). */\n redactedFields: string[]\n}\n\nexport interface RawProviderSinkFilter {\n runId?: string\n spanId?: string\n direction?: RawProviderDirection\n attemptIndex?: number\n}\n\nexport interface RawProviderSink {\n record(event: RawProviderEvent): Promise<void>\n /** Optional listing — implementations that durably persist (file, db) should support this. */\n list?(filter?: RawProviderSinkFilter): Promise<RawProviderEvent[]>\n /** Optional teardown for backed implementations. */\n close?(): Promise<void>\n}\n\nexport type ProviderRedactor = (event: RawProviderEvent) => RawProviderEvent\n\nconst REDACTED_HEADER_NAMES = new Set([\n 'authorization',\n 'x-api-key',\n 'x-auth-token',\n 'cookie',\n 'set-cookie',\n 'proxy-authorization',\n])\n\nconst REDACTED_BODY_KEY = /^(api[_-]?key|bearer|password|secret|token|access[_-]?token|refresh[_-]?token)$/i\n\n/**\n * Default redactor — strips well-known auth headers and any body field whose\n * key matches the credential pattern. Records every redacted path on\n * `event.redactedFields` so a downstream reviewer can see what was removed.\n */\nexport function defaultProviderRedactor(event: RawProviderEvent): RawProviderEvent {\n const redactedFields: string[] = [...(event.redactedFields ?? [])]\n const requestHeaders = redactHeaders(event.requestHeaders, 'request', redactedFields)\n const responseHeaders = redactHeaders(event.responseHeaders, 'response', redactedFields)\n const requestBody = redactBody(event.requestBody, 'requestBody', redactedFields)\n const responseBody = redactBody(event.responseBody, 'responseBody', redactedFields)\n return { ...event, requestHeaders, responseHeaders, requestBody, responseBody, redactedFields }\n}\n\nfunction redactHeaders(\n headers: Record<string, string> | undefined,\n prefix: 'request' | 'response',\n redactedFields: string[],\n): Record<string, string> | undefined {\n if (!headers) return headers\n const out: Record<string, string> = {}\n for (const [k, v] of Object.entries(headers)) {\n if (REDACTED_HEADER_NAMES.has(k.toLowerCase())) {\n redactedFields.push(`${prefix}Headers.${k}`)\n continue\n }\n out[k] = v\n }\n return out\n}\n\nfunction redactBody(\n value: unknown,\n pathStr: string,\n redactedFields: string[],\n): unknown {\n if (value == null) return value\n if (Array.isArray(value)) return value.map((v, i) => redactBody(v, `${pathStr}[${i}]`, redactedFields))\n if (typeof value === 'object') {\n const out: Record<string, unknown> = {}\n for (const [k, v] of Object.entries(value as Record<string, unknown>)) {\n if (REDACTED_BODY_KEY.test(k)) {\n redactedFields.push(`${pathStr}.${k}`)\n continue\n }\n out[k] = redactBody(v, `${pathStr}.${k}`, redactedFields)\n }\n return out\n }\n return value\n}\n\n// ── In-memory ────────────────────────────────────────────────────────────\n\nexport interface InMemoryRawProviderSinkOptions {\n redactor?: ProviderRedactor\n}\n\nexport class InMemoryRawProviderSink implements RawProviderSink {\n private events: RawProviderEvent[] = []\n private redactor: ProviderRedactor\n\n constructor(opts: InMemoryRawProviderSinkOptions = {}) {\n this.redactor = opts.redactor ?? defaultProviderRedactor\n }\n\n async record(event: RawProviderEvent): Promise<void> {\n this.events.push(this.redactor({ ...event, redactedFields: event.redactedFields ?? [] }))\n }\n\n async list(filter: RawProviderSinkFilter = {}): Promise<RawProviderEvent[]> {\n return this.events.filter((e) =>\n (filter.runId === undefined || e.runId === filter.runId) &&\n (filter.spanId === undefined || e.spanId === filter.spanId) &&\n (filter.direction === undefined || e.direction === filter.direction) &&\n (filter.attemptIndex === undefined || e.attemptIndex === filter.attemptIndex),\n )\n }\n\n size(): number { return this.events.length }\n}\n\nexport class NoopRawProviderSink implements RawProviderSink {\n async record(): Promise<void> { /* no-op */ }\n /**\n * Returns an empty array. Implemented so `assertRunCaptured` does not\n * trip the `no_raw_sink` issue when a caller explicitly opts out of\n * capture by passing this sink — opt-out is a deliberate choice, not a\n * misconfiguration.\n */\n async list(): Promise<RawProviderEvent[]> { return [] }\n}\n\n// ── Filesystem (NDJSON) ──────────────────────────────────────────────────\n\nexport interface FileSystemRawProviderSinkOptions {\n /** Directory the NDJSON file is written into. Created if missing. */\n dir: string\n /** File name; default `'raw-provider-events.ndjson'`. */\n fileName?: string\n /** Bytes after which the writer rolls over to a new file (default 32 MiB). */\n rollAtBytes?: number\n redactor?: ProviderRedactor\n}\n\nexport class FileSystemRawProviderSink implements RawProviderSink {\n private dir: string\n private fileName: string\n private rollAtBytes: number\n private redactor: ProviderRedactor\n private bytesWritten = 0\n private rollIndex = 0\n private initPromise: Promise<void> | null = null\n\n constructor(opts: FileSystemRawProviderSinkOptions) {\n this.dir = opts.dir\n this.fileName = opts.fileName ?? 'raw-provider-events.ndjson'\n this.rollAtBytes = opts.rollAtBytes ?? 32 * 1024 * 1024\n this.redactor = opts.redactor ?? defaultProviderRedactor\n }\n\n private async ensureInit(): Promise<void> {\n if (!this.initPromise) {\n this.initPromise = fs.mkdir(this.dir, { recursive: true }).then(() => undefined)\n }\n await this.initPromise\n }\n\n private currentPath(): string {\n if (this.rollIndex === 0) return path.join(this.dir, this.fileName)\n return path.join(this.dir, `${this.fileName}.${this.rollIndex}`)\n }\n\n async record(event: RawProviderEvent): Promise<void> {\n await this.ensureInit()\n const redacted = this.redactor({ ...event, redactedFields: event.redactedFields ?? [] })\n const line = JSON.stringify(redacted) + '\\n'\n if (this.bytesWritten + line.length > this.rollAtBytes && this.bytesWritten > 0) {\n this.rollIndex += 1\n this.bytesWritten = 0\n }\n await fs.appendFile(this.currentPath(), line, 'utf8')\n this.bytesWritten += line.length\n }\n\n async list(filter: RawProviderSinkFilter = {}): Promise<RawProviderEvent[]> {\n await this.ensureInit()\n const out: RawProviderEvent[] = []\n for (let i = 0; i <= this.rollIndex; i++) {\n const file = i === 0\n ? path.join(this.dir, this.fileName)\n : path.join(this.dir, `${this.fileName}.${i}`)\n let body: string\n try {\n body = await fs.readFile(file, 'utf8')\n } catch (err) {\n if ((err as NodeJS.ErrnoException).code === 'ENOENT') continue\n throw err\n }\n for (const line of body.split('\\n')) {\n if (!line) continue\n const event = JSON.parse(line) as RawProviderEvent\n if (filter.runId !== undefined && event.runId !== filter.runId) continue\n if (filter.spanId !== undefined && event.spanId !== filter.spanId) continue\n if (filter.direction !== undefined && event.direction !== filter.direction) continue\n if (filter.attemptIndex !== undefined && event.attemptIndex !== filter.attemptIndex) continue\n out.push(event)\n }\n }\n return out\n }\n}\n\n// ── Helpers ──────────────────────────────────────────────────────────────\n\n/**\n * Best-effort provider id from a base URL. Falls back to the URL host when\n * none of the well-known patterns match.\n */\nexport function providerFromBaseUrl(baseUrl: string): string {\n const lower = baseUrl.toLowerCase()\n if (lower.includes('api.openai.com')) return 'openai'\n if (lower.includes('api.anthropic.com')) return 'anthropic'\n if (lower.includes('generativelanguage.googleapis.com')) return 'google'\n if (lower.includes('api.together.ai') || lower.includes('api.together.xyz')) return 'together'\n if (lower.includes('api.deepseek.com')) return 'deepseek'\n if (lower.includes('router.tangle.tools')) return 'tangle-router'\n if (lower.includes('api.litellm') || lower.includes('litellm')) return 'litellm'\n try {\n return new URL(baseUrl).host\n } catch {\n return baseUrl\n }\n}\n"],"mappings":";AA2BA,SAAS,YAAY,UAAU;AAC/B,YAAY,UAAU;AAwDtB,IAAM,wBAAwB,oBAAI,IAAI;AAAA,EACpC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,IAAM,oBAAoB;AAOnB,SAAS,wBAAwB,OAA2C;AACjF,QAAM,iBAA2B,CAAC,GAAI,MAAM,kBAAkB,CAAC,CAAE;AACjE,QAAM,iBAAiB,cAAc,MAAM,gBAAgB,WAAW,cAAc;AACpF,QAAM,kBAAkB,cAAc,MAAM,iBAAiB,YAAY,cAAc;AACvF,QAAM,cAAc,WAAW,MAAM,aAAa,eAAe,cAAc;AAC/E,QAAM,eAAe,WAAW,MAAM,cAAc,gBAAgB,cAAc;AAClF,SAAO,EAAE,GAAG,OAAO,gBAAgB,iBAAiB,aAAa,cAAc,eAAe;AAChG;AAEA,SAAS,cACP,SACA,QACA,gBACoC;AACpC,MAAI,CAAC,QAAS,QAAO;AACrB,QAAM,MAA8B,CAAC;AACrC,aAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,OAAO,GAAG;AAC5C,QAAI,sBAAsB,IAAI,EAAE,YAAY,CAAC,GAAG;AAC9C,qBAAe,KAAK,GAAG,MAAM,WAAW,CAAC,EAAE;AAC3C;AAAA,IACF;AACA,QAAI,CAAC,IAAI;AAAA,EACX;AACA,SAAO;AACT;AAEA,SAAS,WACP,OACA,SACA,gBACS;AACT,MAAI,SAAS,KAAM,QAAO;AAC1B,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,MAAM,IAAI,CAAC,GAAG,MAAM,WAAW,GAAG,GAAG,OAAO,IAAI,CAAC,KAAK,cAAc,CAAC;AACtG,MAAI,OAAO,UAAU,UAAU;AAC7B,UAAM,MAA+B,CAAC;AACtC,eAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,KAAgC,GAAG;AACrE,UAAI,kBAAkB,KAAK,CAAC,GAAG;AAC7B,uBAAe,KAAK,GAAG,OAAO,IAAI,CAAC,EAAE;AACrC;AAAA,MACF;AACA,UAAI,CAAC,IAAI,WAAW,GAAG,GAAG,OAAO,IAAI,CAAC,IAAI,cAAc;AAAA,IAC1D;AACA,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAQO,IAAM,0BAAN,MAAyD;AAAA,EACtD,SAA6B,CAAC;AAAA,EAC9B;AAAA,EAER,YAAY,OAAuC,CAAC,GAAG;AACrD,SAAK,WAAW,KAAK,YAAY;AAAA,EACnC;AAAA,EAEA,MAAM,OAAO,OAAwC;AACnD,SAAK,OAAO,KAAK,KAAK,SAAS,EAAE,GAAG,OAAO,gBAAgB,MAAM,kBAAkB,CAAC,EAAE,CAAC,CAAC;AAAA,EAC1F;AAAA,EAEA,MAAM,KAAK,SAAgC,CAAC,GAAgC;AAC1E,WAAO,KAAK,OAAO;AAAA,MAAO,CAAC,OACxB,OAAO,UAAU,UAAa,EAAE,UAAU,OAAO,WACjD,OAAO,WAAW,UAAa,EAAE,WAAW,OAAO,YACnD,OAAO,cAAc,UAAa,EAAE,cAAc,OAAO,eACzD,OAAO,iBAAiB,UAAa,EAAE,iBAAiB,OAAO;AAAA,IAClE;AAAA,EACF;AAAA,EAEA,OAAe;AAAE,WAAO,KAAK,OAAO;AAAA,EAAO;AAC7C;AAEO,IAAM,sBAAN,MAAqD;AAAA,EAC1D,MAAM,SAAwB;AAAA,EAAc;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAO5C,MAAM,OAAoC;AAAE,WAAO,CAAC;AAAA,EAAE;AACxD;AAcO,IAAM,4BAAN,MAA2D;AAAA,EACxD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,YAAY;AAAA,EACZ,cAAoC;AAAA,EAE5C,YAAY,MAAwC;AAClD,SAAK,MAAM,KAAK;AAChB,SAAK,WAAW,KAAK,YAAY;AACjC,SAAK,cAAc,KAAK,eAAe,KAAK,OAAO;AACnD,SAAK,WAAW,KAAK,YAAY;AAAA,EACnC;AAAA,EAEA,MAAc,aAA4B;AACxC,QAAI,CAAC,KAAK,aAAa;AACrB,WAAK,cAAc,GAAG,MAAM,KAAK,KAAK,EAAE,WAAW,KAAK,CAAC,EAAE,KAAK,MAAM,MAAS;AAAA,IACjF;AACA,UAAM,KAAK;AAAA,EACb;AAAA,EAEQ,cAAsB;AAC5B,QAAI,KAAK,cAAc,EAAG,QAAY,UAAK,KAAK,KAAK,KAAK,QAAQ;AAClE,WAAY,UAAK,KAAK,KAAK,GAAG,KAAK,QAAQ,IAAI,KAAK,SAAS,EAAE;AAAA,EACjE;AAAA,EAEA,MAAM,OAAO,OAAwC;AACnD,UAAM,KAAK,WAAW;AACtB,UAAM,WAAW,KAAK,SAAS,EAAE,GAAG,OAAO,gBAAgB,MAAM,kBAAkB,CAAC,EAAE,CAAC;AACvF,UAAM,OAAO,KAAK,UAAU,QAAQ,IAAI;AACxC,QAAI,KAAK,eAAe,KAAK,SAAS,KAAK,eAAe,KAAK,eAAe,GAAG;AAC/E,WAAK,aAAa;AAClB,WAAK,eAAe;AAAA,IACtB;AACA,UAAM,GAAG,WAAW,KAAK,YAAY,GAAG,MAAM,MAAM;AACpD,SAAK,gBAAgB,KAAK;AAAA,EAC5B;AAAA,EAEA,MAAM,KAAK,SAAgC,CAAC,GAAgC;AAC1E,UAAM,KAAK,WAAW;AACtB,UAAM,MAA0B,CAAC;AACjC,aAAS,IAAI,GAAG,KAAK,KAAK,WAAW,KAAK;AACxC,YAAM,OAAO,MAAM,IACV,UAAK,KAAK,KAAK,KAAK,QAAQ,IAC5B,UAAK,KAAK,KAAK,GAAG,KAAK,QAAQ,IAAI,CAAC,EAAE;AAC/C,UAAI;AACJ,UAAI;AACF,eAAO,MAAM,GAAG,SAAS,MAAM,MAAM;AAAA,MACvC,SAAS,KAAK;AACZ,YAAK,IAA8B,SAAS,SAAU;AACtD,cAAM;AAAA,MACR;AACA,iBAAW,QAAQ,KAAK,MAAM,IAAI,GAAG;AACnC,YAAI,CAAC,KAAM;AACX,cAAM,QAAQ,KAAK,MAAM,IAAI;AAC7B,YAAI,OAAO,UAAU,UAAa,MAAM,UAAU,OAAO,MAAO;AAChE,YAAI,OAAO,WAAW,UAAa,MAAM,WAAW,OAAO,OAAQ;AACnE,YAAI,OAAO,cAAc,UAAa,MAAM,cAAc,OAAO,UAAW;AAC5E,YAAI,OAAO,iBAAiB,UAAa,MAAM,iBAAiB,OAAO,aAAc;AACrF,YAAI,KAAK,KAAK;AAAA,MAChB;AAAA,IACF;AACA,WAAO;AAAA,EACT;AACF;AAQO,SAAS,oBAAoB,SAAyB;AAC3D,QAAM,QAAQ,QAAQ,YAAY;AAClC,MAAI,MAAM,SAAS,gBAAgB,EAAG,QAAO;AAC7C,MAAI,MAAM,SAAS,mBAAmB,EAAG,QAAO;AAChD,MAAI,MAAM,SAAS,mCAAmC,EAAG,QAAO;AAChE,MAAI,MAAM,SAAS,iBAAiB,KAAK,MAAM,SAAS,kBAAkB,EAAG,QAAO;AACpF,MAAI,MAAM,SAAS,kBAAkB,EAAG,QAAO;AAC/C,MAAI,MAAM,SAAS,qBAAqB,EAAG,QAAO;AAClD,MAAI,MAAM,SAAS,aAAa,KAAK,MAAM,SAAS,SAAS,EAAG,QAAO;AACvE,MAAI;AACF,WAAO,IAAI,IAAI,OAAO,EAAE;AAAA,EAC1B,QAAQ;AACN,WAAO;AAAA,EACT;AACF;","names":[]}
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  validateRunRecord
3
- } from "./chunk-YUFXO3TU.js";
3
+ } from "./chunk-QBW3YBTR.js";
4
4
  import {
5
5
  TraceEmitter
6
6
  } from "./chunk-5IIQKMD5.js";
@@ -1307,4 +1307,4 @@ export {
1307
1307
  runProposeReviewAsControlLoop,
1308
1308
  controlFailureClassFromVerification
1309
1309
  };
1310
- //# sourceMappingURL=chunk-ARZ6BEV6.js.map
1310
+ //# sourceMappingURL=chunk-V5QSWN7L.js.map
@@ -1,10 +1,10 @@
1
1
  import {
2
2
  validateRunRecord
3
- } from "./chunk-YUFXO3TU.js";
3
+ } from "./chunk-QBW3YBTR.js";
4
4
  import {
5
5
  pairedBootstrap,
6
6
  pairedWilcoxon
7
- } from "./chunk-KRR4VMH7.js";
7
+ } from "./chunk-IOXMGMHQ.js";
8
8
 
9
9
  // src/feedback-trajectory.ts
10
10
  var DEFAULT_SPLIT_POLICY = {
@@ -1351,4 +1351,4 @@ export {
1351
1351
  buildReflectionPrompt,
1352
1352
  parseReflectionResponse
1353
1353
  };
1354
- //# sourceMappingURL=chunk-HRZELXCR.js.map
1354
+ //# sourceMappingURL=chunk-VQQSPGSM.js.map
package/dist/cli.js CHANGED
@@ -5,9 +5,9 @@ import {
5
5
  runRpcBatch,
6
6
  runRpcOnce,
7
7
  startServer
8
- } from "./chunk-WOPGKVN4.js";
9
- import "./chunk-3GN6U53I.js";
10
- import "./chunk-SNUHRBDL.js";
8
+ } from "./chunk-6KQG5HAH.js";
9
+ import "./chunk-KAO3Q65R.js";
10
+ import "./chunk-SQQLHODJ.js";
11
11
  import "./chunk-PZ5AY32C.js";
12
12
 
13
13
  // src/cli.ts
@@ -1,5 +1,5 @@
1
- import { c as ControlEvalResult, i as ControlRunResult, F as FeedbackLabel, A as ProposedSideEffect, j as ControlRuntimeConfig } from './feedback-trajectory-CB0A32o3.js';
2
- import { R as RunSplitTag, e as RunTokenUsage, a as RunRecord } from './run-record-CX_jcAyr.js';
1
+ import { G as ControlEvalResult, N as ControlRunResult, b as FeedbackLabel, p as ProposedSideEffect, O as ControlRuntimeConfig } from './feedback-trajectory-c43WGtTX.js';
2
+ import { a as RunSplitTag, b as RunTokenUsage, R as RunRecord } from './run-record-DNiOMBrZ.js';
3
3
  import { T as TraceStore, F as FailureClass } from './store-u47QaJ9G.js';
4
4
  import { T as TraceEmitter } from './emitter-B2XqDKFU.js';
5
5
 
package/dist/control.d.ts CHANGED
@@ -1,6 +1,6 @@
1
- export { d as ControlActionFailureMode, e as ControlActionOutcome, f as ControlBudget, g as ControlContext, h as ControlDecision, c as ControlEvalResult, i as ControlRunResult, j as ControlRuntimeConfig, k as ControlRuntimeError, C as ControlSeverity, l as ControlStep, m as ControlStopPolicies, S as StopDecision, B as allCriticalPassed, M as objectiveEval, T as runAgentControlLoop, V as stopOnNoProgress, W as stopOnRepeatedAction, X as subjectiveEval } from './feedback-trajectory-CB0A32o3.js';
2
- export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-cxwMOAsy.js';
1
+ export { H as ControlActionFailureMode, J as ControlActionOutcome, K as ControlBudget, L as ControlContext, M as ControlDecision, G as ControlEvalResult, N as ControlRunResult, O as ControlRuntimeConfig, Q as ControlRuntimeError, E as ControlSeverity, R as ControlStep, S as ControlStopPolicies, T as StopDecision, U as allCriticalPassed, V as objectiveEval, W as runAgentControlLoop, X as stopOnNoProgress, Y as stopOnRepeatedAction, Z as subjectiveEval } from './feedback-trajectory-c43WGtTX.js';
2
+ export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-DvkH87qJ.js';
3
3
  import './dataset-B9qvlm_o.js';
4
4
  import './emitter-B2XqDKFU.js';
5
5
  import './store-u47QaJ9G.js';
6
- import './run-record-CX_jcAyr.js';
6
+ import './run-record-DNiOMBrZ.js';
package/dist/control.js CHANGED
@@ -10,8 +10,8 @@ import {
10
10
  stopOnNoProgress,
11
11
  stopOnRepeatedAction,
12
12
  subjectiveEval
13
- } from "./chunk-ARZ6BEV6.js";
14
- import "./chunk-YUFXO3TU.js";
13
+ } from "./chunk-V5QSWN7L.js";
14
+ import "./chunk-QBW3YBTR.js";
15
15
  import "./chunk-5IIQKMD5.js";
16
16
  import "./chunk-PZ5AY32C.js";
17
17
  export {