@tangle-network/agent-eval 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/CHANGELOG.md +72 -0
  2. package/README.md +4 -5
  3. package/dist/{baseline-4R5deP0N.d.ts → baseline-BwdCXUS8.d.ts} +1 -1
  4. package/dist/builder-eval/index.d.ts +3 -3
  5. package/dist/builder-eval/index.js +1 -1
  6. package/dist/{chunk-WWYCWKUM.js → chunk-3CKU6VGU.js} +2 -2
  7. package/dist/{chunk-2A5XJB43.js → chunk-5AKPEK5L.js} +3 -3
  8. package/dist/chunk-5AKPEK5L.js.map +1 -0
  9. package/dist/{chunk-RAF443UI.js → chunk-DBIGN5MJ.js} +2 -2
  10. package/dist/{chunk-JLZQWFV3.js → chunk-K33INZHH.js} +2 -2
  11. package/dist/chunk-K33INZHH.js.map +1 -0
  12. package/dist/{chunk-NU65VQ7M.js → chunk-MAZ26DC7.js} +1 -1
  13. package/dist/chunk-MAZ26DC7.js.map +1 -0
  14. package/dist/{chunk-LSH4MMOZ.js → chunk-NCRFYPS3.js} +1 -1
  15. package/dist/chunk-NCRFYPS3.js.map +1 -0
  16. package/dist/{chunk-ZN274SWR.js → chunk-PALJO75S.js} +2 -2
  17. package/dist/{chunk-OWLAAMME.js → chunk-QHF6EQKK.js} +3 -2
  18. package/dist/chunk-QHF6EQKK.js.map +1 -0
  19. package/dist/chunk-R5UQJNKC.js +722 -0
  20. package/dist/chunk-R5UQJNKC.js.map +1 -0
  21. package/dist/{chunk-SESZDQPX.js → chunk-RUI6SIHY.js} +3 -3
  22. package/dist/chunk-RUI6SIHY.js.map +1 -0
  23. package/dist/{chunk-WHZMVFUV.js → chunk-SZSBQUIJ.js} +2 -2
  24. package/dist/chunk-SZSBQUIJ.js.map +1 -0
  25. package/dist/chunk-UW4NOOZI.js +1561 -0
  26. package/dist/chunk-UW4NOOZI.js.map +1 -0
  27. package/dist/{chunk-4F5DQN55.js → chunk-VSMTAMNK.js} +1 -1
  28. package/dist/chunk-VSMTAMNK.js.map +1 -0
  29. package/dist/{chunk-5LBB5B3Z.js → chunk-XFZCM5Z3.js} +1 -1
  30. package/dist/chunk-XFZCM5Z3.js.map +1 -0
  31. package/dist/cli.js +1 -1
  32. package/dist/{control-CBShYYA6.d.ts → control-rJhEDdpy.d.ts} +4 -4
  33. package/dist/{control-runtime-BuJHoLg0.d.ts → control-runtime-BRdQ0wrx.d.ts} +3 -2
  34. package/dist/control.d.ts +5 -5
  35. package/dist/control.js +2 -2
  36. package/dist/{emitter-DP_cSSiw.d.ts → emitter-BqjeOvJh.d.ts} +1 -1
  37. package/dist/{failure-cluster-C2EGSDiT.d.ts → failure-cluster-D1NZKqYu.d.ts} +2 -3
  38. package/dist/{feedback-trajectory-DfFdrraJ.d.ts → feedback-trajectory-j0nJFgC6.d.ts} +1 -1
  39. package/dist/governance/index.d.ts +2 -2
  40. package/dist/{index-D3iBCjdF.d.ts → index-Cgt3DKXr.d.ts} +2 -2
  41. package/dist/index.d.ts +1279 -468
  42. package/dist/index.js +1992 -1259
  43. package/dist/index.js.map +1 -1
  44. package/dist/{integrity-DK2EBVZC.d.ts → integrity-BAxLGJ9I.d.ts} +2 -2
  45. package/dist/knowledge/index.d.ts +3 -3
  46. package/dist/knowledge/index.js +2 -2
  47. package/dist/meta-eval/index.d.ts +1 -1
  48. package/dist/{multi-layer-verifier-LkP3LVKj.d.ts → multi-layer-verifier-BNi4-8lR.d.ts} +2 -2
  49. package/dist/openapi.json +1 -1
  50. package/dist/optimization.d.ts +8 -8
  51. package/dist/optimization.js +5 -5
  52. package/dist/pipelines/index.d.ts +6 -6
  53. package/dist/pipelines/index.js +2 -2
  54. package/dist/prm/index.d.ts +4 -4
  55. package/dist/{query-DODUYdPg.d.ts → query-BFDT0kX_.d.ts} +1 -1
  56. package/dist/{release-report-wfUySN5F.d.ts → release-report-PWhGlpfO.d.ts} +1 -1
  57. package/dist/replay-BX5Fm8en.d.ts +529 -0
  58. package/dist/reporting.d.ts +5 -5
  59. package/dist/reporting.js +5 -5
  60. package/dist/{researcher-bGkI7vCl.d.ts → researcher-ClDX3KZx.d.ts} +13 -14
  61. package/dist/rl.d.ts +29 -47
  62. package/dist/rl.js +5 -5
  63. package/dist/rl.js.map +1 -1
  64. package/dist/{rubric-D5tjHNJQ.d.ts → rubric-DgSqjqqj.d.ts} +2 -2
  65. package/dist/{sequential-Dgz1n51-.d.ts → sequential-5iSVfzl2.d.ts} +2 -2
  66. package/dist/{store-Db2Bv8Cf.d.ts → store-BP5be6s7.d.ts} +1 -1
  67. package/dist/{summary-report-DZVXOCK_.d.ts → summary-report-jrSGb2xZ.d.ts} +5 -5
  68. package/dist/{test-graded-scenario-B2kWEdh9.d.ts → test-graded-scenario-BJ54PDan.d.ts} +2 -2
  69. package/dist/traces.d.ts +9 -311
  70. package/dist/traces.js +16 -987
  71. package/dist/traces.js.map +1 -1
  72. package/dist/{trajectory-CnoBo-JY.d.ts → trajectory-BFmveYZt.d.ts} +1 -1
  73. package/dist/wire/index.d.ts +4 -4
  74. package/dist/wire/index.js +1 -1
  75. package/docs/research-report-methodology.md +4 -4
  76. package/docs/three-package-architecture.md +12 -24
  77. package/package.json +1 -1
  78. package/dist/chunk-2A5XJB43.js.map +0 -1
  79. package/dist/chunk-4F5DQN55.js.map +0 -1
  80. package/dist/chunk-5LBB5B3Z.js.map +0 -1
  81. package/dist/chunk-I4MBDTY5.js +0 -272
  82. package/dist/chunk-I4MBDTY5.js.map +0 -1
  83. package/dist/chunk-JLZQWFV3.js.map +0 -1
  84. package/dist/chunk-K2TPS5LB.js +0 -569
  85. package/dist/chunk-K2TPS5LB.js.map +0 -1
  86. package/dist/chunk-LSH4MMOZ.js.map +0 -1
  87. package/dist/chunk-NU65VQ7M.js.map +0 -1
  88. package/dist/chunk-OWLAAMME.js.map +0 -1
  89. package/dist/chunk-SESZDQPX.js.map +0 -1
  90. package/dist/chunk-WHZMVFUV.js.map +0 -1
  91. package/dist/replay-BL96gCEP.d.ts +0 -226
  92. /package/dist/{chunk-WWYCWKUM.js.map → chunk-3CKU6VGU.js.map} +0 -0
  93. /package/dist/{chunk-RAF443UI.js.map → chunk-DBIGN5MJ.js.map} +0 -0
  94. /package/dist/{chunk-ZN274SWR.js.map → chunk-PALJO75S.js.map} +0 -0
@@ -0,0 +1,1561 @@
1
+ import {
2
+ canonicalize,
3
+ hashJson
4
+ } from "./chunk-VSMTAMNK.js";
5
+ import {
6
+ NotFoundError,
7
+ ReplayError
8
+ } from "./chunk-NG236HPC.js";
9
+
10
+ // src/trace-analyst/prompts.ts
11
+ var TRACE_ANALYST_ACTOR_DESCRIPTION = `You answer questions about an OTLP-shaped JSONL trace dataset using the trace tools provided in the \`traces\` namespace.
12
+
13
+ DISCOVERY \u2192 NARROW \u2192 DEEP-READ protocol \u2014 follow exactly:
14
+
15
+ 1. ALWAYS call \`traces.getDatasetOverview({})\` FIRST without a regex_pattern. The result tells you total_traces, raw_jsonl_bytes, services, agents, models, and sample_trace_ids (real ids \u2014 never fabricate one).
16
+
17
+ 2. Use raw_jsonl_bytes to gauge how expensive raw scans will be. \`filters.regex_pattern\` is the one scan-heavy filter on getDatasetOverview / queryTraces / countTraces \u2014 narrow with indexed fields (has_errors, model_names, service_names, agent_names, time bounds) BEFORE adding a regex on a large dataset.
18
+
19
+ 3. To list more traces than the sample, call \`traces.queryTraces({ filters?, limit, offset? })\`. Each summary carries raw_jsonl_bytes \u2014 use it to choose between viewTrace and searchTrace BEFORE calling either.
20
+
21
+ 4. Per-trace inspection:
22
+ - SMALL trace (raw_jsonl_bytes well under 150_000): call \`traces.viewTrace({ trace_id })\`. Returns all spans. Per-attribute payloads are head-capped at ~4KB; large \`input.value\` / \`output.value\` / \`llm.input_messages\` will show a \`[trace-analyst truncated: N bytes]\` marker.
23
+ - LARGE trace (raw_jsonl_bytes near or above 150_000, or you saw an \`oversized\` response): use \`traces.searchTrace({ trace_id, regex_pattern })\` to get bounded SpanMatchRecords (span metadata + matched text + surrounding context). Then call \`traces.viewSpans({ trace_id, span_ids: [...] })\` for surgical reads (~16KB cap, 4\xD7 higher than discovery), or \`traces.searchSpan({ trace_id, span_id, regex_pattern })\` for one large span. Stays bounded regardless of trace size.
24
+ - Useful regex patterns: \`STATUS_CODE_ERROR\` (failures), tool names like \`grep\` or \`view_trace\`, error strings like \`MaxTurnsExceeded\`, model names, attribute keys.
25
+
26
+ 5. ONLY call viewTrace / viewSpans / searchTrace / searchSpan with trace/span ids you have already seen in sample_trace_ids, a queryTraces page, or a previous search result. Never invent ids.
27
+
28
+ 5a. **Result-shape contract** \u2014 searchTrace and searchSpan return \`{ trace_id, hits, total_matches, has_more }\`. Iterate \`result.hits\` (NOT result.matches). Each hit has \`{ span_id, span_name, span_kind, attribute_path, matched_text, context_before, context_after, match_offset }\`. viewTrace returns \`{ trace_id, spans }\` (or \`oversized\`). viewSpans returns \`{ trace_id, spans, missing_span_ids, truncated_attribute_count }\`. Never assume a field name \u2014 log the result shape first if unsure.
29
+
30
+ 6. If viewTrace returns an \`oversized\` summary instead of \`spans\`, DO NOT retry the same call. Read the summary's top_span_names, span_count, span_response_bytes_max, error_span_count to plan a follow-up: switch to searchTrace (or searchSpan for one large span), then viewSpans on a smaller, surgical span_ids set.
31
+
32
+ 7. If searchTrace or searchSpan returns has_more=true, REFINE the regex to be more specific rather than blindly raising max_matches.
33
+
34
+ 8. If a tool errors (invalid regex, range error), STOP and reconsider \u2014 don't retry with a guessed id or argument. Use the discovery tools above to recover.
35
+
36
+ 9. If a ~4KB-truncated payload from viewTrace / searchTrace matters for your answer, first try viewSpans on that span id (~16KB cap). If a 16KB-truncated payload from viewSpans still matters, narrow further with searchSpan against a more specific regex rather than asking for the full payload again.
37
+
38
+ 10. If maxDepth > 0 and the question splits into independent semantic branches, delegate well-defined subtasks to subagents using \`await llmQuery(...)\`. Pass narrow context and a focused query. Examples:
39
+
40
+ const reviews = await llmQuery([
41
+ { query: 'Drill into trace abc123 \u2014 what tool calls preceded the failure?', context: { trace_id: 'abc123' } },
42
+ { query: 'Drill into trace def456 \u2014 same failure mode?', context: { trace_id: 'def456' } },
43
+ ]);
44
+
45
+ OBSERVABILITY rules:
46
+ - Each non-final actor turn must emit at least one \`console.log(...)\` for evidence. Up to 3 logs per turn is fine when correlating multiple data sources (e.g. one log for findings list, one for source-file content, one for derived analysis).
47
+ - Do NOT combine \`console.log\` with \`final(...)\` or \`askClarification(...)\` in the same turn \u2014 finish gathering data first, then call final on its own turn.
48
+ - Reuse runtime variables across turns; don't recompute.
49
+ - When done, call \`await final(answer)\` with the fully-formed report. The responder rewrites the answer into output fields; if you only pass a vague summary string the responder has nothing concrete to format.
50
+
51
+ CRITICAL \u2014 \`final()\` payload contract for evidence-grounded analysis tasks:
52
+ - Pass a STRUCTURED object as the second arg with the actual data the responder needs to format the answer. Do NOT pass abstract instructions; pass evidence.
53
+ - Example for per-item verdict tasks:
54
+ \`\`\`js
55
+ await final("Format the per-item verdict report from the evidence below.", {
56
+ findings: [
57
+ { id: 'sub-1-finding-1', claim: '...', verdict: 'TRUE-POSITIVE', evidence: 'lines 42-45 of contracts/X.sol show ...' },
58
+ ...all items
59
+ ],
60
+ systemic_summary: '3 sentences I wrote based on the evidence above'
61
+ });
62
+ \`\`\`
63
+ - Calling \`final("answer", {})\` with no evidence is a failure mode \u2014 the responder will hallucinate or echo back the field names. Always include the gathered data.
64
+ - Premature final after a single viewSpans call is INSUFFICIENT for per-finding analysis tasks. Read the requested attributes (e.g. \`spans[i].attributes['redteam.finding.title']\`), and for each one perform the requested cross-reference (e.g. read the source SPAN's \`attributes['source.content']\`).
65
+
66
+ OUTPUT contract \u2014 your final answer must include:
67
+ - A clear prose conclusion answering the user's question.
68
+ - Trace ids and span ids cited as evidence for each claim.
69
+ - Failure modes named in the user's domain language, with frequency and concrete examples.
70
+
71
+ Do NOT invent trace ids, span ids, error messages, or model names. Every fact must be traceable to a tool result.`;
72
+ var TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION = "trace-analyst-actor-v5-2026-05-06";
73
+ var TRACE_ANALYST_SUBAGENT_DESCRIPTION = `You are a trace-analyst subagent. Your parent has delegated a focused trace-inspection question. Use the same DISCOVERY \u2192 NARROW \u2192 DEEP-READ protocol but stay tightly scoped: do exactly what was asked, return a concise compact answer, do NOT spawn further subagents unless the parent's question is genuinely multi-branch.
74
+
75
+ Cite trace ids and span ids for every claim. Do NOT invent ids.`;
76
+
77
+ // src/trace-analyst/types.ts
78
/**
 * Default byte budgets applied by the trace-analyst store when the caller
 * does not override them. Frozen so that a consumer cannot accidentally
 * change the defaults for every store in the process.
 */
var DEFAULT_TRACE_ANALYST_BUDGETS = Object.freeze({
  // Ceiling on the summed size of projected spans returned by one call.
  perCallByteCeiling: 150_000,
  // Per-attribute cap used for whole-trace views.
  perAttributeViewBudget: 4096,
  // Per-attribute cap used for targeted span reads.
  perAttributeSpanBudget: 16384,
  // Cap applied to matched text and its surrounding context in search hits.
  perMatchTextBudget: 1024
});
// Prefix of the marker appended to truncated payloads.
var TRACE_ANALYST_TRUNCATION_MARKER_PREFIX = "[trace-analyst truncated:";
85
+
86
+ // src/trace-analyst/store-otlp.ts
87
+ import { readFile, stat } from "fs/promises";
88
+
89
+ // src/trace-analyst/store.ts
90
/**
 * Compile a caller-supplied search pattern into a RegExp.
 *
 * Multiline mode (`m`) is always enabled. JavaScript has no bare inline-flag
 * syntax, so leading PCRE-style groups such as `(?i)`, `(?s)`, `(?is)` or
 * `(?i)(?s)` are stripped from the source and translated into RegExp flags.
 *
 * @param {string} pattern - Regex source, optionally prefixed with inline flags.
 * @returns {RegExp} A non-global regex, so `.test()` carries no `lastIndex` state.
 * @throws {SyntaxError} If the remaining source is not a valid regular expression.
 */
function compileSearchRegex(pattern) {
  const inlineFlagGroup = /^\(\?([ims]+)\)/;
  const flags = new Set(["m"]);
  let source = pattern;
  // Consume every leading inline-flag group, not just a single "(?i)".
  let group = inlineFlagGroup.exec(source);
  while (group !== null) {
    for (const flag of group[1]) flags.add(flag);
    source = source.slice(group[0].length);
    group = inlineFlagGroup.exec(source);
  }
  return new RegExp(source, [...flags].join(""));
}
99
/**
 * Cap a string at `byteCap` UTF-8 bytes, appending a truncation marker.
 *
 * Strings already within the cap are returned unchanged (same reference).
 * Otherwise the longest prefix that fits in `byteCap` bytes AND ends on a
 * complete UTF-8 character is kept, followed by a newline and
 * `[trace-analyst truncated: original N bytes]`. The marker itself is not
 * counted against the cap.
 *
 * @param {string} value - Text to cap.
 * @param {number} byteCap - Maximum number of UTF-8 bytes of `value` to keep.
 * @returns {string} `value`, or its truncated head plus the marker.
 */
function truncateForBudget(value, byteCap) {
  const encoded = Buffer.from(value, "utf8");
  const original = encoded.length;
  if (original <= byteCap) return value;
  let end = Math.max(0, Math.floor(byteCap));
  // A continuation byte (10xxxxxx) at the cut point means we are in the
  // middle of a multi-byte character: back off to that character's start so
  // we never emit half a code point (or a lone surrogate).
  while (end > 0 && (encoded[end] & 0xc0) === 0x80) {
    end -= 1;
  }
  const head = encoded.subarray(0, end).toString("utf8");
  return `${head}\n[trace-analyst truncated: original ${original} bytes]`;
}
110
+
111
+ // src/trace-analyst/store-otlp.ts
112
+ var OtlpFileTraceStore = class {
113
+ path;
114
+ perAttributeViewBudget;
115
+ perAttributeSpanBudget;
116
+ perCallByteCeiling;
117
+ perMatchTextBudget;
118
+ indexPromise;
119
+ /** Cached UTF-8 buffer of the file. We pin it once because every
120
+ * read needs slice access and re-reading on each call balloons the
121
+ * syscall count. */
122
+ bufferPromise;
123
+ constructor(opts) {
124
+ this.path = opts.path;
125
+ this.perAttributeViewBudget = opts.perAttributeViewBudget ?? DEFAULT_TRACE_ANALYST_BUDGETS.perAttributeViewBudget;
126
+ this.perAttributeSpanBudget = opts.perAttributeSpanBudget ?? DEFAULT_TRACE_ANALYST_BUDGETS.perAttributeSpanBudget;
127
+ this.perCallByteCeiling = opts.perCallByteCeiling ?? DEFAULT_TRACE_ANALYST_BUDGETS.perCallByteCeiling;
128
+ this.perMatchTextBudget = opts.perMatchTextBudget ?? DEFAULT_TRACE_ANALYST_BUDGETS.perMatchTextBudget;
129
+ }
130
+ // ─── Public API ────────────────────────────────────────────────────
131
+ async getOverview(filters) {
132
+ const idx = await this.index();
133
+ const matched = await this.matchedTraces(idx, filters);
134
+ const services = /* @__PURE__ */ new Set();
135
+ const agents = /* @__PURE__ */ new Set();
136
+ const models = /* @__PURE__ */ new Set();
137
+ const tools = /* @__PURE__ */ new Set();
138
+ let rawBytes = 0;
139
+ let earliest = null;
140
+ let latest = null;
141
+ let errorTraceCount = 0;
142
+ let errorSpanCount = 0;
143
+ for (const t of matched) {
144
+ if (t.service_name) services.add(t.service_name);
145
+ if (t.agent_name) agents.add(t.agent_name);
146
+ for (const m of t.models) models.add(m);
147
+ for (const tn of t.tools) tools.add(tn);
148
+ rawBytes += t.raw_jsonl_bytes;
149
+ if (!earliest || t.start_time < earliest) earliest = t.start_time;
150
+ if (!latest || t.end_time > latest) latest = t.end_time;
151
+ if (t.has_errors) {
152
+ errorTraceCount += 1;
153
+ for (const s of t.spans) if (s.status === "ERROR") errorSpanCount += 1;
154
+ }
155
+ }
156
+ const sample_trace_ids = matched.slice(0, 20).map((t) => t.trace_id);
157
+ return {
158
+ total_traces: matched.length,
159
+ raw_jsonl_bytes: rawBytes,
160
+ services: [...services].sort(),
161
+ agents: [...agents].sort(),
162
+ models: [...models].sort(),
163
+ tool_names: [...tools].sort(),
164
+ sample_trace_ids,
165
+ errors: { trace_count: errorTraceCount, span_count: errorSpanCount },
166
+ time_range: earliest && latest ? { earliest, latest } : null
167
+ };
168
+ }
169
+ async queryTraces(opts) {
170
+ if (!Number.isInteger(opts.limit) || opts.limit < 1 || opts.limit > 200) {
171
+ throw new RangeError(`queryTraces.limit must be 1..200, got ${opts.limit}`);
172
+ }
173
+ const offset = opts.offset ?? 0;
174
+ if (!Number.isInteger(offset) || offset < 0) {
175
+ throw new RangeError(`queryTraces.offset must be >=0, got ${offset}`);
176
+ }
177
+ const idx = await this.index();
178
+ const matched = await this.matchedTraces(idx, opts.filters);
179
+ const slice = matched.slice(offset, offset + opts.limit);
180
+ return {
181
+ traces: slice.map((t) => this.toSummary(t)),
182
+ total: matched.length,
183
+ has_more: offset + slice.length < matched.length
184
+ };
185
+ }
186
+ async countTraces(filters) {
187
+ const idx = await this.index();
188
+ const matched = await this.matchedTraces(idx, filters);
189
+ return matched.length;
190
+ }
191
+ async viewTrace(opts) {
192
+ const idx = await this.index();
193
+ const trace = idx.byTrace.get(opts.trace_id);
194
+ if (!trace) {
195
+ throw new TraceNotFoundError(opts.trace_id);
196
+ }
197
+ const cap = opts.per_attribute_byte_cap ?? this.perAttributeViewBudget;
198
+ const buf = await this.buffer();
199
+ const spans = [];
200
+ let runningBytes = 0;
201
+ let span_response_bytes_max = 0;
202
+ for (const s of trace.spans) {
203
+ const projected = await this.projectSpan(buf, trace.trace_id, s, cap);
204
+ const bytes = Buffer.byteLength(JSON.stringify(projected), "utf8");
205
+ span_response_bytes_max = Math.max(span_response_bytes_max, bytes);
206
+ runningBytes += bytes;
207
+ if (runningBytes > this.perCallByteCeiling) {
208
+ return {
209
+ trace_id: trace.trace_id,
210
+ oversized: this.buildOversizedSummary(trace, span_response_bytes_max)
211
+ };
212
+ }
213
+ spans.push(projected);
214
+ }
215
+ return { trace_id: trace.trace_id, spans };
216
+ }
217
+ async viewSpans(opts) {
218
+ const idx = await this.index();
219
+ const trace = idx.byTrace.get(opts.trace_id);
220
+ if (!trace) throw new TraceNotFoundError(opts.trace_id);
221
+ if (opts.span_ids.length === 0) {
222
+ return {
223
+ trace_id: trace.trace_id,
224
+ spans: [],
225
+ missing_span_ids: [],
226
+ truncated_attribute_count: 0
227
+ };
228
+ }
229
+ if (opts.span_ids.length > 100) {
230
+ throw new RangeError(`viewSpans.span_ids cap is 100, got ${opts.span_ids.length}`);
231
+ }
232
+ const cap = opts.per_attribute_byte_cap ?? this.perAttributeSpanBudget;
233
+ const wantSet = new Set(opts.span_ids);
234
+ const found = trace.spans.filter((s) => wantSet.has(s.span_id));
235
+ const missing = opts.span_ids.filter((id) => !found.some((f2) => f2.span_id === id));
236
+ const buf = await this.buffer();
237
+ const spans = [];
238
+ let truncated = 0;
239
+ let runningBytes = 0;
240
+ for (const s of found) {
241
+ const before = truncationCounter(this);
242
+ const projected = await this.projectSpan(buf, trace.trace_id, s, cap);
243
+ truncated += before.delta();
244
+ const bytes = Buffer.byteLength(JSON.stringify(projected), "utf8");
245
+ runningBytes += bytes;
246
+ if (runningBytes > this.perCallByteCeiling) {
247
+ break;
248
+ }
249
+ spans.push(projected);
250
+ }
251
+ return {
252
+ trace_id: trace.trace_id,
253
+ spans,
254
+ missing_span_ids: missing,
255
+ truncated_attribute_count: truncated
256
+ };
257
+ }
258
+ async searchTrace(opts) {
259
+ const max_matches = opts.max_matches ?? 50;
260
+ if (!Number.isInteger(max_matches) || max_matches < 1 || max_matches > 500) {
261
+ throw new RangeError(`searchTrace.max_matches must be 1..500, got ${max_matches}`);
262
+ }
263
+ const idx = await this.index();
264
+ const trace = idx.byTrace.get(opts.trace_id);
265
+ if (!trace) throw new TraceNotFoundError(opts.trace_id);
266
+ const re = compileSearchRegex(opts.regex_pattern);
267
+ const buf = await this.buffer();
268
+ const hits = [];
269
+ let total = 0;
270
+ let capped = false;
271
+ for (const s of trace.spans) {
272
+ const remaining = max_matches - hits.length;
273
+ const localHits = await this.scanSpanForMatches(
274
+ buf,
275
+ trace.trace_id,
276
+ s,
277
+ re,
278
+ this.perMatchTextBudget,
279
+ remaining
280
+ );
281
+ total += localHits.total;
282
+ for (const h of localHits.records) {
283
+ if (hits.length >= max_matches) break;
284
+ hits.push(h);
285
+ }
286
+ if (hits.length >= max_matches) {
287
+ capped = true;
288
+ total = Math.max(total, hits.length + 1);
289
+ break;
290
+ }
291
+ }
292
+ return {
293
+ trace_id: trace.trace_id,
294
+ hits,
295
+ total_matches: total,
296
+ has_more: capped || total > hits.length
297
+ };
298
+ }
299
+ async searchSpan(opts) {
300
+ const max_matches = opts.max_matches ?? 50;
301
+ if (!Number.isInteger(max_matches) || max_matches < 1 || max_matches > 500) {
302
+ throw new RangeError(`searchSpan.max_matches must be 1..500, got ${max_matches}`);
303
+ }
304
+ const idx = await this.index();
305
+ const trace = idx.byTrace.get(opts.trace_id);
306
+ if (!trace) throw new TraceNotFoundError(opts.trace_id);
307
+ const span = trace.spans.find((s) => s.span_id === opts.span_id);
308
+ if (!span) {
309
+ throw new SpanNotFoundError(opts.trace_id, opts.span_id);
310
+ }
311
+ const re = compileSearchRegex(opts.regex_pattern);
312
+ const buf = await this.buffer();
313
+ const localHits = await this.scanSpanForMatches(
314
+ buf,
315
+ trace.trace_id,
316
+ span,
317
+ re,
318
+ this.perMatchTextBudget,
319
+ max_matches
320
+ );
321
+ return {
322
+ trace_id: trace.trace_id,
323
+ span_id: span.span_id,
324
+ hits: localHits.records,
325
+ total_matches: localHits.total,
326
+ has_more: localHits.total > localHits.records.length
327
+ };
328
+ }
329
+ // ─── Index building ────────────────────────────────────────────────
330
+ /** Force the index to materialise. Useful to amortise startup cost
331
+ * before the first agent call. */
332
+ async ensureIndexed() {
333
+ await this.index();
334
+ }
335
+ async buffer() {
336
+ if (!this.bufferPromise) {
337
+ this.bufferPromise = readFile(this.path);
338
+ }
339
+ return this.bufferPromise;
340
+ }
341
+ async index() {
342
+ if (!this.indexPromise) {
343
+ this.indexPromise = this.buildIndex();
344
+ }
345
+ return this.indexPromise;
346
+ }
347
+ async buildIndex() {
348
+ let buf;
349
+ try {
350
+ buf = await this.buffer();
351
+ } catch (err) {
352
+ const stats = await stat(this.path).catch(() => null);
353
+ if (!stats) {
354
+ throw new TraceFileMissingError(this.path);
355
+ }
356
+ throw err;
357
+ }
358
+ const byTrace = /* @__PURE__ */ new Map();
359
+ let cursor = 0;
360
+ while (cursor < buf.length) {
361
+ const newlineIndex = buf.indexOf(10, cursor);
362
+ const lineEnd = newlineIndex === -1 ? buf.length : newlineIndex;
363
+ const lineLength = lineEnd - cursor;
364
+ if (lineLength === 0) {
365
+ cursor = lineEnd + 1;
366
+ continue;
367
+ }
368
+ const lineSlice = buf.subarray(cursor, lineEnd).toString("utf8");
369
+ const lineOffset = cursor;
370
+ cursor = lineEnd + 1;
371
+ let parsed;
372
+ try {
373
+ parsed = JSON.parse(lineSlice);
374
+ } catch {
375
+ continue;
376
+ }
377
+ if (!parsed || typeof parsed !== "object") continue;
378
+ const span = readOtlpSpan(parsed);
379
+ if (!span) continue;
380
+ let entry = byTrace.get(span.trace_id);
381
+ if (!entry) {
382
+ entry = {
383
+ trace_id: span.trace_id,
384
+ service_name: span.service_name,
385
+ agent_name: span.agent_name,
386
+ span_count: 0,
387
+ has_errors: false,
388
+ start_time: span.start_time,
389
+ end_time: span.end_time,
390
+ duration_ms: 0,
391
+ raw_jsonl_bytes: 0,
392
+ models: /* @__PURE__ */ new Set(),
393
+ tools: /* @__PURE__ */ new Set(),
394
+ spans: []
395
+ };
396
+ byTrace.set(span.trace_id, entry);
397
+ } else {
398
+ if (!entry.service_name && span.service_name) entry.service_name = span.service_name;
399
+ if (!entry.agent_name && span.agent_name) entry.agent_name = span.agent_name;
400
+ }
401
+ const indexEntry = {
402
+ span_id: span.span_id,
403
+ parent_span_id: span.parent_span_id,
404
+ name: span.name,
405
+ kind: span.kind,
406
+ start_time: span.start_time,
407
+ end_time: span.end_time,
408
+ duration_ms: span.duration_ms,
409
+ status: span.status,
410
+ status_message: span.status_message,
411
+ service_name: span.service_name,
412
+ agent_name: span.agent_name,
413
+ model_name: span.model_name,
414
+ tool_name: span.tool_name,
415
+ line_byte_offset: lineOffset,
416
+ line_byte_length: lineLength
417
+ };
418
+ entry.spans.push(indexEntry);
419
+ entry.span_count += 1;
420
+ entry.raw_jsonl_bytes += lineLength + 1;
421
+ if (span.status === "ERROR") entry.has_errors = true;
422
+ if (span.start_time < entry.start_time) entry.start_time = span.start_time;
423
+ if (span.end_time > entry.end_time) entry.end_time = span.end_time;
424
+ if (span.model_name) entry.models.add(span.model_name);
425
+ if (span.tool_name) entry.tools.add(span.tool_name);
426
+ }
427
+ let totalRawBytes = 0;
428
+ for (const t of byTrace.values()) {
429
+ totalRawBytes += t.raw_jsonl_bytes;
430
+ t.spans.sort(
431
+ (a, b) => a.start_time.localeCompare(b.start_time) || a.line_byte_offset - b.line_byte_offset
432
+ );
433
+ t.duration_ms = Math.max(0, new Date(t.end_time).getTime() - new Date(t.start_time).getTime());
434
+ }
435
+ const sortedTraceIds = [...byTrace.keys()].sort();
436
+ return { byTrace, totalRawBytes, sortedTraceIds };
437
+ }
438
+ // ─── Filter pipeline ───────────────────────────────────────────────
439
+ async matchedTraces(idx, filters) {
440
+ const traces = idx.sortedTraceIds.map((id) => idx.byTrace.get(id)).filter(isPresent);
441
+ if (!filters) return traces;
442
+ const indexedFiltered = traces.filter((t) => {
443
+ if (filters.has_errors !== void 0 && t.has_errors !== filters.has_errors) return false;
444
+ if (filters.service_names && filters.service_names.length > 0) {
445
+ if (!t.service_name || !filters.service_names.includes(t.service_name)) return false;
446
+ }
447
+ if (filters.agent_names && filters.agent_names.length > 0) {
448
+ if (!t.agent_name || !filters.agent_names.includes(t.agent_name)) return false;
449
+ }
450
+ if (filters.model_names && filters.model_names.length > 0) {
451
+ if (![...t.models].some((m) => filters.model_names.includes(m))) return false;
452
+ }
453
+ if (filters.tool_names && filters.tool_names.length > 0) {
454
+ if (![...t.tools].some((tn) => filters.tool_names.includes(tn))) return false;
455
+ }
456
+ if (filters.start_time_after && t.start_time < filters.start_time_after) return false;
457
+ if (filters.start_time_before && t.start_time > filters.start_time_before) return false;
458
+ return true;
459
+ });
460
+ if (!filters.regex_pattern) return indexedFiltered;
461
+ const re = compileSearchRegex(filters.regex_pattern);
462
+ const buf = await this.buffer();
463
+ const out = [];
464
+ for (const t of indexedFiltered) {
465
+ let matched = false;
466
+ for (const s of t.spans) {
467
+ const slice = buf.subarray(s.line_byte_offset, s.line_byte_offset + s.line_byte_length);
468
+ if (re.test(slice.toString("utf8"))) {
469
+ matched = true;
470
+ break;
471
+ }
472
+ }
473
+ if (matched) out.push(t);
474
+ }
475
+ return out;
476
+ }
477
+ toSummary(t) {
478
+ return {
479
+ trace_id: t.trace_id,
480
+ service_name: t.service_name,
481
+ agent_name: t.agent_name,
482
+ span_count: t.span_count,
483
+ has_errors: t.has_errors,
484
+ start_time: t.start_time,
485
+ end_time: t.end_time,
486
+ duration_ms: t.duration_ms,
487
+ raw_jsonl_bytes: t.raw_jsonl_bytes,
488
+ models: [...t.models].sort(),
489
+ tools: [...t.tools].sort()
490
+ };
491
+ }
492
+ // ─── Span projection (lazy attribute reads) ────────────────────────
493
+ async projectSpan(buf, trace_id, s, perAttrCap) {
494
+ const slice = buf.subarray(s.line_byte_offset, s.line_byte_offset + s.line_byte_length).toString("utf8");
495
+ let raw = {};
496
+ try {
497
+ const parsed = JSON.parse(slice);
498
+ if (parsed && typeof parsed === "object") raw = parsed;
499
+ } catch {
500
+ }
501
+ const attrs = extractAttributes(raw);
502
+ const projected = {};
503
+ for (const [k, v] of Object.entries(attrs)) {
504
+ if (typeof v === "string") {
505
+ const trunc = truncateForBudget(v, perAttrCap);
506
+ if (trunc !== v) trackTruncation(this);
507
+ projected[k] = trunc;
508
+ } else if (Array.isArray(v) || v && typeof v === "object") {
509
+ const json = JSON.stringify(v);
510
+ const trunc = truncateForBudget(json, perAttrCap);
511
+ if (trunc !== json) {
512
+ trackTruncation(this);
513
+ projected[k] = trunc;
514
+ } else {
515
+ projected[k] = v;
516
+ }
517
+ } else {
518
+ projected[k] = v;
519
+ }
520
+ }
521
+ return {
522
+ trace_id,
523
+ span_id: s.span_id,
524
+ parent_span_id: s.parent_span_id,
525
+ name: s.name,
526
+ kind: s.kind,
527
+ start_time: s.start_time,
528
+ end_time: s.end_time,
529
+ duration_ms: s.duration_ms,
530
+ status: s.status,
531
+ status_message: s.status_message,
532
+ service_name: s.service_name,
533
+ agent_name: s.agent_name,
534
+ model_name: s.model_name,
535
+ tool_name: s.tool_name,
536
+ attributes: projected
537
+ };
538
+ }
539
+ buildOversizedSummary(t, span_response_bytes_max) {
540
+ const counts = /* @__PURE__ */ new Map();
541
+ let errorCount = 0;
542
+ for (const s of t.spans) {
543
+ counts.set(s.name, (counts.get(s.name) ?? 0) + 1);
544
+ if (s.status === "ERROR") errorCount += 1;
545
+ }
546
+ const top = [...counts.entries()].sort((a, b) => b[1] - a[1]).slice(0, 20);
547
+ return {
548
+ span_count: t.span_count,
549
+ top_span_names: top,
550
+ span_response_bytes_max,
551
+ error_span_count: errorCount
552
+ };
553
+ }
554
+ async scanSpanForMatches(buf, trace_id, s, re, textBudget, recordCap) {
555
+ const slice = buf.subarray(s.line_byte_offset, s.line_byte_offset + s.line_byte_length).toString("utf8");
556
+ const records = [];
557
+ const globalRe = new RegExp(re.source, re.flags.includes("g") ? re.flags : `${re.flags}g`);
558
+ let total = 0;
559
+ let hasMore = false;
560
+ let m;
561
+ while ((m = globalRe.exec(slice)) !== null) {
562
+ total += 1;
563
+ if (m.index === globalRe.lastIndex) globalRe.lastIndex += 1;
564
+ if (records.length >= recordCap) {
565
+ hasMore = true;
566
+ break;
567
+ }
568
+ const before = slice.slice(Math.max(0, m.index - textBudget / 2), m.index);
569
+ const after = slice.slice(
570
+ m.index + m[0].length,
571
+ m.index + m[0].length + Math.floor(textBudget / 2)
572
+ );
573
+ records.push({
574
+ trace_id,
575
+ span_id: s.span_id,
576
+ span_name: s.name,
577
+ span_kind: s.kind,
578
+ attribute_path: bestAttributePathForOffset(slice, m.index) ?? "span.raw",
579
+ matched_text: truncateForBudget(m[0], textBudget),
580
+ context_before: truncateForBudget(before, textBudget),
581
+ context_after: truncateForBudget(after, textBudget),
582
+ match_offset: m.index
583
+ });
584
+ }
585
+ return { records, total, hasMore };
586
+ }
587
+ };
588
/** Raised by the store when the trace file at `path` cannot be found. */
var TraceFileMissingError = class extends NotFoundError {
  /** @param path - Path of the missing trace file (embedded in the message). */
  constructor(path) {
    const message = `trace file not found: ${path}`;
    super(message);
  }
};
593
/** Raised when a requested trace id is absent from the index. */
var TraceNotFoundError = class extends NotFoundError {
  /** The trace id that was looked up. */
  trace_id;
  /** @param trace_id - Id of the trace that could not be found. */
  constructor(trace_id) {
    const message = `trace not found: ${trace_id}`;
    super(message);
    this.trace_id = trace_id;
  }
};
600
/** Raised when a trace exists but does not contain the requested span id. */
var SpanNotFoundError = class extends NotFoundError {
  /** The trace that was searched. */
  trace_id;
  /** The span id that was not found in that trace. */
  span_id;
  /**
   * @param trace_id - Id of the trace that was searched.
   * @param span_id - Id of the span that could not be found.
   */
  constructor(trace_id, span_id) {
    const message = `span ${span_id} not found in trace ${trace_id}`;
    super(message);
    this.trace_id = trace_id;
    this.span_id = span_id;
  }
};
609
/**
 * Extract the indexed span metadata from one parsed JSONL record.
 *
 * Both snake_case and camelCase spellings of the id/time fields are
 * accepted. Service, agent, model and tool names are read from the record's
 * attributes, each with a fallback attribute key.
 *
 * @param raw - Parsed JSON object for one line.
 * @returns The span metadata, or `null` when the record lacks a trace id or
 *   span id.
 */
function readOtlpSpan(raw) {
  // First non-nullish string field among `keys`; otherwise the last lookup's result.
  const field = (...keys) => {
    let value;
    for (const key of keys) {
      value = stringField(raw, key);
      if (value != null) break;
    }
    return value;
  };
  const trace_id = field("trace_id", "traceId");
  const span_id = field("span_id", "spanId");
  if (!trace_id || !span_id) return null;

  const parent_id = field("parent_span_id", "parentSpanId") ?? null;
  const name = field("name") ?? "unknown";
  const start_time = field("start_time", "startTime") ?? "";
  // A span without an end time is treated as ending when it started.
  const end_time = field("end_time", "endTime") ?? start_time;
  const status = readStatus(raw);
  const attrs = extractAttributes(raw);
  // First non-nullish attribute among `keys`, as a string; null when none.
  const attribute = (...keys) => {
    for (const key of keys) {
      const value = asString(attrs[key]);
      if (value != null) return value;
    }
    return null;
  };
  const service_name = attribute("service.name", "resource.attributes.service.name");
  const agent_name = attribute("agent.name", "inference.agent.name");
  const model_name = attribute("llm.model_name", "inference.llm.model_name");
  const tool_name = attribute("tool.name", "inference.tool.name");
  const kind = inferKind(attrs);

  let duration_ms = 0;
  if (start_time && end_time) {
    const startMs = Date.parse(start_time);
    const endMs = Date.parse(end_time);
    const bothValid = !Number.isNaN(startMs) && !Number.isNaN(endMs);
    // Unparseable timestamps leave the duration at 0; negatives clamp to 0.
    if (bothValid) duration_ms = Math.max(0, endMs - startMs);
  }
  const hasParent = parent_id && parent_id.length > 0;
  return {
    trace_id,
    span_id,
    parent_span_id: hasParent ? parent_id : null,
    name,
    kind,
    start_time,
    end_time,
    duration_ms,
    status: status.code,
    status_message: status.message,
    service_name,
    agent_name,
    model_name,
    tool_name
  };
}
647
/**
 * Normalise a span's `status` object to `{ code, message }`.
 *
 * Accepts the string spellings (`"STATUS_CODE_OK"`, `"OK"`, `"STATUS_CODE_ERROR"`,
 * `"ERROR"`) and the numeric OTLP enum values (`1` = OK, `2` = ERROR). The numeric
 * form is what this package's own OTLP exporter writes.
 *
 * @param raw Parsed span object.
 * @returns `code` is `"OK"`, `"ERROR"` or `"UNSET"`; `message` is a non-empty
 *   string or `undefined`.
 */
function readStatus(raw) {
  const status = raw.status;
  if (!status || typeof status !== "object" || Array.isArray(status)) {
    return { code: "UNSET", message: void 0 };
  }
  const codeRaw = status.code;
  let code = "UNSET";
  if (codeRaw === "STATUS_CODE_OK" || codeRaw === "OK" || codeRaw === 1) {
    code = "OK";
  } else if (codeRaw === "STATUS_CODE_ERROR" || codeRaw === "ERROR" || codeRaw === 2) {
    code = "ERROR";
  }
  const messageRaw = status.message;
  const message = typeof messageRaw === "string" && messageRaw.length > 0 ? messageRaw : void 0;
  return { code, message };
}
658
/**
 * Derive the span kind from `openinference.span.kind`, falling back to
 * `inference.observation_kind`.
 *
 * @param attrs Flat attribute map.
 * @returns The upper-cased kind when it is one of the recognised values,
 *   otherwise `"UNKNOWN"`.
 */
function inferKind(attrs) {
  const recognised = ["AGENT", "LLM", "TOOL", "CHAIN", "GUARDRAIL", "SPAN"];
  const declared = [attrs["openinference.span.kind"], attrs["inference.observation_kind"]].find(
    (candidate) => typeof candidate === "string" && candidate.length > 0
  );
  if (declared === void 0) return "UNKNOWN";
  const upper = declared.toUpperCase();
  return recognised.includes(upper) ? upper : "UNKNOWN";
}
668
/**
 * Merge `raw.resource.attributes` and `raw.attributes` into one flat map.
 * Span attributes are applied last, so they win on key collisions.
 *
 * @param raw Parsed span object.
 * @returns A new object; either source is ignored unless it is a non-array object.
 */
function extractAttributes(raw) {
  const isRecord = (candidate) => Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);
  const merged = {};
  const absorb = (source) => {
    if (!isRecord(source)) return;
    for (const key of Object.keys(source)) {
      merged[key] = source[key];
    }
  };
  if (isRecord(raw.resource)) absorb(raw.resource.attributes);
  absorb(raw.attributes);
  return merged;
}
687
/** Return `raw[key]` when it is a string (possibly empty); otherwise `undefined`. */
function stringField(raw, key) {
  const candidate = raw[key];
  if (typeof candidate !== "string") return void 0;
  return candidate;
}
691
/** Return `v` when it is a non-empty string; otherwise `null`. */
function asString(v) {
  if (typeof v !== "string") return null;
  return v.length > 0 ? v : null;
}
694
/** True for every value except `undefined` (so `null` counts as present). */
function isPresent(v) {
  return !(v === void 0);
}
697
// Per-store truncation tallies, keyed weakly by the store object.
var truncationCounters = /* @__PURE__ */ new WeakMap();

/** Add one to the truncation tally of `store`, creating the tally on first use. */
function trackTruncation(store) {
  const tally = truncationCounters.get(store);
  if (tally) {
    tally.value += 1;
    return;
  }
  truncationCounters.set(store, { value: 1 });
}

/**
 * Snapshot the current tally of `store`.
 *
 * @returns An object whose `delta()` reports how many truncations were tracked
 *   for `store` since the snapshot was taken.
 */
function truncationCounter(store) {
  const current = () => truncationCounters.get(store)?.value ?? 0;
  const baseline = current();
  return {
    delta() {
      return current() - baseline;
    }
  };
}
715
/**
 * Heuristically name the JSON key whose string value contains `offset`.
 *
 * Walks backwards through `slice`: to the nearest `"` at or before `offset`,
 * then to the preceding `:`, then across the two quotes that delimit the key.
 * No JSON parsing is done, so escaped quotes or colons inside values can
 * mislead it.
 *
 * @param slice Raw text of the span line.
 * @param offset Index of the match within `slice`.
 * @returns The text between the key's quotes, or `null` when any step runs off
 *   the start of the string.
 */
function bestAttributePathForOffset(slice, offset) {
  // Step left from `from` until `ch` is found or index 0 is reached.
  const rewind = (from, ch) => {
    let pos = from;
    while (pos > 0 && slice[pos] !== ch) pos -= 1;
    return pos;
  };
  const valueQuote = rewind(offset, '"');
  if (valueQuote <= 0) return null;
  const colon = rewind(valueQuote - 1, ":");
  if (colon <= 0) return null;
  const keyClose = rewind(colon - 1, '"');
  const keyOpen = rewind(keyClose - 1, '"');
  if (keyOpen <= 0) return null;
  return slice.slice(keyOpen + 1, keyClose);
}
729
+
730
+ // src/trace-analyst/tools.ts
731
+ import { f, fn } from "@ax-llm/ax";
732
// Namespace under which every trace-analyst tool is registered.
var NAMESPACE = "traces";
// Optional JSON `filters` argument shared by the discovery tools.
var filtersField = f.json("Filter set. ALL fields are AND-composed. Leave empty to scan everything.").optional();

/**
 * Build the trace-analysis tool definitions bound to `opts.store`.
 *
 * Each handler validates its arguments with the local `parse*` / `assert*`
 * helpers and then delegates to the store method of the same purpose.
 *
 * @param opts Object carrying the `store` the tools operate on.
 * @returns Tools in the order: getDatasetOverview, queryTraces, countTraces,
 *   viewTrace, viewSpans, searchTrace, searchSpan.
 */
function buildTraceAnalystTools(opts) {
  const store = opts.store;

  const overviewTool = fn("getDatasetOverview")
    .description(
      "Dataset rollup: total traces, raw_jsonl_bytes, services, agents, models, tools, and sample_trace_ids (real ids passable to view/search). Always call this FIRST without a regex_pattern."
    )
    .namespace(NAMESPACE)
    .arg("filters", filtersField)
    .returns(f.json("DatasetOverview"))
    .handler(async (args) => store.getOverview(parseFilters(args.filters)))
    .build();

  const queryTool = fn("queryTraces")
    .description(
      "Paginated trace summaries. Each summary carries raw_jsonl_bytes \u2014 use it to size traces BEFORE calling viewTrace. Narrow with indexed filters before adding regex_pattern."
    )
    .namespace(NAMESPACE)
    .arg("filters", filtersField)
    .arg("limit", f.number("Page size, 1..200"))
    .arg("offset", f.number("Page offset; default 0").optional())
    .returns(f.json("QueryTracesPage"))
    .handler(async (args) => {
      const request = {
        filters: parseFilters(args.filters),
        limit: assertPageLimit(args.limit),
        offset: assertOffset(args.offset)
      };
      return store.queryTraces(request);
    })
    .build();

  const countTool = fn("countTraces")
    .description(
      "Count traces matching `filters`. Use as a cheap pre-flight before opting into a regex_pattern scan."
    )
    .namespace(NAMESPACE)
    .arg("filters", filtersField)
    .returns(f.number("count"))
    .handler(async (args) => store.countTraces(parseFilters(args.filters)))
    .build();

  const viewTraceTool = fn("viewTrace")
    .description(
      "Return ALL spans for a single trace, with each attribute capped at ~4KB. If the response would exceed the per-call ceiling the result carries `oversized` instead of `spans` \u2014 DO NOT retry with the same trace_id; switch to searchTrace / viewSpans."
    )
    .namespace(NAMESPACE)
    .arg("trace_id", f.string("Real trace id from a prior overview/query"))
    .returns(f.json("ViewTraceResult"))
    .handler(async (args) => store.viewTrace({ trace_id: assertString(args.trace_id, "trace_id") }))
    .build();

  const viewSpansTool = fn("viewSpans")
    .description(
      "Surgical read of specific spans within a trace, with each attribute capped at ~16KB (4\xD7 the discovery cap). Use after searchTrace narrows to specific span_ids."
    )
    .namespace(NAMESPACE)
    .arg("trace_id", f.string("Real trace id"))
    .arg("span_ids", f.string("Span ids to fetch").array())
    .returns(f.json("ViewSpansResult"))
    .handler(async (args) => {
      const request = {
        trace_id: assertString(args.trace_id, "trace_id"),
        span_ids: assertStringArray(args.span_ids, "span_ids")
      };
      return store.viewSpans(request);
    })
    .build();

  const searchTraceTool = fn("searchTrace")
    .description(
      "Regex search across all spans of one trace. Returns `{trace_id, hits: SpanMatchRecord[], total_matches, has_more}`. **Iterate `result.hits`, NOT `result.matches`** \u2014 the field is `hits`. Each hit has `{span_id, span_name, span_kind, attribute_path, matched_text, context_before, context_after, match_offset}`. Bounded regardless of trace size by max_matches (1..500, default 50). If has_more=true, REFINE the regex rather than blindly raising max_matches."
    )
    .namespace(NAMESPACE)
    .arg("trace_id", f.string("Real trace id"))
    .arg("regex_pattern", f.string("JS-compatible regex, multiline"))
    .arg("max_matches", f.number("Max records returned, 1..500; default 50").optional())
    .returns(f.json("SearchTraceResult"))
    .handler(async (args) => {
      const request = {
        trace_id: assertString(args.trace_id, "trace_id"),
        regex_pattern: assertRegex(args.regex_pattern),
        max_matches: assertMaxMatches(args.max_matches)
      };
      return store.searchTrace(request);
    })
    .build();

  const searchSpanTool = fn("searchSpan")
    .description(
      "Regex search inside a single span. Use when viewSpans returned a 16KB-truncated payload and you need to narrow further. Returns `{trace_id, span_id, hits: SpanMatchRecord[], total_matches, has_more}` \u2014 iterate `result.hits`, NOT `result.matches`."
    )
    .namespace(NAMESPACE)
    .arg("trace_id", f.string("Real trace id"))
    .arg("span_id", f.string("Real span id within trace"))
    .arg("regex_pattern", f.string("JS-compatible regex, multiline"))
    .arg("max_matches", f.number("Max records, 1..500; default 50").optional())
    .returns(f.json("SearchSpanResult"))
    .handler(async (args) => {
      const request = {
        trace_id: assertString(args.trace_id, "trace_id"),
        span_id: assertString(args.span_id, "span_id"),
        regex_pattern: assertRegex(args.regex_pattern),
        max_matches: assertMaxMatches(args.max_matches)
      };
      return store.searchSpan(request);
    })
    .build();

  return [
    overviewTool,
    queryTool,
    countTool,
    viewTraceTool,
    viewSpansTool,
    searchTraceTool,
    searchSpanTool
  ];
}
793
/**
 * Wrap the trace-analyst tools in a function-group descriptor.
 *
 * @param opts Passed through to `buildTraceAnalystTools`.
 * @returns `{ namespace, title, selectionCriteria, description, functions }`.
 */
function traceAnalystFunctionGroup(opts) {
  const group = {
    namespace: NAMESPACE,
    title: "Trace Analysis",
    selectionCriteria: "Use for any inspection of OTLP-shaped trace data.",
    description: "Discovery \u2192 narrow \u2192 deep-read tools over a JSONL trace dataset. Always call getDatasetOverview first."
  };
  group.functions = buildTraceAnalystTools(opts);
  return group;
}
802
/**
 * Validate and normalise the loosely-typed `filters` tool argument.
 *
 * Scalar fields of the wrong type are ignored; the `*_names` lists are
 * validated by `stringArrayOrUndefined`, which throws on malformed input.
 *
 * @param input Raw `filters` value from the tool call.
 * @returns `undefined` when `input` is null/undefined, otherwise the normalised
 *   filter object (the four `*_names` keys are always present, possibly undefined).
 * @throws {TypeError} When `input` is not a plain object (arrays are reported as
 *   "array" rather than the misleading `typeof` result "object"), or when
 *   `regex_pattern` is an empty string.
 */
function parseFilters(input) {
  if (input == null) return void 0;
  if (typeof input !== "object" || Array.isArray(input)) {
    const got = Array.isArray(input) ? "array" : typeof input;
    throw new TypeError(`filters must be an object, got ${got}`);
  }
  const raw = input;
  const out = {};
  if (typeof raw.has_errors === "boolean") out.has_errors = raw.has_errors;
  out.service_names = stringArrayOrUndefined(raw.service_names, "service_names");
  out.agent_names = stringArrayOrUndefined(raw.agent_names, "agent_names");
  out.model_names = stringArrayOrUndefined(raw.model_names, "model_names");
  out.tool_names = stringArrayOrUndefined(raw.tool_names, "tool_names");
  if (typeof raw.start_time_after === "string") out.start_time_after = raw.start_time_after;
  if (typeof raw.start_time_before === "string") out.start_time_before = raw.start_time_before;
  if (typeof raw.regex_pattern === "string") {
    if (raw.regex_pattern.length === 0) {
      throw new TypeError("filters.regex_pattern cannot be empty");
    }
    out.regex_pattern = raw.regex_pattern;
  }
  return out;
}
824
/**
 * Validate an optional array-of-strings argument.
 *
 * @param v Candidate value.
 * @param label Argument name used in error messages.
 * @returns `undefined` for null/undefined, otherwise `v` itself.
 * @throws {TypeError} When `v` is not an array or has a non-string entry.
 */
function stringArrayOrUndefined(v, label) {
  if (v === void 0 || v === null) return void 0;
  if (!Array.isArray(v)) {
    throw new TypeError(`${label} must be an array of strings`);
  }
  const allStrings = v.every((entry) => typeof entry === "string");
  if (!allStrings) {
    throw new TypeError(`${label} entries must be strings`);
  }
  return v;
}
832
/**
 * Validate a page size.
 *
 * @returns `limit` unchanged.
 * @throws {RangeError} Unless `limit` is an integer in 1..200.
 */
function assertPageLimit(limit) {
  // Number.isInteger is false for every non-number, so it also covers the type check.
  const valid = Number.isInteger(limit) && limit >= 1 && limit <= 200;
  if (!valid) {
    throw new RangeError(`limit must be an integer 1..200`);
  }
  return limit;
}
838
/**
 * Validate an optional page offset.
 *
 * `null` is treated like `undefined` ("not provided"), matching how the other
 * optional arguments in this module (`parseFilters`, `stringArrayOrUndefined`)
 * handle null.
 *
 * @returns `undefined` when no offset was given, otherwise `offset` unchanged.
 * @throws {RangeError} When `offset` is given but is not a non-negative integer.
 */
function assertOffset(offset) {
  if (offset == null) return void 0;
  if (!Number.isInteger(offset) || offset < 0) {
    throw new RangeError(`offset must be a non-negative integer`);
  }
  return offset;
}
845
/**
 * Validate a regex pattern string by compiling it with the `m` flag.
 *
 * @returns The original `pattern` string (not the compiled regex).
 * @throws {TypeError} When `pattern` is not a non-empty string.
 * @throws {SyntaxError} When the pattern does not compile.
 */
function assertRegex(pattern) {
  const usable = typeof pattern === "string" && pattern.length > 0;
  if (!usable) {
    throw new TypeError(`regex_pattern must be a non-empty string`);
  }
  // Compiled only for its validation side effect; the result is discarded.
  void new RegExp(pattern, "m");
  return pattern;
}
852
/**
 * Validate the optional `max_matches` argument.
 *
 * `null` is treated like `undefined` ("not provided"), matching how the other
 * optional arguments in this module handle null.
 *
 * @returns `undefined` when not provided, otherwise `n` unchanged.
 * @throws {RangeError} When `n` is given but is not an integer in 1..500.
 */
function assertMaxMatches(n) {
  if (n == null) return void 0;
  if (!Number.isInteger(n) || n < 1 || n > 500) {
    throw new RangeError(`max_matches must be an integer 1..500`);
  }
  return n;
}
859
/**
 * Require a non-empty string.
 *
 * @param v Candidate value.
 * @param label Argument name used in the error message.
 * @returns `v` unchanged.
 * @throws {TypeError} When `v` is not a string or is empty.
 */
function assertString(v, label) {
  const nonEmptyString = typeof v === "string" && v.length !== 0;
  if (nonEmptyString) return v;
  throw new TypeError(`${label} must be a non-empty string`);
}
865
/**
 * Require an array whose entries are all strings (an empty array passes).
 *
 * @param v Candidate value.
 * @param label Argument name used in error messages.
 * @returns `v` unchanged.
 * @throws {TypeError} When `v` is not an array or has a non-string entry.
 */
function assertStringArray(v, label) {
  if (!Array.isArray(v)) {
    throw new TypeError(`${label} must be an array of strings`);
  }
  const allStrings = v.every((entry) => typeof entry === "string");
  if (!allStrings) {
    throw new TypeError(`${label} entries must be strings`);
  }
  return v;
}
872
+
873
+ // src/trace-analyst/analyst.ts
874
+ import { AxJSRuntime, agent } from "@ax-llm/ax";
875
/**
 * Run the trace-analyst agent against a trace source and return its answer.
 *
 * Options read here: `source` (file path or store object), `ai`, `model`,
 * `progressLogPath`, `onTurn`, `maxDepth` (default 1), `maxTurns` (default 12),
 * `maxParallelSubagents` (default 2), `maxRuntimeChars` (default 6000),
 * `actorDescription`, `subagentDescription`.
 *
 * When `progressLogPath` is set, each actor turn is appended to that file as one
 * JSON line. Progress logging is best-effort: a stream failure stops further
 * logging but never fails the analysis.
 *
 * @param input Object with a non-empty string `question`.
 * @param options See above.
 * @returns `{ answer, findings, turns, turnCount, usage, chatLog, actorPromptVersion }`.
 * @throws {TypeError} When `input.question` is missing or not a string.
 */
async function analyzeTraces(input, options) {
  if (!input.question || typeof input.question !== "string") {
    throw new TypeError("analyzeTraces: input.question must be a non-empty string");
  }
  const store = typeof options.source === "string" ? new OtlpFileTraceStore({ path: options.source }) : options.source;
  if (store instanceof OtlpFileTraceStore) {
    await store.ensureIndexed();
  }
  const tools = buildTraceAnalystTools({ store });
  const turns = [];
  let progressFs;
  if (options.progressLogPath) {
    const { createWriteStream } = await import("fs");
    const { mkdir } = await import("fs/promises");
    const { dirname } = await import("path");
    await mkdir(dirname(options.progressLogPath), { recursive: true });
    const stream = createWriteStream(options.progressLogPath, { flags: "a" });
    // Without a listener an asynchronous stream failure is an unhandled 'error'
    // event; the try/catch around write() below only sees synchronous throws.
    stream.on("error", () => {
      if (progressFs === stream) progressFs = void 0;
    });
    progressFs = stream;
  }
  const actorTurnCallback = async (turn) => {
    const snap = {
      turn: turn.turn,
      isError: turn.isError,
      code: turn.code,
      output: turn.output,
      thought: turn.thought
    };
    turns.push(snap);
    if (progressFs) {
      try {
        progressFs.write(`${JSON.stringify({ ...snap, ts: Date.now() })}\n`);
      } catch {
        // Best-effort progress log: a failed write must not fail the turn.
      }
    }
    if (options.onTurn) await options.onTurn(snap);
  };
  const maxDepth = options.maxDepth ?? 1;
  const maxTurns = options.maxTurns ?? 12;
  const maxParallelSubagents = options.maxParallelSubagents ?? 2;
  const maxRuntimeChars = options.maxRuntimeChars ?? 6e3;
  const analyst = agent(
    "question:string -> answer:string, findings:string[]",
    {
      agentIdentity: {
        name: "TraceAnalyst",
        description: "Analyzes OTLP-shaped JSONL traces using bounded discovery tools to identify systemic failure modes."
      },
      contextFields: ["question"],
      runtime: new AxJSRuntime({
        permissions: [],
        blockDynamicImport: true,
        allowedModules: [],
        freezeIntrinsics: true,
        blockShadowRealm: true,
        // RLM stdout mode relies on runtime bindings persisting across turns.
        preventGlobalThisExtensions: false
      }),
      mode: maxDepth > 0 ? "advanced" : "simple",
      recursionOptions: maxDepth > 0 ? { maxDepth } : void 0,
      maxTurns,
      maxRuntimeChars,
      maxBatchedLlmQueryConcurrency: maxParallelSubagents,
      promptLevel: "detailed",
      // Trace analysis depends on exact prior tool results and runtime variables.
      contextPolicy: { preset: "full", budget: "balanced" },
      functions: { local: tools },
      actorOptions: {
        description: options.actorDescription ?? TRACE_ANALYST_ACTOR_DESCRIPTION,
        ...options.model ? { model: options.model } : {},
        // Keep actor messages tool-call/content shaped across reasoning models.
        showThoughts: false,
        thinkingTokenBudget: "none"
      },
      responderOptions: {
        ...options.model ? { model: options.model } : {},
        description: options.subagentDescription ?? TRACE_ANALYST_SUBAGENT_DESCRIPTION,
        showThoughts: false
      },
      actorTurnCallback,
      bubbleErrors: [TraceFileMissingError]
    }
  );
  let result;
  try {
    result = await analyst.forward(options.ai, { question: input.question });
  } finally {
    if (progressFs) {
      const stream = progressFs;
      // Resolve on whichever comes first so a late stream failure cannot hang the caller.
      await new Promise((resolve) => {
        stream.once("error", () => resolve());
        stream.once("close", () => resolve());
        stream.end(() => resolve());
      });
    }
  }
  return {
    answer: typeof result.answer === "string" ? result.answer : String(result.answer ?? ""),
    findings: Array.isArray(result.findings) ? result.findings.filter((s) => typeof s === "string") : [],
    turns,
    turnCount: turns.length,
    usage: normalizeRoleArrays(analyst.getUsage()),
    chatLog: normalizeRoleArrays(analyst.getChatLog()),
    actorPromptVersion: TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION
  };
}
975
/**
 * Coerce a per-role record into `{ actor: [...], responder: [...] }`.
 * Non-object input, and roles that are not arrays, yield empty arrays.
 */
function normalizeRoleArrays(value) {
  const byRole = value && typeof value === "object" ? value : {};
  const actor = normalizeRecordArray(byRole.actor);
  const responder = normalizeRecordArray(byRole.responder);
  return { actor, responder };
}

/**
 * Coerce a value into an array of plain objects: object items are shallow-copied,
 * any other item is wrapped as `{ value: item }`; non-arrays become `[]`.
 */
function normalizeRecordArray(value) {
  if (!Array.isArray(value)) return [];
  return value.map((item) => {
    if (item && typeof item === "object") return { ...item };
    return { value: item };
  });
}
988
+
989
+ // src/trace/store.ts
990
/**
 * Trace store that keeps runs, spans, events, artifacts and budget entries in
 * process memory.
 *
 * Records are shallow-copied on the way in and on the way out, so callers can
 * never mutate stored state through a reference they hold. `listRuns` copies too,
 * like every other reader.
 */
var InMemoryTraceStore = class {
  runs = /* @__PURE__ */ new Map();
  allSpans = [];
  allEvents = [];
  allArtifacts = [];
  allBudget = [];
  /** Store a new run. @throws {Error} When a run with the same `runId` exists. */
  async appendRun(run) {
    if (this.runs.has(run.runId)) throw new Error(`run ${run.runId} already exists`);
    this.runs.set(run.runId, { ...run });
  }
  /** Shallow-merge `patch` into an existing run. @throws {Error} When the run is unknown. */
  async updateRun(runId, patch) {
    const existing = this.runs.get(runId);
    if (!existing) throw new Error(`run ${runId} not found`);
    this.runs.set(runId, { ...existing, ...patch });
  }
  /** Store a span (no uniqueness check is made). */
  async appendSpan(span) {
    this.allSpans.push({ ...span });
  }
  /** Shallow-merge `patch` into the first span with `spanId`. @throws {Error} When none matches. */
  async updateSpan(spanId, patch) {
    const idx = this.allSpans.findIndex((s) => s.spanId === spanId);
    if (idx < 0) throw new Error(`span ${spanId} not found`);
    this.allSpans[idx] = { ...this.allSpans[idx], ...patch };
  }
  async appendEvent(event) {
    this.allEvents.push({ ...event });
  }
  async appendArtifact(artifact) {
    this.allArtifacts.push({ ...artifact });
  }
  async appendBudgetEntry(entry) {
    this.allBudget.push({ ...entry });
  }
  /** @returns A copy of the run, or `undefined` when unknown. */
  async getRun(runId) {
    const r = this.runs.get(runId);
    return r ? { ...r } : void 0;
  }
  /** @returns Copies of the runs accepted by `matchesRun`, in insertion order. */
  async listRuns(filter = {}) {
    return [...this.runs.values()].filter((r) => matchesRun(r, filter)).map((r) => ({ ...r }));
  }
  /** @returns Copies of the spans accepted by `matchesSpan`. */
  async spans(filter = {}) {
    return this.allSpans.filter((s) => matchesSpan(s, filter)).map((s) => ({ ...s }));
  }
  /** @returns Copies of the events accepted by `matchesEvent`. */
  async events(filter = {}) {
    return this.allEvents.filter((e) => matchesEvent(e, filter)).map((e) => ({ ...e }));
  }
  /** @returns Copies of the budget entries recorded for `runId`. */
  async budget(runId) {
    return this.allBudget.filter((b) => b.runId === runId).map((b) => ({ ...b }));
  }
  /** @returns Copies of the artifacts recorded for `runId`. */
  async artifacts(runId) {
    return this.allArtifacts.filter((a) => a.runId === runId).map((a) => ({ ...a }));
  }
};
1042
/**
 * Decide whether run `r` satisfies filter `f2`.
 *
 * Falsy filter fields are ignored. `since` / `until` bound `r.startedAt`
 * inclusively; `tag` requires `r.tags[tag.key] === tag.value`.
 */
function matchesRun(r, f2) {
  const exactFields = ["scenarioId", "variantId", "status", "parentRunId", "projectId", "chatId", "layer"];
  for (const field of exactFields) {
    if (f2[field] && r[field] !== f2[field]) return false;
  }
  if (f2.since !== void 0 && r.startedAt < f2.since) return false;
  if (f2.until !== void 0 && r.startedAt > f2.until) return false;
  if (f2.tag && r.tags?.[f2.tag.key] !== f2.tag.value) return false;
  return true;
}
1055
/**
 * Decide whether span `s` satisfies filter `f2`.
 *
 * Falsy filter fields are ignored. `toolName` only matches spans of kind
 * `"tool"` and `judgeId` only spans of kind `"judge"`; `since` / `until` bound
 * `s.startedAt` inclusively.
 */
function matchesSpan(s, f2) {
  for (const field of ["runId", "parentSpanId", "kind", "name"]) {
    if (f2[field] && s[field] !== f2[field]) return false;
  }
  if (f2.toolName) {
    const sameTool = s.kind === "tool" && s.toolName === f2.toolName;
    if (!sameTool) return false;
  }
  if (f2.judgeId) {
    const sameJudge = s.kind === "judge" && s.judgeId === f2.judgeId;
    if (!sameJudge) return false;
  }
  if (f2.since !== void 0 && s.startedAt < f2.since) return false;
  if (f2.until !== void 0 && s.startedAt > f2.until) return false;
  return true;
}
1066
/**
 * Decide whether event `e` satisfies filter `f2`.
 *
 * Falsy `runId` / `spanId` / `kind` filters are ignored; `since` / `until`
 * bound `e.timestamp` inclusively.
 */
function matchesEvent(e, f2) {
  const mismatch = ["runId", "spanId", "kind"].some((field) => f2[field] && e[field] !== f2[field]);
  if (mismatch) return false;
  const tooEarly = f2.since !== void 0 && e.timestamp < f2.since;
  const tooLate = f2.until !== void 0 && e.timestamp > f2.until;
  return !(tooEarly || tooLate);
}
1074
/**
 * Append-only trace store backed by NDJSON files in one directory.
 *
 * Each record kind has an active file `<name>.ndjson`. When the active file
 * reaches `maxBytes` it is renamed to `<name>.<Date.now()>.ndjson` before the
 * next append. Updates are stored as records carrying `_update: true`.
 * Reads are served from an in-memory index built by replaying the files.
 */
var FileSystemTraceStore = class {
  dir;
  maxBytes;
  /** Lazy in-memory index for queries — populated on first read. */
  index;
  loaded = false;
  /** @param options `dir` (required) and `maxBytes` (roll-over size, default 32 MiB). */
  constructor(options) {
    this.dir = options.dir;
    this.maxBytes = options.maxBytes ?? 32 * 1024 * 1024;
  }
  async ensureDir() {
    const fs = await import("fs/promises");
    await fs.mkdir(this.dir, { recursive: true });
  }
  /**
   * Append one record to `<name>.ndjson`, rolling the file over first if it is full,
   * then mirror non-update records into the index when one is loaded.
   */
  async append(name, record) {
    await this.ensureDir();
    const fs = await import("fs/promises");
    const path = await import("path");
    const active = path.join(this.dir, `${name}.ndjson`);
    try {
      const stat2 = await fs.stat(active);
      if (stat2.size >= this.maxBytes) {
        const rolled = path.join(this.dir, `${name}.${Date.now()}.ndjson`);
        await fs.rename(active, rolled);
      }
    } catch {
      // Best-effort roll-over: a missing active file (first write) or a failed
      // rename must not block the append itself.
    }
    await fs.appendFile(active, `${JSON.stringify(record)}\n`, "utf8");
    if (this.index && !record?._update) {
      // Awaited so an index failure can never surface as an unhandled rejection.
      await this.insertInto(name, record);
    }
  }
  /** Mirror a freshly appended record into the loaded index. */
  async insertInto(name, record) {
    if (!this.index) return;
    switch (name) {
      case "runs":
        // Same rule as replay in load(): a second record for a known run merges into it.
        try {
          await this.index.appendRun(record);
        } catch {
          await this.index.updateRun(record.runId, record);
        }
        break;
      case "spans":
        await this.index.appendSpan(record);
        break;
      case "events":
        await this.index.appendEvent(record);
        break;
      case "artifacts":
        await this.index.appendArtifact(record);
        break;
      case "budget":
        await this.index.appendBudgetEntry(record);
        break;
    }
  }
  /**
   * Build (once) and return the in-memory index by replaying every `.ndjson` file.
   *
   * Rolled files are replayed in timestamp order before the active file, so
   * updates are applied after the records they modify regardless of directory
   * listing order. Unreadable files and malformed lines are skipped; a directory
   * that cannot be listed yields an empty index.
   */
  async load() {
    if (this.loaded && this.index) return this.index;
    const fs = await import("fs/promises");
    const path = await import("path");
    const store = new InMemoryTraceStore();
    // Rolled files carry a numeric middle segment; the active file sorts last.
    const replayRank = (file) => {
      const parts = file.split(".");
      const stamp = parts.length > 2 ? Number(parts[1]) : Number.POSITIVE_INFINITY;
      return Number.isNaN(stamp) ? Number.POSITIVE_INFINITY : stamp;
    };
    let entries = [];
    try {
      entries = (await fs.readdir(this.dir)).filter((file) => file.endsWith(".ndjson"));
    } catch {
      entries = [];
    }
    entries.sort((a, b) => {
      const ra = replayRank(a);
      const rb = replayRank(b);
      if (ra === rb) return a < b ? -1 : a > b ? 1 : 0;
      return ra < rb ? -1 : 1;
    });
    for (const file of entries) {
      let content;
      try {
        content = await fs.readFile(path.join(this.dir, file), "utf8");
      } catch {
        continue;
      }
      const base = file.split(".")[0];
      for (const line of content.split("\n")) {
        if (!line.trim()) continue;
        let record;
        try {
          record = JSON.parse(line);
        } catch {
          // One corrupt line (for example a torn write) must not hide the rest of the log.
          continue;
        }
        if (record === null || typeof record !== "object") continue;
        // `_update` is a storage marker only; keep it out of the indexed record.
        const { _update: isUpdate, ...fields } = record;
        if (base === "runs") {
          try {
            await store.appendRun(fields);
          } catch {
            await store.updateRun(fields.runId, fields);
          }
        } else if (base === "spans") {
          if (isUpdate) {
            try {
              await store.updateSpan(fields.spanId, fields);
            } catch {
              await store.appendSpan(fields);
            }
          } else {
            await store.appendSpan(fields);
          }
        } else if (base === "events") {
          await store.appendEvent(record);
        } else if (base === "artifacts") {
          await store.appendArtifact(record);
        } else if (base === "budget") {
          await store.appendBudgetEntry(record);
        }
      }
    }
    this.index = store;
    this.loaded = true;
    return store;
  }
  async appendRun(run) {
    await this.append("runs", run);
  }
  async updateRun(runId, patch) {
    await this.append("runs", { runId, ...patch, _update: true });
    if (this.index) await this.index.updateRun(runId, patch);
  }
  async appendSpan(span) {
    await this.append("spans", span);
  }
  async updateSpan(spanId, patch) {
    await this.append("spans", { spanId, ...patch, _update: true });
    if (this.index) await this.index.updateSpan(spanId, patch);
  }
  async appendEvent(event) {
    await this.append("events", event);
  }
  async appendArtifact(artifact) {
    await this.append("artifacts", artifact);
  }
  async appendBudgetEntry(entry) {
    await this.append("budget", entry);
  }
  async getRun(runId) {
    return (await this.load()).getRun(runId);
  }
  async listRuns(filter) {
    return (await this.load()).listRuns(filter);
  }
  async spans(filter) {
    return (await this.load()).spans(filter);
  }
  async events(filter) {
    return (await this.load()).events(filter);
  }
  async budget(runId) {
    return (await this.load()).budget(runId);
  }
  async artifacts(runId) {
    return (await this.load()).artifacts(runId);
  }
};
1215
+
1216
+ // src/trace/otel.ts
1217
// Instrumentation scope stamped on every exported span batch.
var OTEL_AGENT_EVAL_SCOPE = { name: "@tangle-network/agent-eval", version: "0.3.0" };

/**
 * Export one run from `store` as an OTLP-shaped `{ resourceSpans }` payload.
 *
 * @param store Trace store providing `getRun`, `spans` and `events`.
 * @param runId Run to export.
 * @param resourceAttrs Extra resource attributes; they override the built-in ones.
 * @throws {Error} When the run does not exist.
 */
async function exportRunAsOtlp(store, runId, resourceAttrs = {}) {
  const run = await store.getRun(runId);
  if (!run) throw new Error(`run ${runId} not found`);
  const spans = await store.spans({ runId });
  const events = await store.events({ runId });

  // Group events by owning span; events without a spanId are dropped.
  const eventsBySpan = /* @__PURE__ */ new Map();
  for (const event of events) {
    if (!event.spanId) continue;
    const bucket = eventsBySpan.get(event.spanId);
    if (bucket) {
      bucket.push(event);
    } else {
      eventsBySpan.set(event.spanId, [event]);
    }
  }

  const traceId = runToTraceId(run);
  const otlpSpans = spans.map((span) => {
    const ownEvents = eventsBySpan.get(span.spanId) ?? [];
    return spanToOtlp(span, traceId, ownEvents);
  });

  const resource = {
    attributes: toAttributes({
      "service.name": "agent-eval",
      "run.id": run.runId,
      "run.scenario_id": run.scenarioId,
      "run.variant_id": run.variantId ?? "",
      "run.dataset_version": run.datasetVersion ?? "",
      "run.code_sha": run.codeSha ?? "",
      "run.model_fingerprint": run.modelFingerprint ?? "",
      ...resourceAttrs
    })
  };
  const scopeSpans = [{ scope: OTEL_AGENT_EVAL_SCOPE, spans: otlpSpans }];
  return { resourceSpans: [{ resource, scopeSpans }] };
}
1254
/**
 * Convert one stored span plus its events to an OTLP-shaped span object.
 *
 * A span without `endedAt` is exported with its end time equal to its start
 * time. Status is `{ code: 2, message }` when `span.status === "error"` and
 * `{ code: 1 }` otherwise.
 */
function spanToOtlp(span, traceId, events) {
  const endedAt = span.endedAt ?? span.startedAt;
  const spanId = padSpanId(span.spanId);
  const parentSpanId = span.parentSpanId ? padSpanId(span.parentSpanId) : void 0;
  const startTimeUnixNano = msToNs(span.startedAt);
  const endTimeUnixNano = msToNs(endedAt);
  const attributes = toAttributes(flattenSpanAttributes(span));
  const otlpEvents = events.map((event) => {
    return {
      timeUnixNano: msToNs(event.timestamp),
      name: event.kind,
      attributes: toAttributes(flattenPayload(event.payload))
    };
  });
  let status;
  if (span.status === "error") {
    status = { code: 2, message: span.error };
  } else {
    status = { code: 1 };
  }
  return {
    traceId,
    spanId,
    parentSpanId,
    name: span.name,
    kind: 1,
    // SPAN_KIND_INTERNAL
    startTimeUnixNano,
    endTimeUnixNano,
    attributes,
    events: otlpEvents,
    status
  };
}
1274
/**
 * Flatten a span into a single-level attribute record.
 *
 * Always sets `span.kind`, then adds the kind-specific fields, then copies every
 * string / number / boolean entry of `span.attributes` (other value types are
 * skipped). Custom attributes are written last and so override earlier keys.
 */
function flattenSpanAttributes(span) {
  const flat = { "span.kind": span.kind };
  // Copy an optional field only when it has been set.
  const setIfDefined = (key, value) => {
    if (value !== void 0) flat[key] = value;
  };
  switch (span.kind) {
    case "llm":
      flat["llm.model"] = span.model;
      setIfDefined("llm.input_tokens", span.inputTokens);
      setIfDefined("llm.output_tokens", span.outputTokens);
      setIfDefined("llm.cost_usd", span.costUsd);
      if (span.finishReason) flat["llm.finish_reason"] = span.finishReason;
      break;
    case "tool":
      flat["tool.name"] = span.toolName;
      setIfDefined("tool.latency_ms", span.latencyMs);
      break;
    case "retrieval":
      flat["retrieval.query"] = span.query;
      flat["retrieval.hits"] = span.hits.length;
      break;
    case "judge":
      flat["judge.id"] = span.judgeId;
      flat["judge.dimension"] = span.dimension;
      flat["judge.score"] = span.score;
      flat["judge.target_span_id"] = span.targetSpanId;
      break;
    case "sandbox":
      if (span.image) flat["sandbox.image"] = span.image;
      setIfDefined("sandbox.exit_code", span.exitCode);
      setIfDefined("sandbox.tests_passed", span.testsPassed);
      setIfDefined("sandbox.tests_total", span.testsTotal);
      break;
    default:
      break;
  }
  if (span.attributes) {
    const primitiveTypes = ["string", "number", "boolean"];
    for (const [key, value] of Object.entries(span.attributes)) {
      if (primitiveTypes.includes(typeof value)) flat[key] = value;
    }
  }
  return flat;
}
1308
/**
 * Flatten an event payload into attribute-friendly values.
 *
 * Strings, numbers and booleans pass through; every other value is
 * JSON-stringified. Values that `JSON.stringify` rejects (BigInt, circular
 * structures) fall back to `String(value)` so one odd payload cannot abort an
 * export.
 *
 * @param payload Event payload; a missing or non-object payload yields `{}`.
 * @returns A new flat record.
 */
function flattenPayload(payload) {
  const out = {};
  if (payload === null || typeof payload !== "object") return out;
  for (const [k, v] of Object.entries(payload)) {
    if (typeof v === "string" || typeof v === "number" || typeof v === "boolean") {
      out[k] = v;
      continue;
    }
    try {
      out[k] = JSON.stringify(v);
    } catch {
      out[k] = String(v);
    }
  }
  return out;
}
1316
/**
 * Convert a flat record into a list of `{ key, value }` attribute entries.
 *
 * Integers become `{ intValue: "<decimal string>" }`, other numbers
 * `{ doubleValue }`, booleans `{ boolValue }`, and everything else is passed
 * through as `{ stringValue }`.
 */
function toAttributes(record) {
  const wrap = (value) => {
    if (typeof value === "number") {
      if (Number.isInteger(value)) return { intValue: value.toString() };
      return { doubleValue: value };
    }
    if (typeof value === "boolean") return { boolValue: value };
    return { stringValue: value };
  };
  return Object.entries(record).map(([key, value]) => ({ key, value: wrap(value) }));
}
1322
/** Convert milliseconds (floored to an integer) to a decimal nanosecond string. */
function msToNs(ms) {
  const wholeMs = BigInt(Math.floor(ms));
  return String(wholeMs * 1000000n);
}
1325
/** Strip dashes from `id`, then truncate or right-pad with "0" to exactly 16 characters. */
function padSpanId(id) {
  const width = 16;
  const dashless = id.split("-").join("");
  return dashless.slice(0, width).padEnd(width, "0");
}
1329
/** Derive a 32-character trace id from `run.runId`: dashes removed, truncated or right-padded with "0". */
function runToTraceId(run) {
  const width = 32;
  const dashless = run.runId.split("-").join("");
  return dashless.slice(0, width).padEnd(width, "0");
}
1333
+
1334
+ // src/trace/redact.ts
1335
// Built-in redaction rules, applied in array order. Each rule pairs an `id`
// (used in the default `[redacted:<id>]` replacement and in reports) with a
// global regex.
var DEFAULT_REDACTION_RULES = [
  // Personal identifiers.
  { id: "email", pattern: /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi },
  { id: "ssn", pattern: /\b\d{3}-\d{2}-\d{4}\b/g },
  { id: "credit-card", pattern: /\b(?:\d[ -]*?){13,16}\b/g },
  { id: "phone-us", pattern: /\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g },
  { id: "ipv4", pattern: /\b(?:\d{1,3}\.){3}\d{1,3}\b/g },
  // Credentials and key material.
  { id: "aws-access-key", pattern: /\bAKIA[0-9A-Z]{16}\b/g },
  { id: "bearer", pattern: /\bBearer\s+[A-Za-z0-9._~+/=-]{10,}/gi },
  { id: "sk-key", pattern: /\bsk-[A-Za-z0-9_-]{10,}\b/g },
  {
    id: "private-key-block",
    pattern: /-----BEGIN (?:RSA |EC |OPENSSH |DSA )?PRIVATE KEY-----[\s\S]*?-----END[^-]*-----/g
  }
];
// Version tag for the redaction rule set.
var REDACTION_VERSION = "1.0.0";
1350
/**
 * Apply redaction rules to a string, in order, and report what was replaced.
 *
 * Every occurrence of every rule is replaced. A rule whose regex lacks the `g`
 * flag is upgraded to a global copy, because `String.prototype.replace` with a
 * non-global regex stops after the first match and would leave later secrets
 * in the output.
 *
 * @param input Text to redact.
 * @param rules Rules `{ id, pattern, replacement? }`; defaults to `DEFAULT_REDACTION_RULES`.
 * @returns `{ output, report }` where `report` is `{ redactionCount, byRule }`
 *   and `byRule` only lists rules with at least one hit.
 */
function redactString(input, rules = DEFAULT_REDACTION_RULES) {
  const byRule = {};
  let redactionCount = 0;
  let output = input;
  for (const rule of rules) {
    const pattern = rule.pattern instanceof RegExp && !rule.pattern.global
      ? new RegExp(rule.pattern.source, `${rule.pattern.flags}g`)
      : rule.pattern;
    let hits = 0;
    // A replacer function is used so `$` sequences in the replacement stay literal.
    output = output.replace(pattern, () => {
      hits++;
      return rule.replacement ?? `[redacted:${rule.id}]`;
    });
    if (hits > 0) {
      byRule[rule.id] = hits;
      redactionCount += hits;
    }
  }
  return { output, report: { redactionCount, byRule } };
}
1367
/**
 * Recursively redact every string inside `value`.
 *
 * Strings go through `redactString`; arrays and objects are rebuilt with their
 * elements / own enumerable properties redacted; all other values are returned
 * as-is. Hit counts accumulate into the single `report` object shared by the
 * whole traversal.
 *
 * @returns `{ value, report }`; `report` is the object passed in (or a fresh one).
 */
function redactValue(value, rules = DEFAULT_REDACTION_RULES, report = { redactionCount: 0, byRule: {} }) {
  const descend = (child) => redactValue(child, rules, report).value;

  if (typeof value === "string") {
    const redacted = redactString(value, rules);
    report.redactionCount += redacted.report.redactionCount;
    for (const ruleId of Object.keys(redacted.report.byRule)) {
      const previous = report.byRule[ruleId] ?? 0;
      report.byRule[ruleId] = previous + redacted.report.byRule[ruleId];
    }
    return { value: redacted.output, report };
  }

  if (Array.isArray(value)) {
    const items = value.map((item) => descend(item));
    return { value: items, report };
  }

  if (value !== null && typeof value === "object") {
    const rebuilt = {};
    for (const key of Object.keys(value)) {
      rebuilt[key] = descend(value[key]);
    }
    return { value: rebuilt, report };
  }

  return { value, report };
}
1391
+
1392
+ // src/replay.ts
1393
/**
 * Error raised when a replayed request has no recorded response.
 * Carries the request URL and the cache key that was looked up.
 */
var ReplayCacheMissError = class extends ReplayError {
  /** URL of the request that missed the cache. */
  url;
  /** Cache key supplied by the thrower for the missed request. */
  requestKey;
  /**
   * @param {string} url - URL of the request that missed.
   * @param {string} requestKey2 - Cache key that was looked up.
   * @param {string} [message] - Optional override for the default message.
   */
  constructor(url, requestKey2, message) {
    const text = message ?? `replay cache miss for ${url} (key=${requestKey2})`;
    super(text);
    this.url = url;
    this.requestKey = requestKey2;
  }
};
1402
/**
 * In-memory index of recorded `(request, response)` event pairs, keyed by a
 * hash of the request body, used to answer requests during replay.
 */
var ReplayCache = class _ReplayCache {
  /** Request key -> `{ request, response }` pair. */
  byKey = /* @__PURE__ */ new Map();
  /** Number of request events that had no matching response event. */
  orphans = 0;
  /** Cached-pair count per `request.provider`. */
  byProvider = {};
  /** Cached-pair count per `request.model`. */
  byModel = {};
  /**
   * Build a cache from a sink's events. The sink must implement `list()`.
   * Filter by `runId` / `spanId` to scope to a specific replay.
   *
   * @throws {ReplayError} When the sink has no `list` method.
   */
  static async fromSink(sink, filter = {}) {
    if (!sink.list) {
      throw new ReplayError("ReplayCache.fromSink: sink must implement list() to be replayable.");
    }
    const events = await sink.list(filter);
    return _ReplayCache.fromEvents(events);
  }
  /**
   * Build a cache from an in-memory event list.
   *
   * Events are paired by `(runId, spanId, attemptIndex)`: the event whose
   * `direction` is `"request"` is the request, any other event in the group
   * is taken as the response. Groups without a request are ignored; groups
   * without a response are counted as orphans. When several pairs produce
   * the same request key, the last one wins.
   */
  static async fromEvents(events) {
    const cache = new _ReplayCache();
    const groups = /* @__PURE__ */ new Map();
    for (const e of events) {
      const k = `${e.runId ?? ""}::${e.spanId ?? ""}::${e.attemptIndex}`;
      const g = groups.get(k) ?? {};
      if (e.direction === "request") g.req = e;
      else g.res = e;
      groups.set(k, g);
    }
    // Adjust a per-name tally, dropping names whose count falls to zero so
    // the breakdown never lists entries that are no longer cached.
    const adjust = (tally, name, delta) => {
      const next = (tally[name] ?? 0) + delta;
      if (next > 0) tally[name] = next;
      else delete tally[name];
    };
    for (const g of groups.values()) {
      if (!g.req) continue;
      if (!g.res) {
        cache.orphans += 1;
        continue;
      }
      const key = await requestKey(g.req);
      // Identical request bodies (e.g. a retried attempt) share one key and
      // overwrite each other in byKey. Release the replaced pair's tallies
      // first so byProvider/byModel keep summing to byKey.size.
      const replaced = cache.byKey.get(key);
      if (replaced) {
        adjust(cache.byProvider, replaced.request.provider, -1);
        adjust(cache.byModel, replaced.request.model, -1);
      }
      cache.byKey.set(key, { request: g.req, response: g.res });
      adjust(cache.byProvider, g.req.provider, 1);
      adjust(cache.byModel, g.req.model, 1);
    }
    return cache;
  }
  /** Number of cacheable (request, response) pairs in the cache. */
  size() {
    return this.byKey.size;
  }
  /**
   * Snapshot of cache statistics. The `byProvider` / `byModel` objects are
   * copies, so mutating them does not affect the cache.
   */
  stats() {
    return {
      total: this.byKey.size,
      byProvider: { ...this.byProvider },
      byModel: { ...this.byModel },
      orphanRequests: this.orphans
    };
  }
  /** Iterate every cached `(request, response)` pair in insertion order. */
  *entries() {
    for (const entry of this.byKey.values()) yield entry;
  }
  /**
   * Look up a cached response by hashing the (model, messages, temperature,
   * maxTokens, response_format) shape. Returns `undefined` on miss; the
   * caller decides whether to throw, fall back to the network, or skip.
   */
  async lookup(requestBody) {
    const key = await keyFromBody(requestBody);
    return this.byKey.get(key);
  }
};
1468
/**
 * Create a `fetch`-compatible function that answers `/chat/completions`
 * requests from a replay cache instead of the network.
 *
 * Requests to any other URL are forwarded to the fallback fetch. For a
 * completions request the JSON body is parsed and passed to `cache.lookup`;
 * on a hit a `Response` is synthesised from the recorded status, headers and
 * body. On a miss the behaviour is selected by `opts.onMiss`.
 *
 * @param {{ lookup(body: unknown): Promise<{request: object, response: object} | undefined> }} cache
 *   Cache to answer from.
 * @param {object} [opts]
 * @param {string} [opts.onMiss="throw"] - `"throw"` raises
 *   `ReplayCacheMissError`; `"fail-closed"` returns a 599 response with
 *   `{ error: "replay_cache_miss" }`; any other value forwards the request
 *   to the fallback fetch.
 * @param {Function} [opts.fallbackFetch] - Fetch used for pass-through;
 *   defaults to `globalThis.fetch` when available.
 * @param {Function} [opts.onHit] - Called with `{ url, provider, model }` on a hit.
 * @param {Function} [opts.onMissNotify] - Called with `{ url, requestBody }` on a miss.
 * @returns {Function} An async `(input, init) => Response` function.
 * @throws {ReplayError} (from the returned function) when pass-through is
 *   required but no fallback fetch is available.
 * @throws {ReplayCacheMissError} (from the returned function) on a miss
 *   with `onMiss === "throw"`.
 */
function createReplayFetch(cache, opts = {}) {
  const onMiss = opts.onMiss ?? "throw";
  const fallback = opts.fallbackFetch ?? globalThis.fetch?.bind(globalThis);
  // Statuses in the Response constructor's accepted range for which it
  // rejects any non-null body.
  const NULL_BODY_STATUSES = /* @__PURE__ */ new Set([204, 205, 304]);
  // Return the request payload as text, or undefined when it is not available as text.
  const readRawBody = async (input, init) => {
    if (typeof init?.body === "string") return init.body;
    // `fetch(new Request(...))` carries the payload on the Request itself.
    // Only consult it when `init` does not override the body, and read a
    // clone so the original stays usable for a later fallback fetch.
    const isUnreadRequest = init?.body == null && typeof Request !== "undefined" && input instanceof Request && input.body !== null && !input.bodyUsed;
    return isUnreadRequest ? input.clone().text() : void 0;
  };
  return (async (input, init) => {
    const url = typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
    if (!/\/chat\/completions(?:[?#].*)?$/.test(url)) {
      if (!fallback)
        throw new ReplayError(
          `replay fetch: non-completions URL ${url} but no fallbackFetch configured`
        );
      return fallback(input, init);
    }
    let bodyParsed;
    const rawBody = await readRawBody(input, init);
    if (rawBody) {
      try {
        bodyParsed = JSON.parse(rawBody);
      } catch {
        // Deliberate: a body that is not JSON cannot match any recorded
        // request, so it is handled as a cache miss below.
      }
    }
    const hit = bodyParsed === void 0 ? void 0 : await cache.lookup(bodyParsed);
    if (hit) {
      opts.onHit?.({ url, provider: hit.request.provider, model: hit.request.model });
      const status = hit.response.statusCode ?? 200;
      const headers = new Headers(
        Object.entries(hit.response.responseHeaders ?? { "Content-Type": "application/json" })
      );
      const bodyText = typeof hit.response.responseBody === "string" ? hit.response.responseBody : JSON.stringify(hit.response.responseBody ?? {});
      // A recorded null-body status must be replayed without a body,
      // otherwise the Response constructor throws a TypeError.
      return new Response(NULL_BODY_STATUSES.has(status) ? null : bodyText, { status, headers });
    }
    opts.onMissNotify?.({ url, requestBody: bodyParsed });
    if (onMiss === "throw") {
      const key = bodyParsed === void 0 ? "<unparseable>" : await keyFromBody(bodyParsed);
      throw new ReplayCacheMissError(url, key);
    }
    if (onMiss === "fail-closed") {
      return new Response(JSON.stringify({ error: "replay_cache_miss" }), { status: 599 });
    }
    if (!fallback)
      throw new ReplayError("replay fetch: onMiss=fallback but no fallbackFetch configured");
    return fallback(input, init);
  });
}
1510
/**
 * Async generator over every recorded `(request, response)` pair in a sink.
 *
 * Lists the sink's events (optionally filtered), pairs them through
 * `ReplayCache.fromEvents`, and yields each cached pair.
 *
 * @param {{ list?: Function }} sink - Event sink; must implement `list()`.
 * @param {object} [filter] - Passed through to `sink.list`.
 * @throws {ReplayError} When the sink has no `list` method.
 */
async function* iterateRawCalls(sink, filter = {}) {
  const canList = Boolean(sink.list);
  if (!canList) {
    throw new ReplayError("iterateRawCalls: sink must implement list().");
  }
  const recorded = await sink.list(filter);
  const pairs = await ReplayCache.fromEvents(recorded);
  yield* pairs.entries();
}
1518
/**
 * Compute the replay cache key for a recorded request event by delegating
 * to `keyFromBody` with the event's `requestBody`.
 *
 * @param {{ requestBody: unknown }} event - Recorded request event.
 * @returns {Promise<*>} Whatever `keyFromBody` resolves to for that body.
 */
async function requestKey(event) {
  const { requestBody } = event;
  const key = await keyFromBody(requestBody);
  return key;
}
1521
/**
 * Compute the replay cache key for a request body.
 *
 * Only `model`, `messages`, `temperature`, `max_tokens`,
 * `max_completion_tokens` and `response_format` take part in the key;
 * a field that is missing or null is keyed as `null`. Anything that is not
 * a non-null object is keyed by its string form instead.
 *
 * @param {unknown} body - Parsed request body.
 * @returns {Promise<*>} Result of `hashJson` (defined elsewhere in this
 *   file) applied to the canonicalized key shape.
 */
async function keyFromBody(body) {
  const isObjectBody = body != null && typeof body === "object";
  if (!isObjectBody) {
    return hashJson({ raw: String(body) });
  }
  // A destructuring default of null covers undefined fields; fields that
  // are already null stay null, so both cases end up as null in the key.
  const {
    model = null,
    messages = null,
    temperature = null,
    max_tokens = null,
    max_completion_tokens = null,
    response_format = null
  } = body;
  const keyShape = canonicalize({
    model,
    messages,
    temperature,
    max_tokens,
    max_completion_tokens,
    response_format
  });
  return hashJson(keyShape);
}
1534
+
1535
+ export {
1536
+ TRACE_ANALYST_ACTOR_DESCRIPTION,
1537
+ TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION,
1538
+ TRACE_ANALYST_SUBAGENT_DESCRIPTION,
1539
+ DEFAULT_TRACE_ANALYST_BUDGETS,
1540
+ TRACE_ANALYST_TRUNCATION_MARKER_PREFIX,
1541
+ OtlpFileTraceStore,
1542
+ TraceFileMissingError,
1543
+ TraceNotFoundError,
1544
+ SpanNotFoundError,
1545
+ buildTraceAnalystTools,
1546
+ traceAnalystFunctionGroup,
1547
+ analyzeTraces,
1548
+ InMemoryTraceStore,
1549
+ FileSystemTraceStore,
1550
+ OTEL_AGENT_EVAL_SCOPE,
1551
+ exportRunAsOtlp,
1552
+ DEFAULT_REDACTION_RULES,
1553
+ REDACTION_VERSION,
1554
+ redactString,
1555
+ redactValue,
1556
+ ReplayCacheMissError,
1557
+ ReplayCache,
1558
+ createReplayFetch,
1559
+ iterateRawCalls
1560
+ };
1561
+ //# sourceMappingURL=chunk-UW4NOOZI.js.map