@axlsdk/studio 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,22 +31,169 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
31
31
  var server_exports = {};
32
32
  __export(server_exports, {
33
33
  ConnectionManager: () => ConnectionManager,
34
- CostAggregator: () => CostAggregator,
34
+ EvalAggregator: () => EvalAggregator,
35
+ ExecutionAggregator: () => ExecutionAggregator,
36
+ TraceAggregator: () => TraceAggregator,
35
37
  createServer: () => createServer
36
38
  });
37
39
  module.exports = __toCommonJS(server_exports);
38
40
  var import_node_fs = require("fs");
39
41
  var import_node_path = require("path");
40
- var import_hono12 = require("hono");
42
+ var import_hono15 = require("hono");
41
43
  var import_cors = require("hono/cors");
42
44
  var import_serve_static = require("@hono/node-server/serve-static");
43
45
 
46
+ // src/server/redact.ts
47
/** Placeholder written in place of every value the redaction helpers scrub. */
var REDACTED = "[redacted]";
/** Error names whose messages redactErrorMessage returns verbatim even when redaction is on. */
var SAFE_ERROR_NAMES = new Set()
  .add("QuorumNotMet")
  .add("NoConsensus")
  .add("TimeoutError")
  .add("MaxTurnsError")
  .add("BudgetExceededError")
  .add("ToolDenied");
56
/**
 * Derive the message to expose for a thrown value.
 *
 * @param {unknown} err - The thrown value; non-Error values are stringified.
 * @param {boolean} redact - When falsy the raw message is returned unchanged.
 * @returns {string} The raw message, or the redaction placeholder when
 *   redaction is on and the error's name is not in SAFE_ERROR_NAMES.
 */
function redactErrorMessage(err, redact) {
  const isError = err instanceof Error;
  const message = isError ? err.message : String(err);
  if (redact) {
    const errorName = isError ? err.name : "";
    return SAFE_ERROR_NAMES.has(errorName) ? message : REDACTED;
  }
  return message;
}
62
/**
 * Replace an arbitrary value with the redaction placeholder.
 *
 * @param {unknown} value - Value to pass through or scrub.
 * @param {boolean} redact - When falsy, `value` is returned untouched.
 * @returns {unknown} `value`, or REDACTED when redaction is on.
 */
function redactValue(value, redact) {
  return redact ? REDACTED : value;
}
66
/**
 * Scrub the `result` and `error` fields of an execution record.
 *
 * All other fields are copied as-is. A field that is `undefined` on the
 * input is left alone rather than being filled with the placeholder.
 *
 * @param {object} info - Execution record.
 * @param {boolean} redact - When falsy, `info` itself is returned.
 * @returns {object} `info`, or a shallow copy with result/error replaced.
 */
function redactExecutionInfo(info, redact) {
  if (redact) {
    const scrubbed = { ...info };
    if (info.result !== void 0) scrubbed.result = REDACTED;
    if (info.error !== void 0) scrubbed.error = REDACTED;
    return scrubbed;
  }
  return info;
}
74
/**
 * Apply redactExecutionInfo to every record of a list.
 *
 * @param {object[]} infos - Execution records.
 * @param {boolean} redact - When falsy, the input array itself is returned.
 * @returns {object[]} `infos`, or a new array of scrubbed copies.
 */
function redactExecutionList(infos, redact) {
  return redact ? infos.map((item) => redactExecutionInfo(item, redact)) : infos;
}
78
/**
 * Replace a memory value with the redaction placeholder.
 *
 * @param {unknown} value - Stored memory value.
 * @param {boolean} redact - When falsy, `value` is returned untouched.
 * @returns {unknown} `value`, or REDACTED when redaction is on.
 */
function redactMemoryValue(value, redact) {
  return redact ? REDACTED : value;
}
82
/**
 * Scrub the values of a list of memory entries, keeping only their keys.
 *
 * @param {Array<{key: string, value: unknown}>} entries - Memory entries.
 * @param {boolean} redact - When falsy, the input array itself is returned.
 * @returns {Array<{key: string, value: unknown}>} `entries`, or new
 *   `{ key, value }` objects whose value is the placeholder (any other
 *   field on the entry is dropped).
 */
function redactMemoryList(entries, redact) {
  if (redact) {
    return entries.map(({ key }) => ({ key, value: REDACTED }));
  }
  return entries;
}
86
/**
 * Build a scrubbed copy of a chat message.
 *
 * Kept: `role`, and when defined `name`, `tool_call_id`, and for each tool
 * call its `id`, `type` and function name. Replaced by the placeholder:
 * `content` and every tool call's `arguments`. Nothing else is copied, so
 * fields such as providerMetadata never reach the output.
 *
 * @param {object} msg - Chat message.
 * @returns {object} A new message object built from the allow-listed fields.
 */
function redactChatMessage(msg) {
  const scrubbed = { role: msg.role, content: REDACTED };
  if (msg.name !== void 0) scrubbed.name = msg.name;
  if (msg.tool_call_id !== void 0) scrubbed.tool_call_id = msg.tool_call_id;
  if (msg.tool_calls !== void 0) {
    scrubbed.tool_calls = msg.tool_calls.map(({ id, type, function: fn }) => ({
      id,
      type,
      function: { name: fn.name, arguments: REDACTED },
    }));
  }
  return scrubbed;
}
106
/**
 * Scrub every message of a session history with redactChatMessage.
 *
 * @param {object[]} history - Chat messages.
 * @param {boolean} redact - When falsy, the input array itself is returned.
 * @returns {object[]} `history`, or a new array of scrubbed messages.
 */
function redactSessionHistory(history, redact) {
  return redact ? history.map(redactChatMessage) : history;
}
110
/**
 * Scrub user content out of a stream event before it is broadcast.
 *
 * Known content-bearing event types have their payload fields replaced by
 * the redaction placeholder; known structural types pass through untouched.
 * Any other type fails closed: only `type` survives, plus a
 * `redacted: true` marker. Previously such events fell out of the switch
 * and the function returned `undefined`, which both dropped the event and
 * handed callers a non-object to read `.type` from.
 *
 * @param {object} event - Stream event with a `type` discriminator.
 * @param {boolean} redact - When falsy, `event` itself is returned.
 * @returns {object} `event`, or its redacted counterpart (never undefined).
 */
function redactStreamEvent(event, redact) {
  if (!redact) return event;
  switch (event.type) {
    case "token":
      return { type: "token", data: REDACTED };
    case "tool_call":
      return { ...event, args: REDACTED };
    case "tool_result":
      return { ...event, result: REDACTED };
    case "tool_approval":
      return {
        ...event,
        args: REDACTED,
        ...(event.reason !== void 0 ? { reason: REDACTED } : {}),
      };
    case "done":
      return { type: "done", data: REDACTED };
    case "error":
      return { type: "error", message: REDACTED };
    // Structural events have no user content to scrub.
    case "agent_start":
    case "agent_end":
    case "handoff":
    case "step":
      return event;
    // Unknown event shape: nothing about its fields can be trusted, so
    // expose the discriminator only.
    default:
      return { type: event.type, redacted: true };
  }
}
137
/**
 * Build a scrubbed copy of a single eval item.
 *
 * `input` and `output` are always replaced by the placeholder;
 * `annotations`, `error` and each element of `scorerErrors` are replaced
 * when present. `scoreDetails` is rebuilt keeping only `score`, `duration`
 * and `cost` per scorer, so per-scorer metadata is dropped.
 *
 * @param {object} item - Eval item.
 * @returns {object} A shallow copy with the content-bearing fields scrubbed.
 */
function redactEvalItem(item) {
  const scrubbed = { ...item, input: REDACTED, output: REDACTED };
  if (item.annotations !== void 0) scrubbed.annotations = REDACTED;
  if (item.error !== void 0) scrubbed.error = REDACTED;
  if (item.scorerErrors !== void 0) {
    scrubbed.scorerErrors = item.scorerErrors.map(() => REDACTED);
  }
  if (item.scoreDetails) {
    const detailsOut = {};
    for (const [name, detail] of Object.entries(item.scoreDetails)) {
      // Allow-list: only numeric bookkeeping survives.
      const kept = { score: detail.score };
      if (detail.duration !== void 0) kept.duration = detail.duration;
      if (detail.cost !== void 0) kept.cost = detail.cost;
      detailsOut[name] = kept;
    }
    scrubbed.scoreDetails = detailsOut;
  }
  return scrubbed;
}
160
/**
 * Scrub every item of an eval result.
 *
 * @param {object} result - Eval result with an `items` array.
 * @param {boolean} redact - When falsy, `result` itself is returned.
 * @returns {object} `result`, or a shallow copy whose `items` went through
 *   redactEvalItem.
 */
function redactEvalResult(result, redact) {
  if (redact) {
    const copy = { ...result };
    copy.items = result.items.map(redactEvalItem);
    return copy;
  }
  return result;
}
167
/**
 * Scrub the `data` payload of an eval history entry.
 *
 * @param {object} entry - History entry whose `data` is an eval result.
 * @param {boolean} redact - When falsy, `entry` itself is returned.
 * @returns {object} `entry`, or a shallow copy with `data` passed through
 *   redactEvalResult.
 */
function redactEvalHistoryEntry(entry, redact) {
  if (redact) {
    const copy = { ...entry };
    copy.data = redactEvalResult(entry.data, redact);
    return copy;
  }
  return entry;
}
174
/**
 * Apply redactEvalHistoryEntry to every entry of a list.
 *
 * @param {object[]} entries - Eval history entries.
 * @param {boolean} redact - When falsy, the input array itself is returned.
 * @returns {object[]} `entries`, or a new array of scrubbed copies.
 */
function redactEvalHistoryList(entries, redact) {
  return redact ? entries.map((item) => redactEvalHistoryEntry(item, redact)) : entries;
}
178
/**
 * Scrub a pending decision.
 *
 * `prompt` is replaced by the placeholder and, when `metadata` is defined,
 * it is swapped for the marker object `{ redacted: true }`. Other fields
 * are copied unchanged.
 *
 * @param {object} decision - Pending decision.
 * @param {boolean} redact - When falsy, `decision` itself is returned.
 * @returns {object} `decision`, or a scrubbed shallow copy.
 */
function redactPendingDecision(decision, redact) {
  if (redact) {
    const copy = { ...decision, prompt: REDACTED };
    if (decision.metadata !== void 0) copy.metadata = { redacted: true };
    return copy;
  }
  return decision;
}
186
/**
 * Apply redactPendingDecision to every decision of a list.
 *
 * @param {object[]} decisions - Pending decisions.
 * @param {boolean} redact - When falsy, the input array itself is returned.
 * @returns {object[]} `decisions`, or a new array of scrubbed copies.
 */
function redactPendingDecisionList(decisions, redact) {
  return redact ? decisions.map((item) => redactPendingDecision(item, redact)) : decisions;
}
190
+
44
191
  // src/server/middleware/error-handler.ts
45
192
  async function errorHandler(c, next) {
46
193
  try {
47
194
  await next();
48
195
  } catch (err) {
49
- const message = err instanceof Error ? err.message : String(err);
196
+ const rawMessage = err instanceof Error ? err.message : String(err);
50
197
  const code = err.code ?? "INTERNAL_ERROR";
51
198
  let status = 500;
52
199
  if ("status" in err) {
@@ -54,46 +201,81 @@ async function errorHandler(c, next) {
54
201
  if (typeof errStatus === "number" && errStatus >= 400 && errStatus < 600) {
55
202
  status = errStatus;
56
203
  }
57
- } else if (code === "NOT_FOUND" || message.includes("not found") || message.includes("not registered")) {
204
+ } else if (code === "NOT_FOUND" || rawMessage.includes("not found") || rawMessage.includes("not registered")) {
58
205
  status = 404;
59
- } else if (code === "VALIDATION_ERROR" || message.includes("Expected") || message.includes("invalid")) {
206
+ } else if (code === "VALIDATION_ERROR" || rawMessage.includes("Expected") || rawMessage.includes("invalid")) {
60
207
  status = 400;
61
208
  }
209
+ const runtime = c.get("runtime");
210
+ const redactOn = runtime?.isRedactEnabled?.() ?? false;
62
211
  const body = {
63
212
  ok: false,
64
- error: { code, message }
213
+ error: { code, message: redactErrorMessage(err, redactOn) }
65
214
  };
66
215
  return c.json(body, status);
67
216
  }
68
217
  }
69
218
 
70
219
  // src/server/ws/connection-manager.ts
71
- function isBufferedChannel(channel) {
72
- return channel.startsWith("execution:");
73
- }
74
220
  var BUFFER_TTL_MS = 3e4;
75
221
  var MAX_BUFFER_EVENTS = 500;
222
+ var MAX_WS_FRAME_BYTES = 65536;
223
/**
 * Tell whether a channel name starts with one of the buffered prefixes
 * ("execution:" or "eval:").
 *
 * @param {string} channel - Channel name.
 * @returns {boolean} True for `execution:*` and `eval:*` channels.
 */
function isBufferedChannel(channel) {
  return ["execution:", "eval:"].some((prefix) => channel.startsWith(prefix));
}
226
/**
 * Return `msg` unchanged when it fits the frame budget, otherwise a
 * replacement frame whose event payload is a truncation marker.
 *
 * Size is measured in UTF-8 bytes. The previous check compared the budget
 * against `msg.length` (UTF-16 code units), so payloads made of multi-byte
 * characters could exceed the byte budget without being truncated.
 *
 * The replacement first keeps the event's sibling fields and swaps only its
 * `data` for the marker. If that frame is still over budget (the bulk lives
 * in a sibling field), it falls back to a frame carrying only the event
 * `type` and the marker.
 *
 * @param {string} msg - Already-serialized outbound frame.
 * @param {string} channel - Channel the frame is addressed to.
 * @param {object} [data] - The event object that was serialized into `msg`.
 * @param {number} [maxBytes=MAX_WS_FRAME_BYTES] - Frame budget in bytes.
 * @returns {string} `msg`, or a serialized truncated frame.
 */
function truncateIfOversized(msg, channel, data, maxBytes = MAX_WS_FRAME_BYTES) {
  const originalBytes = Buffer.byteLength(msg, "utf8");
  if (originalBytes <= maxBytes) return msg;
  const event = data ?? {};
  const marker = {
    __truncated: true,
    originalBytes,
    maxBytes,
    hint: "Event exceeded WS frame budget (likely a verbose agent_call with a large messages[] snapshot). Fetch via REST if you need the full payload."
  };
  const slim = JSON.stringify({
    type: "event",
    channel,
    data: { ...event, data: marker }
  });
  if (Buffer.byteLength(slim, "utf8") <= maxBytes) return slim;
  // Sibling fields alone blow the budget: keep just the discriminator.
  return JSON.stringify({
    type: "event",
    channel,
    data: { type: event.type, data: marker }
  });
}
76
244
  var ConnectionManager = class {
77
245
  /** channel -> set of WS connections */
78
246
  channels = /* @__PURE__ */ new Map();
79
- /** ws -> set of subscribed channels (for cleanup) */
247
+ /** ws -> subscribed channels + optional integrator-supplied metadata */
80
248
  connections = /* @__PURE__ */ new Map();
81
249
  /** channel -> replay buffer for execution streams */
82
250
  buffers = /* @__PURE__ */ new Map();
83
251
  maxConnections = 100;
252
+ filter;
253
+ /**
254
+ * Register a broadcast filter. Called once at middleware construction.
255
+ * The filter runs on every outbound event and can drop or deliver based
256
+ * on the destination connection's metadata.
257
+ */
258
+ setFilter(filter) {
259
+ this.filter = filter;
260
+ }
261
+ /** Attach integrator-supplied metadata to an already-added connection. */
262
+ setMetadata(ws, metadata) {
263
+ const entry = this.connections.get(ws);
264
+ if (entry) entry.metadata = metadata;
265
+ }
84
266
  /** Register a new WS connection. */
85
267
  add(ws) {
86
268
  if (this.connections.size >= this.maxConnections) {
87
269
  ws.close?.();
88
270
  return;
89
271
  }
90
- this.connections.set(ws, /* @__PURE__ */ new Set());
272
+ this.connections.set(ws, { channels: /* @__PURE__ */ new Set() });
91
273
  }
92
274
  /** Remove a WS connection and all its subscriptions. */
93
275
  remove(ws) {
94
- const channels = this.connections.get(ws);
95
- if (channels) {
96
- for (const ch of channels) {
276
+ const entry = this.connections.get(ws);
277
+ if (entry) {
278
+ for (const ch of entry.channels) {
97
279
  this.channels.get(ch)?.delete(ws);
98
280
  if (this.channels.get(ch)?.size === 0) {
99
281
  this.channels.delete(ch);
@@ -111,12 +293,20 @@ var ConnectionManager = class {
111
293
  this.channels.set(channel, subs);
112
294
  }
113
295
  subs.add(ws);
114
- this.connections.get(ws).add(channel);
296
+ this.connections.get(ws).channels.add(channel);
115
297
  const buffer = this.buffers.get(channel);
116
298
  if (buffer) {
117
- for (const msg of buffer.events) {
299
+ const metadata = this.connections.get(ws)?.metadata;
300
+ for (const event of buffer.events) {
301
+ if (this.filter) {
302
+ try {
303
+ if (!this.filter(event.data, metadata)) continue;
304
+ } catch {
305
+ continue;
306
+ }
307
+ }
118
308
  try {
119
- ws.send(msg);
309
+ ws.send(event.msg);
120
310
  } catch {
121
311
  this.remove(ws);
122
312
  return;
@@ -130,11 +320,15 @@ var ConnectionManager = class {
130
320
  if (this.channels.get(channel)?.size === 0) {
131
321
  this.channels.delete(channel);
132
322
  }
133
- this.connections.get(ws)?.delete(channel);
323
+ this.connections.get(ws)?.channels.delete(channel);
134
324
  }
135
325
  /** Broadcast data to all subscribers of a channel. Buffers events for execution channels. */
136
326
  broadcast(channel, data) {
137
- const msg = JSON.stringify({ type: "event", channel, data });
327
+ const msg = truncateIfOversized(
328
+ JSON.stringify({ type: "event", channel, data }),
329
+ channel,
330
+ data
331
+ );
138
332
  if (isBufferedChannel(channel)) {
139
333
  let buffer = this.buffers.get(channel);
140
334
  if (!buffer) {
@@ -144,7 +338,7 @@ var ConnectionManager = class {
144
338
  const event = data;
145
339
  const isTerminal = event.type === "done" || event.type === "error";
146
340
  if (buffer.events.length < MAX_BUFFER_EVENTS || isTerminal) {
147
- buffer.events.push(msg);
341
+ buffer.events.push({ msg, data });
148
342
  }
149
343
  if (isTerminal) {
150
344
  buffer.complete = true;
@@ -157,6 +351,14 @@ var ConnectionManager = class {
157
351
  const subs = this.channels.get(channel);
158
352
  if (!subs || subs.size === 0) return;
159
353
  for (const ws of [...subs]) {
354
+ if (this.filter) {
355
+ const metadata = this.connections.get(ws)?.metadata;
356
+ try {
357
+ if (!this.filter(data, metadata)) continue;
358
+ } catch {
359
+ continue;
360
+ }
361
+ }
160
362
  try {
161
363
  ws.send(msg);
162
364
  } catch {
@@ -172,8 +374,20 @@ var ConnectionManager = class {
172
374
  const wildcardChannel = channel.substring(0, colonIdx) + ":*";
173
375
  const subs = this.channels.get(wildcardChannel);
174
376
  if (!subs || subs.size === 0) return;
175
- const msg = JSON.stringify({ type: "event", channel, data });
377
+ const msg = truncateIfOversized(
378
+ JSON.stringify({ type: "event", channel, data }),
379
+ channel,
380
+ data
381
+ );
176
382
  for (const ws of [...subs]) {
383
+ if (this.filter) {
384
+ const metadata = this.connections.get(ws)?.metadata;
385
+ try {
386
+ if (!this.filter(data, metadata)) continue;
387
+ } catch {
388
+ continue;
389
+ }
390
+ }
177
391
  try {
178
392
  ws.send(msg);
179
393
  } catch {
@@ -205,11 +419,11 @@ var ConnectionManager = class {
205
419
  };
206
420
 
207
421
  // src/server/ws/protocol.ts
208
- var VALID_CHANNEL_PREFIXES = ["execution:", "trace:"];
209
- var VALID_EXACT_CHANNELS = ["costs", "decisions"];
422
+ var VALID_CHANNEL_PREFIXES = ["execution:", "trace:", "eval:"];
423
+ var VALID_EXACT_CHANNELS = ["costs", "decisions", "eval-trends", "workflow-stats", "trace-stats"];
210
424
  var MAX_CHANNEL_LENGTH = 256;
211
425
  function handleWsMessage(raw, socket, connMgr) {
212
- if (raw.length > 65536) {
426
+ if (raw.length > MAX_WS_FRAME_BYTES) {
213
427
  return JSON.stringify({ type: "error", message: "Message too large" });
214
428
  }
215
429
  let msg;
@@ -269,66 +483,580 @@ function createWsHandlers(connMgr) {
269
483
  };
270
484
  }
271
485
 
272
- // src/server/cost-aggregator.ts
273
- var CostAggregator = class {
274
- constructor(connMgr) {
486
+ // src/server/aggregates/aggregate-snapshots.ts
487
/** Length of each named aggregation window in milliseconds ("all" is unbounded). */
var WINDOW_MS = (() => {
  const dayMs = 24 * 60 * 60 * 1e3;
  return {
    "24h": dayMs,
    "7d": 7 * dayMs,
    "30d": 30 * dayMs,
    all: Number.POSITIVE_INFINITY
  };
})();
/**
 * Check whether a timestamp is no older than the start of a window.
 *
 * @param {number} ts - Timestamp in ms.
 * @param {string} window - Key of WINDOW_MS.
 * @param {number} now - Reference "current" time in ms.
 * @returns {boolean} True when `ts` is at or after `now` minus the window length.
 */
function withinWindow(ts, window, now) {
  const cutoff = now - WINDOW_MS[window];
  return ts >= cutoff;
}
496
+ var REBUILD_INTERVAL_MS = 5 * 6e4;
497
+ var ALL_WINDOWS = new Set(Object.keys(WINDOW_MS));
498
/**
 * Validate a window identifier.
 *
 * @param {string|undefined|null} raw - Candidate window id.
 * @param {string} [fallback="7d"] - Returned when `raw` is empty or not a
 *   member of ALL_WINDOWS.
 * @returns {string} `raw` when it is a known window, otherwise `fallback`.
 */
function parseWindowParam(raw, fallback = "7d") {
  if (raw && ALL_WINDOWS.has(raw)) {
    return raw;
  }
  return fallback;
}
501
+ var AggregateSnapshots = class {
502
+ constructor(windows, emptyState, connMgr, channel, broadcastTransform) {
503
+ this.windows = windows;
504
+ this.emptyState = emptyState;
275
505
  this.connMgr = connMgr;
506
+ this.channel = channel;
507
+ this.broadcastTransform = broadcastTransform;
508
+ this.snapshots = new Map(windows.map((w) => [w, emptyState()]));
509
+ }
510
+ snapshots;
511
+ /** Replace all snapshots atomically — used after a full rebuild. */
512
+ replace(fresh) {
513
+ this.snapshots = fresh;
514
+ this.broadcast();
515
+ }
516
+ /** Apply a reducer update to every window where `ts` falls inside the window. */
517
+ fold(ts, update) {
518
+ const now = Date.now();
519
+ let changed = false;
520
+ for (const window of this.windows) {
521
+ if (withinWindow(ts, window, now)) {
522
+ const prev = this.snapshots.get(window);
523
+ this.snapshots.set(window, update(prev));
524
+ changed = true;
525
+ }
526
+ }
527
+ if (changed) this.broadcast();
528
+ }
529
+ get(window) {
530
+ return this.snapshots.get(window) ?? this.emptyState();
276
531
  }
277
- data = {
532
+ getAll() {
533
+ return Object.fromEntries(this.snapshots);
534
+ }
535
+ broadcast() {
536
+ const snapshots = this.broadcastTransform ? Object.fromEntries(
537
+ this.windows.map((w) => [w, this.broadcastTransform(this.snapshots.get(w))])
538
+ ) : this.getAll();
539
+ this.connMgr.broadcast(this.channel, {
540
+ snapshots,
541
+ updatedAt: Date.now()
542
+ });
543
+ }
544
+ };
545
+
546
+ // src/server/aggregates/trace-aggregator.ts
547
/**
 * Maintains per-window aggregate snapshots folded from runtime trace events.
 *
 * State is rebuilt from `runtime.getExecutions()` on start and then every
 * REBUILD_INTERVAL_MS; between rebuilds each live "trace" event is folded
 * into every window its timestamp falls in.
 */
var TraceAggregator = class {
  snaps;
  interval;
  listener;
  options;
  /**
   * @param {object} options - Reads `windows`, `emptyState`, `connMgr`,
   *   `channel`, `broadcastTransform`, `reducer`, `runtime` and the optional
   *   `executionCap`.
   */
  constructor(options) {
    this.options = options;
    this.snaps = new AggregateSnapshots(
      options.windows,
      options.emptyState,
      options.connMgr,
      options.channel,
      options.broadcastTransform
    );
  }
  /**
   * Rebuild once, then subscribe to live trace events and schedule periodic
   * rebuilds. Safe to call more than once: any listener/timer registered by
   * an earlier (or concurrent) start() is released first, so handles are
   * never orphaned.
   */
  async start() {
    await this.rebuild();
    // Release after the await so overlapping start() calls cannot both
    // register a listener and an interval.
    this.close();
    this.listener = (event) => {
      this.snaps.fold(event.timestamp, (prev) => this.options.reducer(prev, event));
    };
    this.options.runtime.on("trace", this.listener);
    this.interval = setInterval(
      () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err)),
      REBUILD_INTERVAL_MS
    );
  }
  /**
   * Recompute every window from stored executions and replace the snapshots.
   * Only the first `executionCap` (default 2000) executions are scanned.
   * NOTE(review): assumes getExecutions() returns newest first, otherwise the
   * cap discards recent data; confirm against the runtime.
   */
  async rebuild() {
    const executions = await this.options.runtime.getExecutions();
    const cap = this.options.executionCap ?? 2e3;
    const capped = executions.slice(0, cap);
    const now = Date.now();
    const fresh = new Map(
      this.options.windows.map((w) => [w, this.options.emptyState()])
    );
    for (const exec of capped) {
      for (const event of exec.steps) {
        for (const window of this.options.windows) {
          if (withinWindow(event.timestamp, window, now)) {
            fresh.set(window, this.options.reducer(fresh.get(window), event));
          }
        }
      }
    }
    this.snaps.replace(fresh);
  }
  /** Current snapshot for one window. */
  getSnapshot(window) {
    return this.snaps.get(window);
  }
  /** Current snapshots for all windows, keyed by window id. */
  getAllSnapshots() {
    return this.snaps.getAll();
  }
  /** Unsubscribe from trace events and stop the rebuild timer. Idempotent. */
  close() {
    if (this.listener) {
      this.options.runtime.off("trace", this.listener);
      this.listener = void 0;
    }
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = void 0;
    }
  }
};
603
+
604
+ // src/server/aggregates/reducers.ts
605
/** Coerce anything that is not a finite number (NaN, ±Infinity, non-numbers) to 0. */
var finite = (v) => {
  if (Number.isFinite(v)) return v;
  return 0;
};
606
/**
 * Recognise an event either by its own type or as a "log" event that wraps
 * the given name in `data.event`.
 *
 * @param {object} event - Trace event.
 * @param {string} eventName - Name to look for.
 * @returns {boolean} True when `event.type === eventName`, or when the event
 *   is of type "log" with an object `data` whose `event` equals `eventName`.
 */
function isLogEvent(event, eventName) {
  return (
    event.type === eventName ||
    (event.type === "log" &&
      event.data != null &&
      typeof event.data === "object" &&
      event.data.event === eventName)
  );
}
613
/**
 * Create a zeroed retry breakdown: a cost counter and a call counter for each
 * of primary/schema/validate/guardrail, plus the overall `retryCalls`.
 *
 * @returns {object} A fresh object on every call.
 */
function emptyRetry() {
  const state = {};
  for (const bucket of ["primary", "schema", "validate", "guardrail"]) {
    state[bucket] = 0;
    state[`${bucket}Calls`] = 0;
  }
  state.retryCalls = 0;
  return state;
}
626
+ function emptyCostData() {
627
+ return {
278
628
  totalCost: 0,
279
629
  totalTokens: { input: 0, output: 0, reasoning: 0 },
280
630
  byAgent: {},
281
631
  byModel: {},
282
- byWorkflow: {}
632
+ byWorkflow: {},
633
+ retry: emptyRetry(),
634
+ byEmbedder: {}
283
635
  };
284
- /** Process a trace event and update cost data. */
285
- onTrace(event) {
286
- if (event.cost == null && !event.tokens) return;
287
- const cost = Number.isFinite(event.cost) ? event.cost : 0;
288
- const tokens = event.tokens ?? {};
289
- this.data.totalCost += cost;
290
- this.data.totalTokens.input += tokens.input ?? 0;
291
- this.data.totalTokens.output += tokens.output ?? 0;
292
- this.data.totalTokens.reasoning += tokens.reasoning ?? 0;
293
- if (event.agent) {
294
- const entry = this.data.byAgent[event.agent] ?? { cost: 0, calls: 0 };
295
- entry.cost += cost;
296
- entry.calls += 1;
297
- this.data.byAgent[event.agent] = entry;
636
+ }
637
+ function reduceCost(acc, event) {
638
+ const isWorkflowStart = isLogEvent(event, "workflow_start");
639
+ if (isWorkflowStart && event.workflow) {
640
+ const byWorkflow2 = { ...acc.byWorkflow };
641
+ const prev = byWorkflow2[event.workflow] ?? { cost: 0, executions: 0 };
642
+ byWorkflow2[event.workflow] = { ...prev, executions: prev.executions + 1 };
643
+ return { ...acc, byWorkflow: byWorkflow2 };
644
+ }
645
+ if (event.cost == null && !event.tokens) return acc;
646
+ const cost = finite(event.cost);
647
+ const tokens = event.tokens ?? {};
648
+ const totalTokens = event.type === "agent_call" ? {
649
+ input: acc.totalTokens.input + finite(tokens.input),
650
+ output: acc.totalTokens.output + finite(tokens.output),
651
+ reasoning: acc.totalTokens.reasoning + finite(tokens.reasoning)
652
+ } : acc.totalTokens;
653
+ const byAgent = { ...acc.byAgent };
654
+ if (event.agent) {
655
+ const prev = byAgent[event.agent] ?? { cost: 0, calls: 0 };
656
+ byAgent[event.agent] = { cost: prev.cost + cost, calls: prev.calls + 1 };
657
+ }
658
+ const byModel = { ...acc.byModel };
659
+ if (event.model) {
660
+ const prev = byModel[event.model] ?? { cost: 0, calls: 0, tokens: { input: 0, output: 0 } };
661
+ byModel[event.model] = {
662
+ cost: prev.cost + cost,
663
+ calls: prev.calls + 1,
664
+ tokens: {
665
+ input: prev.tokens.input + finite(tokens.input),
666
+ output: prev.tokens.output + finite(tokens.output)
667
+ }
668
+ };
669
+ }
670
+ const byWorkflow = { ...acc.byWorkflow };
671
+ if (event.workflow) {
672
+ const prev = byWorkflow[event.workflow] ?? { cost: 0, executions: 0 };
673
+ byWorkflow[event.workflow] = {
674
+ cost: prev.cost + cost,
675
+ executions: prev.executions + (isWorkflowStart ? 1 : 0)
676
+ };
677
+ }
678
+ let retry = acc.retry;
679
+ if (event.type === "agent_call") {
680
+ const d = event.data ?? {};
681
+ const reason = d.retryReason;
682
+ retry = { ...acc.retry };
683
+ if (reason === "schema") {
684
+ retry.schema += cost;
685
+ retry.schemaCalls += 1;
686
+ retry.retryCalls += 1;
687
+ } else if (reason === "validate") {
688
+ retry.validate += cost;
689
+ retry.validateCalls += 1;
690
+ retry.retryCalls += 1;
691
+ } else if (reason === "guardrail") {
692
+ retry.guardrail += cost;
693
+ retry.guardrailCalls += 1;
694
+ retry.retryCalls += 1;
695
+ } else {
696
+ retry.primary += cost;
697
+ retry.primaryCalls += 1;
298
698
  }
299
- if (event.model) {
300
- const entry = this.data.byModel[event.model] ?? {
301
- cost: 0,
302
- calls: 0,
303
- tokens: { input: 0, output: 0 }
699
+ }
700
+ let byEmbedder = acc.byEmbedder;
701
+ if (event.type === "log") {
702
+ const d = event.data ?? {};
703
+ if (d.event === "memory_remember" || d.event === "memory_recall") {
704
+ byEmbedder = { ...acc.byEmbedder };
705
+ const modelKey = d.usage?.model ?? "unknown";
706
+ const embedTokens = typeof d.usage?.tokens === "number" ? finite(d.usage.tokens) : 0;
707
+ const prev = byEmbedder[modelKey] ?? { cost: 0, calls: 0, tokens: 0 };
708
+ byEmbedder[modelKey] = {
709
+ cost: prev.cost + cost,
710
+ calls: prev.calls + 1,
711
+ tokens: prev.tokens + embedTokens
304
712
  };
305
- entry.cost += cost;
306
- entry.calls += 1;
307
- entry.tokens.input += tokens.input ?? 0;
308
- entry.tokens.output += tokens.output ?? 0;
309
- this.data.byModel[event.model] = entry;
310
713
  }
311
- if (event.workflow) {
312
- const entry = this.data.byWorkflow[event.workflow] ?? { cost: 0, executions: 0 };
313
- entry.cost += cost;
314
- if (event.type === "workflow_start") entry.executions += 1;
315
- this.data.byWorkflow[event.workflow] = entry;
714
+ }
715
+ return {
716
+ totalCost: acc.totalCost + cost,
717
+ totalTokens,
718
+ byAgent,
719
+ byModel,
720
+ byWorkflow,
721
+ retry,
722
+ byEmbedder
723
+ };
724
+ }
725
/**
 * Create the initial eval-trend accumulator.
 *
 * @returns {{byEval: object, totalRuns: number, totalCost: number}} Fresh state.
 */
function emptyEvalTrendData() {
  const byEval = {};
  return { byEval, totalRuns: 0, totalCost: 0 };
}
728
/**
 * Pull one number per scorer out of an eval result's `summary.scorers`.
 *
 * A scorer entry may be a finite number, or an object with a finite `mean`;
 * anything else is skipped.
 *
 * @param {unknown} data - Eval result payload.
 * @returns {Record<string, number>} Scorer name to score; empty when `data`
 *   is not an object or carries no scorers.
 */
function extractScores(data) {
  if (!data || typeof data !== "object") return {};
  const scorers = data.summary?.scorers;
  if (!scorers) return {};
  const out = {};
  for (const name of Object.keys(scorers)) {
    const entry = scorers[name];
    let value;
    if (typeof entry === "number") {
      value = entry;
    } else if (entry && typeof entry === "object") {
      value = entry.mean;
    }
    if (Number.isFinite(value)) out[name] = value;
  }
  return out;
}
744
/**
 * Read the total cost of an eval result.
 *
 * @param {unknown} data - Eval result payload.
 * @returns {number} `data.totalCost` when finite, else `data.summary.totalCost`
 *   when finite, else 0.
 */
function extractCost(data) {
  if (!data || typeof data !== "object") return 0;
  if (Number.isFinite(data.totalCost)) return data.totalCost;
  const nested = data.summary?.totalCost;
  if (Number.isFinite(nested)) return nested;
  return 0;
}
751
/**
 * Pick a representative model name for an eval result.
 *
 * Preference order: the key of `metadata.modelCounts` with the highest
 * numeric count (ties broken by name via localeCompare), then the first
 * element of `metadata.models` when it is a string.
 *
 * @param {unknown} data - Eval result payload.
 * @returns {string|undefined} Model name, or undefined when none is found.
 */
function extractModel(data) {
  if (!data || typeof data !== "object") return void 0;
  const metadata = data.metadata;
  const counts = metadata?.modelCounts;
  if (counts && typeof counts === "object" && !Array.isArray(counts)) {
    const ranked = Object.entries(counts)
      .filter((pair) => typeof pair[1] === "number")
      .sort(([nameA, countA], [nameB, countB]) => countB - countA || nameA.localeCompare(nameB));
    if (ranked.length > 0) {
      const [[topName]] = ranked;
      return topName;
    }
  }
  const models = metadata?.models;
  if (Array.isArray(models) && typeof models[0] === "string") {
    return models[0];
  }
  return void 0;
}
769
/**
 * Read the duration of an eval result.
 *
 * @param {unknown} data - Eval result payload.
 * @returns {number|undefined} `data.duration` when it is a finite number.
 */
function extractDuration(data) {
  if (!data || typeof data !== "object") return void 0;
  const { duration } = data;
  if (Number.isFinite(duration)) return duration;
  return void 0;
}
774
/**
 * Compute per-scorer mean and population standard deviation over a set of runs.
 *
 * A run that has no (or a nullish) value for a scorer simply does not
 * contribute to that scorer's statistics.
 *
 * @param {Array<{scores: Record<string, number>}>} runs - Eval runs.
 * @returns {{mean: Record<string, number>, std: Record<string, number>}}
 */
function computeScoreStats(runs) {
  const scorerNames = new Set(runs.flatMap((run) => Object.keys(run.scores)));
  const mean = {};
  const std = {};
  for (const name of scorerNames) {
    const samples = [];
    for (const run of runs) {
      const score = run.scores[name];
      if (score != null) samples.push(score);
    }
    if (samples.length === 0) continue;
    let total = 0;
    for (const sample of samples) total += sample;
    const avg = total / samples.length;
    let squaredDeviation = 0;
    for (const sample of samples) squaredDeviation += (sample - avg) ** 2;
    mean[name] = avg;
    // Population variance: divide by N, not N - 1.
    std[name] = Math.sqrt(squaredDeviation / samples.length);
  }
  return { mean, std };
}
791
/**
 * Fold one eval history entry into the eval-trend accumulator.
 *
 * Per eval name it keeps the most recent 50 appended runs, recomputes score
 * mean/std over those runs, and accumulates cost and run count. The
 * `latestScores` only move to this entry's scores when the previously last
 * stored run is not newer than this entry.
 *
 * @param {object} acc - Accumulator as produced by emptyEvalTrendData().
 * @param {object} entry - History entry; reads `eval`, `id`, `timestamp`, `data`.
 * @returns {object} A new accumulator; `acc` is not mutated.
 */
function reduceEvalTrends(acc, entry) {
  const MAX_EVAL_RUNS = 50;
  const { data, timestamp, id } = entry;
  const scores = extractScores(data);
  const cost = extractCost(data);
  const model = extractModel(data);
  const duration = extractDuration(data);

  const run = { timestamp, id, scores, cost };
  if (model !== void 0) run.model = model;
  if (duration !== void 0) run.duration = duration;

  const prev = acc.byEval[entry.eval];
  const appended = prev ? [...prev.runs, run] : [run];
  const runs = appended.slice(-MAX_EVAL_RUNS);
  const stats = computeScoreStats(runs);

  // An out-of-order (older) entry must not overwrite the latest scores.
  let latestScores = scores;
  if (prev && prev.runs.length > 0) {
    const lastStored = prev.runs[prev.runs.length - 1];
    if (lastStored.timestamp > run.timestamp) latestScores = prev.latestScores;
  }

  return {
    byEval: {
      ...acc.byEval,
      [entry.eval]: {
        runs,
        latestScores,
        scoreMean: stats.mean,
        scoreStd: stats.std,
        costTotal: (prev?.costTotal ?? 0) + cost,
        runCount: (prev?.runCount ?? 0) + 1
      }
    },
    totalRuns: acc.totalRuns + 1,
    totalCost: acc.totalCost + cost
  };
}
825
+ var MAX_DURATIONS = 200;
826
/**
 * Create the initial workflow-stats accumulator.
 *
 * @returns {{byWorkflow: object, totalExecutions: number, failureRate: number}}
 */
function emptyWorkflowStatsData() {
  const byWorkflow = {};
  return { byWorkflow, totalExecutions: 0, failureRate: 0 };
}
829
/**
 * Linear-interpolated percentile of an ascending-sorted numeric array.
 *
 * `p` is clamped to the 0..100 range. Previously a value above 100 (or
 * below 0) produced an index outside the array and the function returned
 * `undefined` or `NaN` instead of the maximum (or minimum).
 *
 * @param {number[]} sorted - Values sorted in ascending order.
 * @param {number} p - Percentile rank; values outside 0..100 are clamped.
 *   A NaN rank still yields NaN.
 * @returns {number} The interpolated value, or 0 for an empty array.
 */
function percentile(sorted, p) {
  if (sorted.length === 0) return 0;
  const rank = Math.min(100, Math.max(0, p));
  const idx = rank / 100 * (sorted.length - 1);
  const lower = Math.floor(idx);
  const upper = Math.ceil(idx);
  if (lower === upper) return sorted[lower];
  return sorted[lower] + (sorted[upper] - sorted[lower]) * (idx - lower);
}
837
/**
 * Fold one execution into the workflow-stats accumulator.
 *
 * Per workflow it tracks total/completed/failed counts, a running duration
 * sum and average, and an ascending-sorted list of durations capped at
 * MAX_DURATIONS. The global failure rate is recomputed from the per-workflow
 * failed counts.
 *
 * NOTE(review): when the cap is exceeded the first (i.e. smallest) duration
 * is the one removed, so on long-lived workflows the retained sample drifts
 * towards slow executions. Confirm this is intended before relying on the
 * percentiles derived from `durations`.
 *
 * @param {object} acc - Accumulator as produced by emptyWorkflowStatsData().
 * @param {object} execution - Reads `workflow`, `status`, `duration`.
 * @returns {object} A new accumulator; `acc` is not mutated.
 */
function reduceWorkflowStats(acc, execution) {
  const { workflow, status } = execution;
  const byWorkflow = { ...acc.byWorkflow };
  const prev = byWorkflow[workflow] ?? {
    total: 0,
    completed: 0,
    failed: 0,
    durations: [],
    durationSum: 0,
    avgDuration: 0
  };
  const dur = finite(execution.duration);

  // Sorted insert: place `dur` before the first strictly greater element.
  const durations = [...prev.durations];
  let at = 0;
  while (at < durations.length && !(durations[at] > dur)) at += 1;
  durations.splice(at, 0, dur);
  if (durations.length > MAX_DURATIONS) durations.shift();

  const total = prev.total + 1;
  const durationSum = prev.durationSum + dur;
  byWorkflow[workflow] = {
    total,
    completed: prev.completed + (status === "completed" ? 1 : 0),
    failed: prev.failed + (status === "failed" ? 1 : 0),
    durations,
    durationSum,
    avgDuration: durationSum / total
  };

  const totalExecutions = acc.totalExecutions + 1;
  let totalFailed = 0;
  for (const stats of Object.values(byWorkflow)) totalFailed += stats.failed;
  const failureRate = totalExecutions > 0 ? totalFailed / totalExecutions : 0;
  return { byWorkflow, totalExecutions, failureRate };
}
871
/**
 * Derive the 50th and 95th duration percentiles of a workflow entry.
 *
 * @param {{durations: number[]}} entry - Per-workflow stats entry; its
 *   `durations` are passed straight to percentile().
 * @returns {{durationP50: number, durationP95: number}}
 */
function getWorkflowPercentiles(entry) {
  const { durations } = entry;
  const durationP50 = percentile(durations, 50);
  const durationP95 = percentile(durations, 95);
  return { durationP50, durationP95 };
}
877
/**
 * Convert internal workflow stats into their outward-facing shape.
 *
 * Each workflow entry is reduced to its counts, average duration and the
 * p50/p95 percentiles; the raw `durations` list and `durationSum` are not
 * included in the result.
 *
 * @param {object} data - Accumulator produced by reduceWorkflowStats().
 * @returns {object} `{ byWorkflow, totalExecutions, failureRate }`.
 */
function enrichWorkflowStats(data) {
  const byWorkflow = {};
  for (const name of Object.keys(data.byWorkflow)) {
    const entry = data.byWorkflow[name];
    const percentiles = getWorkflowPercentiles(entry);
    const { total, completed, failed, avgDuration } = entry;
    byWorkflow[name] = {
      total,
      completed,
      failed,
      durationP50: percentiles.durationP50,
      durationP95: percentiles.durationP95,
      avgDuration
    };
  }
  const { totalExecutions, failureRate } = data;
  return { byWorkflow, totalExecutions, failureRate };
}
896
/**
 * Create the initial trace-stats accumulator.
 *
 * @returns {{eventTypeCounts: object, byTool: object, retryByAgent: object, totalEvents: number}}
 */
function emptyTraceStatsData() {
  const eventTypeCounts = {};
  const byTool = {};
  const retryByAgent = {};
  return { eventTypeCounts, byTool, retryByAgent, totalEvents: 0 };
}
904
/**
 * Fold one trace event into the trace-stats accumulator.
 *
 * Counts events per type, tool calls / denials / approvals per tool, and
 * schema / validate / guardrail retries per agent.
 *
 * All dictionary lookups are restricted to own properties. Event types,
 * tool names, agent names and retry reasons come from event data, and the
 * previous `in` / `?? default` lookups on plain objects also matched
 * inherited members such as "toString" or "constructor", producing
 * string-concatenated or NaN counters.
 *
 * @param {object} acc - Accumulator as produced by emptyTraceStatsData().
 * @param {object} event - Trace event; reads `type`, `tool`, `agent`, `data`.
 * @returns {object} A new accumulator; `acc` is not mutated.
 */
function reduceTraceStats(acc, event) {
  // Own-property read: undefined for missing or inherited keys.
  const ownValue = (obj, key) =>
    Object.prototype.hasOwnProperty.call(obj, key) ? obj[key] : void 0;

  const eventTypeCounts = { ...acc.eventTypeCounts };
  eventTypeCounts[event.type] = (ownValue(eventTypeCounts, event.type) ?? 0) + 1;

  const byTool = { ...acc.byTool };
  const isCallEvent = event.type === "tool_call";
  const isDeniedEvent = event.type === "tool_denied";
  const isApprovalEvent = event.type === "tool_approval";
  if (isCallEvent || isDeniedEvent || isApprovalEvent) {
    const toolName = event.tool;
    const prev = ownValue(byTool, toolName) ?? { calls: 0, denied: 0, approved: 0 };
    const eventData = isDeniedEvent || isApprovalEvent ? event.data : void 0;
    const isApproved = eventData?.approved === true;
    // A tool_denied event counts as denied unless it explicitly carries
    // approved: true; a tool_approval event only when approved is false.
    const isDenied = isDeniedEvent && !eventData?.approved || isApprovalEvent && eventData?.approved === false;
    byTool[toolName] = {
      calls: prev.calls + (isCallEvent ? 1 : 0),
      denied: prev.denied + (isDenied ? 1 : 0),
      approved: prev.approved + (isApproved ? 1 : 0)
    };
  }

  const retryByAgent = { ...acc.retryByAgent };
  if (event.agent && event.type === "agent_call") {
    const reason = event.data?.retryReason;
    if (reason) {
      const prev = ownValue(retryByAgent, event.agent) ?? { schema: 0, validate: 0, guardrail: 0 };
      // Only the known retry buckets are counted; anything else is ignored.
      if (Object.prototype.hasOwnProperty.call(prev, reason)) {
        retryByAgent[event.agent] = { ...prev, [reason]: prev[reason] + 1 };
      }
    }
  }

  return {
    eventTypeCounts,
    byTool,
    retryByAgent,
    totalEvents: acc.totalEvents + 1
  };
}
940
+
941
+ // src/server/aggregates/execution-aggregator.ts
942
+ var ExecutionAggregator = class {
943
+ snaps;
944
+ interval;
945
+ listener;
946
+ options;
947
+ /** Generation counter to prevent stale async fold after rebuild. */
948
+ generation = 0;
949
+ constructor(options) {
950
+ this.options = options;
951
+ this.snaps = new AggregateSnapshots(
952
+ options.windows,
953
+ options.emptyState,
954
+ options.connMgr,
955
+ options.channel,
956
+ options.broadcastTransform
957
+ );
958
+ }
959
+ async start() {
960
+ await this.rebuild();
961
+ this.listener = (event) => {
962
+ if (!isLogEvent(event, "workflow_end")) return;
963
+ const gen = this.generation;
964
+ this.options.runtime.getExecution(event.executionId).then((exec) => {
965
+ if (this.generation !== gen) return;
966
+ if (exec) {
967
+ this.snaps.fold(exec.startedAt, (prev) => this.options.reducer(prev, exec));
968
+ }
969
+ }).catch((err) => console.error("[axl-studio] execution fold failed:", err));
970
+ };
971
+ this.options.runtime.on("trace", this.listener);
972
+ this.interval = setInterval(
973
+ () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err)),
974
+ REBUILD_INTERVAL_MS
975
+ );
976
+ }
977
+ async rebuild() {
978
+ this.generation++;
979
+ const executions = await this.options.runtime.getExecutions();
980
+ const cap = this.options.executionCap ?? 2e3;
981
+ const capped = executions.slice(0, cap);
982
+ const now = Date.now();
983
+ const fresh = new Map(
984
+ this.options.windows.map((w) => [w, this.options.emptyState()])
985
+ );
986
+ for (const exec of capped) {
987
+ for (const window of this.options.windows) {
988
+ if (withinWindow(exec.startedAt, window, now)) {
989
+ fresh.set(window, this.options.reducer(fresh.get(window), exec));
990
+ }
991
+ }
316
992
  }
317
- this.connMgr.broadcast("costs", this.data);
318
- }
319
- /** Get current aggregated cost data. */
320
- getData() {
321
- return this.data;
322
- }
323
- /** Reset all accumulated data. */
324
- reset() {
325
- this.data = {
326
- totalCost: 0,
327
- totalTokens: { input: 0, output: 0, reasoning: 0 },
328
- byAgent: {},
329
- byModel: {},
330
- byWorkflow: {}
993
+ this.snaps.replace(fresh);
994
+ }
995
+ getSnapshot(window) {
996
+ return this.snaps.get(window);
997
+ }
998
+ getAllSnapshots() {
999
+ return this.snaps.getAll();
1000
+ }
1001
+ close() {
1002
+ if (this.listener) this.options.runtime.off("trace", this.listener);
1003
+ if (this.interval) clearInterval(this.interval);
1004
+ }
1005
+ };
1006
+
1007
+ // src/server/aggregates/eval-aggregator.ts
1008
+ var EvalAggregator = class {
1009
+ snaps;
1010
+ interval;
1011
+ listener;
1012
+ options;
1013
+ constructor(options) {
1014
+ this.options = options;
1015
+ this.snaps = new AggregateSnapshots(
1016
+ options.windows,
1017
+ options.emptyState,
1018
+ options.connMgr,
1019
+ options.channel,
1020
+ options.broadcastTransform
1021
+ );
1022
+ }
1023
+ async start() {
1024
+ await this.rebuild();
1025
+ this.listener = (entry) => {
1026
+ this.snaps.fold(entry.timestamp, (prev) => this.options.reducer(prev, entry));
331
1027
  };
1028
+ this.options.runtime.on("eval_result", this.listener);
1029
+ this.interval = setInterval(
1030
+ () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err)),
1031
+ REBUILD_INTERVAL_MS
1032
+ );
1033
+ }
1034
+ async rebuild() {
1035
+ const history = await this.options.runtime.getEvalHistory();
1036
+ const cap = this.options.entryCap ?? 500;
1037
+ const capped = history.slice(0, cap);
1038
+ const now = Date.now();
1039
+ const fresh = new Map(
1040
+ this.options.windows.map((w) => [w, this.options.emptyState()])
1041
+ );
1042
+ for (const entry of capped) {
1043
+ for (const window of this.options.windows) {
1044
+ if (withinWindow(entry.timestamp, window, now)) {
1045
+ fresh.set(window, this.options.reducer(fresh.get(window), entry));
1046
+ }
1047
+ }
1048
+ }
1049
+ this.snaps.replace(fresh);
1050
+ }
1051
+ getSnapshot(window) {
1052
+ return this.snaps.get(window);
1053
+ }
1054
+ getAllSnapshots() {
1055
+ return this.snaps.getAll();
1056
+ }
1057
+ close() {
1058
+ if (this.listener) this.options.runtime.off("eval_result", this.listener);
1059
+ if (this.interval) clearInterval(this.interval);
332
1060
  }
333
1061
  };
334
1062
 
@@ -399,15 +1127,22 @@ function createWorkflowRoutes(connMgr) {
399
1127
  if (body.stream) {
400
1128
  const stream = runtime.stream(name, body.input ?? {}, { metadata: body.metadata });
401
1129
  const executionId = `stream-${Date.now()}`;
1130
+ const redactOn = runtime.isRedactEnabled();
402
1131
  (async () => {
403
1132
  for await (const event of stream) {
404
- connMgr.broadcastWithWildcard(`execution:${executionId}`, event);
1133
+ connMgr.broadcastWithWildcard(
1134
+ `execution:${executionId}`,
1135
+ redactStreamEvent(event, redactOn)
1136
+ );
405
1137
  }
406
1138
  })();
407
1139
  return c.json({ ok: true, data: { executionId, streaming: true } });
408
1140
  }
409
1141
  const result = await runtime.execute(name, body.input ?? {}, { metadata: body.metadata });
410
- return c.json({ ok: true, data: { result } });
1142
+ return c.json({
1143
+ ok: true,
1144
+ data: { result: redactValue(result, runtime.isRedactEnabled()) }
1145
+ });
411
1146
  });
412
1147
  return app6;
413
1148
  }
@@ -418,7 +1153,10 @@ var app = new import_hono3.Hono();
418
1153
  app.get("/executions", async (c) => {
419
1154
  const runtime = c.get("runtime");
420
1155
  const executions = await runtime.getExecutions();
421
- return c.json({ ok: true, data: executions });
1156
+ return c.json({
1157
+ ok: true,
1158
+ data: redactExecutionList(executions, runtime.isRedactEnabled())
1159
+ });
422
1160
  });
423
1161
  app.get("/executions/:id", async (c) => {
424
1162
  const runtime = c.get("runtime");
@@ -430,7 +1168,10 @@ app.get("/executions/:id", async (c) => {
430
1168
  404
431
1169
  );
432
1170
  }
433
- return c.json({ ok: true, data: execution });
1171
+ return c.json({
1172
+ ok: true,
1173
+ data: redactExecutionInfo(execution, runtime.isRedactEnabled())
1174
+ });
434
1175
  });
435
1176
  app.post("/executions/:id/abort", (c) => {
436
1177
  const runtime = c.get("runtime");
@@ -464,7 +1205,16 @@ function createSessionRoutes(connMgr) {
464
1205
  const id = c.req.param("id");
465
1206
  const history = await store.getSession(id);
466
1207
  const handoffHistory = await store.getSessionMeta(id, "handoffHistory");
467
- return c.json({ ok: true, data: { id, history, handoffHistory: handoffHistory ?? [] } });
1208
+ return c.json({
1209
+ ok: true,
1210
+ data: {
1211
+ id,
1212
+ history: redactSessionHistory(history, runtime.isRedactEnabled()),
1213
+ // HandoffRecord has no content fields (source/target/mode/
1214
+ // timestamp/duration) — nothing to scrub.
1215
+ handoffHistory: handoffHistory ?? []
1216
+ }
1217
+ });
468
1218
  });
469
1219
  app6.post("/sessions/:id/send", async (c) => {
470
1220
  const runtime = c.get("runtime");
@@ -634,7 +1384,10 @@ app3.post("/tools/:name/test", async (c) => {
634
1384
  const body = await c.req.json();
635
1385
  const ctx = runtime.createContext();
636
1386
  const result = await tool.run(ctx, body.input);
637
- return c.json({ ok: true, data: { result } });
1387
+ return c.json({
1388
+ ok: true,
1389
+ data: { result: redactValue(result, runtime.isRedactEnabled()) }
1390
+ });
638
1391
  });
639
1392
  var tools_default = app3;
640
1393
 
@@ -649,7 +1402,7 @@ app4.get("/memory/:scope", async (c) => {
649
1402
  return c.json({ ok: true, data: [] });
650
1403
  }
651
1404
  const entries = await store.getAllMemory(scope);
652
- return c.json({ ok: true, data: entries });
1405
+ return c.json({ ok: true, data: redactMemoryList(entries, runtime.isRedactEnabled()) });
653
1406
  });
654
1407
  app4.get("/memory/:scope/:key", async (c) => {
655
1408
  const runtime = c.get("runtime");
@@ -669,7 +1422,10 @@ app4.get("/memory/:scope/:key", async (c) => {
669
1422
  404
670
1423
  );
671
1424
  }
672
- return c.json({ ok: true, data: { key, value } });
1425
+ return c.json({
1426
+ ok: true,
1427
+ data: { key, value: redactMemoryValue(value, runtime.isRedactEnabled()) }
1428
+ });
673
1429
  });
674
1430
  app4.put("/memory/:scope/:key", async (c) => {
675
1431
  const runtime = c.get("runtime");
@@ -714,7 +1470,10 @@ var app5 = new import_hono8.Hono();
714
1470
  app5.get("/decisions", async (c) => {
715
1471
  const runtime = c.get("runtime");
716
1472
  const decisions = await runtime.getPendingDecisions();
717
- return c.json({ ok: true, data: decisions });
1473
+ return c.json({
1474
+ ok: true,
1475
+ data: redactPendingDecisionList(decisions, runtime.isRedactEnabled())
1476
+ });
718
1477
  });
719
1478
  app5.post("/decisions/:executionId/resolve", async (c) => {
720
1479
  const runtime = c.get("runtime");
@@ -730,11 +1489,23 @@ var import_hono9 = require("hono");
730
1489
/**
 * Build the Hono sub-app exposing cost aggregates.
 *
 * Routes:
 *   - GET /costs          `?windows=all` returns every window's snapshot
 *                         (`getAllSnapshots()`); otherwise `?window=` is run
 *                         through `parseWindowParam` and that single snapshot
 *                         is returned.
 *   - POST /costs/reset   always answers 410 with a `GONE` error body.
 *
 * @param costAggregator Object providing `getSnapshot(window)` and
 *   `getAllSnapshots()`.
 * @returns The configured Hono app.
 */
function createCostRoutes(costAggregator) {
  const router = new import_hono9.Hono();
  router.get("/costs", (c) => {
    // The window parameter is only parsed when a single window is requested.
    const wantsEveryWindow = c.req.query("windows") === "all";
    const data = wantsEveryWindow
      ? costAggregator.getAllSnapshots()
      : costAggregator.getSnapshot(parseWindowParam(c.req.query("window")));
    return c.json({ ok: true, data });
  });
  router.post("/costs/reset", (c) => {
    const error = {
      code: "GONE",
      message: "POST /api/costs/reset was removed in @axlsdk/studio 0.15. Cost aggregates are now time-windowed and rebuilt from StateStore history. Use GET /api/costs?window=24h|7d|30d|all to narrow the view instead of resetting."
    };
    return c.json({ ok: false, error }, 410);
  });
  return router;
}
@@ -742,8 +1513,9 @@ function createCostRoutes(costAggregator) {
742
1513
  // src/server/routes/evals.ts
743
1514
  var import_node_crypto = require("crypto");
744
1515
  var import_hono10 = require("hono");
745
- function createEvalRoutes(evalLoader) {
1516
+ function createEvalRoutes(connMgr, evalLoader) {
746
1517
  const app6 = new import_hono10.Hono();
1518
+ const activeRuns = /* @__PURE__ */ new Map();
747
1519
  app6.get("/evals", async (c) => {
748
1520
  if (evalLoader) await evalLoader();
749
1521
  const runtime = c.get("runtime");
@@ -753,7 +1525,10 @@ function createEvalRoutes(evalLoader) {
753
1525
  app6.get("/evals/history", async (c) => {
754
1526
  const runtime = c.get("runtime");
755
1527
  const history = await runtime.getEvalHistory();
756
- return c.json({ ok: true, data: history });
1528
+ return c.json({
1529
+ ok: true,
1530
+ data: redactEvalHistoryList(history, runtime.isRedactEnabled())
1531
+ });
757
1532
  });
758
1533
  app6.delete("/evals/history/:id", async (c) => {
759
1534
  const runtime = c.get("runtime");
@@ -774,6 +1549,7 @@ function createEvalRoutes(evalLoader) {
774
1549
  if (evalLoader) await evalLoader();
775
1550
  const runtime = c.get("runtime");
776
1551
  const name = c.req.param("name");
1552
+ const redactOn = runtime.isRedactEnabled();
777
1553
  const entry = runtime.getRegisteredEval(name);
778
1554
  if (!entry) {
779
1555
  return c.json(
@@ -782,13 +1558,89 @@ function createEvalRoutes(evalLoader) {
782
1558
  );
783
1559
  }
784
1560
  let runs = 1;
1561
+ let stream = false;
1562
+ let captureTraces = false;
785
1563
  try {
786
1564
  const body = await c.req.json().catch(() => ({}));
787
1565
  if (typeof body.runs === "number" && Number.isFinite(body.runs) && body.runs > 1) {
788
1566
  runs = Math.min(Math.floor(body.runs), 25);
789
1567
  }
1568
+ if (body.stream === true) {
1569
+ stream = true;
1570
+ }
1571
+ if (body.captureTraces === true) {
1572
+ captureTraces = true;
1573
+ }
790
1574
  } catch {
791
1575
  }
1576
+ if (stream) {
1577
+ const evalRunId = `eval-${(0, import_node_crypto.randomUUID)()}`;
1578
+ const ac = new AbortController();
1579
+ activeRuns.set(evalRunId, ac);
1580
+ (async () => {
1581
+ try {
1582
+ if (runs > 1) {
1583
+ const runGroupId = (0, import_node_crypto.randomUUID)();
1584
+ const results = [];
1585
+ for (let r = 0; r < runs; r++) {
1586
+ if (ac.signal.aborted) break;
1587
+ const result = await runtime.runRegisteredEval(name, {
1588
+ metadata: { runGroupId, runIndex: r },
1589
+ signal: ac.signal,
1590
+ captureTraces,
1591
+ onProgress: (event) => {
1592
+ if (event.type === "run_done") return;
1593
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1594
+ ...event,
1595
+ run: r + 1,
1596
+ totalRuns: runs
1597
+ });
1598
+ }
1599
+ });
1600
+ results.push(result);
1601
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1602
+ type: "run_done",
1603
+ run: r + 1,
1604
+ totalRuns: runs
1605
+ });
1606
+ }
1607
+ if (results.length > 0) {
1608
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1609
+ type: "done",
1610
+ evalResultId: results[0].id,
1611
+ runGroupId
1612
+ });
1613
+ } else {
1614
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1615
+ type: "error",
1616
+ message: "All runs were cancelled"
1617
+ });
1618
+ }
1619
+ } else {
1620
+ const result = await runtime.runRegisteredEval(name, {
1621
+ signal: ac.signal,
1622
+ captureTraces,
1623
+ onProgress: (event) => {
1624
+ if (event.type === "run_done") return;
1625
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, event);
1626
+ }
1627
+ });
1628
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1629
+ type: "done",
1630
+ evalResultId: result.id
1631
+ });
1632
+ }
1633
+ } catch (err) {
1634
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1635
+ type: "error",
1636
+ message: redactErrorMessage(err, redactOn)
1637
+ });
1638
+ } finally {
1639
+ activeRuns.delete(evalRunId);
1640
+ }
1641
+ })();
1642
+ return c.json({ ok: true, data: { evalRunId } });
1643
+ }
792
1644
  try {
793
1645
  if (runs > 1) {
794
1646
  const { aggregateRuns } = await import("@axlsdk/eval");
@@ -796,27 +1648,53 @@ function createEvalRoutes(evalLoader) {
796
1648
  const results = [];
797
1649
  for (let r = 0; r < runs; r++) {
798
1650
  const result2 = await runtime.runRegisteredEval(name, {
799
- metadata: { runGroupId, runIndex: r }
1651
+ metadata: { runGroupId, runIndex: r },
1652
+ captureTraces
800
1653
  });
801
1654
  results.push(result2);
802
1655
  }
803
1656
  const typedResults = results;
804
1657
  const aggregate = aggregateRuns(typedResults);
805
1658
  const first = typedResults[0];
806
- const result = { ...first, _multiRun: { aggregate, allRuns: typedResults } };
807
- return c.json({ ok: true, data: result });
1659
+ const result = {
1660
+ ...first,
1661
+ _multiRun: { aggregate, allRuns: typedResults }
1662
+ };
1663
+ return c.json({
1664
+ ok: true,
1665
+ data: redactEvalResult(result, redactOn)
1666
+ });
808
1667
  } else {
809
- const result = await runtime.runRegisteredEval(name);
810
- return c.json({ ok: true, data: result });
1668
+ const result = await runtime.runRegisteredEval(name, { captureTraces });
1669
+ return c.json({
1670
+ ok: true,
1671
+ data: redactEvalResult(result, redactOn)
1672
+ });
811
1673
  }
812
1674
  } catch (err) {
813
- const message = err instanceof Error ? err.message : String(err);
814
- return c.json({ ok: false, error: { code: "EVAL_ERROR", message } }, 400);
1675
+ return c.json(
1676
+ { ok: false, error: { code: "EVAL_ERROR", message: redactErrorMessage(err, redactOn) } },
1677
+ 400
1678
+ );
1679
+ }
1680
+ });
1681
+ app6.post("/evals/runs/:evalRunId/cancel", (c) => {
1682
+ const evalRunId = c.req.param("evalRunId");
1683
+ const ac = activeRuns.get(evalRunId);
1684
+ if (!ac) {
1685
+ return c.json(
1686
+ { ok: false, error: { code: "NOT_FOUND", message: "No active eval run found" } },
1687
+ 404
1688
+ );
815
1689
  }
1690
+ ac.abort();
1691
+ activeRuns.delete(evalRunId);
1692
+ return c.json({ ok: true, data: { cancelled: true } });
816
1693
  });
817
1694
  app6.post("/evals/:name/rescore", async (c) => {
818
1695
  if (evalLoader) await evalLoader();
819
1696
  const runtime = c.get("runtime");
1697
+ const redactOn = runtime.isRedactEnabled();
820
1698
  const name = c.req.param("name");
821
1699
  const body = await c.req.json();
822
1700
  if (!body.resultId || typeof body.resultId !== "string") {
@@ -854,14 +1732,20 @@ function createEvalRoutes(evalLoader) {
854
1732
  timestamp: Date.now(),
855
1733
  data: result
856
1734
  });
857
- return c.json({ ok: true, data: result });
1735
+ return c.json({
1736
+ ok: true,
1737
+ data: redactEvalResult(result, redactOn)
1738
+ });
858
1739
  } catch (err) {
859
- const message = err instanceof Error ? err.message : String(err);
860
- return c.json({ ok: false, error: { code: "EVAL_ERROR", message } }, 400);
1740
+ return c.json(
1741
+ { ok: false, error: { code: "EVAL_ERROR", message: redactErrorMessage(err, redactOn) } },
1742
+ 400
1743
+ );
861
1744
  }
862
1745
  });
863
1746
  app6.post("/evals/compare", async (c) => {
864
1747
  const runtime = c.get("runtime");
1748
+ const redactOn = runtime.isRedactEnabled();
865
1749
  const body = await c.req.json();
866
1750
  const validateIdParam = (v, name) => {
867
1751
  if (typeof v === "string") return v === "" ? `${name} must be non-empty` : null;
@@ -929,8 +1813,13 @@ function createEvalRoutes(evalLoader) {
929
1813
  const result = await runtime.evalCompare(baseline, candidate, body.options);
930
1814
  return c.json({ ok: true, data: result });
931
1815
  } catch (err) {
932
- const message = err instanceof Error ? err.message : String(err);
933
- return c.json({ ok: false, error: { code: "COMPARE_FAILED", message } }, 400);
1816
+ return c.json(
1817
+ {
1818
+ ok: false,
1819
+ error: { code: "COMPARE_FAILED", message: redactErrorMessage(err, redactOn) }
1820
+ },
1821
+ 400
1822
+ );
934
1823
  }
935
1824
  });
936
1825
  app6.post("/evals/import", async (c) => {
@@ -992,7 +1881,11 @@ function createEvalRoutes(evalLoader) {
992
1881
  });
993
1882
  return c.json({ ok: true, data: { id, eval: evalName, timestamp } });
994
1883
  });
995
- return app6;
1884
/** Abort every controller tracked in `activeRuns`, then empty the map. */
function closeActiveRuns() {
  activeRuns.forEach((controller) => {
    controller.abort();
  });
  activeRuns.clear();
}
1888
+ return { app: app6, closeActiveRuns };
996
1889
  }
997
1890
 
998
1891
  // src/server/routes/playground.ts
@@ -1030,13 +1923,14 @@ function createPlaygroundRoutes(connMgr) {
1030
1923
  const store = runtime.getStateStore();
1031
1924
  const history = await store.getSession(sessionId);
1032
1925
  history.push({ role: "user", content: body.message });
1926
+ const redactOn = runtime.isRedactEnabled();
1927
+ const broadcast = (event) => {
1928
+ connMgr.broadcastWithWildcard(`execution:${executionId}`, redactStreamEvent(event, redactOn));
1929
+ };
1033
1930
  const ctx = runtime.createContext({
1034
1931
  sessionHistory: history,
1035
1932
  onToken: (token) => {
1036
- connMgr.broadcastWithWildcard(`execution:${executionId}`, {
1037
- type: "token",
1038
- data: token
1039
- });
1933
+ broadcast({ type: "token", data: token });
1040
1934
  }
1041
1935
  });
1042
1936
  (async () => {
@@ -1045,12 +1939,9 @@ function createPlaygroundRoutes(connMgr) {
1045
1939
  const resultText = typeof result === "string" ? result : JSON.stringify(result);
1046
1940
  history.push({ role: "assistant", content: resultText });
1047
1941
  await store.saveSession(sessionId, history);
1048
- connMgr.broadcastWithWildcard(`execution:${executionId}`, {
1049
- type: "done",
1050
- data: resultText
1051
- });
1942
+ broadcast({ type: "done", data: resultText });
1052
1943
  } catch (err) {
1053
- connMgr.broadcastWithWildcard(`execution:${executionId}`, {
1944
+ broadcast({
1054
1945
  type: "error",
1055
1946
  message: err instanceof Error ? err.message : String(err)
1056
1947
  });
@@ -1064,12 +1955,78 @@ function createPlaygroundRoutes(connMgr) {
1064
1955
  return app6;
1065
1956
  }
1066
1957
 
1958
+ // src/server/routes/eval-trends.ts
1959
+ var import_hono12 = require("hono");
1960
/**
 * Build the Hono sub-app serving GET /eval-trends.
 *
 * The `window` query parameter is passed through `parseWindowParam` and the
 * aggregator's snapshot for that window is returned as `data`.
 *
 * @param aggregator Object providing `getSnapshot(window)`.
 * @returns The configured Hono app.
 */
function createEvalTrendsRoutes(aggregator) {
  const router = new import_hono12.Hono();
  const handleEvalTrends = (c) => {
    const selectedWindow = parseWindowParam(c.req.query("window"));
    const data = aggregator.getSnapshot(selectedWindow);
    return c.json({ ok: true, data });
  };
  router.get("/eval-trends", handleEvalTrends);
  return router;
}
1968
+
1969
+ // src/server/routes/workflow-stats.ts
1970
+ var import_hono13 = require("hono");
1971
/**
 * Build the Hono sub-app serving GET /workflow-stats.
 *
 * The `window` query parameter is passed through `parseWindowParam`; the
 * aggregator's snapshot for that window is then run through
 * `enrichWorkflowStats` before being returned as `data`.
 *
 * @param aggregator Object providing `getSnapshot(window)`.
 * @returns The configured Hono app.
 */
function createWorkflowStatsRoutes(aggregator) {
  const router = new import_hono13.Hono();
  const handleWorkflowStats = (c) => {
    const selectedWindow = parseWindowParam(c.req.query("window"));
    const snapshot = aggregator.getSnapshot(selectedWindow);
    const data = enrichWorkflowStats(snapshot);
    return c.json({ ok: true, data });
  };
  router.get("/workflow-stats", handleWorkflowStats);
  return router;
}
1979
+
1980
+ // src/server/routes/trace-stats.ts
1981
+ var import_hono14 = require("hono");
1982
/**
 * Build the Hono sub-app serving GET /trace-stats.
 *
 * The `window` query parameter is passed through `parseWindowParam` and the
 * aggregator's snapshot for that window is returned as `data`.
 *
 * @param aggregator Object providing `getSnapshot(window)`.
 * @returns The configured Hono app.
 */
function createTraceStatsRoutes(aggregator) {
  const router = new import_hono14.Hono();
  const handleTraceStats = (c) => {
    const selectedWindow = parseWindowParam(c.req.query("window"));
    const data = aggregator.getSnapshot(selectedWindow);
    return c.json({ ok: true, data });
  };
  router.get("/trace-stats", handleTraceStats);
  return router;
}
1990
+
1067
1991
  // src/server/index.ts
1068
1992
  function createServer(options) {
1069
1993
  const { runtime, staticRoot, basePath = "", readOnly = false } = options;
1070
- const app6 = new import_hono12.Hono();
1994
+ const app6 = new import_hono15.Hono();
1071
1995
  const connMgr = new ConnectionManager();
1072
- const costAggregator = new CostAggregator(connMgr);
1996
+ const windows = ["24h", "7d", "30d", "all"];
1997
+ const costAggregator = new TraceAggregator({
1998
+ runtime,
1999
+ connMgr,
2000
+ channel: "costs",
2001
+ reducer: reduceCost,
2002
+ emptyState: emptyCostData,
2003
+ windows
2004
+ });
2005
+ const workflowStatsAggregator = new ExecutionAggregator({
2006
+ runtime,
2007
+ connMgr,
2008
+ channel: "workflow-stats",
2009
+ reducer: reduceWorkflowStats,
2010
+ emptyState: emptyWorkflowStatsData,
2011
+ windows,
2012
+ broadcastTransform: enrichWorkflowStats
2013
+ });
2014
+ const traceStatsAggregator = new TraceAggregator({
2015
+ runtime,
2016
+ connMgr,
2017
+ channel: "trace-stats",
2018
+ reducer: reduceTraceStats,
2019
+ emptyState: emptyTraceStatsData,
2020
+ windows
2021
+ });
2022
+ const evalTrendsAggregator = new EvalAggregator({
2023
+ runtime,
2024
+ connMgr,
2025
+ channel: "eval-trends",
2026
+ reducer: reduceEvalTrends,
2027
+ emptyState: emptyEvalTrendData,
2028
+ windows
2029
+ });
1073
2030
  if (options.cors !== false) {
1074
2031
  app6.use("*", (0, import_cors.cors)());
1075
2032
  }
@@ -1087,11 +2044,11 @@ function createServer(options) {
1087
2044
  /^PUT \/api\/memory(\/|$)/,
1088
2045
  /^DELETE \/api\/memory(\/|$)/,
1089
2046
  /^POST \/api\/decisions(\/|$)/,
1090
- /^POST \/api\/costs(\/|$)/,
1091
2047
  /^POST \/api\/tools(\/|$)/,
1092
2048
  /^POST \/api\/evals\/import$/,
1093
2049
  /^POST \/api\/evals\/[^/]+\/run$/,
1094
2050
  /^POST \/api\/evals\/[^/]+\/rescore$/,
2051
+ /^POST \/api\/evals\/runs\/[^/]+\/cancel$/,
1095
2052
  /^DELETE \/api\/evals\/history\/[^/]+$/,
1096
2053
  /^POST \/api\/playground(\/|$)/
1097
2054
  ];
@@ -1111,7 +2068,7 @@ function createServer(options) {
1111
2068
  await next();
1112
2069
  });
1113
2070
  }
1114
- const api = new import_hono12.Hono();
2071
+ const api = new import_hono15.Hono();
1115
2072
  api.route("/", createHealthRoutes(readOnly));
1116
2073
  api.route("/", createWorkflowRoutes(connMgr));
1117
2074
  api.route("/", executions_default);
@@ -1121,7 +2078,11 @@ function createServer(options) {
1121
2078
  api.route("/", memory_default);
1122
2079
  api.route("/", decisions_default);
1123
2080
  api.route("/", createCostRoutes(costAggregator));
1124
- api.route("/", createEvalRoutes(options.evalLoader));
2081
+ api.route("/", createEvalTrendsRoutes(evalTrendsAggregator));
2082
+ api.route("/", createWorkflowStatsRoutes(workflowStatsAggregator));
2083
+ api.route("/", createTraceStatsRoutes(traceStatsAggregator));
2084
+ const { app: evalApp, closeActiveRuns } = createEvalRoutes(connMgr, options.evalLoader);
2085
+ api.route("/", evalApp);
1125
2086
  api.route("/", createPlaygroundRoutes(connMgr));
1126
2087
  app6.route("/api", api);
1127
2088
  const traceListener = (event) => {
@@ -1129,12 +2090,17 @@ function createServer(options) {
1129
2090
  if (traceEvent.executionId) {
1130
2091
  connMgr.broadcastWithWildcard(`trace:${traceEvent.executionId}`, traceEvent);
1131
2092
  }
1132
- costAggregator.onTrace(traceEvent);
1133
2093
  if (traceEvent.type === "await_human") {
1134
2094
  connMgr.broadcast("decisions", traceEvent);
1135
2095
  }
1136
2096
  };
1137
2097
  runtime.on("trace", traceListener);
2098
+ const aggregatorStartPromise = Promise.all([
2099
+ costAggregator.start(),
2100
+ workflowStatsAggregator.start(),
2101
+ traceStatsAggregator.start(),
2102
+ evalTrendsAggregator.start()
2103
+ ]).catch((err) => console.error("[axl-studio] aggregator start failed:", err));
1138
2104
  if (staticRoot) {
1139
2105
  const indexPath = (0, import_node_path.resolve)(staticRoot, "index.html");
1140
2106
  let spaHtml;
@@ -1184,15 +2150,30 @@ function createServer(options) {
1184
2150
  app: app6,
1185
2151
  connMgr,
1186
2152
  costAggregator,
2153
+ workflowStatsAggregator,
2154
+ traceStatsAggregator,
2155
+ evalTrendsAggregator,
2156
+ aggregatorStartPromise,
1187
2157
  /** Create WS handlers. Call before the static/SPA routes are registered. */
1188
2158
  createWsHandlers: () => createWsHandlers(connMgr),
1189
- traceListener
2159
+ traceListener,
2160
+ /** Abort all active streaming eval runs. */
2161
+ closeActiveRuns,
2162
+ /** Close all aggregators (clear intervals and unsubscribe listeners). */
2163
+ closeAggregators: () => {
2164
+ costAggregator.close();
2165
+ workflowStatsAggregator.close();
2166
+ traceStatsAggregator.close();
2167
+ evalTrendsAggregator.close();
2168
+ }
1190
2169
  };
1191
2170
  }
1192
2171
  // Annotate the CommonJS export names for ESM import in node:
1193
2172
  0 && (module.exports = {
1194
2173
  ConnectionManager,
1195
- CostAggregator,
2174
+ EvalAggregator,
2175
+ ExecutionAggregator,
2176
+ TraceAggregator,
1196
2177
  createServer
1197
2178
  });
1198
2179
  //# sourceMappingURL=index.cjs.map