@axlsdk/studio 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -32,16 +32,161 @@ var import_node_ws = require("@hono/node-ws");
32
32
  // src/server/index.ts
33
33
  var import_node_fs = require("fs");
34
34
  var import_node_path = require("path");
35
- var import_hono12 = require("hono");
35
+ var import_hono15 = require("hono");
36
36
  var import_cors = require("hono/cors");
37
37
  var import_serve_static = require("@hono/node-server/serve-static");
38
38
 
39
+ // src/server/redact.ts
40
/** Placeholder string substituted for every value that gets redacted. */
var REDACTED = "[redacted]";

/**
 * Error names whose messages are passed through unchanged by
 * redactErrorMessage even when redaction is switched on.
 */
var SAFE_ERROR_NAMES = /* @__PURE__ */ new Set(
  [
    "QuorumNotMet",
    "NoConsensus",
    "TimeoutError",
    "MaxTurnsError",
    "BudgetExceededError",
    "ToolDenied"
  ]
);
49
/**
 * Choose the error text that may be returned to a client.
 *
 * @param {unknown} err - Thrown value; anything that is not an Error is stringified.
 * @param {boolean} redact - When falsy the raw message is returned untouched.
 * @returns {string} The raw message, or REDACTED when redaction is on and the
 *   error's name is not listed in SAFE_ERROR_NAMES.
 */
function redactErrorMessage(err, redact) {
  const isError = err instanceof Error;
  const message = isError ? err.message : String(err);
  if (redact) {
    // Non-Error values have no name, so they can never match the allow-list.
    const errorName = isError ? err.name : "";
    if (!SAFE_ERROR_NAMES.has(errorName)) {
      return REDACTED;
    }
  }
  return message;
}
55
/**
 * Replace an arbitrary value with the REDACTED placeholder.
 *
 * @param {unknown} value - Value to protect.
 * @param {boolean} redact - When falsy, `value` is returned as-is.
 * @returns {unknown} `value` or REDACTED.
 */
function redactValue(value, redact) {
  return redact ? REDACTED : value;
}
59
/**
 * Scrub the `result` and `error` fields of an execution record.
 *
 * @param {object} info - Execution record.
 * @param {boolean} redact - When falsy, `info` itself is returned.
 * @returns {object} `info`, or a shallow copy in which `result` / `error`
 *   (only those that are not `undefined`) are replaced by REDACTED.
 */
function redactExecutionInfo(info, redact) {
  if (!redact) return info;
  const scrubbed = { ...info };
  if (info.result !== void 0) scrubbed.result = REDACTED;
  if (info.error !== void 0) scrubbed.error = REDACTED;
  return scrubbed;
}
67
/**
 * Apply redactExecutionInfo to every execution record in a list.
 *
 * @param {object[]} infos - Execution records.
 * @param {boolean} redact - When falsy, the input array itself is returned.
 * @returns {object[]} `infos` or a new array of scrubbed records.
 */
function redactExecutionList(infos, redact) {
  return redact ? infos.map((item) => redactExecutionInfo(item, redact)) : infos;
}
71
/**
 * Replace a memory value with the REDACTED placeholder.
 *
 * @param {unknown} value - Stored memory value.
 * @param {boolean} redact - When falsy, `value` is returned as-is.
 * @returns {unknown} `value` or REDACTED.
 */
function redactMemoryValue(value, redact) {
  return redact ? REDACTED : value;
}
75
/**
 * Scrub a list of memory entries, keeping only each entry's key.
 *
 * @param {Array<{key: unknown, value: unknown}>} entries - Memory entries.
 * @param {boolean} redact - When falsy, the input array itself is returned.
 * @returns {Array} `entries`, or new `{ key, value: REDACTED }` objects; any
 *   other field on an entry is not copied.
 */
function redactMemoryList(entries, redact) {
  if (!redact) return entries;
  return entries.map(({ key }) => ({ key, value: REDACTED }));
}
79
/**
 * Build a scrubbed copy of a chat message.
 *
 * Kept: `role`, and when defined `name`, `tool_call_id`, and for each tool
 * call its `id`, `type` and function name. Replaced with REDACTED: `content`
 * and every tool call's `arguments`. No other field is copied
 * (e.g. providerMetadata is deliberately omitted — opaque content).
 *
 * @param {object} msg - Chat message.
 * @returns {object} New scrubbed message.
 */
function redactChatMessage(msg) {
  const out = { role: msg.role, content: REDACTED };
  if (msg.name !== void 0) out.name = msg.name;
  if (msg.tool_call_id !== void 0) out.tool_call_id = msg.tool_call_id;
  if (msg.tool_calls !== void 0) {
    out.tool_calls = msg.tool_calls.map((call) => ({
      id: call.id,
      type: call.type,
      function: { name: call.function.name, arguments: REDACTED }
    }));
  }
  return out;
}
99
/**
 * Scrub every message of a session history with redactChatMessage.
 *
 * @param {object[]} history - Chat messages.
 * @param {boolean} redact - When falsy, the input array itself is returned.
 * @returns {object[]} `history` or a new array of scrubbed messages.
 */
function redactSessionHistory(history, redact) {
  return redact ? history.map((message) => redactChatMessage(message)) : history;
}
103
/**
 * Scrub user content from a stream event before it is broadcast.
 *
 * - `token`, `done`: rebuilt with `data` replaced by REDACTED.
 * - `error`: rebuilt with `message` replaced by REDACTED.
 * - `tool_call`: copy with `args` redacted.
 * - `tool_result`: copy with `result` redacted.
 * - `tool_approval`: copy with `args` (and `reason`, when defined) redacted.
 * - `agent_start`, `agent_end`, `handoff`, `step`: returned unchanged.
 * - any other type: reduced to `{ type }`.
 *
 * @param {object} event - Stream event; dispatch is on `event.type`.
 * @param {boolean} redact - When falsy, `event` itself is returned.
 * @returns {object} The original or scrubbed event; never `undefined`.
 */
function redactStreamEvent(event, redact) {
  if (!redact) return event;
  switch (event.type) {
    case "token":
      return { type: "token", data: REDACTED };
    case "tool_call":
      return { ...event, args: REDACTED };
    case "tool_result":
      return { ...event, result: REDACTED };
    case "tool_approval":
      return {
        ...event,
        args: REDACTED,
        ...event.reason !== void 0 ? { reason: REDACTED } : {}
      };
    case "done":
      return { type: "done", data: REDACTED };
    case "error":
      return { type: "error", message: REDACTED };
    // Structural events have no user content to scrub.
    case "agent_start":
    case "agent_end":
    case "handoff":
    case "step":
      return event;
    default:
      // Fail closed for event types this switch does not know: forward only
      // the type tag. Falling off the switch used to return `undefined`, and
      // the broadcast path reads `.type` from the payload it is handed.
      return { type: event.type };
  }
}
130
/**
 * Build a scrubbed copy of one eval item.
 *
 * `input` and `output` are always replaced by REDACTED; `annotations` and
 * `error` are replaced when defined; each `scorerErrors` element becomes
 * REDACTED. `scoreDetails` entries keep only `score`, `duration` and `cost`
 * (metadata is deliberately dropped — it may contain LLM scorer reasoning).
 * All other fields of the item are copied through.
 *
 * @param {object} item - Eval item.
 * @returns {object} New scrubbed item.
 */
function redactEvalItem(item) {
  const out = { ...item, input: REDACTED, output: REDACTED };
  if (item.annotations !== void 0) out.annotations = REDACTED;
  if (item.error !== void 0) out.error = REDACTED;
  if (item.scorerErrors !== void 0) {
    out.scorerErrors = item.scorerErrors.map(() => REDACTED);
  }
  if (item.scoreDetails) {
    const keptDetails = {};
    for (const [scorer, detail] of Object.entries(item.scoreDetails)) {
      const kept = { score: detail.score };
      if (detail.duration !== void 0) kept.duration = detail.duration;
      if (detail.cost !== void 0) kept.cost = detail.cost;
      keptDetails[scorer] = kept;
    }
    out.scoreDetails = keptDetails;
  }
  return out;
}
153
/**
 * Scrub every item of an eval result with redactEvalItem.
 *
 * @param {object} result - Eval result carrying an `items` array.
 * @param {boolean} redact - When falsy, `result` itself is returned.
 * @returns {object} `result`, or a shallow copy whose `items` are scrubbed.
 */
function redactEvalResult(result, redact) {
  if (!redact) return result;
  const copy = { ...result };
  copy.items = result.items.map((item) => redactEvalItem(item));
  return copy;
}
160
/**
 * Scrub the eval result stored under an eval-history entry's `data` field.
 *
 * @param {object} entry - History entry.
 * @param {boolean} redact - When falsy, `entry` itself is returned.
 * @returns {object} `entry`, or a shallow copy whose `data` went through
 *   redactEvalResult.
 */
function redactEvalHistoryEntry(entry, redact) {
  if (!redact) return entry;
  const copy = { ...entry };
  copy.data = redactEvalResult(entry.data, redact);
  return copy;
}
167
/**
 * Apply redactEvalHistoryEntry to every entry of an eval-history list.
 *
 * @param {object[]} entries - History entries.
 * @param {boolean} redact - When falsy, the input array itself is returned.
 * @returns {object[]} `entries` or a new array of scrubbed entries.
 */
function redactEvalHistoryList(entries, redact) {
  return redact ? entries.map((item) => redactEvalHistoryEntry(item, redact)) : entries;
}
171
/**
 * Scrub a pending decision.
 *
 * @param {object} decision - Pending decision record.
 * @param {boolean} redact - When falsy, `decision` itself is returned.
 * @returns {object} `decision`, or a shallow copy whose `prompt` is REDACTED
 *   and whose `metadata` (when defined) is replaced by `{ redacted: true }`.
 */
function redactPendingDecision(decision, redact) {
  if (!redact) return decision;
  const out = { ...decision, prompt: REDACTED };
  if (decision.metadata !== void 0) out.metadata = { redacted: true };
  return out;
}
179
/**
 * Apply redactPendingDecision to every decision in a list.
 *
 * @param {object[]} decisions - Pending decisions.
 * @param {boolean} redact - When falsy, the input array itself is returned.
 * @returns {object[]} `decisions` or a new array of scrubbed decisions.
 */
function redactPendingDecisionList(decisions, redact) {
  return redact ? decisions.map((item) => redactPendingDecision(item, redact)) : decisions;
}
183
+
39
184
  // src/server/middleware/error-handler.ts
40
185
  async function errorHandler(c, next) {
41
186
  try {
42
187
  await next();
43
188
  } catch (err) {
44
- const message = err instanceof Error ? err.message : String(err);
189
+ const rawMessage = err instanceof Error ? err.message : String(err);
45
190
  const code = err.code ?? "INTERNAL_ERROR";
46
191
  let status = 500;
47
192
  if ("status" in err) {
@@ -49,46 +194,81 @@ async function errorHandler(c, next) {
49
194
  if (typeof errStatus === "number" && errStatus >= 400 && errStatus < 600) {
50
195
  status = errStatus;
51
196
  }
52
- } else if (code === "NOT_FOUND" || message.includes("not found") || message.includes("not registered")) {
197
+ } else if (code === "NOT_FOUND" || rawMessage.includes("not found") || rawMessage.includes("not registered")) {
53
198
  status = 404;
54
- } else if (code === "VALIDATION_ERROR" || message.includes("Expected") || message.includes("invalid")) {
199
+ } else if (code === "VALIDATION_ERROR" || rawMessage.includes("Expected") || rawMessage.includes("invalid")) {
55
200
  status = 400;
56
201
  }
202
+ const runtime = c.get("runtime");
203
+ const redactOn = runtime?.isRedactEnabled?.() ?? false;
57
204
  const body = {
58
205
  ok: false,
59
- error: { code, message }
206
+ error: { code, message: redactErrorMessage(err, redactOn) }
60
207
  };
61
208
  return c.json(body, status);
62
209
  }
63
210
  }
64
211
 
65
212
  // src/server/ws/connection-manager.ts
66
- function isBufferedChannel(channel) {
67
- return channel.startsWith("execution:");
68
- }
69
213
// 30 000; presumably the replay-buffer lifetime in milliseconds (its use is
// not visible in this section — confirm against the cleanup code).
var BUFFER_TTL_MS = 3e4;
// Upper bound on events kept per replay buffer.
var MAX_BUFFER_EVENTS = 500;
// Size limit shared by inbound-message validation and outbound truncation;
// both compare it against a string's `.length`.
var MAX_WS_FRAME_BYTES = 65536;

/**
 * Tell whether events on a channel are kept in a replay buffer.
 *
 * @param {string} channel - Channel name.
 * @returns {boolean} True for channels starting with "execution:" or "eval:".
 */
function isBufferedChannel(channel) {
  return ["execution:", "eval:"].some((prefix) => channel.startsWith(prefix));
}
219
/**
 * Keep an outbound WS frame within MAX_WS_FRAME_BYTES.
 *
 * A frame that already fits is returned untouched. Otherwise the event's
 * `data` field is swapped for a small truncation marker while the event's
 * other fields are kept. If the frame is still too large after that (the bulk
 * lived in a field other than `data`), only the event's `type` and the marker
 * are sent.
 *
 * Size is measured with `String.prototype.length` (UTF-16 code units), not
 * encoded bytes.
 *
 * @param {string} msg - Serialized frame.
 * @param {string} channel - Channel the frame is for.
 * @param {object} [data] - The event that was serialized into `msg`.
 * @returns {string} `msg`, or a serialized replacement frame.
 */
function truncateIfOversized(msg, channel, data) {
  if (msg.length <= MAX_WS_FRAME_BYTES) return msg;
  const event = data ?? {};
  const marker = {
    __truncated: true,
    originalBytes: msg.length,
    maxBytes: MAX_WS_FRAME_BYTES,
    hint: "Event exceeded WS frame budget (likely a verbose agent_call with a large messages[] snapshot). Fetch via REST if you need the full payload."
  };
  const truncated = JSON.stringify({
    type: "event",
    channel,
    data: { ...event, data: marker }
  });
  if (truncated.length <= MAX_WS_FRAME_BYTES) return truncated;
  // Replacing `data` was not enough, so one of the sibling fields carried over
  // by the spread is the large one. Drop them all and keep just the type tag
  // so subscribers can still tell what kind of event was cut.
  const minimal = typeof event.type === "string" ? { type: event.type, data: marker } : { data: marker };
  return JSON.stringify({ type: "event", channel, data: minimal });
}
71
237
  var ConnectionManager = class {
72
238
  /** channel -> set of WS connections */
73
239
  channels = /* @__PURE__ */ new Map();
74
- /** ws -> set of subscribed channels (for cleanup) */
240
+ /** ws -> subscribed channels + optional integrator-supplied metadata */
75
241
  connections = /* @__PURE__ */ new Map();
76
242
  /** channel -> replay buffer for execution streams */
77
243
  buffers = /* @__PURE__ */ new Map();
78
244
  maxConnections = 100;
245
+ filter;
246
+ /**
247
+ * Register a broadcast filter. Called once at middleware construction.
248
+ * The filter runs on every outbound event and can drop or deliver based
249
+ * on the destination connection's metadata.
250
+ */
251
+ setFilter(filter) {
252
+ this.filter = filter;
253
+ }
254
+ /** Attach integrator-supplied metadata to an already-added connection. */
255
+ setMetadata(ws, metadata) {
256
+ const entry = this.connections.get(ws);
257
+ if (entry) entry.metadata = metadata;
258
+ }
79
259
  /** Register a new WS connection. */
80
260
  add(ws) {
81
261
  if (this.connections.size >= this.maxConnections) {
82
262
  ws.close?.();
83
263
  return;
84
264
  }
85
- this.connections.set(ws, /* @__PURE__ */ new Set());
265
+ this.connections.set(ws, { channels: /* @__PURE__ */ new Set() });
86
266
  }
87
267
  /** Remove a WS connection and all its subscriptions. */
88
268
  remove(ws) {
89
- const channels = this.connections.get(ws);
90
- if (channels) {
91
- for (const ch of channels) {
269
+ const entry = this.connections.get(ws);
270
+ if (entry) {
271
+ for (const ch of entry.channels) {
92
272
  this.channels.get(ch)?.delete(ws);
93
273
  if (this.channels.get(ch)?.size === 0) {
94
274
  this.channels.delete(ch);
@@ -106,12 +286,20 @@ var ConnectionManager = class {
106
286
  this.channels.set(channel, subs);
107
287
  }
108
288
  subs.add(ws);
109
- this.connections.get(ws).add(channel);
289
+ this.connections.get(ws).channels.add(channel);
110
290
  const buffer = this.buffers.get(channel);
111
291
  if (buffer) {
112
- for (const msg of buffer.events) {
292
+ const metadata = this.connections.get(ws)?.metadata;
293
+ for (const event of buffer.events) {
294
+ if (this.filter) {
295
+ try {
296
+ if (!this.filter(event.data, metadata)) continue;
297
+ } catch {
298
+ continue;
299
+ }
300
+ }
113
301
  try {
114
- ws.send(msg);
302
+ ws.send(event.msg);
115
303
  } catch {
116
304
  this.remove(ws);
117
305
  return;
@@ -125,11 +313,15 @@ var ConnectionManager = class {
125
313
  if (this.channels.get(channel)?.size === 0) {
126
314
  this.channels.delete(channel);
127
315
  }
128
- this.connections.get(ws)?.delete(channel);
316
+ this.connections.get(ws)?.channels.delete(channel);
129
317
  }
130
318
  /** Broadcast data to all subscribers of a channel. Buffers events for execution channels. */
131
319
  broadcast(channel, data) {
132
- const msg = JSON.stringify({ type: "event", channel, data });
320
+ const msg = truncateIfOversized(
321
+ JSON.stringify({ type: "event", channel, data }),
322
+ channel,
323
+ data
324
+ );
133
325
  if (isBufferedChannel(channel)) {
134
326
  let buffer = this.buffers.get(channel);
135
327
  if (!buffer) {
@@ -139,7 +331,7 @@ var ConnectionManager = class {
139
331
  const event = data;
140
332
  const isTerminal = event.type === "done" || event.type === "error";
141
333
  if (buffer.events.length < MAX_BUFFER_EVENTS || isTerminal) {
142
- buffer.events.push(msg);
334
+ buffer.events.push({ msg, data });
143
335
  }
144
336
  if (isTerminal) {
145
337
  buffer.complete = true;
@@ -152,6 +344,14 @@ var ConnectionManager = class {
152
344
  const subs = this.channels.get(channel);
153
345
  if (!subs || subs.size === 0) return;
154
346
  for (const ws of [...subs]) {
347
+ if (this.filter) {
348
+ const metadata = this.connections.get(ws)?.metadata;
349
+ try {
350
+ if (!this.filter(data, metadata)) continue;
351
+ } catch {
352
+ continue;
353
+ }
354
+ }
155
355
  try {
156
356
  ws.send(msg);
157
357
  } catch {
@@ -167,8 +367,20 @@ var ConnectionManager = class {
167
367
  const wildcardChannel = channel.substring(0, colonIdx) + ":*";
168
368
  const subs = this.channels.get(wildcardChannel);
169
369
  if (!subs || subs.size === 0) return;
170
- const msg = JSON.stringify({ type: "event", channel, data });
370
+ const msg = truncateIfOversized(
371
+ JSON.stringify({ type: "event", channel, data }),
372
+ channel,
373
+ data
374
+ );
171
375
  for (const ws of [...subs]) {
376
+ if (this.filter) {
377
+ const metadata = this.connections.get(ws)?.metadata;
378
+ try {
379
+ if (!this.filter(data, metadata)) continue;
380
+ } catch {
381
+ continue;
382
+ }
383
+ }
172
384
  try {
173
385
  ws.send(msg);
174
386
  } catch {
@@ -200,11 +412,11 @@ var ConnectionManager = class {
200
412
  };
201
413
 
202
414
  // src/server/ws/protocol.ts
203
- var VALID_CHANNEL_PREFIXES = ["execution:", "trace:"];
204
- var VALID_EXACT_CHANNELS = ["costs", "decisions"];
415
+ var VALID_CHANNEL_PREFIXES = ["execution:", "trace:", "eval:"];
416
+ var VALID_EXACT_CHANNELS = ["costs", "decisions", "eval-trends", "workflow-stats", "trace-stats"];
205
417
  var MAX_CHANNEL_LENGTH = 256;
206
418
  function handleWsMessage(raw, socket, connMgr) {
207
- if (raw.length > 65536) {
419
+ if (raw.length > MAX_WS_FRAME_BYTES) {
208
420
  return JSON.stringify({ type: "error", message: "Message too large" });
209
421
  }
210
422
  let msg;
@@ -264,67 +476,581 @@ function createWsHandlers(connMgr) {
264
476
  };
265
477
  }
266
478
 
267
- // src/server/cost-aggregator.ts
268
- var CostAggregator = class {
269
- constructor(connMgr) {
479
+ // src/server/aggregates/aggregate-snapshots.ts
480
/** Length of each supported aggregation window in milliseconds. */
var WINDOW_MS = {
  "24h": 864e5, // 24 * 60 * 60 * 1000
  "7d": 6048e5, // 7 days
  "30d": 2592e6, // 30 days
  all: Number.POSITIVE_INFINITY
};

/**
 * Test whether a timestamp is no older than the given window.
 *
 * @param {number} ts - Timestamp to test (epoch milliseconds).
 * @param {string} window - Key of WINDOW_MS.
 * @param {number} now - Reference time (epoch milliseconds).
 * @returns {boolean} True when `ts` is at or after `now` minus the window length.
 */
function withinWindow(ts, window, now) {
  const cutoff = now - WINDOW_MS[window];
  return ts >= cutoff;
}
489
// Five minutes; the aggregators pass this to setInterval for periodic rebuilds.
var REBUILD_INTERVAL_MS = 3e5;
// Every window key defined in WINDOW_MS.
var ALL_WINDOWS = new Set(Object.keys(WINDOW_MS));

/**
 * Validate a window name taken from a request parameter.
 *
 * @param {string|undefined|null} raw - Candidate window name.
 * @param {string} [fallback="7d"] - Returned when `raw` is empty or unknown.
 * @returns {string} `raw` when it is a key of WINDOW_MS, else `fallback`.
 */
function parseWindowParam(raw, fallback = "7d") {
  if (raw && ALL_WINDOWS.has(raw)) {
    return raw;
  }
  return fallback;
}
494
/**
 * Holds one aggregate state per time window and broadcasts all of them on a
 * channel every time they change.
 */
var AggregateSnapshots = class {
  /** Map of window key -> current aggregate state. */
  snapshots;

  /**
   * @param {string[]} windows - Window keys to track.
   * @param {Function} emptyState - Factory for a fresh aggregate state.
   * @param {object} connMgr - Object whose `broadcast(channel, payload)` is called.
   * @param {string} channel - Channel to broadcast on.
   * @param {Function} [broadcastTransform] - Optional per-window mapper applied
   *   to each state before it is broadcast.
   */
  constructor(windows, emptyState, connMgr, channel, broadcastTransform) {
    this.windows = windows;
    this.emptyState = emptyState;
    this.connMgr = connMgr;
    this.channel = channel;
    this.broadcastTransform = broadcastTransform;
    this.snapshots = new Map(windows.map((w) => [w, emptyState()]));
  }

  /** Replace all snapshots atomically — used after a full rebuild. */
  replace(fresh) {
    this.snapshots = fresh;
    this.broadcast();
  }

  /**
   * Apply a reducer update to every window that contains `ts`, then broadcast
   * once if at least one window was updated.
   */
  fold(ts, update) {
    const now = Date.now();
    const hits = this.windows.filter((w) => withinWindow(ts, w, now));
    for (const w of hits) {
      this.snapshots.set(w, update(this.snapshots.get(w)));
    }
    if (hits.length > 0) this.broadcast();
  }

  /** Current state for one window; a fresh empty state if none is stored. */
  get(window) {
    return this.snapshots.get(window) ?? this.emptyState();
  }

  /** All stored states as a plain object keyed by window. */
  getAll() {
    return Object.fromEntries(this.snapshots);
  }

  /** Send `{ snapshots, updatedAt }` on the configured channel. */
  broadcast() {
    let snapshots;
    if (this.broadcastTransform) {
      snapshots = Object.fromEntries(
        this.windows.map((w) => [w, this.broadcastTransform(this.snapshots.get(w))])
      );
    } else {
      snapshots = this.getAll();
    }
    this.connMgr.broadcast(this.channel, { snapshots, updatedAt: Date.now() });
  }
};
538
+
539
+ // src/server/aggregates/trace-aggregator.ts
540
/**
 * Maintains per-window aggregates over trace events.
 *
 * Live "trace" events from the runtime are folded in as they arrive, and the
 * whole aggregate is rebuilt from `runtime.getExecutions()` on start and then
 * every REBUILD_INTERVAL_MS.
 */
var TraceAggregator = class {
  snaps;
  interval;
  listener;
  options;

  /**
   * @param {object} options - Reads `windows`, `emptyState`, `connMgr`,
   *   `channel`, `broadcastTransform`, `reducer`, `runtime` and the optional
   *   `executionCap`.
   */
  constructor(options) {
    const { windows, emptyState, connMgr, channel, broadcastTransform } = options;
    this.options = options;
    this.snaps = new AggregateSnapshots(windows, emptyState, connMgr, channel, broadcastTransform);
  }

  /** Rebuild once, then subscribe to live events and schedule periodic rebuilds. */
  async start() {
    await this.rebuild();
    this.listener = (event) => {
      this.snaps.fold(event.timestamp, (prev) => this.options.reducer(prev, event));
    };
    this.options.runtime.on("trace", this.listener);
    const onTick = () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err));
    this.interval = setInterval(onTick, REBUILD_INTERVAL_MS);
  }

  /**
   * Recompute every window from scratch using the `steps` of the first
   * `executionCap` (default 2000) executions returned by the runtime.
   */
  async rebuild() {
    const opts = this.options;
    const executions = await opts.runtime.getExecutions();
    const considered = executions.slice(0, opts.executionCap ?? 2e3);
    const now = Date.now();
    const fresh = new Map();
    for (const w of opts.windows) {
      fresh.set(w, opts.emptyState());
    }
    for (const exec of considered) {
      for (const step of exec.steps) {
        for (const w of opts.windows) {
          if (!withinWindow(step.timestamp, w, now)) continue;
          fresh.set(w, opts.reducer(fresh.get(w), step));
        }
      }
    }
    this.snaps.replace(fresh);
  }

  /** Aggregate state for one window. */
  getSnapshot(window) {
    return this.snaps.get(window);
  }

  /** Aggregate states for all windows, keyed by window. */
  getAllSnapshots() {
    return this.snaps.getAll();
  }

  /** Unsubscribe from the runtime and stop the periodic rebuild. */
  close() {
    if (this.listener) this.options.runtime.off("trace", this.listener);
    if (this.interval) clearInterval(this.interval);
  }
};
596
+
597
+ // src/server/aggregates/reducers.ts
598
/** Return `v` when it is a finite number, otherwise 0. */
var finite = (v) => {
  return Number.isFinite(v) ? v : 0;
};

/**
 * Tell whether a trace event represents the named event, either directly
 * (`event.type === eventName`) or as a "log" event whose object payload has
 * `event === eventName`.
 *
 * @param {object} event - Trace event.
 * @param {string} eventName - Event name to look for.
 * @returns {boolean}
 */
function isLogEvent(event, eventName) {
  if (event.type === eventName) return true;
  if (event.type !== "log") return false;
  const payload = event.data;
  return payload != null && typeof payload === "object" && payload.event === eventName;
}
606
/**
 * Create a zeroed retry breakdown: a cost field and a `*Calls` counter for
 * each of primary / schema / validate / guardrail, plus `retryCalls`.
 *
 * @returns {object} Fresh counters object.
 */
function emptyRetry() {
  const counters = {};
  for (const bucket of ["primary", "schema", "validate", "guardrail"]) {
    counters[bucket] = 0;
    counters[`${bucket}Calls`] = 0;
  }
  counters.retryCalls = 0;
  return counters;
}
619
/**
 * Create the zeroed starting state for the cost reducer.
 *
 * @returns {object} `{ totalCost, totalTokens, byAgent, byModel, byWorkflow,
 *   retry, byEmbedder }` with every number 0 and every map empty.
 */
function emptyCostData() {
  const totalTokens = { input: 0, output: 0, reasoning: 0 };
  return {
    totalCost: 0,
    totalTokens,
    byAgent: {},
    byModel: {},
    byWorkflow: {},
    retry: emptyRetry(),
    byEmbedder: {}
  };
}
630
/**
 * Fold one trace event into the cost aggregate without mutating `acc`.
 *
 * - A workflow_start event that names a workflow only increments that
 *   workflow's `executions` counter.
 * - Events with neither `cost` nor `tokens` leave `acc` unchanged (the same
 *   object is returned).
 * - Otherwise the event's cost is added to `totalCost` and to the per-agent,
 *   per-model and per-workflow buckets that the event names. `totalTokens`
 *   and the retry breakdown are updated for "agent_call" events only, and
 *   `byEmbedder` for memory_remember / memory_recall log events.
 *
 * @param {object} acc - Current aggregate (shape of emptyCostData()).
 * @param {object} event - Trace event.
 * @returns {object} Next aggregate.
 */
function reduceCost(acc, event) {
  const isWorkflowStart = isLogEvent(event, "workflow_start");
  if (isWorkflowStart && event.workflow) {
    const started = { ...acc.byWorkflow };
    const prev = started[event.workflow] ?? { cost: 0, executions: 0 };
    started[event.workflow] = { ...prev, executions: prev.executions + 1 };
    return { ...acc, byWorkflow: started };
  }
  if (event.cost == null && !event.tokens) return acc;

  const cost = finite(event.cost);
  const tokens = event.tokens ?? {};
  const isAgentCall = event.type === "agent_call";

  // Grand-total tokens count agent calls only; other events leave them as-is.
  const totalTokens = isAgentCall ? {
    input: acc.totalTokens.input + finite(tokens.input),
    output: acc.totalTokens.output + finite(tokens.output),
    reasoning: acc.totalTokens.reasoning + finite(tokens.reasoning)
  } : acc.totalTokens;

  const byAgent = { ...acc.byAgent };
  if (event.agent) {
    const prev = byAgent[event.agent] ?? { cost: 0, calls: 0 };
    byAgent[event.agent] = { cost: prev.cost + cost, calls: prev.calls + 1 };
  }

  const byModel = { ...acc.byModel };
  if (event.model) {
    const prev = byModel[event.model] ?? { cost: 0, calls: 0, tokens: { input: 0, output: 0 } };
    byModel[event.model] = {
      cost: prev.cost + cost,
      calls: prev.calls + 1,
      tokens: {
        input: prev.tokens.input + finite(tokens.input),
        output: prev.tokens.output + finite(tokens.output)
      }
    };
  }

  const byWorkflow = { ...acc.byWorkflow };
  if (event.workflow) {
    const prev = byWorkflow[event.workflow] ?? { cost: 0, executions: 0 };
    // A workflow_start that names a workflow already returned above, so this
    // branch only ever adds cost; the execution count is carried over as-is.
    byWorkflow[event.workflow] = { cost: prev.cost + cost, executions: prev.executions };
  }

  let retry = acc.retry;
  if (isAgentCall) {
    const reason = (event.data ?? {}).retryReason;
    retry = { ...acc.retry };
    if (reason === "schema") {
      retry.schema += cost;
      retry.schemaCalls += 1;
      retry.retryCalls += 1;
    } else if (reason === "validate") {
      retry.validate += cost;
      retry.validateCalls += 1;
      retry.retryCalls += 1;
    } else if (reason === "guardrail") {
      retry.guardrail += cost;
      retry.guardrailCalls += 1;
      retry.retryCalls += 1;
    } else {
      // No (or unrecognised) retry reason: counted as a primary call.
      retry.primary += cost;
      retry.primaryCalls += 1;
    }
  }

  let byEmbedder = acc.byEmbedder;
  if (event.type === "log") {
    const d = event.data ?? {};
    if (d.event === "memory_remember" || d.event === "memory_recall") {
      byEmbedder = { ...acc.byEmbedder };
      const modelKey = d.usage?.model ?? "unknown";
      const embedTokens = typeof d.usage?.tokens === "number" ? finite(d.usage.tokens) : 0;
      const prev = byEmbedder[modelKey] ?? { cost: 0, calls: 0, tokens: 0 };
      byEmbedder[modelKey] = {
        cost: prev.cost + cost,
        calls: prev.calls + 1,
        tokens: prev.tokens + embedTokens
      };
    }
  }

  return {
    totalCost: acc.totalCost + cost,
    totalTokens,
    byAgent,
    byModel,
    byWorkflow,
    retry,
    byEmbedder
  };
}
718
/**
 * Create the zeroed starting state for the eval-trend reducer.
 *
 * @returns {{byEval: object, totalRuns: number, totalCost: number}}
 */
function emptyEvalTrendData() {
  const byEval = {};
  return { byEval, totalRuns: 0, totalCost: 0 };
}
721
/**
 * Pull one number per scorer out of an eval result's `summary.scorers`.
 *
 * A scorer entry may be a finite number, or an object with a finite `mean`;
 * anything else is skipped.
 *
 * @param {unknown} data - Eval result.
 * @returns {Object<string, number>} Scorer name -> score; `{}` when unavailable.
 */
function extractScores(data) {
  if (!data || typeof data !== "object") return {};
  const scorers = data.summary?.scorers;
  if (!scorers) return {};
  const out = {};
  for (const name of Object.keys(scorers)) {
    const entry = scorers[name];
    let value;
    if (typeof entry === "number") {
      value = entry;
    } else if (entry && typeof entry === "object") {
      value = entry.mean;
    }
    if (Number.isFinite(value)) out[name] = value;
  }
  return out;
}
737
/**
 * Read the total cost of an eval result: `data.totalCost` when finite,
 * otherwise `data.summary.totalCost` when finite, otherwise 0.
 *
 * @param {unknown} data - Eval result.
 * @returns {number}
 */
function extractCost(data) {
  if (!data || typeof data !== "object") return 0;
  const direct = data.totalCost;
  if (Number.isFinite(direct)) return direct;
  const nested = data.summary?.totalCost;
  return Number.isFinite(nested) ? nested : 0;
}
744
/**
 * Pick a representative model name for an eval result.
 *
 * Preference order: the key of `metadata.modelCounts` with the highest numeric
 * count (ties broken by `localeCompare` on the name), then the first element
 * of `metadata.models` if it is a string.
 *
 * @param {unknown} data - Eval result.
 * @returns {string|undefined}
 */
function extractModel(data) {
  if (!data || typeof data !== "object") return void 0;
  const metadata = data.metadata;
  const counts = metadata?.modelCounts;
  if (counts && typeof counts === "object" && !Array.isArray(counts)) {
    const ranked = Object.entries(counts)
      .filter((pair) => typeof pair[1] === "number")
      .sort(([nameA, countA], [nameB, countB]) => countB - countA || nameA.localeCompare(nameB));
    if (ranked.length > 0) return ranked[0][0];
  }
  const models = metadata?.models;
  const firstModel = Array.isArray(models) ? models[0] : void 0;
  return typeof firstModel === "string" ? firstModel : void 0;
}
762
/**
 * Read `duration` from an eval result.
 *
 * @param {unknown} data - Eval result.
 * @returns {number|undefined} The duration when it is a finite number.
 */
function extractDuration(data) {
  const isObject = Boolean(data) && typeof data === "object";
  if (!isObject) return void 0;
  const { duration } = data;
  return Number.isFinite(duration) ? duration : void 0;
}
767
/**
 * Compute the mean and population standard deviation of every scorer that
 * appears in a list of runs. Runs lacking a scorer (null / undefined) are
 * left out of that scorer's sample.
 *
 * @param {Array<{scores: Object<string, number>}>} runs - Eval runs.
 * @returns {{mean: Object<string, number>, std: Object<string, number>}}
 */
function computeScoreStats(runs) {
  const scorerNames = new Set(runs.flatMap((run) => Object.keys(run.scores)));
  const mean = {};
  const std = {};
  for (const name of scorerNames) {
    const samples = [];
    for (const run of runs) {
      const value = run.scores[name];
      if (value != null) samples.push(value);
    }
    if (samples.length === 0) continue;
    let total = 0;
    for (const value of samples) total += value;
    const average = total / samples.length;
    let squaredError = 0;
    for (const value of samples) squaredError += (value - average) ** 2;
    mean[name] = average;
    std[name] = Math.sqrt(squaredError / samples.length);
  }
  return { mean, std };
}
784
/**
 * Fold one eval-history entry into the eval-trend aggregate without mutating
 * `acc`.
 *
 * For the entry's eval, the stored `runs` are kept in ascending timestamp
 * order and capped at the 50 most recent by timestamp, so the result does not
 * depend on the order entries arrive in. `latestScores` is the scores of the
 * newest retained run. `runCount` / `costTotal` and the global totals count
 * every entry, including ones trimmed from `runs`.
 *
 * @param {object} acc - Current aggregate (shape of emptyEvalTrendData()).
 * @param {object} entry - History entry; reads `id`, `eval`, `timestamp`, `data`.
 * @returns {object} Next aggregate.
 */
function reduceEvalTrends(acc, entry) {
  const scores = extractScores(entry.data);
  const cost = extractCost(entry.data);
  const model = extractModel(entry.data);
  const duration = extractDuration(entry.data);
  const run = {
    timestamp: entry.timestamp,
    id: entry.id,
    scores,
    cost,
    ...model !== void 0 ? { model } : {},
    ...duration !== void 0 ? { duration } : {}
  };
  const byEval = { ...acc.byEval };
  const prev = byEval[entry.eval];
  const MAX_EVAL_RUNS = 50;

  // Insert behind every run that is not newer, keeping `runs` chronological.
  // Plain appending kept the wrong 50 runs when entries arrived newest-first,
  // and judged "latest" against whichever run happened to be stored last.
  const runs = prev ? [...prev.runs] : [];
  let insertAt = runs.length;
  while (insertAt > 0 && runs[insertAt - 1].timestamp > run.timestamp) {
    insertAt -= 1;
  }
  runs.splice(insertAt, 0, run);
  if (runs.length > MAX_EVAL_RUNS) {
    runs.splice(0, runs.length - MAX_EVAL_RUNS);
  }

  const { mean, std } = computeScoreStats(runs);
  byEval[entry.eval] = {
    runs,
    latestScores: runs[runs.length - 1].scores,
    scoreMean: mean,
    scoreStd: std,
    costTotal: (prev?.costTotal ?? 0) + cost,
    runCount: (prev?.runCount ?? 0) + 1
  };
  return {
    byEval,
    totalRuns: acc.totalRuns + 1,
    totalCost: acc.totalCost + cost
  };
}
818
// Cap on the number of duration samples kept per workflow.
var MAX_DURATIONS = 200;

/**
 * Create the zeroed starting state for the workflow-stats reducer.
 *
 * @returns {{byWorkflow: object, totalExecutions: number, failureRate: number}}
 */
function emptyWorkflowStatsData() {
  const byWorkflow = {};
  return { byWorkflow, totalExecutions: 0, failureRate: 0 };
}
822
/**
 * Linear-interpolated percentile of an ascending-sorted numeric array.
 *
 * @param {number[]} sorted - Values in ascending order.
 * @param {number} p - Percentile in the range 0-100.
 * @returns {number} The interpolated value, or 0 for an empty array.
 */
function percentile(sorted, p) {
  const count = sorted.length;
  if (count === 0) return 0;
  const rank = p / 100 * (count - 1);
  const below = Math.floor(rank);
  const above = Math.ceil(rank);
  const base = sorted[below];
  // A whole-number rank lands exactly on a sample; otherwise interpolate.
  return below === above ? base : base + (sorted[above] - base) * (rank - below);
}
830
/**
 * Fold one execution into the workflow-stats aggregate without mutating `acc`.
 *
 * Per workflow this tracks total / completed / failed counts, the running
 * duration sum and average, and an ascending-sorted sample of at most
 * MAX_DURATIONS durations.
 *
 * @param {object} acc - Current aggregate (shape of emptyWorkflowStatsData()).
 * @param {object} execution - Reads `workflow`, `duration` and `status`.
 * @returns {object} Next aggregate.
 */
function reduceWorkflowStats(acc, execution) {
  const byWorkflow = { ...acc.byWorkflow };
  const prev = byWorkflow[execution.workflow] ?? {
    total: 0,
    completed: 0,
    failed: 0,
    durations: [],
    durationSum: 0,
    avgDuration: 0
  };
  const dur = finite(execution.duration);
  const total = prev.total + 1;

  // Sorted insert keeps `durations` ascending.
  const durations = [...prev.durations];
  const insertIdx = durations.findIndex((d) => d > dur);
  if (insertIdx === -1) durations.push(dur);
  else durations.splice(insertIdx, 0, dur);
  if (durations.length > MAX_DURATIONS) {
    // Evict by a rank that rotates with the running count. Always dropping
    // index 0 discards the smallest sample every time, so past the cap the
    // array drifts toward the largest durations ever seen.
    durations.splice(total % durations.length, 1);
  }

  const completed = prev.completed + (execution.status === "completed" ? 1 : 0);
  const failed = prev.failed + (execution.status === "failed" ? 1 : 0);
  const durationSum = prev.durationSum + dur;
  const avgDuration = durationSum / total;
  byWorkflow[execution.workflow] = {
    total,
    completed,
    failed,
    durations,
    durationSum,
    avgDuration
  };
  const totalExecutions = acc.totalExecutions + 1;
  const totalFailed = Object.values(byWorkflow).reduce((sum, w) => sum + w.failed, 0);
  const failureRate = totalExecutions > 0 ? totalFailed / totalExecutions : 0;
  return { byWorkflow, totalExecutions, failureRate };
}
864
/**
 * Compute the 50th and 95th percentile of a workflow entry's `durations`.
 *
 * @param {{durations: number[]}} entry - Per-workflow stats entry.
 * @returns {{durationP50: number, durationP95: number}}
 */
function getWorkflowPercentiles(entry) {
  const durationP50 = percentile(entry.durations, 50);
  const durationP95 = percentile(entry.durations, 95);
  return { durationP50, durationP95 };
}
870
/**
 * Convert internal workflow stats into the shape that is sent out: each
 * workflow keeps its counts and average, gains p50 / p95 durations, and loses
 * the raw `durations` sample and `durationSum`.
 *
 * @param {object} data - Aggregate produced by the workflow-stats reducer.
 * @returns {object} `{ byWorkflow, totalExecutions, failureRate }`.
 */
function enrichWorkflowStats(data) {
  const byWorkflow = {};
  for (const name of Object.keys(data.byWorkflow)) {
    const entry = data.byWorkflow[name];
    const percentiles = getWorkflowPercentiles(entry);
    byWorkflow[name] = {
      total: entry.total,
      completed: entry.completed,
      failed: entry.failed,
      durationP50: percentiles.durationP50,
      durationP95: percentiles.durationP95,
      avgDuration: entry.avgDuration
    };
  }
  const { totalExecutions, failureRate } = data;
  return { byWorkflow, totalExecutions, failureRate };
}
889
/**
 * Create the zeroed starting state for the trace-stats reducer.
 *
 * @returns {{eventTypeCounts: object, byTool: object, retryByAgent: object, totalEvents: number}}
 */
function emptyTraceStatsData() {
  const eventTypeCounts = {};
  const byTool = {};
  const retryByAgent = {};
  return { eventTypeCounts, byTool, retryByAgent, totalEvents: 0 };
}
897
/**
 * Fold one trace event into the trace-stats aggregate without mutating `acc`.
 *
 * - `eventTypeCounts` and `totalEvents` count every event.
 * - `byTool` is updated for tool_call / tool_denied / tool_approval events:
 *   tool_call increments `calls`; an event whose `data.approved` is exactly
 *   `true` increments `approved`; a tool_denied without a truthy `approved`,
 *   or a tool_approval with `approved === false`, increments `denied`.
 * - `retryByAgent` is updated for agent_call events whose `data.retryReason`
 *   is one of schema / validate / guardrail.
 *
 * @param {object} acc - Current aggregate (shape of emptyTraceStatsData()).
 * @param {object} event - Trace event.
 * @returns {object} Next aggregate.
 */
function reduceTraceStats(acc, event) {
  const eventTypeCounts = { ...acc.eventTypeCounts };
  eventTypeCounts[event.type] = (eventTypeCounts[event.type] ?? 0) + 1;

  const byTool = { ...acc.byTool };
  const isCallEvent = event.type === "tool_call";
  const isDeniedEvent = event.type === "tool_denied";
  const isApprovalEvent = event.type === "tool_approval";
  if (isCallEvent || isDeniedEvent || isApprovalEvent) {
    const toolName = event.tool;
    const prev = byTool[toolName] ?? { calls: 0, denied: 0, approved: 0 };
    // Only decision events carry an approval flag worth reading.
    const eventData = isDeniedEvent || isApprovalEvent ? event.data : void 0;
    const isApproved = (isDeniedEvent || isApprovalEvent) && eventData?.approved === true;
    const isDenied = isDeniedEvent ? !eventData?.approved : isApprovalEvent && eventData?.approved === false;
    byTool[toolName] = {
      calls: prev.calls + (isCallEvent ? 1 : 0),
      denied: prev.denied + (isDenied ? 1 : 0),
      approved: prev.approved + (isApproved ? 1 : 0)
    };
  }

  const retryByAgent = { ...acc.retryByAgent };
  if (event.agent && event.type === "agent_call") {
    const data = event.data;
    if (data?.retryReason) {
      const prev = retryByAgent[event.agent] ?? { schema: 0, validate: 0, guardrail: 0 };
      const reason = data.retryReason;
      // Own-property check: `in` also matches inherited names such as
      // "toString", which would add a garbage counter to the entry.
      if (Object.hasOwn(prev, reason)) {
        retryByAgent[event.agent] = { ...prev, [reason]: prev[reason] + 1 };
      }
    }
  }

  return {
    eventTypeCounts,
    byTool,
    retryByAgent,
    totalEvents: acc.totalEvents + 1
  };
}
933
+
934
+ // src/server/aggregates/execution-aggregator.ts
935
/**
 * Maintains per-window aggregates over whole executions.
 *
 * On every workflow_end trace event the finished execution is fetched from
 * the runtime and folded in; the whole aggregate is rebuilt from
 * `runtime.getExecutions()` on start and then every REBUILD_INTERVAL_MS.
 */
var ExecutionAggregator = class {
  snaps;
  interval;
  listener;
  options;
  /** Generation counter to prevent stale async fold after rebuild. */
  generation = 0;

  /**
   * @param {object} options - Reads `windows`, `emptyState`, `connMgr`,
   *   `channel`, `broadcastTransform`, `reducer`, `runtime` and the optional
   *   `executionCap`.
   */
  constructor(options) {
    const { windows, emptyState, connMgr, channel, broadcastTransform } = options;
    this.options = options;
    this.snaps = new AggregateSnapshots(windows, emptyState, connMgr, channel, broadcastTransform);
  }

  /** Rebuild once, then subscribe to live events and schedule periodic rebuilds. */
  async start() {
    await this.rebuild();
    this.listener = (event) => {
      if (!isLogEvent(event, "workflow_end")) return;
      // Remember which rebuild generation this lookup started in; the result
      // is discarded if a rebuild has begun by the time it resolves.
      const startedIn = this.generation;
      const applyIfCurrent = (exec) => {
        if (this.generation !== startedIn) return;
        if (!exec) return;
        this.snaps.fold(exec.startedAt, (prev) => this.options.reducer(prev, exec));
      };
      this.options.runtime
        .getExecution(event.executionId)
        .then(applyIfCurrent)
        .catch((err) => console.error("[axl-studio] execution fold failed:", err));
    };
    this.options.runtime.on("trace", this.listener);
    const onTick = () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err));
    this.interval = setInterval(onTick, REBUILD_INTERVAL_MS);
  }

  /**
   * Recompute every window from scratch using the first `executionCap`
   * (default 2000) executions returned by the runtime, bucketed by `startedAt`.
   */
  async rebuild() {
    this.generation++;
    const opts = this.options;
    const executions = await opts.runtime.getExecutions();
    const considered = executions.slice(0, opts.executionCap ?? 2e3);
    const now = Date.now();
    const fresh = new Map();
    for (const w of opts.windows) {
      fresh.set(w, opts.emptyState());
    }
    for (const exec of considered) {
      for (const w of opts.windows) {
        if (!withinWindow(exec.startedAt, w, now)) continue;
        fresh.set(w, opts.reducer(fresh.get(w), exec));
      }
    }
    this.snaps.replace(fresh);
  }

  /** Aggregate state for one window. */
  getSnapshot(window) {
    return this.snaps.get(window);
  }

  /** Aggregate states for all windows, keyed by window. */
  getAllSnapshots() {
    return this.snaps.getAll();
  }

  /** Unsubscribe from the runtime and stop the periodic rebuild. */
  close() {
    if (this.listener) this.options.runtime.off("trace", this.listener);
    if (this.interval) clearInterval(this.interval);
  }
};
999
+
1000
+ // src/server/aggregates/eval-aggregator.ts
1001
/**
 * Maintains per-window aggregates over eval-history entries.
 *
 * Live "eval_result" events from the runtime are folded in as they arrive,
 * and the whole aggregate is rebuilt from `runtime.getEvalHistory()` on start
 * and then every REBUILD_INTERVAL_MS.
 */
var EvalAggregator = class {
  snaps;
  interval;
  listener;
  options;

  /**
   * @param {object} options - Reads `windows`, `emptyState`, `connMgr`,
   *   `channel`, `broadcastTransform`, `reducer`, `runtime` and the optional
   *   `entryCap`.
   */
  constructor(options) {
    const { windows, emptyState, connMgr, channel, broadcastTransform } = options;
    this.options = options;
    this.snaps = new AggregateSnapshots(windows, emptyState, connMgr, channel, broadcastTransform);
  }

  /** Rebuild once, then subscribe to live results and schedule periodic rebuilds. */
  async start() {
    await this.rebuild();
    this.listener = (entry) => {
      this.snaps.fold(entry.timestamp, (prev) => this.options.reducer(prev, entry));
    };
    this.options.runtime.on("eval_result", this.listener);
    const onTick = () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err));
    this.interval = setInterval(onTick, REBUILD_INTERVAL_MS);
  }

  /**
   * Recompute every window from scratch using the first `entryCap`
   * (default 500) history entries returned by the runtime.
   */
  async rebuild() {
    const opts = this.options;
    const history = await opts.runtime.getEvalHistory();
    const considered = history.slice(0, opts.entryCap ?? 500);
    const now = Date.now();
    const fresh = new Map();
    for (const w of opts.windows) {
      fresh.set(w, opts.emptyState());
    }
    for (const entry of considered) {
      for (const w of opts.windows) {
        if (!withinWindow(entry.timestamp, w, now)) continue;
        fresh.set(w, opts.reducer(fresh.get(w), entry));
      }
    }
    this.snaps.replace(fresh);
  }

  /** Aggregate state for one window. */
  getSnapshot(window) {
    return this.snaps.get(window);
  }

  /** Aggregate states for all windows, keyed by window. */
  getAllSnapshots() {
    return this.snaps.getAll();
  }

  /** Unsubscribe from the runtime and stop the periodic rebuild. */
  close() {
    if (this.listener) this.options.runtime.off("eval_result", this.listener);
    if (this.interval) clearInterval(this.interval);
  }
};
329
1055
 
330
1056
  // src/server/routes/health.ts
@@ -394,15 +1120,22 @@ function createWorkflowRoutes(connMgr) {
394
1120
  if (body.stream) {
395
1121
  const stream = runtime.stream(name, body.input ?? {}, { metadata: body.metadata });
396
1122
  const executionId = `stream-${Date.now()}`;
1123
+ const redactOn = runtime.isRedactEnabled();
397
1124
  (async () => {
398
1125
  for await (const event of stream) {
399
- connMgr.broadcastWithWildcard(`execution:${executionId}`, event);
1126
+ connMgr.broadcastWithWildcard(
1127
+ `execution:${executionId}`,
1128
+ redactStreamEvent(event, redactOn)
1129
+ );
400
1130
  }
401
1131
  })();
402
1132
  return c.json({ ok: true, data: { executionId, streaming: true } });
403
1133
  }
404
1134
  const result = await runtime.execute(name, body.input ?? {}, { metadata: body.metadata });
405
- return c.json({ ok: true, data: { result } });
1135
+ return c.json({
1136
+ ok: true,
1137
+ data: { result: redactValue(result, runtime.isRedactEnabled()) }
1138
+ });
406
1139
  });
407
1140
  return app6;
408
1141
  }
@@ -413,7 +1146,10 @@ var app = new import_hono3.Hono();
413
1146
  app.get("/executions", async (c) => {
414
1147
  const runtime = c.get("runtime");
415
1148
  const executions = await runtime.getExecutions();
416
- return c.json({ ok: true, data: executions });
1149
+ return c.json({
1150
+ ok: true,
1151
+ data: redactExecutionList(executions, runtime.isRedactEnabled())
1152
+ });
417
1153
  });
418
1154
  app.get("/executions/:id", async (c) => {
419
1155
  const runtime = c.get("runtime");
@@ -425,7 +1161,10 @@ app.get("/executions/:id", async (c) => {
425
1161
  404
426
1162
  );
427
1163
  }
428
- return c.json({ ok: true, data: execution });
1164
+ return c.json({
1165
+ ok: true,
1166
+ data: redactExecutionInfo(execution, runtime.isRedactEnabled())
1167
+ });
429
1168
  });
430
1169
  app.post("/executions/:id/abort", (c) => {
431
1170
  const runtime = c.get("runtime");
@@ -459,7 +1198,16 @@ function createSessionRoutes(connMgr) {
459
1198
  const id = c.req.param("id");
460
1199
  const history = await store.getSession(id);
461
1200
  const handoffHistory = await store.getSessionMeta(id, "handoffHistory");
462
- return c.json({ ok: true, data: { id, history, handoffHistory: handoffHistory ?? [] } });
1201
+ return c.json({
1202
+ ok: true,
1203
+ data: {
1204
+ id,
1205
+ history: redactSessionHistory(history, runtime.isRedactEnabled()),
1206
+ // HandoffRecord has no content fields (source/target/mode/
1207
+ // timestamp/duration) — nothing to scrub.
1208
+ handoffHistory: handoffHistory ?? []
1209
+ }
1210
+ });
463
1211
  });
464
1212
  app6.post("/sessions/:id/send", async (c) => {
465
1213
  const runtime = c.get("runtime");
@@ -629,7 +1377,10 @@ app3.post("/tools/:name/test", async (c) => {
629
1377
  const body = await c.req.json();
630
1378
  const ctx = runtime.createContext();
631
1379
  const result = await tool.run(ctx, body.input);
632
- return c.json({ ok: true, data: { result } });
1380
+ return c.json({
1381
+ ok: true,
1382
+ data: { result: redactValue(result, runtime.isRedactEnabled()) }
1383
+ });
633
1384
  });
634
1385
  var tools_default = app3;
635
1386
 
@@ -644,7 +1395,7 @@ app4.get("/memory/:scope", async (c) => {
644
1395
  return c.json({ ok: true, data: [] });
645
1396
  }
646
1397
  const entries = await store.getAllMemory(scope);
647
- return c.json({ ok: true, data: entries });
1398
+ return c.json({ ok: true, data: redactMemoryList(entries, runtime.isRedactEnabled()) });
648
1399
  });
649
1400
  app4.get("/memory/:scope/:key", async (c) => {
650
1401
  const runtime = c.get("runtime");
@@ -664,7 +1415,10 @@ app4.get("/memory/:scope/:key", async (c) => {
664
1415
  404
665
1416
  );
666
1417
  }
667
- return c.json({ ok: true, data: { key, value } });
1418
+ return c.json({
1419
+ ok: true,
1420
+ data: { key, value: redactMemoryValue(value, runtime.isRedactEnabled()) }
1421
+ });
668
1422
  });
669
1423
  app4.put("/memory/:scope/:key", async (c) => {
670
1424
  const runtime = c.get("runtime");
@@ -709,7 +1463,10 @@ var app5 = new import_hono8.Hono();
709
1463
  app5.get("/decisions", async (c) => {
710
1464
  const runtime = c.get("runtime");
711
1465
  const decisions = await runtime.getPendingDecisions();
712
- return c.json({ ok: true, data: decisions });
1466
+ return c.json({
1467
+ ok: true,
1468
+ data: redactPendingDecisionList(decisions, runtime.isRedactEnabled())
1469
+ });
713
1470
  });
714
1471
  app5.post("/decisions/:executionId/resolve", async (c) => {
715
1472
  const runtime = c.get("runtime");
@@ -725,11 +1482,23 @@ var import_hono9 = require("hono");
725
1482
  function createCostRoutes(costAggregator) {
726
1483
  const app6 = new import_hono9.Hono();
727
1484
  app6.get("/costs", (c) => {
728
- return c.json({ ok: true, data: costAggregator.getData() });
1485
+ if (c.req.query("windows") === "all") {
1486
+ return c.json({ ok: true, data: costAggregator.getAllSnapshots() });
1487
+ }
1488
+ const window = parseWindowParam(c.req.query("window"));
1489
+ return c.json({ ok: true, data: costAggregator.getSnapshot(window) });
729
1490
  });
730
1491
  app6.post("/costs/reset", (c) => {
731
- costAggregator.reset();
732
- return c.json({ ok: true, data: { reset: true } });
1492
+ return c.json(
1493
+ {
1494
+ ok: false,
1495
+ error: {
1496
+ code: "GONE",
1497
+ message: "POST /api/costs/reset was removed in @axlsdk/studio 0.15. Cost aggregates are now time-windowed and rebuilt from StateStore history. Use GET /api/costs?window=24h|7d|30d|all to narrow the view instead of resetting."
1498
+ }
1499
+ },
1500
+ 410
1501
+ );
733
1502
  });
734
1503
  return app6;
735
1504
  }
@@ -737,8 +1506,9 @@ function createCostRoutes(costAggregator) {
737
1506
  // src/server/routes/evals.ts
738
1507
  var import_node_crypto = require("crypto");
739
1508
  var import_hono10 = require("hono");
740
- function createEvalRoutes(evalLoader) {
1509
+ function createEvalRoutes(connMgr, evalLoader) {
741
1510
  const app6 = new import_hono10.Hono();
1511
+ const activeRuns = /* @__PURE__ */ new Map();
742
1512
  app6.get("/evals", async (c) => {
743
1513
  if (evalLoader) await evalLoader();
744
1514
  const runtime = c.get("runtime");
@@ -748,7 +1518,10 @@ function createEvalRoutes(evalLoader) {
748
1518
  app6.get("/evals/history", async (c) => {
749
1519
  const runtime = c.get("runtime");
750
1520
  const history = await runtime.getEvalHistory();
751
- return c.json({ ok: true, data: history });
1521
+ return c.json({
1522
+ ok: true,
1523
+ data: redactEvalHistoryList(history, runtime.isRedactEnabled())
1524
+ });
752
1525
  });
753
1526
  app6.delete("/evals/history/:id", async (c) => {
754
1527
  const runtime = c.get("runtime");
@@ -769,6 +1542,7 @@ function createEvalRoutes(evalLoader) {
769
1542
  if (evalLoader) await evalLoader();
770
1543
  const runtime = c.get("runtime");
771
1544
  const name = c.req.param("name");
1545
+ const redactOn = runtime.isRedactEnabled();
772
1546
  const entry = runtime.getRegisteredEval(name);
773
1547
  if (!entry) {
774
1548
  return c.json(
@@ -777,13 +1551,89 @@ function createEvalRoutes(evalLoader) {
777
1551
  );
778
1552
  }
779
1553
  let runs = 1;
1554
+ let stream = false;
1555
+ let captureTraces = false;
780
1556
  try {
781
1557
  const body = await c.req.json().catch(() => ({}));
782
1558
  if (typeof body.runs === "number" && Number.isFinite(body.runs) && body.runs > 1) {
783
1559
  runs = Math.min(Math.floor(body.runs), 25);
784
1560
  }
1561
+ if (body.stream === true) {
1562
+ stream = true;
1563
+ }
1564
+ if (body.captureTraces === true) {
1565
+ captureTraces = true;
1566
+ }
785
1567
  } catch {
786
1568
  }
1569
+ if (stream) {
1570
+ const evalRunId = `eval-${(0, import_node_crypto.randomUUID)()}`;
1571
+ const ac = new AbortController();
1572
+ activeRuns.set(evalRunId, ac);
1573
+ (async () => {
1574
+ try {
1575
+ if (runs > 1) {
1576
+ const runGroupId = (0, import_node_crypto.randomUUID)();
1577
+ const results = [];
1578
+ for (let r = 0; r < runs; r++) {
1579
+ if (ac.signal.aborted) break;
1580
+ const result = await runtime.runRegisteredEval(name, {
1581
+ metadata: { runGroupId, runIndex: r },
1582
+ signal: ac.signal,
1583
+ captureTraces,
1584
+ onProgress: (event) => {
1585
+ if (event.type === "run_done") return;
1586
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1587
+ ...event,
1588
+ run: r + 1,
1589
+ totalRuns: runs
1590
+ });
1591
+ }
1592
+ });
1593
+ results.push(result);
1594
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1595
+ type: "run_done",
1596
+ run: r + 1,
1597
+ totalRuns: runs
1598
+ });
1599
+ }
1600
+ if (results.length > 0) {
1601
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1602
+ type: "done",
1603
+ evalResultId: results[0].id,
1604
+ runGroupId
1605
+ });
1606
+ } else {
1607
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1608
+ type: "error",
1609
+ message: "All runs were cancelled"
1610
+ });
1611
+ }
1612
+ } else {
1613
+ const result = await runtime.runRegisteredEval(name, {
1614
+ signal: ac.signal,
1615
+ captureTraces,
1616
+ onProgress: (event) => {
1617
+ if (event.type === "run_done") return;
1618
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, event);
1619
+ }
1620
+ });
1621
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1622
+ type: "done",
1623
+ evalResultId: result.id
1624
+ });
1625
+ }
1626
+ } catch (err) {
1627
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1628
+ type: "error",
1629
+ message: redactErrorMessage(err, redactOn)
1630
+ });
1631
+ } finally {
1632
+ activeRuns.delete(evalRunId);
1633
+ }
1634
+ })();
1635
+ return c.json({ ok: true, data: { evalRunId } });
1636
+ }
787
1637
  try {
788
1638
  if (runs > 1) {
789
1639
  const { aggregateRuns } = await import("@axlsdk/eval");
@@ -791,27 +1641,53 @@ function createEvalRoutes(evalLoader) {
791
1641
  const results = [];
792
1642
  for (let r = 0; r < runs; r++) {
793
1643
  const result2 = await runtime.runRegisteredEval(name, {
794
- metadata: { runGroupId, runIndex: r }
1644
+ metadata: { runGroupId, runIndex: r },
1645
+ captureTraces
795
1646
  });
796
1647
  results.push(result2);
797
1648
  }
798
1649
  const typedResults = results;
799
1650
  const aggregate = aggregateRuns(typedResults);
800
1651
  const first = typedResults[0];
801
- const result = { ...first, _multiRun: { aggregate, allRuns: typedResults } };
802
- return c.json({ ok: true, data: result });
1652
+ const result = {
1653
+ ...first,
1654
+ _multiRun: { aggregate, allRuns: typedResults }
1655
+ };
1656
+ return c.json({
1657
+ ok: true,
1658
+ data: redactEvalResult(result, redactOn)
1659
+ });
803
1660
  } else {
804
- const result = await runtime.runRegisteredEval(name);
805
- return c.json({ ok: true, data: result });
1661
+ const result = await runtime.runRegisteredEval(name, { captureTraces });
1662
+ return c.json({
1663
+ ok: true,
1664
+ data: redactEvalResult(result, redactOn)
1665
+ });
806
1666
  }
807
1667
  } catch (err) {
808
- const message = err instanceof Error ? err.message : String(err);
809
- return c.json({ ok: false, error: { code: "EVAL_ERROR", message } }, 400);
1668
+ return c.json(
1669
+ { ok: false, error: { code: "EVAL_ERROR", message: redactErrorMessage(err, redactOn) } },
1670
+ 400
1671
+ );
810
1672
  }
811
1673
  });
1674
+ app6.post("/evals/runs/:evalRunId/cancel", (c) => {
1675
+ const evalRunId = c.req.param("evalRunId");
1676
+ const ac = activeRuns.get(evalRunId);
1677
+ if (!ac) {
1678
+ return c.json(
1679
+ { ok: false, error: { code: "NOT_FOUND", message: "No active eval run found" } },
1680
+ 404
1681
+ );
1682
+ }
1683
+ ac.abort();
1684
+ activeRuns.delete(evalRunId);
1685
+ return c.json({ ok: true, data: { cancelled: true } });
1686
+ });
812
1687
  app6.post("/evals/:name/rescore", async (c) => {
813
1688
  if (evalLoader) await evalLoader();
814
1689
  const runtime = c.get("runtime");
1690
+ const redactOn = runtime.isRedactEnabled();
815
1691
  const name = c.req.param("name");
816
1692
  const body = await c.req.json();
817
1693
  if (!body.resultId || typeof body.resultId !== "string") {
@@ -849,14 +1725,20 @@ function createEvalRoutes(evalLoader) {
849
1725
  timestamp: Date.now(),
850
1726
  data: result
851
1727
  });
852
- return c.json({ ok: true, data: result });
1728
+ return c.json({
1729
+ ok: true,
1730
+ data: redactEvalResult(result, redactOn)
1731
+ });
853
1732
  } catch (err) {
854
- const message = err instanceof Error ? err.message : String(err);
855
- return c.json({ ok: false, error: { code: "EVAL_ERROR", message } }, 400);
1733
+ return c.json(
1734
+ { ok: false, error: { code: "EVAL_ERROR", message: redactErrorMessage(err, redactOn) } },
1735
+ 400
1736
+ );
856
1737
  }
857
1738
  });
858
1739
  app6.post("/evals/compare", async (c) => {
859
1740
  const runtime = c.get("runtime");
1741
+ const redactOn = runtime.isRedactEnabled();
860
1742
  const body = await c.req.json();
861
1743
  const validateIdParam = (v, name) => {
862
1744
  if (typeof v === "string") return v === "" ? `${name} must be non-empty` : null;
@@ -924,8 +1806,13 @@ function createEvalRoutes(evalLoader) {
924
1806
  const result = await runtime.evalCompare(baseline, candidate, body.options);
925
1807
  return c.json({ ok: true, data: result });
926
1808
  } catch (err) {
927
- const message = err instanceof Error ? err.message : String(err);
928
- return c.json({ ok: false, error: { code: "COMPARE_FAILED", message } }, 400);
1809
+ return c.json(
1810
+ {
1811
+ ok: false,
1812
+ error: { code: "COMPARE_FAILED", message: redactErrorMessage(err, redactOn) }
1813
+ },
1814
+ 400
1815
+ );
929
1816
  }
930
1817
  });
931
1818
  app6.post("/evals/import", async (c) => {
@@ -987,7 +1874,11 @@ function createEvalRoutes(evalLoader) {
987
1874
  });
988
1875
  return c.json({ ok: true, data: { id, eval: evalName, timestamp } });
989
1876
  });
990
- return app6;
1877
+ function closeActiveRuns() {
1878
+ for (const ac of activeRuns.values()) ac.abort();
1879
+ activeRuns.clear();
1880
+ }
1881
+ return { app: app6, closeActiveRuns };
991
1882
  }
992
1883
 
993
1884
  // src/server/routes/playground.ts
@@ -1025,13 +1916,14 @@ function createPlaygroundRoutes(connMgr) {
1025
1916
  const store = runtime.getStateStore();
1026
1917
  const history = await store.getSession(sessionId);
1027
1918
  history.push({ role: "user", content: body.message });
1919
+ const redactOn = runtime.isRedactEnabled();
1920
+ const broadcast = (event) => {
1921
+ connMgr.broadcastWithWildcard(`execution:${executionId}`, redactStreamEvent(event, redactOn));
1922
+ };
1028
1923
  const ctx = runtime.createContext({
1029
1924
  sessionHistory: history,
1030
1925
  onToken: (token) => {
1031
- connMgr.broadcastWithWildcard(`execution:${executionId}`, {
1032
- type: "token",
1033
- data: token
1034
- });
1926
+ broadcast({ type: "token", data: token });
1035
1927
  }
1036
1928
  });
1037
1929
  (async () => {
@@ -1040,12 +1932,9 @@ function createPlaygroundRoutes(connMgr) {
1040
1932
  const resultText = typeof result === "string" ? result : JSON.stringify(result);
1041
1933
  history.push({ role: "assistant", content: resultText });
1042
1934
  await store.saveSession(sessionId, history);
1043
- connMgr.broadcastWithWildcard(`execution:${executionId}`, {
1044
- type: "done",
1045
- data: resultText
1046
- });
1935
+ broadcast({ type: "done", data: resultText });
1047
1936
  } catch (err) {
1048
- connMgr.broadcastWithWildcard(`execution:${executionId}`, {
1937
+ broadcast({
1049
1938
  type: "error",
1050
1939
  message: err instanceof Error ? err.message : String(err)
1051
1940
  });
@@ -1059,12 +1948,78 @@ function createPlaygroundRoutes(connMgr) {
1059
1948
  return app6;
1060
1949
  }
1061
1950
 
1951
+ // src/server/routes/eval-trends.ts
1952
+ var import_hono12 = require("hono");
1953
/**
 * Builds the Hono sub-app serving GET /eval-trends.
 * The `window` query parameter is parsed by parseWindowParam and the matching
 * snapshot from `aggregator` is returned as `{ ok: true, data }`.
 *
 * @param aggregator object providing getSnapshot(window)
 * @returns the configured Hono app
 */
function createEvalTrendsRoutes(aggregator) {
  const router = new import_hono12.Hono();
  router.get("/eval-trends", (c) => {
    const requested = c.req.query("window");
    const snapshot = aggregator.getSnapshot(parseWindowParam(requested));
    return c.json({ ok: true, data: snapshot });
  });
  return router;
}
1961
+
1962
+ // src/server/routes/workflow-stats.ts
1963
+ var import_hono13 = require("hono");
1964
/**
 * Builds the Hono sub-app serving GET /workflow-stats.
 * The snapshot selected by the `window` query parameter is passed through
 * enrichWorkflowStats before being returned as `{ ok: true, data }`.
 *
 * @param aggregator object providing getSnapshot(window)
 * @returns the configured Hono app
 */
function createWorkflowStatsRoutes(aggregator) {
  const router = new import_hono13.Hono();
  router.get("/workflow-stats", (c) => {
    const requested = c.req.query("window");
    const raw = aggregator.getSnapshot(parseWindowParam(requested));
    const data = enrichWorkflowStats(raw);
    return c.json({ ok: true, data });
  });
  return router;
}
1972
+
1973
+ // src/server/routes/trace-stats.ts
1974
+ var import_hono14 = require("hono");
1975
/**
 * Builds the Hono sub-app serving GET /trace-stats.
 * The `window` query parameter is parsed by parseWindowParam and the matching
 * snapshot from `aggregator` is returned as `{ ok: true, data }`.
 *
 * @param aggregator object providing getSnapshot(window)
 * @returns the configured Hono app
 */
function createTraceStatsRoutes(aggregator) {
  const router = new import_hono14.Hono();
  router.get("/trace-stats", (c) => {
    const requested = c.req.query("window");
    const snapshot = aggregator.getSnapshot(parseWindowParam(requested));
    return c.json({ ok: true, data: snapshot });
  });
  return router;
}
1983
+
1062
1984
  // src/server/index.ts
1063
1985
  function createServer(options) {
1064
1986
  const { runtime, staticRoot, basePath = "", readOnly = false } = options;
1065
- const app6 = new import_hono12.Hono();
1987
+ const app6 = new import_hono15.Hono();
1066
1988
  const connMgr = new ConnectionManager();
1067
- const costAggregator = new CostAggregator(connMgr);
1989
+ const windows = ["24h", "7d", "30d", "all"];
1990
+ const costAggregator = new TraceAggregator({
1991
+ runtime,
1992
+ connMgr,
1993
+ channel: "costs",
1994
+ reducer: reduceCost,
1995
+ emptyState: emptyCostData,
1996
+ windows
1997
+ });
1998
+ const workflowStatsAggregator = new ExecutionAggregator({
1999
+ runtime,
2000
+ connMgr,
2001
+ channel: "workflow-stats",
2002
+ reducer: reduceWorkflowStats,
2003
+ emptyState: emptyWorkflowStatsData,
2004
+ windows,
2005
+ broadcastTransform: enrichWorkflowStats
2006
+ });
2007
+ const traceStatsAggregator = new TraceAggregator({
2008
+ runtime,
2009
+ connMgr,
2010
+ channel: "trace-stats",
2011
+ reducer: reduceTraceStats,
2012
+ emptyState: emptyTraceStatsData,
2013
+ windows
2014
+ });
2015
+ const evalTrendsAggregator = new EvalAggregator({
2016
+ runtime,
2017
+ connMgr,
2018
+ channel: "eval-trends",
2019
+ reducer: reduceEvalTrends,
2020
+ emptyState: emptyEvalTrendData,
2021
+ windows
2022
+ });
1068
2023
  if (options.cors !== false) {
1069
2024
  app6.use("*", (0, import_cors.cors)());
1070
2025
  }
@@ -1082,11 +2037,11 @@ function createServer(options) {
1082
2037
  /^PUT \/api\/memory(\/|$)/,
1083
2038
  /^DELETE \/api\/memory(\/|$)/,
1084
2039
  /^POST \/api\/decisions(\/|$)/,
1085
- /^POST \/api\/costs(\/|$)/,
1086
2040
  /^POST \/api\/tools(\/|$)/,
1087
2041
  /^POST \/api\/evals\/import$/,
1088
2042
  /^POST \/api\/evals\/[^/]+\/run$/,
1089
2043
  /^POST \/api\/evals\/[^/]+\/rescore$/,
2044
+ /^POST \/api\/evals\/runs\/[^/]+\/cancel$/,
1090
2045
  /^DELETE \/api\/evals\/history\/[^/]+$/,
1091
2046
  /^POST \/api\/playground(\/|$)/
1092
2047
  ];
@@ -1106,7 +2061,7 @@ function createServer(options) {
1106
2061
  await next();
1107
2062
  });
1108
2063
  }
1109
- const api = new import_hono12.Hono();
2064
+ const api = new import_hono15.Hono();
1110
2065
  api.route("/", createHealthRoutes(readOnly));
1111
2066
  api.route("/", createWorkflowRoutes(connMgr));
1112
2067
  api.route("/", executions_default);
@@ -1116,7 +2071,11 @@ function createServer(options) {
1116
2071
  api.route("/", memory_default);
1117
2072
  api.route("/", decisions_default);
1118
2073
  api.route("/", createCostRoutes(costAggregator));
1119
- api.route("/", createEvalRoutes(options.evalLoader));
2074
+ api.route("/", createEvalTrendsRoutes(evalTrendsAggregator));
2075
+ api.route("/", createWorkflowStatsRoutes(workflowStatsAggregator));
2076
+ api.route("/", createTraceStatsRoutes(traceStatsAggregator));
2077
+ const { app: evalApp, closeActiveRuns } = createEvalRoutes(connMgr, options.evalLoader);
2078
+ api.route("/", evalApp);
1120
2079
  api.route("/", createPlaygroundRoutes(connMgr));
1121
2080
  app6.route("/api", api);
1122
2081
  const traceListener = (event) => {
@@ -1124,12 +2083,17 @@ function createServer(options) {
1124
2083
  if (traceEvent.executionId) {
1125
2084
  connMgr.broadcastWithWildcard(`trace:${traceEvent.executionId}`, traceEvent);
1126
2085
  }
1127
- costAggregator.onTrace(traceEvent);
1128
2086
  if (traceEvent.type === "await_human") {
1129
2087
  connMgr.broadcast("decisions", traceEvent);
1130
2088
  }
1131
2089
  };
1132
2090
  runtime.on("trace", traceListener);
2091
+ const aggregatorStartPromise = Promise.all([
2092
+ costAggregator.start(),
2093
+ workflowStatsAggregator.start(),
2094
+ traceStatsAggregator.start(),
2095
+ evalTrendsAggregator.start()
2096
+ ]).catch((err) => console.error("[axl-studio] aggregator start failed:", err));
1133
2097
  if (staticRoot) {
1134
2098
  const indexPath = (0, import_node_path.resolve)(staticRoot, "index.html");
1135
2099
  let spaHtml;
@@ -1179,9 +2143,22 @@ function createServer(options) {
1179
2143
  app: app6,
1180
2144
  connMgr,
1181
2145
  costAggregator,
2146
+ workflowStatsAggregator,
2147
+ traceStatsAggregator,
2148
+ evalTrendsAggregator,
2149
+ aggregatorStartPromise,
1182
2150
  /** Create WS handlers. Call before registering static/SPA routes are reached. */
1183
2151
  createWsHandlers: () => createWsHandlers(connMgr),
1184
- traceListener
2152
+ traceListener,
2153
+ /** Abort all active streaming eval runs. */
2154
+ closeActiveRuns,
2155
+ /** Close all aggregators (clear intervals and unsubscribe listeners). */
2156
+ closeAggregators: () => {
2157
+ costAggregator.close();
2158
+ workflowStatsAggregator.close();
2159
+ traceStatsAggregator.close();
2160
+ evalTrendsAggregator.close();
2161
+ }
1185
2162
  };
1186
2163
  }
1187
2164