@axlsdk/studio 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,16 +43,161 @@ var import_ws = require("ws");
43
43
  // src/server/index.ts
44
44
  var import_node_fs = require("fs");
45
45
  var import_node_path = require("path");
46
- var import_hono12 = require("hono");
46
+ var import_hono15 = require("hono");
47
47
  var import_cors = require("hono/cors");
48
48
  var import_serve_static = require("@hono/node-server/serve-static");
49
49
 
50
+ // src/server/redact.ts
51
+ var REDACTED = "[redacted]";
52
+ var SAFE_ERROR_NAMES = /* @__PURE__ */ new Set([
53
+ "QuorumNotMet",
54
+ "NoConsensus",
55
+ "TimeoutError",
56
+ "MaxTurnsError",
57
+ "BudgetExceededError",
58
+ "ToolDenied"
59
+ ]);
60
+ function redactErrorMessage(err, redact) {
61
+ const raw = err instanceof Error ? err.message : String(err);
62
+ if (!redact) return raw;
63
+ const name = err instanceof Error ? err.name : "";
64
+ return SAFE_ERROR_NAMES.has(name) ? raw : REDACTED;
65
+ }
66
+ function redactValue(value, redact) {
67
+ if (!redact) return value;
68
+ return REDACTED;
69
+ }
70
+ function redactExecutionInfo(info, redact) {
71
+ if (!redact) return info;
72
+ return {
73
+ ...info,
74
+ ...info.result !== void 0 ? { result: REDACTED } : {},
75
+ ...info.error !== void 0 ? { error: REDACTED } : {}
76
+ };
77
+ }
78
+ function redactExecutionList(infos, redact) {
79
+ if (!redact) return infos;
80
+ return infos.map((info) => redactExecutionInfo(info, redact));
81
+ }
82
+ function redactMemoryValue(value, redact) {
83
+ if (!redact) return value;
84
+ return REDACTED;
85
+ }
86
+ function redactMemoryList(entries, redact) {
87
+ if (!redact) return entries;
88
+ return entries.map((entry) => ({ key: entry.key, value: REDACTED }));
89
+ }
90
+ function redactChatMessage(msg) {
91
+ const scrubbed = {
92
+ role: msg.role,
93
+ content: REDACTED,
94
+ ...msg.name !== void 0 ? { name: msg.name } : {},
95
+ ...msg.tool_call_id !== void 0 ? { tool_call_id: msg.tool_call_id } : {},
96
+ ...msg.tool_calls !== void 0 ? {
97
+ tool_calls: msg.tool_calls.map((tc) => ({
98
+ id: tc.id,
99
+ type: tc.type,
100
+ function: {
101
+ name: tc.function.name,
102
+ arguments: REDACTED
103
+ }
104
+ }))
105
+ } : {}
106
+ // providerMetadata deliberately omitted — opaque content.
107
+ };
108
+ return scrubbed;
109
+ }
110
+ function redactSessionHistory(history, redact) {
111
+ if (!redact) return history;
112
+ return history.map(redactChatMessage);
113
+ }
114
+ function redactStreamEvent(event, redact) {
115
+ if (!redact) return event;
116
+ switch (event.type) {
117
+ case "token":
118
+ return { type: "token", data: REDACTED };
119
+ case "tool_call":
120
+ return { ...event, args: REDACTED };
121
+ case "tool_result":
122
+ return { ...event, result: REDACTED };
123
+ case "tool_approval":
124
+ return {
125
+ ...event,
126
+ args: REDACTED,
127
+ ...event.reason !== void 0 ? { reason: REDACTED } : {}
128
+ };
129
+ case "done":
130
+ return { type: "done", data: REDACTED };
131
+ case "error":
132
+ return { type: "error", message: REDACTED };
133
+ // Structural events have no user content to scrub.
134
+ case "agent_start":
135
+ case "agent_end":
136
+ case "handoff":
137
+ case "step":
138
+ return event;
139
+ }
140
+ }
141
+ function redactEvalItem(item) {
142
+ const scrubbed = {
143
+ ...item,
144
+ input: REDACTED,
145
+ output: REDACTED,
146
+ ...item.annotations !== void 0 ? { annotations: REDACTED } : {},
147
+ ...item.error !== void 0 ? { error: REDACTED } : {},
148
+ ...item.scorerErrors !== void 0 ? { scorerErrors: item.scorerErrors.map(() => REDACTED) } : {}
149
+ };
150
+ if (item.scoreDetails) {
151
+ const detailsOut = {};
152
+ for (const [name, detail] of Object.entries(item.scoreDetails)) {
153
+ detailsOut[name] = {
154
+ score: detail.score,
155
+ ...detail.duration !== void 0 ? { duration: detail.duration } : {},
156
+ ...detail.cost !== void 0 ? { cost: detail.cost } : {}
157
+ // metadata deliberately omitted — may contain LLM scorer reasoning
158
+ };
159
+ }
160
+ scrubbed.scoreDetails = detailsOut;
161
+ }
162
+ return scrubbed;
163
+ }
164
+ function redactEvalResult(result, redact) {
165
+ if (!redact) return result;
166
+ return {
167
+ ...result,
168
+ items: result.items.map(redactEvalItem)
169
+ };
170
+ }
171
+ function redactEvalHistoryEntry(entry, redact) {
172
+ if (!redact) return entry;
173
+ return {
174
+ ...entry,
175
+ data: redactEvalResult(entry.data, redact)
176
+ };
177
+ }
178
+ function redactEvalHistoryList(entries, redact) {
179
+ if (!redact) return entries;
180
+ return entries.map((e) => redactEvalHistoryEntry(e, redact));
181
+ }
182
+ function redactPendingDecision(decision, redact) {
183
+ if (!redact) return decision;
184
+ return {
185
+ ...decision,
186
+ prompt: REDACTED,
187
+ ...decision.metadata !== void 0 ? { metadata: { redacted: true } } : {}
188
+ };
189
+ }
190
+ function redactPendingDecisionList(decisions, redact) {
191
+ if (!redact) return decisions;
192
+ return decisions.map((d) => redactPendingDecision(d, redact));
193
+ }
194
+
50
195
  // src/server/middleware/error-handler.ts
51
196
  async function errorHandler(c, next) {
52
197
  try {
53
198
  await next();
54
199
  } catch (err) {
55
- const message = err instanceof Error ? err.message : String(err);
200
+ const rawMessage = err instanceof Error ? err.message : String(err);
56
201
  const code = err.code ?? "INTERNAL_ERROR";
57
202
  let status = 500;
58
203
  if ("status" in err) {
@@ -60,46 +205,81 @@ async function errorHandler(c, next) {
60
205
  if (typeof errStatus === "number" && errStatus >= 400 && errStatus < 600) {
61
206
  status = errStatus;
62
207
  }
63
- } else if (code === "NOT_FOUND" || message.includes("not found") || message.includes("not registered")) {
208
+ } else if (code === "NOT_FOUND" || rawMessage.includes("not found") || rawMessage.includes("not registered")) {
64
209
  status = 404;
65
- } else if (code === "VALIDATION_ERROR" || message.includes("Expected") || message.includes("invalid")) {
210
+ } else if (code === "VALIDATION_ERROR" || rawMessage.includes("Expected") || rawMessage.includes("invalid")) {
66
211
  status = 400;
67
212
  }
213
+ const runtime = c.get("runtime");
214
+ const redactOn = runtime?.isRedactEnabled?.() ?? false;
68
215
  const body = {
69
216
  ok: false,
70
- error: { code, message }
217
+ error: { code, message: redactErrorMessage(err, redactOn) }
71
218
  };
72
219
  return c.json(body, status);
73
220
  }
74
221
  }
75
222
 
76
223
  // src/server/ws/connection-manager.ts
77
- function isBufferedChannel(channel) {
78
- return channel.startsWith("execution:");
79
- }
80
224
  var BUFFER_TTL_MS = 3e4;
81
225
  var MAX_BUFFER_EVENTS = 500;
226
+ var MAX_WS_FRAME_BYTES = 65536;
227
+ function isBufferedChannel(channel) {
228
+ return channel.startsWith("execution:") || channel.startsWith("eval:");
229
+ }
230
+ function truncateIfOversized(msg, channel, data) {
231
+ if (msg.length <= MAX_WS_FRAME_BYTES) return msg;
232
+ const event = data ?? {};
233
+ const truncated = {
234
+ type: "event",
235
+ channel,
236
+ data: {
237
+ ...event,
238
+ data: {
239
+ __truncated: true,
240
+ originalBytes: msg.length,
241
+ maxBytes: MAX_WS_FRAME_BYTES,
242
+ hint: "Event exceeded WS frame budget (likely a verbose agent_call with a large messages[] snapshot). Fetch via REST if you need the full payload."
243
+ }
244
+ }
245
+ };
246
+ return JSON.stringify(truncated);
247
+ }
82
248
  var ConnectionManager = class {
83
249
  /** channel -> set of WS connections */
84
250
  channels = /* @__PURE__ */ new Map();
85
- /** ws -> set of subscribed channels (for cleanup) */
251
+ /** ws -> subscribed channels + optional integrator-supplied metadata */
86
252
  connections = /* @__PURE__ */ new Map();
87
253
  /** channel -> replay buffer for execution streams */
88
254
  buffers = /* @__PURE__ */ new Map();
89
255
  maxConnections = 100;
256
+ filter;
257
+ /**
258
+ * Register a broadcast filter. Called once at middleware construction.
259
+ * The filter runs on every outbound event and can drop or deliver based
260
+ * on the destination connection's metadata.
261
+ */
262
+ setFilter(filter) {
263
+ this.filter = filter;
264
+ }
265
+ /** Attach integrator-supplied metadata to an already-added connection. */
266
+ setMetadata(ws, metadata) {
267
+ const entry = this.connections.get(ws);
268
+ if (entry) entry.metadata = metadata;
269
+ }
90
270
  /** Register a new WS connection. */
91
271
  add(ws) {
92
272
  if (this.connections.size >= this.maxConnections) {
93
273
  ws.close?.();
94
274
  return;
95
275
  }
96
- this.connections.set(ws, /* @__PURE__ */ new Set());
276
+ this.connections.set(ws, { channels: /* @__PURE__ */ new Set() });
97
277
  }
98
278
  /** Remove a WS connection and all its subscriptions. */
99
279
  remove(ws) {
100
- const channels = this.connections.get(ws);
101
- if (channels) {
102
- for (const ch of channels) {
280
+ const entry = this.connections.get(ws);
281
+ if (entry) {
282
+ for (const ch of entry.channels) {
103
283
  this.channels.get(ch)?.delete(ws);
104
284
  if (this.channels.get(ch)?.size === 0) {
105
285
  this.channels.delete(ch);
@@ -117,12 +297,20 @@ var ConnectionManager = class {
117
297
  this.channels.set(channel, subs);
118
298
  }
119
299
  subs.add(ws);
120
- this.connections.get(ws).add(channel);
300
+ this.connections.get(ws).channels.add(channel);
121
301
  const buffer = this.buffers.get(channel);
122
302
  if (buffer) {
123
- for (const msg of buffer.events) {
303
+ const metadata = this.connections.get(ws)?.metadata;
304
+ for (const event of buffer.events) {
305
+ if (this.filter) {
306
+ try {
307
+ if (!this.filter(event.data, metadata)) continue;
308
+ } catch {
309
+ continue;
310
+ }
311
+ }
124
312
  try {
125
- ws.send(msg);
313
+ ws.send(event.msg);
126
314
  } catch {
127
315
  this.remove(ws);
128
316
  return;
@@ -136,11 +324,15 @@ var ConnectionManager = class {
136
324
  if (this.channels.get(channel)?.size === 0) {
137
325
  this.channels.delete(channel);
138
326
  }
139
- this.connections.get(ws)?.delete(channel);
327
+ this.connections.get(ws)?.channels.delete(channel);
140
328
  }
141
329
  /** Broadcast data to all subscribers of a channel. Buffers events for execution channels. */
142
330
  broadcast(channel, data) {
143
- const msg = JSON.stringify({ type: "event", channel, data });
331
+ const msg = truncateIfOversized(
332
+ JSON.stringify({ type: "event", channel, data }),
333
+ channel,
334
+ data
335
+ );
144
336
  if (isBufferedChannel(channel)) {
145
337
  let buffer = this.buffers.get(channel);
146
338
  if (!buffer) {
@@ -150,7 +342,7 @@ var ConnectionManager = class {
150
342
  const event = data;
151
343
  const isTerminal = event.type === "done" || event.type === "error";
152
344
  if (buffer.events.length < MAX_BUFFER_EVENTS || isTerminal) {
153
- buffer.events.push(msg);
345
+ buffer.events.push({ msg, data });
154
346
  }
155
347
  if (isTerminal) {
156
348
  buffer.complete = true;
@@ -163,6 +355,14 @@ var ConnectionManager = class {
163
355
  const subs = this.channels.get(channel);
164
356
  if (!subs || subs.size === 0) return;
165
357
  for (const ws of [...subs]) {
358
+ if (this.filter) {
359
+ const metadata = this.connections.get(ws)?.metadata;
360
+ try {
361
+ if (!this.filter(data, metadata)) continue;
362
+ } catch {
363
+ continue;
364
+ }
365
+ }
166
366
  try {
167
367
  ws.send(msg);
168
368
  } catch {
@@ -178,8 +378,20 @@ var ConnectionManager = class {
178
378
  const wildcardChannel = channel.substring(0, colonIdx) + ":*";
179
379
  const subs = this.channels.get(wildcardChannel);
180
380
  if (!subs || subs.size === 0) return;
181
- const msg = JSON.stringify({ type: "event", channel, data });
381
+ const msg = truncateIfOversized(
382
+ JSON.stringify({ type: "event", channel, data }),
383
+ channel,
384
+ data
385
+ );
182
386
  for (const ws of [...subs]) {
387
+ if (this.filter) {
388
+ const metadata = this.connections.get(ws)?.metadata;
389
+ try {
390
+ if (!this.filter(data, metadata)) continue;
391
+ } catch {
392
+ continue;
393
+ }
394
+ }
183
395
  try {
184
396
  ws.send(msg);
185
397
  } catch {
@@ -211,11 +423,11 @@ var ConnectionManager = class {
211
423
  };
212
424
 
213
425
  // src/server/ws/protocol.ts
214
- var VALID_CHANNEL_PREFIXES = ["execution:", "trace:"];
215
- var VALID_EXACT_CHANNELS = ["costs", "decisions"];
426
+ var VALID_CHANNEL_PREFIXES = ["execution:", "trace:", "eval:"];
427
+ var VALID_EXACT_CHANNELS = ["costs", "decisions", "eval-trends", "workflow-stats", "trace-stats"];
216
428
  var MAX_CHANNEL_LENGTH = 256;
217
429
  function handleWsMessage(raw, socket, connMgr) {
218
- if (raw.length > 65536) {
430
+ if (raw.length > MAX_WS_FRAME_BYTES) {
219
431
  return JSON.stringify({ type: "error", message: "Message too large" });
220
432
  }
221
433
  let msg;
@@ -275,66 +487,580 @@ function createWsHandlers(connMgr) {
275
487
  };
276
488
  }
277
489
 
278
- // src/server/cost-aggregator.ts
279
- var CostAggregator = class {
280
- constructor(connMgr) {
490
+ // src/server/aggregates/aggregate-snapshots.ts
491
+ var WINDOW_MS = {
492
+ "24h": 24 * 60 * 60 * 1e3,
493
+ "7d": 7 * 24 * 60 * 60 * 1e3,
494
+ "30d": 30 * 24 * 60 * 60 * 1e3,
495
+ all: Number.POSITIVE_INFINITY
496
+ };
497
+ function withinWindow(ts, window, now) {
498
+ return ts >= now - WINDOW_MS[window];
499
+ }
500
+ var REBUILD_INTERVAL_MS = 5 * 6e4;
501
+ var ALL_WINDOWS = new Set(Object.keys(WINDOW_MS));
502
+ function parseWindowParam(raw, fallback = "7d") {
503
+ return raw && ALL_WINDOWS.has(raw) ? raw : fallback;
504
+ }
505
+ var AggregateSnapshots = class {
506
+ constructor(windows, emptyState, connMgr, channel, broadcastTransform) {
507
+ this.windows = windows;
508
+ this.emptyState = emptyState;
281
509
  this.connMgr = connMgr;
510
+ this.channel = channel;
511
+ this.broadcastTransform = broadcastTransform;
512
+ this.snapshots = new Map(windows.map((w) => [w, emptyState()]));
513
+ }
514
+ snapshots;
515
+ /** Replace all snapshots atomically — used after a full rebuild. */
516
+ replace(fresh) {
517
+ this.snapshots = fresh;
518
+ this.broadcast();
519
+ }
520
+ /** Apply a reducer update to every window where `ts` falls inside the window. */
521
+ fold(ts, update) {
522
+ const now = Date.now();
523
+ let changed = false;
524
+ for (const window of this.windows) {
525
+ if (withinWindow(ts, window, now)) {
526
+ const prev = this.snapshots.get(window);
527
+ this.snapshots.set(window, update(prev));
528
+ changed = true;
529
+ }
530
+ }
531
+ if (changed) this.broadcast();
532
+ }
533
+ get(window) {
534
+ return this.snapshots.get(window) ?? this.emptyState();
535
+ }
536
+ getAll() {
537
+ return Object.fromEntries(this.snapshots);
538
+ }
539
+ broadcast() {
540
+ const snapshots = this.broadcastTransform ? Object.fromEntries(
541
+ this.windows.map((w) => [w, this.broadcastTransform(this.snapshots.get(w))])
542
+ ) : this.getAll();
543
+ this.connMgr.broadcast(this.channel, {
544
+ snapshots,
545
+ updatedAt: Date.now()
546
+ });
547
+ }
548
+ };
549
+
550
+ // src/server/aggregates/trace-aggregator.ts
551
+ var TraceAggregator = class {
552
+ snaps;
553
+ interval;
554
+ listener;
555
+ options;
556
+ constructor(options) {
557
+ this.options = options;
558
+ this.snaps = new AggregateSnapshots(
559
+ options.windows,
560
+ options.emptyState,
561
+ options.connMgr,
562
+ options.channel,
563
+ options.broadcastTransform
564
+ );
565
+ }
566
+ async start() {
567
+ await this.rebuild();
568
+ this.listener = (event) => {
569
+ this.snaps.fold(event.timestamp, (prev) => this.options.reducer(prev, event));
570
+ };
571
+ this.options.runtime.on("trace", this.listener);
572
+ this.interval = setInterval(
573
+ () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err)),
574
+ REBUILD_INTERVAL_MS
575
+ );
576
+ }
577
+ async rebuild() {
578
+ const executions = await this.options.runtime.getExecutions();
579
+ const cap = this.options.executionCap ?? 2e3;
580
+ const capped = executions.slice(0, cap);
581
+ const now = Date.now();
582
+ const fresh = new Map(
583
+ this.options.windows.map((w) => [w, this.options.emptyState()])
584
+ );
585
+ for (const exec of capped) {
586
+ for (const event of exec.steps) {
587
+ for (const window of this.options.windows) {
588
+ if (withinWindow(event.timestamp, window, now)) {
589
+ fresh.set(window, this.options.reducer(fresh.get(window), event));
590
+ }
591
+ }
592
+ }
593
+ }
594
+ this.snaps.replace(fresh);
595
+ }
596
+ getSnapshot(window) {
597
+ return this.snaps.get(window);
598
+ }
599
+ getAllSnapshots() {
600
+ return this.snaps.getAll();
601
+ }
602
+ close() {
603
+ if (this.listener) this.options.runtime.off("trace", this.listener);
604
+ if (this.interval) clearInterval(this.interval);
282
605
  }
283
- data = {
606
+ };
607
+
608
+ // src/server/aggregates/reducers.ts
609
+ var finite = (v) => Number.isFinite(v) ? v : 0;
610
+ function isLogEvent(event, eventName) {
611
+ if (event.type === eventName) return true;
612
+ if (event.type === "log" && event.data != null && typeof event.data === "object") {
613
+ return event.data.event === eventName;
614
+ }
615
+ return false;
616
+ }
617
+ function emptyRetry() {
618
+ return {
619
+ primary: 0,
620
+ primaryCalls: 0,
621
+ schema: 0,
622
+ schemaCalls: 0,
623
+ validate: 0,
624
+ validateCalls: 0,
625
+ guardrail: 0,
626
+ guardrailCalls: 0,
627
+ retryCalls: 0
628
+ };
629
+ }
630
+ function emptyCostData() {
631
+ return {
284
632
  totalCost: 0,
285
633
  totalTokens: { input: 0, output: 0, reasoning: 0 },
286
634
  byAgent: {},
287
635
  byModel: {},
288
- byWorkflow: {}
636
+ byWorkflow: {},
637
+ retry: emptyRetry(),
638
+ byEmbedder: {}
289
639
  };
290
- /** Process a trace event and update cost data. */
291
- onTrace(event) {
292
- if (event.cost == null && !event.tokens) return;
293
- const cost = Number.isFinite(event.cost) ? event.cost : 0;
294
- const tokens = event.tokens ?? {};
295
- this.data.totalCost += cost;
296
- this.data.totalTokens.input += tokens.input ?? 0;
297
- this.data.totalTokens.output += tokens.output ?? 0;
298
- this.data.totalTokens.reasoning += tokens.reasoning ?? 0;
299
- if (event.agent) {
300
- const entry = this.data.byAgent[event.agent] ?? { cost: 0, calls: 0 };
301
- entry.cost += cost;
302
- entry.calls += 1;
303
- this.data.byAgent[event.agent] = entry;
304
- }
305
- if (event.model) {
306
- const entry = this.data.byModel[event.model] ?? {
307
- cost: 0,
308
- calls: 0,
309
- tokens: { input: 0, output: 0 }
640
+ }
641
+ function reduceCost(acc, event) {
642
+ const isWorkflowStart = isLogEvent(event, "workflow_start");
643
+ if (isWorkflowStart && event.workflow) {
644
+ const byWorkflow2 = { ...acc.byWorkflow };
645
+ const prev = byWorkflow2[event.workflow] ?? { cost: 0, executions: 0 };
646
+ byWorkflow2[event.workflow] = { ...prev, executions: prev.executions + 1 };
647
+ return { ...acc, byWorkflow: byWorkflow2 };
648
+ }
649
+ if (event.cost == null && !event.tokens) return acc;
650
+ const cost = finite(event.cost);
651
+ const tokens = event.tokens ?? {};
652
+ const totalTokens = event.type === "agent_call" ? {
653
+ input: acc.totalTokens.input + finite(tokens.input),
654
+ output: acc.totalTokens.output + finite(tokens.output),
655
+ reasoning: acc.totalTokens.reasoning + finite(tokens.reasoning)
656
+ } : acc.totalTokens;
657
+ const byAgent = { ...acc.byAgent };
658
+ if (event.agent) {
659
+ const prev = byAgent[event.agent] ?? { cost: 0, calls: 0 };
660
+ byAgent[event.agent] = { cost: prev.cost + cost, calls: prev.calls + 1 };
661
+ }
662
+ const byModel = { ...acc.byModel };
663
+ if (event.model) {
664
+ const prev = byModel[event.model] ?? { cost: 0, calls: 0, tokens: { input: 0, output: 0 } };
665
+ byModel[event.model] = {
666
+ cost: prev.cost + cost,
667
+ calls: prev.calls + 1,
668
+ tokens: {
669
+ input: prev.tokens.input + finite(tokens.input),
670
+ output: prev.tokens.output + finite(tokens.output)
671
+ }
672
+ };
673
+ }
674
+ const byWorkflow = { ...acc.byWorkflow };
675
+ if (event.workflow) {
676
+ const prev = byWorkflow[event.workflow] ?? { cost: 0, executions: 0 };
677
+ byWorkflow[event.workflow] = {
678
+ cost: prev.cost + cost,
679
+ executions: prev.executions + (isWorkflowStart ? 1 : 0)
680
+ };
681
+ }
682
+ let retry = acc.retry;
683
+ if (event.type === "agent_call") {
684
+ const d = event.data ?? {};
685
+ const reason = d.retryReason;
686
+ retry = { ...acc.retry };
687
+ if (reason === "schema") {
688
+ retry.schema += cost;
689
+ retry.schemaCalls += 1;
690
+ retry.retryCalls += 1;
691
+ } else if (reason === "validate") {
692
+ retry.validate += cost;
693
+ retry.validateCalls += 1;
694
+ retry.retryCalls += 1;
695
+ } else if (reason === "guardrail") {
696
+ retry.guardrail += cost;
697
+ retry.guardrailCalls += 1;
698
+ retry.retryCalls += 1;
699
+ } else {
700
+ retry.primary += cost;
701
+ retry.primaryCalls += 1;
702
+ }
703
+ }
704
+ let byEmbedder = acc.byEmbedder;
705
+ if (event.type === "log") {
706
+ const d = event.data ?? {};
707
+ if (d.event === "memory_remember" || d.event === "memory_recall") {
708
+ byEmbedder = { ...acc.byEmbedder };
709
+ const modelKey = d.usage?.model ?? "unknown";
710
+ const embedTokens = typeof d.usage?.tokens === "number" ? finite(d.usage.tokens) : 0;
711
+ const prev = byEmbedder[modelKey] ?? { cost: 0, calls: 0, tokens: 0 };
712
+ byEmbedder[modelKey] = {
713
+ cost: prev.cost + cost,
714
+ calls: prev.calls + 1,
715
+ tokens: prev.tokens + embedTokens
310
716
  };
311
- entry.cost += cost;
312
- entry.calls += 1;
313
- entry.tokens.input += tokens.input ?? 0;
314
- entry.tokens.output += tokens.output ?? 0;
315
- this.data.byModel[event.model] = entry;
316
- }
317
- if (event.workflow) {
318
- const entry = this.data.byWorkflow[event.workflow] ?? { cost: 0, executions: 0 };
319
- entry.cost += cost;
320
- if (event.type === "workflow_start") entry.executions += 1;
321
- this.data.byWorkflow[event.workflow] = entry;
322
- }
323
- this.connMgr.broadcast("costs", this.data);
324
- }
325
- /** Get current aggregated cost data. */
326
- getData() {
327
- return this.data;
328
- }
329
- /** Reset all accumulated data. */
330
- reset() {
331
- this.data = {
332
- totalCost: 0,
333
- totalTokens: { input: 0, output: 0, reasoning: 0 },
334
- byAgent: {},
335
- byModel: {},
336
- byWorkflow: {}
717
+ }
718
+ }
719
+ return {
720
+ totalCost: acc.totalCost + cost,
721
+ totalTokens,
722
+ byAgent,
723
+ byModel,
724
+ byWorkflow,
725
+ retry,
726
+ byEmbedder
727
+ };
728
+ }
729
+ function emptyEvalTrendData() {
730
+ return { byEval: {}, totalRuns: 0, totalCost: 0 };
731
+ }
732
+ function extractScores(data) {
733
+ if (!data || typeof data !== "object") return {};
734
+ const result = data;
735
+ const summary = result.summary;
736
+ const scorers = summary?.scorers;
737
+ if (!scorers) return {};
738
+ const out = {};
739
+ for (const [name, entry] of Object.entries(scorers)) {
740
+ if (typeof entry === "number" && Number.isFinite(entry)) {
741
+ out[name] = entry;
742
+ } else if (entry && typeof entry === "object" && Number.isFinite(entry.mean)) {
743
+ out[name] = entry.mean;
744
+ }
745
+ }
746
+ return out;
747
+ }
748
+ function extractCost(data) {
749
+ if (!data || typeof data !== "object") return 0;
750
+ const result = data;
751
+ if (Number.isFinite(result.totalCost)) return result.totalCost;
752
+ const summary = result.summary;
753
+ return Number.isFinite(summary?.totalCost) ? summary.totalCost : 0;
754
+ }
755
+ function extractModel(data) {
756
+ if (!data || typeof data !== "object") return void 0;
757
+ const result = data;
758
+ const metadata = result.metadata;
759
+ const counts = metadata?.modelCounts;
760
+ if (counts && typeof counts === "object" && !Array.isArray(counts)) {
761
+ const entries = Object.entries(counts).filter(
762
+ ([, v]) => typeof v === "number"
763
+ );
764
+ if (entries.length > 0) {
765
+ entries.sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]));
766
+ return entries[0][0];
767
+ }
768
+ }
769
+ const models = metadata?.models;
770
+ if (Array.isArray(models) && typeof models[0] === "string") return models[0];
771
+ return void 0;
772
+ }
773
+ function extractDuration(data) {
774
+ if (!data || typeof data !== "object") return void 0;
775
+ const result = data;
776
+ return Number.isFinite(result.duration) ? result.duration : void 0;
777
+ }
778
+ function computeScoreStats(runs) {
779
+ const scorerNames = /* @__PURE__ */ new Set();
780
+ for (const run of runs) {
781
+ for (const name of Object.keys(run.scores)) scorerNames.add(name);
782
+ }
783
+ const mean = {};
784
+ const std = {};
785
+ for (const name of scorerNames) {
786
+ const values = runs.map((r) => r.scores[name]).filter((v) => v != null);
787
+ if (values.length === 0) continue;
788
+ const m = values.reduce((a, b) => a + b, 0) / values.length;
789
+ mean[name] = m;
790
+ const variance = values.reduce((sum, v) => sum + (v - m) ** 2, 0) / values.length;
791
+ std[name] = Math.sqrt(variance);
792
+ }
793
+ return { mean, std };
794
+ }
795
+ function reduceEvalTrends(acc, entry) {
796
+ const scores = extractScores(entry.data);
797
+ const cost = extractCost(entry.data);
798
+ const model = extractModel(entry.data);
799
+ const duration = extractDuration(entry.data);
800
+ const run = {
801
+ timestamp: entry.timestamp,
802
+ id: entry.id,
803
+ scores,
804
+ cost,
805
+ ...model !== void 0 ? { model } : {},
806
+ ...duration !== void 0 ? { duration } : {}
807
+ };
808
+ const byEval = { ...acc.byEval };
809
+ const prev = byEval[entry.eval];
810
+ const MAX_EVAL_RUNS = 50;
811
+ const allRuns = prev ? [...prev.runs, run] : [run];
812
+ const runs = allRuns.length > MAX_EVAL_RUNS ? allRuns.slice(-MAX_EVAL_RUNS) : allRuns;
813
+ const { mean, std } = computeScoreStats(runs);
814
+ const latestScores = prev && prev.runs.length > 0 && prev.runs[prev.runs.length - 1].timestamp > run.timestamp ? prev.latestScores : scores;
815
+ byEval[entry.eval] = {
816
+ runs,
817
+ latestScores,
818
+ scoreMean: mean,
819
+ scoreStd: std,
820
+ costTotal: (prev?.costTotal ?? 0) + cost,
821
+ runCount: (prev?.runCount ?? 0) + 1
822
+ };
823
+ return {
824
+ byEval,
825
+ totalRuns: acc.totalRuns + 1,
826
+ totalCost: acc.totalCost + cost
827
+ };
828
+ }
829
+ var MAX_DURATIONS = 200;
830
+ function emptyWorkflowStatsData() {
831
+ return { byWorkflow: {}, totalExecutions: 0, failureRate: 0 };
832
+ }
833
+ function percentile(sorted, p) {
834
+ if (sorted.length === 0) return 0;
835
+ const idx = p / 100 * (sorted.length - 1);
836
+ const lower = Math.floor(idx);
837
+ const upper = Math.ceil(idx);
838
+ if (lower === upper) return sorted[lower];
839
+ return sorted[lower] + (sorted[upper] - sorted[lower]) * (idx - lower);
840
+ }
841
+ function reduceWorkflowStats(acc, execution) {
842
+ const byWorkflow = { ...acc.byWorkflow };
843
+ const prev = byWorkflow[execution.workflow] ?? {
844
+ total: 0,
845
+ completed: 0,
846
+ failed: 0,
847
+ durations: [],
848
+ durationSum: 0,
849
+ avgDuration: 0
850
+ };
851
+ const dur = finite(execution.duration);
852
+ const durations = [...prev.durations];
853
+ const insertIdx = durations.findIndex((d) => d > dur);
854
+ if (insertIdx === -1) durations.push(dur);
855
+ else durations.splice(insertIdx, 0, dur);
856
+ if (durations.length > MAX_DURATIONS) durations.shift();
857
+ const total = prev.total + 1;
858
+ const completed = prev.completed + (execution.status === "completed" ? 1 : 0);
859
+ const failed = prev.failed + (execution.status === "failed" ? 1 : 0);
860
+ const durationSum = prev.durationSum + dur;
861
+ const avgDuration = durationSum / total;
862
+ byWorkflow[execution.workflow] = {
863
+ total,
864
+ completed,
865
+ failed,
866
+ durations,
867
+ durationSum,
868
+ avgDuration
869
+ };
870
+ const totalExecutions = acc.totalExecutions + 1;
871
+ const totalFailed = Object.values(byWorkflow).reduce((sum, w) => sum + w.failed, 0);
872
+ const failureRate = totalExecutions > 0 ? totalFailed / totalExecutions : 0;
873
+ return { byWorkflow, totalExecutions, failureRate };
874
+ }
875
+ function getWorkflowPercentiles(entry) {
876
+ return {
877
+ durationP50: percentile(entry.durations, 50),
878
+ durationP95: percentile(entry.durations, 95)
879
+ };
880
+ }
881
+ function enrichWorkflowStats(data) {
882
+ const byWorkflow = {};
883
+ for (const [name, entry] of Object.entries(data.byWorkflow)) {
884
+ const { durationP50, durationP95 } = getWorkflowPercentiles(entry);
885
+ byWorkflow[name] = {
886
+ total: entry.total,
887
+ completed: entry.completed,
888
+ failed: entry.failed,
889
+ durationP50,
890
+ durationP95,
891
+ avgDuration: entry.avgDuration
892
+ };
893
+ }
894
+ return {
895
+ byWorkflow,
896
+ totalExecutions: data.totalExecutions,
897
+ failureRate: data.failureRate
898
+ };
899
+ }
900
+ function emptyTraceStatsData() {
901
+ return {
902
+ eventTypeCounts: {},
903
+ byTool: {},
904
+ retryByAgent: {},
905
+ totalEvents: 0
906
+ };
907
+ }
908
+ function reduceTraceStats(acc, event) {
909
+ const eventTypeCounts = { ...acc.eventTypeCounts };
910
+ eventTypeCounts[event.type] = (eventTypeCounts[event.type] ?? 0) + 1;
911
+ const byTool = { ...acc.byTool };
912
+ if (event.type === "tool_call" || event.type === "tool_denied" || event.type === "tool_approval") {
913
+ const toolName = event.tool;
914
+ const prev = byTool[toolName] ?? { calls: 0, denied: 0, approved: 0 };
915
+ const isDeniedEvent = event.type === "tool_denied";
916
+ const isApprovalEvent = event.type === "tool_approval";
917
+ const eventData = isDeniedEvent || isApprovalEvent ? event.data : void 0;
918
+ const isApproved = isDeniedEvent && eventData?.approved === true || isApprovalEvent && eventData?.approved === true;
919
+ const isDenied = isDeniedEvent && !eventData?.approved || isApprovalEvent && eventData?.approved === false;
920
+ byTool[toolName] = {
921
+ calls: prev.calls + (event.type === "tool_call" ? 1 : 0),
922
+ denied: prev.denied + (isDenied ? 1 : 0),
923
+ approved: prev.approved + (isApproved ? 1 : 0)
924
+ };
925
+ }
926
+ const retryByAgent = { ...acc.retryByAgent };
927
+ if (event.agent && event.type === "agent_call") {
928
+ const data = event.data;
929
+ if (data?.retryReason) {
930
+ const prev = retryByAgent[event.agent] ?? { schema: 0, validate: 0, guardrail: 0 };
931
+ const reason = data.retryReason;
932
+ if (reason in prev) {
933
+ retryByAgent[event.agent] = { ...prev, [reason]: prev[reason] + 1 };
934
+ }
935
+ }
936
+ }
937
+ return {
938
+ eventTypeCounts,
939
+ byTool,
940
+ retryByAgent,
941
+ totalEvents: acc.totalEvents + 1
942
+ };
943
+ }
944
+
945
+ // src/server/aggregates/execution-aggregator.ts
946
+ var ExecutionAggregator = class {
947
+ snaps;
948
+ interval;
949
+ listener;
950
+ options;
951
+ /** Generation counter to prevent stale async fold after rebuild. */
952
+ generation = 0;
953
+ constructor(options) {
954
+ this.options = options;
955
+ this.snaps = new AggregateSnapshots(
956
+ options.windows,
957
+ options.emptyState,
958
+ options.connMgr,
959
+ options.channel,
960
+ options.broadcastTransform
961
+ );
962
+ }
963
+ async start() {
964
+ await this.rebuild();
965
+ this.listener = (event) => {
966
+ if (!isLogEvent(event, "workflow_end")) return;
967
+ const gen = this.generation;
968
+ this.options.runtime.getExecution(event.executionId).then((exec) => {
969
+ if (this.generation !== gen) return;
970
+ if (exec) {
971
+ this.snaps.fold(exec.startedAt, (prev) => this.options.reducer(prev, exec));
972
+ }
973
+ }).catch((err) => console.error("[axl-studio] execution fold failed:", err));
337
974
  };
975
+ this.options.runtime.on("trace", this.listener);
976
+ this.interval = setInterval(
977
+ () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err)),
978
+ REBUILD_INTERVAL_MS
979
+ );
980
+ }
981
+ async rebuild() {
982
+ this.generation++;
983
+ const executions = await this.options.runtime.getExecutions();
984
+ const cap = this.options.executionCap ?? 2e3;
985
+ const capped = executions.slice(0, cap);
986
+ const now = Date.now();
987
+ const fresh = new Map(
988
+ this.options.windows.map((w) => [w, this.options.emptyState()])
989
+ );
990
+ for (const exec of capped) {
991
+ for (const window of this.options.windows) {
992
+ if (withinWindow(exec.startedAt, window, now)) {
993
+ fresh.set(window, this.options.reducer(fresh.get(window), exec));
994
+ }
995
+ }
996
+ }
997
+ this.snaps.replace(fresh);
998
+ }
999
+ getSnapshot(window) {
1000
+ return this.snaps.get(window);
1001
+ }
1002
+ getAllSnapshots() {
1003
+ return this.snaps.getAll();
1004
+ }
1005
+ close() {
1006
+ if (this.listener) this.options.runtime.off("trace", this.listener);
1007
+ if (this.interval) clearInterval(this.interval);
1008
+ }
1009
+ };
1010
+
1011
+ // src/server/aggregates/eval-aggregator.ts
1012
+ var EvalAggregator = class {
1013
+ snaps;
1014
+ interval;
1015
+ listener;
1016
+ options;
1017
+ constructor(options) {
1018
+ this.options = options;
1019
+ this.snaps = new AggregateSnapshots(
1020
+ options.windows,
1021
+ options.emptyState,
1022
+ options.connMgr,
1023
+ options.channel,
1024
+ options.broadcastTransform
1025
+ );
1026
+ }
1027
+ async start() {
1028
+ await this.rebuild();
1029
+ this.listener = (entry) => {
1030
+ this.snaps.fold(entry.timestamp, (prev) => this.options.reducer(prev, entry));
1031
+ };
1032
+ this.options.runtime.on("eval_result", this.listener);
1033
+ this.interval = setInterval(
1034
+ () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err)),
1035
+ REBUILD_INTERVAL_MS
1036
+ );
1037
+ }
1038
+ async rebuild() {
1039
+ const history = await this.options.runtime.getEvalHistory();
1040
+ const cap = this.options.entryCap ?? 500;
1041
+ const capped = history.slice(0, cap);
1042
+ const now = Date.now();
1043
+ const fresh = new Map(
1044
+ this.options.windows.map((w) => [w, this.options.emptyState()])
1045
+ );
1046
+ for (const entry of capped) {
1047
+ for (const window of this.options.windows) {
1048
+ if (withinWindow(entry.timestamp, window, now)) {
1049
+ fresh.set(window, this.options.reducer(fresh.get(window), entry));
1050
+ }
1051
+ }
1052
+ }
1053
+ this.snaps.replace(fresh);
1054
+ }
1055
+ getSnapshot(window) {
1056
+ return this.snaps.get(window);
1057
+ }
1058
+ getAllSnapshots() {
1059
+ return this.snaps.getAll();
1060
+ }
1061
+ close() {
1062
+ if (this.listener) this.options.runtime.off("eval_result", this.listener);
1063
+ if (this.interval) clearInterval(this.interval);
338
1064
  }
339
1065
  };
340
1066
 
@@ -405,15 +1131,22 @@ function createWorkflowRoutes(connMgr) {
405
1131
  if (body.stream) {
406
1132
  const stream = runtime.stream(name, body.input ?? {}, { metadata: body.metadata });
407
1133
  const executionId = `stream-${Date.now()}`;
1134
+ const redactOn = runtime.isRedactEnabled();
408
1135
  (async () => {
409
1136
  for await (const event of stream) {
410
- connMgr.broadcastWithWildcard(`execution:${executionId}`, event);
1137
+ connMgr.broadcastWithWildcard(
1138
+ `execution:${executionId}`,
1139
+ redactStreamEvent(event, redactOn)
1140
+ );
411
1141
  }
412
1142
  })();
413
1143
  return c.json({ ok: true, data: { executionId, streaming: true } });
414
1144
  }
415
1145
  const result = await runtime.execute(name, body.input ?? {}, { metadata: body.metadata });
416
- return c.json({ ok: true, data: { result } });
1146
+ return c.json({
1147
+ ok: true,
1148
+ data: { result: redactValue(result, runtime.isRedactEnabled()) }
1149
+ });
417
1150
  });
418
1151
  return app6;
419
1152
  }
@@ -424,7 +1157,10 @@ var app = new import_hono3.Hono();
424
1157
  app.get("/executions", async (c) => {
425
1158
  const runtime = c.get("runtime");
426
1159
  const executions = await runtime.getExecutions();
427
- return c.json({ ok: true, data: executions });
1160
+ return c.json({
1161
+ ok: true,
1162
+ data: redactExecutionList(executions, runtime.isRedactEnabled())
1163
+ });
428
1164
  });
429
1165
  app.get("/executions/:id", async (c) => {
430
1166
  const runtime = c.get("runtime");
@@ -436,7 +1172,10 @@ app.get("/executions/:id", async (c) => {
436
1172
  404
437
1173
  );
438
1174
  }
439
- return c.json({ ok: true, data: execution });
1175
+ return c.json({
1176
+ ok: true,
1177
+ data: redactExecutionInfo(execution, runtime.isRedactEnabled())
1178
+ });
440
1179
  });
441
1180
  app.post("/executions/:id/abort", (c) => {
442
1181
  const runtime = c.get("runtime");
@@ -470,7 +1209,16 @@ function createSessionRoutes(connMgr) {
470
1209
  const id = c.req.param("id");
471
1210
  const history = await store.getSession(id);
472
1211
  const handoffHistory = await store.getSessionMeta(id, "handoffHistory");
473
- return c.json({ ok: true, data: { id, history, handoffHistory: handoffHistory ?? [] } });
1212
+ return c.json({
1213
+ ok: true,
1214
+ data: {
1215
+ id,
1216
+ history: redactSessionHistory(history, runtime.isRedactEnabled()),
1217
+ // HandoffRecord has no content fields (source/target/mode/
1218
+ // timestamp/duration) — nothing to scrub.
1219
+ handoffHistory: handoffHistory ?? []
1220
+ }
1221
+ });
474
1222
  });
475
1223
  app6.post("/sessions/:id/send", async (c) => {
476
1224
  const runtime = c.get("runtime");
@@ -640,7 +1388,10 @@ app3.post("/tools/:name/test", async (c) => {
640
1388
  const body = await c.req.json();
641
1389
  const ctx = runtime.createContext();
642
1390
  const result = await tool.run(ctx, body.input);
643
- return c.json({ ok: true, data: { result } });
1391
+ return c.json({
1392
+ ok: true,
1393
+ data: { result: redactValue(result, runtime.isRedactEnabled()) }
1394
+ });
644
1395
  });
645
1396
  var tools_default = app3;
646
1397
 
@@ -655,7 +1406,7 @@ app4.get("/memory/:scope", async (c) => {
655
1406
  return c.json({ ok: true, data: [] });
656
1407
  }
657
1408
  const entries = await store.getAllMemory(scope);
658
- return c.json({ ok: true, data: entries });
1409
+ return c.json({ ok: true, data: redactMemoryList(entries, runtime.isRedactEnabled()) });
659
1410
  });
660
1411
  app4.get("/memory/:scope/:key", async (c) => {
661
1412
  const runtime = c.get("runtime");
@@ -675,7 +1426,10 @@ app4.get("/memory/:scope/:key", async (c) => {
675
1426
  404
676
1427
  );
677
1428
  }
678
- return c.json({ ok: true, data: { key, value } });
1429
+ return c.json({
1430
+ ok: true,
1431
+ data: { key, value: redactMemoryValue(value, runtime.isRedactEnabled()) }
1432
+ });
679
1433
  });
680
1434
  app4.put("/memory/:scope/:key", async (c) => {
681
1435
  const runtime = c.get("runtime");
@@ -720,7 +1474,10 @@ var app5 = new import_hono8.Hono();
720
1474
  app5.get("/decisions", async (c) => {
721
1475
  const runtime = c.get("runtime");
722
1476
  const decisions = await runtime.getPendingDecisions();
723
- return c.json({ ok: true, data: decisions });
1477
+ return c.json({
1478
+ ok: true,
1479
+ data: redactPendingDecisionList(decisions, runtime.isRedactEnabled())
1480
+ });
724
1481
  });
725
1482
  app5.post("/decisions/:executionId/resolve", async (c) => {
726
1483
  const runtime = c.get("runtime");
@@ -736,11 +1493,23 @@ var import_hono9 = require("hono");
736
1493
  function createCostRoutes(costAggregator) {
737
1494
  const app6 = new import_hono9.Hono();
738
1495
  app6.get("/costs", (c) => {
739
- return c.json({ ok: true, data: costAggregator.getData() });
1496
+ if (c.req.query("windows") === "all") {
1497
+ return c.json({ ok: true, data: costAggregator.getAllSnapshots() });
1498
+ }
1499
+ const window = parseWindowParam(c.req.query("window"));
1500
+ return c.json({ ok: true, data: costAggregator.getSnapshot(window) });
740
1501
  });
741
1502
  app6.post("/costs/reset", (c) => {
742
- costAggregator.reset();
743
- return c.json({ ok: true, data: { reset: true } });
1503
+ return c.json(
1504
+ {
1505
+ ok: false,
1506
+ error: {
1507
+ code: "GONE",
1508
+ message: "POST /api/costs/reset was removed in @axlsdk/studio 0.15. Cost aggregates are now time-windowed and rebuilt from StateStore history. Use GET /api/costs?window=24h|7d|30d|all to narrow the view instead of resetting."
1509
+ }
1510
+ },
1511
+ 410
1512
+ );
744
1513
  });
745
1514
  return app6;
746
1515
  }
@@ -748,8 +1517,9 @@ function createCostRoutes(costAggregator) {
748
1517
  // src/server/routes/evals.ts
749
1518
  var import_node_crypto = require("crypto");
750
1519
  var import_hono10 = require("hono");
751
- function createEvalRoutes(evalLoader) {
1520
+ function createEvalRoutes(connMgr, evalLoader) {
752
1521
  const app6 = new import_hono10.Hono();
1522
+ const activeRuns = /* @__PURE__ */ new Map();
753
1523
  app6.get("/evals", async (c) => {
754
1524
  if (evalLoader) await evalLoader();
755
1525
  const runtime = c.get("runtime");
@@ -759,7 +1529,10 @@ function createEvalRoutes(evalLoader) {
759
1529
  app6.get("/evals/history", async (c) => {
760
1530
  const runtime = c.get("runtime");
761
1531
  const history = await runtime.getEvalHistory();
762
- return c.json({ ok: true, data: history });
1532
+ return c.json({
1533
+ ok: true,
1534
+ data: redactEvalHistoryList(history, runtime.isRedactEnabled())
1535
+ });
763
1536
  });
764
1537
  app6.delete("/evals/history/:id", async (c) => {
765
1538
  const runtime = c.get("runtime");
@@ -780,6 +1553,7 @@ function createEvalRoutes(evalLoader) {
780
1553
  if (evalLoader) await evalLoader();
781
1554
  const runtime = c.get("runtime");
782
1555
  const name = c.req.param("name");
1556
+ const redactOn = runtime.isRedactEnabled();
783
1557
  const entry = runtime.getRegisteredEval(name);
784
1558
  if (!entry) {
785
1559
  return c.json(
@@ -788,13 +1562,89 @@ function createEvalRoutes(evalLoader) {
788
1562
  );
789
1563
  }
790
1564
  let runs = 1;
1565
+ let stream = false;
1566
+ let captureTraces = false;
791
1567
  try {
792
1568
  const body = await c.req.json().catch(() => ({}));
793
1569
  if (typeof body.runs === "number" && Number.isFinite(body.runs) && body.runs > 1) {
794
1570
  runs = Math.min(Math.floor(body.runs), 25);
795
1571
  }
1572
+ if (body.stream === true) {
1573
+ stream = true;
1574
+ }
1575
+ if (body.captureTraces === true) {
1576
+ captureTraces = true;
1577
+ }
796
1578
  } catch {
797
1579
  }
1580
+ if (stream) {
1581
+ const evalRunId = `eval-${(0, import_node_crypto.randomUUID)()}`;
1582
+ const ac = new AbortController();
1583
+ activeRuns.set(evalRunId, ac);
1584
+ (async () => {
1585
+ try {
1586
+ if (runs > 1) {
1587
+ const runGroupId = (0, import_node_crypto.randomUUID)();
1588
+ const results = [];
1589
+ for (let r = 0; r < runs; r++) {
1590
+ if (ac.signal.aborted) break;
1591
+ const result = await runtime.runRegisteredEval(name, {
1592
+ metadata: { runGroupId, runIndex: r },
1593
+ signal: ac.signal,
1594
+ captureTraces,
1595
+ onProgress: (event) => {
1596
+ if (event.type === "run_done") return;
1597
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1598
+ ...event,
1599
+ run: r + 1,
1600
+ totalRuns: runs
1601
+ });
1602
+ }
1603
+ });
1604
+ results.push(result);
1605
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1606
+ type: "run_done",
1607
+ run: r + 1,
1608
+ totalRuns: runs
1609
+ });
1610
+ }
1611
+ if (results.length > 0) {
1612
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1613
+ type: "done",
1614
+ evalResultId: results[0].id,
1615
+ runGroupId
1616
+ });
1617
+ } else {
1618
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1619
+ type: "error",
1620
+ message: "All runs were cancelled"
1621
+ });
1622
+ }
1623
+ } else {
1624
+ const result = await runtime.runRegisteredEval(name, {
1625
+ signal: ac.signal,
1626
+ captureTraces,
1627
+ onProgress: (event) => {
1628
+ if (event.type === "run_done") return;
1629
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, event);
1630
+ }
1631
+ });
1632
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1633
+ type: "done",
1634
+ evalResultId: result.id
1635
+ });
1636
+ }
1637
+ } catch (err) {
1638
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1639
+ type: "error",
1640
+ message: redactErrorMessage(err, redactOn)
1641
+ });
1642
+ } finally {
1643
+ activeRuns.delete(evalRunId);
1644
+ }
1645
+ })();
1646
+ return c.json({ ok: true, data: { evalRunId } });
1647
+ }
798
1648
  try {
799
1649
  if (runs > 1) {
800
1650
  const { aggregateRuns } = await import("@axlsdk/eval");
@@ -802,27 +1652,53 @@ function createEvalRoutes(evalLoader) {
802
1652
  const results = [];
803
1653
  for (let r = 0; r < runs; r++) {
804
1654
  const result2 = await runtime.runRegisteredEval(name, {
805
- metadata: { runGroupId, runIndex: r }
1655
+ metadata: { runGroupId, runIndex: r },
1656
+ captureTraces
806
1657
  });
807
1658
  results.push(result2);
808
1659
  }
809
1660
  const typedResults = results;
810
1661
  const aggregate = aggregateRuns(typedResults);
811
1662
  const first = typedResults[0];
812
- const result = { ...first, _multiRun: { aggregate, allRuns: typedResults } };
813
- return c.json({ ok: true, data: result });
1663
+ const result = {
1664
+ ...first,
1665
+ _multiRun: { aggregate, allRuns: typedResults }
1666
+ };
1667
+ return c.json({
1668
+ ok: true,
1669
+ data: redactEvalResult(result, redactOn)
1670
+ });
814
1671
  } else {
815
- const result = await runtime.runRegisteredEval(name);
816
- return c.json({ ok: true, data: result });
1672
+ const result = await runtime.runRegisteredEval(name, { captureTraces });
1673
+ return c.json({
1674
+ ok: true,
1675
+ data: redactEvalResult(result, redactOn)
1676
+ });
817
1677
  }
818
1678
  } catch (err) {
819
- const message = err instanceof Error ? err.message : String(err);
820
- return c.json({ ok: false, error: { code: "EVAL_ERROR", message } }, 400);
1679
+ return c.json(
1680
+ { ok: false, error: { code: "EVAL_ERROR", message: redactErrorMessage(err, redactOn) } },
1681
+ 400
1682
+ );
821
1683
  }
822
1684
  });
1685
+ app6.post("/evals/runs/:evalRunId/cancel", (c) => {
1686
+ const evalRunId = c.req.param("evalRunId");
1687
+ const ac = activeRuns.get(evalRunId);
1688
+ if (!ac) {
1689
+ return c.json(
1690
+ { ok: false, error: { code: "NOT_FOUND", message: "No active eval run found" } },
1691
+ 404
1692
+ );
1693
+ }
1694
+ ac.abort();
1695
+ activeRuns.delete(evalRunId);
1696
+ return c.json({ ok: true, data: { cancelled: true } });
1697
+ });
823
1698
  app6.post("/evals/:name/rescore", async (c) => {
824
1699
  if (evalLoader) await evalLoader();
825
1700
  const runtime = c.get("runtime");
1701
+ const redactOn = runtime.isRedactEnabled();
826
1702
  const name = c.req.param("name");
827
1703
  const body = await c.req.json();
828
1704
  if (!body.resultId || typeof body.resultId !== "string") {
@@ -860,14 +1736,20 @@ function createEvalRoutes(evalLoader) {
860
1736
  timestamp: Date.now(),
861
1737
  data: result
862
1738
  });
863
- return c.json({ ok: true, data: result });
1739
+ return c.json({
1740
+ ok: true,
1741
+ data: redactEvalResult(result, redactOn)
1742
+ });
864
1743
  } catch (err) {
865
- const message = err instanceof Error ? err.message : String(err);
866
- return c.json({ ok: false, error: { code: "EVAL_ERROR", message } }, 400);
1744
+ return c.json(
1745
+ { ok: false, error: { code: "EVAL_ERROR", message: redactErrorMessage(err, redactOn) } },
1746
+ 400
1747
+ );
867
1748
  }
868
1749
  });
869
1750
  app6.post("/evals/compare", async (c) => {
870
1751
  const runtime = c.get("runtime");
1752
+ const redactOn = runtime.isRedactEnabled();
871
1753
  const body = await c.req.json();
872
1754
  const validateIdParam = (v, name) => {
873
1755
  if (typeof v === "string") return v === "" ? `${name} must be non-empty` : null;
@@ -935,8 +1817,13 @@ function createEvalRoutes(evalLoader) {
935
1817
  const result = await runtime.evalCompare(baseline, candidate, body.options);
936
1818
  return c.json({ ok: true, data: result });
937
1819
  } catch (err) {
938
- const message = err instanceof Error ? err.message : String(err);
939
- return c.json({ ok: false, error: { code: "COMPARE_FAILED", message } }, 400);
1820
+ return c.json(
1821
+ {
1822
+ ok: false,
1823
+ error: { code: "COMPARE_FAILED", message: redactErrorMessage(err, redactOn) }
1824
+ },
1825
+ 400
1826
+ );
940
1827
  }
941
1828
  });
942
1829
  app6.post("/evals/import", async (c) => {
@@ -998,7 +1885,11 @@ function createEvalRoutes(evalLoader) {
998
1885
  });
999
1886
  return c.json({ ok: true, data: { id, eval: evalName, timestamp } });
1000
1887
  });
1001
- return app6;
1888
/** Aborts every streaming eval run that is still tracked, then empties the registry. */
function closeActiveRuns() {
  activeRuns.forEach((controller) => {
    controller.abort();
  });
  activeRuns.clear();
}
1892
+ return { app: app6, closeActiveRuns };
1002
1893
  }
1003
1894
 
1004
1895
  // src/server/routes/playground.ts
@@ -1036,13 +1927,14 @@ function createPlaygroundRoutes(connMgr) {
1036
1927
  const store = runtime.getStateStore();
1037
1928
  const history = await store.getSession(sessionId);
1038
1929
  history.push({ role: "user", content: body.message });
1930
+ const redactOn = runtime.isRedactEnabled();
1931
+ const broadcast = (event) => {
1932
+ connMgr.broadcastWithWildcard(`execution:${executionId}`, redactStreamEvent(event, redactOn));
1933
+ };
1039
1934
  const ctx = runtime.createContext({
1040
1935
  sessionHistory: history,
1041
1936
  onToken: (token) => {
1042
- connMgr.broadcastWithWildcard(`execution:${executionId}`, {
1043
- type: "token",
1044
- data: token
1045
- });
1937
+ broadcast({ type: "token", data: token });
1046
1938
  }
1047
1939
  });
1048
1940
  (async () => {
@@ -1051,12 +1943,9 @@ function createPlaygroundRoutes(connMgr) {
1051
1943
  const resultText = typeof result === "string" ? result : JSON.stringify(result);
1052
1944
  history.push({ role: "assistant", content: resultText });
1053
1945
  await store.saveSession(sessionId, history);
1054
- connMgr.broadcastWithWildcard(`execution:${executionId}`, {
1055
- type: "done",
1056
- data: resultText
1057
- });
1946
+ broadcast({ type: "done", data: resultText });
1058
1947
  } catch (err) {
1059
- connMgr.broadcastWithWildcard(`execution:${executionId}`, {
1948
+ broadcast({
1060
1949
  type: "error",
1061
1950
  message: err instanceof Error ? err.message : String(err)
1062
1951
  });
@@ -1070,12 +1959,78 @@ function createPlaygroundRoutes(connMgr) {
1070
1959
  return app6;
1071
1960
  }
1072
1961
 
1962
+ // src/server/routes/eval-trends.ts
1963
+ var import_hono12 = require("hono");
1964
/**
 * Builds the Hono sub-app serving eval-trend aggregates.
 *
 * GET /eval-trends?window=... responds with `{ ok: true, data }`, where `data`
 * is the aggregator snapshot for the window produced by parseWindowParam.
 */
function createEvalTrendsRoutes(aggregator) {
  const router = new import_hono12.Hono();
  router.get("/eval-trends", (ctx) => {
    const selectedWindow = parseWindowParam(ctx.req.query("window"));
    const snapshot = aggregator.getSnapshot(selectedWindow);
    return ctx.json({ ok: true, data: snapshot });
  });
  return router;
}
1972
+
1973
+ // src/server/routes/workflow-stats.ts
1974
+ var import_hono13 = require("hono");
1975
/**
 * Builds the Hono sub-app serving workflow statistics.
 *
 * GET /workflow-stats?window=... responds with `{ ok: true, data }`, where
 * `data` is the aggregator snapshot for the window produced by
 * parseWindowParam, passed through enrichWorkflowStats.
 */
function createWorkflowStatsRoutes(aggregator) {
  const router = new import_hono13.Hono();
  router.get("/workflow-stats", (ctx) => {
    const selectedWindow = parseWindowParam(ctx.req.query("window"));
    const enriched = enrichWorkflowStats(aggregator.getSnapshot(selectedWindow));
    return ctx.json({ ok: true, data: enriched });
  });
  return router;
}
1983
+
1984
+ // src/server/routes/trace-stats.ts
1985
+ var import_hono14 = require("hono");
1986
/**
 * Builds the Hono sub-app serving trace statistics.
 *
 * GET /trace-stats?window=... responds with `{ ok: true, data }`, where `data`
 * is the aggregator snapshot for the window produced by parseWindowParam.
 */
function createTraceStatsRoutes(aggregator) {
  const router = new import_hono14.Hono();
  router.get("/trace-stats", (ctx) => {
    const selectedWindow = parseWindowParam(ctx.req.query("window"));
    const snapshot = aggregator.getSnapshot(selectedWindow);
    return ctx.json({ ok: true, data: snapshot });
  });
  return router;
}
1994
+
1073
1995
  // src/server/index.ts
1074
1996
  function createServer(options) {
1075
1997
  const { runtime, staticRoot, basePath = "", readOnly = false } = options;
1076
- const app6 = new import_hono12.Hono();
1998
+ const app6 = new import_hono15.Hono();
1077
1999
  const connMgr = new ConnectionManager();
1078
- const costAggregator = new CostAggregator(connMgr);
2000
+ const windows = ["24h", "7d", "30d", "all"];
2001
+ const costAggregator = new TraceAggregator({
2002
+ runtime,
2003
+ connMgr,
2004
+ channel: "costs",
2005
+ reducer: reduceCost,
2006
+ emptyState: emptyCostData,
2007
+ windows
2008
+ });
2009
+ const workflowStatsAggregator = new ExecutionAggregator({
2010
+ runtime,
2011
+ connMgr,
2012
+ channel: "workflow-stats",
2013
+ reducer: reduceWorkflowStats,
2014
+ emptyState: emptyWorkflowStatsData,
2015
+ windows,
2016
+ broadcastTransform: enrichWorkflowStats
2017
+ });
2018
+ const traceStatsAggregator = new TraceAggregator({
2019
+ runtime,
2020
+ connMgr,
2021
+ channel: "trace-stats",
2022
+ reducer: reduceTraceStats,
2023
+ emptyState: emptyTraceStatsData,
2024
+ windows
2025
+ });
2026
+ const evalTrendsAggregator = new EvalAggregator({
2027
+ runtime,
2028
+ connMgr,
2029
+ channel: "eval-trends",
2030
+ reducer: reduceEvalTrends,
2031
+ emptyState: emptyEvalTrendData,
2032
+ windows
2033
+ });
1079
2034
  if (options.cors !== false) {
1080
2035
  app6.use("*", (0, import_cors.cors)());
1081
2036
  }
@@ -1093,11 +2048,11 @@ function createServer(options) {
1093
2048
  /^PUT \/api\/memory(\/|$)/,
1094
2049
  /^DELETE \/api\/memory(\/|$)/,
1095
2050
  /^POST \/api\/decisions(\/|$)/,
1096
- /^POST \/api\/costs(\/|$)/,
1097
2051
  /^POST \/api\/tools(\/|$)/,
1098
2052
  /^POST \/api\/evals\/import$/,
1099
2053
  /^POST \/api\/evals\/[^/]+\/run$/,
1100
2054
  /^POST \/api\/evals\/[^/]+\/rescore$/,
2055
+ /^POST \/api\/evals\/runs\/[^/]+\/cancel$/,
1101
2056
  /^DELETE \/api\/evals\/history\/[^/]+$/,
1102
2057
  /^POST \/api\/playground(\/|$)/
1103
2058
  ];
@@ -1117,7 +2072,7 @@ function createServer(options) {
1117
2072
  await next();
1118
2073
  });
1119
2074
  }
1120
- const api = new import_hono12.Hono();
2075
+ const api = new import_hono15.Hono();
1121
2076
  api.route("/", createHealthRoutes(readOnly));
1122
2077
  api.route("/", createWorkflowRoutes(connMgr));
1123
2078
  api.route("/", executions_default);
@@ -1127,7 +2082,11 @@ function createServer(options) {
1127
2082
  api.route("/", memory_default);
1128
2083
  api.route("/", decisions_default);
1129
2084
  api.route("/", createCostRoutes(costAggregator));
1130
- api.route("/", createEvalRoutes(options.evalLoader));
2085
+ api.route("/", createEvalTrendsRoutes(evalTrendsAggregator));
2086
+ api.route("/", createWorkflowStatsRoutes(workflowStatsAggregator));
2087
+ api.route("/", createTraceStatsRoutes(traceStatsAggregator));
2088
+ const { app: evalApp, closeActiveRuns } = createEvalRoutes(connMgr, options.evalLoader);
2089
+ api.route("/", evalApp);
1131
2090
  api.route("/", createPlaygroundRoutes(connMgr));
1132
2091
  app6.route("/api", api);
1133
2092
  const traceListener = (event) => {
@@ -1135,12 +2094,17 @@ function createServer(options) {
1135
2094
  if (traceEvent.executionId) {
1136
2095
  connMgr.broadcastWithWildcard(`trace:${traceEvent.executionId}`, traceEvent);
1137
2096
  }
1138
- costAggregator.onTrace(traceEvent);
1139
2097
  if (traceEvent.type === "await_human") {
1140
2098
  connMgr.broadcast("decisions", traceEvent);
1141
2099
  }
1142
2100
  };
1143
2101
  runtime.on("trace", traceListener);
2102
+ const aggregatorStartPromise = Promise.all([
2103
+ costAggregator.start(),
2104
+ workflowStatsAggregator.start(),
2105
+ traceStatsAggregator.start(),
2106
+ evalTrendsAggregator.start()
2107
+ ]).catch((err) => console.error("[axl-studio] aggregator start failed:", err));
1144
2108
  if (staticRoot) {
1145
2109
  const indexPath = (0, import_node_path.resolve)(staticRoot, "index.html");
1146
2110
  let spaHtml;
@@ -1190,9 +2154,22 @@ function createServer(options) {
1190
2154
  app: app6,
1191
2155
  connMgr,
1192
2156
  costAggregator,
2157
+ workflowStatsAggregator,
2158
+ traceStatsAggregator,
2159
+ evalTrendsAggregator,
2160
+ aggregatorStartPromise,
1193
2161
  /** Create WS handlers. Call before the static/SPA routes are reached. */
1194
2162
  createWsHandlers: () => createWsHandlers(connMgr),
1195
- traceListener
2163
+ traceListener,
2164
+ /** Abort all active streaming eval runs. */
2165
+ closeActiveRuns,
2166
+ /** Close all aggregators (clear intervals and unsubscribe listeners). */
2167
+ closeAggregators: () => {
2168
+ costAggregator.close();
2169
+ workflowStatsAggregator.close();
2170
+ traceStatsAggregator.close();
2171
+ evalTrendsAggregator.close();
2172
+ }
1196
2173
  };
1197
2174
  }
1198
2175
 
@@ -1374,7 +2351,13 @@ async function registerConditions(conditions) {
1374
2351
  // src/middleware.ts
1375
2352
  var import_meta2 = {};
1376
2353
  function createStudioMiddleware(options) {
1377
- const { runtime, serveClient = true, verifyUpgrade, readOnly = false } = options;
2354
+ const {
2355
+ runtime,
2356
+ serveClient = true,
2357
+ verifyUpgrade,
2358
+ readOnly = false,
2359
+ filterTraceEvent
2360
+ } = options;
1378
2361
  const basePath = normalizeBasePath(options.basePath);
1379
2362
  const staticRoot = serveClient ? resolveClientDist() : void 0;
1380
2363
  if (serveClient && !staticRoot) {
@@ -1384,7 +2367,7 @@ function createStudioMiddleware(options) {
1384
2367
  );
1385
2368
  }
1386
2369
  const evalLoader = options.evals ? createEvalLoader(options.evals, runtime) : void 0;
1387
- const { app: app6, connMgr, traceListener } = createServer({
2370
+ const { app: app6, connMgr, traceListener, closeActiveRuns, closeAggregators } = createServer({
1388
2371
  runtime,
1389
2372
  staticRoot,
1390
2373
  basePath,
@@ -1393,6 +2376,9 @@ function createStudioMiddleware(options) {
1393
2376
  // Host framework owns CORS policy
1394
2377
  evalLoader
1395
2378
  });
2379
+ if (filterTraceEvent) {
2380
+ connMgr.setFilter(filterTraceEvent);
2381
+ }
1396
2382
  if (process.env.NODE_ENV === "production" && !verifyUpgrade) {
1397
2383
  console.warn(
1398
2384
  "[axl-studio] WARNING: Studio middleware mounted in production without verifyUpgrade. WebSocket connections are not authenticated. All registered workflows, tools, and agents are accessible. See https://axlsdk.com/docs/studio/security"
@@ -1433,7 +2419,7 @@ function createStudioMiddleware(options) {
1433
2419
  }
1434
2420
  });
1435
2421
  }
1436
- function handleWebSocket(ws) {
2422
+ function handleWebSocket(ws, metadata) {
1437
2423
  if (closed) {
1438
2424
  ws.close();
1439
2425
  return;
@@ -1443,6 +2429,9 @@ function createStudioMiddleware(options) {
1443
2429
  close: () => ws.close()
1444
2430
  };
1445
2431
  connMgr.add(socket);
2432
+ if (metadata !== void 0) {
2433
+ connMgr.setMetadata(socket, metadata);
2434
+ }
1446
2435
  ws.on("message", (raw) => {
1447
2436
  const reply = handleWsMessage(String(raw), socket, connMgr);
1448
2437
  if (reply) ws.send(reply);
@@ -1465,32 +2454,42 @@ function createStudioMiddleware(options) {
1465
2454
  upgradeHandler = async (req, socket, head) => {
1466
2455
  const pathname = new URL(req.url, `http://${req.headers.host}`).pathname;
1467
2456
  if (pathname !== wsPath) return;
2457
+ if (closed) {
2458
+ socket.destroy();
2459
+ return;
2460
+ }
2461
+ let connectionMetadata;
1468
2462
  if (verifyUpgrade) {
1469
2463
  try {
1470
- const allowed = await verifyUpgrade(req);
2464
+ const result = await verifyUpgrade(req);
2465
+ const allowed = typeof result === "boolean" ? result : result.allowed;
1471
2466
  if (!allowed) {
1472
2467
  socket.write("HTTP/1.1 401 Unauthorized\r\n\r\n");
1473
2468
  socket.destroy();
1474
2469
  return;
1475
2470
  }
2471
+ if (typeof result === "object" && result !== null) {
2472
+ connectionMetadata = result.metadata;
2473
+ }
1476
2474
  } catch {
1477
2475
  socket.write("HTTP/1.1 403 Forbidden\r\n\r\n");
1478
2476
  socket.destroy();
1479
2477
  return;
1480
2478
  }
1481
2479
  }
1482
- if (!wss) {
2480
+ if (closed || !wss) {
1483
2481
  socket.destroy();
1484
2482
  return;
1485
2483
  }
1486
2484
  wss.handleUpgrade(req, socket, head, (ws) => {
1487
- handleWebSocket(ws);
2485
+ handleWebSocket(ws, connectionMetadata);
1488
2486
  });
1489
2487
  };
1490
2488
  server.on("upgrade", upgradeHandler);
1491
2489
  }
1492
2490
  function close() {
1493
2491
  closed = true;
2492
+ closeActiveRuns();
1494
2493
  connMgr.closeAll();
1495
2494
  if (upgradeHandler && serverRef) {
1496
2495
  serverRef.removeListener("upgrade", upgradeHandler);
@@ -1504,6 +2503,7 @@ function createStudioMiddleware(options) {
1504
2503
  if (traceListener) {
1505
2504
  runtime.removeListener("trace", traceListener);
1506
2505
  }
2506
+ closeAggregators();
1507
2507
  }
1508
2508
  return {
1509
2509
  handler,