@axlsdk/studio 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -10
- package/dist/chunk-RE6VPUXA.js +2213 -0
- package/dist/chunk-RE6VPUXA.js.map +1 -0
- package/dist/cli.cjs +1191 -143
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +1 -1
- package/dist/client/assets/index-ClajLxib.js +288 -0
- package/dist/client/assets/index-DnHL_gtF.css +1 -0
- package/dist/client/index.html +2 -2
- package/dist/connection-manager-DAuqk9lM.d.cts +166 -0
- package/dist/connection-manager-DAuqk9lM.d.ts +166 -0
- package/dist/middleware.cjs +1222 -150
- package/dist/middleware.cjs.map +1 -1
- package/dist/middleware.d.cts +76 -6
- package/dist/middleware.d.ts +76 -6
- package/dist/middleware.js +32 -8
- package/dist/middleware.js.map +1 -1
- package/dist/server/index.cjs +1194 -142
- package/dist/server/index.cjs.map +1 -1
- package/dist/server/index.d.cts +171 -28
- package/dist/server/index.d.ts +171 -28
- package/dist/server/index.js +7 -3
- package/package.json +13 -9
- package/dist/chunk-HUKUQDYL.js +0 -1163
- package/dist/chunk-HUKUQDYL.js.map +0 -1
- package/dist/client/assets/index-7aDhMztu.css +0 -1
- package/dist/client/assets/index-Bzr3vDPz.js +0 -255
- package/dist/connection-manager-B7AWpsCD.d.cts +0 -81
- package/dist/connection-manager-B7AWpsCD.d.ts +0 -81
package/dist/cli.cjs
CHANGED
|
@@ -32,16 +32,140 @@ var import_node_ws = require("@hono/node-ws");
|
|
|
32
32
|
// src/server/index.ts
|
|
33
33
|
var import_node_fs = require("fs");
|
|
34
34
|
var import_node_path = require("path");
|
|
35
|
-
var
|
|
35
|
+
var import_hono15 = require("hono");
|
|
36
36
|
var import_cors = require("hono/cors");
|
|
37
37
|
var import_serve_static = require("@hono/node-server/serve-static");
|
|
38
38
|
|
|
39
|
+
// src/server/redact.ts
|
|
40
|
+
var import_axl = require("@axlsdk/axl");
|
|
41
|
+
// Placeholder string substituted for every value the redaction helpers scrub.
var REDACTED = "[redacted]";

// Error names whose messages redactErrorMessage() passes through unchanged
// even when redaction is enabled.
var SAFE_ERROR_NAMES = /* @__PURE__ */ new Set([
  "QuorumNotMet",
  "NoConsensus",
  "TimeoutError",
  "MaxTurnsError",
  "BudgetExceededError",
  "ToolDenied",
]);
|
|
50
|
+
/**
 * Produce the error message that may be shown to a client.
 *
 * @param {unknown} err - Thrown value; non-Error values are stringified.
 * @param {boolean} redact - When falsy the raw message is returned as-is.
 * @returns {string} The raw message, or the redaction placeholder when
 *   redaction is on and the error's name is not in SAFE_ERROR_NAMES.
 */
function redactErrorMessage(err, redact) {
  const isError = err instanceof Error;
  const message = isError ? err.message : String(err);
  if (redact) {
    // Non-Error values have no name, so they are never treated as safe.
    const errorName = isError ? err.name : "";
    if (!SAFE_ERROR_NAMES.has(errorName)) return REDACTED;
  }
  return message;
}
|
|
56
|
+
/**
 * Replace an arbitrary value with the redaction placeholder.
 *
 * @param {unknown} value - Value to protect.
 * @param {boolean} redact - When falsy the value is returned untouched.
 * @returns {unknown} `value` itself, or the placeholder string.
 */
function redactValue(value, redact) {
  return redact ? REDACTED : value;
}
|
|
60
|
+
/**
 * Scrub an execution record: `result` and `error` (when present) become the
 * placeholder and every entry of `events` is passed through
 * redactStreamEvent with redaction forced on. All other fields are copied.
 *
 * @param {object} info - Execution record; must carry an `events` array.
 * @param {boolean} redact - When falsy `info` is returned by reference.
 * @returns {object} `info` itself or a scrubbed shallow copy.
 */
function redactExecutionInfo(info, redact) {
  if (!redact) return info;
  const scrubbed = { ...info };
  if (info.result !== undefined) scrubbed.result = REDACTED;
  if (info.error !== undefined) scrubbed.error = REDACTED;
  scrubbed.events = info.events.map((event) => redactStreamEvent(event, true));
  return scrubbed;
}
|
|
69
|
+
/**
 * Apply redactExecutionInfo to every record in a list.
 *
 * @param {object[]} infos - Execution records.
 * @param {boolean} redact - When falsy the input array is returned as-is.
 * @returns {object[]} The same array, or a new array of scrubbed records.
 */
function redactExecutionList(infos, redact) {
  return redact ? infos.map((info) => redactExecutionInfo(info, redact)) : infos;
}
|
|
73
|
+
/**
 * Replace a memory value with the redaction placeholder.
 *
 * @param {unknown} value - Stored memory value.
 * @param {boolean} redact - When falsy the value is returned untouched.
 * @returns {unknown} `value` itself, or the placeholder string.
 */
function redactMemoryValue(value, redact) {
  return redact ? REDACTED : value;
}
|
|
77
|
+
/**
 * Scrub a list of memory entries, keeping only each entry's `key` and
 * replacing its value with the placeholder. Any other entry fields are
 * dropped from the scrubbed copies.
 *
 * @param {{key: string, value: unknown}[]} entries - Memory entries.
 * @param {boolean} redact - When falsy the input array is returned as-is.
 * @returns {object[]} The same array, or new `{ key, value }` objects.
 */
function redactMemoryList(entries, redact) {
  if (!redact) return entries;
  return entries.map(({ key }) => ({ key, value: REDACTED }));
}
|
|
81
|
+
/**
 * Build a scrubbed copy of a chat message. Structural fields (`role`,
 * `name`, `tool_call_id`, tool-call ids/types/function names) are kept;
 * `content` and every tool call's `arguments` become the placeholder.
 * Fields not listed here (e.g. providerMetadata) are deliberately left out
 * because their content is opaque.
 *
 * @param {object} msg - Chat message.
 * @returns {object} A new, scrubbed message object.
 */
function redactChatMessage(msg) {
  const scrubbed = { role: msg.role, content: REDACTED };
  if (msg.name !== undefined) scrubbed.name = msg.name;
  if (msg.tool_call_id !== undefined) scrubbed.tool_call_id = msg.tool_call_id;
  if (msg.tool_calls !== undefined) {
    scrubbed.tool_calls = msg.tool_calls.map(({ id, type, function: fn }) => ({
      id,
      type,
      function: { name: fn.name, arguments: REDACTED },
    }));
  }
  return scrubbed;
}
|
|
101
|
+
/**
 * Scrub every message of a session history with redactChatMessage.
 *
 * @param {object[]} history - Chat messages.
 * @param {boolean} redact - When falsy the input array is returned as-is.
 * @returns {object[]} The same array, or a new array of scrubbed messages.
 */
function redactSessionHistory(history, redact) {
  return redact ? history.map(redactChatMessage) : history;
}
|
|
105
|
+
/**
 * Scrub a stream event by delegating to `redactEvent` from @axlsdk/axl.
 *
 * @param {object} event - Stream event.
 * @param {boolean} redact - When falsy the event is returned by reference.
 * @returns {object} `event` itself or whatever `redactEvent` returns.
 */
function redactStreamEvent(event, redact) {
  if (redact) {
    // Called as a plain function (no `this`), matching the bundler's
    // `(0, ns.fn)(...)` call form.
    const scrub = import_axl.redactEvent;
    return scrub(event);
  }
  return event;
}
|
|
109
|
+
/**
 * Build a scrubbed copy of one eval item. `input` and `output` always
 * become the placeholder; `annotations`, `error` and each `scorerErrors`
 * entry are replaced only when present. `scoreDetails` entries are reduced
 * to `score` plus optional `duration` / `cost`; their metadata is omitted
 * because it may contain LLM scorer reasoning.
 *
 * @param {object} item - Eval item.
 * @returns {object} A new, scrubbed item.
 */
function redactEvalItem(item) {
  const scrubbed = { ...item };
  scrubbed.input = REDACTED;
  scrubbed.output = REDACTED;
  if (item.annotations !== undefined) scrubbed.annotations = REDACTED;
  if (item.error !== undefined) scrubbed.error = REDACTED;
  if (item.scorerErrors !== undefined) {
    scrubbed.scorerErrors = item.scorerErrors.map(() => REDACTED);
  }
  if (!item.scoreDetails) return scrubbed;

  const detailsOut = {};
  Object.entries(item.scoreDetails).forEach(([scorer, detail]) => {
    const slim = { score: detail.score };
    if (detail.duration !== undefined) slim.duration = detail.duration;
    if (detail.cost !== undefined) slim.cost = detail.cost;
    detailsOut[scorer] = slim;
  });
  scrubbed.scoreDetails = detailsOut;
  return scrubbed;
}
|
|
132
|
+
/**
 * Scrub an eval result by replacing each entry of `items` with its
 * redactEvalItem copy; every other field is shallow-copied unchanged.
 *
 * @param {object} result - Eval result; must carry an `items` array.
 * @param {boolean} redact - When falsy `result` is returned by reference.
 * @returns {object} `result` itself or a scrubbed shallow copy.
 */
function redactEvalResult(result, redact) {
  if (redact) {
    const scrubbedItems = result.items.map(redactEvalItem);
    return { ...result, items: scrubbedItems };
  }
  return result;
}
|
|
139
|
+
/**
 * Scrub one eval-history entry by running its `data` through
 * redactEvalResult; the entry's other fields are shallow-copied unchanged.
 *
 * @param {object} entry - History entry with a `data` eval result.
 * @param {boolean} redact - When falsy `entry` is returned by reference.
 * @returns {object} `entry` itself or a scrubbed shallow copy.
 */
function redactEvalHistoryEntry(entry, redact) {
  if (redact) {
    const data = redactEvalResult(entry.data, redact);
    return { ...entry, data };
  }
  return entry;
}
|
|
146
|
+
/**
 * Apply redactEvalHistoryEntry to every entry in a list.
 *
 * @param {object[]} entries - Eval-history entries.
 * @param {boolean} redact - When falsy the input array is returned as-is.
 * @returns {object[]} The same array, or a new array of scrubbed entries.
 */
function redactEvalHistoryList(entries, redact) {
  return redact ? entries.map((entry) => redactEvalHistoryEntry(entry, redact)) : entries;
}
|
|
150
|
+
/**
 * Scrub a pending decision: `prompt` becomes the placeholder and, when
 * `metadata` is present, it is replaced by the marker `{ redacted: true }`.
 * All other fields are shallow-copied unchanged.
 *
 * @param {object} decision - Pending decision.
 * @param {boolean} redact - When falsy `decision` is returned by reference.
 * @returns {object} `decision` itself or a scrubbed shallow copy.
 */
function redactPendingDecision(decision, redact) {
  if (!redact) return decision;
  const scrubbed = { ...decision };
  scrubbed.prompt = REDACTED;
  if (decision.metadata !== undefined) scrubbed.metadata = { redacted: true };
  return scrubbed;
}
|
|
158
|
+
/**
 * Apply redactPendingDecision to every decision in a list.
 *
 * @param {object[]} decisions - Pending decisions.
 * @param {boolean} redact - When falsy the input array is returned as-is.
 * @returns {object[]} The same array, or a new array of scrubbed decisions.
 */
function redactPendingDecisionList(decisions, redact) {
  return redact ? decisions.map((decision) => redactPendingDecision(decision, redact)) : decisions;
}
|
|
162
|
+
|
|
39
163
|
// src/server/middleware/error-handler.ts
|
|
40
164
|
async function errorHandler(c, next) {
|
|
41
165
|
try {
|
|
42
166
|
await next();
|
|
43
167
|
} catch (err) {
|
|
44
|
-
const
|
|
168
|
+
const rawMessage = err instanceof Error ? err.message : String(err);
|
|
45
169
|
const code = err.code ?? "INTERNAL_ERROR";
|
|
46
170
|
let status = 500;
|
|
47
171
|
if ("status" in err) {
|
|
@@ -49,46 +173,104 @@ async function errorHandler(c, next) {
|
|
|
49
173
|
if (typeof errStatus === "number" && errStatus >= 400 && errStatus < 600) {
|
|
50
174
|
status = errStatus;
|
|
51
175
|
}
|
|
52
|
-
} else if (code === "NOT_FOUND" ||
|
|
176
|
+
} else if (code === "NOT_FOUND" || rawMessage.includes("not found") || rawMessage.includes("not registered")) {
|
|
53
177
|
status = 404;
|
|
54
|
-
} else if (code === "VALIDATION_ERROR" ||
|
|
178
|
+
} else if (code === "VALIDATION_ERROR" || rawMessage.includes("Expected") || rawMessage.includes("invalid")) {
|
|
55
179
|
status = 400;
|
|
56
180
|
}
|
|
181
|
+
const runtime = c.get("runtime");
|
|
182
|
+
const redactOn = runtime?.isRedactEnabled?.() ?? false;
|
|
57
183
|
const body = {
|
|
58
184
|
ok: false,
|
|
59
|
-
error: { code, message }
|
|
185
|
+
error: { code, message: redactErrorMessage(err, redactOn) }
|
|
60
186
|
};
|
|
61
187
|
return c.json(body, status);
|
|
62
188
|
}
|
|
63
189
|
}
|
|
64
190
|
|
|
65
191
|
// src/server/ws/connection-manager.ts
|
|
192
|
+
// NOTE(review): presumably how long a completed replay buffer is retained;
// the code that reads this constant is not visible in this view — confirm.
var BUFFER_TTL_MS = 30_000;
// Fallback replay-buffer caps used by the ConnectionManager constructor when
// the caller's bufferCaps omits a value.
var DEFAULT_MAX_BUFFER_EVENTS = 1_000;
var DEFAULT_MAX_BUFFER_BYTES = 4 * 1024 ** 2;
var DEFAULT_MAX_ACTIVE_BUFFERS = 256;
// Event types that broadcast() delivers live but never stores for replay.
var UNBUFFERED_EVENT_TYPES = /* @__PURE__ */ new Set(["token", "partial_object"]);
// Byte budget for a single WS message (outbound truncation, inbound limit).
var MAX_WS_FRAME_BYTES = 64 * 1024;
|
|
66
198
|
/**
 * Tell whether events on a channel are kept in a replay buffer.
 *
 * @param {string} channel - Channel name.
 * @returns {boolean} True for channels starting with "execution:" or "eval:".
 */
function isBufferedChannel(channel) {
  return ["execution:", "eval:"].some((prefix) => channel.startsWith(prefix));
}
|
|
201
|
+
/**
 * Return `msg` unchanged when it fits the frame budget; otherwise return a
 * replacement frame whose inner `data.data` is a small truncation marker.
 *
 * The top-level fields of `data` are carried over only when `data` is a
 * plain object. Spreading a string or array would copy every character or
 * element as an index key and rebuild the oversized payload inside the
 * "truncated" frame.
 *
 * @param {string} msg - Already-serialized frame.
 * @param {string} channel - Channel the frame is sent on.
 * @param {unknown} data - The event the frame was built from.
 * @param {number} [maxBytes=MAX_WS_FRAME_BYTES] - UTF-8 byte budget.
 * @returns {string} `msg` or the serialized truncation frame.
 */
function truncateIfOversized(msg, channel, data, maxBytes = MAX_WS_FRAME_BYTES) {
  const msgBytes = Buffer.byteLength(msg, "utf8");
  if (msgBytes <= maxBytes) return msg;

  const isPlainEvent = data !== null && typeof data === "object" && !Array.isArray(data);
  const event = isPlainEvent ? data : {};
  const truncated = {
    type: "event",
    channel,
    data: {
      ...event,
      data: {
        __truncated: true,
        originalBytes: msgBytes,
        maxBytes,
        hint: "Event exceeded WS frame budget (likely a verbose agent_call with a large messages[] snapshot). Fetch via REST if you need the full payload."
      }
    }
  };
  return JSON.stringify(truncated);
}
|
|
69
|
-
var BUFFER_TTL_MS = 3e4;
|
|
70
|
-
var MAX_BUFFER_EVENTS = 500;
|
|
71
220
|
var ConnectionManager = class {
|
|
72
221
|
/** channel -> set of WS connections */
|
|
73
222
|
channels = /* @__PURE__ */ new Map();
|
|
74
|
-
/** ws ->
|
|
223
|
+
/** ws -> subscribed channels + optional integrator-supplied metadata */
|
|
75
224
|
connections = /* @__PURE__ */ new Map();
|
|
76
225
|
/** channel -> replay buffer for execution streams */
|
|
77
226
|
buffers = /* @__PURE__ */ new Map();
|
|
78
227
|
maxConnections = 100;
|
|
228
|
+
filter;
|
|
229
|
+
/** Resolved replay-buffer caps. Per-instance so embedders can dial them
|
|
230
|
+
* without monkey-patching module-level constants. */
|
|
231
|
+
maxEventsPerBuffer;
|
|
232
|
+
maxBytesPerBuffer;
|
|
233
|
+
maxActiveBuffers;
|
|
234
|
+
constructor(bufferCaps) {
|
|
235
|
+
const validatePositiveInt = (key, value) => {
|
|
236
|
+
if (value === void 0) return;
|
|
237
|
+
if (!Number.isFinite(value) || !Number.isInteger(value) || value < 1) {
|
|
238
|
+
throw new RangeError(`bufferCaps.${key} must be a positive integer (>= 1); got ${value}`);
|
|
239
|
+
}
|
|
240
|
+
};
|
|
241
|
+
validatePositiveInt("maxEventsPerBuffer", bufferCaps?.maxEventsPerBuffer);
|
|
242
|
+
validatePositiveInt("maxBytesPerBuffer", bufferCaps?.maxBytesPerBuffer);
|
|
243
|
+
validatePositiveInt("maxActiveBuffers", bufferCaps?.maxActiveBuffers);
|
|
244
|
+
this.maxEventsPerBuffer = bufferCaps?.maxEventsPerBuffer ?? DEFAULT_MAX_BUFFER_EVENTS;
|
|
245
|
+
this.maxBytesPerBuffer = bufferCaps?.maxBytesPerBuffer ?? DEFAULT_MAX_BUFFER_BYTES;
|
|
246
|
+
this.maxActiveBuffers = bufferCaps?.maxActiveBuffers ?? DEFAULT_MAX_ACTIVE_BUFFERS;
|
|
247
|
+
}
|
|
248
|
+
/**
|
|
249
|
+
* Register a broadcast filter. Called once at middleware construction.
|
|
250
|
+
* The filter runs on every outbound event and can drop or deliver based
|
|
251
|
+
* on the destination connection's metadata.
|
|
252
|
+
*/
|
|
253
|
+
setFilter(filter) {
|
|
254
|
+
this.filter = filter;
|
|
255
|
+
}
|
|
256
|
+
/** Attach integrator-supplied metadata to an already-added connection. */
|
|
257
|
+
setMetadata(ws, metadata) {
|
|
258
|
+
const entry = this.connections.get(ws);
|
|
259
|
+
if (entry) entry.metadata = metadata;
|
|
260
|
+
}
|
|
79
261
|
/** Register a new WS connection. */
|
|
80
262
|
add(ws) {
|
|
81
263
|
if (this.connections.size >= this.maxConnections) {
|
|
82
264
|
ws.close?.();
|
|
83
265
|
return;
|
|
84
266
|
}
|
|
85
|
-
this.connections.set(ws, /* @__PURE__ */ new Set());
|
|
267
|
+
this.connections.set(ws, { channels: /* @__PURE__ */ new Set() });
|
|
86
268
|
}
|
|
87
269
|
/** Remove a WS connection and all its subscriptions. */
|
|
88
270
|
remove(ws) {
|
|
89
|
-
const
|
|
90
|
-
if (
|
|
91
|
-
for (const ch of channels) {
|
|
271
|
+
const entry = this.connections.get(ws);
|
|
272
|
+
if (entry) {
|
|
273
|
+
for (const ch of entry.channels) {
|
|
92
274
|
this.channels.get(ch)?.delete(ws);
|
|
93
275
|
if (this.channels.get(ch)?.size === 0) {
|
|
94
276
|
this.channels.delete(ch);
|
|
@@ -106,12 +288,20 @@ var ConnectionManager = class {
|
|
|
106
288
|
this.channels.set(channel, subs);
|
|
107
289
|
}
|
|
108
290
|
subs.add(ws);
|
|
109
|
-
this.connections.get(ws).add(channel);
|
|
291
|
+
this.connections.get(ws).channels.add(channel);
|
|
110
292
|
const buffer = this.buffers.get(channel);
|
|
111
293
|
if (buffer) {
|
|
112
|
-
|
|
294
|
+
const metadata = this.connections.get(ws)?.metadata;
|
|
295
|
+
for (const event of buffer.events) {
|
|
296
|
+
if (this.filter) {
|
|
297
|
+
try {
|
|
298
|
+
if (!this.filter(event.data, metadata)) continue;
|
|
299
|
+
} catch {
|
|
300
|
+
continue;
|
|
301
|
+
}
|
|
302
|
+
}
|
|
113
303
|
try {
|
|
114
|
-
ws.send(msg);
|
|
304
|
+
ws.send(event.msg);
|
|
115
305
|
} catch {
|
|
116
306
|
this.remove(ws);
|
|
117
307
|
return;
|
|
@@ -125,21 +315,49 @@ var ConnectionManager = class {
|
|
|
125
315
|
if (this.channels.get(channel)?.size === 0) {
|
|
126
316
|
this.channels.delete(channel);
|
|
127
317
|
}
|
|
128
|
-
this.connections.get(ws)?.delete(channel);
|
|
318
|
+
this.connections.get(ws)?.channels.delete(channel);
|
|
129
319
|
}
|
|
130
320
|
/** Broadcast data to all subscribers of a channel. Buffers events for execution channels. */
|
|
131
321
|
broadcast(channel, data) {
|
|
132
|
-
const msg =
|
|
322
|
+
const msg = truncateIfOversized(
|
|
323
|
+
JSON.stringify({ type: "event", channel, data }),
|
|
324
|
+
channel,
|
|
325
|
+
data
|
|
326
|
+
);
|
|
133
327
|
if (isBufferedChannel(channel)) {
|
|
134
328
|
let buffer = this.buffers.get(channel);
|
|
135
329
|
if (!buffer) {
|
|
136
|
-
|
|
330
|
+
if (this.buffers.size >= this.maxActiveBuffers) {
|
|
331
|
+
let victim;
|
|
332
|
+
for (const [ch, buf] of this.buffers) {
|
|
333
|
+
if (buf.complete) {
|
|
334
|
+
victim = ch;
|
|
335
|
+
break;
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
if (victim === void 0) {
|
|
339
|
+
victim = this.buffers.keys().next().value;
|
|
340
|
+
}
|
|
341
|
+
if (victim !== void 0) {
|
|
342
|
+
const old = this.buffers.get(victim);
|
|
343
|
+
if (old?.timer) clearTimeout(old.timer);
|
|
344
|
+
this.buffers.delete(victim);
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
buffer = { events: [], complete: false, bytes: 0 };
|
|
137
348
|
this.buffers.set(channel, buffer);
|
|
138
349
|
}
|
|
139
350
|
const event = data;
|
|
140
351
|
const isTerminal = event.type === "done" || event.type === "error";
|
|
141
|
-
|
|
142
|
-
|
|
352
|
+
const isUnbuffered = event.type !== void 0 && UNBUFFERED_EVENT_TYPES.has(event.type);
|
|
353
|
+
if (!isUnbuffered) {
|
|
354
|
+
const msgBytes = Buffer.byteLength(msg, "utf8");
|
|
355
|
+
const atCountCap = buffer.events.length >= this.maxEventsPerBuffer;
|
|
356
|
+
const atByteCap = buffer.bytes + msgBytes > this.maxBytesPerBuffer;
|
|
357
|
+
if (isTerminal || !atCountCap && !atByteCap) {
|
|
358
|
+
buffer.events.push({ msg, data });
|
|
359
|
+
buffer.bytes += msgBytes;
|
|
360
|
+
}
|
|
143
361
|
}
|
|
144
362
|
if (isTerminal) {
|
|
145
363
|
buffer.complete = true;
|
|
@@ -152,6 +370,14 @@ var ConnectionManager = class {
|
|
|
152
370
|
const subs = this.channels.get(channel);
|
|
153
371
|
if (!subs || subs.size === 0) return;
|
|
154
372
|
for (const ws of [...subs]) {
|
|
373
|
+
if (this.filter) {
|
|
374
|
+
const metadata = this.connections.get(ws)?.metadata;
|
|
375
|
+
try {
|
|
376
|
+
if (!this.filter(data, metadata)) continue;
|
|
377
|
+
} catch {
|
|
378
|
+
continue;
|
|
379
|
+
}
|
|
380
|
+
}
|
|
155
381
|
try {
|
|
156
382
|
ws.send(msg);
|
|
157
383
|
} catch {
|
|
@@ -167,8 +393,20 @@ var ConnectionManager = class {
|
|
|
167
393
|
const wildcardChannel = channel.substring(0, colonIdx) + ":*";
|
|
168
394
|
const subs = this.channels.get(wildcardChannel);
|
|
169
395
|
if (!subs || subs.size === 0) return;
|
|
170
|
-
const msg =
|
|
396
|
+
const msg = truncateIfOversized(
|
|
397
|
+
JSON.stringify({ type: "event", channel, data }),
|
|
398
|
+
channel,
|
|
399
|
+
data
|
|
400
|
+
);
|
|
171
401
|
for (const ws of [...subs]) {
|
|
402
|
+
if (this.filter) {
|
|
403
|
+
const metadata = this.connections.get(ws)?.metadata;
|
|
404
|
+
try {
|
|
405
|
+
if (!this.filter(data, metadata)) continue;
|
|
406
|
+
} catch {
|
|
407
|
+
continue;
|
|
408
|
+
}
|
|
409
|
+
}
|
|
172
410
|
try {
|
|
173
411
|
ws.send(msg);
|
|
174
412
|
} catch {
|
|
@@ -200,11 +438,11 @@ var ConnectionManager = class {
|
|
|
200
438
|
};
|
|
201
439
|
|
|
202
440
|
// src/server/ws/protocol.ts
|
|
203
|
-
var VALID_CHANNEL_PREFIXES = ["execution:", "trace:"];
|
|
204
|
-
var VALID_EXACT_CHANNELS = ["costs", "decisions"];
|
|
441
|
+
// NOTE(review): presumably the allow-list consulted when a client asks to
// subscribe; the validating code is not visible in this view — confirm.
var VALID_CHANNEL_PREFIXES = [
  "execution:",
  "trace:",
  "eval:",
];
var VALID_EXACT_CHANNELS = [
  "costs",
  "decisions",
  "eval-trends",
  "workflow-stats",
  "trace-stats",
];
var MAX_CHANNEL_LENGTH = 256;
|
|
206
444
|
function handleWsMessage(raw, socket, connMgr) {
|
|
207
|
-
if (raw
|
|
445
|
+
if (Buffer.byteLength(raw, "utf8") > MAX_WS_FRAME_BYTES) {
|
|
208
446
|
return JSON.stringify({ type: "error", message: "Message too large" });
|
|
209
447
|
}
|
|
210
448
|
let msg;
|
|
@@ -264,68 +502,575 @@ function createWsHandlers(connMgr) {
|
|
|
264
502
|
};
|
|
265
503
|
}
|
|
266
504
|
|
|
267
|
-
// src/server/
|
|
268
|
-
var
|
|
269
|
-
|
|
505
|
+
// src/server/aggregates/aggregate-snapshots.ts
|
|
506
|
+
// Length of each aggregation window in milliseconds; "all" is unbounded.
var WINDOW_MS = (() => {
  const dayMs = 24 * 60 * 60 * 1e3;
  return {
    "24h": dayMs,
    "7d": 7 * dayMs,
    "30d": 30 * dayMs,
    all: Number.POSITIVE_INFINITY,
  };
})();
|
|
512
|
+
/**
 * Tell whether a timestamp falls inside a named window ending at `now`.
 *
 * @param {number} ts - Timestamp in ms.
 * @param {string} window - Key of WINDOW_MS.
 * @param {number} now - Reference time in ms.
 * @returns {boolean} True when `ts` is at or after the window's start.
 *   An unknown window key makes the cutoff NaN, so the result is false.
 */
function withinWindow(ts, window, now) {
  const cutoff = now - WINDOW_MS[window];
  return ts >= cutoff;
}
|
|
515
|
+
var REBUILD_INTERVAL_MS = 5 * 6e4;
|
|
516
|
+
var ALL_WINDOWS = new Set(Object.keys(WINDOW_MS));
|
|
517
|
+
/**
 * Normalize a raw window parameter.
 *
 * @param {string|undefined|null} raw - Caller-supplied window name.
 * @param {string} [fallback="7d"] - Returned when `raw` is empty or unknown.
 * @returns {string} `raw` when it is a member of ALL_WINDOWS, else `fallback`.
 */
function parseWindowParam(raw, fallback = "7d") {
  if (!raw) return fallback;
  return ALL_WINDOWS.has(raw) ? raw : fallback;
}
|
|
520
|
+
/**
 * Holds one aggregate state per time window and pushes the whole set to a
 * WS channel whenever it changes.
 *
 * Every read of a window goes through get(), so a window missing from the
 * map (e.g. after replace() with a partial map) is seen as a fresh empty
 * state instead of `undefined` by reducers and the broadcast transform.
 */
var AggregateSnapshots = class {
  /** window name -> current aggregate state */
  snapshots;
  /**
   * @param {string[]} windows - Window names to track.
   * @param {() => any} emptyState - Factory for a fresh, empty aggregate.
   * @param {{broadcast: Function}} connMgr - Receives `broadcast(channel, payload)`.
   * @param {string} channel - Channel name passed to `connMgr.broadcast`.
   * @param {(state: any) => any} [broadcastTransform] - Optional per-window
   *   mapping applied to states before they are broadcast.
   */
  constructor(windows, emptyState, connMgr, channel, broadcastTransform) {
    this.windows = windows;
    this.emptyState = emptyState;
    this.connMgr = connMgr;
    this.channel = channel;
    this.broadcastTransform = broadcastTransform;
    this.snapshots = new Map(windows.map((w) => [w, emptyState()]));
  }
  /** Replace all snapshots at once (used after a full rebuild) and broadcast. */
  replace(fresh) {
    this.snapshots = fresh;
    this.broadcast();
  }
  /**
   * Apply `update` to every window that contains `ts`, then broadcast once
   * if at least one window changed.
   */
  fold(ts, update) {
    const now = Date.now();
    let changed = false;
    for (const window of this.windows) {
      if (!withinWindow(ts, window, now)) continue;
      this.snapshots.set(window, update(this.get(window)));
      changed = true;
    }
    if (changed) this.broadcast();
  }
  /** Current state of one window, or a fresh empty state when absent. */
  get(window) {
    return this.snapshots.get(window) ?? this.emptyState();
  }
  /** Plain-object view of every stored window. */
  getAll() {
    return Object.fromEntries(this.snapshots);
  }
  /** Send `{ snapshots, updatedAt }` to the configured channel. */
  broadcast() {
    const snapshots = this.broadcastTransform ? Object.fromEntries(
      this.windows.map((w) => [w, this.broadcastTransform(this.get(w))])
    ) : this.getAll();
    this.connMgr.broadcast(this.channel, {
      snapshots,
      updatedAt: Date.now()
    });
  }
};
|
|
564
|
+
|
|
565
|
+
// src/server/aggregates/trace-aggregator.ts
|
|
566
|
+
/**
 * Maintains per-window aggregates of trace events: a full rebuild from
 * stored executions on start and on a timer, plus live folding of every
 * "trace" event emitted by the runtime.
 */
var TraceAggregator = class {
  snaps;
  interval;
  listener;
  options;
  /**
   * @param {object} options - Reads `windows`, `emptyState`, `connMgr`,
   *   `channel`, `broadcastTransform`, `reducer`, `runtime` and the optional
   *   `executionCap` (default 2000).
   */
  constructor(options) {
    this.options = options;
    this.snaps = new AggregateSnapshots(
      options.windows,
      options.emptyState,
      options.connMgr,
      options.channel,
      options.broadcastTransform
    );
  }
  /** Rebuild once, then subscribe to live trace events and start the rebuild timer. */
  async start() {
    await this.rebuild();
    // Release anything a previous start() registered; otherwise a second
    // call would fold each event twice and leak the first timer.
    this.close();
    this.listener = (event) => {
      this.snaps.fold(event.timestamp, (prev) => this.options.reducer(prev, event));
    };
    this.options.runtime.on("trace", this.listener);
    this.interval = setInterval(
      () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err)),
      REBUILD_INTERVAL_MS
    );
  }
  /** Recompute every window from the first `executionCap` stored executions. */
  async rebuild() {
    const executions = await this.options.runtime.getExecutions();
    const cap = this.options.executionCap ?? 2e3;
    const capped = executions.slice(0, cap);
    const now = Date.now();
    const fresh = new Map(
      this.options.windows.map((w) => [w, this.options.emptyState()])
    );
    for (const exec of capped) {
      for (const event of exec.events) {
        for (const window of this.options.windows) {
          if (withinWindow(event.timestamp, window, now)) {
            fresh.set(window, this.options.reducer(fresh.get(window), event));
          }
        }
      }
    }
    this.snaps.replace(fresh);
  }
  getSnapshot(window) {
    return this.snaps.get(window);
  }
  getAllSnapshots() {
    return this.snaps.getAll();
  }
  /** Unsubscribe and stop the timer. Safe to call repeatedly. */
  close() {
    if (this.listener) {
      this.options.runtime.off("trace", this.listener);
      this.listener = void 0;
    }
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = void 0;
    }
  }
};
|
|
622
|
+
|
|
623
|
+
// src/server/aggregates/execution-aggregator.ts
|
|
624
|
+
/**
 * Maintains per-window aggregates of whole executions: a full rebuild from
 * stored executions on start and on a timer, plus a live fold of each
 * execution when its "workflow_end" trace event arrives.
 */
var ExecutionAggregator = class {
  snaps;
  interval;
  listener;
  options;
  /** Generation counter to prevent stale async fold after rebuild. */
  generation = 0;
  /**
   * @param {object} options - Reads `windows`, `emptyState`, `connMgr`,
   *   `channel`, `broadcastTransform`, `reducer`, `runtime` and the optional
   *   `executionCap` (default 2000).
   */
  constructor(options) {
    this.options = options;
    this.snaps = new AggregateSnapshots(
      options.windows,
      options.emptyState,
      options.connMgr,
      options.channel,
      options.broadcastTransform
    );
  }
  /** Rebuild once, then subscribe to live trace events and start the rebuild timer. */
  async start() {
    await this.rebuild();
    // Release anything a previous start() registered; otherwise a second
    // call would fold each execution twice and leak the first timer.
    this.close();
    this.listener = (event) => {
      if (event.type !== "workflow_end") return;
      // Remember which rebuild generation this lookup belongs to; if a
      // rebuild starts before the lookup resolves, the result is discarded.
      const gen = this.generation;
      this.options.runtime.getExecution(event.executionId).then((exec) => {
        if (this.generation !== gen) return;
        if (exec) {
          this.snaps.fold(exec.startedAt, (prev) => this.options.reducer(prev, exec));
        }
      }).catch((err) => console.error("[axl-studio] execution fold failed:", err));
    };
    this.options.runtime.on("trace", this.listener);
    this.interval = setInterval(
      () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err)),
      REBUILD_INTERVAL_MS
    );
  }
  /** Recompute every window from the first `executionCap` stored executions. */
  async rebuild() {
    this.generation++;
    const executions = await this.options.runtime.getExecutions();
    const cap = this.options.executionCap ?? 2e3;
    const capped = executions.slice(0, cap);
    const now = Date.now();
    const fresh = new Map(
      this.options.windows.map((w) => [w, this.options.emptyState()])
    );
    for (const exec of capped) {
      for (const window of this.options.windows) {
        if (withinWindow(exec.startedAt, window, now)) {
          fresh.set(window, this.options.reducer(fresh.get(window), exec));
        }
      }
    }
    this.snaps.replace(fresh);
  }
  getSnapshot(window) {
    return this.snaps.get(window);
  }
  getAllSnapshots() {
    return this.snaps.getAll();
  }
  /** Unsubscribe and stop the timer. Safe to call repeatedly. */
  close() {
    if (this.listener) {
      this.options.runtime.off("trace", this.listener);
      this.listener = void 0;
    }
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = void 0;
    }
  }
};
|
|
688
|
+
|
|
689
|
+
// src/server/aggregates/eval-aggregator.ts
|
|
690
|
+
/**
 * Maintains per-window aggregates of eval-history entries: a full rebuild
 * from stored history on start and on a timer, plus live folding of every
 * "eval_result" event emitted by the runtime.
 */
var EvalAggregator = class {
  snaps;
  interval;
  listener;
  options;
  /**
   * @param {object} options - Reads `windows`, `emptyState`, `connMgr`,
   *   `channel`, `broadcastTransform`, `reducer`, `runtime` and the optional
   *   `entryCap` (default 500).
   */
  constructor(options) {
    this.options = options;
    this.snaps = new AggregateSnapshots(
      options.windows,
      options.emptyState,
      options.connMgr,
      options.channel,
      options.broadcastTransform
    );
  }
  /** Rebuild once, then subscribe to live eval results and start the rebuild timer. */
  async start() {
    await this.rebuild();
    // Release anything a previous start() registered; otherwise a second
    // call would fold each entry twice and leak the first timer.
    this.close();
    this.listener = (entry) => {
      this.snaps.fold(entry.timestamp, (prev) => this.options.reducer(prev, entry));
    };
    this.options.runtime.on("eval_result", this.listener);
    this.interval = setInterval(
      () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err)),
      REBUILD_INTERVAL_MS
    );
  }
  /** Recompute every window from the first `entryCap` history entries. */
  async rebuild() {
    const history = await this.options.runtime.getEvalHistory();
    const cap = this.options.entryCap ?? 500;
    const capped = history.slice(0, cap);
    const now = Date.now();
    const fresh = new Map(
      this.options.windows.map((w) => [w, this.options.emptyState()])
    );
    for (const entry of capped) {
      for (const window of this.options.windows) {
        if (withinWindow(entry.timestamp, window, now)) {
          fresh.set(window, this.options.reducer(fresh.get(window), entry));
        }
      }
    }
    this.snaps.replace(fresh);
  }
  getSnapshot(window) {
    return this.snaps.get(window);
  }
  getAllSnapshots() {
    return this.snaps.getAll();
  }
  /** Unsubscribe and stop the timer. Safe to call repeatedly. */
  close() {
    if (this.listener) {
      this.options.runtime.off("eval_result", this.listener);
      this.listener = void 0;
    }
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = void 0;
    }
  }
};
|
|
744
|
+
|
|
745
|
+
// src/server/aggregates/reducers.ts
|
|
746
|
+
var import_axl2 = require("@axlsdk/axl");
|
|
747
|
+
// Coerce anything that is not a finite number (undefined, NaN, ±Infinity,
// non-numbers) to 0 so it can be summed safely.
var finite = (v) => {
  if (Number.isFinite(v)) return v;
  return 0;
};
|
|
748
|
+
/**
 * Create a zeroed retry breakdown: for each of primary / schema / validate /
 * guardrail a cost accumulator and a `<name>Calls` counter, followed by the
 * overall `retryCalls` counter.
 *
 * @returns {object} A fresh object on every call.
 */
function emptyRetry() {
  const counters = {};
  for (const bucket of ["primary", "schema", "validate", "guardrail"]) {
    counters[bucket] = 0;
    counters[`${bucket}Calls`] = 0;
  }
  counters.retryCalls = 0;
  return counters;
}
|
|
761
|
+
/**
 * Create a zeroed cost aggregate: totals, per-agent / per-model /
 * per-workflow / per-embedder maps, and a retry breakdown from emptyRetry().
 *
 * @returns {object} A fresh object (with fresh nested objects) on every call.
 */
function emptyCostData() {
  const totalTokens = { input: 0, output: 0, reasoning: 0 };
  const costData = {
    totalCost: 0,
    totalTokens,
    byAgent: {},
    byModel: {},
    byWorkflow: {},
  };
  costData.retry = emptyRetry();
  costData.byEmbedder = {};
  return costData;
}
|
|
772
|
+
/**
 * Fold one trace event into a cost aggregate without mutating `acc`.
 *
 * - "workflow_start" with a workflow name only bumps that workflow's
 *   execution count.
 * - Events carrying neither `cost` nor `tokens`, and "ask_end" events,
 *   leave the aggregate untouched (the same `acc` reference is returned).
 * - Any other event adds its cost contribution to the total and to the
 *   agent / model / workflow it names; "agent_call_end" additionally feeds
 *   the token totals and the retry breakdown, and memory events feed the
 *   per-embedder map.
 *
 * @param {object} acc - Aggregate shaped like emptyCostData().
 * @param {object} event - Trace event.
 * @returns {object} The new aggregate, or `acc` when nothing changed.
 */
function reduceCost(acc, event) {
  const isWorkflowStart = event.type === "workflow_start";
  if (isWorkflowStart && event.workflow) {
    const byWorkflow2 = { ...acc.byWorkflow };
    const prev = byWorkflow2[event.workflow] ?? { cost: 0, executions: 0 };
    byWorkflow2[event.workflow] = { ...prev, executions: prev.executions + 1 };
    return { ...acc, byWorkflow: byWorkflow2 };
  }
  if (event.cost == null && !event.tokens) return acc;
  const cost = (0, import_axl2.eventCostContribution)(event);
  if (event.type === "ask_end") return acc;
  const tokens = event.tokens ?? {};
  // Only agent_call_end events contribute to the global token totals.
  const totalTokens = event.type === "agent_call_end" ? {
    input: acc.totalTokens.input + finite(tokens.input),
    output: acc.totalTokens.output + finite(tokens.output),
    reasoning: acc.totalTokens.reasoning + finite(tokens.reasoning)
  } : acc.totalTokens;
  const byAgent = { ...acc.byAgent };
  if (event.agent) {
    const prev = byAgent[event.agent] ?? { cost: 0, calls: 0 };
    byAgent[event.agent] = { cost: prev.cost + cost, calls: prev.calls + 1 };
  }
  const byModel = { ...acc.byModel };
  if (event.model) {
    const prev = byModel[event.model] ?? { cost: 0, calls: 0, tokens: { input: 0, output: 0 } };
    byModel[event.model] = {
      cost: prev.cost + cost,
      calls: prev.calls + 1,
      tokens: {
        input: prev.tokens.input + finite(tokens.input),
        output: prev.tokens.output + finite(tokens.output)
      }
    };
  }
  const byWorkflow = { ...acc.byWorkflow };
  if (event.workflow) {
    const prev = byWorkflow[event.workflow] ?? { cost: 0, executions: 0 };
    // A named workflow_start event already returned above, so the
    // execution count can never change on this path.
    byWorkflow[event.workflow] = {
      cost: prev.cost + cost,
      executions: prev.executions
    };
  }
  let retry = acc.retry;
  if (event.type === "agent_call_end") {
    const reason = (event.data ?? {}).retryReason;
    retry = { ...acc.retry };
    if (reason === "schema") {
      retry.schema += cost;
      retry.schemaCalls += 1;
      retry.retryCalls += 1;
    } else if (reason === "validate") {
      retry.validate += cost;
      retry.validateCalls += 1;
      retry.retryCalls += 1;
    } else if (reason === "guardrail") {
      retry.guardrail += cost;
      retry.guardrailCalls += 1;
      retry.retryCalls += 1;
    } else {
      // No (or an unrecognized) retry reason counts as a primary call.
      retry.primary += cost;
      retry.primaryCalls += 1;
    }
  }
  let byEmbedder = acc.byEmbedder;
  if (event.type === "memory_remember" || event.type === "memory_recall") {
    // `data` may be absent on a memory event; treat that as "no usage info"
    // instead of throwing.
    const usage = event.data?.usage;
    byEmbedder = { ...acc.byEmbedder };
    const modelKey = usage?.model ?? "unknown";
    const embedTokens = typeof usage?.tokens === "number" ? finite(usage.tokens) : 0;
    const prev = byEmbedder[modelKey] ?? { cost: 0, calls: 0, tokens: 0 };
    byEmbedder[modelKey] = {
      cost: prev.cost + cost,
      calls: prev.calls + 1,
      tokens: prev.tokens + embedTokens
    };
  }
  return {
    totalCost: acc.totalCost + cost,
    totalTokens,
    byAgent,
    byModel,
    byWorkflow,
    retry,
    byEmbedder
  };
}
|
|
859
|
+
/**
 * Creates the zero-valued starting accumulator for eval-trend aggregation.
 *
 * @returns {{ byEval: object, totalRuns: number, totalCost: number }}
 *   A fresh object on every call, so callers never share state.
 */
function emptyEvalTrendData() {
  const seed = {
    byEval: {},
    totalRuns: 0,
    totalCost: 0
  };
  return seed;
}
|
|
862
|
+
/**
 * Pulls per-scorer numeric scores out of an eval result payload.
 *
 * Reads `data.summary.scorers` and keeps, for each scorer name, either the
 * entry itself (when it is a finite number) or its `mean` field (when the
 * entry is an object with a finite `mean`). Anything else is dropped.
 *
 * @param {unknown} data - Eval result payload; non-objects yield `{}`.
 * @returns {Object<string, number>} Map of scorer name to finite score.
 */
function extractScores(data) {
  const isRecord = Boolean(data) && typeof data === "object";
  const scorers = isRecord ? data.summary?.scorers : void 0;
  if (!scorers) {
    return {};
  }
  return Object.entries(scorers).reduce((out, [name, entry]) => {
    if (typeof entry === "number") {
      // Non-finite numbers (NaN, +/-Infinity) are skipped entirely.
      if (Number.isFinite(entry)) out[name] = entry;
      return out;
    }
    if (entry && typeof entry === "object" && Number.isFinite(entry.mean)) {
      out[name] = entry.mean;
    }
    return out;
  }, {});
}
|
|
878
|
+
/**
 * Reads the cost of an eval result payload.
 *
 * Prefers a finite top-level `totalCost`, then a finite
 * `summary.totalCost`, and otherwise reports 0.
 *
 * @param {unknown} data - Eval result payload; non-objects yield 0.
 * @returns {number} The first finite cost found, or 0.
 */
function extractCost(data) {
  if (!data || typeof data !== "object") {
    return 0;
  }
  const candidates = [data.totalCost, data.summary?.totalCost];
  const firstFinite = candidates.find((value) => Number.isFinite(value));
  return firstFinite ?? 0;
}
|
|
885
|
+
/**
 * Picks a single representative model name from an eval result payload.
 *
 * Resolution order:
 *   1. `metadata.modelCounts` (a non-array object): the key with the highest
 *      numeric count, ties broken by `localeCompare` on the key.
 *   2. `metadata.models[0]` when it is a string.
 *   3. `undefined`.
 *
 * @param {unknown} data - Eval result payload.
 * @returns {string|undefined} The chosen model name, if any.
 */
function extractModel(data) {
  if (!data || typeof data !== "object") {
    return void 0;
  }
  const { metadata } = data;
  const counts = metadata?.modelCounts;
  const hasCountMap = Boolean(counts) && typeof counts === "object" && !Array.isArray(counts);
  if (hasCountMap) {
    // Highest count first; equal counts fall back to name ordering.
    const byCountDescThenName = (left, right) => right[1] - left[1] || left[0].localeCompare(right[0]);
    const ranked = Object.entries(counts)
      .filter((pair) => typeof pair[1] === "number")
      .sort(byCountDescThenName);
    if (ranked.length > 0) {
      return ranked[0][0];
    }
  }
  const models = metadata?.models;
  return Array.isArray(models) && typeof models[0] === "string" ? models[0] : void 0;
}
|
|
903
|
+
/**
 * Reads `duration` from an eval result payload.
 *
 * @param {unknown} data - Eval result payload.
 * @returns {number|undefined} `data.duration` when it is a finite number,
 *   otherwise `undefined`.
 */
function extractDuration(data) {
  if (!data || typeof data !== "object") {
    return void 0;
  }
  const { duration } = data;
  if (Number.isFinite(duration)) {
    return duration;
  }
  return void 0;
}
|
|
908
|
+
/**
 * Computes the per-scorer mean and population standard deviation across runs.
 *
 * Scorer names are the union of keys found in every `run.scores`. For each
 * name, runs whose score is `null`/`undefined` are ignored; a scorer with no
 * remaining samples is omitted from both outputs.
 *
 * @param {Array<{ scores: Object<string, number> }>} runs - Runs to summarise.
 * @returns {{ mean: Object<string, number>, std: Object<string, number> }}
 */
function computeScoreStats(runs) {
  const scorerNames = new Set(runs.flatMap((run) => Object.keys(run.scores)));
  const mean = {};
  const std = {};
  scorerNames.forEach((name) => {
    const samples = [];
    for (const run of runs) {
      const score = run.scores[name];
      if (score != null) samples.push(score);
    }
    const count = samples.length;
    if (count === 0) return;
    let total = 0;
    for (const sample of samples) total += sample;
    const average = total / count;
    let squaredDeviation = 0;
    for (const sample of samples) squaredDeviation += (sample - average) ** 2;
    mean[name] = average;
    // Divides by N (not N - 1): population standard deviation.
    std[name] = Math.sqrt(squaredDeviation / count);
  });
  return { mean, std };
}
|
|
925
|
+
/**
 * Folds one eval-history entry into the eval-trend accumulator without
 * mutating `acc`.
 *
 * The entry is condensed into a run record (timestamp, id, scores, cost and,
 * when available, model and duration) and appended to the run list for
 * `entry.eval`. Only the most recent 50 appended runs are retained, and the
 * mean/std are recomputed over that retained list. `costTotal` and `runCount`
 * keep counting every entry, including ones no longer retained.
 *
 * @param {{ byEval: object, totalRuns: number, totalCost: number }} acc
 * @param {{ id: *, eval: string, timestamp: *, data: unknown }} entry
 * @returns {{ byEval: object, totalRuns: number, totalCost: number }}
 */
function reduceEvalTrends(acc, entry) {
  const MAX_EVAL_RUNS = 50;
  const payload = entry.data;
  const scores = extractScores(payload);
  const cost = extractCost(payload);
  const model = extractModel(payload);
  const duration = extractDuration(payload);

  const run = { timestamp: entry.timestamp, id: entry.id, scores, cost };
  if (model !== void 0) run.model = model;
  if (duration !== void 0) run.duration = duration;

  const byEval = { ...acc.byEval };
  const prev = byEval[entry.eval];

  let runs = prev ? prev.runs.concat([run]) : [run];
  if (runs.length > MAX_EVAL_RUNS) {
    runs = runs.slice(-MAX_EVAL_RUNS);
  }
  const stats = computeScoreStats(runs);

  // An entry older than the previously appended run must not overwrite the
  // "latest" scores.
  let latestScores = scores;
  if (prev && prev.runs.length > 0) {
    const lastPrevRun = prev.runs[prev.runs.length - 1];
    if (lastPrevRun.timestamp > run.timestamp) {
      latestScores = prev.latestScores;
    }
  }

  byEval[entry.eval] = {
    runs,
    latestScores,
    scoreMean: stats.mean,
    scoreStd: stats.std,
    costTotal: (prev?.costTotal ?? 0) + cost,
    runCount: (prev?.runCount ?? 0) + 1
  };

  return {
    byEval,
    totalRuns: acc.totalRuns + 1,
    totalCost: acc.totalCost + cost
  };
}
|
|
959
|
+
// Upper bound on the number of duration samples kept per workflow by
// reduceWorkflowStats; once exceeded, one sample is dropped per insertion.
var MAX_DURATIONS = 200;
|
|
960
|
+
/**
 * Creates the zero-valued starting accumulator for workflow statistics.
 *
 * @returns {{ byWorkflow: object, totalExecutions: number, failureRate: number }}
 *   A fresh object on every call.
 */
function emptyWorkflowStatsData() {
  const seed = {
    byWorkflow: {},
    totalExecutions: 0,
    failureRate: 0
  };
  return seed;
}
|
|
963
|
+
/**
 * Returns the p-th percentile of an ascending-sorted numeric array using
 * linear interpolation between the two nearest ranks.
 *
 * @param {number[]} sorted - Samples, already sorted ascending.
 * @param {number} p - Percentile, expressed on a 0-100 scale.
 * @returns {number} The interpolated value, or 0 for an empty array.
 */
function percentile(sorted, p) {
  const count = sorted.length;
  if (count === 0) {
    return 0;
  }
  // Fractional rank into the array: 0 maps to the first sample, 100 to the last.
  const rank = p / 100 * (count - 1);
  const below = Math.floor(rank);
  const above = Math.ceil(rank);
  const floorValue = sorted[below];
  if (below === above) {
    return floorValue;
  }
  const fraction = rank - below;
  return floorValue + (sorted[above] - floorValue) * fraction;
}
|
|
971
|
+
/**
 * Folds one workflow execution into the workflow-stats accumulator without
 * mutating `acc`.
 *
 * Per workflow it tracks total/completed/failed counts, a running duration
 * sum and average, and an ascending-sorted list of duration samples capped
 * at MAX_DURATIONS. The global failure rate is recomputed from the
 * per-workflow failure counts.
 *
 * NOTE(review): when the cap is exceeded the first (smallest) sample is the
 * one removed, so the retained list drifts toward the largest durations seen.
 * Confirm this is the intended sampling policy for the percentile view.
 *
 * @param {{ byWorkflow: object, totalExecutions: number, failureRate: number }} acc
 * @param {{ workflow: string, status: string, duration: * }} execution
 * @returns {{ byWorkflow: object, totalExecutions: number, failureRate: number }}
 */
function reduceWorkflowStats(acc, execution) {
  const { workflow, status } = execution;
  const byWorkflow = { ...acc.byWorkflow };
  const prev = byWorkflow[workflow] ?? {
    total: 0,
    completed: 0,
    failed: 0,
    durations: [],
    durationSum: 0,
    avgDuration: 0
  };

  const dur = finite(execution.duration);

  // Insert before the first strictly larger sample to keep the list sorted.
  const slot = prev.durations.findIndex((sample) => sample > dur);
  const durations = slot === -1
    ? [...prev.durations, dur]
    : [...prev.durations.slice(0, slot), dur, ...prev.durations.slice(slot)];
  if (durations.length > MAX_DURATIONS) {
    durations.shift();
  }

  const total = prev.total + 1;
  const durationSum = prev.durationSum + dur;
  byWorkflow[workflow] = {
    total,
    completed: prev.completed + (status === "completed" ? 1 : 0),
    failed: prev.failed + (status === "failed" ? 1 : 0),
    durations,
    durationSum,
    avgDuration: durationSum / total
  };

  const totalExecutions = acc.totalExecutions + 1;
  let totalFailed = 0;
  for (const stats of Object.values(byWorkflow)) {
    totalFailed += stats.failed;
  }
  const failureRate = totalExecutions > 0 ? totalFailed / totalExecutions : 0;

  return { byWorkflow, totalExecutions, failureRate };
}
|
|
1005
|
+
/**
 * Derives the median and 95th-percentile durations for one workflow entry.
 *
 * @param {{ durations: number[] }} entry - Per-workflow stats; `durations`
 *   is passed straight to `percentile`.
 * @returns {{ durationP50: number, durationP95: number }}
 */
function getWorkflowPercentiles(entry) {
  const samples = entry.durations;
  const durationP50 = percentile(samples, 50);
  const durationP95 = percentile(samples, 95);
  return { durationP50, durationP95 };
}
|
|
1011
|
+
/**
 * Converts the internal workflow-stats accumulator into its response shape.
 *
 * For every workflow the raw `durations` / `durationSum` fields are replaced
 * by the derived `durationP50` / `durationP95` values; the counters and the
 * average are copied through unchanged.
 *
 * @param {{ byWorkflow: object, totalExecutions: number, failureRate: number }} data
 * @returns {{ byWorkflow: object, totalExecutions: number, failureRate: number }}
 */
function enrichWorkflowStats(data) {
  const byWorkflow = {};
  Object.entries(data.byWorkflow).forEach(([name, entry]) => {
    const percentiles = getWorkflowPercentiles(entry);
    byWorkflow[name] = {
      total: entry.total,
      completed: entry.completed,
      failed: entry.failed,
      durationP50: percentiles.durationP50,
      durationP95: percentiles.durationP95,
      avgDuration: entry.avgDuration
    };
  });
  const { totalExecutions, failureRate } = data;
  return { byWorkflow, totalExecutions, failureRate };
}
|
|
1030
|
+
/**
 * Creates the zero-valued starting accumulator for trace statistics.
 *
 * @returns {{ eventTypeCounts: object, byTool: object, retryByAgent: object, totalEvents: number }}
 *   A fresh object on every call.
 */
function emptyTraceStatsData() {
  const seed = {
    eventTypeCounts: {},
    byTool: {},
    retryByAgent: {},
    totalEvents: 0
  };
  return seed;
}
|
|
1038
|
+
/**
 * Folds one trace event into the trace-stats accumulator without mutating
 * `acc`.
 *
 * Tracks:
 *   - `eventTypeCounts`: number of events seen per `event.type`;
 *   - `byTool`: per-tool counts of completed calls (`tool_call_end`) and of
 *     approval outcomes (`tool_denied` / `tool_approval`, read from
 *     `event.data.approved`);
 *   - `retryByAgent`: per-agent counts of `agent_call_end` events carrying a
 *     `retryReason` of `schema`, `validate` or `guardrail`;
 *   - `totalEvents`: total number of events folded in.
 *
 * All map lookups are restricted to own properties. The maps are plain
 * objects, so a bare `map[key]` or `key in map` would also match members
 * inherited from `Object.prototype` (for example a tool named "constructor"
 * or a retry reason of "toString") and corrupt the counters.
 *
 * @param {{ eventTypeCounts: object, byTool: object, retryByAgent: object, totalEvents: number }} acc
 * @param {{ type: string, tool?: string, agent?: string, data?: object }} event
 * @returns {{ eventTypeCounts: object, byTool: object, retryByAgent: object, totalEvents: number }}
 */
function reduceTraceStats(acc, event) {
  const hasOwn = (map, key) => Object.prototype.hasOwnProperty.call(map, key);
  // Own-property read that falls back when the key is absent or nullish.
  const ownOr = (map, key, fallback) => (hasOwn(map, key) ? map[key] : void 0) ?? fallback;

  const eventTypeCounts = { ...acc.eventTypeCounts };
  eventTypeCounts[event.type] = ownOr(eventTypeCounts, event.type, 0) + 1;

  const byTool = { ...acc.byTool };
  if (event.type === "tool_call_end" || event.type === "tool_denied" || event.type === "tool_approval") {
    const toolName = event.tool;
    const prev = ownOr(byTool, toolName, { calls: 0, denied: 0, approved: 0 });
    const isDeniedEvent = event.type === "tool_denied";
    const isApprovalEvent = event.type === "tool_approval";
    // Only approval-related events carry an `approved` flag worth reading.
    const eventData = isDeniedEvent || isApprovalEvent ? event.data : void 0;
    const isApproved = (isDeniedEvent || isApprovalEvent) && eventData?.approved === true;
    // A `tool_denied` event counts as denied unless it is explicitly approved;
    // a `tool_approval` event counts as denied only on an explicit `false`.
    const isDenied = isDeniedEvent && !eventData?.approved || isApprovalEvent && eventData?.approved === false;
    byTool[toolName] = {
      calls: prev.calls + (event.type === "tool_call_end" ? 1 : 0),
      denied: prev.denied + (isDenied ? 1 : 0),
      approved: prev.approved + (isApproved ? 1 : 0)
    };
  }

  const retryByAgent = { ...acc.retryByAgent };
  if (event.agent && event.type === "agent_call_end") {
    const data = event.data;
    if (data?.retryReason) {
      const prev = ownOr(retryByAgent, event.agent, { schema: 0, validate: 0, guardrail: 0 });
      const reason = data.retryReason;
      // Unknown reasons are ignored rather than added as new counters.
      if (hasOwn(prev, reason)) {
        retryByAgent[event.agent] = { ...prev, [reason]: prev[reason] + 1 };
      }
    }
  }

  return {
    eventTypeCounts,
    byTool,
    retryByAgent,
    totalEvents: acc.totalEvents + 1
  };
}
|
|
329
1074
|
|
|
330
1075
|
// src/server/routes/health.ts
|
|
331
1076
|
var import_hono = require("hono");
|
|
@@ -349,7 +1094,7 @@ function createHealthRoutes(readOnly) {
|
|
|
349
1094
|
|
|
350
1095
|
// src/server/routes/workflows.ts
|
|
351
1096
|
var import_hono2 = require("hono");
|
|
352
|
-
var
|
|
1097
|
+
var import_axl3 = require("@axlsdk/axl");
|
|
353
1098
|
function createWorkflowRoutes(connMgr) {
|
|
354
1099
|
const app6 = new import_hono2.Hono();
|
|
355
1100
|
app6.get("/workflows", (c) => {
|
|
@@ -375,8 +1120,8 @@ function createWorkflowRoutes(connMgr) {
|
|
|
375
1120
|
ok: true,
|
|
376
1121
|
data: {
|
|
377
1122
|
name: workflow.name,
|
|
378
|
-
inputSchema: workflow.inputSchema ? (0,
|
|
379
|
-
outputSchema: workflow.outputSchema ? (0,
|
|
1123
|
+
inputSchema: workflow.inputSchema ? (0, import_axl3.zodToJsonSchema)(workflow.inputSchema) : null,
|
|
1124
|
+
outputSchema: workflow.outputSchema ? (0, import_axl3.zodToJsonSchema)(workflow.outputSchema) : null
|
|
380
1125
|
}
|
|
381
1126
|
});
|
|
382
1127
|
});
|
|
@@ -394,15 +1139,22 @@ function createWorkflowRoutes(connMgr) {
|
|
|
394
1139
|
if (body.stream) {
|
|
395
1140
|
const stream = runtime.stream(name, body.input ?? {}, { metadata: body.metadata });
|
|
396
1141
|
const executionId = `stream-${Date.now()}`;
|
|
1142
|
+
const redactOn = runtime.isRedactEnabled();
|
|
397
1143
|
(async () => {
|
|
398
1144
|
for await (const event of stream) {
|
|
399
|
-
connMgr.broadcastWithWildcard(
|
|
1145
|
+
connMgr.broadcastWithWildcard(
|
|
1146
|
+
`execution:${executionId}`,
|
|
1147
|
+
redactStreamEvent(event, redactOn)
|
|
1148
|
+
);
|
|
400
1149
|
}
|
|
401
1150
|
})();
|
|
402
1151
|
return c.json({ ok: true, data: { executionId, streaming: true } });
|
|
403
1152
|
}
|
|
404
1153
|
const result = await runtime.execute(name, body.input ?? {}, { metadata: body.metadata });
|
|
405
|
-
return c.json({
|
|
1154
|
+
return c.json({
|
|
1155
|
+
ok: true,
|
|
1156
|
+
data: { result: redactValue(result, runtime.isRedactEnabled()) }
|
|
1157
|
+
});
|
|
406
1158
|
});
|
|
407
1159
|
return app6;
|
|
408
1160
|
}
|
|
@@ -413,7 +1165,10 @@ var app = new import_hono3.Hono();
|
|
|
413
1165
|
app.get("/executions", async (c) => {
|
|
414
1166
|
const runtime = c.get("runtime");
|
|
415
1167
|
const executions = await runtime.getExecutions();
|
|
416
|
-
return c.json({
|
|
1168
|
+
return c.json({
|
|
1169
|
+
ok: true,
|
|
1170
|
+
data: redactExecutionList(executions, runtime.isRedactEnabled())
|
|
1171
|
+
});
|
|
417
1172
|
});
|
|
418
1173
|
app.get("/executions/:id", async (c) => {
|
|
419
1174
|
const runtime = c.get("runtime");
|
|
@@ -425,7 +1180,32 @@ app.get("/executions/:id", async (c) => {
|
|
|
425
1180
|
404
|
|
426
1181
|
);
|
|
427
1182
|
}
|
|
428
|
-
|
|
1183
|
+
const sinceParam = c.req.query("since");
|
|
1184
|
+
let paged = execution;
|
|
1185
|
+
if (sinceParam !== void 0) {
|
|
1186
|
+
const since = Number(sinceParam);
|
|
1187
|
+
if (!Number.isFinite(since) || !Number.isInteger(since)) {
|
|
1188
|
+
return c.json(
|
|
1189
|
+
{
|
|
1190
|
+
ok: false,
|
|
1191
|
+
error: {
|
|
1192
|
+
code: "INVALID_PARAM",
|
|
1193
|
+
message: `\`since\` must be a finite integer (got "${sinceParam}")`,
|
|
1194
|
+
param: "since"
|
|
1195
|
+
}
|
|
1196
|
+
},
|
|
1197
|
+
400
|
|
1198
|
+
);
|
|
1199
|
+
}
|
|
1200
|
+
paged = {
|
|
1201
|
+
...execution,
|
|
1202
|
+
events: execution.events.filter((e) => e.step > since)
|
|
1203
|
+
};
|
|
1204
|
+
}
|
|
1205
|
+
return c.json({
|
|
1206
|
+
ok: true,
|
|
1207
|
+
data: redactExecutionInfo(paged, runtime.isRedactEnabled())
|
|
1208
|
+
});
|
|
429
1209
|
});
|
|
430
1210
|
app.post("/executions/:id/abort", (c) => {
|
|
431
1211
|
const runtime = c.get("runtime");
|
|
@@ -459,7 +1239,16 @@ function createSessionRoutes(connMgr) {
|
|
|
459
1239
|
const id = c.req.param("id");
|
|
460
1240
|
const history = await store.getSession(id);
|
|
461
1241
|
const handoffHistory = await store.getSessionMeta(id, "handoffHistory");
|
|
462
|
-
return c.json({
|
|
1242
|
+
return c.json({
|
|
1243
|
+
ok: true,
|
|
1244
|
+
data: {
|
|
1245
|
+
id,
|
|
1246
|
+
history: redactSessionHistory(history, runtime.isRedactEnabled()),
|
|
1247
|
+
// HandoffRecord has no content fields (source/target/mode/
|
|
1248
|
+
// timestamp/duration) — nothing to scrub.
|
|
1249
|
+
handoffHistory: handoffHistory ?? []
|
|
1250
|
+
}
|
|
1251
|
+
});
|
|
463
1252
|
});
|
|
464
1253
|
app6.post("/sessions/:id/send", async (c) => {
|
|
465
1254
|
const runtime = c.get("runtime");
|
|
@@ -495,7 +1284,7 @@ function createSessionRoutes(connMgr) {
|
|
|
495
1284
|
|
|
496
1285
|
// src/server/routes/agents.ts
|
|
497
1286
|
var import_hono5 = require("hono");
|
|
498
|
-
var
|
|
1287
|
+
var import_axl4 = require("@axlsdk/axl");
|
|
499
1288
|
var app2 = new import_hono5.Hono();
|
|
500
1289
|
app2.get("/agents", (c) => {
|
|
501
1290
|
const runtime = c.get("runtime");
|
|
@@ -536,7 +1325,7 @@ app2.get("/agents/:name", (c) => {
|
|
|
536
1325
|
tools: cfg.tools?.map((t) => ({
|
|
537
1326
|
name: t.name,
|
|
538
1327
|
description: t.description,
|
|
539
|
-
inputSchema: (0,
|
|
1328
|
+
inputSchema: (0, import_axl4.zodToJsonSchema)(t.inputSchema)
|
|
540
1329
|
})) ?? [],
|
|
541
1330
|
handoffs: typeof cfg.handoffs === "function" ? [
|
|
542
1331
|
{
|
|
@@ -576,14 +1365,14 @@ var agents_default = app2;
|
|
|
576
1365
|
|
|
577
1366
|
// src/server/routes/tools.ts
|
|
578
1367
|
var import_hono6 = require("hono");
|
|
579
|
-
var
|
|
1368
|
+
var import_axl5 = require("@axlsdk/axl");
|
|
580
1369
|
var app3 = new import_hono6.Hono();
|
|
581
1370
|
app3.get("/tools", (c) => {
|
|
582
1371
|
const runtime = c.get("runtime");
|
|
583
1372
|
const tools = runtime.getTools().map((t) => ({
|
|
584
1373
|
name: t.name,
|
|
585
1374
|
description: t.description,
|
|
586
|
-
inputSchema: t.inputSchema ? (0,
|
|
1375
|
+
inputSchema: t.inputSchema ? (0, import_axl5.zodToJsonSchema)(t.inputSchema) : {},
|
|
587
1376
|
sensitive: t.sensitive ?? false,
|
|
588
1377
|
requireApproval: t.requireApproval ?? false
|
|
589
1378
|
}));
|
|
@@ -604,7 +1393,7 @@ app3.get("/tools/:name", (c) => {
|
|
|
604
1393
|
data: {
|
|
605
1394
|
name: tool.name,
|
|
606
1395
|
description: tool.description,
|
|
607
|
-
inputSchema: tool.inputSchema ? (0,
|
|
1396
|
+
inputSchema: tool.inputSchema ? (0, import_axl5.zodToJsonSchema)(tool.inputSchema) : {},
|
|
608
1397
|
sensitive: tool.sensitive,
|
|
609
1398
|
requireApproval: tool.requireApproval,
|
|
610
1399
|
retry: tool.retry,
|
|
@@ -629,7 +1418,10 @@ app3.post("/tools/:name/test", async (c) => {
|
|
|
629
1418
|
const body = await c.req.json();
|
|
630
1419
|
const ctx = runtime.createContext();
|
|
631
1420
|
const result = await tool.run(ctx, body.input);
|
|
632
|
-
return c.json({
|
|
1421
|
+
return c.json({
|
|
1422
|
+
ok: true,
|
|
1423
|
+
data: { result: redactValue(result, runtime.isRedactEnabled()) }
|
|
1424
|
+
});
|
|
633
1425
|
});
|
|
634
1426
|
var tools_default = app3;
|
|
635
1427
|
|
|
@@ -644,7 +1436,7 @@ app4.get("/memory/:scope", async (c) => {
|
|
|
644
1436
|
return c.json({ ok: true, data: [] });
|
|
645
1437
|
}
|
|
646
1438
|
const entries = await store.getAllMemory(scope);
|
|
647
|
-
return c.json({ ok: true, data: entries });
|
|
1439
|
+
return c.json({ ok: true, data: redactMemoryList(entries, runtime.isRedactEnabled()) });
|
|
648
1440
|
});
|
|
649
1441
|
app4.get("/memory/:scope/:key", async (c) => {
|
|
650
1442
|
const runtime = c.get("runtime");
|
|
@@ -664,7 +1456,10 @@ app4.get("/memory/:scope/:key", async (c) => {
|
|
|
664
1456
|
404
|
|
665
1457
|
);
|
|
666
1458
|
}
|
|
667
|
-
return c.json({
|
|
1459
|
+
return c.json({
|
|
1460
|
+
ok: true,
|
|
1461
|
+
data: { key, value: redactMemoryValue(value, runtime.isRedactEnabled()) }
|
|
1462
|
+
});
|
|
668
1463
|
});
|
|
669
1464
|
app4.put("/memory/:scope/:key", async (c) => {
|
|
670
1465
|
const runtime = c.get("runtime");
|
|
@@ -709,7 +1504,10 @@ var app5 = new import_hono8.Hono();
|
|
|
709
1504
|
app5.get("/decisions", async (c) => {
|
|
710
1505
|
const runtime = c.get("runtime");
|
|
711
1506
|
const decisions = await runtime.getPendingDecisions();
|
|
712
|
-
return c.json({
|
|
1507
|
+
return c.json({
|
|
1508
|
+
ok: true,
|
|
1509
|
+
data: redactPendingDecisionList(decisions, runtime.isRedactEnabled())
|
|
1510
|
+
});
|
|
713
1511
|
});
|
|
714
1512
|
app5.post("/decisions/:executionId/resolve", async (c) => {
|
|
715
1513
|
const runtime = c.get("runtime");
|
|
@@ -725,11 +1523,23 @@ var import_hono9 = require("hono");
|
|
|
725
1523
|
function createCostRoutes(costAggregator) {
|
|
726
1524
|
const app6 = new import_hono9.Hono();
|
|
727
1525
|
app6.get("/costs", (c) => {
|
|
728
|
-
|
|
1526
|
+
if (c.req.query("windows") === "all") {
|
|
1527
|
+
return c.json({ ok: true, data: costAggregator.getAllSnapshots() });
|
|
1528
|
+
}
|
|
1529
|
+
const window = parseWindowParam(c.req.query("window"));
|
|
1530
|
+
return c.json({ ok: true, data: costAggregator.getSnapshot(window) });
|
|
729
1531
|
});
|
|
730
1532
|
app6.post("/costs/reset", (c) => {
|
|
731
|
-
|
|
732
|
-
|
|
1533
|
+
return c.json(
|
|
1534
|
+
{
|
|
1535
|
+
ok: false,
|
|
1536
|
+
error: {
|
|
1537
|
+
code: "GONE",
|
|
1538
|
+
message: "POST /api/costs/reset was removed in @axlsdk/studio 0.15. Cost aggregates are now time-windowed and rebuilt from StateStore history. Use GET /api/costs?window=24h|7d|30d|all to narrow the view instead of resetting."
|
|
1539
|
+
}
|
|
1540
|
+
},
|
|
1541
|
+
410
|
|
1542
|
+
);
|
|
733
1543
|
});
|
|
734
1544
|
return app6;
|
|
735
1545
|
}
|
|
@@ -737,8 +1547,9 @@ function createCostRoutes(costAggregator) {
|
|
|
737
1547
|
// src/server/routes/evals.ts
|
|
738
1548
|
var import_node_crypto = require("crypto");
|
|
739
1549
|
var import_hono10 = require("hono");
|
|
740
|
-
function createEvalRoutes(evalLoader) {
|
|
1550
|
+
function createEvalRoutes(connMgr, evalLoader) {
|
|
741
1551
|
const app6 = new import_hono10.Hono();
|
|
1552
|
+
const activeRuns = /* @__PURE__ */ new Map();
|
|
742
1553
|
app6.get("/evals", async (c) => {
|
|
743
1554
|
if (evalLoader) await evalLoader();
|
|
744
1555
|
const runtime = c.get("runtime");
|
|
@@ -748,7 +1559,10 @@ function createEvalRoutes(evalLoader) {
|
|
|
748
1559
|
app6.get("/evals/history", async (c) => {
|
|
749
1560
|
const runtime = c.get("runtime");
|
|
750
1561
|
const history = await runtime.getEvalHistory();
|
|
751
|
-
return c.json({
|
|
1562
|
+
return c.json({
|
|
1563
|
+
ok: true,
|
|
1564
|
+
data: redactEvalHistoryList(history, runtime.isRedactEnabled())
|
|
1565
|
+
});
|
|
752
1566
|
});
|
|
753
1567
|
app6.delete("/evals/history/:id", async (c) => {
|
|
754
1568
|
const runtime = c.get("runtime");
|
|
@@ -769,6 +1583,7 @@ function createEvalRoutes(evalLoader) {
|
|
|
769
1583
|
if (evalLoader) await evalLoader();
|
|
770
1584
|
const runtime = c.get("runtime");
|
|
771
1585
|
const name = c.req.param("name");
|
|
1586
|
+
const redactOn = runtime.isRedactEnabled();
|
|
772
1587
|
const entry = runtime.getRegisteredEval(name);
|
|
773
1588
|
if (!entry) {
|
|
774
1589
|
return c.json(
|
|
@@ -777,13 +1592,89 @@ function createEvalRoutes(evalLoader) {
|
|
|
777
1592
|
);
|
|
778
1593
|
}
|
|
779
1594
|
let runs = 1;
|
|
1595
|
+
let stream = false;
|
|
1596
|
+
let captureTraces = false;
|
|
780
1597
|
try {
|
|
781
1598
|
const body = await c.req.json().catch(() => ({}));
|
|
782
1599
|
if (typeof body.runs === "number" && Number.isFinite(body.runs) && body.runs > 1) {
|
|
783
1600
|
runs = Math.min(Math.floor(body.runs), 25);
|
|
784
1601
|
}
|
|
1602
|
+
if (body.stream === true) {
|
|
1603
|
+
stream = true;
|
|
1604
|
+
}
|
|
1605
|
+
if (body.captureTraces === true) {
|
|
1606
|
+
captureTraces = true;
|
|
1607
|
+
}
|
|
785
1608
|
} catch {
|
|
786
1609
|
}
|
|
1610
|
+
if (stream) {
|
|
1611
|
+
const evalRunId = `eval-${(0, import_node_crypto.randomUUID)()}`;
|
|
1612
|
+
const ac = new AbortController();
|
|
1613
|
+
activeRuns.set(evalRunId, ac);
|
|
1614
|
+
(async () => {
|
|
1615
|
+
try {
|
|
1616
|
+
if (runs > 1) {
|
|
1617
|
+
const runGroupId = (0, import_node_crypto.randomUUID)();
|
|
1618
|
+
const results = [];
|
|
1619
|
+
for (let r = 0; r < runs; r++) {
|
|
1620
|
+
if (ac.signal.aborted) break;
|
|
1621
|
+
const result = await runtime.runRegisteredEval(name, {
|
|
1622
|
+
metadata: { runGroupId, runIndex: r },
|
|
1623
|
+
signal: ac.signal,
|
|
1624
|
+
captureTraces,
|
|
1625
|
+
onProgress: (event) => {
|
|
1626
|
+
if (event.type === "run_done") return;
|
|
1627
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
|
|
1628
|
+
...event,
|
|
1629
|
+
run: r + 1,
|
|
1630
|
+
totalRuns: runs
|
|
1631
|
+
});
|
|
1632
|
+
}
|
|
1633
|
+
});
|
|
1634
|
+
results.push(result);
|
|
1635
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
|
|
1636
|
+
type: "run_done",
|
|
1637
|
+
run: r + 1,
|
|
1638
|
+
totalRuns: runs
|
|
1639
|
+
});
|
|
1640
|
+
}
|
|
1641
|
+
if (results.length > 0) {
|
|
1642
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
|
|
1643
|
+
type: "done",
|
|
1644
|
+
evalResultId: results[0].id,
|
|
1645
|
+
runGroupId
|
|
1646
|
+
});
|
|
1647
|
+
} else {
|
|
1648
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
|
|
1649
|
+
type: "error",
|
|
1650
|
+
message: "All runs were cancelled"
|
|
1651
|
+
});
|
|
1652
|
+
}
|
|
1653
|
+
} else {
|
|
1654
|
+
const result = await runtime.runRegisteredEval(name, {
|
|
1655
|
+
signal: ac.signal,
|
|
1656
|
+
captureTraces,
|
|
1657
|
+
onProgress: (event) => {
|
|
1658
|
+
if (event.type === "run_done") return;
|
|
1659
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, event);
|
|
1660
|
+
}
|
|
1661
|
+
});
|
|
1662
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
|
|
1663
|
+
type: "done",
|
|
1664
|
+
evalResultId: result.id
|
|
1665
|
+
});
|
|
1666
|
+
}
|
|
1667
|
+
} catch (err) {
|
|
1668
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
|
|
1669
|
+
type: "error",
|
|
1670
|
+
message: redactErrorMessage(err, redactOn)
|
|
1671
|
+
});
|
|
1672
|
+
} finally {
|
|
1673
|
+
activeRuns.delete(evalRunId);
|
|
1674
|
+
}
|
|
1675
|
+
})();
|
|
1676
|
+
return c.json({ ok: true, data: { evalRunId } });
|
|
1677
|
+
}
|
|
787
1678
|
try {
|
|
788
1679
|
if (runs > 1) {
|
|
789
1680
|
const { aggregateRuns } = await import("@axlsdk/eval");
|
|
@@ -791,27 +1682,53 @@ function createEvalRoutes(evalLoader) {
|
|
|
791
1682
|
const results = [];
|
|
792
1683
|
for (let r = 0; r < runs; r++) {
|
|
793
1684
|
const result2 = await runtime.runRegisteredEval(name, {
|
|
794
|
-
metadata: { runGroupId, runIndex: r }
|
|
1685
|
+
metadata: { runGroupId, runIndex: r },
|
|
1686
|
+
captureTraces
|
|
795
1687
|
});
|
|
796
1688
|
results.push(result2);
|
|
797
1689
|
}
|
|
798
1690
|
const typedResults = results;
|
|
799
1691
|
const aggregate = aggregateRuns(typedResults);
|
|
800
1692
|
const first = typedResults[0];
|
|
801
|
-
const result = {
|
|
802
|
-
|
|
1693
|
+
const result = {
|
|
1694
|
+
...first,
|
|
1695
|
+
_multiRun: { aggregate, allRuns: typedResults }
|
|
1696
|
+
};
|
|
1697
|
+
return c.json({
|
|
1698
|
+
ok: true,
|
|
1699
|
+
data: redactEvalResult(result, redactOn)
|
|
1700
|
+
});
|
|
803
1701
|
} else {
|
|
804
|
-
const result = await runtime.runRegisteredEval(name);
|
|
805
|
-
return c.json({
|
|
1702
|
+
const result = await runtime.runRegisteredEval(name, { captureTraces });
|
|
1703
|
+
return c.json({
|
|
1704
|
+
ok: true,
|
|
1705
|
+
data: redactEvalResult(result, redactOn)
|
|
1706
|
+
});
|
|
806
1707
|
}
|
|
807
1708
|
} catch (err) {
|
|
808
|
-
|
|
809
|
-
|
|
1709
|
+
return c.json(
|
|
1710
|
+
{ ok: false, error: { code: "EVAL_ERROR", message: redactErrorMessage(err, redactOn) } },
|
|
1711
|
+
400
|
|
1712
|
+
);
|
|
1713
|
+
}
|
|
1714
|
+
});
|
|
1715
|
+
app6.post("/evals/runs/:evalRunId/cancel", (c) => {
|
|
1716
|
+
const evalRunId = c.req.param("evalRunId");
|
|
1717
|
+
const ac = activeRuns.get(evalRunId);
|
|
1718
|
+
if (!ac) {
|
|
1719
|
+
return c.json(
|
|
1720
|
+
{ ok: false, error: { code: "NOT_FOUND", message: "No active eval run found" } },
|
|
1721
|
+
404
|
|
1722
|
+
);
|
|
810
1723
|
}
|
|
1724
|
+
ac.abort();
|
|
1725
|
+
activeRuns.delete(evalRunId);
|
|
1726
|
+
return c.json({ ok: true, data: { cancelled: true } });
|
|
811
1727
|
});
|
|
812
1728
|
app6.post("/evals/:name/rescore", async (c) => {
|
|
813
1729
|
if (evalLoader) await evalLoader();
|
|
814
1730
|
const runtime = c.get("runtime");
|
|
1731
|
+
const redactOn = runtime.isRedactEnabled();
|
|
815
1732
|
const name = c.req.param("name");
|
|
816
1733
|
const body = await c.req.json();
|
|
817
1734
|
if (!body.resultId || typeof body.resultId !== "string") {
|
|
@@ -849,19 +1766,29 @@ function createEvalRoutes(evalLoader) {
|
|
|
849
1766
|
timestamp: Date.now(),
|
|
850
1767
|
data: result
|
|
851
1768
|
});
|
|
852
|
-
return c.json({
|
|
1769
|
+
return c.json({
|
|
1770
|
+
ok: true,
|
|
1771
|
+
data: redactEvalResult(result, redactOn)
|
|
1772
|
+
});
|
|
853
1773
|
} catch (err) {
|
|
854
|
-
|
|
855
|
-
|
|
1774
|
+
return c.json(
|
|
1775
|
+
{ ok: false, error: { code: "EVAL_ERROR", message: redactErrorMessage(err, redactOn) } },
|
|
1776
|
+
400
|
|
1777
|
+
);
|
|
856
1778
|
}
|
|
857
1779
|
});
|
|
858
1780
|
app6.post("/evals/compare", async (c) => {
|
|
859
1781
|
const runtime = c.get("runtime");
|
|
1782
|
+
const redactOn = runtime.isRedactEnabled();
|
|
860
1783
|
const body = await c.req.json();
|
|
1784
|
+
const MAX_POOLED_RUNS = 25;
|
|
861
1785
|
const validateIdParam = (v, name) => {
|
|
862
1786
|
if (typeof v === "string") return v === "" ? `${name} must be non-empty` : null;
|
|
863
1787
|
if (Array.isArray(v)) {
|
|
864
1788
|
if (v.length === 0) return `${name} must be a non-empty array`;
|
|
1789
|
+
if (v.length > MAX_POOLED_RUNS) {
|
|
1790
|
+
return `${name} may contain at most ${MAX_POOLED_RUNS} ids (pooled comparison)`;
|
|
1791
|
+
}
|
|
865
1792
|
for (const elem of v) {
|
|
866
1793
|
if (typeof elem !== "string" || elem === "") {
|
|
867
1794
|
return `${name} array must contain only non-empty strings`;
|
|
@@ -924,8 +1851,13 @@ function createEvalRoutes(evalLoader) {
|
|
|
924
1851
|
const result = await runtime.evalCompare(baseline, candidate, body.options);
|
|
925
1852
|
return c.json({ ok: true, data: result });
|
|
926
1853
|
} catch (err) {
|
|
927
|
-
|
|
928
|
-
|
|
1854
|
+
return c.json(
|
|
1855
|
+
{
|
|
1856
|
+
ok: false,
|
|
1857
|
+
error: { code: "COMPARE_FAILED", message: redactErrorMessage(err, redactOn) }
|
|
1858
|
+
},
|
|
1859
|
+
400
|
|
1860
|
+
);
|
|
929
1861
|
}
|
|
930
1862
|
});
|
|
931
1863
|
app6.post("/evals/import", async (c) => {
|
|
@@ -987,7 +1919,11 @@ function createEvalRoutes(evalLoader) {
|
|
|
987
1919
|
});
|
|
988
1920
|
return c.json({ ok: true, data: { id, eval: evalName, timestamp } });
|
|
989
1921
|
});
|
|
990
|
-
|
|
1922
|
+
function closeActiveRuns() {
|
|
1923
|
+
for (const ac of activeRuns.values()) ac.abort();
|
|
1924
|
+
activeRuns.clear();
|
|
1925
|
+
}
|
|
1926
|
+
return { app: app6, closeActiveRuns };
|
|
991
1927
|
}
|
|
992
1928
|
|
|
993
1929
|
// src/server/routes/playground.ts
|
|
@@ -1021,34 +1957,50 @@ function createPlaygroundRoutes(connMgr) {
|
|
|
1021
1957
|
);
|
|
1022
1958
|
}
|
|
1023
1959
|
const sessionId = body.sessionId ?? `playground-${Date.now()}`;
|
|
1024
|
-
const executionId = `playground-${sessionId}-${Date.now()}`;
|
|
1025
1960
|
const store = runtime.getStateStore();
|
|
1026
1961
|
const history = await store.getSession(sessionId);
|
|
1027
1962
|
history.push({ role: "user", content: body.message });
|
|
1028
|
-
const
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
});
|
|
1963
|
+
const redactOn = runtime.isRedactEnabled();
|
|
1964
|
+
const ctx = runtime.createContext({ sessionHistory: history });
|
|
1965
|
+
const executionId = ctx.executionId;
|
|
1966
|
+
const traceListener = (event) => {
|
|
1967
|
+
if (event.executionId !== executionId) return;
|
|
1968
|
+
connMgr.broadcastWithWildcard(`execution:${executionId}`, redactStreamEvent(event, redactOn));
|
|
1969
|
+
};
|
|
1970
|
+
runtime.on("trace", traceListener);
|
|
1037
1971
|
(async () => {
|
|
1972
|
+
let stepCounter = Number.MAX_SAFE_INTEGER - 1;
|
|
1973
|
+
const terminalFields = () => ({
|
|
1974
|
+
executionId,
|
|
1975
|
+
step: stepCounter++,
|
|
1976
|
+
timestamp: Date.now()
|
|
1977
|
+
});
|
|
1038
1978
|
try {
|
|
1039
1979
|
const result = await ctx.ask(agent, body.message);
|
|
1040
1980
|
const resultText = typeof result === "string" ? result : JSON.stringify(result);
|
|
1041
1981
|
history.push({ role: "assistant", content: resultText });
|
|
1042
1982
|
await store.saveSession(sessionId, history);
|
|
1043
|
-
|
|
1983
|
+
const doneEvent = {
|
|
1984
|
+
...terminalFields(),
|
|
1044
1985
|
type: "done",
|
|
1045
|
-
data: resultText
|
|
1046
|
-
}
|
|
1986
|
+
data: { result: resultText }
|
|
1987
|
+
};
|
|
1988
|
+
connMgr.broadcastWithWildcard(
|
|
1989
|
+
`execution:${executionId}`,
|
|
1990
|
+
redactStreamEvent(doneEvent, redactOn)
|
|
1991
|
+
);
|
|
1047
1992
|
} catch (err) {
|
|
1048
|
-
|
|
1993
|
+
const errorEvent = {
|
|
1994
|
+
...terminalFields(),
|
|
1049
1995
|
type: "error",
|
|
1050
|
-
message: err instanceof Error ? err.message : String(err)
|
|
1051
|
-
}
|
|
1996
|
+
data: { message: err instanceof Error ? err.message : String(err) }
|
|
1997
|
+
};
|
|
1998
|
+
connMgr.broadcastWithWildcard(
|
|
1999
|
+
`execution:${executionId}`,
|
|
2000
|
+
redactStreamEvent(errorEvent, redactOn)
|
|
2001
|
+
);
|
|
2002
|
+
} finally {
|
|
2003
|
+
runtime.off("trace", traceListener);
|
|
1052
2004
|
}
|
|
1053
2005
|
})();
|
|
1054
2006
|
return c.json({
|
|
@@ -1059,12 +2011,78 @@ function createPlaygroundRoutes(connMgr) {
|
|
|
1059
2011
|
return app6;
|
|
1060
2012
|
}
|
|
1061
2013
|
|
|
2014
|
+
// src/server/routes/eval-trends.ts
|
|
2015
|
+
var import_hono12 = require("hono");
|
|
2016
|
+
/**
 * Build the Hono sub-app that serves the eval-trends snapshot endpoint.
 *
 * Registers a single route, `GET /eval-trends`. The handler reads the
 * `window` query-string value, passes it through `parseWindowParam`
 * (defined elsewhere in this bundle), and responds with
 * `{ ok: true, data: <snapshot> }` where the snapshot comes from
 * `aggregator.getSnapshot(...)`.
 *
 * @param aggregator - object exposing `getSnapshot(window)`.
 * @returns the configured Hono instance, ready to be mounted by the caller.
 */
function createEvalTrendsRoutes(aggregator) {
  const router = new import_hono12.Hono();
  router.get("/eval-trends", (c) => {
    const requestedWindow = c.req.query("window");
    const snapshot = aggregator.getSnapshot(parseWindowParam(requestedWindow));
    return c.json({ ok: true, data: snapshot });
  });
  return router;
}
|
|
2024
|
+
|
|
2025
|
+
// src/server/routes/workflow-stats.ts
|
|
2026
|
+
var import_hono13 = require("hono");
|
|
2027
|
+
/**
 * Build the Hono sub-app that serves the workflow-stats snapshot endpoint.
 *
 * Registers a single route, `GET /workflow-stats`. The handler reads the
 * `window` query-string value, passes it through `parseWindowParam`, fetches
 * the matching snapshot from the aggregator, and runs that snapshot through
 * `enrichWorkflowStats` before returning it as `{ ok: true, data: ... }`.
 * Both helpers are defined elsewhere in this bundle.
 *
 * @param aggregator - object exposing `getSnapshot(window)`.
 * @returns the configured Hono instance, ready to be mounted by the caller.
 */
function createWorkflowStatsRoutes(aggregator) {
  const router = new import_hono13.Hono();
  router.get("/workflow-stats", (c) => {
    const requestedWindow = c.req.query("window");
    const rawSnapshot = aggregator.getSnapshot(parseWindowParam(requestedWindow));
    const enriched = enrichWorkflowStats(rawSnapshot);
    return c.json({ ok: true, data: enriched });
  });
  return router;
}
|
|
2035
|
+
|
|
2036
|
+
// src/server/routes/trace-stats.ts
|
|
2037
|
+
var import_hono14 = require("hono");
|
|
2038
|
+
/**
 * Build the Hono sub-app that serves the trace-stats snapshot endpoint.
 *
 * Registers a single route, `GET /trace-stats`. The handler reads the
 * `window` query-string value, passes it through `parseWindowParam`
 * (defined elsewhere in this bundle), and responds with
 * `{ ok: true, data: <snapshot> }` where the snapshot comes from
 * `aggregator.getSnapshot(...)`.
 *
 * @param aggregator - object exposing `getSnapshot(window)`.
 * @returns the configured Hono instance, ready to be mounted by the caller.
 */
function createTraceStatsRoutes(aggregator) {
  const router = new import_hono14.Hono();
  router.get("/trace-stats", (c) => {
    const requestedWindow = c.req.query("window");
    const snapshot = aggregator.getSnapshot(parseWindowParam(requestedWindow));
    return c.json({ ok: true, data: snapshot });
  });
  return router;
}
|
|
2046
|
+
|
|
1062
2047
|
// src/server/index.ts
|
|
1063
2048
|
function createServer(options) {
|
|
1064
2049
|
const { runtime, staticRoot, basePath = "", readOnly = false } = options;
|
|
1065
|
-
const app6 = new
|
|
1066
|
-
const connMgr = new ConnectionManager();
|
|
1067
|
-
const
|
|
2050
|
+
const app6 = new import_hono15.Hono();
|
|
2051
|
+
const connMgr = new ConnectionManager(options.bufferCaps);
|
|
2052
|
+
const windows = ["24h", "7d", "30d", "all"];
|
|
2053
|
+
const costAggregator = new TraceAggregator({
|
|
2054
|
+
runtime,
|
|
2055
|
+
connMgr,
|
|
2056
|
+
channel: "costs",
|
|
2057
|
+
reducer: reduceCost,
|
|
2058
|
+
emptyState: emptyCostData,
|
|
2059
|
+
windows
|
|
2060
|
+
});
|
|
2061
|
+
const workflowStatsAggregator = new ExecutionAggregator({
|
|
2062
|
+
runtime,
|
|
2063
|
+
connMgr,
|
|
2064
|
+
channel: "workflow-stats",
|
|
2065
|
+
reducer: reduceWorkflowStats,
|
|
2066
|
+
emptyState: emptyWorkflowStatsData,
|
|
2067
|
+
windows,
|
|
2068
|
+
broadcastTransform: enrichWorkflowStats
|
|
2069
|
+
});
|
|
2070
|
+
const traceStatsAggregator = new TraceAggregator({
|
|
2071
|
+
runtime,
|
|
2072
|
+
connMgr,
|
|
2073
|
+
channel: "trace-stats",
|
|
2074
|
+
reducer: reduceTraceStats,
|
|
2075
|
+
emptyState: emptyTraceStatsData,
|
|
2076
|
+
windows
|
|
2077
|
+
});
|
|
2078
|
+
const evalTrendsAggregator = new EvalAggregator({
|
|
2079
|
+
runtime,
|
|
2080
|
+
connMgr,
|
|
2081
|
+
channel: "eval-trends",
|
|
2082
|
+
reducer: reduceEvalTrends,
|
|
2083
|
+
emptyState: emptyEvalTrendData,
|
|
2084
|
+
windows
|
|
2085
|
+
});
|
|
1068
2086
|
if (options.cors !== false) {
|
|
1069
2087
|
app6.use("*", (0, import_cors.cors)());
|
|
1070
2088
|
}
|
|
@@ -1082,11 +2100,11 @@ function createServer(options) {
|
|
|
1082
2100
|
/^PUT \/api\/memory(\/|$)/,
|
|
1083
2101
|
/^DELETE \/api\/memory(\/|$)/,
|
|
1084
2102
|
/^POST \/api\/decisions(\/|$)/,
|
|
1085
|
-
/^POST \/api\/costs(\/|$)/,
|
|
1086
2103
|
/^POST \/api\/tools(\/|$)/,
|
|
1087
2104
|
/^POST \/api\/evals\/import$/,
|
|
1088
2105
|
/^POST \/api\/evals\/[^/]+\/run$/,
|
|
1089
2106
|
/^POST \/api\/evals\/[^/]+\/rescore$/,
|
|
2107
|
+
/^POST \/api\/evals\/runs\/[^/]+\/cancel$/,
|
|
1090
2108
|
/^DELETE \/api\/evals\/history\/[^/]+$/,
|
|
1091
2109
|
/^POST \/api\/playground(\/|$)/
|
|
1092
2110
|
];
|
|
@@ -1106,7 +2124,7 @@ function createServer(options) {
|
|
|
1106
2124
|
await next();
|
|
1107
2125
|
});
|
|
1108
2126
|
}
|
|
1109
|
-
const api = new
|
|
2127
|
+
const api = new import_hono15.Hono();
|
|
1110
2128
|
api.route("/", createHealthRoutes(readOnly));
|
|
1111
2129
|
api.route("/", createWorkflowRoutes(connMgr));
|
|
1112
2130
|
api.route("/", executions_default);
|
|
@@ -1116,20 +2134,37 @@ function createServer(options) {
|
|
|
1116
2134
|
api.route("/", memory_default);
|
|
1117
2135
|
api.route("/", decisions_default);
|
|
1118
2136
|
api.route("/", createCostRoutes(costAggregator));
|
|
1119
|
-
api.route("/",
|
|
2137
|
+
api.route("/", createEvalTrendsRoutes(evalTrendsAggregator));
|
|
2138
|
+
api.route("/", createWorkflowStatsRoutes(workflowStatsAggregator));
|
|
2139
|
+
api.route("/", createTraceStatsRoutes(traceStatsAggregator));
|
|
2140
|
+
const { app: evalApp, closeActiveRuns } = createEvalRoutes(connMgr, options.evalLoader);
|
|
2141
|
+
api.route("/", evalApp);
|
|
1120
2142
|
api.route("/", createPlaygroundRoutes(connMgr));
|
|
1121
2143
|
app6.route("/api", api);
|
|
1122
2144
|
const traceListener = (event) => {
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
2145
|
+
try {
|
|
2146
|
+
const traceEvent = event;
|
|
2147
|
+
const redacted = redactStreamEvent(traceEvent, runtime.isRedactEnabled());
|
|
2148
|
+
if (traceEvent.executionId) {
|
|
2149
|
+
connMgr.broadcastWithWildcard(`trace:${traceEvent.executionId}`, redacted);
|
|
2150
|
+
}
|
|
2151
|
+
if (traceEvent.type === "await_human") {
|
|
2152
|
+
connMgr.broadcast("decisions", redacted);
|
|
2153
|
+
}
|
|
2154
|
+
} catch (err) {
|
|
2155
|
+
console.error(
|
|
2156
|
+
"[axl-studio] trace listener threw; event dropped:",
|
|
2157
|
+
err instanceof Error ? err.message : String(err)
|
|
2158
|
+
);
|
|
1130
2159
|
}
|
|
1131
2160
|
};
|
|
1132
2161
|
runtime.on("trace", traceListener);
|
|
2162
|
+
const aggregatorStartPromise = Promise.all([
|
|
2163
|
+
costAggregator.start(),
|
|
2164
|
+
workflowStatsAggregator.start(),
|
|
2165
|
+
traceStatsAggregator.start(),
|
|
2166
|
+
evalTrendsAggregator.start()
|
|
2167
|
+
]).catch((err) => console.error("[axl-studio] aggregator start failed:", err));
|
|
1133
2168
|
if (staticRoot) {
|
|
1134
2169
|
const indexPath = (0, import_node_path.resolve)(staticRoot, "index.html");
|
|
1135
2170
|
let spaHtml;
|
|
@@ -1179,9 +2214,22 @@ function createServer(options) {
|
|
|
1179
2214
|
app: app6,
|
|
1180
2215
|
connMgr,
|
|
1181
2216
|
costAggregator,
|
|
2217
|
+
workflowStatsAggregator,
|
|
2218
|
+
traceStatsAggregator,
|
|
2219
|
+
evalTrendsAggregator,
|
|
2220
|
+
aggregatorStartPromise,
|
|
1182
2221
|
/** Create WS handlers. Call before registering static/SPA routes are reached. */
|
|
1183
2222
|
createWsHandlers: () => createWsHandlers(connMgr),
|
|
1184
|
-
traceListener
|
|
2223
|
+
traceListener,
|
|
2224
|
+
/** Abort all active streaming eval runs. */
|
|
2225
|
+
closeActiveRuns,
|
|
2226
|
+
/** Close all aggregators (clear intervals and unsubscribe listeners). */
|
|
2227
|
+
closeAggregators: () => {
|
|
2228
|
+
costAggregator.close();
|
|
2229
|
+
workflowStatsAggregator.close();
|
|
2230
|
+
traceStatsAggregator.close();
|
|
2231
|
+
evalTrendsAggregator.close();
|
|
2232
|
+
}
|
|
1185
2233
|
};
|
|
1186
2234
|
}
|
|
1187
2235
|
|