@axlsdk/studio 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -10
- package/dist/chunk-RE6VPUXA.js +2213 -0
- package/dist/chunk-RE6VPUXA.js.map +1 -0
- package/dist/cli.cjs +1191 -143
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +1 -1
- package/dist/client/assets/index-ClajLxib.js +288 -0
- package/dist/client/assets/index-DnHL_gtF.css +1 -0
- package/dist/client/index.html +2 -2
- package/dist/connection-manager-DAuqk9lM.d.cts +166 -0
- package/dist/connection-manager-DAuqk9lM.d.ts +166 -0
- package/dist/middleware.cjs +1222 -150
- package/dist/middleware.cjs.map +1 -1
- package/dist/middleware.d.cts +76 -6
- package/dist/middleware.d.ts +76 -6
- package/dist/middleware.js +32 -8
- package/dist/middleware.js.map +1 -1
- package/dist/server/index.cjs +1194 -142
- package/dist/server/index.cjs.map +1 -1
- package/dist/server/index.d.cts +171 -28
- package/dist/server/index.d.ts +171 -28
- package/dist/server/index.js +7 -3
- package/package.json +13 -9
- package/dist/chunk-HUKUQDYL.js +0 -1163
- package/dist/chunk-HUKUQDYL.js.map +0 -1
- package/dist/client/assets/index-7aDhMztu.css +0 -1
- package/dist/client/assets/index-Bzr3vDPz.js +0 -255
- package/dist/connection-manager-B7AWpsCD.d.cts +0 -81
- package/dist/connection-manager-B7AWpsCD.d.ts +0 -81
package/dist/server/index.cjs
CHANGED
|
@@ -31,22 +31,148 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
31
31
|
var server_exports = {};
|
|
32
32
|
__export(server_exports, {
|
|
33
33
|
ConnectionManager: () => ConnectionManager,
|
|
34
|
-
|
|
34
|
+
EvalAggregator: () => EvalAggregator,
|
|
35
|
+
ExecutionAggregator: () => ExecutionAggregator,
|
|
36
|
+
TraceAggregator: () => TraceAggregator,
|
|
35
37
|
createServer: () => createServer
|
|
36
38
|
});
|
|
37
39
|
module.exports = __toCommonJS(server_exports);
|
|
38
40
|
var import_node_fs = require("fs");
|
|
39
41
|
var import_node_path = require("path");
|
|
40
|
-
var
|
|
42
|
+
var import_hono15 = require("hono");
|
|
41
43
|
var import_cors = require("hono/cors");
|
|
42
44
|
var import_serve_static = require("@hono/node-server/serve-static");
|
|
43
45
|
|
|
46
|
+
// src/server/redact.ts
|
|
47
|
+
var import_axl = require("@axlsdk/axl");
|
|
48
|
+
var REDACTED = "[redacted]";
|
|
49
|
+
var SAFE_ERROR_NAMES = /* @__PURE__ */ new Set([
|
|
50
|
+
"QuorumNotMet",
|
|
51
|
+
"NoConsensus",
|
|
52
|
+
"TimeoutError",
|
|
53
|
+
"MaxTurnsError",
|
|
54
|
+
"BudgetExceededError",
|
|
55
|
+
"ToolDenied"
|
|
56
|
+
]);
|
|
57
|
+
/**
 * Produce the error message to expose to clients.
 *
 * @param {unknown} err - Thrown value; non-Error values are stringified.
 * @param {boolean} redact - When falsy the raw message is returned unchanged.
 * @returns {string} The raw message, or the redaction placeholder when
 *   redaction is on and the error's name is not in SAFE_ERROR_NAMES.
 */
function redactErrorMessage(err, redact) {
  const isError = err instanceof Error;
  const message = isError ? err.message : String(err);
  if (redact) {
    // Non-Error values have no name, so they never match the allow-list.
    const errorName = isError ? err.name : "";
    if (!SAFE_ERROR_NAMES.has(errorName)) return REDACTED;
  }
  return message;
}
|
|
63
|
+
/**
 * Replace a value with the redaction placeholder when redaction is enabled.
 *
 * @param {unknown} value - Value to pass through or hide.
 * @param {boolean} redact - Whether redaction is enabled.
 * @returns {unknown} `value` itself, or REDACTED.
 */
function redactValue(value, redact) {
  return redact ? REDACTED : value;
}
|
|
67
|
+
/**
 * Scrub an execution record: `result` and `error` (when present) become the
 * placeholder and every entry of `events` goes through redactStreamEvent.
 *
 * @param {object} info - Execution record with an `events` array.
 * @param {boolean} redact - When falsy `info` is returned as-is.
 * @returns {object} `info` itself or a scrubbed shallow copy.
 */
function redactExecutionInfo(info, redact) {
  if (!redact) return info;
  const scrubbed = { ...info };
  if (info.result !== void 0) scrubbed.result = REDACTED;
  if (info.error !== void 0) scrubbed.error = REDACTED;
  scrubbed.events = info.events.map((evt) => redactStreamEvent(evt, true));
  return scrubbed;
}
|
|
76
|
+
/**
 * Apply redactExecutionInfo to each execution record.
 *
 * @param {object[]} infos - Execution records.
 * @param {boolean} redact - When falsy the input array is returned as-is.
 * @returns {object[]} The input array or a new array of scrubbed records.
 */
function redactExecutionList(infos, redact) {
  return redact ? infos.map((record) => redactExecutionInfo(record, redact)) : infos;
}
|
|
80
|
+
/**
 * Hide a memory value when redaction is enabled.
 *
 * @param {unknown} value - Stored memory value.
 * @param {boolean} redact - Whether redaction is enabled.
 * @returns {unknown} `value` itself, or REDACTED.
 */
function redactMemoryValue(value, redact) {
  return redact ? REDACTED : value;
}
|
|
84
|
+
/**
 * Hide the values of memory entries while keeping their keys.
 *
 * @param {{key: string, value: unknown}[]} entries - Memory entries.
 * @param {boolean} redact - When falsy the input array is returned as-is.
 * @returns {object[]} The input array, or new `{ key, value }` objects whose
 *   value is the placeholder (any other entry fields are dropped).
 */
function redactMemoryList(entries, redact) {
  if (redact) {
    return entries.map(({ key }) => ({ key, value: REDACTED }));
  }
  return entries;
}
|
|
88
|
+
/**
 * Build a scrubbed copy of a chat message.
 *
 * Keeps `role`, and `name` / `tool_call_id` when defined. `content` and each
 * tool call's `arguments` are replaced with the placeholder. Fields not
 * copied here (for example providerMetadata) are deliberately omitted.
 *
 * @param {object} msg - Chat message.
 * @returns {object} New scrubbed message object.
 */
function redactChatMessage(msg) {
  const out = { role: msg.role, content: REDACTED };
  if (msg.name !== void 0) out.name = msg.name;
  if (msg.tool_call_id !== void 0) out.tool_call_id = msg.tool_call_id;
  if (msg.tool_calls !== void 0) {
    out.tool_calls = msg.tool_calls.map(({ id, type, function: fn }) => ({
      id,
      type,
      // The function name is kept; the argument payload is hidden.
      function: { name: fn.name, arguments: REDACTED }
    }));
  }
  return out;
}
|
|
108
|
+
/**
 * Scrub every message of a session history with redactChatMessage.
 *
 * @param {object[]} history - Chat messages.
 * @param {boolean} redact - When falsy the input array is returned as-is.
 * @returns {object[]} The input array or a new array of scrubbed messages.
 */
function redactSessionHistory(history, redact) {
  return redact ? history.map(redactChatMessage) : history;
}
|
|
112
|
+
/**
 * Scrub a stream event by delegating to `redactEvent` from `@axlsdk/axl`.
 *
 * @param {object} event - Stream event.
 * @param {boolean} redact - When falsy the event is returned as-is.
 * @returns {object} `event` itself or the result of `redactEvent(event)`.
 */
function redactStreamEvent(event, redact) {
  if (redact) {
    return (0, import_axl.redactEvent)(event);
  }
  return event;
}
|
|
116
|
+
/**
 * Build a scrubbed copy of one eval item.
 *
 * `input` and `output` are always replaced with the placeholder;
 * `annotations`, `error` and each `scorerErrors` entry are replaced when
 * present. `scoreDetails` entries are reduced to `score`, `duration` and
 * `cost`; their metadata is deliberately omitted because it may contain LLM
 * scorer reasoning. All other item fields are copied through unchanged.
 *
 * @param {object} item - Eval item.
 * @returns {object} New scrubbed item.
 */
function redactEvalItem(item) {
  const out = { ...item, input: REDACTED, output: REDACTED };
  if (item.annotations !== void 0) out.annotations = REDACTED;
  if (item.error !== void 0) out.error = REDACTED;
  if (item.scorerErrors !== void 0) {
    out.scorerErrors = item.scorerErrors.map(() => REDACTED);
  }
  if (item.scoreDetails) {
    const keptDetails = {};
    for (const [scorer, detail] of Object.entries(item.scoreDetails)) {
      const kept = { score: detail.score };
      if (detail.duration !== void 0) kept.duration = detail.duration;
      if (detail.cost !== void 0) kept.cost = detail.cost;
      keptDetails[scorer] = kept;
    }
    out.scoreDetails = keptDetails;
  }
  return out;
}
|
|
139
|
+
/**
 * Scrub every item of an eval result with redactEvalItem.
 *
 * @param {object} result - Eval result with an `items` array.
 * @param {boolean} redact - When falsy `result` is returned as-is.
 * @returns {object} `result` itself or a shallow copy with scrubbed items.
 */
function redactEvalResult(result, redact) {
  if (redact) {
    const items = result.items.map(redactEvalItem);
    return { ...result, items };
  }
  return result;
}
|
|
146
|
+
/**
 * Scrub the `data` payload of an eval history entry via redactEvalResult.
 *
 * @param {object} entry - History entry whose `data` is an eval result.
 * @param {boolean} redact - When falsy `entry` is returned as-is.
 * @returns {object} `entry` itself or a shallow copy with scrubbed `data`.
 */
function redactEvalHistoryEntry(entry, redact) {
  if (redact) {
    const data = redactEvalResult(entry.data, redact);
    return { ...entry, data };
  }
  return entry;
}
|
|
153
|
+
/**
 * Apply redactEvalHistoryEntry to each history entry.
 *
 * @param {object[]} entries - Eval history entries.
 * @param {boolean} redact - When falsy the input array is returned as-is.
 * @returns {object[]} The input array or a new array of scrubbed entries.
 */
function redactEvalHistoryList(entries, redact) {
  return redact ? entries.map((item) => redactEvalHistoryEntry(item, redact)) : entries;
}
|
|
157
|
+
/**
 * Scrub a pending decision: `prompt` becomes the placeholder and `metadata`,
 * when present, is replaced by the marker object `{ redacted: true }`.
 *
 * @param {object} decision - Pending decision.
 * @param {boolean} redact - When falsy `decision` is returned as-is.
 * @returns {object} `decision` itself or a scrubbed shallow copy.
 */
function redactPendingDecision(decision, redact) {
  if (!redact) return decision;
  const scrubbed = { ...decision, prompt: REDACTED };
  if (decision.metadata !== void 0) {
    scrubbed.metadata = { redacted: true };
  }
  return scrubbed;
}
|
|
165
|
+
/**
 * Apply redactPendingDecision to each pending decision.
 *
 * @param {object[]} decisions - Pending decisions.
 * @param {boolean} redact - When falsy the input array is returned as-is.
 * @returns {object[]} The input array or a new array of scrubbed decisions.
 */
function redactPendingDecisionList(decisions, redact) {
  return redact ? decisions.map((item) => redactPendingDecision(item, redact)) : decisions;
}
|
|
169
|
+
|
|
44
170
|
// src/server/middleware/error-handler.ts
|
|
45
171
|
async function errorHandler(c, next) {
|
|
46
172
|
try {
|
|
47
173
|
await next();
|
|
48
174
|
} catch (err) {
|
|
49
|
-
const
|
|
175
|
+
const rawMessage = err instanceof Error ? err.message : String(err);
|
|
50
176
|
const code = err.code ?? "INTERNAL_ERROR";
|
|
51
177
|
let status = 500;
|
|
52
178
|
if ("status" in err) {
|
|
@@ -54,46 +180,104 @@ async function errorHandler(c, next) {
|
|
|
54
180
|
if (typeof errStatus === "number" && errStatus >= 400 && errStatus < 600) {
|
|
55
181
|
status = errStatus;
|
|
56
182
|
}
|
|
57
|
-
} else if (code === "NOT_FOUND" ||
|
|
183
|
+
} else if (code === "NOT_FOUND" || rawMessage.includes("not found") || rawMessage.includes("not registered")) {
|
|
58
184
|
status = 404;
|
|
59
|
-
} else if (code === "VALIDATION_ERROR" ||
|
|
185
|
+
} else if (code === "VALIDATION_ERROR" || rawMessage.includes("Expected") || rawMessage.includes("invalid")) {
|
|
60
186
|
status = 400;
|
|
61
187
|
}
|
|
188
|
+
const runtime = c.get("runtime");
|
|
189
|
+
const redactOn = runtime?.isRedactEnabled?.() ?? false;
|
|
62
190
|
const body = {
|
|
63
191
|
ok: false,
|
|
64
|
-
error: { code, message }
|
|
192
|
+
error: { code, message: redactErrorMessage(err, redactOn) }
|
|
65
193
|
};
|
|
66
194
|
return c.json(body, status);
|
|
67
195
|
}
|
|
68
196
|
}
|
|
69
197
|
|
|
70
198
|
// src/server/ws/connection-manager.ts
|
|
199
|
+
var BUFFER_TTL_MS = 3e4;
|
|
200
|
+
var DEFAULT_MAX_BUFFER_EVENTS = 1e3;
|
|
201
|
+
var DEFAULT_MAX_BUFFER_BYTES = 4 * 1024 * 1024;
|
|
202
|
+
var DEFAULT_MAX_ACTIVE_BUFFERS = 256;
|
|
203
|
+
var UNBUFFERED_EVENT_TYPES = /* @__PURE__ */ new Set(["token", "partial_object"]);
|
|
204
|
+
var MAX_WS_FRAME_BYTES = 65536;
|
|
71
205
|
/**
 * Report whether a channel's events are kept in a replay buffer.
 *
 * @param {string} channel - Channel name.
 * @returns {boolean} True for channels starting with "execution:" or "eval:".
 */
function isBufferedChannel(channel) {
  return ["execution:", "eval:"].some((prefix) => channel.startsWith(prefix));
}
|
|
208
|
+
/**
 * Keep an outbound WS frame within MAX_WS_FRAME_BYTES.
 *
 * A frame within budget is returned untouched. Otherwise the event's `data`
 * field is replaced by a truncation marker while its other top-level fields
 * are kept. If that is still too large (the bulk sits in another top-level
 * field), only small scalar fields are kept; as a last resort only the
 * marker is sent.
 *
 * @param {string} msg - Already-serialized frame.
 * @param {string} channel - Channel the frame is sent on.
 * @param {unknown} data - Event payload that was serialized into `msg`.
 * @returns {string} `msg` itself, or a serialized replacement frame.
 */
function truncateIfOversized(msg, channel, data) {
  const msgBytes = Buffer.byteLength(msg, "utf8");
  if (msgBytes <= MAX_WS_FRAME_BYTES) return msg;
  const marker = {
    __truncated: true,
    originalBytes: msgBytes,
    maxBytes: MAX_WS_FRAME_BYTES,
    hint: "Event exceeded WS frame budget (likely a verbose agent_call with a large messages[] snapshot). Fetch via REST if you need the full payload."
  };
  const fits = (text) => Buffer.byteLength(text, "utf8") <= MAX_WS_FRAME_BYTES;
  // Only plain objects are spread: spreading a string or array would expand
  // it into one key per index and reproduce the oversized payload.
  const isPlainObject = data !== null && typeof data === "object" && !Array.isArray(data);
  const event = isPlainObject ? data : {};
  const primary = JSON.stringify({
    type: "event",
    channel,
    data: { ...event, data: marker }
  });
  if (fits(primary)) return primary;
  // The bulk lives outside `data`: keep only cheap scalar envelope fields.
  const scalars = {};
  for (const [key, value] of Object.entries(event)) {
    if (key === "data") continue;
    const kind = typeof value;
    if (kind === "number" || kind === "boolean" || (kind === "string" && value.length <= 256)) {
      scalars[key] = value;
    }
  }
  const reduced = JSON.stringify({
    type: "event",
    channel,
    data: { ...scalars, data: marker }
  });
  if (fits(reduced)) return reduced;
  return JSON.stringify({ type: "event", channel, data: { data: marker } });
}
|
|
74
|
-
var BUFFER_TTL_MS = 3e4;
|
|
75
|
-
var MAX_BUFFER_EVENTS = 500;
|
|
76
227
|
var ConnectionManager = class {
|
|
77
228
|
/** channel -> set of WS connections */
|
|
78
229
|
channels = /* @__PURE__ */ new Map();
|
|
79
|
-
/** ws ->
|
|
230
|
+
/** ws -> subscribed channels + optional integrator-supplied metadata */
|
|
80
231
|
connections = /* @__PURE__ */ new Map();
|
|
81
232
|
/** channel -> replay buffer for execution streams */
|
|
82
233
|
buffers = /* @__PURE__ */ new Map();
|
|
83
234
|
maxConnections = 100;
|
|
235
|
+
filter;
|
|
236
|
+
/** Resolved replay-buffer caps. Per-instance so embedders can dial them
|
|
237
|
+
* without monkey-patching module-level constants. */
|
|
238
|
+
maxEventsPerBuffer;
|
|
239
|
+
maxBytesPerBuffer;
|
|
240
|
+
maxActiveBuffers;
|
|
241
|
+
constructor(bufferCaps) {
|
|
242
|
+
const validatePositiveInt = (key, value) => {
|
|
243
|
+
if (value === void 0) return;
|
|
244
|
+
if (!Number.isFinite(value) || !Number.isInteger(value) || value < 1) {
|
|
245
|
+
throw new RangeError(`bufferCaps.${key} must be a positive integer (>= 1); got ${value}`);
|
|
246
|
+
}
|
|
247
|
+
};
|
|
248
|
+
validatePositiveInt("maxEventsPerBuffer", bufferCaps?.maxEventsPerBuffer);
|
|
249
|
+
validatePositiveInt("maxBytesPerBuffer", bufferCaps?.maxBytesPerBuffer);
|
|
250
|
+
validatePositiveInt("maxActiveBuffers", bufferCaps?.maxActiveBuffers);
|
|
251
|
+
this.maxEventsPerBuffer = bufferCaps?.maxEventsPerBuffer ?? DEFAULT_MAX_BUFFER_EVENTS;
|
|
252
|
+
this.maxBytesPerBuffer = bufferCaps?.maxBytesPerBuffer ?? DEFAULT_MAX_BUFFER_BYTES;
|
|
253
|
+
this.maxActiveBuffers = bufferCaps?.maxActiveBuffers ?? DEFAULT_MAX_ACTIVE_BUFFERS;
|
|
254
|
+
}
|
|
255
|
+
/**
|
|
256
|
+
* Register a broadcast filter. Called once at middleware construction.
|
|
257
|
+
* The filter runs on every outbound event and can drop or deliver based
|
|
258
|
+
* on the destination connection's metadata.
|
|
259
|
+
*/
|
|
260
|
+
setFilter(filter) {
|
|
261
|
+
this.filter = filter;
|
|
262
|
+
}
|
|
263
|
+
/** Attach integrator-supplied metadata to an already-added connection. */
|
|
264
|
+
setMetadata(ws, metadata) {
|
|
265
|
+
const entry = this.connections.get(ws);
|
|
266
|
+
if (entry) entry.metadata = metadata;
|
|
267
|
+
}
|
|
84
268
|
/** Register a new WS connection. */
|
|
85
269
|
add(ws) {
|
|
86
270
|
if (this.connections.size >= this.maxConnections) {
|
|
87
271
|
ws.close?.();
|
|
88
272
|
return;
|
|
89
273
|
}
|
|
90
|
-
this.connections.set(ws, /* @__PURE__ */ new Set());
|
|
274
|
+
this.connections.set(ws, { channels: /* @__PURE__ */ new Set() });
|
|
91
275
|
}
|
|
92
276
|
/** Remove a WS connection and all its subscriptions. */
|
|
93
277
|
remove(ws) {
|
|
94
|
-
const
|
|
95
|
-
if (
|
|
96
|
-
for (const ch of channels) {
|
|
278
|
+
const entry = this.connections.get(ws);
|
|
279
|
+
if (entry) {
|
|
280
|
+
for (const ch of entry.channels) {
|
|
97
281
|
this.channels.get(ch)?.delete(ws);
|
|
98
282
|
if (this.channels.get(ch)?.size === 0) {
|
|
99
283
|
this.channels.delete(ch);
|
|
@@ -111,12 +295,20 @@ var ConnectionManager = class {
|
|
|
111
295
|
this.channels.set(channel, subs);
|
|
112
296
|
}
|
|
113
297
|
subs.add(ws);
|
|
114
|
-
this.connections.get(ws).add(channel);
|
|
298
|
+
this.connections.get(ws).channels.add(channel);
|
|
115
299
|
const buffer = this.buffers.get(channel);
|
|
116
300
|
if (buffer) {
|
|
117
|
-
|
|
301
|
+
const metadata = this.connections.get(ws)?.metadata;
|
|
302
|
+
for (const event of buffer.events) {
|
|
303
|
+
if (this.filter) {
|
|
304
|
+
try {
|
|
305
|
+
if (!this.filter(event.data, metadata)) continue;
|
|
306
|
+
} catch {
|
|
307
|
+
continue;
|
|
308
|
+
}
|
|
309
|
+
}
|
|
118
310
|
try {
|
|
119
|
-
ws.send(msg);
|
|
311
|
+
ws.send(event.msg);
|
|
120
312
|
} catch {
|
|
121
313
|
this.remove(ws);
|
|
122
314
|
return;
|
|
@@ -130,21 +322,49 @@ var ConnectionManager = class {
|
|
|
130
322
|
if (this.channels.get(channel)?.size === 0) {
|
|
131
323
|
this.channels.delete(channel);
|
|
132
324
|
}
|
|
133
|
-
this.connections.get(ws)?.delete(channel);
|
|
325
|
+
this.connections.get(ws)?.channels.delete(channel);
|
|
134
326
|
}
|
|
135
327
|
/** Broadcast data to all subscribers of a channel. Buffers events for execution channels. */
|
|
136
328
|
broadcast(channel, data) {
|
|
137
|
-
const msg =
|
|
329
|
+
const msg = truncateIfOversized(
|
|
330
|
+
JSON.stringify({ type: "event", channel, data }),
|
|
331
|
+
channel,
|
|
332
|
+
data
|
|
333
|
+
);
|
|
138
334
|
if (isBufferedChannel(channel)) {
|
|
139
335
|
let buffer = this.buffers.get(channel);
|
|
140
336
|
if (!buffer) {
|
|
141
|
-
|
|
337
|
+
if (this.buffers.size >= this.maxActiveBuffers) {
|
|
338
|
+
let victim;
|
|
339
|
+
for (const [ch, buf] of this.buffers) {
|
|
340
|
+
if (buf.complete) {
|
|
341
|
+
victim = ch;
|
|
342
|
+
break;
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
if (victim === void 0) {
|
|
346
|
+
victim = this.buffers.keys().next().value;
|
|
347
|
+
}
|
|
348
|
+
if (victim !== void 0) {
|
|
349
|
+
const old = this.buffers.get(victim);
|
|
350
|
+
if (old?.timer) clearTimeout(old.timer);
|
|
351
|
+
this.buffers.delete(victim);
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
buffer = { events: [], complete: false, bytes: 0 };
|
|
142
355
|
this.buffers.set(channel, buffer);
|
|
143
356
|
}
|
|
144
357
|
const event = data;
|
|
145
358
|
const isTerminal = event.type === "done" || event.type === "error";
|
|
146
|
-
|
|
147
|
-
|
|
359
|
+
const isUnbuffered = event.type !== void 0 && UNBUFFERED_EVENT_TYPES.has(event.type);
|
|
360
|
+
if (!isUnbuffered) {
|
|
361
|
+
const msgBytes = Buffer.byteLength(msg, "utf8");
|
|
362
|
+
const atCountCap = buffer.events.length >= this.maxEventsPerBuffer;
|
|
363
|
+
const atByteCap = buffer.bytes + msgBytes > this.maxBytesPerBuffer;
|
|
364
|
+
if (isTerminal || !atCountCap && !atByteCap) {
|
|
365
|
+
buffer.events.push({ msg, data });
|
|
366
|
+
buffer.bytes += msgBytes;
|
|
367
|
+
}
|
|
148
368
|
}
|
|
149
369
|
if (isTerminal) {
|
|
150
370
|
buffer.complete = true;
|
|
@@ -157,6 +377,14 @@ var ConnectionManager = class {
|
|
|
157
377
|
const subs = this.channels.get(channel);
|
|
158
378
|
if (!subs || subs.size === 0) return;
|
|
159
379
|
for (const ws of [...subs]) {
|
|
380
|
+
if (this.filter) {
|
|
381
|
+
const metadata = this.connections.get(ws)?.metadata;
|
|
382
|
+
try {
|
|
383
|
+
if (!this.filter(data, metadata)) continue;
|
|
384
|
+
} catch {
|
|
385
|
+
continue;
|
|
386
|
+
}
|
|
387
|
+
}
|
|
160
388
|
try {
|
|
161
389
|
ws.send(msg);
|
|
162
390
|
} catch {
|
|
@@ -172,8 +400,20 @@ var ConnectionManager = class {
|
|
|
172
400
|
const wildcardChannel = channel.substring(0, colonIdx) + ":*";
|
|
173
401
|
const subs = this.channels.get(wildcardChannel);
|
|
174
402
|
if (!subs || subs.size === 0) return;
|
|
175
|
-
const msg =
|
|
403
|
+
const msg = truncateIfOversized(
|
|
404
|
+
JSON.stringify({ type: "event", channel, data }),
|
|
405
|
+
channel,
|
|
406
|
+
data
|
|
407
|
+
);
|
|
176
408
|
for (const ws of [...subs]) {
|
|
409
|
+
if (this.filter) {
|
|
410
|
+
const metadata = this.connections.get(ws)?.metadata;
|
|
411
|
+
try {
|
|
412
|
+
if (!this.filter(data, metadata)) continue;
|
|
413
|
+
} catch {
|
|
414
|
+
continue;
|
|
415
|
+
}
|
|
416
|
+
}
|
|
177
417
|
try {
|
|
178
418
|
ws.send(msg);
|
|
179
419
|
} catch {
|
|
@@ -205,11 +445,11 @@ var ConnectionManager = class {
|
|
|
205
445
|
};
|
|
206
446
|
|
|
207
447
|
// src/server/ws/protocol.ts
|
|
208
|
-
var VALID_CHANNEL_PREFIXES = ["execution:", "trace:"];
|
|
209
|
-
var VALID_EXACT_CHANNELS = ["costs", "decisions"];
|
|
448
|
+
var VALID_CHANNEL_PREFIXES = ["execution:", "trace:", "eval:"];
|
|
449
|
+
var VALID_EXACT_CHANNELS = ["costs", "decisions", "eval-trends", "workflow-stats", "trace-stats"];
|
|
210
450
|
var MAX_CHANNEL_LENGTH = 256;
|
|
211
451
|
function handleWsMessage(raw, socket, connMgr) {
|
|
212
|
-
if (raw
|
|
452
|
+
if (Buffer.byteLength(raw, "utf8") > MAX_WS_FRAME_BYTES) {
|
|
213
453
|
return JSON.stringify({ type: "error", message: "Message too large" });
|
|
214
454
|
}
|
|
215
455
|
let msg;
|
|
@@ -269,68 +509,575 @@ function createWsHandlers(connMgr) {
|
|
|
269
509
|
};
|
|
270
510
|
}
|
|
271
511
|
|
|
272
|
-
// src/server/
|
|
273
|
-
var
|
|
274
|
-
|
|
512
|
+
// src/server/aggregates/aggregate-snapshots.ts
|
|
513
|
+
/** Length of each supported aggregation window in milliseconds. */
var WINDOW_MS = {
  "24h": 864e5,
  "7d": 7 * 864e5,
  "30d": 30 * 864e5,
  all: Number.POSITIVE_INFINITY
};
/**
 * Check whether a timestamp falls inside a window that ends at `now`.
 *
 * @param {number} ts - Timestamp in epoch milliseconds.
 * @param {string} window - Key of WINDOW_MS.
 * @param {number} now - Reference time in epoch milliseconds.
 * @returns {boolean} True when `ts` is at or after the window's start.
 */
function withinWindow(ts, window, now) {
  const windowStart = now - WINDOW_MS[window];
  return ts >= windowStart;
}
|
|
522
|
+
var REBUILD_INTERVAL_MS = 5 * 6e4;
|
|
523
|
+
var ALL_WINDOWS = new Set(Object.keys(WINDOW_MS));
|
|
524
|
+
/**
 * Validate a window identifier against the known windows.
 *
 * @param {string | undefined} raw - Candidate window identifier.
 * @param {string} [fallback="7d"] - Value returned when `raw` is empty or unknown.
 * @returns {string} `raw` when it is a member of ALL_WINDOWS, otherwise `fallback`.
 */
function parseWindowParam(raw, fallback = "7d") {
  if (raw && ALL_WINDOWS.has(raw)) {
    return raw;
  }
  return fallback;
}
|
|
527
|
+
/**
 * Holds one aggregate state per time window and broadcasts the full set on
 * `channel` through `connMgr` whenever it changes.
 */
var AggregateSnapshots = class {
  /** Map of window -> current aggregate state. */
  snapshots;
  /**
   * @param {string[]} windows - Window identifiers to track.
   * @param {() => object} emptyState - Factory for a fresh, empty state.
   * @param {{broadcast: Function}} connMgr - Broadcast target.
   * @param {string} channel - Channel the snapshots are published on.
   * @param {(state: object) => object} [broadcastTransform] - Optional
   *   per-window transform applied just before broadcasting.
   */
  constructor(windows, emptyState, connMgr, channel, broadcastTransform) {
    this.windows = windows;
    this.emptyState = emptyState;
    this.connMgr = connMgr;
    this.channel = channel;
    this.broadcastTransform = broadcastTransform;
    this.snapshots = new Map(windows.map((w) => [w, emptyState()]));
  }
  /** Replace all snapshots atomically — used after a full rebuild. */
  replace(fresh) {
    this.snapshots = fresh;
    this.broadcast();
  }
  /** Apply a reducer update to every window where `ts` falls inside the window. */
  fold(ts, update) {
    const now = Date.now();
    let changed = false;
    for (const window of this.windows) {
      if (withinWindow(ts, window, now)) {
        // get() substitutes an empty state when replace() installed a map
        // that lacks this window, so the reducer never receives undefined.
        this.snapshots.set(window, update(this.get(window)));
        changed = true;
      }
    }
    if (changed) this.broadcast();
  }
  /** Current state for `window`, or a fresh empty state when none is stored. */
  get(window) {
    return this.snapshots.get(window) ?? this.emptyState();
  }
  /** Plain-object view of every stored snapshot, keyed by window. */
  getAll() {
    return Object.fromEntries(this.snapshots);
  }
  /** Publish all snapshots (transformed when configured) with a timestamp. */
  broadcast() {
    const snapshots = this.broadcastTransform ? Object.fromEntries(
      this.windows.map((w) => [w, this.broadcastTransform(this.get(w))])
    ) : this.getAll();
    this.connMgr.broadcast(this.channel, {
      snapshots,
      updatedAt: Date.now()
    });
  }
};
|
|
571
|
+
|
|
572
|
+
// src/server/aggregates/trace-aggregator.ts
|
|
573
|
+
/**
 * Aggregates trace events into per-window snapshots.
 *
 * State is rebuilt from `runtime.getExecutions()` on start and then every
 * REBUILD_INTERVAL_MS; between rebuilds, live "trace" events from the
 * runtime are folded in through `options.reducer`.
 */
var TraceAggregator = class {
  snaps;
  interval;
  listener;
  options;
  /**
   * @param {object} options - `windows`, `emptyState`, `connMgr`, `channel`,
   *   optional `broadcastTransform`, plus `runtime`, `reducer` and an
   *   optional `executionCap` (default 2000).
   */
  constructor(options) {
    this.options = options;
    this.snaps = new AggregateSnapshots(
      options.windows,
      options.emptyState,
      options.connMgr,
      options.channel,
      options.broadcastTransform
    );
  }
  /** Rebuild once, then subscribe to live events and schedule periodic rebuilds. */
  async start() {
    await this.rebuild();
    // Drop any subscription/timer left by an earlier start() so a repeated
    // call cannot leak a second listener or interval.
    this.close();
    this.listener = (event) => {
      this.snaps.fold(event.timestamp, (prev) => this.options.reducer(prev, event));
    };
    this.options.runtime.on("trace", this.listener);
    this.interval = setInterval(
      () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err)),
      REBUILD_INTERVAL_MS
    );
  }
  /** Recompute every window from the first `executionCap` stored executions. */
  async rebuild() {
    const executions = await this.options.runtime.getExecutions();
    const cap = this.options.executionCap ?? 2e3;
    const capped = executions.slice(0, cap);
    const now = Date.now();
    const fresh = new Map(
      this.options.windows.map((w) => [w, this.options.emptyState()])
    );
    for (const exec of capped) {
      for (const event of exec.events) {
        for (const window of this.options.windows) {
          if (withinWindow(event.timestamp, window, now)) {
            fresh.set(window, this.options.reducer(fresh.get(window), event));
          }
        }
      }
    }
    this.snaps.replace(fresh);
  }
  getSnapshot(window) {
    return this.snaps.get(window);
  }
  getAllSnapshots() {
    return this.snaps.getAll();
  }
  /** Unsubscribe and stop the rebuild timer; safe to call repeatedly. */
  close() {
    if (this.listener) {
      this.options.runtime.off("trace", this.listener);
      this.listener = void 0;
    }
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = void 0;
    }
  }
};
|
|
629
|
+
|
|
630
|
+
// src/server/aggregates/execution-aggregator.ts
|
|
631
|
+
/**
 * Aggregates whole executions into per-window snapshots.
 *
 * State is rebuilt from `runtime.getExecutions()` on start and then every
 * REBUILD_INTERVAL_MS; between rebuilds, each "workflow_end" trace event
 * triggers a lookup of the finished execution, which is folded in through
 * `options.reducer`.
 */
var ExecutionAggregator = class {
  snaps;
  interval;
  listener;
  options;
  /** Generation counter to prevent stale async fold after rebuild. */
  generation = 0;
  /**
   * @param {object} options - `windows`, `emptyState`, `connMgr`, `channel`,
   *   optional `broadcastTransform`, plus `runtime`, `reducer` and an
   *   optional `executionCap` (default 2000).
   */
  constructor(options) {
    this.options = options;
    this.snaps = new AggregateSnapshots(
      options.windows,
      options.emptyState,
      options.connMgr,
      options.channel,
      options.broadcastTransform
    );
  }
  /** Rebuild once, then subscribe to live events and schedule periodic rebuilds. */
  async start() {
    await this.rebuild();
    // Drop any subscription/timer left by an earlier start() so a repeated
    // call cannot leak a second listener or interval.
    this.close();
    this.listener = (event) => {
      if (event.type !== "workflow_end") return;
      const gen = this.generation;
      this.options.runtime.getExecution(event.executionId).then((exec) => {
        // A rebuild began while the lookup was in flight; skip the fold.
        if (this.generation !== gen) return;
        if (exec) {
          this.snaps.fold(exec.startedAt, (prev) => this.options.reducer(prev, exec));
        }
      }).catch((err) => console.error("[axl-studio] execution fold failed:", err));
    };
    this.options.runtime.on("trace", this.listener);
    this.interval = setInterval(
      () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err)),
      REBUILD_INTERVAL_MS
    );
  }
  /** Recompute every window from the first `executionCap` stored executions. */
  async rebuild() {
    this.generation++;
    const executions = await this.options.runtime.getExecutions();
    const cap = this.options.executionCap ?? 2e3;
    const capped = executions.slice(0, cap);
    const now = Date.now();
    const fresh = new Map(
      this.options.windows.map((w) => [w, this.options.emptyState()])
    );
    for (const exec of capped) {
      for (const window of this.options.windows) {
        if (withinWindow(exec.startedAt, window, now)) {
          fresh.set(window, this.options.reducer(fresh.get(window), exec));
        }
      }
    }
    this.snaps.replace(fresh);
  }
  getSnapshot(window) {
    return this.snaps.get(window);
  }
  getAllSnapshots() {
    return this.snaps.getAll();
  }
  /** Unsubscribe and stop the rebuild timer; safe to call repeatedly. */
  close() {
    if (this.listener) {
      this.options.runtime.off("trace", this.listener);
      this.listener = void 0;
    }
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = void 0;
    }
  }
};
|
|
695
|
+
|
|
696
|
+
// src/server/aggregates/eval-aggregator.ts
|
|
697
|
+
/**
 * Aggregates eval history entries into per-window snapshots.
 *
 * State is rebuilt from `runtime.getEvalHistory()` on start and then every
 * REBUILD_INTERVAL_MS; between rebuilds, live "eval_result" events from the
 * runtime are folded in through `options.reducer`.
 */
var EvalAggregator = class {
  snaps;
  interval;
  listener;
  options;
  /**
   * @param {object} options - `windows`, `emptyState`, `connMgr`, `channel`,
   *   optional `broadcastTransform`, plus `runtime`, `reducer` and an
   *   optional `entryCap` (default 500).
   */
  constructor(options) {
    this.options = options;
    this.snaps = new AggregateSnapshots(
      options.windows,
      options.emptyState,
      options.connMgr,
      options.channel,
      options.broadcastTransform
    );
  }
  /** Rebuild once, then subscribe to live events and schedule periodic rebuilds. */
  async start() {
    await this.rebuild();
    // Drop any subscription/timer left by an earlier start() so a repeated
    // call cannot leak a second listener or interval.
    this.close();
    this.listener = (entry) => {
      this.snaps.fold(entry.timestamp, (prev) => this.options.reducer(prev, entry));
    };
    this.options.runtime.on("eval_result", this.listener);
    this.interval = setInterval(
      () => this.rebuild().catch((err) => console.error("[axl-studio] rebuild failed:", err)),
      REBUILD_INTERVAL_MS
    );
  }
  /** Recompute every window from the first `entryCap` history entries. */
  async rebuild() {
    const history = await this.options.runtime.getEvalHistory();
    const cap = this.options.entryCap ?? 500;
    const capped = history.slice(0, cap);
    const now = Date.now();
    const fresh = new Map(
      this.options.windows.map((w) => [w, this.options.emptyState()])
    );
    for (const entry of capped) {
      for (const window of this.options.windows) {
        if (withinWindow(entry.timestamp, window, now)) {
          fresh.set(window, this.options.reducer(fresh.get(window), entry));
        }
      }
    }
    this.snaps.replace(fresh);
  }
  getSnapshot(window) {
    return this.snaps.get(window);
  }
  getAllSnapshots() {
    return this.snaps.getAll();
  }
  /** Unsubscribe and stop the rebuild timer; safe to call repeatedly. */
  close() {
    if (this.listener) {
      this.options.runtime.off("eval_result", this.listener);
      this.listener = void 0;
    }
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = void 0;
    }
  }
};
|
|
751
|
+
|
|
752
|
+
// src/server/aggregates/reducers.ts
|
|
753
|
+
var import_axl2 = require("@axlsdk/axl");
|
|
754
|
+
/** Return `v` when it is a finite number, otherwise 0. */
var finite = (v) => {
  if (Number.isFinite(v)) return v;
  return 0;
};
|
|
755
|
+
/**
 * Create a zeroed retry breakdown.
 *
 * @returns {object} A new object with one cost counter and one call counter
 *   per category (primary, schema, validate, guardrail) plus `retryCalls`,
 *   all set to 0.
 */
function emptyRetry() {
  const fields = [
    "primary",
    "primaryCalls",
    "schema",
    "schemaCalls",
    "validate",
    "validateCalls",
    "guardrail",
    "guardrailCalls",
    "retryCalls"
  ];
  const counters = {};
  for (const field of fields) {
    counters[field] = 0;
  }
  return counters;
}
|
|
768
|
+
/**
 * Create an empty cost accumulator.
 *
 * @returns {object} A new object with zero totals, empty per-agent,
 *   per-model, per-workflow and per-embedder maps, and a zeroed retry
 *   breakdown from emptyRetry().
 */
function emptyCostData() {
  const zeroTokens = { input: 0, output: 0, reasoning: 0 };
  const initial = {
    totalCost: 0,
    totalTokens: zeroTokens,
    byAgent: {},
    byModel: {},
    byWorkflow: {},
    retry: emptyRetry(),
    byEmbedder: {}
  };
  return initial;
}
|
|
779
|
+
/**
 * Fold one trace event into a cost accumulator without mutating it.
 *
 * - A `workflow_start` event with a workflow name only bumps that workflow's
 *   execution count.
 * - Events with neither `cost` nor `tokens`, and `ask_end` events, leave the
 *   accumulator untouched (the same `acc` object is returned).
 * - Any other event adds its cost contribution to the total and to the
 *   per-agent / per-model / per-workflow buckets named on the event.
 *   `agent_call_end` events also update token totals and the retry
 *   breakdown; memory events update the per-embedder bucket.
 *
 * @param {object} acc - Accumulator shaped like emptyCostData()'s result.
 * @param {object} event - Trace event.
 * @returns {object} `acc` itself when nothing changed, otherwise a new accumulator.
 */
function reduceCost(acc, event) {
  const isWorkflowStart = event.type === "workflow_start";
  if (isWorkflowStart && event.workflow) {
    const byWorkflow2 = { ...acc.byWorkflow };
    const prev = byWorkflow2[event.workflow] ?? { cost: 0, executions: 0 };
    byWorkflow2[event.workflow] = { ...prev, executions: prev.executions + 1 };
    return { ...acc, byWorkflow: byWorkflow2 };
  }
  if (event.cost == null && !event.tokens) return acc;
  // NOTE(review): ask_end is skipped entirely — presumably it is a rollup of
  // events already counted individually; confirm against the SDK.
  if (event.type === "ask_end") return acc;
  const cost = (0, import_axl2.eventCostContribution)(event);
  const tokens = event.tokens ?? {};
  const totalTokens = event.type === "agent_call_end" ? {
    input: acc.totalTokens.input + finite(tokens.input),
    output: acc.totalTokens.output + finite(tokens.output),
    reasoning: acc.totalTokens.reasoning + finite(tokens.reasoning)
  } : acc.totalTokens;
  const byAgent = { ...acc.byAgent };
  if (event.agent) {
    const prev = byAgent[event.agent] ?? { cost: 0, calls: 0 };
    byAgent[event.agent] = { cost: prev.cost + cost, calls: prev.calls + 1 };
  }
  const byModel = { ...acc.byModel };
  if (event.model) {
    const prev = byModel[event.model] ?? { cost: 0, calls: 0, tokens: { input: 0, output: 0 } };
    byModel[event.model] = {
      cost: prev.cost + cost,
      calls: prev.calls + 1,
      tokens: {
        input: prev.tokens.input + finite(tokens.input),
        output: prev.tokens.output + finite(tokens.output)
      }
    };
  }
  const byWorkflow = { ...acc.byWorkflow };
  if (event.workflow) {
    const prev = byWorkflow[event.workflow] ?? { cost: 0, executions: 0 };
    // Executions are counted only in the workflow_start branch above, which
    // returns early, so this path only ever adds cost.
    byWorkflow[event.workflow] = {
      cost: prev.cost + cost,
      executions: prev.executions
    };
  }
  let retry = acc.retry;
  if (event.type === "agent_call_end") {
    const reason = event.data?.retryReason;
    retry = { ...acc.retry };
    if (reason === "schema") {
      retry.schema += cost;
      retry.schemaCalls += 1;
      retry.retryCalls += 1;
    } else if (reason === "validate") {
      retry.validate += cost;
      retry.validateCalls += 1;
      retry.retryCalls += 1;
    } else if (reason === "guardrail") {
      retry.guardrail += cost;
      retry.guardrailCalls += 1;
      retry.retryCalls += 1;
    } else {
      // Any other (or missing) reason counts as a primary call.
      retry.primary += cost;
      retry.primaryCalls += 1;
    }
  }
  let byEmbedder = acc.byEmbedder;
  if (event.type === "memory_remember" || event.type === "memory_recall") {
    // `data` may be absent on the event; treat that as "no usage info"
    // instead of throwing, matching the agent_call_end branch.
    const usage = event.data?.usage;
    byEmbedder = { ...acc.byEmbedder };
    const modelKey = usage?.model ?? "unknown";
    const embedTokens = typeof usage?.tokens === "number" ? finite(usage.tokens) : 0;
    const prev = byEmbedder[modelKey] ?? { cost: 0, calls: 0, tokens: 0 };
    byEmbedder[modelKey] = {
      cost: prev.cost + cost,
      calls: prev.calls + 1,
      tokens: prev.tokens + embedTokens
    };
  }
  return {
    totalCost: acc.totalCost + cost,
    totalTokens,
    byAgent,
    byModel,
    byWorkflow,
    retry,
    byEmbedder
  };
}
|
|
866
|
+
/**
 * Create the zeroed accumulator used as the starting point for eval-trend
 * reduction.
 *
 * @returns {{ byEval: object, totalRuns: number, totalCost: number }} A new
 *   object on every call, so callers never share state.
 */
function emptyEvalTrendData() {
  const blank = {};
  blank.byEval = {};
  blank.totalRuns = 0;
  blank.totalCost = 0;
  return blank;
}
|
|
869
|
+
/**
 * Pull one numeric score per scorer out of an eval result payload.
 *
 * Reads `data.summary.scorers`. Each scorer entry may be a bare finite number
 * or an object carrying a finite `mean`; anything else is dropped.
 *
 * @param {unknown} data - Eval result payload; non-objects yield `{}`.
 * @returns {Object<string, number>} Scorer name to score.
 */
function extractScores(data) {
  const scorers = data && typeof data === "object" ? data.summary?.scorers : void 0;
  if (!scorers) return {};
  const picked = {};
  Object.entries(scorers).forEach(([scorerName, value]) => {
    // Object entries contribute their `mean`; everything else is taken as-is.
    const candidate = value && typeof value === "object" ? value.mean : value;
    // Number.isFinite does not coerce, so numeric strings are rejected.
    if (Number.isFinite(candidate)) {
      picked[scorerName] = candidate;
    }
  });
  return picked;
}
|
|
885
|
+
/**
 * Read the total cost from an eval result payload.
 *
 * Prefers a finite top-level `totalCost`, then a finite `summary.totalCost`,
 * and otherwise reports 0.
 *
 * @param {unknown} data - Eval result payload; non-objects yield 0.
 * @returns {number} The cost found, or 0.
 */
function extractCost(data) {
  const usable = Boolean(data) && typeof data === "object";
  if (!usable) return 0;
  const topLevel = data.totalCost;
  if (Number.isFinite(topLevel)) return topLevel;
  const nested = data.summary?.totalCost;
  if (Number.isFinite(nested)) return nested;
  return 0;
}
|
|
892
|
+
/**
 * Choose a representative model name for an eval result.
 *
 * Uses `metadata.modelCounts` when it is a plain (non-array) object: the model
 * with the highest finite count wins, ties broken by name via `localeCompare`.
 * Falls back to the first entry of `metadata.models` when that is a string.
 *
 * @param {unknown} data - Eval result payload.
 * @returns {string | undefined} The chosen model, or `undefined` if none.
 */
function extractModel(data) {
  if (!data || typeof data !== "object") return void 0;
  const metadata = data.metadata;
  const counts = metadata?.modelCounts;
  if (counts && typeof counts === "object" && !Array.isArray(counts)) {
    // Only finite counts are ranked. NaN or ±Infinity would make `b - a`
    // evaluate to NaN, silently degrading the comparator to name order and
    // letting a model with a bogus count win.
    const ranked = Object.entries(counts).filter(([, count]) => Number.isFinite(count));
    if (ranked.length > 0) {
      ranked.sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]));
      return ranked[0][0];
    }
  }
  const models = metadata?.models;
  if (Array.isArray(models) && typeof models[0] === "string") return models[0];
  return void 0;
}
|
|
910
|
+
/**
 * Read the run duration from an eval result payload.
 *
 * @param {unknown} data - Eval result payload.
 * @returns {number | undefined} `data.duration` when it is a finite number,
 *   otherwise `undefined`.
 */
function extractDuration(data) {
  if (data && typeof data === "object") {
    const { duration } = data;
    if (Number.isFinite(duration)) return duration;
  }
  return void 0;
}
|
|
915
|
+
/**
 * Compute the per-scorer mean and population standard deviation over runs.
 *
 * A run contributes to a scorer only when its value is neither `null` nor
 * `undefined`; scorers with no contributing value are omitted from the result.
 *
 * @param {Array<{ scores: Object<string, number> }>} runs - Runs to summarise.
 * @returns {{ mean: Object<string, number>, std: Object<string, number> }}
 */
function computeScoreStats(runs) {
  // A Set keeps first-seen order, which fixes the key order of the output.
  const seen = new Set(runs.flatMap((run) => Object.keys(run.scores)));
  const mean = {};
  const std = {};
  seen.forEach((scorer) => {
    const samples = [];
    for (const run of runs) {
      const sample = run.scores[scorer];
      if (sample != null) samples.push(sample);
    }
    const count = samples.length;
    if (count === 0) return;
    let total = 0;
    for (const sample of samples) total += sample;
    const average = total / count;
    let squaredDeviation = 0;
    for (const sample of samples) squaredDeviation += (sample - average) ** 2;
    mean[scorer] = average;
    // Divides by the sample count (population variance), not count - 1.
    std[scorer] = Math.sqrt(squaredDeviation / count);
  });
  return { mean, std };
}
|
|
932
|
+
/**
 * Fold one eval history entry into the eval-trend accumulator.
 *
 * Returns a new accumulator; `acc` itself is not mutated. Each eval keeps at
 * most the 50 most recently folded runs, and mean/std are recomputed over that
 * retained window, while `costTotal` and `runCount` cover every folded entry.
 *
 * @param {{ byEval: object, totalRuns: number, totalCost: number }} acc
 * @param {{ id: *, eval: string, timestamp: number, data: unknown }} entry
 * @returns {{ byEval: object, totalRuns: number, totalCost: number }}
 */
function reduceEvalTrends(acc, entry) {
  const RUN_HISTORY_LIMIT = 50;
  const payload = entry.data;
  const scores = extractScores(payload);
  const cost = extractCost(payload);
  const model = extractModel(payload);
  const duration = extractDuration(payload);

  // Optional fields are only attached when a value was found.
  const run = { timestamp: entry.timestamp, id: entry.id, scores, cost };
  if (model !== void 0) run.model = model;
  if (duration !== void 0) run.duration = duration;

  const byEval = { ...acc.byEval };
  const previous = byEval[entry.eval];

  const history = previous ? [...previous.runs, run] : [run];
  const overflow = history.length - RUN_HISTORY_LIMIT;
  const runs = overflow > 0 ? history.slice(overflow) : history;
  const stats = computeScoreStats(runs);

  // An entry older than the most recently stored run must not replace the
  // scores that are reported as "latest".
  let latestScores = scores;
  if (previous && previous.runs.length > 0) {
    const newestStored = previous.runs[previous.runs.length - 1];
    if (newestStored.timestamp > run.timestamp) {
      latestScores = previous.latestScores;
    }
  }

  byEval[entry.eval] = {
    runs,
    latestScores,
    scoreMean: stats.mean,
    scoreStd: stats.std,
    costTotal: (previous?.costTotal ?? 0) + cost,
    runCount: (previous?.runCount ?? 0) + 1
  };

  return {
    byEval,
    totalRuns: acc.totalRuns + 1,
    totalCost: acc.totalCost + cost
  };
}
|
|
966
|
+
var MAX_DURATIONS = 200;
|
|
967
|
+
/**
 * Create the zeroed accumulator used as the starting point for
 * workflow-stats reduction.
 *
 * @returns {{ byWorkflow: object, totalExecutions: number, failureRate: number }}
 *   A new object on every call.
 */
function emptyWorkflowStatsData() {
  const blank = {};
  blank.byWorkflow = {};
  blank.totalExecutions = 0;
  blank.failureRate = 0;
  return blank;
}
|
|
970
|
+
/**
 * Linearly interpolated percentile of an ascending-sorted numeric array.
 *
 * @param {number[]} sorted - Values sorted ascending (not re-sorted here).
 * @param {number} p - Percentile on a 0-100 scale.
 * @returns {number} The interpolated value, or 0 for an empty array.
 */
function percentile(sorted, p) {
  const count = sorted.length;
  if (count === 0) return 0;
  // Fractional position of the percentile within the index range.
  const rank = p / 100 * (count - 1);
  const floorIdx = Math.floor(rank);
  const ceilIdx = Math.ceil(rank);
  const base = sorted[floorIdx];
  if (floorIdx === ceilIdx) return base;
  const span = sorted[ceilIdx] - base;
  const fraction = rank - floorIdx;
  return base + span * fraction;
}
|
|
978
|
+
/**
 * Fold one execution record into the workflow-stats accumulator.
 *
 * Returns a new accumulator; `acc` and its per-workflow entries are copied,
 * not mutated. Durations are kept in ascending order so percentiles can be
 * read directly from the array.
 *
 * @param {{ byWorkflow: object, totalExecutions: number, failureRate: number }} acc
 * @param {{ workflow: string, status: string, duration: * }} execution
 * @returns {{ byWorkflow: object, totalExecutions: number, failureRate: number }}
 */
function reduceWorkflowStats(acc, execution) {
  const workflowName = execution.workflow;
  const byWorkflow = { ...acc.byWorkflow };
  const before = byWorkflow[workflowName] ?? {
    total: 0,
    completed: 0,
    failed: 0,
    durations: [],
    durationSum: 0,
    avgDuration: 0
  };
  const dur = finite(execution.duration);

  // Insert before the first strictly greater sample to keep ascending order.
  const durations = [...before.durations];
  let slot = 0;
  while (slot < durations.length && !(durations[slot] > dur)) {
    slot += 1;
  }
  durations.splice(slot, 0, dur);
  // NOTE(review): the array is sorted, so shift() evicts the smallest sample
  // rather than the oldest one once the cap is exceeded — confirm that this
  // upward bias on the retained window is intended.
  if (durations.length > MAX_DURATIONS) {
    durations.shift();
  }

  const total = before.total + 1;
  const completed = execution.status === "completed" ? before.completed + 1 : before.completed;
  const failed = execution.status === "failed" ? before.failed + 1 : before.failed;
  const durationSum = before.durationSum + dur;
  const avgDuration = durationSum / total;
  byWorkflow[workflowName] = { total, completed, failed, durations, durationSum, avgDuration };

  const totalExecutions = acc.totalExecutions + 1;
  let totalFailed = 0;
  for (const stats of Object.values(byWorkflow)) {
    totalFailed += stats.failed;
  }
  const failureRate = totalExecutions > 0 ? totalFailed / totalExecutions : 0;
  return { byWorkflow, totalExecutions, failureRate };
}
|
|
1012
|
+
/**
 * Derive the median and 95th-percentile duration for one workflow entry.
 *
 * @param {{ durations: number[] }} entry - Workflow stats entry; `durations`
 *   is passed to `percentile` unchanged.
 * @returns {{ durationP50: number, durationP95: number }}
 */
function getWorkflowPercentiles(entry) {
  const samples = entry.durations;
  const durationP50 = percentile(samples, 50);
  const durationP95 = percentile(samples, 95);
  return { durationP50, durationP95 };
}
|
|
1018
|
+
/**
 * Convert accumulated workflow stats into the shape returned to clients.
 *
 * Each workflow entry gains `durationP50` / `durationP95` and loses the raw
 * `durations` array and `durationSum`; the top-level totals pass through.
 *
 * @param {{ byWorkflow: object, totalExecutions: number, failureRate: number }} data
 * @returns {{ byWorkflow: object, totalExecutions: number, failureRate: number }}
 */
function enrichWorkflowStats(data) {
  const enriched = {};
  Object.keys(data.byWorkflow).forEach((workflowName) => {
    const stats = data.byWorkflow[workflowName];
    const percentiles = getWorkflowPercentiles(stats);
    enriched[workflowName] = {
      total: stats.total,
      completed: stats.completed,
      failed: stats.failed,
      durationP50: percentiles.durationP50,
      durationP95: percentiles.durationP95,
      avgDuration: stats.avgDuration
    };
  });
  const { totalExecutions, failureRate } = data;
  return { byWorkflow: enriched, totalExecutions, failureRate };
}
|
|
1037
|
+
/**
 * Create the zeroed accumulator used as the starting point for trace-stats
 * reduction.
 *
 * @returns {{ eventTypeCounts: object, byTool: object, retryByAgent: object, totalEvents: number }}
 *   A new object on every call.
 */
function emptyTraceStatsData() {
  const blank = {};
  blank.eventTypeCounts = {};
  blank.byTool = {};
  blank.retryByAgent = {};
  blank.totalEvents = 0;
  return blank;
}
|
|
1045
|
+
/**
 * Fold one trace event into the trace-stats accumulator.
 *
 * Returns a new accumulator; `acc` is not mutated. Tracks:
 *  - a count per event type,
 *  - per-tool call / denied / approved counts (from `tool_call_end`,
 *    `tool_denied` and `tool_approval` events),
 *  - per-agent retry counts by reason (`schema`, `validate`, `guardrail`)
 *    from `agent_call_end` events that carry `data.retryReason`.
 *
 * Event types, tool names, agent names and retry reasons come from event
 * payloads, so every lookup checks own properties only: names such as
 * "toString" or "constructor" must not resolve to Object.prototype members.
 *
 * @param {{ eventTypeCounts: object, byTool: object, retryByAgent: object, totalEvents: number }} acc
 * @param {{ type: string, tool?: string, agent?: string, data?: object }} event
 * @returns {{ eventTypeCounts: object, byTool: object, retryByAgent: object, totalEvents: number }}
 */
function reduceTraceStats(acc, event) {
  const hasOwn = (record, key) => Object.prototype.hasOwnProperty.call(record, key);
  const ownValue = (record, key) => hasOwn(record, key) ? record[key] : void 0;

  const eventTypeCounts = { ...acc.eventTypeCounts };
  eventTypeCounts[event.type] = (ownValue(eventTypeCounts, event.type) ?? 0) + 1;

  const byTool = { ...acc.byTool };
  if (event.type === "tool_call_end" || event.type === "tool_denied" || event.type === "tool_approval") {
    const toolName = event.tool;
    const prev = ownValue(byTool, toolName) ?? { calls: 0, denied: 0, approved: 0 };
    const isDeniedEvent = event.type === "tool_denied";
    const isApprovalEvent = event.type === "tool_approval";
    // Only denial/approval events carry an approval verdict in `data`.
    const eventData = isDeniedEvent || isApprovalEvent ? event.data : void 0;
    const isApproved = eventData?.approved === true;
    // A `tool_denied` event counts as denied unless explicitly approved;
    // a `tool_approval` event counts as denied only on an explicit `false`.
    const isDenied = isDeniedEvent ? !eventData?.approved : isApprovalEvent && eventData?.approved === false;
    byTool[toolName] = {
      calls: prev.calls + (event.type === "tool_call_end" ? 1 : 0),
      denied: prev.denied + (isDenied ? 1 : 0),
      approved: prev.approved + (isApproved ? 1 : 0)
    };
  }

  const retryByAgent = { ...acc.retryByAgent };
  if (event.agent && event.type === "agent_call_end") {
    const reason = event.data?.retryReason;
    if (reason) {
      const prev = ownValue(retryByAgent, event.agent) ?? { schema: 0, validate: 0, guardrail: 0 };
      // Unknown reasons are ignored. An own-property check is required here:
      // `reason in prev` would also accept inherited names like "toString".
      if (hasOwn(prev, reason)) {
        retryByAgent[event.agent] = { ...prev, [reason]: prev[reason] + 1 };
      }
    }
  }

  return {
    eventTypeCounts,
    byTool,
    retryByAgent,
    totalEvents: acc.totalEvents + 1
  };
}
|
|
334
1081
|
|
|
335
1082
|
// src/server/routes/health.ts
|
|
336
1083
|
var import_hono = require("hono");
|
|
@@ -354,7 +1101,7 @@ function createHealthRoutes(readOnly) {
|
|
|
354
1101
|
|
|
355
1102
|
// src/server/routes/workflows.ts
|
|
356
1103
|
var import_hono2 = require("hono");
|
|
357
|
-
var
|
|
1104
|
+
var import_axl3 = require("@axlsdk/axl");
|
|
358
1105
|
function createWorkflowRoutes(connMgr) {
|
|
359
1106
|
const app6 = new import_hono2.Hono();
|
|
360
1107
|
app6.get("/workflows", (c) => {
|
|
@@ -380,8 +1127,8 @@ function createWorkflowRoutes(connMgr) {
|
|
|
380
1127
|
ok: true,
|
|
381
1128
|
data: {
|
|
382
1129
|
name: workflow.name,
|
|
383
|
-
inputSchema: workflow.inputSchema ? (0,
|
|
384
|
-
outputSchema: workflow.outputSchema ? (0,
|
|
1130
|
+
inputSchema: workflow.inputSchema ? (0, import_axl3.zodToJsonSchema)(workflow.inputSchema) : null,
|
|
1131
|
+
outputSchema: workflow.outputSchema ? (0, import_axl3.zodToJsonSchema)(workflow.outputSchema) : null
|
|
385
1132
|
}
|
|
386
1133
|
});
|
|
387
1134
|
});
|
|
@@ -399,15 +1146,22 @@ function createWorkflowRoutes(connMgr) {
|
|
|
399
1146
|
if (body.stream) {
|
|
400
1147
|
const stream = runtime.stream(name, body.input ?? {}, { metadata: body.metadata });
|
|
401
1148
|
const executionId = `stream-${Date.now()}`;
|
|
1149
|
+
const redactOn = runtime.isRedactEnabled();
|
|
402
1150
|
(async () => {
|
|
403
1151
|
for await (const event of stream) {
|
|
404
|
-
connMgr.broadcastWithWildcard(
|
|
1152
|
+
connMgr.broadcastWithWildcard(
|
|
1153
|
+
`execution:${executionId}`,
|
|
1154
|
+
redactStreamEvent(event, redactOn)
|
|
1155
|
+
);
|
|
405
1156
|
}
|
|
406
1157
|
})();
|
|
407
1158
|
return c.json({ ok: true, data: { executionId, streaming: true } });
|
|
408
1159
|
}
|
|
409
1160
|
const result = await runtime.execute(name, body.input ?? {}, { metadata: body.metadata });
|
|
410
|
-
return c.json({
|
|
1161
|
+
return c.json({
|
|
1162
|
+
ok: true,
|
|
1163
|
+
data: { result: redactValue(result, runtime.isRedactEnabled()) }
|
|
1164
|
+
});
|
|
411
1165
|
});
|
|
412
1166
|
return app6;
|
|
413
1167
|
}
|
|
@@ -418,7 +1172,10 @@ var app = new import_hono3.Hono();
|
|
|
418
1172
|
app.get("/executions", async (c) => {
|
|
419
1173
|
const runtime = c.get("runtime");
|
|
420
1174
|
const executions = await runtime.getExecutions();
|
|
421
|
-
return c.json({
|
|
1175
|
+
return c.json({
|
|
1176
|
+
ok: true,
|
|
1177
|
+
data: redactExecutionList(executions, runtime.isRedactEnabled())
|
|
1178
|
+
});
|
|
422
1179
|
});
|
|
423
1180
|
app.get("/executions/:id", async (c) => {
|
|
424
1181
|
const runtime = c.get("runtime");
|
|
@@ -430,7 +1187,32 @@ app.get("/executions/:id", async (c) => {
|
|
|
430
1187
|
404
|
|
431
1188
|
);
|
|
432
1189
|
}
|
|
433
|
-
|
|
1190
|
+
const sinceParam = c.req.query("since");
|
|
1191
|
+
let paged = execution;
|
|
1192
|
+
if (sinceParam !== void 0) {
|
|
1193
|
+
const since = Number(sinceParam);
|
|
1194
|
+
if (!Number.isFinite(since) || !Number.isInteger(since)) {
|
|
1195
|
+
return c.json(
|
|
1196
|
+
{
|
|
1197
|
+
ok: false,
|
|
1198
|
+
error: {
|
|
1199
|
+
code: "INVALID_PARAM",
|
|
1200
|
+
message: `\`since\` must be a finite integer (got "${sinceParam}")`,
|
|
1201
|
+
param: "since"
|
|
1202
|
+
}
|
|
1203
|
+
},
|
|
1204
|
+
400
|
|
1205
|
+
);
|
|
1206
|
+
}
|
|
1207
|
+
paged = {
|
|
1208
|
+
...execution,
|
|
1209
|
+
events: execution.events.filter((e) => e.step > since)
|
|
1210
|
+
};
|
|
1211
|
+
}
|
|
1212
|
+
return c.json({
|
|
1213
|
+
ok: true,
|
|
1214
|
+
data: redactExecutionInfo(paged, runtime.isRedactEnabled())
|
|
1215
|
+
});
|
|
434
1216
|
});
|
|
435
1217
|
app.post("/executions/:id/abort", (c) => {
|
|
436
1218
|
const runtime = c.get("runtime");
|
|
@@ -464,7 +1246,16 @@ function createSessionRoutes(connMgr) {
|
|
|
464
1246
|
const id = c.req.param("id");
|
|
465
1247
|
const history = await store.getSession(id);
|
|
466
1248
|
const handoffHistory = await store.getSessionMeta(id, "handoffHistory");
|
|
467
|
-
return c.json({
|
|
1249
|
+
return c.json({
|
|
1250
|
+
ok: true,
|
|
1251
|
+
data: {
|
|
1252
|
+
id,
|
|
1253
|
+
history: redactSessionHistory(history, runtime.isRedactEnabled()),
|
|
1254
|
+
// HandoffRecord has no content fields (source/target/mode/
|
|
1255
|
+
// timestamp/duration) — nothing to scrub.
|
|
1256
|
+
handoffHistory: handoffHistory ?? []
|
|
1257
|
+
}
|
|
1258
|
+
});
|
|
468
1259
|
});
|
|
469
1260
|
app6.post("/sessions/:id/send", async (c) => {
|
|
470
1261
|
const runtime = c.get("runtime");
|
|
@@ -500,7 +1291,7 @@ function createSessionRoutes(connMgr) {
|
|
|
500
1291
|
|
|
501
1292
|
// src/server/routes/agents.ts
|
|
502
1293
|
var import_hono5 = require("hono");
|
|
503
|
-
var
|
|
1294
|
+
var import_axl4 = require("@axlsdk/axl");
|
|
504
1295
|
var app2 = new import_hono5.Hono();
|
|
505
1296
|
app2.get("/agents", (c) => {
|
|
506
1297
|
const runtime = c.get("runtime");
|
|
@@ -541,7 +1332,7 @@ app2.get("/agents/:name", (c) => {
|
|
|
541
1332
|
tools: cfg.tools?.map((t) => ({
|
|
542
1333
|
name: t.name,
|
|
543
1334
|
description: t.description,
|
|
544
|
-
inputSchema: (0,
|
|
1335
|
+
inputSchema: (0, import_axl4.zodToJsonSchema)(t.inputSchema)
|
|
545
1336
|
})) ?? [],
|
|
546
1337
|
handoffs: typeof cfg.handoffs === "function" ? [
|
|
547
1338
|
{
|
|
@@ -581,14 +1372,14 @@ var agents_default = app2;
|
|
|
581
1372
|
|
|
582
1373
|
// src/server/routes/tools.ts
|
|
583
1374
|
var import_hono6 = require("hono");
|
|
584
|
-
var
|
|
1375
|
+
var import_axl5 = require("@axlsdk/axl");
|
|
585
1376
|
var app3 = new import_hono6.Hono();
|
|
586
1377
|
app3.get("/tools", (c) => {
|
|
587
1378
|
const runtime = c.get("runtime");
|
|
588
1379
|
const tools = runtime.getTools().map((t) => ({
|
|
589
1380
|
name: t.name,
|
|
590
1381
|
description: t.description,
|
|
591
|
-
inputSchema: t.inputSchema ? (0,
|
|
1382
|
+
inputSchema: t.inputSchema ? (0, import_axl5.zodToJsonSchema)(t.inputSchema) : {},
|
|
592
1383
|
sensitive: t.sensitive ?? false,
|
|
593
1384
|
requireApproval: t.requireApproval ?? false
|
|
594
1385
|
}));
|
|
@@ -609,7 +1400,7 @@ app3.get("/tools/:name", (c) => {
|
|
|
609
1400
|
data: {
|
|
610
1401
|
name: tool.name,
|
|
611
1402
|
description: tool.description,
|
|
612
|
-
inputSchema: tool.inputSchema ? (0,
|
|
1403
|
+
inputSchema: tool.inputSchema ? (0, import_axl5.zodToJsonSchema)(tool.inputSchema) : {},
|
|
613
1404
|
sensitive: tool.sensitive,
|
|
614
1405
|
requireApproval: tool.requireApproval,
|
|
615
1406
|
retry: tool.retry,
|
|
@@ -634,7 +1425,10 @@ app3.post("/tools/:name/test", async (c) => {
|
|
|
634
1425
|
const body = await c.req.json();
|
|
635
1426
|
const ctx = runtime.createContext();
|
|
636
1427
|
const result = await tool.run(ctx, body.input);
|
|
637
|
-
return c.json({
|
|
1428
|
+
return c.json({
|
|
1429
|
+
ok: true,
|
|
1430
|
+
data: { result: redactValue(result, runtime.isRedactEnabled()) }
|
|
1431
|
+
});
|
|
638
1432
|
});
|
|
639
1433
|
var tools_default = app3;
|
|
640
1434
|
|
|
@@ -649,7 +1443,7 @@ app4.get("/memory/:scope", async (c) => {
|
|
|
649
1443
|
return c.json({ ok: true, data: [] });
|
|
650
1444
|
}
|
|
651
1445
|
const entries = await store.getAllMemory(scope);
|
|
652
|
-
return c.json({ ok: true, data: entries });
|
|
1446
|
+
return c.json({ ok: true, data: redactMemoryList(entries, runtime.isRedactEnabled()) });
|
|
653
1447
|
});
|
|
654
1448
|
app4.get("/memory/:scope/:key", async (c) => {
|
|
655
1449
|
const runtime = c.get("runtime");
|
|
@@ -669,7 +1463,10 @@ app4.get("/memory/:scope/:key", async (c) => {
|
|
|
669
1463
|
404
|
|
670
1464
|
);
|
|
671
1465
|
}
|
|
672
|
-
return c.json({
|
|
1466
|
+
return c.json({
|
|
1467
|
+
ok: true,
|
|
1468
|
+
data: { key, value: redactMemoryValue(value, runtime.isRedactEnabled()) }
|
|
1469
|
+
});
|
|
673
1470
|
});
|
|
674
1471
|
app4.put("/memory/:scope/:key", async (c) => {
|
|
675
1472
|
const runtime = c.get("runtime");
|
|
@@ -714,7 +1511,10 @@ var app5 = new import_hono8.Hono();
|
|
|
714
1511
|
app5.get("/decisions", async (c) => {
|
|
715
1512
|
const runtime = c.get("runtime");
|
|
716
1513
|
const decisions = await runtime.getPendingDecisions();
|
|
717
|
-
return c.json({
|
|
1514
|
+
return c.json({
|
|
1515
|
+
ok: true,
|
|
1516
|
+
data: redactPendingDecisionList(decisions, runtime.isRedactEnabled())
|
|
1517
|
+
});
|
|
718
1518
|
});
|
|
719
1519
|
app5.post("/decisions/:executionId/resolve", async (c) => {
|
|
720
1520
|
const runtime = c.get("runtime");
|
|
@@ -730,11 +1530,23 @@ var import_hono9 = require("hono");
|
|
|
730
1530
|
/**
 * Build the Hono sub-app that serves cost aggregates.
 *
 * Routes:
 *  - `GET /costs` — with `?windows=all` returns `getAllSnapshots()`;
 *    otherwise returns `getSnapshot(window)` for the parsed `?window=` value.
 *  - `POST /costs/reset` — always answers 410 with a `GONE` error body.
 *
 * @param {{ getSnapshot: Function, getAllSnapshots: Function }} costAggregator
 * @returns {object} The configured Hono app.
 */
function createCostRoutes(costAggregator) {
  const router = new import_hono9.Hono();

  router.get("/costs", (c) => {
    const wantsEveryWindow = c.req.query("windows") === "all";
    if (wantsEveryWindow) {
      const snapshots = costAggregator.getAllSnapshots();
      return c.json({ ok: true, data: snapshots });
    }
    const window = parseWindowParam(c.req.query("window"));
    const snapshot = costAggregator.getSnapshot(window);
    return c.json({ ok: true, data: snapshot });
  });

  router.post("/costs/reset", (c) => {
    // The endpoint is kept only to explain its removal to old clients.
    const error = {
      code: "GONE",
      message: "POST /api/costs/reset was removed in @axlsdk/studio 0.15. Cost aggregates are now time-windowed and rebuilt from StateStore history. Use GET /api/costs?window=24h|7d|30d|all to narrow the view instead of resetting."
    };
    return c.json({ ok: false, error }, 410);
  });

  return router;
}
|
|
@@ -742,8 +1554,9 @@ function createCostRoutes(costAggregator) {
|
|
|
742
1554
|
// src/server/routes/evals.ts
|
|
743
1555
|
var import_node_crypto = require("crypto");
|
|
744
1556
|
var import_hono10 = require("hono");
|
|
745
|
-
function createEvalRoutes(evalLoader) {
|
|
1557
|
+
function createEvalRoutes(connMgr, evalLoader) {
|
|
746
1558
|
const app6 = new import_hono10.Hono();
|
|
1559
|
+
const activeRuns = /* @__PURE__ */ new Map();
|
|
747
1560
|
app6.get("/evals", async (c) => {
|
|
748
1561
|
if (evalLoader) await evalLoader();
|
|
749
1562
|
const runtime = c.get("runtime");
|
|
@@ -753,7 +1566,10 @@ function createEvalRoutes(evalLoader) {
|
|
|
753
1566
|
app6.get("/evals/history", async (c) => {
|
|
754
1567
|
const runtime = c.get("runtime");
|
|
755
1568
|
const history = await runtime.getEvalHistory();
|
|
756
|
-
return c.json({
|
|
1569
|
+
return c.json({
|
|
1570
|
+
ok: true,
|
|
1571
|
+
data: redactEvalHistoryList(history, runtime.isRedactEnabled())
|
|
1572
|
+
});
|
|
757
1573
|
});
|
|
758
1574
|
app6.delete("/evals/history/:id", async (c) => {
|
|
759
1575
|
const runtime = c.get("runtime");
|
|
@@ -774,6 +1590,7 @@ function createEvalRoutes(evalLoader) {
|
|
|
774
1590
|
if (evalLoader) await evalLoader();
|
|
775
1591
|
const runtime = c.get("runtime");
|
|
776
1592
|
const name = c.req.param("name");
|
|
1593
|
+
const redactOn = runtime.isRedactEnabled();
|
|
777
1594
|
const entry = runtime.getRegisteredEval(name);
|
|
778
1595
|
if (!entry) {
|
|
779
1596
|
return c.json(
|
|
@@ -782,13 +1599,89 @@ function createEvalRoutes(evalLoader) {
|
|
|
782
1599
|
);
|
|
783
1600
|
}
|
|
784
1601
|
let runs = 1;
|
|
1602
|
+
let stream = false;
|
|
1603
|
+
let captureTraces = false;
|
|
785
1604
|
try {
|
|
786
1605
|
const body = await c.req.json().catch(() => ({}));
|
|
787
1606
|
if (typeof body.runs === "number" && Number.isFinite(body.runs) && body.runs > 1) {
|
|
788
1607
|
runs = Math.min(Math.floor(body.runs), 25);
|
|
789
1608
|
}
|
|
1609
|
+
if (body.stream === true) {
|
|
1610
|
+
stream = true;
|
|
1611
|
+
}
|
|
1612
|
+
if (body.captureTraces === true) {
|
|
1613
|
+
captureTraces = true;
|
|
1614
|
+
}
|
|
790
1615
|
} catch {
|
|
791
1616
|
}
|
|
1617
|
+
if (stream) {
|
|
1618
|
+
const evalRunId = `eval-${(0, import_node_crypto.randomUUID)()}`;
|
|
1619
|
+
const ac = new AbortController();
|
|
1620
|
+
activeRuns.set(evalRunId, ac);
|
|
1621
|
+
(async () => {
|
|
1622
|
+
try {
|
|
1623
|
+
if (runs > 1) {
|
|
1624
|
+
const runGroupId = (0, import_node_crypto.randomUUID)();
|
|
1625
|
+
const results = [];
|
|
1626
|
+
for (let r = 0; r < runs; r++) {
|
|
1627
|
+
if (ac.signal.aborted) break;
|
|
1628
|
+
const result = await runtime.runRegisteredEval(name, {
|
|
1629
|
+
metadata: { runGroupId, runIndex: r },
|
|
1630
|
+
signal: ac.signal,
|
|
1631
|
+
captureTraces,
|
|
1632
|
+
onProgress: (event) => {
|
|
1633
|
+
if (event.type === "run_done") return;
|
|
1634
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
|
|
1635
|
+
...event,
|
|
1636
|
+
run: r + 1,
|
|
1637
|
+
totalRuns: runs
|
|
1638
|
+
});
|
|
1639
|
+
}
|
|
1640
|
+
});
|
|
1641
|
+
results.push(result);
|
|
1642
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
|
|
1643
|
+
type: "run_done",
|
|
1644
|
+
run: r + 1,
|
|
1645
|
+
totalRuns: runs
|
|
1646
|
+
});
|
|
1647
|
+
}
|
|
1648
|
+
if (results.length > 0) {
|
|
1649
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
|
|
1650
|
+
type: "done",
|
|
1651
|
+
evalResultId: results[0].id,
|
|
1652
|
+
runGroupId
|
|
1653
|
+
});
|
|
1654
|
+
} else {
|
|
1655
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
|
|
1656
|
+
type: "error",
|
|
1657
|
+
message: "All runs were cancelled"
|
|
1658
|
+
});
|
|
1659
|
+
}
|
|
1660
|
+
} else {
|
|
1661
|
+
const result = await runtime.runRegisteredEval(name, {
|
|
1662
|
+
signal: ac.signal,
|
|
1663
|
+
captureTraces,
|
|
1664
|
+
onProgress: (event) => {
|
|
1665
|
+
if (event.type === "run_done") return;
|
|
1666
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, event);
|
|
1667
|
+
}
|
|
1668
|
+
});
|
|
1669
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
|
|
1670
|
+
type: "done",
|
|
1671
|
+
evalResultId: result.id
|
|
1672
|
+
});
|
|
1673
|
+
}
|
|
1674
|
+
} catch (err) {
|
|
1675
|
+
connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
|
|
1676
|
+
type: "error",
|
|
1677
|
+
message: redactErrorMessage(err, redactOn)
|
|
1678
|
+
});
|
|
1679
|
+
} finally {
|
|
1680
|
+
activeRuns.delete(evalRunId);
|
|
1681
|
+
}
|
|
1682
|
+
})();
|
|
1683
|
+
return c.json({ ok: true, data: { evalRunId } });
|
|
1684
|
+
}
|
|
792
1685
|
try {
|
|
793
1686
|
if (runs > 1) {
|
|
794
1687
|
const { aggregateRuns } = await import("@axlsdk/eval");
|
|
@@ -796,27 +1689,53 @@ function createEvalRoutes(evalLoader) {
|
|
|
796
1689
|
const results = [];
|
|
797
1690
|
for (let r = 0; r < runs; r++) {
|
|
798
1691
|
const result2 = await runtime.runRegisteredEval(name, {
|
|
799
|
-
metadata: { runGroupId, runIndex: r }
|
|
1692
|
+
metadata: { runGroupId, runIndex: r },
|
|
1693
|
+
captureTraces
|
|
800
1694
|
});
|
|
801
1695
|
results.push(result2);
|
|
802
1696
|
}
|
|
803
1697
|
const typedResults = results;
|
|
804
1698
|
const aggregate = aggregateRuns(typedResults);
|
|
805
1699
|
const first = typedResults[0];
|
|
806
|
-
const result = {
|
|
807
|
-
|
|
1700
|
+
const result = {
|
|
1701
|
+
...first,
|
|
1702
|
+
_multiRun: { aggregate, allRuns: typedResults }
|
|
1703
|
+
};
|
|
1704
|
+
return c.json({
|
|
1705
|
+
ok: true,
|
|
1706
|
+
data: redactEvalResult(result, redactOn)
|
|
1707
|
+
});
|
|
808
1708
|
} else {
|
|
809
|
-
const result = await runtime.runRegisteredEval(name);
|
|
810
|
-
return c.json({
|
|
1709
|
+
const result = await runtime.runRegisteredEval(name, { captureTraces });
|
|
1710
|
+
return c.json({
|
|
1711
|
+
ok: true,
|
|
1712
|
+
data: redactEvalResult(result, redactOn)
|
|
1713
|
+
});
|
|
811
1714
|
}
|
|
812
1715
|
} catch (err) {
|
|
813
|
-
|
|
814
|
-
|
|
1716
|
+
return c.json(
|
|
1717
|
+
{ ok: false, error: { code: "EVAL_ERROR", message: redactErrorMessage(err, redactOn) } },
|
|
1718
|
+
400
|
|
1719
|
+
);
|
|
1720
|
+
}
|
|
1721
|
+
});
|
|
1722
|
+
app6.post("/evals/runs/:evalRunId/cancel", (c) => {
|
|
1723
|
+
const evalRunId = c.req.param("evalRunId");
|
|
1724
|
+
const ac = activeRuns.get(evalRunId);
|
|
1725
|
+
if (!ac) {
|
|
1726
|
+
return c.json(
|
|
1727
|
+
{ ok: false, error: { code: "NOT_FOUND", message: "No active eval run found" } },
|
|
1728
|
+
404
|
|
1729
|
+
);
|
|
815
1730
|
}
|
|
1731
|
+
ac.abort();
|
|
1732
|
+
activeRuns.delete(evalRunId);
|
|
1733
|
+
return c.json({ ok: true, data: { cancelled: true } });
|
|
816
1734
|
});
|
|
817
1735
|
app6.post("/evals/:name/rescore", async (c) => {
|
|
818
1736
|
if (evalLoader) await evalLoader();
|
|
819
1737
|
const runtime = c.get("runtime");
|
|
1738
|
+
const redactOn = runtime.isRedactEnabled();
|
|
820
1739
|
const name = c.req.param("name");
|
|
821
1740
|
const body = await c.req.json();
|
|
822
1741
|
if (!body.resultId || typeof body.resultId !== "string") {
|
|
@@ -854,19 +1773,29 @@ function createEvalRoutes(evalLoader) {
|
|
|
854
1773
|
timestamp: Date.now(),
|
|
855
1774
|
data: result
|
|
856
1775
|
});
|
|
857
|
-
return c.json({
|
|
1776
|
+
return c.json({
|
|
1777
|
+
ok: true,
|
|
1778
|
+
data: redactEvalResult(result, redactOn)
|
|
1779
|
+
});
|
|
858
1780
|
} catch (err) {
|
|
859
|
-
|
|
860
|
-
|
|
1781
|
+
return c.json(
|
|
1782
|
+
{ ok: false, error: { code: "EVAL_ERROR", message: redactErrorMessage(err, redactOn) } },
|
|
1783
|
+
400
|
|
1784
|
+
);
|
|
861
1785
|
}
|
|
862
1786
|
});
|
|
863
1787
|
app6.post("/evals/compare", async (c) => {
|
|
864
1788
|
const runtime = c.get("runtime");
|
|
1789
|
+
const redactOn = runtime.isRedactEnabled();
|
|
865
1790
|
const body = await c.req.json();
|
|
1791
|
+
const MAX_POOLED_RUNS = 25;
|
|
866
1792
|
const validateIdParam = (v, name) => {
|
|
867
1793
|
if (typeof v === "string") return v === "" ? `${name} must be non-empty` : null;
|
|
868
1794
|
if (Array.isArray(v)) {
|
|
869
1795
|
if (v.length === 0) return `${name} must be a non-empty array`;
|
|
1796
|
+
if (v.length > MAX_POOLED_RUNS) {
|
|
1797
|
+
return `${name} may contain at most ${MAX_POOLED_RUNS} ids (pooled comparison)`;
|
|
1798
|
+
}
|
|
870
1799
|
for (const elem of v) {
|
|
871
1800
|
if (typeof elem !== "string" || elem === "") {
|
|
872
1801
|
return `${name} array must contain only non-empty strings`;
|
|
@@ -929,8 +1858,13 @@ function createEvalRoutes(evalLoader) {
|
|
|
929
1858
|
const result = await runtime.evalCompare(baseline, candidate, body.options);
|
|
930
1859
|
return c.json({ ok: true, data: result });
|
|
931
1860
|
} catch (err) {
|
|
932
|
-
|
|
933
|
-
|
|
1861
|
+
return c.json(
|
|
1862
|
+
{
|
|
1863
|
+
ok: false,
|
|
1864
|
+
error: { code: "COMPARE_FAILED", message: redactErrorMessage(err, redactOn) }
|
|
1865
|
+
},
|
|
1866
|
+
400
|
|
1867
|
+
);
|
|
934
1868
|
}
|
|
935
1869
|
});
|
|
936
1870
|
app6.post("/evals/import", async (c) => {
|
|
@@ -992,7 +1926,11 @@ function createEvalRoutes(evalLoader) {
|
|
|
992
1926
|
});
|
|
993
1927
|
return c.json({ ok: true, data: { id, eval: evalName, timestamp } });
|
|
994
1928
|
});
|
|
995
|
-
|
|
1929
|
+
function closeActiveRuns() {
|
|
1930
|
+
for (const ac of activeRuns.values()) ac.abort();
|
|
1931
|
+
activeRuns.clear();
|
|
1932
|
+
}
|
|
1933
|
+
return { app: app6, closeActiveRuns };
|
|
996
1934
|
}
|
|
997
1935
|
|
|
998
1936
|
// src/server/routes/playground.ts
|
|
@@ -1026,34 +1964,50 @@ function createPlaygroundRoutes(connMgr) {
|
|
|
1026
1964
|
);
|
|
1027
1965
|
}
|
|
1028
1966
|
const sessionId = body.sessionId ?? `playground-${Date.now()}`;
|
|
1029
|
-
const executionId = `playground-${sessionId}-${Date.now()}`;
|
|
1030
1967
|
const store = runtime.getStateStore();
|
|
1031
1968
|
const history = await store.getSession(sessionId);
|
|
1032
1969
|
history.push({ role: "user", content: body.message });
|
|
1033
|
-
const
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
});
|
|
1970
|
+
const redactOn = runtime.isRedactEnabled();
|
|
1971
|
+
const ctx = runtime.createContext({ sessionHistory: history });
|
|
1972
|
+
const executionId = ctx.executionId;
|
|
1973
|
+
const traceListener = (event) => {
|
|
1974
|
+
if (event.executionId !== executionId) return;
|
|
1975
|
+
connMgr.broadcastWithWildcard(`execution:${executionId}`, redactStreamEvent(event, redactOn));
|
|
1976
|
+
};
|
|
1977
|
+
runtime.on("trace", traceListener);
|
|
1042
1978
|
(async () => {
|
|
1979
|
+
let stepCounter = Number.MAX_SAFE_INTEGER - 1;
|
|
1980
|
+
const terminalFields = () => ({
|
|
1981
|
+
executionId,
|
|
1982
|
+
step: stepCounter++,
|
|
1983
|
+
timestamp: Date.now()
|
|
1984
|
+
});
|
|
1043
1985
|
try {
|
|
1044
1986
|
const result = await ctx.ask(agent, body.message);
|
|
1045
1987
|
const resultText = typeof result === "string" ? result : JSON.stringify(result);
|
|
1046
1988
|
history.push({ role: "assistant", content: resultText });
|
|
1047
1989
|
await store.saveSession(sessionId, history);
|
|
1048
|
-
|
|
1990
|
+
const doneEvent = {
|
|
1991
|
+
...terminalFields(),
|
|
1049
1992
|
type: "done",
|
|
1050
|
-
data: resultText
|
|
1051
|
-
}
|
|
1993
|
+
data: { result: resultText }
|
|
1994
|
+
};
|
|
1995
|
+
connMgr.broadcastWithWildcard(
|
|
1996
|
+
`execution:${executionId}`,
|
|
1997
|
+
redactStreamEvent(doneEvent, redactOn)
|
|
1998
|
+
);
|
|
1052
1999
|
} catch (err) {
|
|
1053
|
-
|
|
2000
|
+
const errorEvent = {
|
|
2001
|
+
...terminalFields(),
|
|
1054
2002
|
type: "error",
|
|
1055
|
-
message: err instanceof Error ? err.message : String(err)
|
|
1056
|
-
}
|
|
2003
|
+
data: { message: err instanceof Error ? err.message : String(err) }
|
|
2004
|
+
};
|
|
2005
|
+
connMgr.broadcastWithWildcard(
|
|
2006
|
+
`execution:${executionId}`,
|
|
2007
|
+
redactStreamEvent(errorEvent, redactOn)
|
|
2008
|
+
);
|
|
2009
|
+
} finally {
|
|
2010
|
+
runtime.off("trace", traceListener);
|
|
1057
2011
|
}
|
|
1058
2012
|
})();
|
|
1059
2013
|
return c.json({
|
|
@@ -1064,12 +2018,78 @@ function createPlaygroundRoutes(connMgr) {
|
|
|
1064
2018
|
return app6;
|
|
1065
2019
|
}
|
|
1066
2020
|
|
|
2021
|
+
// src/server/routes/eval-trends.ts
|
|
2022
|
+
var import_hono12 = require("hono");
|
|
2023
|
+
/**
 * Builds the Hono sub-application that serves eval-trend snapshots.
 *
 * Registers a single `GET /eval-trends` route. The handler reads the `window`
 * query parameter, passes it through `parseWindowParam`, and replies with
 * `{ ok: true, data }`, where `data` is whatever `aggregator.getSnapshot(...)`
 * returns for the parsed window.
 *
 * @param {{ getSnapshot: Function }} aggregator - Snapshot source queried on every request.
 * @returns {*} The configured Hono app, to be mounted by the caller.
 */
function createEvalTrendsRoutes(aggregator) {
  const router = new import_hono12.Hono();

  const handleGet = (c) => {
    const rawWindow = c.req.query("window");
    const snapshot = aggregator.getSnapshot(parseWindowParam(rawWindow));
    return c.json({ ok: true, data: snapshot });
  };

  router.get("/eval-trends", handleGet);
  return router;
}
|
|
2031
|
+
|
|
2032
|
+
// src/server/routes/workflow-stats.ts
|
|
2033
|
+
var import_hono13 = require("hono");
|
|
2034
|
+
/**
 * Builds the Hono sub-application that serves workflow statistics.
 *
 * Registers a single `GET /workflow-stats` route. The handler reads the
 * `window` query parameter, passes it through `parseWindowParam`, fetches the
 * matching snapshot from `aggregator.getSnapshot(...)`, and runs that snapshot
 * through `enrichWorkflowStats` before replying with `{ ok: true, data }`.
 *
 * @param {{ getSnapshot: Function }} aggregator - Snapshot source queried on every request.
 * @returns {*} The configured Hono app, to be mounted by the caller.
 */
function createWorkflowStatsRoutes(aggregator) {
  const router = new import_hono13.Hono();

  const handleGet = (c) => {
    const rawWindow = c.req.query("window");
    const snapshot = aggregator.getSnapshot(parseWindowParam(rawWindow));
    // The HTTP response carries the enriched form, not the raw snapshot.
    const enriched = enrichWorkflowStats(snapshot);
    return c.json({ ok: true, data: enriched });
  };

  router.get("/workflow-stats", handleGet);
  return router;
}
|
|
2042
|
+
|
|
2043
|
+
// src/server/routes/trace-stats.ts
|
|
2044
|
+
var import_hono14 = require("hono");
|
|
2045
|
+
/**
 * Builds the Hono sub-application that serves trace statistics.
 *
 * Registers a single `GET /trace-stats` route. The handler reads the `window`
 * query parameter, passes it through `parseWindowParam`, and replies with
 * `{ ok: true, data }`, where `data` is whatever `aggregator.getSnapshot(...)`
 * returns for the parsed window.
 *
 * @param {{ getSnapshot: Function }} aggregator - Snapshot source queried on every request.
 * @returns {*} The configured Hono app, to be mounted by the caller.
 */
function createTraceStatsRoutes(aggregator) {
  const router = new import_hono14.Hono();

  const handleGet = (c) => {
    const rawWindow = c.req.query("window");
    const snapshot = aggregator.getSnapshot(parseWindowParam(rawWindow));
    return c.json({ ok: true, data: snapshot });
  };

  router.get("/trace-stats", handleGet);
  return router;
}
|
|
2053
|
+
|
|
1067
2054
|
// src/server/index.ts
|
|
1068
2055
|
function createServer(options) {
|
|
1069
2056
|
const { runtime, staticRoot, basePath = "", readOnly = false } = options;
|
|
1070
|
-
const app6 = new
|
|
1071
|
-
const connMgr = new ConnectionManager();
|
|
1072
|
-
const
|
|
2057
|
+
const app6 = new import_hono15.Hono();
|
|
2058
|
+
const connMgr = new ConnectionManager(options.bufferCaps);
|
|
2059
|
+
const windows = ["24h", "7d", "30d", "all"];
|
|
2060
|
+
const costAggregator = new TraceAggregator({
|
|
2061
|
+
runtime,
|
|
2062
|
+
connMgr,
|
|
2063
|
+
channel: "costs",
|
|
2064
|
+
reducer: reduceCost,
|
|
2065
|
+
emptyState: emptyCostData,
|
|
2066
|
+
windows
|
|
2067
|
+
});
|
|
2068
|
+
const workflowStatsAggregator = new ExecutionAggregator({
|
|
2069
|
+
runtime,
|
|
2070
|
+
connMgr,
|
|
2071
|
+
channel: "workflow-stats",
|
|
2072
|
+
reducer: reduceWorkflowStats,
|
|
2073
|
+
emptyState: emptyWorkflowStatsData,
|
|
2074
|
+
windows,
|
|
2075
|
+
broadcastTransform: enrichWorkflowStats
|
|
2076
|
+
});
|
|
2077
|
+
const traceStatsAggregator = new TraceAggregator({
|
|
2078
|
+
runtime,
|
|
2079
|
+
connMgr,
|
|
2080
|
+
channel: "trace-stats",
|
|
2081
|
+
reducer: reduceTraceStats,
|
|
2082
|
+
emptyState: emptyTraceStatsData,
|
|
2083
|
+
windows
|
|
2084
|
+
});
|
|
2085
|
+
const evalTrendsAggregator = new EvalAggregator({
|
|
2086
|
+
runtime,
|
|
2087
|
+
connMgr,
|
|
2088
|
+
channel: "eval-trends",
|
|
2089
|
+
reducer: reduceEvalTrends,
|
|
2090
|
+
emptyState: emptyEvalTrendData,
|
|
2091
|
+
windows
|
|
2092
|
+
});
|
|
1073
2093
|
if (options.cors !== false) {
|
|
1074
2094
|
app6.use("*", (0, import_cors.cors)());
|
|
1075
2095
|
}
|
|
@@ -1087,11 +2107,11 @@ function createServer(options) {
|
|
|
1087
2107
|
/^PUT \/api\/memory(\/|$)/,
|
|
1088
2108
|
/^DELETE \/api\/memory(\/|$)/,
|
|
1089
2109
|
/^POST \/api\/decisions(\/|$)/,
|
|
1090
|
-
/^POST \/api\/costs(\/|$)/,
|
|
1091
2110
|
/^POST \/api\/tools(\/|$)/,
|
|
1092
2111
|
/^POST \/api\/evals\/import$/,
|
|
1093
2112
|
/^POST \/api\/evals\/[^/]+\/run$/,
|
|
1094
2113
|
/^POST \/api\/evals\/[^/]+\/rescore$/,
|
|
2114
|
+
/^POST \/api\/evals\/runs\/[^/]+\/cancel$/,
|
|
1095
2115
|
/^DELETE \/api\/evals\/history\/[^/]+$/,
|
|
1096
2116
|
/^POST \/api\/playground(\/|$)/
|
|
1097
2117
|
];
|
|
@@ -1111,7 +2131,7 @@ function createServer(options) {
|
|
|
1111
2131
|
await next();
|
|
1112
2132
|
});
|
|
1113
2133
|
}
|
|
1114
|
-
const api = new
|
|
2134
|
+
const api = new import_hono15.Hono();
|
|
1115
2135
|
api.route("/", createHealthRoutes(readOnly));
|
|
1116
2136
|
api.route("/", createWorkflowRoutes(connMgr));
|
|
1117
2137
|
api.route("/", executions_default);
|
|
@@ -1121,20 +2141,37 @@ function createServer(options) {
|
|
|
1121
2141
|
api.route("/", memory_default);
|
|
1122
2142
|
api.route("/", decisions_default);
|
|
1123
2143
|
api.route("/", createCostRoutes(costAggregator));
|
|
1124
|
-
api.route("/",
|
|
2144
|
+
api.route("/", createEvalTrendsRoutes(evalTrendsAggregator));
|
|
2145
|
+
api.route("/", createWorkflowStatsRoutes(workflowStatsAggregator));
|
|
2146
|
+
api.route("/", createTraceStatsRoutes(traceStatsAggregator));
|
|
2147
|
+
const { app: evalApp, closeActiveRuns } = createEvalRoutes(connMgr, options.evalLoader);
|
|
2148
|
+
api.route("/", evalApp);
|
|
1125
2149
|
api.route("/", createPlaygroundRoutes(connMgr));
|
|
1126
2150
|
app6.route("/api", api);
|
|
1127
2151
|
const traceListener = (event) => {
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
2152
|
+
try {
|
|
2153
|
+
const traceEvent = event;
|
|
2154
|
+
const redacted = redactStreamEvent(traceEvent, runtime.isRedactEnabled());
|
|
2155
|
+
if (traceEvent.executionId) {
|
|
2156
|
+
connMgr.broadcastWithWildcard(`trace:${traceEvent.executionId}`, redacted);
|
|
2157
|
+
}
|
|
2158
|
+
if (traceEvent.type === "await_human") {
|
|
2159
|
+
connMgr.broadcast("decisions", redacted);
|
|
2160
|
+
}
|
|
2161
|
+
} catch (err) {
|
|
2162
|
+
console.error(
|
|
2163
|
+
"[axl-studio] trace listener threw; event dropped:",
|
|
2164
|
+
err instanceof Error ? err.message : String(err)
|
|
2165
|
+
);
|
|
1135
2166
|
}
|
|
1136
2167
|
};
|
|
1137
2168
|
runtime.on("trace", traceListener);
|
|
2169
|
+
const aggregatorStartPromise = Promise.all([
|
|
2170
|
+
costAggregator.start(),
|
|
2171
|
+
workflowStatsAggregator.start(),
|
|
2172
|
+
traceStatsAggregator.start(),
|
|
2173
|
+
evalTrendsAggregator.start()
|
|
2174
|
+
]).catch((err) => console.error("[axl-studio] aggregator start failed:", err));
|
|
1138
2175
|
if (staticRoot) {
|
|
1139
2176
|
const indexPath = (0, import_node_path.resolve)(staticRoot, "index.html");
|
|
1140
2177
|
let spaHtml;
|
|
@@ -1184,15 +2221,30 @@ function createServer(options) {
|
|
|
1184
2221
|
app: app6,
|
|
1185
2222
|
connMgr,
|
|
1186
2223
|
costAggregator,
|
|
2224
|
+
workflowStatsAggregator,
|
|
2225
|
+
traceStatsAggregator,
|
|
2226
|
+
evalTrendsAggregator,
|
|
2227
|
+
aggregatorStartPromise,
|
|
1187
2228
|
/** Create WS handlers. Call before registering static/SPA routes are reached. */
|
|
1188
2229
|
createWsHandlers: () => createWsHandlers(connMgr),
|
|
1189
|
-
traceListener
|
|
2230
|
+
traceListener,
|
|
2231
|
+
/** Abort all active streaming eval runs. */
|
|
2232
|
+
closeActiveRuns,
|
|
2233
|
+
/** Close all aggregators (clear intervals and unsubscribe listeners). */
|
|
2234
|
+
closeAggregators: () => {
|
|
2235
|
+
costAggregator.close();
|
|
2236
|
+
workflowStatsAggregator.close();
|
|
2237
|
+
traceStatsAggregator.close();
|
|
2238
|
+
evalTrendsAggregator.close();
|
|
2239
|
+
}
|
|
1190
2240
|
};
|
|
1191
2241
|
}
|
|
1192
2242
|
// Annotate the CommonJS export names for ESM import in node:
|
|
1193
2243
|
0 && (module.exports = {
|
|
1194
2244
|
ConnectionManager,
|
|
1195
|
-
|
|
2245
|
+
EvalAggregator,
|
|
2246
|
+
ExecutionAggregator,
|
|
2247
|
+
TraceAggregator,
|
|
1196
2248
|
createServer
|
|
1197
2249
|
});
|
|
1198
2250
|
//# sourceMappingURL=index.cjs.map
|