@ls-stack/agent-eval 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-CljutWb7.mjs → app-7qDBq_ub.mjs} +3 -3
- package/dist/apps/web/dist/assets/index-CdxG9-O-.css +1 -0
- package/dist/apps/web/dist/assets/index-J1yKYGfN.js +112 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-B0QmsWCU.mjs → cli-C-n-Fd4o.mjs} +885 -261
- package/dist/index.d.mts +1095 -755
- package/dist/index.mjs +3 -3
- package/dist/{runner-BY-y4OzF.mjs → runner-CwEtnUFf.mjs} +2 -2
- package/dist/{runner-CsSJwWE4.mjs → runner-uzzY8kk1.mjs} +1 -1
- package/dist/src-Dy31CPXH.mjs +2 -0
- package/package.json +3 -3
- package/dist/apps/web/dist/assets/index-B2GWGl5i.css +0 -1
- package/dist/apps/web/dist/assets/index-ibhQ_P7i.js +0 -109
- package/dist/src-Bivx1C6b.mjs +0 -2
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { createHash } from "node:crypto";
|
|
2
2
|
import { mkdir, readFile, readdir, rename, rm, stat, writeFile } from "node:fs/promises";
|
|
3
3
|
import { dirname, extname, join, relative, resolve } from "node:path";
|
|
4
|
+
import { z, z as z$1 } from "zod/v4";
|
|
4
5
|
import { AsyncLocalStorage } from "node:async_hooks";
|
|
5
6
|
import { Buffer as Buffer$1 } from "node:buffer";
|
|
6
7
|
import { getCompositeKey } from "@ls-stack/utils/getCompositeKey";
|
|
7
|
-
import { z } from "zod/v4";
|
|
8
8
|
import { watch } from "chokidar";
|
|
9
9
|
import { glob } from "glob";
|
|
10
10
|
import { existsSync } from "node:fs";
|
|
@@ -70,6 +70,27 @@ function getCurrentScope() {
|
|
|
70
70
|
/**
 * Report whether an eval scope is currently active.
 *
 * @returns {boolean} `true` when `getCurrentScope()` yields anything other
 *   than `undefined`.
 */
function isInEvalScope() {
  const activeScope = getCurrentScope();
  return activeScope !== void 0;
}
|
|
73
|
+
/**
 * Report whether `value` is a non-null value whose `typeof` is "object".
 * Arrays count as object-like.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isObjectLike(value) {
  if (value === null) return false;
  return typeof value === "object";
}
|
|
76
|
+
/**
 * Report whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isObjectRecord(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
|
|
79
|
+
/**
 * Produce a shallow copy of an array by mapping every item to itself.
 *
 * @param {unknown[]} value - Array to copy.
 * @returns {unknown[]} A new array holding the same items.
 */
function copyArray$1(value) {
  const duplicate = value.map((entry) => entry);
  return duplicate;
}
|
|
82
|
+
/**
 * Read the current scope's `input`, optionally following a dot-separated path.
 *
 * @param {string} [path] - Dot-separated property path. When omitted, the
 *   whole `input` is returned.
 * @returns {unknown} The resolved value, or `undefined` when there is no
 *   current scope, the path is empty, a path segment is empty, or a
 *   non-object value is reached before the path is exhausted.
 */
function getEvalCaseInput(path = void 0) {
  const activeScope = getCurrentScope();
  if (!activeScope) return void 0;
  if (path === void 0) return activeScope.input;
  if (path.length === 0) return void 0;
  let cursor = activeScope.input;
  for (const key of path.split(".")) {
    if (key.length === 0) return void 0;
    // Only objects (arrays included) can be indexed further.
    if (typeof cursor !== "object" || cursor === null) return void 0;
    cursor = cursor[key];
  }
  return cursor;
}
|
|
73
94
|
/**
|
|
74
95
|
* Attach cache context (adapter, mode, eval id, fingerprint) to a scope.
|
|
75
96
|
*
|
|
@@ -86,6 +107,7 @@ function setScopeCacheContext(scope, context) {
|
|
|
86
107
|
async function runInEvalScope(caseId, fn, options = {}) {
|
|
87
108
|
const scope = {
|
|
88
109
|
caseId,
|
|
110
|
+
input: options.input,
|
|
89
111
|
outputs: {},
|
|
90
112
|
assertionFailures: [],
|
|
91
113
|
spans: [],
|
|
@@ -145,6 +167,58 @@ function setEvalOutput(key, value) {
|
|
|
145
167
|
});
|
|
146
168
|
}
|
|
147
169
|
/**
 * Append an item to an output array in the current case scope.
 *
 * A missing output becomes `[value]`, an existing array gets a new array with
 * `value` added at the end, and any other existing value is kept as
 * `[existing, value]`. Does nothing when there is no current scope.
 *
 * @param {string} key - Output key to append to.
 * @param {unknown} value - Item to append.
 */
function appendToEvalOutput(key, value) {
  const activeScope = getCurrentScope();
  if (!activeScope) return;
  const previous = activeScope.outputs[key];
  let next;
  if (previous === void 0) {
    next = [value];
  } else if (Array.isArray(previous)) {
    next = [...copyArray$1(previous), value];
  } else {
    next = [previous, value];
  }
  activeScope.outputs[key] = next;
  recordOpIfActive(activeScope, {
    kind: "appendOutput",
    key,
    value
  });
}
|
|
188
|
+
/**
 * Shallow-merge object fields into an output value in the current case scope.
 *
 * A missing output becomes a copy of `patch`. If the existing output is not a
 * plain (non-array) object, an assertion failure is pushed onto the scope and
 * the output is left untouched. Does nothing when there is no current scope.
 *
 * @param {string} key - Output key to merge into.
 * @param {object} patch - Fields to merge; these win over existing fields.
 */
function mergeEvalOutput(key, patch) {
  const activeScope = getCurrentScope();
  if (!activeScope) return;
  const previous = activeScope.outputs[key];
  if (previous !== void 0 && !isObjectRecord(previous)) {
    const actualKind = Array.isArray(previous) ? "array" : typeof previous;
    activeScope.assertionFailures.push(toAssertionFailure$1(`mergeEvalOutput("${key}"): existing value is ${actualKind}, expected object`));
    return;
  }
  activeScope.outputs[key] = previous === void 0 ? { ...patch } : {
    ...previous,
    ...patch
  };
  // Both the "create" and the "merge" paths record the same operation.
  recordOpIfActive(activeScope, {
    kind: "mergeOutput",
    key,
    patch
  });
}
|
|
221
|
+
/**
|
|
148
222
|
* Add a numeric delta to an output value in the current case scope.
|
|
149
223
|
*
|
|
150
224
|
* If the existing value is non-numeric, the operation is recorded as an
|
|
@@ -189,6 +263,451 @@ function evalAssert(condition, message) {
|
|
|
189
263
|
throw error;
|
|
190
264
|
}
|
|
191
265
|
//#endregion
|
|
266
|
+
//#region ../sdk/src/cacheKey.ts
|
|
267
|
+
/**
 * Wrapper marking a value that has already been serialized for cache-key
 * purposes; the serialized text is stored on `value`.
 */
var SerializedCacheKeyValue = class {
  /** The pre-serialized representation. */
  value;
  constructor(serialized) {
    this.value = serialized;
  }
};
|
|
273
|
+
/**
 * Hash the components of a cache key into a deterministic hex digest.
 *
 * The input is first passed through `materializeAsyncCacheKeyValue`, which
 * reads native `Blob` and `File` values asynchronously so they are hashed by
 * content. Use `hashCacheKeySync` only when the key contains no async values.
 *
 * @param {unknown} input - Cache key components.
 * @returns {Promise<string>} Hex digest of the key.
 */
async function hashCacheKey(input) {
  const materialized = await materializeAsyncCacheKeyValue(input);
  return hashCacheKeySyncMaterialized(materialized);
}
|
|
282
|
+
/**
 * Synchronously hash cache key components.
 *
 * Supports JSON-like data and in-memory binary values such as `Buffer`,
 * `ArrayBuffer`, and typed arrays, but cannot content-hash native `Blob` or
 * `File` values.
 *
 * @param {unknown} input - Cache key components.
 * @returns {string} Hex digest of the key.
 */
function hashCacheKeySync(input) {
  const digest = hashCacheKeySyncMaterialized(input);
  return digest;
}
|
|
290
|
+
/**
 * Build a composite key string for `input` (using `stringifyCacheKeyValue`
 * as the custom stringifier) and return its SHA-256 digest in hex.
 *
 * @param {unknown} input - Already-materialized cache key components.
 * @returns {string} Hex digest.
 */
function hashCacheKeySyncMaterialized(input) {
  const compositeKey = getCompositeKey(input, { stringify: stringifyCacheKeyValue });
  const hasher = createHash("sha256");
  hasher.update(compositeKey);
  return hasher.digest("hex");
}
|
|
293
|
+
/**
 * Custom stringifier for cache-key values that need special handling.
 *
 * Pre-serialized wrappers return their stored text; in-memory binary values
 * are replaced by a tagged hash of their bytes; `File` and `Blob` values are
 * described by metadata only (their content is not read here).
 *
 * @param {unknown} value
 * @returns {string | undefined} The serialized form, or `undefined` when the
 *   value is none of the specially handled kinds.
 */
function stringifyCacheKeyValue(value) {
  if (value instanceof SerializedCacheKeyValue) return value.value;
  if (Buffer$1.isBuffer(value)) {
    const digest = hashBytes(value);
    return `$buffer:${digest}`;
  }
  if (isArrayBuffer(value)) {
    const digest = hashBytes(new Uint8Array(value));
    return `$arrayBuffer:${digest}`;
  }
  if (isSharedArrayBuffer(value)) {
    const digest = hashBytes(new Uint8Array(value));
    return `$sharedArrayBuffer:${digest}`;
  }
  if (isArrayBufferView(value)) {
    // Hash only the window of the backing buffer that this view covers.
    const window = new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
    return `$${value.constructor.name}:${hashBytes(window)}`;
  }
  // File is tested before Blob, so files get the richer descriptor.
  if (isFile$1(value)) {
    const descriptor = getCompositeKey({
      lastModified: value.lastModified,
      name: value.name,
      size: value.size,
      type: value.type
    });
    return `$file:${descriptor}`;
  }
  if (isBlob$1(value)) {
    const descriptor = getCompositeKey({
      size: value.size,
      type: value.type
    });
    return `$blob:${descriptor}`;
  }
  return void 0;
}
|
|
313
|
+
/**
 * Recursively replace values that need asynchronous serialization with
 * `SerializedCacheKeyValue` wrappers, so the result can be hashed
 * synchronously.
 *
 * Values that `stringifyCacheKeyValue` already handles, and non-object values,
 * are returned unchanged. Arrays and objects are rebuilt with each element or
 * entry materialized.
 *
 * @param {unknown} value - Value to materialize.
 * @param {WeakSet<object>} [refs] - Containers on the current traversal path,
 *   used for cycle detection.
 * @returns {Promise<unknown>} The materialized value.
 * @throws {Error} "Circular reference detected" when a container (object or
 *   array) contains itself, directly or indirectly.
 */
async function materializeAsyncCacheKeyValue(value, refs = /* @__PURE__ */ new WeakSet()) {
  const serialized = await stringifyAsyncCacheKeyValue(value);
  if (serialized !== void 0) return new SerializedCacheKeyValue(serialized);
  if (stringifyCacheKeyValue(value) !== void 0) return value;
  if (!value || typeof value !== "object") return value;
  // Track arrays as well as plain objects: a self-containing array would
  // otherwise recurse forever instead of raising the circular-reference error.
  if (refs.has(value)) throw new Error("Circular reference detected");
  refs.add(value);
  try {
    if (Array.isArray(value)) {
      const items = [];
      for (const item of value) items.push(await materializeAsyncCacheKeyValue(item, refs));
      return items;
    }
    const entries = [];
    for (const [key, entryValue] of Object.entries(value)) entries.push([key, await materializeAsyncCacheKeyValue(entryValue, refs)]);
    return Object.fromEntries(entries);
  } finally {
    // Remove on the way out so a value shared by siblings (not a cycle) is allowed.
    refs.delete(value);
  }
}
|
|
330
|
+
/**
 * Serialize `File` and `Blob` values including a hash of their content.
 *
 * @param {unknown} value
 * @returns {Promise<string | undefined>} A `$file:` or `$blob:` tagged
 *   descriptor, or `undefined` when `value` is neither a `File` nor a `Blob`.
 */
async function stringifyAsyncCacheKeyValue(value) {
  // File is tested before Blob, so files get the richer descriptor.
  if (isFile$1(value)) {
    const descriptor = getCompositeKey({
      bytes: await hashBlobBytes(value),
      lastModified: value.lastModified,
      name: value.name,
      size: value.size,
      type: value.type
    });
    return `$file:${descriptor}`;
  }
  if (isBlob$1(value)) {
    const descriptor = getCompositeKey({
      bytes: await hashBlobBytes(value),
      size: value.size,
      type: value.type
    });
    return `$blob:${descriptor}`;
  }
  return void 0;
}
|
|
344
|
+
/**
 * Read a value's bytes through its `arrayBuffer()` method and hash them.
 *
 * @param {{ arrayBuffer(): Promise<ArrayBuffer> }} value
 * @returns {Promise<string>} Hex digest of the content.
 */
async function hashBlobBytes(value) {
  const content = await value.arrayBuffer();
  return hashBytes(new Uint8Array(content));
}
|
|
347
|
+
/**
 * Compute the SHA-256 digest of `value` as a hex string.
 *
 * @param {string | Buffer | Uint8Array} value - Data fed to the hash.
 * @returns {string} Hex-encoded digest.
 */
function hashBytes(value) {
  const hasher = createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex");
}
|
|
350
|
+
/**
 * Report whether `value` is an `ArrayBuffer` instance.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isArrayBuffer(value) {
  const matches = value instanceof ArrayBuffer;
  return matches;
}
|
|
353
|
+
/**
 * Report whether `value` is a `SharedArrayBuffer` instance.
 *
 * The `typeof` guard keeps this predicate from throwing a `ReferenceError`
 * in runtimes where the `SharedArrayBuffer` global is not defined; there it
 * simply returns `false`.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isSharedArrayBuffer(value) {
  return typeof SharedArrayBuffer !== "undefined" && value instanceof SharedArrayBuffer;
}
|
|
356
|
+
/**
 * Report whether `value` is an ArrayBuffer view, as decided by
 * `ArrayBuffer.isView` (typed arrays and `DataView`).
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isArrayBufferView(value) {
  const isView = ArrayBuffer.isView(value);
  return isView;
}
|
|
359
|
+
/**
 * Report whether `value` is a native `Blob` instance.
 *
 * The `typeof` guard keeps this predicate from throwing a `ReferenceError`
 * in runtimes that do not define a global `Blob`; there it returns `false`.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isBlob$1(value) {
  return typeof Blob !== "undefined" && value instanceof Blob;
}
|
|
362
|
+
/**
 * Report whether `value` is a native `File` instance.
 *
 * The `typeof` guard keeps this predicate from throwing a `ReferenceError`
 * in runtimes that do not define a global `File` (it is not a global in
 * Node.js before v20); there it returns `false`.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isFile$1(value) {
  return typeof File !== "undefined" && value instanceof File;
}
|
|
365
|
+
/**
 * Round-trip a value through JSON to obtain a JSON-safe deep copy.
 *
 * @param {unknown} value - Value to convert.
 * @returns {unknown} The JSON round-tripped copy, or `undefined` when the
 *   value has no JSON representation (`undefined`, a function, a symbol, or
 *   an object whose `toJSON()` returns `undefined`).
 * @throws {TypeError} When `JSON.stringify` throws (e.g. circular structures
 *   or BigInt values).
 */
function toJsonSafe(value) {
  if (value === void 0) return void 0;
  const text = JSON.stringify(value);
  // JSON.stringify returns undefined (not a string) for functions and
  // symbols; JSON.parse(undefined) would throw a SyntaxError.
  if (text === void 0) return void 0;
  return JSON.parse(text);
}
|
|
370
|
+
//#endregion
|
|
371
|
+
//#region ../sdk/src/cacheRecording.ts
|
|
372
|
+
/**
 * Replace `span.attributes` with a new object containing the existing
 * attributes overlaid with `attributes` (later keys win).
 *
 * @param {{ attributes?: object }} span - Span record to update in place.
 * @param {object} attributes - Attributes to merge in.
 */
function mergeSpanAttributes$1(span, attributes) {
  const merged = {
    ...span.attributes,
    ...attributes
  };
  span.attributes = merged;
}
|
|
378
|
+
/**
 * Report whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecordLike$1(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
|
|
381
|
+
/**
 * Describe a value for diagnostics: "array" for arrays, otherwise the result
 * of `typeof`.
 *
 * @param {unknown} value
 * @returns {string}
 */
function valueKind$1(value) {
  if (Array.isArray(value)) return "array";
  return typeof value;
}
|
|
384
|
+
/**
 * Produce a shallow copy of an array by mapping every item to itself.
 *
 * @param {unknown[]} value - Array to copy.
 * @returns {unknown[]} A new array holding the same items.
 */
function copyArray(value) {
  const duplicate = value.map((entry) => entry);
  return duplicate;
}
|
|
387
|
+
/**
 * Copy an attributes object, leaving out every key that starts with "cache.".
 *
 * @param {object | undefined | null} attributes
 * @returns {object} A new object; empty when `attributes` is falsy.
 */
function stripCacheAttributes(attributes) {
  if (!attributes) return {};
  const kept = {};
  for (const [name, attributeValue] of Object.entries(attributes)) {
    if (name.startsWith("cache.")) continue;
    kept[name] = attributeValue;
  }
  return kept;
}
|
|
393
|
+
/**
 * Take a JSON-safe snapshot of a span's attributes, excluding "cache.*" keys.
 *
 * @param {{ attributes?: object } | undefined} span
 * @returns {object} The snapshot, or `{}` when the JSON round-trip does not
 *   produce a record.
 */
function snapshotNonCacheAttributes(span) {
  const stripped = stripCacheAttributes(span?.attributes);
  const snapshot = toJsonSafe(stripped);
  if (isRecordLike$1(snapshot)) return snapshot;
  return {};
}
|
|
397
|
+
/**
 * Collect the entries of `after` whose value differs from the value under the
 * same key in `before`, as judged by `cacheAttributeValuesEqual`.
 *
 * Keys present only in `before` are not reported.
 *
 * @param {object} before - Earlier attribute snapshot.
 * @param {object} after - Later attribute snapshot.
 * @returns {object} Changed or added entries, with their `after` values.
 */
function diffNonCacheAttributes(before, after) {
  const changed = {};
  for (const [name, current] of Object.entries(after)) {
    if (cacheAttributeValuesEqual(before[name], current)) continue;
    changed[name] = current;
  }
  return changed;
}
|
|
402
|
+
/**
 * Compare two attribute values: first with `Object.is`, then by comparing
 * their `JSON.stringify` output.
 *
 * @param {unknown} left
 * @param {unknown} right
 * @returns {boolean} `false` when serialization throws.
 */
function cacheAttributeValuesEqual(left, right) {
  if (Object.is(left, right)) return true;
  let leftText;
  let rightText;
  try {
    leftText = JSON.stringify(left);
    rightText = JSON.stringify(right);
  } catch {
    return false;
  }
  return leftText === rightText;
}
|
|
410
|
+
/**
 * Add `ref` to the end of a span's "cache.refs" attribute.
 *
 * A non-array (or missing) "cache.refs" value is treated as an empty list.
 * Does nothing when `span` is `undefined`.
 *
 * @param {{ attributes?: object } | undefined} span
 * @param {object} ref - Cache reference entry to record.
 */
function appendCacheRef(span, ref) {
  if (span === void 0) return;
  const current = span.attributes?.["cache.refs"];
  const earlierRefs = Array.isArray(current) ? copyArray(current) : [];
  mergeSpanAttributes$1(span, { "cache.refs": [...earlierRefs, ref] });
}
|
|
415
|
+
/**
 * Serialize the span with id `spanId`, plus all spans below it, into a nested
 * tree without ids or timestamps.
 *
 * @param {{ spans: object[] }} scope - Scope whose `spans` list is searched.
 * @param {unknown} spanId - Id of the span at the root of the tree.
 * @returns {object} The serialized span with a `children` array. When no span
 *   has that id, a placeholder (`kind: "custom"`, `name: "unknown"`,
 *   `status: "ok"`, no children) is returned.
 */
function serializeSubSpanTree(scope, spanId) {
  const source = scope.spans.find((span) => span.id === spanId);
  if (!source) {
    return {
      kind: "custom",
      name: "unknown",
      attributes: void 0,
      status: "ok",
      error: void 0,
      errors: void 0,
      warning: void 0,
      warnings: void 0,
      children: []
    };
  }
  // Children are the spans whose parentId points at this span, in list order.
  const children = [];
  for (const span of scope.spans) {
    if (span.parentId === spanId) children.push(serializeSubSpanTree(scope, span.id));
  }
  const { kind, name, attributes, status, error, errors, warning, warnings } = source;
  return {
    kind,
    name,
    attributes,
    status,
    error,
    errors,
    warning,
    warnings,
    children
  };
}
|
|
441
|
+
/**
 * Push a "subSpan" op onto `frame.ops` for every span at or after
 * `frame.baseSpanIndex` whose parent is `frame.replayParentSpanId`.
 *
 * @param {{ spans: object[] }} scope - Scope holding the span list.
 * @param {{ baseSpanIndex: number, replayParentSpanId: unknown, ops: object[] }} frame
 *   Recording frame to append to.
 */
function appendSubSpanOps(scope, frame) {
  let index = frame.baseSpanIndex;
  while (index < scope.spans.length) {
    const span = scope.spans[index];
    index += 1;
    if (span?.parentId !== frame.replayParentSpanId) continue;
    frame.ops.push({
      kind: "subSpan",
      span: serializeSubSpanTree(scope, span.id)
    });
  }
}
|
|
450
|
+
/**
 * Re-apply a cached recording to the scope.
 *
 * Every recorded op is applied in order; then, when a parent span is given,
 * the recording's final attributes, error(s) and warning(s) are copied onto
 * it. `scope.replayingDepth` is incremented for the duration of the replay
 * and restored even if applying an op throws.
 *
 * @param {object} scope - Current eval scope.
 * @param {object | undefined} parentSpan - Span that receives replayed children
 *   and final state, if any.
 * @param {object} recording - Recording to replay.
 * @param {{ generateSpanId(): unknown }} options - Passed to `applyRecordingOp`.
 */
function replayRecording(scope, parentSpan, recording, options) {
  scope.replayingDepth++;
  try {
    for (const op of recording.ops) {
      applyRecordingOp(scope, parentSpan, op, options);
    }
    if (parentSpan === void 0) return;
    if (Object.keys(recording.finalAttributes).length > 0) {
      mergeSpanAttributes$1(parentSpan, recording.finalAttributes);
    }
    if (recording.finalError !== void 0) parentSpan.error = recording.finalError;
    if (recording.finalErrors !== void 0) parentSpan.errors = recording.finalErrors;
    if (recording.finalWarning !== void 0) parentSpan.warning = recording.finalWarning;
    if (recording.finalWarnings !== void 0) parentSpan.warnings = recording.finalWarnings;
  } finally {
    scope.replayingDepth--;
  }
}
|
|
463
|
+
/**
 * Apply one recorded operation to the scope during replay.
 *
 * Output ops (`setOutput`, `appendOutput`, `mergeOutput`, `incrementOutput`)
 * update `scope.outputs`; a type mismatch on merge/increment is reported by
 * pushing an assertion failure. `checkpoint` stores data in
 * `scope.checkpoints`. Any other op is replayed as a serialized span via
 * `op.span`.
 *
 * @param {object} scope - Current eval scope.
 * @param {object | undefined} parentSpan - Parent for replayed sub-spans.
 * @param {object} op - Recorded operation.
 * @param {{ generateSpanId(): unknown }} options - Span id source for replays.
 */
function applyRecordingOp(scope, parentSpan, op, options) {
  switch (op.kind) {
    case "setOutput": {
      scope.outputs[op.key] = op.value;
      return;
    }
    case "appendOutput": {
      const previous = scope.outputs[op.key];
      if (previous === void 0) {
        scope.outputs[op.key] = [op.value];
      } else if (Array.isArray(previous)) {
        scope.outputs[op.key] = [...copyArray(previous), op.value];
      } else {
        scope.outputs[op.key] = [previous, op.value];
      }
      return;
    }
    case "mergeOutput": {
      const previous = scope.outputs[op.key];
      if (previous === void 0) {
        scope.outputs[op.key] = { ...op.patch };
      } else if (isRecordLike$1(previous)) {
        scope.outputs[op.key] = {
          ...previous,
          ...op.patch
        };
      } else {
        scope.assertionFailures.push({ message: `replay mergeEvalOutput("${op.key}"): existing value is ${valueKind$1(previous)}, expected object` });
      }
      return;
    }
    case "incrementOutput": {
      const previous = scope.outputs[op.key];
      if (previous === void 0) {
        scope.outputs[op.key] = op.delta;
      } else if (typeof previous === "number") {
        scope.outputs[op.key] = previous + op.delta;
      } else {
        scope.assertionFailures.push({ message: `replay incrementEvalOutput("${op.key}"): existing value is ${valueKind$1(previous)}, expected number` });
      }
      return;
    }
    case "checkpoint": {
      scope.checkpoints.set(op.name, op.data);
      return;
    }
    default: {
      // Every remaining op is treated as a recorded sub-span.
      replaySerializedSpan(scope, parentSpan?.id ?? null, op.span, options);
    }
  }
}
|
|
498
|
+
/**
 * Recreate a serialized span tree inside `scope.spans`.
 *
 * The span receives a fresh id from `options.generateSpanId()`, the scope's
 * `caseId`, and the current time as both `startedAt` and `endedAt`. Children
 * are replayed recursively with the new id as their parent.
 *
 * @param {{ caseId: unknown, spans: object[] }} scope - Scope receiving the spans.
 * @param {unknown} parentId - Parent span id for the replayed span.
 * @param {object} serialized - Serialized span with a `children` array.
 * @param {{ generateSpanId(): unknown }} options - Source of new span ids.
 */
function replaySerializedSpan(scope, parentId, serialized, options) {
  const id = options.generateSpanId();
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
  scope.spans.push({
    id,
    parentId,
    caseId: scope.caseId,
    kind: serialized.kind,
    name: serialized.name,
    startedAt: timestamp,
    endedAt: timestamp,
    status: serialized.status,
    attributes: serialized.attributes,
    error: serialized.error,
    errors: serialized.errors,
    warning: serialized.warning,
    warnings: serialized.warnings
  });
  for (const child of serialized.children) {
    replaySerializedSpan(scope, id, child, options);
  }
}
|
|
519
|
+
//#endregion
|
|
520
|
+
//#region ../sdk/src/traceDiagnostics.ts
|
|
521
|
+
// Field names that normalized trace errors set explicitly; they are left out
// when copying an error's remaining ("extra") fields.
const errorCoreFields = new Set(["name", "message", "stack", "capturedAt"]);
|
|
527
|
+
/**
 * Report whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord$2(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
|
|
530
|
+
/**
 * Turn an arbitrary thrown value into a message string.
 *
 * Strings are returned as-is; numbers, booleans and bigints are stringified;
 * symbols use their description (or "Symbol"); functions are rendered as
 * `[function name]`; `undefined` and `null` become their names. Everything
 * else is JSON-serialized.
 *
 * @param {unknown} error - Value to describe.
 * @returns {string} Always a string: "Unknown error" is used when JSON
 *   serialization throws or produces no text.
 */
function formatUnknownErrorMessage(error) {
  if (error === void 0) return "undefined";
  if (error === null) return "null";
  switch (typeof error) {
    case "string":
      return error;
    case "number":
    case "boolean":
    case "bigint":
      return String(error);
    case "symbol":
      return error.description ?? "Symbol";
    case "function":
      return error.name ? `[function ${error.name}]` : "[function]";
    default:
      break;
  }
  try {
    // JSON.stringify returns undefined (not a string) when toJSON() yields
    // undefined; fall back so callers always receive a string.
    return JSON.stringify(error) ?? "Unknown error";
  } catch {
    return "Unknown error";
  }
}
|
|
544
|
+
/**
 * Copy an error's own enumerable fields, leaving out the core fields listed
 * in `errorCoreFields`.
 *
 * @param {object} error
 * @returns {object} New object holding the remaining fields.
 */
function getErrorExtraFields(error) {
  const extraEntries = Object.entries(error).filter((entry) => !errorCoreFields.has(entry[0]));
  return Object.fromEntries(extraEntries);
}
|
|
547
|
+
/**
 * Convert a thrown value into a plain trace-error record.
 *
 * - `Error` instances: extra own fields plus `name`, `message`, `stack`.
 * - Other plain objects: extra fields, `name`/`stack` only when they are
 *   strings, and a `message` derived from `error.message` (or from the whole
 *   object when it has no `message`).
 * - Anything else: `message` is `String(error)`.
 *
 * @param {unknown} error - Value to normalize.
 * @param {string} [capturedAt] - Stored on the result as `capturedAt`.
 * @returns {object} The normalized record.
 */
function normalizeTraceError(error, capturedAt = void 0) {
  if (error instanceof Error) {
    const extras = getErrorExtraFields(error);
    return {
      ...extras,
      name: error.name,
      message: error.message,
      stack: error.stack,
      capturedAt
    };
  }
  if (!isRecord$2(error)) {
    return {
      message: String(error),
      capturedAt
    };
  }
  const normalized = { ...getErrorExtraFields(error) };
  const name = typeof error.name === "string" ? error.name : void 0;
  const stack = typeof error.stack === "string" ? error.stack : void 0;
  const messageSource = error.message === void 0 ? error : error.message;
  const message = formatUnknownErrorMessage(messageSource);
  // Assign in a fixed order so the resulting key order stays
  // extras, name, message, stack, capturedAt.
  if (name !== void 0) normalized.name = name;
  normalized.message = message;
  if (stack !== void 0) normalized.stack = stack;
  normalized.capturedAt = capturedAt;
  return normalized;
}
|
|
573
|
+
/**
 * Normalize one or more errors into trace-error records.
 *
 * When `additionalErrors` is non-empty, the list is `errorOrErrors` followed
 * by the additional errors. Otherwise `errorOrErrors` is used as the list if
 * it is an array, or wrapped in a one-item list if it is not.
 *
 * @param {unknown} errorOrErrors
 * @param {unknown[]} additionalErrors
 * @param {string} capturedAt - Passed to `normalizeTraceError` for each item.
 * @returns {object[]}
 */
function normalizeTraceErrors(errorOrErrors, additionalErrors, capturedAt) {
  let rawErrors;
  if (additionalErrors.length > 0) {
    rawErrors = [errorOrErrors, ...additionalErrors];
  } else if (Array.isArray(errorOrErrors)) {
    rawErrors = errorOrErrors;
  } else {
    rawErrors = [errorOrErrors];
  }
  return rawErrors.map((error) => normalizeTraceError(error, capturedAt));
}
|
|
576
|
+
/**
 * Normalize one or more warnings into trace-error records.
 *
 * When `additionalWarnings` is non-empty, the list is `warningOrWarnings`
 * followed by the additional warnings. Otherwise `warningOrWarnings` is used
 * as the list if it is an array, or wrapped in a one-item list if it is not.
 *
 * @param {unknown} warningOrWarnings
 * @param {unknown[]} additionalWarnings
 * @param {string} capturedAt - Passed to `normalizeTraceError` for each item.
 * @returns {object[]}
 */
function normalizeTraceWarnings(warningOrWarnings, additionalWarnings, capturedAt) {
  let rawWarnings;
  if (additionalWarnings.length > 0) {
    rawWarnings = [warningOrWarnings, ...additionalWarnings];
  } else if (Array.isArray(warningOrWarnings)) {
    rawWarnings = warningOrWarnings;
  } else {
    rawWarnings = [warningOrWarnings];
  }
  return rawWarnings.map((warning) => normalizeTraceError(warning, capturedAt));
}
|
|
579
|
+
/**
 * Report whether `value` looks like a capture-options object: a plain object
 * whose only own enumerable key is `level`, with `level` either `undefined`
 * or a valid level.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isCaptureEvalSpanErrorOptions(value) {
  if (!isRecord$2(value)) return false;
  const ownKeys = Object.keys(value);
  // Keys are unique, so "non-empty and every key is level" means exactly ["level"].
  if (ownKeys.length !== 1 || ownKeys[0] !== "level") return false;
  const { level } = value;
  return level === void 0 || isCaptureEvalSpanErrorLevel(level);
}
|
|
586
|
+
/**
 * Report whether `value` is one of the capture levels, "error" or "warning".
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isCaptureEvalSpanErrorLevel(value) {
  switch (value) {
    case "error":
    case "warning":
      return true;
    default:
      return false;
  }
}
|
|
589
|
+
/**
 * Separate a trailing level/options argument from a list of extra errors.
 *
 * If the last item is a level string it becomes `{ level }`; if it is an
 * options object it is used as-is. In both cases it is removed from the
 * error list. Otherwise the whole list is errors and the options are `{}`.
 *
 * @param {unknown[]} additionalErrorsOrOptions - Rest arguments after the first error.
 * @returns {{ additionalErrors: unknown[], options: object }}
 */
function splitCaptureEvalSpanErrorArgs(additionalErrorsOrOptions) {
  const trailing = additionalErrorsOrOptions.at(-1);
  let options;
  if (isCaptureEvalSpanErrorLevel(trailing)) {
    options = { level: trailing };
  } else if (isCaptureEvalSpanErrorOptions(trailing)) {
    options = trailing;
  }
  if (options === void 0) {
    return {
      additionalErrors: additionalErrorsOrOptions,
      options: {}
    };
  }
  return {
    additionalErrors: additionalErrorsOrOptions.slice(0, -1),
    options
  };
}
|
|
604
|
+
/**
 * Attach errors to a span: extend `span.errors`, set `span.error` to the last
 * new error, and mark the span's status as "error".
 *
 * Nothing changes when `errors` is empty or its last item is `undefined`.
 *
 * @param {object} span - Span record to update in place.
 * @param {object[]} errors - Normalized errors to add.
 */
function appendSpanErrors(span, errors) {
  const newest = errors.length === 0 ? void 0 : errors.at(-1);
  if (newest === void 0) return;
  const earlier = span.errors ?? [];
  span.errors = [...earlier, ...errors];
  span.error = newest;
  span.status = "error";
}
|
|
612
|
+
/**
 * Attach warnings to a span: extend `span.warnings` and set `span.warning` to
 * the last new warning. The span's status is not touched.
 *
 * Nothing changes when `warnings` is empty or its last item is `undefined`.
 *
 * @param {object} span - Span record to update in place.
 * @param {object[]} warnings - Normalized warnings to add.
 */
function appendSpanWarnings(span, warnings) {
  const newest = warnings.length === 0 ? void 0 : warnings.at(-1);
  if (newest === void 0) return;
  const earlier = span.warnings ?? [];
  span.warnings = [...earlier, ...warnings];
  span.warning = newest;
}
|
|
619
|
+
/**
 * Report whether a span carries an error: either `span.error` is set or
 * `span.errors` is a non-empty list.
 *
 * @param {{ error?: unknown, errors?: unknown[] }} span
 * @returns {boolean}
 */
function hasSpanError(span) {
  if (span.error !== void 0) return true;
  const errorCount = span.errors?.length ?? 0;
  return errorCount > 0;
}
|
|
622
|
+
//#endregion
|
|
623
|
+
//#region ../sdk/src/valueCache.ts
|
|
624
|
+
/**
 * Build the `traceCache(info, fn)` helper bound to a span-id generator.
 *
 * The returned function runs `fn` directly when there is no current scope, no
 * cache context, or a replay is in progress. Otherwise it derives a key hash
 * from the namespace, code fingerprint and `info.key`, then:
 *
 * - mode "use": looks the key up; on a hit it replays the stored recording
 *   and returns its `returnValue` without calling `fn`;
 * - any mode without a hit: runs `fn` while recording operations, and unless
 *   the mode is "bypass", writes the recording through the adapter.
 *
 * A cache reference ("hit", "miss", "refresh" or "bypass") is appended to the
 * active span in every cached path.
 *
 * @param {() => unknown} generateSpanId - Span id source used when replaying.
 * @returns {(info: object, fn: () => unknown) => Promise<unknown>}
 */
function createTraceCache(generateSpanId) {
  return async function traceCache(info, fn) {
    const scope = getCurrentScope();
    if (!scope) return await fn();
    const cacheCtx = scope.cacheContext;
    if (cacheCtx === void 0 || scope.replayingDepth > 0) return await fn();
    const namespace = info.namespace ?? `${cacheCtx.evalId}__${info.name}`;
    const keyHash = await hashCacheKey({
      namespace,
      codeFingerprint: cacheCtx.codeFingerprint,
      key: info.key
    });
    const activeSpan = scope.activeSpanStack.at(-1);
    // All cache refs share the same leading fields; only status (and hit
    // details) vary.
    const noteCacheRef = (status, details = {}) => {
      appendCacheRef(activeSpan, {
        type: "value",
        name: info.name,
        namespace,
        key: keyHash,
        status,
        ...details
      });
    };
    switch (cacheCtx.mode) {
      case "use": {
        const hit = await cacheCtx.adapter.lookup(namespace, keyHash);
        if (hit) {
          const storedAt = hit.storedAt;
          const age = Date.now() - new Date(storedAt).getTime();
          noteCacheRef("hit", {
            storedAt,
            age
          });
          replayRecording(scope, activeSpan, hit.recording, { generateSpanId });
          return hit.recording.returnValue;
        }
        noteCacheRef("miss");
        break;
      }
      case "refresh":
        noteCacheRef("refresh");
        break;
      default:
        noteCacheRef("bypass");
    }
    const attributesBefore = snapshotNonCacheAttributes(activeSpan);
    const frame = {
      baseSpanIndex: scope.spans.length,
      replayParentSpanId: activeSpan?.id ?? null,
      ops: []
    };
    scope.recordingStack.push(frame);
    let bodyResult;
    try {
      bodyResult = await fn();
    } finally {
      // Always unwind the recording frame, even when fn throws.
      scope.recordingStack.pop();
    }
    appendSubSpanOps(scope, frame);
    if (cacheCtx.mode === "bypass") return bodyResult;
    const finalAttributes = diffNonCacheAttributes(attributesBefore, snapshotNonCacheAttributes(activeSpan));
    const recording = {
      returnValue: toJsonSafe(bodyResult),
      finalAttributes,
      ops: frame.ops
    };
    await cacheCtx.adapter.write({
      version: 1,
      key: keyHash,
      namespace,
      operationType: "value",
      operationName: info.name,
      storedAt: (/* @__PURE__ */ new Date()).toISOString(),
      codeFingerprint: cacheCtx.codeFingerprint,
      recording
    });
    return bodyResult;
  };
}
|
|
710
|
+
//#endregion
|
|
192
711
|
//#region ../sdk/src/tracer.ts
|
|
193
712
|
let spanIdCounter = 0;
|
|
194
713
|
function generateSpanId() {
|
|
@@ -204,7 +723,10 @@ function noopActiveSpan() {
|
|
|
204
723
|
return {
|
|
205
724
|
setName() {},
|
|
206
725
|
setAttribute() {},
|
|
207
|
-
setAttributes() {}
|
|
726
|
+
setAttributes() {},
|
|
727
|
+
incrementAttribute() {},
|
|
728
|
+
appendToAttribute() {},
|
|
729
|
+
mergeAttribute() {}
|
|
208
730
|
};
|
|
209
731
|
}
|
|
210
732
|
function noopExternalSpan(id) {
|
|
@@ -213,6 +735,9 @@ function noopExternalSpan(id) {
|
|
|
213
735
|
setName() {},
|
|
214
736
|
setAttribute() {},
|
|
215
737
|
setAttributes() {},
|
|
738
|
+
incrementAttribute() {},
|
|
739
|
+
appendToAttribute() {},
|
|
740
|
+
mergeAttribute() {},
|
|
216
741
|
end() {}
|
|
217
742
|
};
|
|
218
743
|
}
|
|
@@ -222,6 +747,61 @@ function mergeSpanAttributes(span, attributes) {
|
|
|
222
747
|
...attributes
|
|
223
748
|
};
|
|
224
749
|
}
|
|
750
|
+
/**
 * Report whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecordLike(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
|
|
753
|
+
/**
 * Describe a value for diagnostics: "array" for arrays, otherwise the result
 * of `typeof`.
 *
 * @param {unknown} value
 * @returns {string}
 */
function valueKind(value) {
  if (Array.isArray(value)) return "array";
  return typeof value;
}
|
|
756
|
+
/**
 * Push an assertion failure with the given message onto the current scope.
 * Does nothing when there is no current scope.
 *
 * @param {string} message - Failure message.
 */
function recordSpanAttributeAssertion(message) {
  const activeScope = getCurrentScope();
  if (activeScope) {
    activeScope.assertionFailures.push({ message });
  }
}
|
|
761
|
+
/**
 * Add `delta` to a numeric span attribute.
 *
 * A missing attribute is initialised to `delta`. If the existing value is not
 * a number, an assertion failure is recorded and the attribute is left as-is.
 *
 * @param {{ attributes?: object }} span - Span record to update.
 * @param {string} key - Attribute name.
 * @param {number} delta - Amount to add.
 */
function incrementSpanAttribute(span, key, delta) {
  const current = span.attributes?.[key];
  if (current !== void 0 && typeof current !== "number") {
    recordSpanAttributeAssertion(`evalSpan.incrementAttribute("${key}"): existing value is ${valueKind(current)}, expected number`);
    return;
  }
  const next = current === void 0 ? delta : current + delta;
  mergeSpanAttributes(span, { [key]: next });
}
|
|
773
|
+
/**
 * Append `value` to a list-valued span attribute.
 *
 * A missing attribute becomes `[value]`, an existing array gets a new array
 * with `value` at the end, and any other existing value is kept as
 * `[existing, value]`.
 *
 * @param {{ attributes?: object }} span - Span record to update.
 * @param {string} key - Attribute name.
 * @param {unknown} value - Item to append.
 */
function appendToSpanAttribute(span, key, value) {
  const current = span.attributes?.[key];
  let next;
  if (current === void 0) {
    next = [value];
  } else if (Array.isArray(current)) {
    const earlierItems = current.map((item) => item);
    next = [...earlierItems, value];
  } else {
    next = [current, value];
  }
  mergeSpanAttributes(span, { [key]: next });
}
|
|
786
|
+
/**
 * Shallow-merge `patch` into an object-valued span attribute.
 *
 * A missing attribute becomes a copy of `patch`. If the existing value is not
 * a plain (non-array) object, an assertion failure is recorded and the
 * attribute is left as-is.
 *
 * @param {{ attributes?: object }} span - Span record to update.
 * @param {string} key - Attribute name.
 * @param {object} patch - Fields to merge; these win over existing fields.
 */
function mergeSpanAttribute(span, key, patch) {
  const current = span.attributes?.[key];
  if (current !== void 0 && !isRecordLike(current)) {
    recordSpanAttributeAssertion(`evalSpan.mergeAttribute("${key}"): existing value is ${valueKind(current)}, expected object`);
    return;
  }
  const next = current === void 0 ? { ...patch } : {
    ...current,
    ...patch
  };
  mergeSpanAttributes(span, { [key]: next });
}
|
|
801
|
+
/**
 * Close a span whose callback completed without throwing: the status becomes
 * "error" if the span already carries an error, otherwise "ok", and `endedAt`
 * is set to the current time.
 *
 * @param {object} span - Span record to update in place.
 */
function finishSpanWithoutThrownError(span) {
  if (hasSpanError(span)) {
    span.status = "error";
  } else {
    span.status = "ok";
  }
  span.endedAt = (/* @__PURE__ */ new Date()).toISOString();
}
|
|
225
805
|
function createSpanHandle(span) {
|
|
226
806
|
return {
|
|
227
807
|
setName(value) {
|
|
@@ -232,9 +812,25 @@ function createSpanHandle(span) {
|
|
|
232
812
|
},
|
|
233
813
|
setAttributes(value) {
|
|
234
814
|
mergeSpanAttributes(span, value);
|
|
815
|
+
},
|
|
816
|
+
incrementAttribute(key, delta) {
|
|
817
|
+
incrementSpanAttribute(span, key, delta);
|
|
818
|
+
},
|
|
819
|
+
appendToAttribute(key, value) {
|
|
820
|
+
appendToSpanAttribute(span, key, value);
|
|
821
|
+
},
|
|
822
|
+
mergeAttribute(key, patch) {
|
|
823
|
+
mergeSpanAttribute(span, key, patch);
|
|
235
824
|
}
|
|
236
825
|
};
|
|
237
826
|
}
|
|
827
|
+
/**
 * Look up the span with the given id in the current scope and pass it to
 * `update`. Does nothing when there is no current scope or no such span.
 *
 * @param {unknown} id - Span id passed to `findSpan`.
 * @param {(span: object) => void} update - Callback that changes the span in place.
 */
function updateExternalSpanRecord(id, update) {
  const activeScope = getCurrentScope();
  if (!activeScope) return;
  const target = findSpan(activeScope, id);
  if (target) update(target);
}
|
|
238
834
|
function createExternalSpanHandle(id) {
|
|
239
835
|
return {
|
|
240
836
|
id,
|
|
@@ -256,6 +852,21 @@ function createExternalSpanHandle(id) {
|
|
|
256
852
|
attributes: value
|
|
257
853
|
});
|
|
258
854
|
},
|
|
855
|
+
incrementAttribute(key, delta) {
|
|
856
|
+
updateExternalSpanRecord(id, (span) => {
|
|
857
|
+
incrementSpanAttribute(span, key, delta);
|
|
858
|
+
});
|
|
859
|
+
},
|
|
860
|
+
appendToAttribute(key, value) {
|
|
861
|
+
updateExternalSpanRecord(id, (span) => {
|
|
862
|
+
appendToSpanAttribute(span, key, value);
|
|
863
|
+
});
|
|
864
|
+
},
|
|
865
|
+
mergeAttribute(key, patch) {
|
|
866
|
+
updateExternalSpanRecord(id, (span) => {
|
|
867
|
+
mergeSpanAttribute(span, key, patch);
|
|
868
|
+
});
|
|
869
|
+
},
|
|
259
870
|
end(info = {}) {
|
|
260
871
|
endExternalSpan({
|
|
261
872
|
...info,
|
|
@@ -312,6 +923,8 @@ function updateExternalSpan(info) {
|
|
|
312
923
|
if (info.name !== void 0) span.name = info.name;
|
|
313
924
|
if (info.status !== void 0) span.status = info.status;
|
|
314
925
|
if (info.error !== void 0) span.error = info.error;
|
|
926
|
+
if (info.warning !== void 0) span.warning = info.warning;
|
|
927
|
+
if (info.warnings !== void 0) span.warnings = info.warnings;
|
|
315
928
|
if (info.attributes !== void 0) mergeSpanAttributes(span, info.attributes);
|
|
316
929
|
}
|
|
317
930
|
function endExternalSpan(info) {
|
|
@@ -340,6 +953,8 @@ function recordExternalSpan(info) {
|
|
|
340
953
|
existing.status = status;
|
|
341
954
|
existing.attributes = info.attributes;
|
|
342
955
|
existing.error = info.error;
|
|
956
|
+
existing.warning = info.warning;
|
|
957
|
+
existing.warnings = info.warnings;
|
|
343
958
|
return id;
|
|
344
959
|
}
|
|
345
960
|
scope.spans.push({
|
|
@@ -352,7 +967,9 @@ function recordExternalSpan(info) {
|
|
|
352
967
|
endedAt,
|
|
353
968
|
status,
|
|
354
969
|
attributes: info.attributes,
|
|
355
|
-
error: info.error
|
|
970
|
+
error: info.error,
|
|
971
|
+
warning: info.warning,
|
|
972
|
+
warnings: info.warnings
|
|
356
973
|
});
|
|
357
974
|
return id;
|
|
358
975
|
}
|
|
@@ -364,20 +981,58 @@ function recordExternalSpan(info) {
|
|
|
364
981
|
const evalSpan = {
|
|
365
982
|
setName(value) {
|
|
366
983
|
updateCurrentSpan((currentSpan) => {
|
|
367
|
-
currentSpan.name = value;
|
|
984
|
+
currentSpan.name = value;
|
|
985
|
+
});
|
|
986
|
+
},
|
|
987
|
+
setAttribute(key, value) {
|
|
988
|
+
updateCurrentSpan((currentSpan) => {
|
|
989
|
+
mergeSpanAttributes(currentSpan, { [key]: value });
|
|
990
|
+
});
|
|
991
|
+
},
|
|
992
|
+
setAttributes(value) {
|
|
993
|
+
updateCurrentSpan((currentSpan) => {
|
|
994
|
+
mergeSpanAttributes(currentSpan, value);
|
|
995
|
+
});
|
|
996
|
+
},
|
|
997
|
+
incrementAttribute(key, delta) {
|
|
998
|
+
updateCurrentSpan((currentSpan) => {
|
|
999
|
+
incrementSpanAttribute(currentSpan, key, delta);
|
|
368
1000
|
});
|
|
369
1001
|
},
|
|
370
|
-
|
|
1002
|
+
appendToAttribute(key, value) {
|
|
371
1003
|
updateCurrentSpan((currentSpan) => {
|
|
372
|
-
|
|
1004
|
+
appendToSpanAttribute(currentSpan, key, value);
|
|
373
1005
|
});
|
|
374
1006
|
},
|
|
375
|
-
|
|
1007
|
+
mergeAttribute(key, patch) {
|
|
376
1008
|
updateCurrentSpan((currentSpan) => {
|
|
377
|
-
|
|
1009
|
+
mergeSpanAttribute(currentSpan, key, patch);
|
|
378
1010
|
});
|
|
379
1011
|
}
|
|
380
1012
|
};
|
|
1013
|
+
/**
|
|
1014
|
+
* Attach one or more recoverable errors to the active eval span.
|
|
1015
|
+
*
|
|
1016
|
+
* By default the active span is marked as `error` even if its callback later
|
|
1017
|
+
* completes without throwing. Pass `'warning'` or `{ level: 'warning' }` as the
|
|
1018
|
+
* final argument to record the diagnostic without changing span status. Calls
|
|
1019
|
+
* outside `evalTracer.span(...)` are ignored.
|
|
1020
|
+
*/
|
|
1021
|
+
function captureEvalSpanError(errorOrErrors, ...additionalErrorsOrOptions) {
|
|
1022
|
+
const { additionalErrors, options } = splitCaptureEvalSpanErrorArgs(additionalErrorsOrOptions);
|
|
1023
|
+
const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
1024
|
+
if ((options.level ?? "error") === "warning") {
|
|
1025
|
+
const warnings = normalizeTraceWarnings(errorOrErrors, additionalErrors, capturedAt);
|
|
1026
|
+
updateCurrentSpan((currentSpan) => {
|
|
1027
|
+
appendSpanWarnings(currentSpan, warnings);
|
|
1028
|
+
});
|
|
1029
|
+
return;
|
|
1030
|
+
}
|
|
1031
|
+
const errors = normalizeTraceErrors(errorOrErrors, additionalErrors, capturedAt);
|
|
1032
|
+
updateCurrentSpan((currentSpan) => {
|
|
1033
|
+
appendSpanErrors(currentSpan, errors);
|
|
1034
|
+
});
|
|
1035
|
+
}
|
|
381
1036
|
async function traceSpan(info, fn) {
|
|
382
1037
|
const scope = getCurrentScope();
|
|
383
1038
|
if (!scope) return await fn(noopActiveSpan());
|
|
@@ -421,8 +1076,8 @@ async function traceSpan(info, fn) {
|
|
|
421
1076
|
"cache.storedAt": storedAt,
|
|
422
1077
|
"cache.age": Date.now() - new Date(storedAt).getTime()
|
|
423
1078
|
});
|
|
424
|
-
replayRecording(scope, spanRecord, hit.recording);
|
|
425
|
-
spanRecord.status = "ok";
|
|
1079
|
+
replayRecording(scope, spanRecord, hit.recording, { generateSpanId });
|
|
1080
|
+
spanRecord.status = hit.recording.finalStatus ?? (hasSpanError(spanRecord) ? "error" : "ok");
|
|
426
1081
|
spanRecord.endedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
427
1082
|
return hit.recording.returnValue;
|
|
428
1083
|
}
|
|
@@ -431,7 +1086,7 @@ async function traceSpan(info, fn) {
|
|
|
431
1086
|
else mergeSpanAttributes(spanRecord, { "cache.status": "bypass" });
|
|
432
1087
|
const frame = {
|
|
433
1088
|
baseSpanIndex: scope.spans.length,
|
|
434
|
-
|
|
1089
|
+
replayParentSpanId: id,
|
|
435
1090
|
ops: []
|
|
436
1091
|
};
|
|
437
1092
|
scope.recordingStack.push(frame);
|
|
@@ -442,16 +1097,24 @@ async function traceSpan(info, fn) {
|
|
|
442
1097
|
scope.recordingStack.pop();
|
|
443
1098
|
}
|
|
444
1099
|
appendSubSpanOps(scope, frame);
|
|
1100
|
+
finishSpanWithoutThrownError(spanRecord);
|
|
445
1101
|
if (ctx.mode !== "bypass") {
|
|
446
1102
|
const recording = {
|
|
447
1103
|
returnValue: toJsonSafe(bodyResult),
|
|
448
1104
|
finalAttributes: stripCacheAttributes(spanRecord.attributes),
|
|
1105
|
+
finalStatus: spanRecord.status,
|
|
1106
|
+
finalError: spanRecord.error,
|
|
1107
|
+
finalErrors: spanRecord.errors,
|
|
1108
|
+
finalWarning: spanRecord.warning,
|
|
1109
|
+
finalWarnings: spanRecord.warnings,
|
|
449
1110
|
ops: frame.ops
|
|
450
1111
|
};
|
|
451
1112
|
const entry = {
|
|
452
1113
|
version: 1,
|
|
453
1114
|
key: keyHash,
|
|
454
1115
|
namespace,
|
|
1116
|
+
operationType: "span",
|
|
1117
|
+
operationName: info.name,
|
|
455
1118
|
spanName: info.name,
|
|
456
1119
|
spanKind: info.kind,
|
|
457
1120
|
storedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
@@ -460,23 +1123,15 @@ async function traceSpan(info, fn) {
|
|
|
460
1123
|
};
|
|
461
1124
|
await ctx.adapter.write(entry);
|
|
462
1125
|
}
|
|
463
|
-
spanRecord.status = "ok";
|
|
464
|
-
spanRecord.endedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
465
1126
|
return bodyResult;
|
|
466
1127
|
}
|
|
467
1128
|
const result = await fn(activeSpan);
|
|
468
|
-
spanRecord
|
|
469
|
-
spanRecord.endedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
1129
|
+
finishSpanWithoutThrownError(spanRecord);
|
|
470
1130
|
return result;
|
|
471
1131
|
} catch (error) {
|
|
472
1132
|
spanRecord.status = "error";
|
|
473
1133
|
spanRecord.endedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
474
|
-
|
|
475
|
-
name: error.name,
|
|
476
|
-
message: error.message,
|
|
477
|
-
stack: error.stack
|
|
478
|
-
};
|
|
479
|
-
else spanRecord.error = { message: String(error) };
|
|
1134
|
+
spanRecord.error = normalizeTraceError(error);
|
|
480
1135
|
throw error;
|
|
481
1136
|
} finally {
|
|
482
1137
|
scope.spanStack.pop();
|
|
@@ -491,6 +1146,13 @@ const evalTracer = {
|
|
|
491
1146
|
/** Run a callback inside a new trace span and record its lifecycle. */
|
|
492
1147
|
span: traceSpan,
|
|
493
1148
|
/**
|
|
1149
|
+
* Cache a pure value without creating a trace span.
|
|
1150
|
+
*
|
|
1151
|
+
* When called inside an active span, the span receives a `cache.refs` entry
|
|
1152
|
+
* describing the value cache status for this run.
|
|
1153
|
+
*/
|
|
1154
|
+
cache: createTraceCache(generateSpanId),
|
|
1155
|
+
/**
|
|
494
1156
|
* Start a span whose lifecycle is controlled by an external tracer/exporter.
|
|
495
1157
|
*
|
|
496
1158
|
* Calls are no-ops outside an eval case scope, except that a generated or
|
|
@@ -571,189 +1233,6 @@ function buildTraceTree(spans, checkpoints) {
|
|
|
571
1233
|
checkpoints
|
|
572
1234
|
};
|
|
573
1235
|
}
|
|
574
|
-
var SerializedCacheKeyValue = class {
|
|
575
|
-
value;
|
|
576
|
-
constructor(value) {
|
|
577
|
-
this.value = value;
|
|
578
|
-
}
|
|
579
|
-
};
|
|
580
|
-
/**
|
|
581
|
-
* Hash the components of a cache key into a deterministic hex digest.
|
|
582
|
-
*
|
|
583
|
-
* Native `Blob` and `File` values are read asynchronously and hashed by
|
|
584
|
-
* content. Use `hashCacheKeySync` only when the key contains no async values.
|
|
585
|
-
*/
|
|
586
|
-
async function hashCacheKey(input) {
|
|
587
|
-
return hashCacheKeySyncMaterialized(await materializeAsyncCacheKeyValue(input));
|
|
588
|
-
}
|
|
589
|
-
/**
|
|
590
|
-
* Synchronously hash cache key components. This supports JSON-like data and
|
|
591
|
-
* in-memory binary values such as `Buffer`, `ArrayBuffer`, and typed arrays,
|
|
592
|
-
* but cannot content-hash native `Blob` or `File` values.
|
|
593
|
-
*/
|
|
594
|
-
function hashCacheKeySync(input) {
|
|
595
|
-
return hashCacheKeySyncMaterialized(input);
|
|
596
|
-
}
|
|
597
|
-
function hashCacheKeySyncMaterialized(input) {
|
|
598
|
-
return createHash("sha256").update(getCompositeKey(input, { stringify: stringifyCacheKeyValue })).digest("hex");
|
|
599
|
-
}
|
|
600
|
-
function stringifyCacheKeyValue(value) {
|
|
601
|
-
if (value instanceof SerializedCacheKeyValue) return value.value;
|
|
602
|
-
if (Buffer$1.isBuffer(value)) return `$buffer:${hashBytes(value)}`;
|
|
603
|
-
if (isArrayBuffer(value)) return `$arrayBuffer:${hashBytes(new Uint8Array(value))}`;
|
|
604
|
-
if (isSharedArrayBuffer(value)) return `$sharedArrayBuffer:${hashBytes(new Uint8Array(value))}`;
|
|
605
|
-
if (isArrayBufferView(value)) {
|
|
606
|
-
const bytes = new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
|
|
607
|
-
return `$${value.constructor.name}:${hashBytes(bytes)}`;
|
|
608
|
-
}
|
|
609
|
-
if (isFile$1(value)) return `$file:${getCompositeKey({
|
|
610
|
-
lastModified: value.lastModified,
|
|
611
|
-
name: value.name,
|
|
612
|
-
size: value.size,
|
|
613
|
-
type: value.type
|
|
614
|
-
})}`;
|
|
615
|
-
if (isBlob$1(value)) return `$blob:${getCompositeKey({
|
|
616
|
-
size: value.size,
|
|
617
|
-
type: value.type
|
|
618
|
-
})}`;
|
|
619
|
-
}
|
|
620
|
-
async function materializeAsyncCacheKeyValue(value, refs = /* @__PURE__ */ new WeakSet()) {
|
|
621
|
-
const serialized = await stringifyAsyncCacheKeyValue(value);
|
|
622
|
-
if (serialized !== void 0) return new SerializedCacheKeyValue(serialized);
|
|
623
|
-
if (stringifyCacheKeyValue(value) !== void 0) return value;
|
|
624
|
-
if (!value || typeof value !== "object") return value;
|
|
625
|
-
if (Array.isArray(value)) {
|
|
626
|
-
const items = [];
|
|
627
|
-
for (const item of value) items.push(await materializeAsyncCacheKeyValue(item, refs));
|
|
628
|
-
return items;
|
|
629
|
-
}
|
|
630
|
-
if (refs.has(value)) throw new Error("Circular reference detected");
|
|
631
|
-
refs.add(value);
|
|
632
|
-
const entries = [];
|
|
633
|
-
for (const [key, entryValue] of Object.entries(value)) entries.push([key, await materializeAsyncCacheKeyValue(entryValue, refs)]);
|
|
634
|
-
refs.delete(value);
|
|
635
|
-
return Object.fromEntries(entries);
|
|
636
|
-
}
|
|
637
|
-
async function stringifyAsyncCacheKeyValue(value) {
|
|
638
|
-
if (isFile$1(value)) return `$file:${getCompositeKey({
|
|
639
|
-
bytes: await hashBlobBytes(value),
|
|
640
|
-
lastModified: value.lastModified,
|
|
641
|
-
name: value.name,
|
|
642
|
-
size: value.size,
|
|
643
|
-
type: value.type
|
|
644
|
-
})}`;
|
|
645
|
-
if (isBlob$1(value)) return `$blob:${getCompositeKey({
|
|
646
|
-
bytes: await hashBlobBytes(value),
|
|
647
|
-
size: value.size,
|
|
648
|
-
type: value.type
|
|
649
|
-
})}`;
|
|
650
|
-
}
|
|
651
|
-
async function hashBlobBytes(value) {
|
|
652
|
-
return hashBytes(new Uint8Array(await value.arrayBuffer()));
|
|
653
|
-
}
|
|
654
|
-
function hashBytes(value) {
|
|
655
|
-
return createHash("sha256").update(value).digest("hex");
|
|
656
|
-
}
|
|
657
|
-
function isArrayBuffer(value) {
|
|
658
|
-
return value instanceof ArrayBuffer;
|
|
659
|
-
}
|
|
660
|
-
function isSharedArrayBuffer(value) {
|
|
661
|
-
return value instanceof SharedArrayBuffer;
|
|
662
|
-
}
|
|
663
|
-
function isArrayBufferView(value) {
|
|
664
|
-
return ArrayBuffer.isView(value);
|
|
665
|
-
}
|
|
666
|
-
function isBlob$1(value) {
|
|
667
|
-
return value instanceof Blob;
|
|
668
|
-
}
|
|
669
|
-
function isFile$1(value) {
|
|
670
|
-
return value instanceof File;
|
|
671
|
-
}
|
|
672
|
-
function toJsonSafe(value) {
|
|
673
|
-
if (value === void 0) return void 0;
|
|
674
|
-
const text = JSON.stringify(value);
|
|
675
|
-
return JSON.parse(text);
|
|
676
|
-
}
|
|
677
|
-
function stripCacheAttributes(attributes) {
|
|
678
|
-
if (!attributes) return {};
|
|
679
|
-
const result = {};
|
|
680
|
-
for (const [key, value] of Object.entries(attributes)) if (!key.startsWith("cache.")) result[key] = value;
|
|
681
|
-
return result;
|
|
682
|
-
}
|
|
683
|
-
function serializeSubSpanTree(scope, spanId) {
|
|
684
|
-
const original = scope.spans.find((s) => s.id === spanId);
|
|
685
|
-
if (!original) return {
|
|
686
|
-
kind: "custom",
|
|
687
|
-
name: "unknown",
|
|
688
|
-
attributes: void 0,
|
|
689
|
-
status: "ok",
|
|
690
|
-
error: void 0,
|
|
691
|
-
children: []
|
|
692
|
-
};
|
|
693
|
-
const children = scope.spans.filter((s) => s.parentId === spanId).map((child) => serializeSubSpanTree(scope, child.id));
|
|
694
|
-
return {
|
|
695
|
-
kind: original.kind,
|
|
696
|
-
name: original.name,
|
|
697
|
-
attributes: original.attributes,
|
|
698
|
-
status: original.status,
|
|
699
|
-
error: original.error,
|
|
700
|
-
children
|
|
701
|
-
};
|
|
702
|
-
}
|
|
703
|
-
function appendSubSpanOps(scope, frame) {
|
|
704
|
-
for (let i = frame.baseSpanIndex; i < scope.spans.length; i++) {
|
|
705
|
-
const candidate = scope.spans[i];
|
|
706
|
-
if (candidate?.parentId === frame.cachedSpanId) frame.ops.push({
|
|
707
|
-
kind: "subSpan",
|
|
708
|
-
span: serializeSubSpanTree(scope, candidate.id)
|
|
709
|
-
});
|
|
710
|
-
}
|
|
711
|
-
}
|
|
712
|
-
function replayRecording(scope, parentSpan, recording) {
|
|
713
|
-
scope.replayingDepth++;
|
|
714
|
-
try {
|
|
715
|
-
for (const op of recording.ops) applyRecordingOp(scope, parentSpan, op);
|
|
716
|
-
if (Object.keys(recording.finalAttributes).length > 0) mergeSpanAttributes(parentSpan, recording.finalAttributes);
|
|
717
|
-
} finally {
|
|
718
|
-
scope.replayingDepth--;
|
|
719
|
-
}
|
|
720
|
-
}
|
|
721
|
-
function applyRecordingOp(scope, parentSpan, op) {
|
|
722
|
-
if (op.kind === "setOutput") {
|
|
723
|
-
scope.outputs[op.key] = op.value;
|
|
724
|
-
return;
|
|
725
|
-
}
|
|
726
|
-
if (op.kind === "incrementOutput") {
|
|
727
|
-
const existing = scope.outputs[op.key];
|
|
728
|
-
if (existing === void 0) scope.outputs[op.key] = op.delta;
|
|
729
|
-
else if (typeof existing === "number") scope.outputs[op.key] = existing + op.delta;
|
|
730
|
-
else scope.assertionFailures.push({ message: `replay incrementEvalOutput("${op.key}"): existing value is ${typeof existing}, expected number` });
|
|
731
|
-
return;
|
|
732
|
-
}
|
|
733
|
-
if (op.kind === "checkpoint") {
|
|
734
|
-
scope.checkpoints.set(op.name, op.data);
|
|
735
|
-
return;
|
|
736
|
-
}
|
|
737
|
-
replaySerializedSpan(scope, parentSpan.id, op.span);
|
|
738
|
-
}
|
|
739
|
-
function replaySerializedSpan(scope, parentId, serialized) {
|
|
740
|
-
const id = generateSpanId();
|
|
741
|
-
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
742
|
-
const replayed = {
|
|
743
|
-
id,
|
|
744
|
-
parentId,
|
|
745
|
-
caseId: scope.caseId,
|
|
746
|
-
kind: serialized.kind,
|
|
747
|
-
name: serialized.name,
|
|
748
|
-
startedAt: now,
|
|
749
|
-
endedAt: now,
|
|
750
|
-
status: serialized.status,
|
|
751
|
-
attributes: serialized.attributes,
|
|
752
|
-
error: serialized.error
|
|
753
|
-
};
|
|
754
|
-
scope.spans.push(replayed);
|
|
755
|
-
for (const child of serialized.children) replaySerializedSpan(scope, id, child);
|
|
756
|
-
}
|
|
757
1236
|
//#endregion
|
|
758
1237
|
//#region ../shared/src/schemas/display.ts
|
|
759
1238
|
const scalarCellSchema = z.union([
|
|
@@ -886,6 +1365,15 @@ const traceAttributeDisplayInputSchema = z.object({
|
|
|
886
1365
|
});
|
|
887
1366
|
/** Schema for authored trace display config in eval or workspace config. */
|
|
888
1367
|
const traceDisplayInputConfigSchema = z.object({ attributes: z.array(traceAttributeDisplayInputSchema).optional() });
|
|
1368
|
+
/** Schema for an error attached to a trace span. */
|
|
1369
|
+
const traceSpanErrorSchema = z.object({
|
|
1370
|
+
name: z.string().optional(),
|
|
1371
|
+
message: z.string(),
|
|
1372
|
+
stack: z.string().optional(),
|
|
1373
|
+
capturedAt: z.string().optional()
|
|
1374
|
+
}).catchall(z.unknown());
|
|
1375
|
+
/** Schema for a warning attached to a trace span. */
|
|
1376
|
+
const traceSpanWarningSchema = traceSpanErrorSchema;
|
|
889
1377
|
/** Schema for a persisted trace span captured during case execution. */
|
|
890
1378
|
const traceSpanSchema = z.object({
|
|
891
1379
|
id: z.string(),
|
|
@@ -902,11 +1390,10 @@ const traceSpanSchema = z.object({
|
|
|
902
1390
|
"cancelled"
|
|
903
1391
|
]),
|
|
904
1392
|
attributes: z.record(z.string(), z.unknown()).optional(),
|
|
905
|
-
error:
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
}).optional()
|
|
1393
|
+
error: traceSpanErrorSchema.optional(),
|
|
1394
|
+
errors: z.array(traceSpanErrorSchema).optional(),
|
|
1395
|
+
warning: traceSpanWarningSchema.optional(),
|
|
1396
|
+
warnings: z.array(traceSpanWarningSchema).optional()
|
|
910
1397
|
});
|
|
911
1398
|
//#endregion
|
|
912
1399
|
//#region ../shared/src/schemas/chart.ts
|
|
@@ -1167,12 +1654,16 @@ const spanCacheOptionsSchema = z.object({
|
|
|
1167
1654
|
/** Override the default namespace (`${evalId}__${spanName}`). */
|
|
1168
1655
|
namespace: z.string().optional()
|
|
1169
1656
|
});
|
|
1657
|
+
/** Category of operation stored in the eval cache. */
|
|
1658
|
+
const cacheOperationTypeSchema = z.enum(["span", "value"]);
|
|
1170
1659
|
/** Summary of a single persisted cache entry, used by list/delete endpoints. */
|
|
1171
1660
|
const cacheListItemSchema = z.object({
|
|
1172
1661
|
key: z.string(),
|
|
1173
1662
|
namespace: z.string(),
|
|
1174
|
-
|
|
1175
|
-
|
|
1663
|
+
operationType: cacheOperationTypeSchema,
|
|
1664
|
+
operationName: z.string(),
|
|
1665
|
+
spanName: z.string().optional(),
|
|
1666
|
+
spanKind: traceSpanKindSchema.optional(),
|
|
1176
1667
|
storedAt: z.string(),
|
|
1177
1668
|
codeFingerprint: z.string(),
|
|
1178
1669
|
sizeBytes: z.number()
|
|
@@ -1188,11 +1679,10 @@ const serializedCacheSpanSchema = z.object({
|
|
|
1188
1679
|
"error",
|
|
1189
1680
|
"cancelled"
|
|
1190
1681
|
]),
|
|
1191
|
-
error:
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
}).optional()
|
|
1682
|
+
error: traceSpanErrorSchema.optional(),
|
|
1683
|
+
errors: z.array(traceSpanErrorSchema).optional(),
|
|
1684
|
+
warning: traceSpanWarningSchema.optional(),
|
|
1685
|
+
warnings: z.array(traceSpanWarningSchema).optional()
|
|
1196
1686
|
}).extend({ children: z.lazy(() => z.array(serializedCacheSpanSchema)) });
|
|
1197
1687
|
/**
|
|
1198
1688
|
* One captured operation performed while a cached span's body executed.
|
|
@@ -1206,6 +1696,16 @@ const cacheRecordingOpSchema = z.discriminatedUnion("kind", [
|
|
|
1206
1696
|
key: z.string(),
|
|
1207
1697
|
value: z.unknown()
|
|
1208
1698
|
}),
|
|
1699
|
+
z.object({
|
|
1700
|
+
kind: z.literal("appendOutput"),
|
|
1701
|
+
key: z.string(),
|
|
1702
|
+
value: z.unknown()
|
|
1703
|
+
}),
|
|
1704
|
+
z.object({
|
|
1705
|
+
kind: z.literal("mergeOutput"),
|
|
1706
|
+
key: z.string(),
|
|
1707
|
+
patch: z.record(z.string(), z.unknown())
|
|
1708
|
+
}),
|
|
1209
1709
|
z.object({
|
|
1210
1710
|
kind: z.literal("incrementOutput"),
|
|
1211
1711
|
key: z.string(),
|
|
@@ -1225,6 +1725,16 @@ const cacheRecordingOpSchema = z.discriminatedUnion("kind", [
|
|
|
1225
1725
|
const cacheRecordingSchema = z.object({
|
|
1226
1726
|
returnValue: z.unknown(),
|
|
1227
1727
|
finalAttributes: z.record(z.string(), z.unknown()),
|
|
1728
|
+
finalStatus: z.enum([
|
|
1729
|
+
"running",
|
|
1730
|
+
"ok",
|
|
1731
|
+
"error",
|
|
1732
|
+
"cancelled"
|
|
1733
|
+
]).optional(),
|
|
1734
|
+
finalError: traceSpanErrorSchema.optional(),
|
|
1735
|
+
finalErrors: z.array(traceSpanErrorSchema).optional(),
|
|
1736
|
+
finalWarning: traceSpanWarningSchema.optional(),
|
|
1737
|
+
finalWarnings: z.array(traceSpanWarningSchema).optional(),
|
|
1228
1738
|
ops: z.array(cacheRecordingOpSchema)
|
|
1229
1739
|
});
|
|
1230
1740
|
/** Persisted cache file containing metadata and a recording. */
|
|
@@ -1232,8 +1742,10 @@ const cacheEntrySchema = z.object({
|
|
|
1232
1742
|
version: z.literal(1),
|
|
1233
1743
|
key: z.string(),
|
|
1234
1744
|
namespace: z.string(),
|
|
1235
|
-
|
|
1236
|
-
|
|
1745
|
+
operationType: cacheOperationTypeSchema.optional(),
|
|
1746
|
+
operationName: z.string().optional(),
|
|
1747
|
+
spanName: z.string().optional(),
|
|
1748
|
+
spanKind: traceSpanKindSchema.optional(),
|
|
1237
1749
|
storedAt: z.string(),
|
|
1238
1750
|
codeFingerprint: z.string(),
|
|
1239
1751
|
recording: cacheRecordingSchema
|
|
@@ -1543,15 +2055,21 @@ function createFsCacheStore(options) {
|
|
|
1543
2055
|
if (fileStatResult.error || !fileStatResult.value.isFile()) continue;
|
|
1544
2056
|
const cacheFile = await readCacheFilePath(filePath);
|
|
1545
2057
|
if (cacheFile === null) continue;
|
|
1546
|
-
for (const entry of Object.values(cacheFile.entries))
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
2058
|
+
for (const entry of Object.values(cacheFile.entries)) {
|
|
2059
|
+
const operationType = entry.operationType ?? "span";
|
|
2060
|
+
const operationName = entry.operationName ?? entry.spanName ?? entry.namespace;
|
|
2061
|
+
items.push({
|
|
2062
|
+
key: entry.key,
|
|
2063
|
+
namespace: entry.namespace,
|
|
2064
|
+
operationType,
|
|
2065
|
+
operationName,
|
|
2066
|
+
spanName: entry.spanName,
|
|
2067
|
+
spanKind: entry.spanKind,
|
|
2068
|
+
storedAt: entry.storedAt,
|
|
2069
|
+
codeFingerprint: entry.codeFingerprint,
|
|
2070
|
+
sizeBytes: Buffer.byteLength(JSON.stringify(entry), "utf8")
|
|
2071
|
+
});
|
|
2072
|
+
}
|
|
1555
2073
|
}
|
|
1556
2074
|
items.sort((a, b) => a.storedAt < b.storedAt ? 1 : -1);
|
|
1557
2075
|
return items;
|
|
@@ -2442,12 +2960,15 @@ async function runCase(params) {
|
|
|
2442
2960
|
input: evalCase.input,
|
|
2443
2961
|
signal
|
|
2444
2962
|
}]);
|
|
2445
|
-
}, {
|
|
2446
|
-
|
|
2447
|
-
|
|
2448
|
-
|
|
2449
|
-
|
|
2450
|
-
|
|
2963
|
+
}, {
|
|
2964
|
+
input: evalCase.input,
|
|
2965
|
+
cacheContext: cacheAdapter ? {
|
|
2966
|
+
adapter: cacheAdapter,
|
|
2967
|
+
mode: cacheMode,
|
|
2968
|
+
evalId,
|
|
2969
|
+
codeFingerprint
|
|
2970
|
+
} : void 0
|
|
2971
|
+
});
|
|
2451
2972
|
const traceTree = buildTraceTree(scope.spans, scope.checkpoints);
|
|
2452
2973
|
const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
|
|
2453
2974
|
if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) scope.assertionFailures.push(toAssertionFailure(executeError.message, executeError));
|
|
@@ -2463,20 +2984,31 @@ async function runCase(params) {
|
|
|
2463
2984
|
const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
|
|
2464
2985
|
scope.assertionFailures.push(toAssertionFailure(message, e instanceof Error ? e : void 0));
|
|
2465
2986
|
}
|
|
2987
|
+
if (!nonAssertError && evalDef.outputsSchema) {
|
|
2988
|
+
const parsedOutputs = evalDef.outputsSchema.safeParse(getOutputsSchemaInput(evalDef.outputsSchema, scope.outputs));
|
|
2989
|
+
if (parsedOutputs.success) scope.outputs = {
|
|
2990
|
+
...scope.outputs,
|
|
2991
|
+
...parsedOutputs.data
|
|
2992
|
+
};
|
|
2993
|
+
else scope.assertionFailures.push(toAssertionFailure(formatOutputsSchemaError(parsedOutputs.error)));
|
|
2994
|
+
}
|
|
2466
2995
|
const scoreResults = /* @__PURE__ */ new Map();
|
|
2467
2996
|
const scoringTraces = {};
|
|
2468
|
-
if (!nonAssertError && evalDef.scores) for (const [key, def] of Object.entries(evalDef.scores)) {
|
|
2997
|
+
if (!nonAssertError && scope.assertionFailures.length === 0 && evalDef.scores) for (const [key, def] of Object.entries(evalDef.scores)) {
|
|
2469
2998
|
const { compute, passThreshold, label } = normalizeScoreDef(def);
|
|
2470
2999
|
const scoreRun = await runInEvalScope(evalCase.id, async () => await callWithUnknownResult(compute, [{
|
|
2471
3000
|
input: evalCase.input,
|
|
2472
3001
|
outputs: { ...scope.outputs },
|
|
2473
3002
|
case: evalCase
|
|
2474
|
-
}]), {
|
|
2475
|
-
|
|
2476
|
-
|
|
2477
|
-
|
|
2478
|
-
|
|
2479
|
-
|
|
3003
|
+
}]), {
|
|
3004
|
+
input: evalCase.input,
|
|
3005
|
+
cacheContext: cacheAdapter ? {
|
|
3006
|
+
adapter: cacheAdapter,
|
|
3007
|
+
mode: cacheMode,
|
|
3008
|
+
evalId: `${evalId}__score__${key}`,
|
|
3009
|
+
codeFingerprint
|
|
3010
|
+
} : void 0
|
|
3011
|
+
});
|
|
2480
3012
|
const { trace, traceDisplay } = resolveTracePresentation(scoreRun.scope.spans, globalTraceDisplay, evalDef.traceDisplay);
|
|
2481
3013
|
if (trace.length > 0) scoringTraces[key] = {
|
|
2482
3014
|
trace,
|
|
@@ -2567,6 +3099,19 @@ function isRecord(value) {
|
|
|
2567
3099
|
function isBlob(value) {
|
|
2568
3100
|
return value instanceof Blob;
|
|
2569
3101
|
}
|
|
3102
|
+
function getOutputsSchemaInput(schema, outputs) {
|
|
3103
|
+
if (!(schema instanceof z.ZodObject)) return outputs;
|
|
3104
|
+
const configuredOutputs = {};
|
|
3105
|
+
for (const key of Object.keys(schema.shape)) if (key in outputs) configuredOutputs[key] = outputs[key];
|
|
3106
|
+
return configuredOutputs;
|
|
3107
|
+
}
|
|
3108
|
+
function formatOutputsSchemaError(error) {
|
|
3109
|
+
const issueLines = error.issues.map((issue) => {
|
|
3110
|
+
return `${issue.path.length > 0 ? issue.path.join(".") : "<root>"}: ${issue.message}`;
|
|
3111
|
+
});
|
|
3112
|
+
if (issueLines.length === 0) return "outputsSchema validation failed";
|
|
3113
|
+
return `outputsSchema validation failed:\n${issueLines.join("\n")}`;
|
|
3114
|
+
}
|
|
2570
3115
|
function toAssertionFailure(message, error = void 0) {
|
|
2571
3116
|
return error?.stack ? {
|
|
2572
3117
|
message,
|
|
@@ -3036,6 +3581,39 @@ function toLastRunStatus(status) {
|
|
|
3036
3581
|
}
|
|
3037
3582
|
//#endregion
|
|
3038
3583
|
//#region ../runner/src/runner.ts
|
|
3584
|
+
const globMagicCharacters = new Set([
|
|
3585
|
+
"*",
|
|
3586
|
+
"?",
|
|
3587
|
+
"[",
|
|
3588
|
+
"]",
|
|
3589
|
+
"{",
|
|
3590
|
+
"}",
|
|
3591
|
+
"(",
|
|
3592
|
+
")",
|
|
3593
|
+
"!",
|
|
3594
|
+
"+",
|
|
3595
|
+
"@"
|
|
3596
|
+
]);
|
|
3597
|
+
function hasGlobMagic(value) {
|
|
3598
|
+
for (const char of value) if (globMagicCharacters.has(char)) return true;
|
|
3599
|
+
return false;
|
|
3600
|
+
}
|
|
3601
|
+
function getWatchRootForIncludePattern(params) {
|
|
3602
|
+
const segments = params.pattern.replaceAll("\\", "/").split("/").filter((part) => part !== "");
|
|
3603
|
+
const firstGlobSegmentIndex = segments.findIndex(hasGlobMagic);
|
|
3604
|
+
if (firstGlobSegmentIndex === -1) return dirname(resolve(params.workspaceRoot, params.pattern));
|
|
3605
|
+
if (firstGlobSegmentIndex === 0) return params.workspaceRoot;
|
|
3606
|
+
return resolve(params.workspaceRoot, segments.slice(0, firstGlobSegmentIndex).join("/"));
|
|
3607
|
+
}
|
|
3608
|
+
function getWatchRootsForIncludePatterns(params) {
|
|
3609
|
+
const roots = /* @__PURE__ */ new Set();
|
|
3610
|
+
for (const pattern of params.patterns) roots.add(getWatchRootForIncludePattern({
|
|
3611
|
+
pattern,
|
|
3612
|
+
workspaceRoot: params.workspaceRoot
|
|
3613
|
+
}));
|
|
3614
|
+
if (roots.size === 0) return [params.workspaceRoot];
|
|
3615
|
+
return [...roots];
|
|
3616
|
+
}
|
|
3039
3617
|
/** Create an in-memory eval runner bound to the current workspace config. */
|
|
3040
3618
|
function createRunner({ watchForChanges = true } = {}) {
|
|
3041
3619
|
let config;
|
|
@@ -3048,6 +3626,8 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
3048
3626
|
const latestRunInfoMap = /* @__PURE__ */ new Map();
|
|
3049
3627
|
const discoveryListeners = /* @__PURE__ */ new Set();
|
|
3050
3628
|
let nextShortIdNum = 0;
|
|
3629
|
+
let discoveryWatcher;
|
|
3630
|
+
let discoveryRefreshTimer;
|
|
3051
3631
|
function toWorkspaceRelativePath(filePath) {
|
|
3052
3632
|
return relative(workspaceRoot, filePath).replaceAll("\\", "/");
|
|
3053
3633
|
}
|
|
@@ -3076,7 +3656,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
3076
3656
|
});
|
|
3077
3657
|
await loadPersistedRuns();
|
|
3078
3658
|
await runner.refreshDiscovery();
|
|
3079
|
-
if (watchForChanges) setupWatcher();
|
|
3659
|
+
if (watchForChanges) await setupWatcher();
|
|
3080
3660
|
},
|
|
3081
3661
|
async listCache() {
|
|
3082
3662
|
return cacheStore.list();
|
|
@@ -3401,6 +3981,16 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
3401
3981
|
discoveryListeners.delete(listener);
|
|
3402
3982
|
};
|
|
3403
3983
|
},
|
|
3984
|
+
async close() {
|
|
3985
|
+
if (discoveryRefreshTimer !== void 0) {
|
|
3986
|
+
clearTimeout(discoveryRefreshTimer);
|
|
3987
|
+
discoveryRefreshTimer = void 0;
|
|
3988
|
+
}
|
|
3989
|
+
const watcher = discoveryWatcher;
|
|
3990
|
+
if (watcher === void 0) return;
|
|
3991
|
+
discoveryWatcher = void 0;
|
|
3992
|
+
await watcher.close();
|
|
3993
|
+
},
|
|
3404
3994
|
getWorkspaceRoot() {
|
|
3405
3995
|
return workspaceRoot;
|
|
3406
3996
|
},
|
|
@@ -3408,19 +3998,29 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
3408
3998
|
return resolveArtifactPath(join(localStateDir, "runs"), artifactId_);
|
|
3409
3999
|
}
|
|
3410
4000
|
};
|
|
3411
|
-
function setupWatcher() {
|
|
3412
|
-
const watcher = watch(
|
|
4001
|
+
async function setupWatcher() {
|
|
4002
|
+
const watcher = watch(getWatchRootsForIncludePatterns({
|
|
4003
|
+
patterns: config.include,
|
|
4004
|
+
workspaceRoot
|
|
4005
|
+
}), {
|
|
3413
4006
|
ignoreInitial: true,
|
|
3414
4007
|
persistent: true
|
|
3415
4008
|
});
|
|
3416
|
-
|
|
3417
|
-
|
|
3418
|
-
|
|
3419
|
-
|
|
3420
|
-
|
|
3421
|
-
|
|
3422
|
-
|
|
3423
|
-
|
|
4009
|
+
discoveryWatcher = watcher;
|
|
4010
|
+
const scheduleRefresh = () => {
|
|
4011
|
+
if (discoveryRefreshTimer !== void 0) clearTimeout(discoveryRefreshTimer);
|
|
4012
|
+
discoveryRefreshTimer = setTimeout(() => {
|
|
4013
|
+
discoveryRefreshTimer = void 0;
|
|
4014
|
+
runner.refreshDiscovery();
|
|
4015
|
+
}, 50);
|
|
4016
|
+
};
|
|
4017
|
+
watcher.on("change", scheduleRefresh);
|
|
4018
|
+
watcher.on("add", scheduleRefresh);
|
|
4019
|
+
watcher.on("unlink", scheduleRefresh);
|
|
4020
|
+
watcher.on("addDir", scheduleRefresh);
|
|
4021
|
+
watcher.on("unlinkDir", scheduleRefresh);
|
|
4022
|
+
await new Promise((ready) => {
|
|
4023
|
+
watcher.once("ready", ready);
|
|
3424
4024
|
});
|
|
3425
4025
|
}
|
|
3426
4026
|
function emitDiscoveryEvent() {
|
|
@@ -3467,6 +4067,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
3467
4067
|
//#endregion
|
|
3468
4068
|
//#region src/cli.ts
|
|
3469
4069
|
function parseArgs(argv) {
|
|
4070
|
+
const normalizedArgv = argv.filter((arg) => arg !== "--no-env");
|
|
3470
4071
|
const args = {
|
|
3471
4072
|
command: "help",
|
|
3472
4073
|
subcommand: void 0,
|
|
@@ -3480,9 +4081,10 @@ function parseArgs(argv) {
|
|
|
3480
4081
|
port: 4100,
|
|
3481
4082
|
cacheMode: "use",
|
|
3482
4083
|
clearCache: false,
|
|
3483
|
-
all: false
|
|
4084
|
+
all: false,
|
|
4085
|
+
loadEnv: normalizedArgv.length === argv.length
|
|
3484
4086
|
};
|
|
3485
|
-
const command =
|
|
4087
|
+
const command = normalizedArgv[0];
|
|
3486
4088
|
if (command === "--help" || command === "-h") {
|
|
3487
4089
|
args.showHelp = true;
|
|
3488
4090
|
return args;
|
|
@@ -3493,16 +4095,16 @@ function parseArgs(argv) {
|
|
|
3493
4095
|
} else if (command !== void 0 && !command.startsWith("-")) args.unknownHelpTarget = command;
|
|
3494
4096
|
let cursor = 1;
|
|
3495
4097
|
if (args.command === "cache") {
|
|
3496
|
-
const sub =
|
|
4098
|
+
const sub = normalizedArgv[cursor];
|
|
3497
4099
|
if (sub === "list" || sub === "clear") {
|
|
3498
4100
|
args.subcommand = sub;
|
|
3499
4101
|
args.helpTopic = `cache ${sub}`;
|
|
3500
4102
|
cursor++;
|
|
3501
4103
|
} else if (sub !== void 0 && !sub.startsWith("-")) args.unknownHelpTarget = `cache ${sub}`;
|
|
3502
4104
|
}
|
|
3503
|
-
for (let i = cursor; i <
|
|
3504
|
-
const arg =
|
|
3505
|
-
const next =
|
|
4105
|
+
for (let i = cursor; i < normalizedArgv.length; i++) {
|
|
4106
|
+
const arg = normalizedArgv[i];
|
|
4107
|
+
const next = normalizedArgv[i + 1];
|
|
3506
4108
|
if (arg === "--help" || arg === "-h") args.showHelp = true;
|
|
3507
4109
|
else if (arg === "--eval" && next) {
|
|
3508
4110
|
args.evalIds.push(...next.split(","));
|
|
@@ -3534,6 +4136,10 @@ function parseArgs(argv) {
|
|
|
3534
4136
|
*/
|
|
3535
4137
|
async function runCli(argv) {
|
|
3536
4138
|
const args = parseArgs(argv);
|
|
4139
|
+
if (args.loadEnv && !loadWorkspaceEnv()) {
|
|
4140
|
+
process.exit(1);
|
|
4141
|
+
return;
|
|
4142
|
+
}
|
|
3537
4143
|
if (args.showHelp) {
|
|
3538
4144
|
if (args.unknownHelpTarget !== void 0) {
|
|
3539
4145
|
console.error(`No help found for "${args.unknownHelpTarget}".`);
|
|
@@ -3564,6 +4170,18 @@ async function runCli(argv) {
|
|
|
3564
4170
|
function isCliCommand(command) {
|
|
3565
4171
|
return command === "app" || command === "list" || command === "run" || command === "cache" || command === "help";
|
|
3566
4172
|
}
|
|
4173
|
+
function loadWorkspaceEnv() {
|
|
4174
|
+
const envPath = resolve(process.cwd(), ".env");
|
|
4175
|
+
if (!existsSync(envPath)) return true;
|
|
4176
|
+
const loadResult = resultify(() => {
|
|
4177
|
+
process.loadEnvFile(envPath);
|
|
4178
|
+
});
|
|
4179
|
+
if (loadResult.error) {
|
|
4180
|
+
console.error(`Failed to load .env at ${envPath}: ${loadResult.error.message}`);
|
|
4181
|
+
return false;
|
|
4182
|
+
}
|
|
4183
|
+
return true;
|
|
4184
|
+
}
|
|
3567
4185
|
const currentDir = dirname(fileURLToPath(import.meta.url));
|
|
3568
4186
|
const repoRoot = resolve(currentDir, "../../..");
|
|
3569
4187
|
const pnpmCommand = process.platform === "win32" ? "pnpm.cmd" : "pnpm";
|
|
@@ -3612,8 +4230,8 @@ async function commandApp(args) {
|
|
|
3612
4230
|
const { serve } = await import("@hono/node-server");
|
|
3613
4231
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
3614
4232
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
3615
|
-
const appModule = await import("./app-
|
|
3616
|
-
const runnerModule = await import("./runner-
|
|
4233
|
+
const appModule = await import("./app-7qDBq_ub.mjs");
|
|
4234
|
+
const runnerModule = await import("./runner-uzzY8kk1.mjs");
|
|
3617
4235
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
3618
4236
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
3619
4237
|
await runnerModule.initRunner();
|
|
@@ -3714,7 +4332,8 @@ async function commandCache(args) {
|
|
|
3714
4332
|
for (const entry of entries) {
|
|
3715
4333
|
console.info(` ${entry.namespace}`);
|
|
3716
4334
|
console.info(` key: ${entry.key}`);
|
|
3717
|
-
|
|
4335
|
+
const operationLabel = entry.operationType === "span" ? `${entry.operationName} (span ${entry.spanKind ?? "unknown"})` : `${entry.operationName} (value)`;
|
|
4336
|
+
console.info(` operation: ${operationLabel}`);
|
|
3718
4337
|
console.info(` stored: ${entry.storedAt}`);
|
|
3719
4338
|
console.info(` size: ${String(entry.sizeBytes)} bytes`);
|
|
3720
4339
|
console.info("");
|
|
@@ -3769,6 +4388,7 @@ Usage:
|
|
|
3769
4388
|
|
|
3770
4389
|
Flags:
|
|
3771
4390
|
--port <n> Server port (default: 4100)
|
|
4391
|
+
--no-env Disable automatic .env loading
|
|
3772
4392
|
--help, -h Show this help
|
|
3773
4393
|
`);
|
|
3774
4394
|
return;
|
|
@@ -3781,6 +4401,7 @@ Usage:
|
|
|
3781
4401
|
agent-evals list [flags]
|
|
3782
4402
|
|
|
3783
4403
|
Flags:
|
|
4404
|
+
--no-env Disable automatic .env loading
|
|
3784
4405
|
--help, -h Show this help
|
|
3785
4406
|
`);
|
|
3786
4407
|
return;
|
|
@@ -3801,6 +4422,7 @@ Flags:
|
|
|
3801
4422
|
--no-cache Shortcut for --cache bypass
|
|
3802
4423
|
--refresh-cache Shortcut for --cache refresh
|
|
3803
4424
|
--clear-cache Clear the cache before starting the run
|
|
4425
|
+
--no-env Disable automatic .env loading
|
|
3804
4426
|
--help, -h Show this help
|
|
3805
4427
|
`);
|
|
3806
4428
|
return;
|
|
@@ -3818,6 +4440,7 @@ Flags:
|
|
|
3818
4440
|
--eval <id> Clear entries for specific eval(s) (comma-separated)
|
|
3819
4441
|
--all Confirm clearing every cached entry
|
|
3820
4442
|
--json Output cache listing as JSON
|
|
4443
|
+
--no-env Disable automatic .env loading
|
|
3821
4444
|
--help, -h Show this help
|
|
3822
4445
|
`);
|
|
3823
4446
|
return;
|
|
@@ -3844,8 +4467,9 @@ Options:
|
|
|
3844
4467
|
--no-cache Shortcut for --cache bypass
|
|
3845
4468
|
--refresh-cache Shortcut for --cache refresh
|
|
3846
4469
|
--clear-cache Clear the cache before starting the run
|
|
4470
|
+
--no-env Disable automatic .env loading
|
|
3847
4471
|
--help, -h Show help
|
|
3848
4472
|
`);
|
|
3849
4473
|
}
|
|
3850
4474
|
//#endregion
|
|
3851
|
-
export {
|
|
4475
|
+
export { columnDefSchema as $, evalStatsConfigSchema as A, evalChartTypeSchema as B, spanCacheOptionsSchema as C, setEvalOutput as Ct, evalFreshnessStatusSchema as D, getEvalRegistry as Dt, caseRowSchema as E, defineEval as Et, evalChartBuiltinMetricSchema as F, traceAttributeDisplaySchema as G, traceAttributeDisplayFormatSchema as H, evalChartColorSchema as I, traceSpanErrorSchema as J, traceDisplayConfigSchema as K, evalChartConfigSchema as L, scoreTraceSchema as M, evalChartAggregateSchema as N, evalStatAggregateSchema as O, evalChartAxisSchema as P, cellValueSchema as Q, evalChartMetricSchema as R, serializedCacheSpanSchema as S, runInEvalScope as St, caseDetailSchema as T, repoFile as Tt, traceAttributeDisplayInputSchema as U, evalChartsConfigSchema as V, traceAttributeDisplayPlacementSchema as W, traceSpanSchema as X, traceSpanKindSchema as Y, traceSpanWarningSchema as Z, cacheListItemSchema as _, getCurrentScope as _t, sseEnvelopeSchema as a, repoFileRefSchema as at, cacheRecordingOpSchema as b, isInEvalScope as bt, deriveScopedSummaryFromCases as c, buildTraceTree as ct, runManifestSchema as d, evalTracer as dt, columnFormatSchema as et, runSummarySchema as f, hashCacheKey as ft, cacheFileSchema as g, evalAssert as gt, cacheEntrySchema as h, appendToEvalOutput as ht, updateManualScoreRequestSchema as i, numberDisplayOptionsSchema as it, evalSummarySchema as j, evalStatItemSchema as k, deriveStatusFromCaseRows as l, captureEvalSpanError as lt, trialSelectionModeSchema as m, EvalAssertionError as mt, createRunner as n, fileRefSchema as nt, getEvalTitle as o, runArtifactRefSchema as ot, agentEvalsConfigSchema as p, hashCacheKeySync as pt, traceDisplayInputConfigSchema as q, createRunRequestSchema as r, jsonCellSchema as rt, getEvalDisplayStatus as s, z$1 as st, runCli as t, columnKindSchema as tt, deriveStatusFromChildStatuses as u, evalSpan as ut, cacheModeSchema as v, getEvalCaseInput as vt, assertionFailureSchema as w, setScopeCacheContext as wt, 
cacheRecordingSchema as x, mergeEvalOutput as xt, cacheOperationTypeSchema as y, incrementEvalOutput as yt, evalChartTooltipExtraSchema as z };
|