@ls-stack/agent-eval 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-C5CJ1sX6.mjs → app-7qDBq_ub.mjs} +3 -3
- package/dist/apps/web/dist/assets/index-CdxG9-O-.css +1 -0
- package/dist/apps/web/dist/assets/index-J1yKYGfN.js +112 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-C5FL7C4G.mjs → cli-C-n-Fd4o.mjs} +746 -269
- package/dist/index.d.mts +1070 -826
- package/dist/index.mjs +3 -3
- package/dist/{runner-K2bN8KRS.mjs → runner-CwEtnUFf.mjs} +2 -2
- package/dist/{runner-Cdlvk56X.mjs → runner-uzzY8kk1.mjs} +1 -1
- package/dist/src-Dy31CPXH.mjs +2 -0
- package/package.json +3 -3
- package/dist/apps/web/dist/assets/index-CBvHVkE7.js +0 -109
- package/dist/apps/web/dist/assets/index-Dd7I28ts.css +0 -1
- package/dist/src-gqm1z1Nu.mjs +0 -2
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { createHash } from "node:crypto";
|
|
2
2
|
import { mkdir, readFile, readdir, rename, rm, stat, writeFile } from "node:fs/promises";
|
|
3
3
|
import { dirname, extname, join, relative, resolve } from "node:path";
|
|
4
|
+
import { z, z as z$1 } from "zod/v4";
|
|
4
5
|
import { AsyncLocalStorage } from "node:async_hooks";
|
|
5
6
|
import { Buffer as Buffer$1 } from "node:buffer";
|
|
6
7
|
import { getCompositeKey } from "@ls-stack/utils/getCompositeKey";
|
|
7
|
-
import { z } from "zod/v4";
|
|
8
8
|
import { watch } from "chokidar";
|
|
9
9
|
import { glob } from "glob";
|
|
10
10
|
import { existsSync } from "node:fs";
|
|
@@ -70,6 +70,27 @@ function getCurrentScope() {
|
|
|
70
70
|
/**
 * Report whether the caller is currently running inside an eval case scope.
 *
 * @returns {boolean} `true` when `getCurrentScope()` yields a scope.
 */
function isInEvalScope() {
  const activeScope = getCurrentScope();
  return activeScope !== undefined;
}
|
|
73
|
+
/**
 * Check whether a value is a non-null object (arrays included).
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for any `typeof "object"` value except `null`.
 */
function isObjectLike(value) {
  if (value === null) return false;
  return typeof value === "object";
}
|
|
76
|
+
/**
 * Check whether a value is a non-null, non-array object.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for objects that are neither `null` nor arrays.
 */
function isObjectRecord(value) {
  if (value === null || typeof value !== "object") return false;
  return !Array.isArray(value);
}
|
|
79
|
+
/**
 * Produce a shallow copy of an array.
 *
 * @param {unknown[]} value - Array to copy; it is not modified.
 * @returns {unknown[]} A new array holding the same elements.
 */
function copyArray$1(value) {
  return value.slice();
}
|
|
82
|
+
/**
 * Read the input of the current eval case, optionally drilling into it with a
 * dot-separated path.
 *
 * @param {string} [path] - Dot-separated property path. When omitted, the whole
 *   input is returned.
 * @returns {unknown} The resolved value, or `undefined` when there is no
 *   current scope, the path is empty, a path segment is empty, or an
 *   intermediate value is not an object.
 */
function getEvalCaseInput(path = void 0) {
  const activeScope = getCurrentScope();
  if (!activeScope) return undefined;
  if (path === undefined) return activeScope.input;
  if (path.length === 0) return undefined;
  const segments = path.split(".");
  let cursor = activeScope.input;
  for (let index = 0; index < segments.length; index += 1) {
    const segment = segments[index];
    // Stop as soon as the path is malformed or there is nothing left to descend into.
    const canDescend = segment.length > 0 && typeof cursor === "object" && cursor !== null;
    if (!canDescend) return undefined;
    cursor = cursor[segment];
  }
  return cursor;
}
|
|
73
94
|
/**
|
|
74
95
|
* Attach cache context (adapter, mode, eval id, fingerprint) to a scope.
|
|
75
96
|
*
|
|
@@ -86,6 +107,7 @@ function setScopeCacheContext(scope, context) {
|
|
|
86
107
|
async function runInEvalScope(caseId, fn, options = {}) {
|
|
87
108
|
const scope = {
|
|
88
109
|
caseId,
|
|
110
|
+
input: options.input,
|
|
89
111
|
outputs: {},
|
|
90
112
|
assertionFailures: [],
|
|
91
113
|
spans: [],
|
|
@@ -145,6 +167,58 @@ function setEvalOutput(key, value) {
|
|
|
145
167
|
});
|
|
146
168
|
}
|
|
147
169
|
/**
 * Push an item onto an output array of the current case scope.
 *
 * - No existing value: the output becomes `[value]`.
 * - Existing array: a new array with `value` added at the end is stored.
 * - Any other existing value: it is kept as `[existing, value]`.
 *
 * Does nothing when there is no current scope. The operation is forwarded to
 * `recordOpIfActive` as an `appendOutput` op.
 */
function appendToEvalOutput(key, value) {
  const scope = getCurrentScope();
  if (!scope) return;
  const previous = scope.outputs[key];
  let next;
  if (previous === undefined) {
    next = [value];
  } else if (Array.isArray(previous)) {
    next = [...copyArray$1(previous), value];
  } else {
    next = [previous, value];
  }
  scope.outputs[key] = next;
  recordOpIfActive(scope, { kind: "appendOutput", key, value });
}
|
|
188
|
+
/**
 * Shallow-merge `patch` into an object-valued output of the current case scope.
 *
 * When the output is missing it becomes a copy of `patch`. When the existing
 * value is not a non-array object, an assertion failure is pushed onto the
 * scope and the output is left untouched. Does nothing when there is no
 * current scope. Successful merges are forwarded to `recordOpIfActive` as a
 * `mergeOutput` op.
 */
function mergeEvalOutput(key, patch) {
  const scope = getCurrentScope();
  if (!scope) return;
  const previous = scope.outputs[key];
  const hasPrevious = previous !== undefined;
  if (hasPrevious && !isObjectRecord(previous)) {
    const actualKind = Array.isArray(previous) ? "array" : typeof previous;
    scope.assertionFailures.push(toAssertionFailure$1(`mergeEvalOutput("${key}"): existing value is ${actualKind}, expected object`));
    return;
  }
  scope.outputs[key] = hasPrevious ? { ...previous, ...patch } : { ...patch };
  recordOpIfActive(scope, { kind: "mergeOutput", key, patch });
}
|
|
221
|
+
/**
|
|
148
222
|
* Add a numeric delta to an output value in the current case scope.
|
|
149
223
|
*
|
|
150
224
|
* If the existing value is non-numeric, the operation is recorded as an
|
|
@@ -189,18 +263,267 @@ function evalAssert(condition, message) {
|
|
|
189
263
|
throw error;
|
|
190
264
|
}
|
|
191
265
|
//#endregion
|
|
192
|
-
//#region ../sdk/src/
|
|
193
|
-
|
|
266
|
+
//#region ../sdk/src/cacheKey.ts
|
|
267
|
+
/**
 * Wrapper marking a value as an already-serialized cache key fragment.
 * `stringifyCacheKeyValue` returns the wrapped `value` as-is for instances of
 * this class.
 */
var SerializedCacheKeyValue = class {
  /** The pre-serialized value. */
  value;
  constructor(serialized) {
    this.value = serialized;
  }
};
|
|
273
|
+
/**
 * Hash the components of a cache key into a deterministic hex digest.
 *
 * The input is first passed through `materializeAsyncCacheKeyValue`, which
 * reads native `Blob` and `File` values asynchronously so they are hashed by
 * content. Use `hashCacheKeySync` only when the key contains no async values.
 *
 * @param {unknown} input - Cache key components.
 * @returns {Promise<string>} Hex digest of the key.
 */
async function hashCacheKey(input) {
  const materialized = await materializeAsyncCacheKeyValue(input);
  return hashCacheKeySyncMaterialized(materialized);
}
|
|
282
|
+
/**
 * Synchronously hash cache key components.
 *
 * Handles JSON-like data and in-memory binary values such as `Buffer`,
 * `ArrayBuffer`, and typed arrays. Native `Blob` or `File` values cannot be
 * content-hashed on this path.
 *
 * @param {unknown} input - Cache key components.
 * @returns {string} Hex digest of the key.
 */
function hashCacheKeySync(input) {
  const digest = hashCacheKeySyncMaterialized(input);
  return digest;
}
|
|
290
|
+
/**
 * Build the composite key for `input` (using `stringifyCacheKeyValue` as the
 * custom stringifier) and return its SHA-256 hex digest.
 *
 * @param {unknown} input - Cache key components with no pending async values.
 * @returns {string} SHA-256 hex digest.
 */
function hashCacheKeySyncMaterialized(input) {
  const compositeKey = getCompositeKey(input, { stringify: stringifyCacheKeyValue });
  const hasher = createHash("sha256");
  hasher.update(compositeKey);
  return hasher.digest("hex");
}
|
|
293
|
+
/**
 * Custom stringifier for cache key values that need special treatment.
 *
 * Returns a tagged string for pre-serialized values, buffers, array buffers,
 * array buffer views, files and blobs. Files and blobs are described by their
 * metadata only on this synchronous path. Returns `undefined` for every other
 * value.
 *
 * @param {unknown} value - Value found while building the composite key.
 * @returns {string | undefined} Tagged representation, or `undefined`.
 */
function stringifyCacheKeyValue(value) {
  if (value instanceof SerializedCacheKeyValue) return value.value;
  if (Buffer$1.isBuffer(value)) return `$buffer:${hashBytes(value)}`;
  const isPlainBuffer = isArrayBuffer(value);
  if (isPlainBuffer || isSharedArrayBuffer(value)) {
    const tag = isPlainBuffer ? "$arrayBuffer" : "$sharedArrayBuffer";
    return `${tag}:${hashBytes(new Uint8Array(value))}`;
  }
  if (isArrayBufferView(value)) {
    // Hash only the bytes the view covers, not the whole backing buffer.
    const viewBytes = new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
    return `$${value.constructor.name}:${hashBytes(viewBytes)}`;
  }
  // File is tested before Blob so files keep their own tag and extra metadata.
  if (isFile$1(value)) {
    const { lastModified, name, size, type } = value;
    return `$file:${getCompositeKey({ lastModified, name, size, type })}`;
  }
  if (isBlob$1(value)) {
    const { size, type } = value;
    return `$blob:${getCompositeKey({ size, type })}`;
  }
  return undefined;
}
|
|
313
|
+
/**
 * Recursively resolve the async parts of a cache key value.
 *
 * Values handled by `stringifyAsyncCacheKeyValue` are replaced with a
 * `SerializedCacheKeyValue`. Values the synchronous stringifier already
 * handles, and primitives, are returned unchanged. Arrays and objects are
 * rebuilt with each element or entry materialized in turn.
 *
 * @param {unknown} value - Value to materialize.
 * @param {WeakSet<object>} [refs] - Containers currently being visited, used
 *   for cycle detection.
 * @returns {Promise<unknown>} The materialized value.
 * @throws {Error} "Circular reference detected" when a container holds itself,
 *   directly or indirectly.
 */
async function materializeAsyncCacheKeyValue(value, refs = /* @__PURE__ */ new WeakSet()) {
  const serialized = await stringifyAsyncCacheKeyValue(value);
  if (serialized !== void 0) return new SerializedCacheKeyValue(serialized);
  if (stringifyCacheKeyValue(value) !== void 0) return value;
  if (!value || typeof value !== "object") return value;
  // Arrays are tracked as well as objects: a self-containing array would
  // otherwise recurse forever instead of reporting the cycle.
  if (refs.has(value)) throw new Error("Circular reference detected");
  refs.add(value);
  try {
    if (Array.isArray(value)) {
      const items = [];
      for (const item of value) items.push(await materializeAsyncCacheKeyValue(item, refs));
      return items;
    }
    const entries = [];
    for (const [key, entryValue] of Object.entries(value)) {
      entries.push([key, await materializeAsyncCacheKeyValue(entryValue, refs)]);
    }
    return Object.fromEntries(entries);
  } finally {
    // A value may appear in several sibling positions; only ancestors count as cycles.
    refs.delete(value);
  }
}
|
|
330
|
+
/**
 * Produce a content-based tagged string for native `File` and `Blob` values.
 *
 * The bytes are read and hashed via `hashBlobBytes`, then combined with the
 * value's metadata. Resolves to `undefined` for anything that is neither a
 * file nor a blob.
 *
 * @param {unknown} value - Candidate value.
 * @returns {Promise<string | undefined>} Tagged representation, or `undefined`.
 */
async function stringifyAsyncCacheKeyValue(value) {
  const fileLike = isFile$1(value);
  if (!fileLike && !isBlob$1(value)) return undefined;
  const bytes = await hashBlobBytes(value);
  if (fileLike) {
    return `$file:${getCompositeKey({
      bytes,
      lastModified: value.lastModified,
      name: value.name,
      size: value.size,
      type: value.type
    })}`;
  }
  return `$blob:${getCompositeKey({
    bytes,
    size: value.size,
    type: value.type
  })}`;
}
|
|
344
|
+
/**
 * Read a blob-like value's bytes through `arrayBuffer()` and hash them.
 *
 * @param {{ arrayBuffer(): Promise<ArrayBuffer> }} value - Blob or file.
 * @returns {Promise<string>} Digest returned by `hashBytes`.
 */
async function hashBlobBytes(value) {
  const buffer = await value.arrayBuffer();
  return hashBytes(new Uint8Array(buffer));
}
|
|
347
|
+
/**
 * Compute the SHA-256 hex digest of the given data.
 *
 * @param {string | Buffer | NodeJS.TypedArray | DataView} value - Data to hash.
 * @returns {string} Lowercase hex digest.
 */
function hashBytes(value) {
  const hasher = createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex");
}
|
|
350
|
+
/**
 * Check whether a value is an `ArrayBuffer` instance.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} Result of the `instanceof ArrayBuffer` check.
 */
function isArrayBuffer(value) {
  const matches = value instanceof ArrayBuffer;
  return matches;
}
|
|
353
|
+
/**
 * Check whether a value is a `SharedArrayBuffer` instance.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} Result of the `instanceof SharedArrayBuffer` check.
 */
function isSharedArrayBuffer(value) {
  const matches = value instanceof SharedArrayBuffer;
  return matches;
}
|
|
356
|
+
/**
 * Check whether a value is a view over an array buffer (typed array or
 * `DataView`), as reported by `ArrayBuffer.isView`.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for array buffer views.
 */
function isArrayBufferView(value) {
  const isView = ArrayBuffer.isView(value);
  return isView;
}
|
|
359
|
+
/**
 * Check whether a value is a native `Blob` instance.
 *
 * The `typeof` guard keeps the check from throwing a ReferenceError on
 * runtimes that do not expose a global `Blob`; there the answer is `false`.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` when `value` is a `Blob`.
 */
function isBlob$1(value) {
  return typeof Blob !== "undefined" && value instanceof Blob;
}
|
|
362
|
+
/**
 * Check whether a value is a native `File` instance.
 *
 * `File` is not a global on every supported-looking runtime (Node.js only
 * exposes it globally from v20), so a bare `instanceof File` would throw a
 * ReferenceError for every value checked. The `typeof` guard makes the check
 * return `false` there instead.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` when `value` is a `File`.
 */
function isFile$1(value) {
  return typeof File !== "undefined" && value instanceof File;
}
|
|
365
|
+
/**
 * Round-trip a value through JSON so only JSON-representable data remains.
 *
 * @param {unknown} value - Value to convert.
 * @returns {unknown} The parsed JSON clone, or `undefined` when the value has
 *   no JSON representation (`undefined`, functions, symbols, or a `toJSON`
 *   that yields `undefined`).
 * @throws {TypeError} Propagated from `JSON.stringify` for circular structures
 *   and BigInt values.
 */
function toJsonSafe(value) {
  if (value === void 0) return void 0;
  const text = JSON.stringify(value);
  // JSON.stringify returns undefined (not a string) for functions and symbols;
  // feeding that to JSON.parse would throw a SyntaxError.
  if (text === void 0) return void 0;
  return JSON.parse(text);
}
|
|
370
|
+
//#endregion
|
|
371
|
+
//#region ../sdk/src/cacheRecording.ts
|
|
372
|
+
/**
 * Replace `span.attributes` with a new object holding the previous attributes
 * overlaid with `attributes` (later keys win).
 *
 * @param {{ attributes?: object }} span - Span to update in place.
 * @param {object} attributes - Attributes to merge in.
 */
function mergeSpanAttributes$1(span, attributes) {
  const previous = span.attributes;
  span.attributes = { ...previous, ...attributes };
}
|
|
378
|
+
/**
 * Check whether a value is a non-null, non-array object.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for objects that are neither `null` nor arrays.
 */
function isRecordLike$1(value) {
  if (value === null || typeof value !== "object") return false;
  return !Array.isArray(value);
}
|
|
381
|
+
/**
 * Describe a value for diagnostic messages: `"array"` for arrays, otherwise
 * the `typeof` result.
 *
 * @param {unknown} value - Value to describe.
 * @returns {string} Kind label.
 */
function valueKind$1(value) {
  if (Array.isArray(value)) return "array";
  return typeof value;
}
|
|
384
|
+
/**
 * Produce a shallow copy of an array.
 *
 * @param {unknown[]} value - Array to copy; it is not modified.
 * @returns {unknown[]} A new array holding the same elements.
 */
function copyArray(value) {
  return value.slice();
}
|
|
387
|
+
/**
 * Copy an attributes object while leaving out every key that starts with
 * `"cache."`.
 *
 * @param {object | undefined | null} attributes - Span attributes, if any.
 * @returns {object} New object with the remaining entries; `{}` when
 *   `attributes` is falsy.
 */
function stripCacheAttributes(attributes) {
  const kept = {};
  if (!attributes) return kept;
  for (const [name, attributeValue] of Object.entries(attributes)) {
    if (name.startsWith("cache.")) continue;
    kept[name] = attributeValue;
  }
  return kept;
}
|
|
393
|
+
/**
 * Take a JSON-safe snapshot of a span's attributes, excluding `cache.*` keys.
 *
 * @param {{ attributes?: object } | undefined} span - Span to snapshot.
 * @returns {object} The snapshot, or `{}` when the JSON round-trip does not
 *   yield a plain object.
 */
function snapshotNonCacheAttributes(span) {
  const visibleAttributes = stripCacheAttributes(span?.attributes);
  const snapshot = toJsonSafe(visibleAttributes);
  if (isRecordLike$1(snapshot)) return snapshot;
  return {};
}
|
|
397
|
+
/**
 * Collect the entries of `after` whose value differs from the value under the
 * same key in `before`, as judged by `cacheAttributeValuesEqual`.
 *
 * Keys that exist only in `before` are not reported.
 *
 * @param {object} before - Earlier attribute snapshot.
 * @param {object} after - Later attribute snapshot.
 * @returns {object} Changed or added entries taken from `after`.
 */
function diffNonCacheAttributes(before, after) {
  const changed = {};
  for (const [name, currentValue] of Object.entries(after)) {
    if (cacheAttributeValuesEqual(before[name], currentValue)) continue;
    changed[name] = currentValue;
  }
  return changed;
}
|
|
402
|
+
/**
 * Compare two attribute values: identical per `Object.is`, or equal once
 * serialized with `JSON.stringify`.
 *
 * @param {unknown} left - First value.
 * @param {unknown} right - Second value.
 * @returns {boolean} `true` when the values match; `false` when they differ
 *   or when serialization throws.
 */
function cacheAttributeValuesEqual(left, right) {
  if (Object.is(left, right)) return true;
  let leftText;
  let rightText;
  try {
    leftText = JSON.stringify(left);
    rightText = JSON.stringify(right);
  } catch {
    // Unserializable values (cycles, BigInt) are treated as different.
    return false;
  }
  return leftText === rightText;
}
|
|
410
|
+
/**
 * Add a cache reference to the `"cache.refs"` attribute of a span.
 *
 * A non-array existing value is discarded and the list starts fresh. Does
 * nothing when `span` is `undefined`.
 *
 * @param {{ attributes?: object } | undefined} span - Span to annotate.
 * @param {object} ref - Cache reference to append.
 */
function appendCacheRef(span, ref) {
  if (span === undefined) return;
  const recorded = span.attributes?.["cache.refs"];
  const previousRefs = Array.isArray(recorded) ? copyArray(recorded) : [];
  mergeSpanAttributes$1(span, { "cache.refs": [...previousRefs, ref] });
}
|
|
415
|
+
/**
 * Serialize a span and all of its descendants into a nested plain object.
 *
 * Descendants are found by matching `parentId` against the span id within
 * `scope.spans`. When no span has the requested id, a placeholder
 * `"custom"` / `"unknown"` / `"ok"` node without children is returned.
 *
 * @param {{ spans: object[] }} scope - Scope holding the flat span list.
 * @param {string} spanId - Id of the subtree root.
 * @returns {object} Serialized node with a `children` array.
 */
function serializeSubSpanTree(scope, spanId) {
  const source = scope.spans.find((span) => span.id === spanId);
  if (!source) {
    return {
      kind: "custom",
      name: "unknown",
      attributes: undefined,
      status: "ok",
      error: undefined,
      errors: undefined,
      warning: undefined,
      warnings: undefined,
      children: []
    };
  }
  const children = [];
  for (const candidate of scope.spans) {
    if (candidate.parentId === spanId) children.push(serializeSubSpanTree(scope, candidate.id));
  }
  const { kind, name, attributes, status, error, errors, warning, warnings } = source;
  return { kind, name, attributes, status, error, errors, warning, warnings, children };
}
|
|
441
|
+
/**
 * Append a `subSpan` op to the recording frame for every span added since the
 * frame started (index `frame.baseSpanIndex` onward) whose parent is the
 * frame's replay parent span.
 *
 * @param {{ spans: object[] }} scope - Scope holding the flat span list.
 * @param {{ baseSpanIndex: number, replayParentSpanId: unknown, ops: object[] }} frame
 *   Recording frame to extend.
 */
function appendSubSpanOps(scope, frame) {
  for (let index = frame.baseSpanIndex; index < scope.spans.length; index += 1) {
    const candidate = scope.spans[index];
    if (candidate?.parentId !== frame.replayParentSpanId) continue;
    frame.ops.push({
      kind: "subSpan",
      span: serializeSubSpanTree(scope, candidate.id)
    });
  }
}
|
|
450
|
+
/**
 * Re-apply a cached recording to the scope.
 *
 * Every recorded op is applied in order. When a parent span is given, the
 * recording's final attributes are then merged into it and its final
 * error(s) / warning(s) are copied over where defined. `scope.replayingDepth`
 * is raised for the duration of the replay and restored even if an op throws.
 *
 * @param {object} scope - Current eval scope.
 * @param {object | undefined} parentSpan - Span that receives replayed state.
 * @param {object} recording - Cached recording.
 * @param {{ generateSpanId: () => string }} options - Replay options.
 */
function replayRecording(scope, parentSpan, recording, options) {
  scope.replayingDepth++;
  try {
    for (const op of recording.ops) applyRecordingOp(scope, parentSpan, op, options);
    if (parentSpan === undefined) return;
    if (Object.keys(recording.finalAttributes).length > 0) {
      mergeSpanAttributes$1(parentSpan, recording.finalAttributes);
    }
    if (recording.finalError !== undefined) parentSpan.error = recording.finalError;
    if (recording.finalErrors !== undefined) parentSpan.errors = recording.finalErrors;
    if (recording.finalWarning !== undefined) parentSpan.warning = recording.finalWarning;
    if (recording.finalWarnings !== undefined) parentSpan.warnings = recording.finalWarnings;
  } finally {
    scope.replayingDepth--;
  }
}
|
|
463
|
+
/**
 * Apply a single recorded op to the scope during replay.
 *
 * Output ops (`setOutput`, `appendOutput`, `mergeOutput`, `incrementOutput`)
 * update `scope.outputs`, pushing an assertion failure when the existing value
 * has the wrong kind for a merge or increment. `checkpoint` stores data in
 * `scope.checkpoints`. Any other op is replayed as a serialized sub-span under
 * the parent span.
 *
 * @param {object} scope - Current eval scope.
 * @param {object | undefined} parentSpan - Parent for replayed sub-spans.
 * @param {object} op - Recorded operation.
 * @param {{ generateSpanId: () => string }} options - Replay options.
 */
function applyRecordingOp(scope, parentSpan, op, options) {
  switch (op.kind) {
    case "setOutput": {
      scope.outputs[op.key] = op.value;
      return;
    }
    case "appendOutput": {
      const previous = scope.outputs[op.key];
      if (previous === undefined) scope.outputs[op.key] = [op.value];
      else if (Array.isArray(previous)) scope.outputs[op.key] = [...copyArray(previous), op.value];
      else scope.outputs[op.key] = [previous, op.value];
      return;
    }
    case "mergeOutput": {
      const previous = scope.outputs[op.key];
      if (previous === undefined) {
        scope.outputs[op.key] = { ...op.patch };
      } else if (isRecordLike$1(previous)) {
        scope.outputs[op.key] = { ...previous, ...op.patch };
      } else {
        scope.assertionFailures.push({ message: `replay mergeEvalOutput("${op.key}"): existing value is ${valueKind$1(previous)}, expected object` });
      }
      return;
    }
    case "incrementOutput": {
      const previous = scope.outputs[op.key];
      if (previous === undefined) {
        scope.outputs[op.key] = op.delta;
      } else if (typeof previous === "number") {
        scope.outputs[op.key] = previous + op.delta;
      } else {
        scope.assertionFailures.push({ message: `replay incrementEvalOutput("${op.key}"): existing value is ${valueKind$1(previous)}, expected number` });
      }
      return;
    }
    case "checkpoint": {
      scope.checkpoints.set(op.name, op.data);
      return;
    }
    default:
      // Remaining ops carry a serialized span tree.
      replaySerializedSpan(scope, parentSpan?.id ?? null, op.span, options);
  }
}
|
|
498
|
+
/**
 * Recreate a serialized span tree inside the scope.
 *
 * Each node gets a fresh id from `options.generateSpanId()` and identical
 * `startedAt` / `endedAt` timestamps taken at replay time. The node is pushed
 * onto `scope.spans` before its children, which are replayed with the new id
 * as their parent.
 *
 * @param {{ caseId: unknown, spans: object[] }} scope - Scope receiving spans.
 * @param {string | null} parentId - Parent id for the replayed root node.
 * @param {object} serialized - Node produced by `serializeSubSpanTree`.
 * @param {{ generateSpanId: () => string }} options - Replay options.
 */
function replaySerializedSpan(scope, parentId, serialized, options) {
  const spanId = options.generateSpanId();
  const timestamp = new Date().toISOString();
  const { kind, name, status, attributes, error, errors, warning, warnings } = serialized;
  scope.spans.push({
    id: spanId,
    parentId,
    caseId: scope.caseId,
    kind,
    name,
    startedAt: timestamp,
    endedAt: timestamp,
    status,
    attributes,
    error,
    errors,
    warning,
    warnings
  });
  for (const child of serialized.children) {
    replaySerializedSpan(scope, spanId, child, options);
  }
}
|
|
519
|
+
//#endregion
|
|
520
|
+
//#region ../sdk/src/traceDiagnostics.ts
|
|
194
521
|
/** Error property names that `getErrorExtraFields` leaves out of its result. */
const errorCoreFields = new Set(["name", "message", "stack", "capturedAt"]);
|
|
200
|
-
function generateSpanId() {
|
|
201
|
-
spanIdCounter++;
|
|
202
|
-
return `span_${String(Date.now())}_${String(spanIdCounter)}`;
|
|
203
|
-
}
|
|
204
527
|
/**
 * Check whether a value is a non-null, non-array object.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for objects that are neither `null` nor arrays.
 */
function isRecord$2(value) {
  if (value === null || typeof value !== "object") return false;
  return !Array.isArray(value);
}
|
|
@@ -221,33 +544,6 @@ function formatUnknownErrorMessage(error) {
|
|
|
221
544
|
/**
 * Collect the own enumerable properties of an error that are not listed in
 * `errorCoreFields`.
 *
 * @param {object} error - Error (or error-like object) to inspect.
 * @returns {object} New object with the extra fields only.
 */
function getErrorExtraFields(error) {
  const extraEntries = [];
  for (const entry of Object.entries(error)) {
    if (!errorCoreFields.has(entry[0])) extraEntries.push(entry);
  }
  return Object.fromEntries(extraEntries);
}
|
|
224
|
-
function updateCurrentSpan(update) {
|
|
225
|
-
const currentSpan = getCurrentScope()?.activeSpanStack.at(-1);
|
|
226
|
-
if (!currentSpan) return;
|
|
227
|
-
update(currentSpan);
|
|
228
|
-
}
|
|
229
|
-
function noopActiveSpan() {
|
|
230
|
-
return {
|
|
231
|
-
setName() {},
|
|
232
|
-
setAttribute() {},
|
|
233
|
-
setAttributes() {}
|
|
234
|
-
};
|
|
235
|
-
}
|
|
236
|
-
function noopExternalSpan(id) {
|
|
237
|
-
return {
|
|
238
|
-
id,
|
|
239
|
-
setName() {},
|
|
240
|
-
setAttribute() {},
|
|
241
|
-
setAttributes() {},
|
|
242
|
-
end() {}
|
|
243
|
-
};
|
|
244
|
-
}
|
|
245
|
-
function mergeSpanAttributes(span, attributes) {
|
|
246
|
-
span.attributes = {
|
|
247
|
-
...span.attributes,
|
|
248
|
-
...attributes
|
|
249
|
-
};
|
|
250
|
-
}
|
|
251
547
|
function normalizeTraceError(error, capturedAt = void 0) {
|
|
252
548
|
if (error instanceof Error) return {
|
|
253
549
|
...getErrorExtraFields(error),
|
|
@@ -277,6 +573,34 @@ function normalizeTraceError(error, capturedAt = void 0) {
|
|
|
277
573
|
/**
 * Normalize one or more raw errors with `normalizeTraceError`.
 *
 * When `additionalErrors` is non-empty, `errorOrErrors` is treated as a single
 * error followed by the additional ones. Otherwise an array argument is used
 * as the list and a non-array argument becomes a one-element list.
 *
 * @param {unknown} errorOrErrors - Single error or array of errors.
 * @param {unknown[]} additionalErrors - Further errors passed as rest args.
 * @param {string | undefined} capturedAt - Timestamp handed to each normalization.
 * @returns {object[]} Normalized errors.
 */
function normalizeTraceErrors(errorOrErrors, additionalErrors, capturedAt) {
  let rawErrors;
  if (additionalErrors.length > 0) rawErrors = [errorOrErrors, ...additionalErrors];
  else if (Array.isArray(errorOrErrors)) rawErrors = errorOrErrors;
  else rawErrors = [errorOrErrors];
  return rawErrors.map((error) => normalizeTraceError(error, capturedAt));
}
|
|
576
|
+
/**
 * Normalize one or more raw warnings with `normalizeTraceError`.
 *
 * When `additionalWarnings` is non-empty, `warningOrWarnings` is treated as a
 * single warning followed by the additional ones. Otherwise an array argument
 * is used as the list and a non-array argument becomes a one-element list.
 *
 * @param {unknown} warningOrWarnings - Single warning or array of warnings.
 * @param {unknown[]} additionalWarnings - Further warnings passed as rest args.
 * @param {string | undefined} capturedAt - Timestamp handed to each normalization.
 * @returns {object[]} Normalized warnings.
 */
function normalizeTraceWarnings(warningOrWarnings, additionalWarnings, capturedAt) {
  let rawWarnings;
  if (additionalWarnings.length > 0) rawWarnings = [warningOrWarnings, ...additionalWarnings];
  else if (Array.isArray(warningOrWarnings)) rawWarnings = warningOrWarnings;
  else rawWarnings = [warningOrWarnings];
  return rawWarnings.map((warning) => normalizeTraceError(warning, capturedAt));
}
|
|
579
|
+
/**
 * Decide whether a value is an options object for `captureEvalSpanError`:
 * a plain object whose only own enumerable key is `level`, with `level`
 * either `undefined` or a valid level.
 *
 * @param {unknown} value - Candidate trailing argument.
 * @returns {boolean} `true` when the value should be treated as options.
 */
function isCaptureEvalSpanErrorOptions(value) {
  if (!isRecord$2(value)) return false;
  const keys = Object.keys(value);
  // Empty objects and objects with other keys are treated as errors, not options.
  const onlyLevelKey = keys.length === 1 && keys[0] === "level";
  if (!onlyLevelKey) return false;
  if (value.level === undefined) return true;
  return isCaptureEvalSpanErrorLevel(value.level);
}
|
|
586
|
+
/**
 * Check whether a value is one of the accepted capture levels.
 *
 * @param {unknown} value - Candidate level.
 * @returns {boolean} `true` for `"error"` or `"warning"`.
 */
function isCaptureEvalSpanErrorLevel(value) {
  return ["error", "warning"].includes(value);
}
|
|
589
|
+
/**
 * Separate the rest arguments of `captureEvalSpanError` into additional errors
 * and options.
 *
 * The last argument is treated as options when it is a level string or an
 * options object; it is then removed from the error list. Otherwise all
 * arguments are errors and the options are empty.
 *
 * @param {unknown[]} additionalErrorsOrOptions - Rest arguments after the first error.
 * @returns {{ additionalErrors: unknown[], options: object }} Split arguments.
 */
function splitCaptureEvalSpanErrorArgs(additionalErrorsOrOptions) {
  const trailing = additionalErrorsOrOptions.at(-1);
  let options;
  if (isCaptureEvalSpanErrorLevel(trailing)) options = { level: trailing };
  else if (isCaptureEvalSpanErrorOptions(trailing)) options = trailing;
  if (options === undefined) {
    return { additionalErrors: additionalErrorsOrOptions, options: {} };
  }
  return { additionalErrors: additionalErrorsOrOptions.slice(0, -1), options };
}
|
|
280
604
|
function appendSpanErrors(span, errors) {
|
|
281
605
|
if (errors.length === 0) return;
|
|
282
606
|
const latestError = errors.at(-1);
|
|
@@ -285,8 +609,194 @@ function appendSpanErrors(span, errors) {
|
|
|
285
609
|
span.error = latestError;
|
|
286
610
|
span.status = "error";
|
|
287
611
|
}
|
|
288
|
-
function
|
|
289
|
-
|
|
612
|
+
/**
 * Add warnings to a span without touching its status.
 *
 * `span.warnings` becomes a new array with the new warnings appended, and
 * `span.warning` is set to the last new warning. Nothing happens when the list
 * is empty or its last entry is `undefined`.
 *
 * @param {{ warning?: unknown, warnings?: unknown[] }} span - Span to update.
 * @param {unknown[]} warnings - Normalized warnings to append.
 */
function appendSpanWarnings(span, warnings) {
  if (warnings.length === 0) return;
  const newest = warnings[warnings.length - 1];
  if (newest === undefined) return;
  const previous = span.warnings ?? [];
  span.warnings = [...previous, ...warnings];
  span.warning = newest;
}
|
|
619
|
+
/**
 * Check whether a span carries any error information.
 *
 * @param {{ error?: unknown, errors?: unknown[] }} span - Span to inspect.
 * @returns {boolean} `true` when `error` is defined or `errors` is non-empty.
 */
function hasSpanError(span) {
  if (span.error !== undefined) return true;
  const recordedCount = span.errors?.length ?? 0;
  return recordedCount > 0;
}
|
|
622
|
+
//#endregion
|
|
623
|
+
//#region ../sdk/src/valueCache.ts
|
|
624
|
+
/**
 * Build the `traceCache` function bound to a span id generator.
 *
 * The returned function runs `fn` directly when there is no current scope, no
 * cache context, or a replay is in progress. Otherwise it hashes the cache key
 * and then, depending on `cacheCtx.mode`:
 *
 * - `"use"`: looks the key up; on a hit it records a `hit` ref, replays the
 *   stored recording, and returns the stored return value without calling
 *   `fn`. On a miss it records a `miss` ref and continues.
 * - `"refresh"`: records a `refresh` ref and continues.
 * - anything else: records a `bypass` ref and continues.
 *
 * When it continues, `fn` runs under a fresh recording frame, sub-spans
 * created during the call are appended to the frame, and (unless the mode is
 * `"bypass"`) the recording is written through the cache adapter.
 *
 * @param {() => string} generateSpanId - Id generator used when replaying spans.
 * @returns {(info: object, fn: () => unknown) => Promise<unknown>} `traceCache`.
 */
function createTraceCache(generateSpanId) {
  return async function traceCache(info, fn) {
    const scope = getCurrentScope();
    if (!scope) return await fn();
    const cacheCtx = scope.cacheContext;
    if (cacheCtx === undefined || scope.replayingDepth > 0) return await fn();

    const namespace = info.namespace ?? `${cacheCtx.evalId}__${info.name}`;
    const keyHash = await hashCacheKey({
      namespace,
      codeFingerprint: cacheCtx.codeFingerprint,
      key: info.key
    });
    const activeSpan = scope.activeSpanStack.at(-1);
    // Every cache ref shares the same leading fields; only the status and hit details vary.
    const describeRef = (status, details = {}) => ({
      type: "value",
      name: info.name,
      namespace,
      key: keyHash,
      status,
      ...details
    });

    if (cacheCtx.mode === "use") {
      const hit = await cacheCtx.adapter.lookup(namespace, keyHash);
      if (hit) {
        const storedAt = hit.storedAt;
        const age = Date.now() - new Date(storedAt).getTime();
        appendCacheRef(activeSpan, describeRef("hit", { storedAt, age }));
        replayRecording(scope, activeSpan, hit.recording, { generateSpanId });
        return hit.recording.returnValue;
      }
      appendCacheRef(activeSpan, describeRef("miss"));
    } else {
      const status = cacheCtx.mode === "refresh" ? "refresh" : "bypass";
      appendCacheRef(activeSpan, describeRef(status));
    }

    const beforeAttributes = snapshotNonCacheAttributes(activeSpan);
    const frame = {
      baseSpanIndex: scope.spans.length,
      replayParentSpanId: activeSpan?.id ?? null,
      ops: []
    };
    scope.recordingStack.push(frame);
    let bodyResult;
    try {
      bodyResult = await fn();
    } finally {
      // The frame is removed even when the body throws.
      scope.recordingStack.pop();
    }
    appendSubSpanOps(scope, frame);

    if (cacheCtx.mode !== "bypass") {
      const afterAttributes = snapshotNonCacheAttributes(activeSpan);
      const finalAttributes = diffNonCacheAttributes(beforeAttributes, afterAttributes);
      const recording = {
        returnValue: toJsonSafe(bodyResult),
        finalAttributes,
        ops: frame.ops
      };
      await cacheCtx.adapter.write({
        version: 1,
        key: keyHash,
        namespace,
        operationType: "value",
        operationName: info.name,
        storedAt: new Date().toISOString(),
        codeFingerprint: cacheCtx.codeFingerprint,
        recording
      });
    }
    return bodyResult;
  };
}
|
|
710
|
+
//#endregion
|
|
711
|
+
//#region ../sdk/src/tracer.ts
|
|
712
|
+
/** Number of span ids handed out so far by `generateSpanId`. */
let spanIdCounter = 0;
/**
 * Create a new span id of the form `span_<epoch millis>_<counter>`.
 *
 * @returns {string} The generated id.
 */
function generateSpanId() {
  spanIdCounter += 1;
  const timestamp = Date.now();
  return `span_${timestamp}_${spanIdCounter}`;
}
|
|
717
|
+
/**
 * Run `update` against the innermost active span of the current scope.
 *
 * Does nothing when there is no current scope or no active span.
 *
 * @param {(span: object) => void} update - Callback receiving the active span.
 */
function updateCurrentSpan(update) {
  const scope = getCurrentScope();
  const currentSpan = scope === undefined || scope === null ? undefined : scope.activeSpanStack.at(-1);
  if (currentSpan) update(currentSpan);
}
|
|
722
|
+
/**
 * Create an active-span handle whose methods all do nothing.
 *
 * @returns {object} Handle exposing `setName`, `setAttribute`,
 *   `setAttributes`, `incrementAttribute`, `appendToAttribute` and
 *   `mergeAttribute` as no-ops.
 */
function noopActiveSpan() {
  const handle = {
    setName: () => {},
    setAttribute: () => {},
    setAttributes: () => {},
    incrementAttribute: () => {},
    appendToAttribute: () => {},
    mergeAttribute: () => {}
  };
  return handle;
}
|
|
732
|
+
/**
 * Create an external-span handle that carries an id but whose methods all do
 * nothing.
 *
 * @param {string} id - Id exposed on the handle.
 * @returns {object} Handle with `id` plus no-op `setName`, `setAttribute`,
 *   `setAttributes`, `incrementAttribute`, `appendToAttribute`,
 *   `mergeAttribute` and `end`.
 */
function noopExternalSpan(id) {
  const handle = {
    id,
    setName: () => {},
    setAttribute: () => {},
    setAttributes: () => {},
    incrementAttribute: () => {},
    appendToAttribute: () => {},
    mergeAttribute: () => {},
    end: () => {}
  };
  return handle;
}
|
|
744
|
+
/**
 * Replace `span.attributes` with a new object holding the previous attributes
 * overlaid with `attributes` (later keys win).
 *
 * @param {{ attributes?: object }} span - Span to update in place.
 * @param {object} attributes - Attributes to merge in.
 */
function mergeSpanAttributes(span, attributes) {
  const previous = span.attributes;
  span.attributes = { ...previous, ...attributes };
}
|
|
750
|
+
/**
 * Check whether a value is a non-null, non-array object.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for objects that are neither `null` nor arrays.
 */
function isRecordLike(value) {
  if (value === null || typeof value !== "object") return false;
  return !Array.isArray(value);
}
|
|
753
|
+
/**
 * Describe a value for diagnostic messages: `"array"` for arrays, otherwise
 * the `typeof` result.
 *
 * @param {unknown} value - Value to describe.
 * @returns {string} Kind label.
 */
function valueKind(value) {
  if (Array.isArray(value)) return "array";
  return typeof value;
}
|
|
756
|
+
/**
 * Push an assertion failure with the given message onto the current scope.
 *
 * Does nothing when there is no current scope.
 *
 * @param {string} message - Failure message to record.
 */
function recordSpanAttributeAssertion(message) {
  const scope = getCurrentScope();
  if (scope) {
    scope.assertionFailures.push({ message });
  }
}
|
|
761
|
+
/**
 * Add `delta` to a numeric span attribute.
 *
 * A missing attribute is initialised to `delta`. When the existing value is
 * not a number, an assertion failure is recorded and the attribute is left
 * unchanged.
 *
 * @param {{ attributes?: object }} span - Span to update.
 * @param {string} key - Attribute name.
 * @param {number} delta - Amount to add.
 */
function incrementSpanAttribute(span, key, delta) {
  const current = span.attributes?.[key];
  if (current !== undefined && typeof current !== "number") {
    recordSpanAttributeAssertion(`evalSpan.incrementAttribute("${key}"): existing value is ${valueKind(current)}, expected number`);
    return;
  }
  const next = current === undefined ? delta : current + delta;
  mergeSpanAttributes(span, { [key]: next });
}
|
|
773
|
+
/**
 * Append a value to an array-valued span attribute.
 *
 * A missing attribute becomes `[value]`, an existing array is replaced by a
 * new array with `value` at the end, and any other existing value is kept as
 * `[existing, value]`.
 *
 * @param {{ attributes?: object }} span - Span to update.
 * @param {string} key - Attribute name.
 * @param {unknown} value - Item to append.
 */
function appendToSpanAttribute(span, key, value) {
  const current = span.attributes?.[key];
  let next;
  if (current === undefined) {
    next = [value];
  } else if (Array.isArray(current)) {
    next = [...current.slice(), value];
  } else {
    next = [current, value];
  }
  mergeSpanAttributes(span, { [key]: next });
}
|
|
786
|
+
/**
 * Shallow-merge `patch` into an object-valued span attribute.
 *
 * A missing attribute becomes a copy of `patch`. When the existing value is
 * not a non-array object, an assertion failure is recorded and the attribute
 * is left unchanged.
 *
 * @param {{ attributes?: object }} span - Span to update.
 * @param {string} key - Attribute name.
 * @param {object} patch - Fields to merge in.
 */
function mergeSpanAttribute(span, key, patch) {
  const current = span.attributes?.[key];
  if (current !== undefined && !isRecordLike(current)) {
    recordSpanAttributeAssertion(`evalSpan.mergeAttribute("${key}"): existing value is ${valueKind(current)}, expected object`);
    return;
  }
  const merged = current === undefined ? { ...patch } : { ...current, ...patch };
  mergeSpanAttributes(span, { [key]: merged });
}
|
|
291
801
|
function finishSpanWithoutThrownError(span) {
|
|
292
802
|
span.status = hasSpanError(span) ? "error" : "ok";
|
|
@@ -302,9 +812,25 @@ function createSpanHandle(span) {
|
|
|
302
812
|
},
|
|
303
813
|
setAttributes(value) {
|
|
304
814
|
mergeSpanAttributes(span, value);
|
|
815
|
+
},
|
|
816
|
+
incrementAttribute(key, delta) {
|
|
817
|
+
incrementSpanAttribute(span, key, delta);
|
|
818
|
+
},
|
|
819
|
+
appendToAttribute(key, value) {
|
|
820
|
+
appendToSpanAttribute(span, key, value);
|
|
821
|
+
},
|
|
822
|
+
mergeAttribute(key, patch) {
|
|
823
|
+
mergeSpanAttribute(span, key, patch);
|
|
305
824
|
}
|
|
306
825
|
};
|
|
307
826
|
}
|
|
827
|
+
/**
 * Run `update` against the span with the given id in the current scope.
 *
 * Does nothing when there is no current scope or `findSpan` finds no span.
 *
 * @param {string} id - Id of the span to update.
 * @param {(span: object) => void} update - Callback receiving the span.
 */
function updateExternalSpanRecord(id, update) {
  const scope = getCurrentScope();
  if (!scope) return;
  const target = findSpan(scope, id);
  if (target) update(target);
}
|
|
308
834
|
function createExternalSpanHandle(id) {
|
|
309
835
|
return {
|
|
310
836
|
id,
|
|
@@ -326,6 +852,21 @@ function createExternalSpanHandle(id) {
|
|
|
326
852
|
attributes: value
|
|
327
853
|
});
|
|
328
854
|
},
|
|
855
|
+
incrementAttribute(key, delta) {
|
|
856
|
+
updateExternalSpanRecord(id, (span) => {
|
|
857
|
+
incrementSpanAttribute(span, key, delta);
|
|
858
|
+
});
|
|
859
|
+
},
|
|
860
|
+
appendToAttribute(key, value) {
|
|
861
|
+
updateExternalSpanRecord(id, (span) => {
|
|
862
|
+
appendToSpanAttribute(span, key, value);
|
|
863
|
+
});
|
|
864
|
+
},
|
|
865
|
+
mergeAttribute(key, patch) {
|
|
866
|
+
updateExternalSpanRecord(id, (span) => {
|
|
867
|
+
mergeSpanAttribute(span, key, patch);
|
|
868
|
+
});
|
|
869
|
+
},
|
|
329
870
|
end(info = {}) {
|
|
330
871
|
endExternalSpan({
|
|
331
872
|
...info,
|
|
@@ -382,6 +923,8 @@ function updateExternalSpan(info) {
|
|
|
382
923
|
if (info.name !== void 0) span.name = info.name;
|
|
383
924
|
if (info.status !== void 0) span.status = info.status;
|
|
384
925
|
if (info.error !== void 0) span.error = info.error;
|
|
926
|
+
if (info.warning !== void 0) span.warning = info.warning;
|
|
927
|
+
if (info.warnings !== void 0) span.warnings = info.warnings;
|
|
385
928
|
if (info.attributes !== void 0) mergeSpanAttributes(span, info.attributes);
|
|
386
929
|
}
|
|
387
930
|
function endExternalSpan(info) {
|
|
@@ -410,6 +953,8 @@ function recordExternalSpan(info) {
|
|
|
410
953
|
existing.status = status;
|
|
411
954
|
existing.attributes = info.attributes;
|
|
412
955
|
existing.error = info.error;
|
|
956
|
+
existing.warning = info.warning;
|
|
957
|
+
existing.warnings = info.warnings;
|
|
413
958
|
return id;
|
|
414
959
|
}
|
|
415
960
|
scope.spans.push({
|
|
@@ -422,7 +967,9 @@ function recordExternalSpan(info) {
|
|
|
422
967
|
endedAt,
|
|
423
968
|
status,
|
|
424
969
|
attributes: info.attributes,
|
|
425
|
-
error: info.error
|
|
970
|
+
error: info.error,
|
|
971
|
+
warning: info.warning,
|
|
972
|
+
warnings: info.warnings
|
|
426
973
|
});
|
|
427
974
|
return id;
|
|
428
975
|
}
|
|
@@ -446,16 +993,42 @@ const evalSpan = {
|
|
|
446
993
|
updateCurrentSpan((currentSpan) => {
|
|
447
994
|
mergeSpanAttributes(currentSpan, value);
|
|
448
995
|
});
|
|
996
|
+
},
|
|
997
|
+
incrementAttribute(key, delta) {
|
|
998
|
+
updateCurrentSpan((currentSpan) => {
|
|
999
|
+
incrementSpanAttribute(currentSpan, key, delta);
|
|
1000
|
+
});
|
|
1001
|
+
},
|
|
1002
|
+
appendToAttribute(key, value) {
|
|
1003
|
+
updateCurrentSpan((currentSpan) => {
|
|
1004
|
+
appendToSpanAttribute(currentSpan, key, value);
|
|
1005
|
+
});
|
|
1006
|
+
},
|
|
1007
|
+
mergeAttribute(key, patch) {
|
|
1008
|
+
updateCurrentSpan((currentSpan) => {
|
|
1009
|
+
mergeSpanAttribute(currentSpan, key, patch);
|
|
1010
|
+
});
|
|
449
1011
|
}
|
|
450
1012
|
};
|
|
451
1013
|
/**
|
|
452
1014
|
* Attach one or more recoverable errors to the active eval span.
|
|
453
1015
|
*
|
|
454
|
-
*
|
|
455
|
-
* without throwing.
|
|
1016
|
+
* By default the active span is marked as `error` even if its callback later
|
|
1017
|
+
* completes without throwing. Pass `'warning'` or `{ level: 'warning' }` as the
|
|
1018
|
+
* final argument to record the diagnostic without changing span status. Calls
|
|
1019
|
+
* outside `evalTracer.span(...)` are ignored.
|
|
456
1020
|
*/
|
|
457
|
-
function captureEvalSpanError(errorOrErrors, ...
|
|
458
|
-
const
|
|
1021
|
+
function captureEvalSpanError(errorOrErrors, ...additionalErrorsOrOptions) {
|
|
1022
|
+
const { additionalErrors, options } = splitCaptureEvalSpanErrorArgs(additionalErrorsOrOptions);
|
|
1023
|
+
const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
1024
|
+
if ((options.level ?? "error") === "warning") {
|
|
1025
|
+
const warnings = normalizeTraceWarnings(errorOrErrors, additionalErrors, capturedAt);
|
|
1026
|
+
updateCurrentSpan((currentSpan) => {
|
|
1027
|
+
appendSpanWarnings(currentSpan, warnings);
|
|
1028
|
+
});
|
|
1029
|
+
return;
|
|
1030
|
+
}
|
|
1031
|
+
const errors = normalizeTraceErrors(errorOrErrors, additionalErrors, capturedAt);
|
|
459
1032
|
updateCurrentSpan((currentSpan) => {
|
|
460
1033
|
appendSpanErrors(currentSpan, errors);
|
|
461
1034
|
});
|
|
@@ -503,7 +1076,7 @@ async function traceSpan(info, fn) {
|
|
|
503
1076
|
"cache.storedAt": storedAt,
|
|
504
1077
|
"cache.age": Date.now() - new Date(storedAt).getTime()
|
|
505
1078
|
});
|
|
506
|
-
replayRecording(scope, spanRecord, hit.recording);
|
|
1079
|
+
replayRecording(scope, spanRecord, hit.recording, { generateSpanId });
|
|
507
1080
|
spanRecord.status = hit.recording.finalStatus ?? (hasSpanError(spanRecord) ? "error" : "ok");
|
|
508
1081
|
spanRecord.endedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
509
1082
|
return hit.recording.returnValue;
|
|
@@ -513,7 +1086,7 @@ async function traceSpan(info, fn) {
|
|
|
513
1086
|
else mergeSpanAttributes(spanRecord, { "cache.status": "bypass" });
|
|
514
1087
|
const frame = {
|
|
515
1088
|
baseSpanIndex: scope.spans.length,
|
|
516
|
-
|
|
1089
|
+
replayParentSpanId: id,
|
|
517
1090
|
ops: []
|
|
518
1091
|
};
|
|
519
1092
|
scope.recordingStack.push(frame);
|
|
@@ -532,12 +1105,16 @@ async function traceSpan(info, fn) {
|
|
|
532
1105
|
finalStatus: spanRecord.status,
|
|
533
1106
|
finalError: spanRecord.error,
|
|
534
1107
|
finalErrors: spanRecord.errors,
|
|
1108
|
+
finalWarning: spanRecord.warning,
|
|
1109
|
+
finalWarnings: spanRecord.warnings,
|
|
535
1110
|
ops: frame.ops
|
|
536
1111
|
};
|
|
537
1112
|
const entry = {
|
|
538
1113
|
version: 1,
|
|
539
1114
|
key: keyHash,
|
|
540
1115
|
namespace,
|
|
1116
|
+
operationType: "span",
|
|
1117
|
+
operationName: info.name,
|
|
541
1118
|
spanName: info.name,
|
|
542
1119
|
spanKind: info.kind,
|
|
543
1120
|
storedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
@@ -569,6 +1146,13 @@ const evalTracer = {
|
|
|
569
1146
|
/** Run a callback inside a new trace span and record its lifecycle. */
|
|
570
1147
|
span: traceSpan,
|
|
571
1148
|
/**
|
|
1149
|
+
* Cache a pure value without creating a trace span.
|
|
1150
|
+
*
|
|
1151
|
+
* When called inside an active span, the span receives a `cache.refs` entry
|
|
1152
|
+
* describing the value cache status for this run.
|
|
1153
|
+
*/
|
|
1154
|
+
cache: createTraceCache(generateSpanId),
|
|
1155
|
+
/**
|
|
572
1156
|
* Start a span whose lifecycle is controlled by an external tracer/exporter.
|
|
573
1157
|
*
|
|
574
1158
|
* Calls are no-ops outside an eval case scope, except that a generated or
|
|
@@ -649,194 +1233,6 @@ function buildTraceTree(spans, checkpoints) {
|
|
|
649
1233
|
checkpoints
|
|
650
1234
|
};
|
|
651
1235
|
}
|
|
652
|
-
var SerializedCacheKeyValue = class {
|
|
653
|
-
value;
|
|
654
|
-
constructor(value) {
|
|
655
|
-
this.value = value;
|
|
656
|
-
}
|
|
657
|
-
};
|
|
658
|
-
/**
|
|
659
|
-
* Hash the components of a cache key into a deterministic hex digest.
|
|
660
|
-
*
|
|
661
|
-
* Native `Blob` and `File` values are read asynchronously and hashed by
|
|
662
|
-
* content. Use `hashCacheKeySync` only when the key contains no async values.
|
|
663
|
-
*/
|
|
664
|
-
async function hashCacheKey(input) {
|
|
665
|
-
return hashCacheKeySyncMaterialized(await materializeAsyncCacheKeyValue(input));
|
|
666
|
-
}
|
|
667
|
-
/**
|
|
668
|
-
* Synchronously hash cache key components. This supports JSON-like data and
|
|
669
|
-
* in-memory binary values such as `Buffer`, `ArrayBuffer`, and typed arrays,
|
|
670
|
-
* but cannot content-hash native `Blob` or `File` values.
|
|
671
|
-
*/
|
|
672
|
-
function hashCacheKeySync(input) {
|
|
673
|
-
return hashCacheKeySyncMaterialized(input);
|
|
674
|
-
}
|
|
675
|
-
function hashCacheKeySyncMaterialized(input) {
|
|
676
|
-
return createHash("sha256").update(getCompositeKey(input, { stringify: stringifyCacheKeyValue })).digest("hex");
|
|
677
|
-
}
|
|
678
|
-
function stringifyCacheKeyValue(value) {
|
|
679
|
-
if (value instanceof SerializedCacheKeyValue) return value.value;
|
|
680
|
-
if (Buffer$1.isBuffer(value)) return `$buffer:${hashBytes(value)}`;
|
|
681
|
-
if (isArrayBuffer(value)) return `$arrayBuffer:${hashBytes(new Uint8Array(value))}`;
|
|
682
|
-
if (isSharedArrayBuffer(value)) return `$sharedArrayBuffer:${hashBytes(new Uint8Array(value))}`;
|
|
683
|
-
if (isArrayBufferView(value)) {
|
|
684
|
-
const bytes = new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
|
|
685
|
-
return `$${value.constructor.name}:${hashBytes(bytes)}`;
|
|
686
|
-
}
|
|
687
|
-
if (isFile$1(value)) return `$file:${getCompositeKey({
|
|
688
|
-
lastModified: value.lastModified,
|
|
689
|
-
name: value.name,
|
|
690
|
-
size: value.size,
|
|
691
|
-
type: value.type
|
|
692
|
-
})}`;
|
|
693
|
-
if (isBlob$1(value)) return `$blob:${getCompositeKey({
|
|
694
|
-
size: value.size,
|
|
695
|
-
type: value.type
|
|
696
|
-
})}`;
|
|
697
|
-
}
|
|
698
|
-
async function materializeAsyncCacheKeyValue(value, refs = /* @__PURE__ */ new WeakSet()) {
|
|
699
|
-
const serialized = await stringifyAsyncCacheKeyValue(value);
|
|
700
|
-
if (serialized !== void 0) return new SerializedCacheKeyValue(serialized);
|
|
701
|
-
if (stringifyCacheKeyValue(value) !== void 0) return value;
|
|
702
|
-
if (!value || typeof value !== "object") return value;
|
|
703
|
-
if (Array.isArray(value)) {
|
|
704
|
-
const items = [];
|
|
705
|
-
for (const item of value) items.push(await materializeAsyncCacheKeyValue(item, refs));
|
|
706
|
-
return items;
|
|
707
|
-
}
|
|
708
|
-
if (refs.has(value)) throw new Error("Circular reference detected");
|
|
709
|
-
refs.add(value);
|
|
710
|
-
const entries = [];
|
|
711
|
-
for (const [key, entryValue] of Object.entries(value)) entries.push([key, await materializeAsyncCacheKeyValue(entryValue, refs)]);
|
|
712
|
-
refs.delete(value);
|
|
713
|
-
return Object.fromEntries(entries);
|
|
714
|
-
}
|
|
715
|
-
async function stringifyAsyncCacheKeyValue(value) {
|
|
716
|
-
if (isFile$1(value)) return `$file:${getCompositeKey({
|
|
717
|
-
bytes: await hashBlobBytes(value),
|
|
718
|
-
lastModified: value.lastModified,
|
|
719
|
-
name: value.name,
|
|
720
|
-
size: value.size,
|
|
721
|
-
type: value.type
|
|
722
|
-
})}`;
|
|
723
|
-
if (isBlob$1(value)) return `$blob:${getCompositeKey({
|
|
724
|
-
bytes: await hashBlobBytes(value),
|
|
725
|
-
size: value.size,
|
|
726
|
-
type: value.type
|
|
727
|
-
})}`;
|
|
728
|
-
}
|
|
729
|
-
async function hashBlobBytes(value) {
|
|
730
|
-
return hashBytes(new Uint8Array(await value.arrayBuffer()));
|
|
731
|
-
}
|
|
732
|
-
function hashBytes(value) {
|
|
733
|
-
return createHash("sha256").update(value).digest("hex");
|
|
734
|
-
}
|
|
735
|
-
function isArrayBuffer(value) {
|
|
736
|
-
return value instanceof ArrayBuffer;
|
|
737
|
-
}
|
|
738
|
-
function isSharedArrayBuffer(value) {
|
|
739
|
-
return value instanceof SharedArrayBuffer;
|
|
740
|
-
}
|
|
741
|
-
function isArrayBufferView(value) {
|
|
742
|
-
return ArrayBuffer.isView(value);
|
|
743
|
-
}
|
|
744
|
-
function isBlob$1(value) {
|
|
745
|
-
return value instanceof Blob;
|
|
746
|
-
}
|
|
747
|
-
function isFile$1(value) {
|
|
748
|
-
return value instanceof File;
|
|
749
|
-
}
|
|
750
|
-
function toJsonSafe(value) {
|
|
751
|
-
if (value === void 0) return void 0;
|
|
752
|
-
const text = JSON.stringify(value);
|
|
753
|
-
return JSON.parse(text);
|
|
754
|
-
}
|
|
755
|
-
function stripCacheAttributes(attributes) {
|
|
756
|
-
if (!attributes) return {};
|
|
757
|
-
const result = {};
|
|
758
|
-
for (const [key, value] of Object.entries(attributes)) if (!key.startsWith("cache.")) result[key] = value;
|
|
759
|
-
return result;
|
|
760
|
-
}
|
|
761
|
-
function serializeSubSpanTree(scope, spanId) {
|
|
762
|
-
const original = scope.spans.find((s) => s.id === spanId);
|
|
763
|
-
if (!original) return {
|
|
764
|
-
kind: "custom",
|
|
765
|
-
name: "unknown",
|
|
766
|
-
attributes: void 0,
|
|
767
|
-
status: "ok",
|
|
768
|
-
error: void 0,
|
|
769
|
-
errors: void 0,
|
|
770
|
-
children: []
|
|
771
|
-
};
|
|
772
|
-
const children = scope.spans.filter((s) => s.parentId === spanId).map((child) => serializeSubSpanTree(scope, child.id));
|
|
773
|
-
return {
|
|
774
|
-
kind: original.kind,
|
|
775
|
-
name: original.name,
|
|
776
|
-
attributes: original.attributes,
|
|
777
|
-
status: original.status,
|
|
778
|
-
error: original.error,
|
|
779
|
-
errors: original.errors,
|
|
780
|
-
children
|
|
781
|
-
};
|
|
782
|
-
}
|
|
783
|
-
function appendSubSpanOps(scope, frame) {
|
|
784
|
-
for (let i = frame.baseSpanIndex; i < scope.spans.length; i++) {
|
|
785
|
-
const candidate = scope.spans[i];
|
|
786
|
-
if (candidate?.parentId === frame.cachedSpanId) frame.ops.push({
|
|
787
|
-
kind: "subSpan",
|
|
788
|
-
span: serializeSubSpanTree(scope, candidate.id)
|
|
789
|
-
});
|
|
790
|
-
}
|
|
791
|
-
}
|
|
792
|
-
function replayRecording(scope, parentSpan, recording) {
|
|
793
|
-
scope.replayingDepth++;
|
|
794
|
-
try {
|
|
795
|
-
for (const op of recording.ops) applyRecordingOp(scope, parentSpan, op);
|
|
796
|
-
if (Object.keys(recording.finalAttributes).length > 0) mergeSpanAttributes(parentSpan, recording.finalAttributes);
|
|
797
|
-
if (recording.finalError !== void 0) parentSpan.error = recording.finalError;
|
|
798
|
-
if (recording.finalErrors !== void 0) parentSpan.errors = recording.finalErrors;
|
|
799
|
-
} finally {
|
|
800
|
-
scope.replayingDepth--;
|
|
801
|
-
}
|
|
802
|
-
}
|
|
803
|
-
function applyRecordingOp(scope, parentSpan, op) {
|
|
804
|
-
if (op.kind === "setOutput") {
|
|
805
|
-
scope.outputs[op.key] = op.value;
|
|
806
|
-
return;
|
|
807
|
-
}
|
|
808
|
-
if (op.kind === "incrementOutput") {
|
|
809
|
-
const existing = scope.outputs[op.key];
|
|
810
|
-
if (existing === void 0) scope.outputs[op.key] = op.delta;
|
|
811
|
-
else if (typeof existing === "number") scope.outputs[op.key] = existing + op.delta;
|
|
812
|
-
else scope.assertionFailures.push({ message: `replay incrementEvalOutput("${op.key}"): existing value is ${typeof existing}, expected number` });
|
|
813
|
-
return;
|
|
814
|
-
}
|
|
815
|
-
if (op.kind === "checkpoint") {
|
|
816
|
-
scope.checkpoints.set(op.name, op.data);
|
|
817
|
-
return;
|
|
818
|
-
}
|
|
819
|
-
replaySerializedSpan(scope, parentSpan.id, op.span);
|
|
820
|
-
}
|
|
821
|
-
function replaySerializedSpan(scope, parentId, serialized) {
|
|
822
|
-
const id = generateSpanId();
|
|
823
|
-
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
824
|
-
const replayed = {
|
|
825
|
-
id,
|
|
826
|
-
parentId,
|
|
827
|
-
caseId: scope.caseId,
|
|
828
|
-
kind: serialized.kind,
|
|
829
|
-
name: serialized.name,
|
|
830
|
-
startedAt: now,
|
|
831
|
-
endedAt: now,
|
|
832
|
-
status: serialized.status,
|
|
833
|
-
attributes: serialized.attributes,
|
|
834
|
-
error: serialized.error,
|
|
835
|
-
errors: serialized.errors
|
|
836
|
-
};
|
|
837
|
-
scope.spans.push(replayed);
|
|
838
|
-
for (const child of serialized.children) replaySerializedSpan(scope, id, child);
|
|
839
|
-
}
|
|
840
1236
|
//#endregion
|
|
841
1237
|
//#region ../shared/src/schemas/display.ts
|
|
842
1238
|
const scalarCellSchema = z.union([
|
|
@@ -976,6 +1372,8 @@ const traceSpanErrorSchema = z.object({
|
|
|
976
1372
|
stack: z.string().optional(),
|
|
977
1373
|
capturedAt: z.string().optional()
|
|
978
1374
|
}).catchall(z.unknown());
|
|
1375
|
+
/** Schema for a warning attached to a trace span. */
|
|
1376
|
+
const traceSpanWarningSchema = traceSpanErrorSchema;
|
|
979
1377
|
/** Schema for a persisted trace span captured during case execution. */
|
|
980
1378
|
const traceSpanSchema = z.object({
|
|
981
1379
|
id: z.string(),
|
|
@@ -993,7 +1391,9 @@ const traceSpanSchema = z.object({
|
|
|
993
1391
|
]),
|
|
994
1392
|
attributes: z.record(z.string(), z.unknown()).optional(),
|
|
995
1393
|
error: traceSpanErrorSchema.optional(),
|
|
996
|
-
errors: z.array(traceSpanErrorSchema).optional()
|
|
1394
|
+
errors: z.array(traceSpanErrorSchema).optional(),
|
|
1395
|
+
warning: traceSpanWarningSchema.optional(),
|
|
1396
|
+
warnings: z.array(traceSpanWarningSchema).optional()
|
|
997
1397
|
});
|
|
998
1398
|
//#endregion
|
|
999
1399
|
//#region ../shared/src/schemas/chart.ts
|
|
@@ -1254,12 +1654,16 @@ const spanCacheOptionsSchema = z.object({
|
|
|
1254
1654
|
/** Override the default namespace (`${evalId}__${spanName}`). */
|
|
1255
1655
|
namespace: z.string().optional()
|
|
1256
1656
|
});
|
|
1657
|
+
/** Category of operation stored in the eval cache. */
|
|
1658
|
+
const cacheOperationTypeSchema = z.enum(["span", "value"]);
|
|
1257
1659
|
/** Summary of a single persisted cache entry, used by list/delete endpoints. */
|
|
1258
1660
|
const cacheListItemSchema = z.object({
|
|
1259
1661
|
key: z.string(),
|
|
1260
1662
|
namespace: z.string(),
|
|
1261
|
-
|
|
1262
|
-
|
|
1663
|
+
operationType: cacheOperationTypeSchema,
|
|
1664
|
+
operationName: z.string(),
|
|
1665
|
+
spanName: z.string().optional(),
|
|
1666
|
+
spanKind: traceSpanKindSchema.optional(),
|
|
1263
1667
|
storedAt: z.string(),
|
|
1264
1668
|
codeFingerprint: z.string(),
|
|
1265
1669
|
sizeBytes: z.number()
|
|
@@ -1276,7 +1680,9 @@ const serializedCacheSpanSchema = z.object({
|
|
|
1276
1680
|
"cancelled"
|
|
1277
1681
|
]),
|
|
1278
1682
|
error: traceSpanErrorSchema.optional(),
|
|
1279
|
-
errors: z.array(traceSpanErrorSchema).optional()
|
|
1683
|
+
errors: z.array(traceSpanErrorSchema).optional(),
|
|
1684
|
+
warning: traceSpanWarningSchema.optional(),
|
|
1685
|
+
warnings: z.array(traceSpanWarningSchema).optional()
|
|
1280
1686
|
}).extend({ children: z.lazy(() => z.array(serializedCacheSpanSchema)) });
|
|
1281
1687
|
/**
|
|
1282
1688
|
* One captured operation performed while a cached span's body executed.
|
|
@@ -1290,6 +1696,16 @@ const cacheRecordingOpSchema = z.discriminatedUnion("kind", [
|
|
|
1290
1696
|
key: z.string(),
|
|
1291
1697
|
value: z.unknown()
|
|
1292
1698
|
}),
|
|
1699
|
+
z.object({
|
|
1700
|
+
kind: z.literal("appendOutput"),
|
|
1701
|
+
key: z.string(),
|
|
1702
|
+
value: z.unknown()
|
|
1703
|
+
}),
|
|
1704
|
+
z.object({
|
|
1705
|
+
kind: z.literal("mergeOutput"),
|
|
1706
|
+
key: z.string(),
|
|
1707
|
+
patch: z.record(z.string(), z.unknown())
|
|
1708
|
+
}),
|
|
1293
1709
|
z.object({
|
|
1294
1710
|
kind: z.literal("incrementOutput"),
|
|
1295
1711
|
key: z.string(),
|
|
@@ -1317,6 +1733,8 @@ const cacheRecordingSchema = z.object({
|
|
|
1317
1733
|
]).optional(),
|
|
1318
1734
|
finalError: traceSpanErrorSchema.optional(),
|
|
1319
1735
|
finalErrors: z.array(traceSpanErrorSchema).optional(),
|
|
1736
|
+
finalWarning: traceSpanWarningSchema.optional(),
|
|
1737
|
+
finalWarnings: z.array(traceSpanWarningSchema).optional(),
|
|
1320
1738
|
ops: z.array(cacheRecordingOpSchema)
|
|
1321
1739
|
});
|
|
1322
1740
|
/** Persisted cache file containing metadata and a recording. */
|
|
@@ -1324,8 +1742,10 @@ const cacheEntrySchema = z.object({
|
|
|
1324
1742
|
version: z.literal(1),
|
|
1325
1743
|
key: z.string(),
|
|
1326
1744
|
namespace: z.string(),
|
|
1327
|
-
|
|
1328
|
-
|
|
1745
|
+
operationType: cacheOperationTypeSchema.optional(),
|
|
1746
|
+
operationName: z.string().optional(),
|
|
1747
|
+
spanName: z.string().optional(),
|
|
1748
|
+
spanKind: traceSpanKindSchema.optional(),
|
|
1329
1749
|
storedAt: z.string(),
|
|
1330
1750
|
codeFingerprint: z.string(),
|
|
1331
1751
|
recording: cacheRecordingSchema
|
|
@@ -1635,15 +2055,21 @@ function createFsCacheStore(options) {
|
|
|
1635
2055
|
if (fileStatResult.error || !fileStatResult.value.isFile()) continue;
|
|
1636
2056
|
const cacheFile = await readCacheFilePath(filePath);
|
|
1637
2057
|
if (cacheFile === null) continue;
|
|
1638
|
-
for (const entry of Object.values(cacheFile.entries))
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
2058
|
+
for (const entry of Object.values(cacheFile.entries)) {
|
|
2059
|
+
const operationType = entry.operationType ?? "span";
|
|
2060
|
+
const operationName = entry.operationName ?? entry.spanName ?? entry.namespace;
|
|
2061
|
+
items.push({
|
|
2062
|
+
key: entry.key,
|
|
2063
|
+
namespace: entry.namespace,
|
|
2064
|
+
operationType,
|
|
2065
|
+
operationName,
|
|
2066
|
+
spanName: entry.spanName,
|
|
2067
|
+
spanKind: entry.spanKind,
|
|
2068
|
+
storedAt: entry.storedAt,
|
|
2069
|
+
codeFingerprint: entry.codeFingerprint,
|
|
2070
|
+
sizeBytes: Buffer.byteLength(JSON.stringify(entry), "utf8")
|
|
2071
|
+
});
|
|
2072
|
+
}
|
|
1647
2073
|
}
|
|
1648
2074
|
items.sort((a, b) => a.storedAt < b.storedAt ? 1 : -1);
|
|
1649
2075
|
return items;
|
|
@@ -2534,12 +2960,15 @@ async function runCase(params) {
|
|
|
2534
2960
|
input: evalCase.input,
|
|
2535
2961
|
signal
|
|
2536
2962
|
}]);
|
|
2537
|
-
}, {
|
|
2538
|
-
|
|
2539
|
-
|
|
2540
|
-
|
|
2541
|
-
|
|
2542
|
-
|
|
2963
|
+
}, {
|
|
2964
|
+
input: evalCase.input,
|
|
2965
|
+
cacheContext: cacheAdapter ? {
|
|
2966
|
+
adapter: cacheAdapter,
|
|
2967
|
+
mode: cacheMode,
|
|
2968
|
+
evalId,
|
|
2969
|
+
codeFingerprint
|
|
2970
|
+
} : void 0
|
|
2971
|
+
});
|
|
2543
2972
|
const traceTree = buildTraceTree(scope.spans, scope.checkpoints);
|
|
2544
2973
|
const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
|
|
2545
2974
|
if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) scope.assertionFailures.push(toAssertionFailure(executeError.message, executeError));
|
|
@@ -2555,20 +2984,31 @@ async function runCase(params) {
|
|
|
2555
2984
|
const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
|
|
2556
2985
|
scope.assertionFailures.push(toAssertionFailure(message, e instanceof Error ? e : void 0));
|
|
2557
2986
|
}
|
|
2987
|
+
if (!nonAssertError && evalDef.outputsSchema) {
|
|
2988
|
+
const parsedOutputs = evalDef.outputsSchema.safeParse(getOutputsSchemaInput(evalDef.outputsSchema, scope.outputs));
|
|
2989
|
+
if (parsedOutputs.success) scope.outputs = {
|
|
2990
|
+
...scope.outputs,
|
|
2991
|
+
...parsedOutputs.data
|
|
2992
|
+
};
|
|
2993
|
+
else scope.assertionFailures.push(toAssertionFailure(formatOutputsSchemaError(parsedOutputs.error)));
|
|
2994
|
+
}
|
|
2558
2995
|
const scoreResults = /* @__PURE__ */ new Map();
|
|
2559
2996
|
const scoringTraces = {};
|
|
2560
|
-
if (!nonAssertError && evalDef.scores) for (const [key, def] of Object.entries(evalDef.scores)) {
|
|
2997
|
+
if (!nonAssertError && scope.assertionFailures.length === 0 && evalDef.scores) for (const [key, def] of Object.entries(evalDef.scores)) {
|
|
2561
2998
|
const { compute, passThreshold, label } = normalizeScoreDef(def);
|
|
2562
2999
|
const scoreRun = await runInEvalScope(evalCase.id, async () => await callWithUnknownResult(compute, [{
|
|
2563
3000
|
input: evalCase.input,
|
|
2564
3001
|
outputs: { ...scope.outputs },
|
|
2565
3002
|
case: evalCase
|
|
2566
|
-
}]), {
|
|
2567
|
-
|
|
2568
|
-
|
|
2569
|
-
|
|
2570
|
-
|
|
2571
|
-
|
|
3003
|
+
}]), {
|
|
3004
|
+
input: evalCase.input,
|
|
3005
|
+
cacheContext: cacheAdapter ? {
|
|
3006
|
+
adapter: cacheAdapter,
|
|
3007
|
+
mode: cacheMode,
|
|
3008
|
+
evalId: `${evalId}__score__${key}`,
|
|
3009
|
+
codeFingerprint
|
|
3010
|
+
} : void 0
|
|
3011
|
+
});
|
|
2572
3012
|
const { trace, traceDisplay } = resolveTracePresentation(scoreRun.scope.spans, globalTraceDisplay, evalDef.traceDisplay);
|
|
2573
3013
|
if (trace.length > 0) scoringTraces[key] = {
|
|
2574
3014
|
trace,
|
|
@@ -2659,6 +3099,19 @@ function isRecord(value) {
|
|
|
2659
3099
|
function isBlob(value) {
|
|
2660
3100
|
return value instanceof Blob;
|
|
2661
3101
|
}
|
|
3102
|
+
function getOutputsSchemaInput(schema, outputs) {
|
|
3103
|
+
if (!(schema instanceof z.ZodObject)) return outputs;
|
|
3104
|
+
const configuredOutputs = {};
|
|
3105
|
+
for (const key of Object.keys(schema.shape)) if (key in outputs) configuredOutputs[key] = outputs[key];
|
|
3106
|
+
return configuredOutputs;
|
|
3107
|
+
}
|
|
3108
|
+
function formatOutputsSchemaError(error) {
|
|
3109
|
+
const issueLines = error.issues.map((issue) => {
|
|
3110
|
+
return `${issue.path.length > 0 ? issue.path.join(".") : "<root>"}: ${issue.message}`;
|
|
3111
|
+
});
|
|
3112
|
+
if (issueLines.length === 0) return "outputsSchema validation failed";
|
|
3113
|
+
return `outputsSchema validation failed:\n${issueLines.join("\n")}`;
|
|
3114
|
+
}
|
|
2662
3115
|
function toAssertionFailure(message, error = void 0) {
|
|
2663
3116
|
return error?.stack ? {
|
|
2664
3117
|
message,
|
|
@@ -3614,6 +4067,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
3614
4067
|
//#endregion
|
|
3615
4068
|
//#region src/cli.ts
|
|
3616
4069
|
function parseArgs(argv) {
|
|
4070
|
+
const normalizedArgv = argv.filter((arg) => arg !== "--no-env");
|
|
3617
4071
|
const args = {
|
|
3618
4072
|
command: "help",
|
|
3619
4073
|
subcommand: void 0,
|
|
@@ -3627,9 +4081,10 @@ function parseArgs(argv) {
|
|
|
3627
4081
|
port: 4100,
|
|
3628
4082
|
cacheMode: "use",
|
|
3629
4083
|
clearCache: false,
|
|
3630
|
-
all: false
|
|
4084
|
+
all: false,
|
|
4085
|
+
loadEnv: normalizedArgv.length === argv.length
|
|
3631
4086
|
};
|
|
3632
|
-
const command =
|
|
4087
|
+
const command = normalizedArgv[0];
|
|
3633
4088
|
if (command === "--help" || command === "-h") {
|
|
3634
4089
|
args.showHelp = true;
|
|
3635
4090
|
return args;
|
|
@@ -3640,16 +4095,16 @@ function parseArgs(argv) {
|
|
|
3640
4095
|
} else if (command !== void 0 && !command.startsWith("-")) args.unknownHelpTarget = command;
|
|
3641
4096
|
let cursor = 1;
|
|
3642
4097
|
if (args.command === "cache") {
|
|
3643
|
-
const sub =
|
|
4098
|
+
const sub = normalizedArgv[cursor];
|
|
3644
4099
|
if (sub === "list" || sub === "clear") {
|
|
3645
4100
|
args.subcommand = sub;
|
|
3646
4101
|
args.helpTopic = `cache ${sub}`;
|
|
3647
4102
|
cursor++;
|
|
3648
4103
|
} else if (sub !== void 0 && !sub.startsWith("-")) args.unknownHelpTarget = `cache ${sub}`;
|
|
3649
4104
|
}
|
|
3650
|
-
for (let i = cursor; i <
|
|
3651
|
-
const arg =
|
|
3652
|
-
const next =
|
|
4105
|
+
for (let i = cursor; i < normalizedArgv.length; i++) {
|
|
4106
|
+
const arg = normalizedArgv[i];
|
|
4107
|
+
const next = normalizedArgv[i + 1];
|
|
3653
4108
|
if (arg === "--help" || arg === "-h") args.showHelp = true;
|
|
3654
4109
|
else if (arg === "--eval" && next) {
|
|
3655
4110
|
args.evalIds.push(...next.split(","));
|
|
@@ -3681,6 +4136,10 @@ function parseArgs(argv) {
|
|
|
3681
4136
|
*/
|
|
3682
4137
|
async function runCli(argv) {
|
|
3683
4138
|
const args = parseArgs(argv);
|
|
4139
|
+
if (args.loadEnv && !loadWorkspaceEnv()) {
|
|
4140
|
+
process.exit(1);
|
|
4141
|
+
return;
|
|
4142
|
+
}
|
|
3684
4143
|
if (args.showHelp) {
|
|
3685
4144
|
if (args.unknownHelpTarget !== void 0) {
|
|
3686
4145
|
console.error(`No help found for "${args.unknownHelpTarget}".`);
|
|
@@ -3711,6 +4170,18 @@ async function runCli(argv) {
|
|
|
3711
4170
|
function isCliCommand(command) {
|
|
3712
4171
|
return command === "app" || command === "list" || command === "run" || command === "cache" || command === "help";
|
|
3713
4172
|
}
|
|
4173
|
+
function loadWorkspaceEnv() {
|
|
4174
|
+
const envPath = resolve(process.cwd(), ".env");
|
|
4175
|
+
if (!existsSync(envPath)) return true;
|
|
4176
|
+
const loadResult = resultify(() => {
|
|
4177
|
+
process.loadEnvFile(envPath);
|
|
4178
|
+
});
|
|
4179
|
+
if (loadResult.error) {
|
|
4180
|
+
console.error(`Failed to load .env at ${envPath}: ${loadResult.error.message}`);
|
|
4181
|
+
return false;
|
|
4182
|
+
}
|
|
4183
|
+
return true;
|
|
4184
|
+
}
|
|
3714
4185
|
const currentDir = dirname(fileURLToPath(import.meta.url));
|
|
3715
4186
|
const repoRoot = resolve(currentDir, "../../..");
|
|
3716
4187
|
const pnpmCommand = process.platform === "win32" ? "pnpm.cmd" : "pnpm";
|
|
@@ -3759,8 +4230,8 @@ async function commandApp(args) {
|
|
|
3759
4230
|
const { serve } = await import("@hono/node-server");
|
|
3760
4231
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
3761
4232
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
3762
|
-
const appModule = await import("./app-
|
|
3763
|
-
const runnerModule = await import("./runner-
|
|
4233
|
+
const appModule = await import("./app-7qDBq_ub.mjs");
|
|
4234
|
+
const runnerModule = await import("./runner-uzzY8kk1.mjs");
|
|
3764
4235
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
3765
4236
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
3766
4237
|
await runnerModule.initRunner();
|
|
@@ -3861,7 +4332,8 @@ async function commandCache(args) {
|
|
|
3861
4332
|
for (const entry of entries) {
|
|
3862
4333
|
console.info(` ${entry.namespace}`);
|
|
3863
4334
|
console.info(` key: ${entry.key}`);
|
|
3864
|
-
|
|
4335
|
+
const operationLabel = entry.operationType === "span" ? `${entry.operationName} (span ${entry.spanKind ?? "unknown"})` : `${entry.operationName} (value)`;
|
|
4336
|
+
console.info(` operation: ${operationLabel}`);
|
|
3865
4337
|
console.info(` stored: ${entry.storedAt}`);
|
|
3866
4338
|
console.info(` size: ${String(entry.sizeBytes)} bytes`);
|
|
3867
4339
|
console.info("");
|
|
@@ -3916,6 +4388,7 @@ Usage:
|
|
|
3916
4388
|
|
|
3917
4389
|
Flags:
|
|
3918
4390
|
--port <n> Server port (default: 4100)
|
|
4391
|
+
--no-env Disable automatic .env loading
|
|
3919
4392
|
--help, -h Show this help
|
|
3920
4393
|
`);
|
|
3921
4394
|
return;
|
|
@@ -3928,6 +4401,7 @@ Usage:
|
|
|
3928
4401
|
agent-evals list [flags]
|
|
3929
4402
|
|
|
3930
4403
|
Flags:
|
|
4404
|
+
--no-env Disable automatic .env loading
|
|
3931
4405
|
--help, -h Show this help
|
|
3932
4406
|
`);
|
|
3933
4407
|
return;
|
|
@@ -3948,6 +4422,7 @@ Flags:
|
|
|
3948
4422
|
--no-cache Shortcut for --cache bypass
|
|
3949
4423
|
--refresh-cache Shortcut for --cache refresh
|
|
3950
4424
|
--clear-cache Clear the cache before starting the run
|
|
4425
|
+
--no-env Disable automatic .env loading
|
|
3951
4426
|
--help, -h Show this help
|
|
3952
4427
|
`);
|
|
3953
4428
|
return;
|
|
@@ -3965,6 +4440,7 @@ Flags:
|
|
|
3965
4440
|
--eval <id> Clear entries for specific eval(s) (comma-separated)
|
|
3966
4441
|
--all Confirm clearing every cached entry
|
|
3967
4442
|
--json Output cache listing as JSON
|
|
4443
|
+
--no-env Disable automatic .env loading
|
|
3968
4444
|
--help, -h Show this help
|
|
3969
4445
|
`);
|
|
3970
4446
|
return;
|
|
@@ -3991,8 +4467,9 @@ Options:
|
|
|
3991
4467
|
--no-cache Shortcut for --cache bypass
|
|
3992
4468
|
--refresh-cache Shortcut for --cache refresh
|
|
3993
4469
|
--clear-cache Clear the cache before starting the run
|
|
4470
|
+
--no-env Disable automatic .env loading
|
|
3994
4471
|
--help, -h Show help
|
|
3995
4472
|
`);
|
|
3996
4473
|
}
|
|
3997
4474
|
//#endregion
|
|
3998
|
-
export {
|
|
4475
|
+
export { columnDefSchema as $, evalStatsConfigSchema as A, evalChartTypeSchema as B, spanCacheOptionsSchema as C, setEvalOutput as Ct, evalFreshnessStatusSchema as D, getEvalRegistry as Dt, caseRowSchema as E, defineEval as Et, evalChartBuiltinMetricSchema as F, traceAttributeDisplaySchema as G, traceAttributeDisplayFormatSchema as H, evalChartColorSchema as I, traceSpanErrorSchema as J, traceDisplayConfigSchema as K, evalChartConfigSchema as L, scoreTraceSchema as M, evalChartAggregateSchema as N, evalStatAggregateSchema as O, evalChartAxisSchema as P, cellValueSchema as Q, evalChartMetricSchema as R, serializedCacheSpanSchema as S, runInEvalScope as St, caseDetailSchema as T, repoFile as Tt, traceAttributeDisplayInputSchema as U, evalChartsConfigSchema as V, traceAttributeDisplayPlacementSchema as W, traceSpanSchema as X, traceSpanKindSchema as Y, traceSpanWarningSchema as Z, cacheListItemSchema as _, getCurrentScope as _t, sseEnvelopeSchema as a, repoFileRefSchema as at, cacheRecordingOpSchema as b, isInEvalScope as bt, deriveScopedSummaryFromCases as c, buildTraceTree as ct, runManifestSchema as d, evalTracer as dt, columnFormatSchema as et, runSummarySchema as f, hashCacheKey as ft, cacheFileSchema as g, evalAssert as gt, cacheEntrySchema as h, appendToEvalOutput as ht, updateManualScoreRequestSchema as i, numberDisplayOptionsSchema as it, evalSummarySchema as j, evalStatItemSchema as k, deriveStatusFromCaseRows as l, captureEvalSpanError as lt, trialSelectionModeSchema as m, EvalAssertionError as mt, createRunner as n, fileRefSchema as nt, getEvalTitle as o, runArtifactRefSchema as ot, agentEvalsConfigSchema as p, hashCacheKeySync as pt, traceDisplayInputConfigSchema as q, createRunRequestSchema as r, jsonCellSchema as rt, getEvalDisplayStatus as s, z$1 as st, runCli as t, columnKindSchema as tt, deriveStatusFromChildStatuses as u, evalSpan as ut, cacheModeSchema as v, getEvalCaseInput as vt, assertionFailureSchema as w, setScopeCacheContext as wt, 
cacheRecordingSchema as x, mergeEvalOutput as xt, cacheOperationTypeSchema as y, incrementEvalOutput as yt, evalChartTooltipExtraSchema as z };
|