@ls-stack/agent-eval 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,10 @@
1
1
  import { createHash } from "node:crypto";
2
2
  import { mkdir, readFile, readdir, rename, rm, stat, writeFile } from "node:fs/promises";
3
3
  import { dirname, extname, join, relative, resolve } from "node:path";
4
+ import { z, z as z$1 } from "zod/v4";
4
5
  import { AsyncLocalStorage } from "node:async_hooks";
5
6
  import { Buffer as Buffer$1 } from "node:buffer";
6
7
  import { getCompositeKey } from "@ls-stack/utils/getCompositeKey";
7
- import { z } from "zod/v4";
8
8
  import { watch } from "chokidar";
9
9
  import { glob } from "glob";
10
10
  import { existsSync } from "node:fs";
@@ -70,6 +70,27 @@ function getCurrentScope() {
70
70
  function isInEvalScope() {
71
71
  return getCurrentScope() !== void 0;
72
72
  }
/** True for any non-null value whose `typeof` is "object" (arrays included). */
function isObjectLike(value) {
  if (value === null) return false;
  return typeof value === "object";
}
/** True for non-null, non-array values whose `typeof` is "object". */
function isObjectRecord(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
/** Return a shallow copy of the given array. */
function copyArray$1(value) {
  return value.slice();
}
/**
 * Read the `input` stored on the current eval scope, optionally following a
 * dot-separated property path into it.
 *
 * Returns `undefined` when there is no current scope, when `path` is an empty
 * string, when any path segment is empty, or when traversal reaches a value
 * that is not a non-null object before the path is exhausted.
 */
function getEvalCaseInput(path = void 0) {
  const scope = getCurrentScope();
  if (!scope) return void 0;
  if (path === void 0) return scope.input;
  if (path.length === 0) return void 0;
  const segments = path.split(".");
  let cursor = scope.input;
  for (let index = 0; index < segments.length; index += 1) {
    const segment = segments[index];
    if (segment.length === 0) return void 0;
    if (!isObjectLike(cursor)) return void 0;
    cursor = cursor[segment];
  }
  return cursor;
}
73
94
  /**
74
95
  * Attach cache context (adapter, mode, eval id, fingerprint) to a scope.
75
96
  *
@@ -86,6 +107,7 @@ function setScopeCacheContext(scope, context) {
86
107
  async function runInEvalScope(caseId, fn, options = {}) {
87
108
  const scope = {
88
109
  caseId,
110
+ input: options.input,
89
111
  outputs: {},
90
112
  assertionFailures: [],
91
113
  spans: [],
@@ -145,6 +167,58 @@ function setEvalOutput(key, value) {
145
167
  });
146
168
  }
/**
 * Append an item to an output array in the current case scope.
 *
 * A missing output becomes `[value]`, an existing array gets a new array with
 * the item added at the end, and any other existing value is kept as
 * `[existing, value]`. Does nothing when there is no current scope.
 */
function appendToEvalOutput(key, value) {
  const scope = getCurrentScope();
  if (!scope) return;
  const previous = scope.outputs[key];
  let next;
  if (previous === void 0) next = [value];
  else if (Array.isArray(previous)) next = [...copyArray$1(previous), value];
  else next = [previous, value];
  scope.outputs[key] = next;
  recordOpIfActive(scope, { kind: "appendOutput", key, value });
}
/**
 * Shallow-merge object fields into an output value in the current case scope.
 *
 * A missing output becomes a copy of `patch`. When the existing output is not
 * a plain (non-array, non-null) object, the output is left untouched and an
 * assertion failure describing the actual kind of value is pushed instead.
 * Does nothing when there is no current scope.
 */
function mergeEvalOutput(key, patch) {
  const scope = getCurrentScope();
  if (!scope) return;
  const existing = scope.outputs[key];
  if (existing !== void 0 && !isObjectRecord(existing)) {
    // `typeof null` is "object"; name it explicitly so the message does not
    // read "existing value is object, expected object".
    const kind = existing === null ? "null" : Array.isArray(existing) ? "array" : typeof existing;
    scope.assertionFailures.push(toAssertionFailure$1(`mergeEvalOutput("${key}"): existing value is ${kind}, expected object`));
    return;
  }
  // Spreading `undefined` contributes nothing, so this covers both the
  // "missing" and the "existing object" cases.
  scope.outputs[key] = {
    ...existing,
    ...patch
  };
  recordOpIfActive(scope, {
    kind: "mergeOutput",
    key,
    patch
  });
}
221
+ /**
148
222
  * Add a numeric delta to an output value in the current case scope.
149
223
  *
150
224
  * If the existing value is non-numeric, the operation is recorded as an
@@ -189,6 +263,451 @@ function evalAssert(condition, message) {
189
263
  throw error;
190
264
  }
191
265
  //#endregion
266
+ //#region ../sdk/src/cacheKey.ts
/**
 * Wrapper marking a string as an already-serialized cache key component.
 * `stringifyCacheKeyValue` returns the wrapped `value` unchanged.
 */
var SerializedCacheKeyValue = class {
  value;
  constructor(serialized) {
    this.value = serialized;
  }
};
/**
 * Hash the components of a cache key into a deterministic hex digest.
 *
 * The input is first passed through `materializeAsyncCacheKeyValue`, which
 * reads native `Blob` and `File` values asynchronously so they are hashed by
 * content. Use `hashCacheKeySync` only when the key contains no async values.
 */
async function hashCacheKey(input) {
  const materialized = await materializeAsyncCacheKeyValue(input);
  return hashCacheKeySyncMaterialized(materialized);
}
/**
 * Synchronously hash cache key components. This supports JSON-like data and
 * in-memory binary values such as `Buffer`, `ArrayBuffer`, and typed arrays.
 * Native `Blob` or `File` values are not read here, so they cannot be hashed
 * by content on this path.
 */
function hashCacheKeySync(input) {
  const digest = hashCacheKeySyncMaterialized(input);
  return digest;
}
/**
 * Build the composite key for `input` (using `stringifyCacheKeyValue` as the
 * custom stringifier) and return its SHA-256 digest as a hex string.
 */
function hashCacheKeySyncMaterialized(input) {
  const hasher = createHash("sha256");
  const compositeKey = getCompositeKey(input, { stringify: stringifyCacheKeyValue });
  hasher.update(compositeKey);
  return hasher.digest("hex");
}
/**
 * Custom stringifier handed to `getCompositeKey` for values that need special
 * treatment in a cache key.
 *
 * - `SerializedCacheKeyValue` wrappers yield their stored string.
 * - `Buffer`, `ArrayBuffer`, `SharedArrayBuffer` and array-buffer views yield
 *   a type tag followed by the SHA-256 of their bytes.
 * - `File` / `Blob` values yield a tag plus a composite key of their metadata
 *   only (no content is read here).
 *
 * Any other value yields `undefined`.
 */
function stringifyCacheKeyValue(value) {
  if (value instanceof SerializedCacheKeyValue) return value.value;
  if (Buffer$1.isBuffer(value)) return `$buffer:${hashBytes(value)}`;
  if (isArrayBuffer(value)) {
    const digest = hashBytes(new Uint8Array(value));
    return `$arrayBuffer:${digest}`;
  }
  if (isSharedArrayBuffer(value)) {
    const digest = hashBytes(new Uint8Array(value));
    return `$sharedArrayBuffer:${digest}`;
  }
  if (isArrayBufferView(value)) {
    // Hash only the window of the underlying buffer that the view covers.
    const window = new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
    return `$${value.constructor.name}:${hashBytes(window)}`;
  }
  // `File` is tested before `Blob` so files keep their own tag and metadata.
  if (isFile$1(value)) {
    const metadata = getCompositeKey({
      lastModified: value.lastModified,
      name: value.name,
      size: value.size,
      type: value.type
    });
    return `$file:${metadata}`;
  }
  if (isBlob$1(value)) {
    const metadata = getCompositeKey({
      size: value.size,
      type: value.type
    });
    return `$blob:${metadata}`;
  }
  return void 0;
}
/**
 * Recursively replace values that need asynchronous serialization (native
 * `Blob` / `File`) with `SerializedCacheKeyValue` wrappers so the result can be
 * hashed synchronously.
 *
 * Values that `stringifyCacheKeyValue` already handles, and non-object values,
 * are returned as-is. Arrays and objects are rebuilt with each element /
 * own enumerable entry materialized.
 *
 * @param value - Cache key component to materialize.
 * @param refs - Containers on the current traversal path, used to detect
 *   cycles.
 * @throws {Error} "Circular reference detected" when an array or object
 *   contains itself, directly or indirectly.
 */
async function materializeAsyncCacheKeyValue(value, refs = /* @__PURE__ */ new WeakSet()) {
  const serialized = await stringifyAsyncCacheKeyValue(value);
  if (serialized !== void 0) return new SerializedCacheKeyValue(serialized);
  if (stringifyCacheKeyValue(value) !== void 0) return value;
  if (!value || typeof value !== "object") return value;
  // Track arrays as well as objects: a self-referencing array would otherwise
  // recurse forever instead of failing fast.
  if (refs.has(value)) throw new Error("Circular reference detected");
  refs.add(value);
  try {
    if (Array.isArray(value)) {
      const items = [];
      for (const item of value) items.push(await materializeAsyncCacheKeyValue(item, refs));
      return items;
    }
    const entries = [];
    for (const [key, entryValue] of Object.entries(value)) entries.push([key, await materializeAsyncCacheKeyValue(entryValue, refs)]);
    return Object.fromEntries(entries);
  } finally {
    // Only ancestors count as cycles; the same container may legitimately
    // appear again in a sibling position.
    refs.delete(value);
  }
}
/**
 * Serialize a native `File` or `Blob` into a tagged string that includes a
 * SHA-256 of its content alongside its metadata. Resolves to `undefined` for
 * any other value.
 */
async function stringifyAsyncCacheKeyValue(value) {
  // `File` is tested before `Blob` so files keep their own tag and metadata.
  if (isFile$1(value)) {
    const bytes = await hashBlobBytes(value);
    const metadata = getCompositeKey({
      bytes,
      lastModified: value.lastModified,
      name: value.name,
      size: value.size,
      type: value.type
    });
    return `$file:${metadata}`;
  }
  if (isBlob$1(value)) {
    const bytes = await hashBlobBytes(value);
    const metadata = getCompositeKey({
      bytes,
      size: value.size,
      type: value.type
    });
    return `$blob:${metadata}`;
  }
  return void 0;
}
/** Read `value.arrayBuffer()` and return the SHA-256 hex digest of its bytes. */
async function hashBlobBytes(value) {
  const buffer = await value.arrayBuffer();
  return hashBytes(new Uint8Array(buffer));
}
/** Return the SHA-256 digest of `value` as a lowercase hex string. */
function hashBytes(value) {
  const hasher = createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex");
}
/** True when `value` is an `ArrayBuffer` instance. */
function isArrayBuffer(value) {
  const matches = value instanceof ArrayBuffer;
  return matches;
}
/** True when `value` is a `SharedArrayBuffer` instance. */
function isSharedArrayBuffer(value) {
  const matches = value instanceof SharedArrayBuffer;
  return matches;
}
/** True when `value` is a typed array or `DataView` (per `ArrayBuffer.isView`). */
function isArrayBufferView(value) {
  const matches = ArrayBuffer.isView(value);
  return matches;
}
/** True when `value` is an instance of the global `Blob` (which includes `File`). */
function isBlob$1(value) {
  const matches = value instanceof Blob;
  return matches;
}
/**
 * True when `value` is an instance of the global `File`.
 *
 * The `typeof` guard matters: referencing an undeclared global in
 * `instanceof` throws a ReferenceError, and this predicate runs for every
 * cache key component. On runtimes without a global `File` nothing can be a
 * `File`, so the answer is simply `false`.
 */
function isFile$1(value) {
  return typeof File !== "undefined" && value instanceof File;
}
/**
 * Round-trip a value through JSON so that only JSON-representable data remains.
 *
 * Returns `undefined` for `undefined` input and for values that
 * `JSON.stringify` cannot represent at the top level (functions, symbols):
 * `JSON.stringify` returns `undefined` for those, and passing that to
 * `JSON.parse` would throw a SyntaxError.
 *
 * Errors raised by `JSON.stringify` itself (circular structures, BigInt) are
 * not caught and propagate to the caller.
 */
function toJsonSafe(value) {
  if (value === void 0) return void 0;
  const text = JSON.stringify(value);
  if (text === void 0) return void 0;
  return JSON.parse(text);
}
370
+ //#endregion
371
+ //#region ../sdk/src/cacheRecording.ts
/**
 * Replace `span.attributes` with a new object holding the previous attributes
 * overlaid by `attributes` (shallow; later keys win).
 */
function mergeSpanAttributes$1(span, attributes) {
  const merged = { ...span.attributes, ...attributes };
  span.attributes = merged;
}
/** True for non-null, non-array values whose `typeof` is "object". */
function isRecordLike$1(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
/**
 * Human-readable kind of a value for assertion messages: "null" for `null`,
 * "array" for arrays, otherwise the `typeof` result.
 *
 * `null` is named explicitly because `typeof null` is "object", which would
 * produce messages such as "existing value is object, expected object".
 */
function valueKind$1(value) {
  if (value === null) return "null";
  return Array.isArray(value) ? "array" : typeof value;
}
/** Return a shallow copy of the given array. */
function copyArray(value) {
  return value.slice();
}
/**
 * Return a new object with every own enumerable entry of `attributes` whose
 * key does not start with "cache.". A falsy `attributes` yields `{}`.
 */
function stripCacheAttributes(attributes) {
  const kept = {};
  if (!attributes) return kept;
  for (const [key, value] of Object.entries(attributes)) {
    if (key.startsWith("cache.")) continue;
    kept[key] = value;
  }
  return kept;
}
/**
 * Take a JSON round-tripped copy of a span's attributes with the "cache."
 * entries removed. `span` may be undefined; anything that does not end up as
 * a plain record yields `{}`.
 */
function snapshotNonCacheAttributes(span) {
  const stripped = stripCacheAttributes(span?.attributes);
  const snapshot = toJsonSafe(stripped);
  if (isRecordLike$1(snapshot)) return snapshot;
  return {};
}
/**
 * Collect the entries of `after` whose value differs from the same key in
 * `before` (per `cacheAttributeValuesEqual`). Keys that exist only in
 * `before` are not reported.
 */
function diffNonCacheAttributes(before, after) {
  const changed = {};
  for (const [key, value] of Object.entries(after)) {
    if (cacheAttributeValuesEqual(before[key], value)) continue;
    changed[key] = value;
  }
  return changed;
}
/**
 * Compare two attribute values: identical per `Object.is`, or equal once
 * serialized with `JSON.stringify`. If serialization throws, the values are
 * treated as different.
 */
function cacheAttributeValuesEqual(left, right) {
  if (Object.is(left, right)) return true;
  let leftJson;
  let rightJson;
  try {
    leftJson = JSON.stringify(left);
    rightJson = JSON.stringify(right);
  } catch {
    return false;
  }
  return leftJson === rightJson;
}
/**
 * Add `ref` to the end of the span's "cache.refs" attribute, starting a new
 * list when the attribute is missing or not an array. Does nothing when
 * `span` is undefined.
 */
function appendCacheRef(span, ref) {
  if (span === void 0) return;
  const current = span.attributes?.["cache.refs"];
  const previousRefs = Array.isArray(current) ? copyArray(current) : [];
  mergeSpanAttributes$1(span, { "cache.refs": [...previousRefs, ref] });
}
/**
 * Build a serializable tree for the span with id `spanId` and, recursively,
 * every span in `scope.spans` whose `parentId` points at it.
 *
 * Ids and timestamps are not included in the result. When no span has the
 * given id, a placeholder node (`kind: "custom"`, `name: "unknown"`,
 * `status: "ok"`, no children) is returned.
 */
function serializeSubSpanTree(scope, spanId) {
  const source = scope.spans.find((span) => span.id === spanId);
  if (!source) {
    return {
      kind: "custom",
      name: "unknown",
      attributes: void 0,
      status: "ok",
      error: void 0,
      errors: void 0,
      warning: void 0,
      warnings: void 0,
      children: []
    };
  }
  const directChildren = scope.spans.filter((span) => span.parentId === spanId);
  const children = directChildren.map((child) => serializeSubSpanTree(scope, child.id));
  const { kind, name, attributes, status, error, errors, warning, warnings } = source;
  return {
    kind,
    name,
    attributes,
    status,
    error,
    errors,
    warning,
    warnings,
    children
  };
}
/**
 * Scan the spans added to `scope.spans` from `frame.baseSpanIndex` onward and
 * push a "subSpan" op onto `frame.ops` for each one whose parent is
 * `frame.replayParentSpanId`.
 */
function appendSubSpanOps(scope, frame) {
  for (let index = frame.baseSpanIndex; index < scope.spans.length; index += 1) {
    const span = scope.spans[index];
    if (span?.parentId !== frame.replayParentSpanId) continue;
    frame.ops.push({
      kind: "subSpan",
      span: serializeSubSpanTree(scope, span.id)
    });
  }
}
/**
 * Re-apply a cached recording to the current scope.
 *
 * Every recorded op is applied in order. Then, if a parent span is given, the
 * recording's final attributes are merged into it and any recorded final
 * error(s) / warning(s) are copied onto it. `scope.replayingDepth` is
 * incremented for the duration of the replay and always decremented
 * afterwards, even if an op throws.
 */
function replayRecording(scope, parentSpan, recording, options) {
  scope.replayingDepth++;
  try {
    for (const op of recording.ops) {
      applyRecordingOp(scope, parentSpan, op, options);
    }
    // Everything below only touches the parent span.
    if (parentSpan === void 0) return;
    if (Object.keys(recording.finalAttributes).length > 0) {
      mergeSpanAttributes$1(parentSpan, recording.finalAttributes);
    }
    if (recording.finalError !== void 0) parentSpan.error = recording.finalError;
    if (recording.finalErrors !== void 0) parentSpan.errors = recording.finalErrors;
    if (recording.finalWarning !== void 0) parentSpan.warning = recording.finalWarning;
    if (recording.finalWarnings !== void 0) parentSpan.warnings = recording.finalWarnings;
  } finally {
    scope.replayingDepth--;
  }
}
/**
 * Apply one recorded op to the scope during replay.
 *
 * Output ops ("setOutput", "appendOutput", "mergeOutput", "incrementOutput")
 * update `scope.outputs`; type mismatches for merge/increment push an
 * assertion failure instead of overwriting. "checkpoint" stores data in
 * `scope.checkpoints`. Any other op is treated as a sub-span and re-created
 * under `parentSpan` (or at the root when there is none).
 */
function applyRecordingOp(scope, parentSpan, op, options) {
  switch (op.kind) {
    case "setOutput": {
      scope.outputs[op.key] = op.value;
      return;
    }
    case "appendOutput": {
      const previous = scope.outputs[op.key];
      let next;
      if (previous === void 0) next = [op.value];
      else if (Array.isArray(previous)) next = [...copyArray(previous), op.value];
      else next = [previous, op.value];
      scope.outputs[op.key] = next;
      return;
    }
    case "mergeOutput": {
      const previous = scope.outputs[op.key];
      if (previous === void 0) {
        scope.outputs[op.key] = { ...op.patch };
      } else if (isRecordLike$1(previous)) {
        scope.outputs[op.key] = { ...previous, ...op.patch };
      } else {
        scope.assertionFailures.push({ message: `replay mergeEvalOutput("${op.key}"): existing value is ${valueKind$1(previous)}, expected object` });
      }
      return;
    }
    case "incrementOutput": {
      const previous = scope.outputs[op.key];
      if (previous === void 0) {
        scope.outputs[op.key] = op.delta;
      } else if (typeof previous === "number") {
        scope.outputs[op.key] = previous + op.delta;
      } else {
        scope.assertionFailures.push({ message: `replay incrementEvalOutput("${op.key}"): existing value is ${valueKind$1(previous)}, expected number` });
      }
      return;
    }
    case "checkpoint": {
      scope.checkpoints.set(op.name, op.data);
      return;
    }
    default: {
      const parentId = parentSpan?.id ?? null;
      replaySerializedSpan(scope, parentId, op.span, options);
    }
  }
}
/**
 * Re-create a serialized span (and, depth-first, its children) in
 * `scope.spans`.
 *
 * Each replayed span gets a fresh id from `options.generateSpanId()` and uses
 * the current time for both `startedAt` and `endedAt`; the remaining fields
 * are copied from the serialized node.
 */
function replaySerializedSpan(scope, parentId, serialized, options) {
  const id = options.generateSpanId();
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
  const { kind, name, status, attributes, error, errors, warning, warnings } = serialized;
  scope.spans.push({
    id,
    parentId,
    caseId: scope.caseId,
    kind,
    name,
    startedAt: timestamp,
    endedAt: timestamp,
    status,
    attributes,
    error,
    errors,
    warning,
    warnings
  });
  for (const child of serialized.children) {
    replaySerializedSpan(scope, id, child, options);
  }
}
519
+ //#endregion
520
+ //#region ../sdk/src/traceDiagnostics.ts
/** Field names set explicitly on normalized errors; excluded from the copied "extra" fields. */
const errorCoreFields = new Set(["name", "message", "stack", "capturedAt"]);
/** True for non-null, non-array values whose `typeof` is "object". */
function isRecord$2(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
/**
 * Turn an arbitrary thrown/captured value into a message string.
 *
 * Strings pass through; numbers, booleans and bigints are stringified;
 * symbols use their description (or "Symbol"); functions become
 * "[function name]" / "[function]"; `undefined` and `null` become their
 * names. Anything else is serialized with `JSON.stringify`, falling back to
 * "Unknown error" if that throws.
 */
function formatUnknownErrorMessage(error) {
  switch (typeof error) {
    case "string":
      return error;
    case "number":
    case "boolean":
    case "bigint":
      return String(error);
    case "symbol":
      return error.description ?? "Symbol";
    case "function":
      return error.name ? `[function ${error.name}]` : "[function]";
    case "undefined":
      return "undefined";
    default:
      break;
  }
  if (error === null) return "null";
  try {
    return JSON.stringify(error);
  } catch {
    return "Unknown error";
  }
}
/** Copy the own enumerable entries of `error` whose keys are not in `errorCoreFields`. */
function getErrorExtraFields(error) {
  const extras = Object.entries(error).filter(([field]) => !errorCoreFields.has(field));
  return Object.fromEntries(extras);
}
/**
 * Convert an arbitrary captured value into a trace error record.
 *
 * - `Error` instances: extra own enumerable fields plus `name`, `message`,
 *   `stack`.
 * - Other non-array objects: extra fields, `name` / `stack` only when they
 *   are strings, and a `message` formatted from `error.message` (or from the
 *   whole object when it has no `message`).
 * - Everything else: `message` is `String(error)`.
 *
 * `capturedAt` is always set on the result (possibly to `undefined`).
 */
function normalizeTraceError(error, capturedAt = void 0) {
  if (error instanceof Error) {
    const extras = getErrorExtraFields(error);
    return {
      ...extras,
      name: error.name,
      message: error.message,
      stack: error.stack,
      capturedAt
    };
  }
  if (!isRecord$2(error)) {
    return {
      message: String(error),
      capturedAt
    };
  }
  const extras = getErrorExtraFields(error);
  const namePart = typeof error.name === "string" ? { name: error.name } : {};
  const stackPart = typeof error.stack === "string" ? { stack: error.stack } : {};
  const messageSource = error.message === void 0 ? error : error.message;
  return {
    ...extras,
    ...namePart,
    message: formatUnknownErrorMessage(messageSource),
    ...stackPart,
    capturedAt
  };
}
/**
 * Normalize one or more captured errors.
 *
 * With additional errors, the first argument is treated as a single error
 * followed by the additional ones. Without them, an array first argument is
 * used as the list and anything else becomes a one-element list.
 */
function normalizeTraceErrors(errorOrErrors, additionalErrors, capturedAt) {
  let collected;
  if (additionalErrors.length > 0) collected = [errorOrErrors, ...additionalErrors];
  else if (Array.isArray(errorOrErrors)) collected = errorOrErrors;
  else collected = [errorOrErrors];
  return collected.map((error) => normalizeTraceError(error, capturedAt));
}
/**
 * Normalize one or more captured warnings into trace error records.
 *
 * With additional warnings, the first argument is treated as a single warning
 * followed by the additional ones. Without them, an array first argument is
 * used as the list and anything else becomes a one-element list.
 */
function normalizeTraceWarnings(warningOrWarnings, additionalWarnings, capturedAt) {
  let collected;
  if (additionalWarnings.length > 0) collected = [warningOrWarnings, ...additionalWarnings];
  else if (Array.isArray(warningOrWarnings)) collected = warningOrWarnings;
  else collected = [warningOrWarnings];
  return collected.map((warning) => normalizeTraceError(warning, capturedAt));
}
/**
 * True when `value` is a non-array object whose only own enumerable key is
 * `level`, and that level is either `undefined` or a valid capture level.
 */
function isCaptureEvalSpanErrorOptions(value) {
  if (!isRecord$2(value)) return false;
  const keys = Object.keys(value);
  const hasOnlyLevelKey = keys.length > 0 && keys.every((key) => key === "level");
  if (!hasOnlyLevelKey) return false;
  if (value.level === void 0) return true;
  return isCaptureEvalSpanErrorLevel(value.level);
}
/** True when `value` is exactly the string "error" or "warning". */
function isCaptureEvalSpanErrorLevel(value) {
  switch (value) {
    case "error":
    case "warning":
      return true;
    default:
      return false;
  }
}
/**
 * Separate the rest arguments of `captureEvalSpanError` into additional
 * errors and options.
 *
 * If the last argument is a level string or an options object, it is removed
 * from the error list and returned as `options`; otherwise all arguments are
 * errors and `options` is `{}`.
 */
function splitCaptureEvalSpanErrorArgs(additionalErrorsOrOptions) {
  const lastArg = additionalErrorsOrOptions.at(-1);
  let options;
  if (isCaptureEvalSpanErrorLevel(lastArg)) {
    options = { level: lastArg };
  } else if (isCaptureEvalSpanErrorOptions(lastArg)) {
    options = lastArg;
  } else {
    return {
      additionalErrors: additionalErrorsOrOptions,
      options: {}
    };
  }
  return {
    additionalErrors: additionalErrorsOrOptions.slice(0, -1),
    options
  };
}
/**
 * Append errors to `span.errors`, set `span.error` to the last one and mark
 * the span status as "error". Does nothing when `errors` is empty or its
 * last element is `undefined`.
 */
function appendSpanErrors(span, errors) {
  const latestError = errors.at(-1);
  if (errors.length === 0 || latestError === void 0) return;
  const previous = span.errors ?? [];
  span.errors = [...previous, ...errors];
  span.error = latestError;
  span.status = "error";
}
/**
 * Append warnings to `span.warnings` and set `span.warning` to the last one.
 * The span status is left unchanged. Does nothing when `warnings` is empty or
 * its last element is `undefined`.
 */
function appendSpanWarnings(span, warnings) {
  const latestWarning = warnings.at(-1);
  if (warnings.length === 0 || latestWarning === void 0) return;
  const previous = span.warnings ?? [];
  span.warnings = [...previous, ...warnings];
  span.warning = latestWarning;
}
/** True when the span has an `error` set or a non-empty `errors` list. */
function hasSpanError(span) {
  if (span.error !== void 0) return true;
  const recordedCount = span.errors?.length ?? 0;
  return recordedCount > 0;
}
622
+ //#endregion
623
+ //#region ../sdk/src/valueCache.ts
/**
 * Create the `traceCache(info, fn)` function that caches the result of `fn`
 * without creating a span of its own.
 *
 * Outside an eval scope, without a cache context, or while a recording is
 * being replayed, `fn` is simply awaited. Otherwise the key is hashed from the
 * namespace, the code fingerprint and `info.key`, and:
 *
 * - in "use" mode a cache hit replays the stored recording onto the active
 *   span and returns the stored return value; a miss falls through;
 * - in every mode a `cache.refs` entry describing the outcome ("hit", "miss",
 *   "refresh" or "bypass") is added to the active span, if there is one;
 * - on fall-through `fn` runs inside a recording frame, and unless the mode
 *   is "bypass" the recording is written through the cache adapter.
 *
 * @param generateSpanId - Id factory used for spans re-created during replay.
 */
function createTraceCache(generateSpanId) {
  return async function traceCache(info, fn) {
    const scope = getCurrentScope();
    if (!scope) return await fn();
    const cacheCtx = scope.cacheContext;
    if (cacheCtx === void 0 || scope.replayingDepth > 0) return await fn();
    const namespace = info.namespace ?? `${cacheCtx.evalId}__${info.name}`;
    const keyHash = await hashCacheKey({
      namespace,
      codeFingerprint: cacheCtx.codeFingerprint,
      key: info.key
    });
    const activeSpan = scope.activeSpanStack.at(-1);
    // Every ref shares the same identifying fields; only the status (and, for
    // hits, the storage timestamps) differ.
    const noteCacheRef = (status, extra = {}) => {
      appendCacheRef(activeSpan, {
        type: "value",
        name: info.name,
        namespace,
        key: keyHash,
        status,
        ...extra
      });
    };
    if (cacheCtx.mode === "use") {
      const hit = await cacheCtx.adapter.lookup(namespace, keyHash);
      if (hit) {
        const storedAt = hit.storedAt;
        const age = Date.now() - new Date(storedAt).getTime();
        noteCacheRef("hit", { storedAt, age });
        replayRecording(scope, activeSpan, hit.recording, { generateSpanId });
        return hit.recording.returnValue;
      }
      noteCacheRef("miss");
    } else if (cacheCtx.mode === "refresh") {
      noteCacheRef("refresh");
    } else {
      noteCacheRef("bypass");
    }
    // Snapshot before running the body so only attributes changed by the body
    // end up in the recording.
    const beforeAttributes = snapshotNonCacheAttributes(activeSpan);
    const frame = {
      baseSpanIndex: scope.spans.length,
      replayParentSpanId: activeSpan?.id ?? null,
      ops: []
    };
    scope.recordingStack.push(frame);
    let bodyResult;
    try {
      bodyResult = await fn();
    } finally {
      scope.recordingStack.pop();
    }
    appendSubSpanOps(scope, frame);
    if (cacheCtx.mode === "bypass") return bodyResult;
    const afterAttributes = snapshotNonCacheAttributes(activeSpan);
    const finalAttributes = diffNonCacheAttributes(beforeAttributes, afterAttributes);
    const recording = {
      returnValue: toJsonSafe(bodyResult),
      finalAttributes,
      ops: frame.ops
    };
    await cacheCtx.adapter.write({
      version: 1,
      key: keyHash,
      namespace,
      operationType: "value",
      operationName: info.name,
      storedAt: (/* @__PURE__ */ new Date()).toISOString(),
      codeFingerprint: cacheCtx.codeFingerprint,
      recording
    });
    return bodyResult;
  };
}
710
+ //#endregion
192
711
  //#region ../sdk/src/tracer.ts
193
712
  let spanIdCounter = 0;
194
713
  function generateSpanId() {
@@ -204,7 +723,10 @@ function noopActiveSpan() {
204
723
  return {
205
724
  setName() {},
206
725
  setAttribute() {},
207
- setAttributes() {}
726
+ setAttributes() {},
727
+ incrementAttribute() {},
728
+ appendToAttribute() {},
729
+ mergeAttribute() {}
208
730
  };
209
731
  }
210
732
  function noopExternalSpan(id) {
@@ -213,6 +735,9 @@ function noopExternalSpan(id) {
213
735
  setName() {},
214
736
  setAttribute() {},
215
737
  setAttributes() {},
738
+ incrementAttribute() {},
739
+ appendToAttribute() {},
740
+ mergeAttribute() {},
216
741
  end() {}
217
742
  };
218
743
  }
@@ -222,6 +747,61 @@ function mergeSpanAttributes(span, attributes) {
222
747
  ...attributes
223
748
  };
224
749
  }
/** True for non-null, non-array values whose `typeof` is "object". */
function isRecordLike(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
/**
 * Human-readable kind of a value for assertion messages: "null" for `null`,
 * "array" for arrays, otherwise the `typeof` result.
 *
 * `null` is named explicitly because `typeof null` is "object", which would
 * produce messages such as "existing value is object, expected object".
 */
function valueKind(value) {
  if (value === null) return "null";
  return Array.isArray(value) ? "array" : typeof value;
}
/** Push `{ message }` onto the current scope's assertion failures; no-op without a scope. */
function recordSpanAttributeAssertion(message) {
  const scope = getCurrentScope();
  if (scope) {
    scope.assertionFailures.push({ message });
  }
}
/**
 * Add `delta` to a numeric span attribute.
 *
 * A missing attribute is initialised to `delta`. If the existing value is not
 * a number, the attribute is left unchanged and an assertion failure is
 * recorded instead.
 */
function incrementSpanAttribute(span, key, delta) {
  const current = span.attributes?.[key];
  if (current !== void 0 && typeof current !== "number") {
    recordSpanAttributeAssertion(`evalSpan.incrementAttribute("${key}"): existing value is ${valueKind(current)}, expected number`);
    return;
  }
  const next = current === void 0 ? delta : current + delta;
  mergeSpanAttributes(span, { [key]: next });
}
/**
 * Append `value` to a list-valued span attribute.
 *
 * A missing attribute becomes `[value]`, an existing array is replaced by a
 * new array with the value added at the end, and any other existing value is
 * kept as `[existing, value]`.
 */
function appendToSpanAttribute(span, key, value) {
  const current = span.attributes?.[key];
  let next;
  if (current === void 0) {
    next = [value];
  } else if (Array.isArray(current)) {
    const items = current.map((item) => item);
    next = [...items, value];
  } else {
    next = [current, value];
  }
  mergeSpanAttributes(span, { [key]: next });
}
/**
 * Shallow-merge `patch` into an object-valued span attribute.
 *
 * A missing attribute becomes a copy of `patch`. If the existing value is not
 * a plain (non-array, non-null) object, the attribute is left unchanged and
 * an assertion failure is recorded instead.
 */
function mergeSpanAttribute(span, key, patch) {
  const current = span.attributes?.[key];
  if (current !== void 0 && !isRecordLike(current)) {
    recordSpanAttributeAssertion(`evalSpan.mergeAttribute("${key}"): existing value is ${valueKind(current)}, expected object`);
    return;
  }
  // Spreading `undefined` contributes nothing, so a missing attribute simply
  // yields a copy of `patch`.
  mergeSpanAttributes(span, { [key]: { ...current, ...patch } });
}
/**
 * Close a span whose callback returned normally: status is "error" if errors
 * were captured on the span, otherwise "ok", and `endedAt` is set to now.
 */
function finishSpanWithoutThrownError(span) {
  if (hasSpanError(span)) span.status = "error";
  else span.status = "ok";
  span.endedAt = (/* @__PURE__ */ new Date()).toISOString();
}
225
805
  function createSpanHandle(span) {
226
806
  return {
227
807
  setName(value) {
@@ -232,9 +812,25 @@ function createSpanHandle(span) {
232
812
  },
233
813
  setAttributes(value) {
234
814
  mergeSpanAttributes(span, value);
815
+ },
816
+ incrementAttribute(key, delta) {
817
+ incrementSpanAttribute(span, key, delta);
818
+ },
819
+ appendToAttribute(key, value) {
820
+ appendToSpanAttribute(span, key, value);
821
+ },
822
+ mergeAttribute(key, patch) {
823
+ mergeSpanAttribute(span, key, patch);
235
824
  }
236
825
  };
237
826
  }
/**
 * Look up the span with the given id in the current scope and pass it to
 * `update`. Does nothing when there is no scope or no such span.
 */
function updateExternalSpanRecord(id, update) {
  const scope = getCurrentScope();
  const span = scope ? findSpan(scope, id) : void 0;
  if (span) update(span);
}
238
834
  function createExternalSpanHandle(id) {
239
835
  return {
240
836
  id,
@@ -256,6 +852,21 @@ function createExternalSpanHandle(id) {
256
852
  attributes: value
257
853
  });
258
854
  },
855
+ incrementAttribute(key, delta) {
856
+ updateExternalSpanRecord(id, (span) => {
857
+ incrementSpanAttribute(span, key, delta);
858
+ });
859
+ },
860
+ appendToAttribute(key, value) {
861
+ updateExternalSpanRecord(id, (span) => {
862
+ appendToSpanAttribute(span, key, value);
863
+ });
864
+ },
865
+ mergeAttribute(key, patch) {
866
+ updateExternalSpanRecord(id, (span) => {
867
+ mergeSpanAttribute(span, key, patch);
868
+ });
869
+ },
259
870
  end(info = {}) {
260
871
  endExternalSpan({
261
872
  ...info,
@@ -312,6 +923,8 @@ function updateExternalSpan(info) {
312
923
  if (info.name !== void 0) span.name = info.name;
313
924
  if (info.status !== void 0) span.status = info.status;
314
925
  if (info.error !== void 0) span.error = info.error;
926
+ if (info.warning !== void 0) span.warning = info.warning;
927
+ if (info.warnings !== void 0) span.warnings = info.warnings;
315
928
  if (info.attributes !== void 0) mergeSpanAttributes(span, info.attributes);
316
929
  }
317
930
  function endExternalSpan(info) {
@@ -340,6 +953,8 @@ function recordExternalSpan(info) {
340
953
  existing.status = status;
341
954
  existing.attributes = info.attributes;
342
955
  existing.error = info.error;
956
+ existing.warning = info.warning;
957
+ existing.warnings = info.warnings;
343
958
  return id;
344
959
  }
345
960
  scope.spans.push({
@@ -352,7 +967,9 @@ function recordExternalSpan(info) {
352
967
  endedAt,
353
968
  status,
354
969
  attributes: info.attributes,
355
- error: info.error
970
+ error: info.error,
971
+ warning: info.warning,
972
+ warnings: info.warnings
356
973
  });
357
974
  return id;
358
975
  }
@@ -364,20 +981,58 @@ function recordExternalSpan(info) {
364
981
  const evalSpan = {
365
982
  setName(value) {
366
983
  updateCurrentSpan((currentSpan) => {
367
- currentSpan.name = value;
984
+ currentSpan.name = value;
985
+ });
986
+ },
987
+ setAttribute(key, value) {
988
+ updateCurrentSpan((currentSpan) => {
989
+ mergeSpanAttributes(currentSpan, { [key]: value });
990
+ });
991
+ },
992
+ setAttributes(value) {
993
+ updateCurrentSpan((currentSpan) => {
994
+ mergeSpanAttributes(currentSpan, value);
995
+ });
996
+ },
997
+ incrementAttribute(key, delta) {
998
+ updateCurrentSpan((currentSpan) => {
999
+ incrementSpanAttribute(currentSpan, key, delta);
368
1000
  });
369
1001
  },
370
- setAttribute(key, value) {
1002
+ appendToAttribute(key, value) {
371
1003
  updateCurrentSpan((currentSpan) => {
372
- mergeSpanAttributes(currentSpan, { [key]: value });
1004
+ appendToSpanAttribute(currentSpan, key, value);
373
1005
  });
374
1006
  },
375
- setAttributes(value) {
1007
+ mergeAttribute(key, patch) {
376
1008
  updateCurrentSpan((currentSpan) => {
377
- mergeSpanAttributes(currentSpan, value);
1009
+ mergeSpanAttribute(currentSpan, key, patch);
378
1010
  });
379
1011
  }
380
1012
  };
/**
 * Attach one or more recoverable errors to the active eval span.
 *
 * By default the captured errors are appended to the span and its status
 * becomes `error`. Pass `'warning'` or `{ level: 'warning' }` as the final
 * argument to record the diagnostics as warnings without changing span
 * status. All diagnostics from one call share the same `capturedAt`
 * timestamp.
 */
function captureEvalSpanError(errorOrErrors, ...additionalErrorsOrOptions) {
  const { additionalErrors, options } = splitCaptureEvalSpanErrorArgs(additionalErrorsOrOptions);
  const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
  const level = options.level ?? "error";
  if (level !== "warning") {
    const errors = normalizeTraceErrors(errorOrErrors, additionalErrors, capturedAt);
    updateCurrentSpan((currentSpan) => {
      appendSpanErrors(currentSpan, errors);
    });
    return;
  }
  const warnings = normalizeTraceWarnings(errorOrErrors, additionalErrors, capturedAt);
  updateCurrentSpan((currentSpan) => {
    appendSpanWarnings(currentSpan, warnings);
  });
}
381
1036
  async function traceSpan(info, fn) {
382
1037
  const scope = getCurrentScope();
383
1038
  if (!scope) return await fn(noopActiveSpan());
@@ -421,8 +1076,8 @@ async function traceSpan(info, fn) {
421
1076
  "cache.storedAt": storedAt,
422
1077
  "cache.age": Date.now() - new Date(storedAt).getTime()
423
1078
  });
424
- replayRecording(scope, spanRecord, hit.recording);
425
- spanRecord.status = "ok";
1079
+ replayRecording(scope, spanRecord, hit.recording, { generateSpanId });
1080
+ spanRecord.status = hit.recording.finalStatus ?? (hasSpanError(spanRecord) ? "error" : "ok");
426
1081
  spanRecord.endedAt = (/* @__PURE__ */ new Date()).toISOString();
427
1082
  return hit.recording.returnValue;
428
1083
  }
@@ -431,7 +1086,7 @@ async function traceSpan(info, fn) {
431
1086
  else mergeSpanAttributes(spanRecord, { "cache.status": "bypass" });
432
1087
  const frame = {
433
1088
  baseSpanIndex: scope.spans.length,
434
- cachedSpanId: id,
1089
+ replayParentSpanId: id,
435
1090
  ops: []
436
1091
  };
437
1092
  scope.recordingStack.push(frame);
@@ -442,16 +1097,24 @@ async function traceSpan(info, fn) {
442
1097
  scope.recordingStack.pop();
443
1098
  }
444
1099
  appendSubSpanOps(scope, frame);
1100
+ finishSpanWithoutThrownError(spanRecord);
445
1101
  if (ctx.mode !== "bypass") {
446
1102
  const recording = {
447
1103
  returnValue: toJsonSafe(bodyResult),
448
1104
  finalAttributes: stripCacheAttributes(spanRecord.attributes),
1105
+ finalStatus: spanRecord.status,
1106
+ finalError: spanRecord.error,
1107
+ finalErrors: spanRecord.errors,
1108
+ finalWarning: spanRecord.warning,
1109
+ finalWarnings: spanRecord.warnings,
449
1110
  ops: frame.ops
450
1111
  };
451
1112
  const entry = {
452
1113
  version: 1,
453
1114
  key: keyHash,
454
1115
  namespace,
1116
+ operationType: "span",
1117
+ operationName: info.name,
455
1118
  spanName: info.name,
456
1119
  spanKind: info.kind,
457
1120
  storedAt: (/* @__PURE__ */ new Date()).toISOString(),
@@ -460,23 +1123,15 @@ async function traceSpan(info, fn) {
460
1123
  };
461
1124
  await ctx.adapter.write(entry);
462
1125
  }
463
- spanRecord.status = "ok";
464
- spanRecord.endedAt = (/* @__PURE__ */ new Date()).toISOString();
465
1126
  return bodyResult;
466
1127
  }
467
1128
  const result = await fn(activeSpan);
468
- spanRecord.status = "ok";
469
- spanRecord.endedAt = (/* @__PURE__ */ new Date()).toISOString();
1129
+ finishSpanWithoutThrownError(spanRecord);
470
1130
  return result;
471
1131
  } catch (error) {
472
1132
  spanRecord.status = "error";
473
1133
  spanRecord.endedAt = (/* @__PURE__ */ new Date()).toISOString();
474
- if (error instanceof Error) spanRecord.error = {
475
- name: error.name,
476
- message: error.message,
477
- stack: error.stack
478
- };
479
- else spanRecord.error = { message: String(error) };
1134
+ spanRecord.error = normalizeTraceError(error);
480
1135
  throw error;
481
1136
  } finally {
482
1137
  scope.spanStack.pop();
@@ -491,6 +1146,13 @@ const evalTracer = {
491
1146
  /** Run a callback inside a new trace span and record its lifecycle. */
492
1147
  span: traceSpan,
493
1148
  /**
1149
+ * Cache a pure value without creating a trace span.
1150
+ *
1151
+ * When called inside an active span, the span receives a `cache.refs` entry
1152
+ * describing the value cache status for this run.
1153
+ */
1154
+ cache: createTraceCache(generateSpanId),
1155
+ /**
494
1156
  * Start a span whose lifecycle is controlled by an external tracer/exporter.
495
1157
  *
496
1158
  * Calls are no-ops outside an eval case scope, except that a generated or
@@ -571,189 +1233,6 @@ function buildTraceTree(spans, checkpoints) {
571
1233
  checkpoints
572
1234
  };
573
1235
  }
574
- var SerializedCacheKeyValue = class {
575
- value;
576
- constructor(value) {
577
- this.value = value;
578
- }
579
- };
580
- /**
581
- * Hash the components of a cache key into a deterministic hex digest.
582
- *
583
- * Native `Blob` and `File` values are read asynchronously and hashed by
584
- * content. Use `hashCacheKeySync` only when the key contains no async values.
585
- */
586
- async function hashCacheKey(input) {
587
- return hashCacheKeySyncMaterialized(await materializeAsyncCacheKeyValue(input));
588
- }
589
- /**
590
- * Synchronously hash cache key components. This supports JSON-like data and
591
- * in-memory binary values such as `Buffer`, `ArrayBuffer`, and typed arrays,
592
- * but cannot content-hash native `Blob` or `File` values.
593
- */
594
- function hashCacheKeySync(input) {
595
- return hashCacheKeySyncMaterialized(input);
596
- }
597
- function hashCacheKeySyncMaterialized(input) {
598
- return createHash("sha256").update(getCompositeKey(input, { stringify: stringifyCacheKeyValue })).digest("hex");
599
- }
600
- function stringifyCacheKeyValue(value) {
601
- if (value instanceof SerializedCacheKeyValue) return value.value;
602
- if (Buffer$1.isBuffer(value)) return `$buffer:${hashBytes(value)}`;
603
- if (isArrayBuffer(value)) return `$arrayBuffer:${hashBytes(new Uint8Array(value))}`;
604
- if (isSharedArrayBuffer(value)) return `$sharedArrayBuffer:${hashBytes(new Uint8Array(value))}`;
605
- if (isArrayBufferView(value)) {
606
- const bytes = new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
607
- return `$${value.constructor.name}:${hashBytes(bytes)}`;
608
- }
609
- if (isFile$1(value)) return `$file:${getCompositeKey({
610
- lastModified: value.lastModified,
611
- name: value.name,
612
- size: value.size,
613
- type: value.type
614
- })}`;
615
- if (isBlob$1(value)) return `$blob:${getCompositeKey({
616
- size: value.size,
617
- type: value.type
618
- })}`;
619
- }
620
- async function materializeAsyncCacheKeyValue(value, refs = /* @__PURE__ */ new WeakSet()) {
621
- const serialized = await stringifyAsyncCacheKeyValue(value);
622
- if (serialized !== void 0) return new SerializedCacheKeyValue(serialized);
623
- if (stringifyCacheKeyValue(value) !== void 0) return value;
624
- if (!value || typeof value !== "object") return value;
625
- if (Array.isArray(value)) {
626
- const items = [];
627
- for (const item of value) items.push(await materializeAsyncCacheKeyValue(item, refs));
628
- return items;
629
- }
630
- if (refs.has(value)) throw new Error("Circular reference detected");
631
- refs.add(value);
632
- const entries = [];
633
- for (const [key, entryValue] of Object.entries(value)) entries.push([key, await materializeAsyncCacheKeyValue(entryValue, refs)]);
634
- refs.delete(value);
635
- return Object.fromEntries(entries);
636
- }
637
- async function stringifyAsyncCacheKeyValue(value) {
638
- if (isFile$1(value)) return `$file:${getCompositeKey({
639
- bytes: await hashBlobBytes(value),
640
- lastModified: value.lastModified,
641
- name: value.name,
642
- size: value.size,
643
- type: value.type
644
- })}`;
645
- if (isBlob$1(value)) return `$blob:${getCompositeKey({
646
- bytes: await hashBlobBytes(value),
647
- size: value.size,
648
- type: value.type
649
- })}`;
650
- }
651
- async function hashBlobBytes(value) {
652
- return hashBytes(new Uint8Array(await value.arrayBuffer()));
653
- }
654
- function hashBytes(value) {
655
- return createHash("sha256").update(value).digest("hex");
656
- }
657
- function isArrayBuffer(value) {
658
- return value instanceof ArrayBuffer;
659
- }
660
- function isSharedArrayBuffer(value) {
661
- return value instanceof SharedArrayBuffer;
662
- }
663
- function isArrayBufferView(value) {
664
- return ArrayBuffer.isView(value);
665
- }
666
- function isBlob$1(value) {
667
- return value instanceof Blob;
668
- }
669
- function isFile$1(value) {
670
- return value instanceof File;
671
- }
672
- function toJsonSafe(value) {
673
- if (value === void 0) return void 0;
674
- const text = JSON.stringify(value);
675
- return JSON.parse(text);
676
- }
677
- function stripCacheAttributes(attributes) {
678
- if (!attributes) return {};
679
- const result = {};
680
- for (const [key, value] of Object.entries(attributes)) if (!key.startsWith("cache.")) result[key] = value;
681
- return result;
682
- }
683
- function serializeSubSpanTree(scope, spanId) {
684
- const original = scope.spans.find((s) => s.id === spanId);
685
- if (!original) return {
686
- kind: "custom",
687
- name: "unknown",
688
- attributes: void 0,
689
- status: "ok",
690
- error: void 0,
691
- children: []
692
- };
693
- const children = scope.spans.filter((s) => s.parentId === spanId).map((child) => serializeSubSpanTree(scope, child.id));
694
- return {
695
- kind: original.kind,
696
- name: original.name,
697
- attributes: original.attributes,
698
- status: original.status,
699
- error: original.error,
700
- children
701
- };
702
- }
703
- function appendSubSpanOps(scope, frame) {
704
- for (let i = frame.baseSpanIndex; i < scope.spans.length; i++) {
705
- const candidate = scope.spans[i];
706
- if (candidate?.parentId === frame.cachedSpanId) frame.ops.push({
707
- kind: "subSpan",
708
- span: serializeSubSpanTree(scope, candidate.id)
709
- });
710
- }
711
- }
712
- function replayRecording(scope, parentSpan, recording) {
713
- scope.replayingDepth++;
714
- try {
715
- for (const op of recording.ops) applyRecordingOp(scope, parentSpan, op);
716
- if (Object.keys(recording.finalAttributes).length > 0) mergeSpanAttributes(parentSpan, recording.finalAttributes);
717
- } finally {
718
- scope.replayingDepth--;
719
- }
720
- }
721
- function applyRecordingOp(scope, parentSpan, op) {
722
- if (op.kind === "setOutput") {
723
- scope.outputs[op.key] = op.value;
724
- return;
725
- }
726
- if (op.kind === "incrementOutput") {
727
- const existing = scope.outputs[op.key];
728
- if (existing === void 0) scope.outputs[op.key] = op.delta;
729
- else if (typeof existing === "number") scope.outputs[op.key] = existing + op.delta;
730
- else scope.assertionFailures.push({ message: `replay incrementEvalOutput("${op.key}"): existing value is ${typeof existing}, expected number` });
731
- return;
732
- }
733
- if (op.kind === "checkpoint") {
734
- scope.checkpoints.set(op.name, op.data);
735
- return;
736
- }
737
- replaySerializedSpan(scope, parentSpan.id, op.span);
738
- }
739
- function replaySerializedSpan(scope, parentId, serialized) {
740
- const id = generateSpanId();
741
- const now = (/* @__PURE__ */ new Date()).toISOString();
742
- const replayed = {
743
- id,
744
- parentId,
745
- caseId: scope.caseId,
746
- kind: serialized.kind,
747
- name: serialized.name,
748
- startedAt: now,
749
- endedAt: now,
750
- status: serialized.status,
751
- attributes: serialized.attributes,
752
- error: serialized.error
753
- };
754
- scope.spans.push(replayed);
755
- for (const child of serialized.children) replaySerializedSpan(scope, id, child);
756
- }
757
1236
  //#endregion
758
1237
  //#region ../shared/src/schemas/display.ts
759
1238
  const scalarCellSchema = z.union([
@@ -886,6 +1365,15 @@ const traceAttributeDisplayInputSchema = z.object({
886
1365
  });
887
1366
  /** Schema for authored trace display config in eval or workspace config. */
888
1367
  const traceDisplayInputConfigSchema = z.object({ attributes: z.array(traceAttributeDisplayInputSchema).optional() });
1368
+ /** Schema for an error attached to a trace span. */
1369
+ const traceSpanErrorSchema = z.object({
1370
+ name: z.string().optional(),
1371
+ message: z.string(),
1372
+ stack: z.string().optional(),
1373
+ capturedAt: z.string().optional()
1374
+ }).catchall(z.unknown());
1375
+ /** Schema for a warning attached to a trace span. */
1376
+ const traceSpanWarningSchema = traceSpanErrorSchema;
889
1377
  /** Schema for a persisted trace span captured during case execution. */
890
1378
  const traceSpanSchema = z.object({
891
1379
  id: z.string(),
@@ -902,11 +1390,10 @@ const traceSpanSchema = z.object({
902
1390
  "cancelled"
903
1391
  ]),
904
1392
  attributes: z.record(z.string(), z.unknown()).optional(),
905
- error: z.object({
906
- name: z.string().optional(),
907
- message: z.string(),
908
- stack: z.string().optional()
909
- }).optional()
1393
+ error: traceSpanErrorSchema.optional(),
1394
+ errors: z.array(traceSpanErrorSchema).optional(),
1395
+ warning: traceSpanWarningSchema.optional(),
1396
+ warnings: z.array(traceSpanWarningSchema).optional()
910
1397
  });
911
1398
  //#endregion
912
1399
  //#region ../shared/src/schemas/chart.ts
@@ -1167,12 +1654,16 @@ const spanCacheOptionsSchema = z.object({
1167
1654
  /** Override the default namespace (`${evalId}__${spanName}`). */
1168
1655
  namespace: z.string().optional()
1169
1656
  });
1657
+ /** Category of operation stored in the eval cache. */
1658
+ const cacheOperationTypeSchema = z.enum(["span", "value"]);
1170
1659
  /** Summary of a single persisted cache entry, used by list/delete endpoints. */
1171
1660
  const cacheListItemSchema = z.object({
1172
1661
  key: z.string(),
1173
1662
  namespace: z.string(),
1174
- spanName: z.string(),
1175
- spanKind: traceSpanKindSchema,
1663
+ operationType: cacheOperationTypeSchema,
1664
+ operationName: z.string(),
1665
+ spanName: z.string().optional(),
1666
+ spanKind: traceSpanKindSchema.optional(),
1176
1667
  storedAt: z.string(),
1177
1668
  codeFingerprint: z.string(),
1178
1669
  sizeBytes: z.number()
@@ -1188,11 +1679,10 @@ const serializedCacheSpanSchema = z.object({
1188
1679
  "error",
1189
1680
  "cancelled"
1190
1681
  ]),
1191
- error: z.object({
1192
- name: z.string().optional(),
1193
- message: z.string(),
1194
- stack: z.string().optional()
1195
- }).optional()
1682
+ error: traceSpanErrorSchema.optional(),
1683
+ errors: z.array(traceSpanErrorSchema).optional(),
1684
+ warning: traceSpanWarningSchema.optional(),
1685
+ warnings: z.array(traceSpanWarningSchema).optional()
1196
1686
  }).extend({ children: z.lazy(() => z.array(serializedCacheSpanSchema)) });
1197
1687
  /**
1198
1688
  * One captured operation performed while a cached span's body executed.
@@ -1206,6 +1696,16 @@ const cacheRecordingOpSchema = z.discriminatedUnion("kind", [
1206
1696
  key: z.string(),
1207
1697
  value: z.unknown()
1208
1698
  }),
1699
+ z.object({
1700
+ kind: z.literal("appendOutput"),
1701
+ key: z.string(),
1702
+ value: z.unknown()
1703
+ }),
1704
+ z.object({
1705
+ kind: z.literal("mergeOutput"),
1706
+ key: z.string(),
1707
+ patch: z.record(z.string(), z.unknown())
1708
+ }),
1209
1709
  z.object({
1210
1710
  kind: z.literal("incrementOutput"),
1211
1711
  key: z.string(),
@@ -1225,6 +1725,16 @@ const cacheRecordingOpSchema = z.discriminatedUnion("kind", [
1225
1725
  const cacheRecordingSchema = z.object({
1226
1726
  returnValue: z.unknown(),
1227
1727
  finalAttributes: z.record(z.string(), z.unknown()),
1728
+ finalStatus: z.enum([
1729
+ "running",
1730
+ "ok",
1731
+ "error",
1732
+ "cancelled"
1733
+ ]).optional(),
1734
+ finalError: traceSpanErrorSchema.optional(),
1735
+ finalErrors: z.array(traceSpanErrorSchema).optional(),
1736
+ finalWarning: traceSpanWarningSchema.optional(),
1737
+ finalWarnings: z.array(traceSpanWarningSchema).optional(),
1228
1738
  ops: z.array(cacheRecordingOpSchema)
1229
1739
  });
1230
1740
  /** Persisted cache file containing metadata and a recording. */
@@ -1232,8 +1742,10 @@ const cacheEntrySchema = z.object({
1232
1742
  version: z.literal(1),
1233
1743
  key: z.string(),
1234
1744
  namespace: z.string(),
1235
- spanName: z.string(),
1236
- spanKind: traceSpanKindSchema,
1745
+ operationType: cacheOperationTypeSchema.optional(),
1746
+ operationName: z.string().optional(),
1747
+ spanName: z.string().optional(),
1748
+ spanKind: traceSpanKindSchema.optional(),
1237
1749
  storedAt: z.string(),
1238
1750
  codeFingerprint: z.string(),
1239
1751
  recording: cacheRecordingSchema
@@ -1543,15 +2055,21 @@ function createFsCacheStore(options) {
1543
2055
  if (fileStatResult.error || !fileStatResult.value.isFile()) continue;
1544
2056
  const cacheFile = await readCacheFilePath(filePath);
1545
2057
  if (cacheFile === null) continue;
1546
- for (const entry of Object.values(cacheFile.entries)) items.push({
1547
- key: entry.key,
1548
- namespace: entry.namespace,
1549
- spanName: entry.spanName,
1550
- spanKind: entry.spanKind,
1551
- storedAt: entry.storedAt,
1552
- codeFingerprint: entry.codeFingerprint,
1553
- sizeBytes: Buffer.byteLength(JSON.stringify(entry), "utf8")
1554
- });
2058
+ for (const entry of Object.values(cacheFile.entries)) {
2059
+ const operationType = entry.operationType ?? "span";
2060
+ const operationName = entry.operationName ?? entry.spanName ?? entry.namespace;
2061
+ items.push({
2062
+ key: entry.key,
2063
+ namespace: entry.namespace,
2064
+ operationType,
2065
+ operationName,
2066
+ spanName: entry.spanName,
2067
+ spanKind: entry.spanKind,
2068
+ storedAt: entry.storedAt,
2069
+ codeFingerprint: entry.codeFingerprint,
2070
+ sizeBytes: Buffer.byteLength(JSON.stringify(entry), "utf8")
2071
+ });
2072
+ }
1555
2073
  }
1556
2074
  items.sort((a, b) => a.storedAt < b.storedAt ? 1 : -1);
1557
2075
  return items;
@@ -2442,12 +2960,15 @@ async function runCase(params) {
2442
2960
  input: evalCase.input,
2443
2961
  signal
2444
2962
  }]);
2445
- }, { cacheContext: cacheAdapter ? {
2446
- adapter: cacheAdapter,
2447
- mode: cacheMode,
2448
- evalId,
2449
- codeFingerprint
2450
- } : void 0 });
2963
+ }, {
2964
+ input: evalCase.input,
2965
+ cacheContext: cacheAdapter ? {
2966
+ adapter: cacheAdapter,
2967
+ mode: cacheMode,
2968
+ evalId,
2969
+ codeFingerprint
2970
+ } : void 0
2971
+ });
2451
2972
  const traceTree = buildTraceTree(scope.spans, scope.checkpoints);
2452
2973
  const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
2453
2974
  if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) scope.assertionFailures.push(toAssertionFailure(executeError.message, executeError));
@@ -2463,20 +2984,31 @@ async function runCase(params) {
2463
2984
  const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
2464
2985
  scope.assertionFailures.push(toAssertionFailure(message, e instanceof Error ? e : void 0));
2465
2986
  }
2987
+ if (!nonAssertError && evalDef.outputsSchema) {
2988
+ const parsedOutputs = evalDef.outputsSchema.safeParse(getOutputsSchemaInput(evalDef.outputsSchema, scope.outputs));
2989
+ if (parsedOutputs.success) scope.outputs = {
2990
+ ...scope.outputs,
2991
+ ...parsedOutputs.data
2992
+ };
2993
+ else scope.assertionFailures.push(toAssertionFailure(formatOutputsSchemaError(parsedOutputs.error)));
2994
+ }
2466
2995
  const scoreResults = /* @__PURE__ */ new Map();
2467
2996
  const scoringTraces = {};
2468
- if (!nonAssertError && evalDef.scores) for (const [key, def] of Object.entries(evalDef.scores)) {
2997
+ if (!nonAssertError && scope.assertionFailures.length === 0 && evalDef.scores) for (const [key, def] of Object.entries(evalDef.scores)) {
2469
2998
  const { compute, passThreshold, label } = normalizeScoreDef(def);
2470
2999
  const scoreRun = await runInEvalScope(evalCase.id, async () => await callWithUnknownResult(compute, [{
2471
3000
  input: evalCase.input,
2472
3001
  outputs: { ...scope.outputs },
2473
3002
  case: evalCase
2474
- }]), { cacheContext: cacheAdapter ? {
2475
- adapter: cacheAdapter,
2476
- mode: cacheMode,
2477
- evalId: `${evalId}__score__${key}`,
2478
- codeFingerprint
2479
- } : void 0 });
3003
+ }]), {
3004
+ input: evalCase.input,
3005
+ cacheContext: cacheAdapter ? {
3006
+ adapter: cacheAdapter,
3007
+ mode: cacheMode,
3008
+ evalId: `${evalId}__score__${key}`,
3009
+ codeFingerprint
3010
+ } : void 0
3011
+ });
2480
3012
  const { trace, traceDisplay } = resolveTracePresentation(scoreRun.scope.spans, globalTraceDisplay, evalDef.traceDisplay);
2481
3013
  if (trace.length > 0) scoringTraces[key] = {
2482
3014
  trace,
@@ -2567,6 +3099,19 @@ function isRecord(value) {
2567
3099
  function isBlob(value) {
2568
3100
  return value instanceof Blob;
2569
3101
  }
3102
+ function getOutputsSchemaInput(schema, outputs) {
3103
+ if (!(schema instanceof z.ZodObject)) return outputs;
3104
+ const configuredOutputs = {};
3105
+ for (const key of Object.keys(schema.shape)) if (key in outputs) configuredOutputs[key] = outputs[key];
3106
+ return configuredOutputs;
3107
+ }
3108
+ function formatOutputsSchemaError(error) {
3109
+ const issueLines = error.issues.map((issue) => {
3110
+ return `${issue.path.length > 0 ? issue.path.join(".") : "<root>"}: ${issue.message}`;
3111
+ });
3112
+ if (issueLines.length === 0) return "outputsSchema validation failed";
3113
+ return `outputsSchema validation failed:\n${issueLines.join("\n")}`;
3114
+ }
2570
3115
  function toAssertionFailure(message, error = void 0) {
2571
3116
  return error?.stack ? {
2572
3117
  message,
@@ -3036,6 +3581,39 @@ function toLastRunStatus(status) {
3036
3581
  }
3037
3582
  //#endregion
3038
3583
  //#region ../runner/src/runner.ts
3584
+ const globMagicCharacters = new Set([
3585
+ "*",
3586
+ "?",
3587
+ "[",
3588
+ "]",
3589
+ "{",
3590
+ "}",
3591
+ "(",
3592
+ ")",
3593
+ "!",
3594
+ "+",
3595
+ "@"
3596
+ ]);
3597
+ function hasGlobMagic(value) {
3598
+ for (const char of value) if (globMagicCharacters.has(char)) return true;
3599
+ return false;
3600
+ }
3601
+ function getWatchRootForIncludePattern(params) {
3602
+ const segments = params.pattern.replaceAll("\\", "/").split("/").filter((part) => part !== "");
3603
+ const firstGlobSegmentIndex = segments.findIndex(hasGlobMagic);
3604
+ if (firstGlobSegmentIndex === -1) return dirname(resolve(params.workspaceRoot, params.pattern));
3605
+ if (firstGlobSegmentIndex === 0) return params.workspaceRoot;
3606
+ return resolve(params.workspaceRoot, segments.slice(0, firstGlobSegmentIndex).join("/"));
3607
+ }
3608
+ function getWatchRootsForIncludePatterns(params) {
3609
+ const roots = /* @__PURE__ */ new Set();
3610
+ for (const pattern of params.patterns) roots.add(getWatchRootForIncludePattern({
3611
+ pattern,
3612
+ workspaceRoot: params.workspaceRoot
3613
+ }));
3614
+ if (roots.size === 0) return [params.workspaceRoot];
3615
+ return [...roots];
3616
+ }
3039
3617
  /** Create an in-memory eval runner bound to the current workspace config. */
3040
3618
  function createRunner({ watchForChanges = true } = {}) {
3041
3619
  let config;
@@ -3048,6 +3626,8 @@ function createRunner({ watchForChanges = true } = {}) {
3048
3626
  const latestRunInfoMap = /* @__PURE__ */ new Map();
3049
3627
  const discoveryListeners = /* @__PURE__ */ new Set();
3050
3628
  let nextShortIdNum = 0;
3629
+ let discoveryWatcher;
3630
+ let discoveryRefreshTimer;
3051
3631
  function toWorkspaceRelativePath(filePath) {
3052
3632
  return relative(workspaceRoot, filePath).replaceAll("\\", "/");
3053
3633
  }
@@ -3076,7 +3656,7 @@ function createRunner({ watchForChanges = true } = {}) {
3076
3656
  });
3077
3657
  await loadPersistedRuns();
3078
3658
  await runner.refreshDiscovery();
3079
- if (watchForChanges) setupWatcher();
3659
+ if (watchForChanges) await setupWatcher();
3080
3660
  },
3081
3661
  async listCache() {
3082
3662
  return cacheStore.list();
@@ -3401,6 +3981,16 @@ function createRunner({ watchForChanges = true } = {}) {
3401
3981
  discoveryListeners.delete(listener);
3402
3982
  };
3403
3983
  },
3984
+ async close() {
3985
+ if (discoveryRefreshTimer !== void 0) {
3986
+ clearTimeout(discoveryRefreshTimer);
3987
+ discoveryRefreshTimer = void 0;
3988
+ }
3989
+ const watcher = discoveryWatcher;
3990
+ if (watcher === void 0) return;
3991
+ discoveryWatcher = void 0;
3992
+ await watcher.close();
3993
+ },
3404
3994
  getWorkspaceRoot() {
3405
3995
  return workspaceRoot;
3406
3996
  },
@@ -3408,19 +3998,29 @@ function createRunner({ watchForChanges = true } = {}) {
3408
3998
  return resolveArtifactPath(join(localStateDir, "runs"), artifactId_);
3409
3999
  }
3410
4000
  };
3411
- function setupWatcher() {
3412
- const watcher = watch(config.include.map((p) => resolve(workspaceRoot, p)), {
4001
+ async function setupWatcher() {
4002
+ const watcher = watch(getWatchRootsForIncludePatterns({
4003
+ patterns: config.include,
4004
+ workspaceRoot
4005
+ }), {
3413
4006
  ignoreInitial: true,
3414
4007
  persistent: true
3415
4008
  });
3416
- watcher.on("change", () => {
3417
- runner.refreshDiscovery();
3418
- });
3419
- watcher.on("add", () => {
3420
- runner.refreshDiscovery();
3421
- });
3422
- watcher.on("unlink", () => {
3423
- runner.refreshDiscovery();
4009
+ discoveryWatcher = watcher;
4010
+ const scheduleRefresh = () => {
4011
+ if (discoveryRefreshTimer !== void 0) clearTimeout(discoveryRefreshTimer);
4012
+ discoveryRefreshTimer = setTimeout(() => {
4013
+ discoveryRefreshTimer = void 0;
4014
+ runner.refreshDiscovery();
4015
+ }, 50);
4016
+ };
4017
+ watcher.on("change", scheduleRefresh);
4018
+ watcher.on("add", scheduleRefresh);
4019
+ watcher.on("unlink", scheduleRefresh);
4020
+ watcher.on("addDir", scheduleRefresh);
4021
+ watcher.on("unlinkDir", scheduleRefresh);
4022
+ await new Promise((ready) => {
4023
+ watcher.once("ready", ready);
3424
4024
  });
3425
4025
  }
3426
4026
  function emitDiscoveryEvent() {
@@ -3467,6 +4067,7 @@ function createRunner({ watchForChanges = true } = {}) {
3467
4067
  //#endregion
3468
4068
  //#region src/cli.ts
3469
4069
  function parseArgs(argv) {
4070
+ const normalizedArgv = argv.filter((arg) => arg !== "--no-env");
3470
4071
  const args = {
3471
4072
  command: "help",
3472
4073
  subcommand: void 0,
@@ -3480,9 +4081,10 @@ function parseArgs(argv) {
3480
4081
  port: 4100,
3481
4082
  cacheMode: "use",
3482
4083
  clearCache: false,
3483
- all: false
4084
+ all: false,
4085
+ loadEnv: normalizedArgv.length === argv.length
3484
4086
  };
3485
- const command = argv[0];
4087
+ const command = normalizedArgv[0];
3486
4088
  if (command === "--help" || command === "-h") {
3487
4089
  args.showHelp = true;
3488
4090
  return args;
@@ -3493,16 +4095,16 @@ function parseArgs(argv) {
3493
4095
  } else if (command !== void 0 && !command.startsWith("-")) args.unknownHelpTarget = command;
3494
4096
  let cursor = 1;
3495
4097
  if (args.command === "cache") {
3496
- const sub = argv[cursor];
4098
+ const sub = normalizedArgv[cursor];
3497
4099
  if (sub === "list" || sub === "clear") {
3498
4100
  args.subcommand = sub;
3499
4101
  args.helpTopic = `cache ${sub}`;
3500
4102
  cursor++;
3501
4103
  } else if (sub !== void 0 && !sub.startsWith("-")) args.unknownHelpTarget = `cache ${sub}`;
3502
4104
  }
3503
- for (let i = cursor; i < argv.length; i++) {
3504
- const arg = argv[i];
3505
- const next = argv[i + 1];
4105
+ for (let i = cursor; i < normalizedArgv.length; i++) {
4106
+ const arg = normalizedArgv[i];
4107
+ const next = normalizedArgv[i + 1];
3506
4108
  if (arg === "--help" || arg === "-h") args.showHelp = true;
3507
4109
  else if (arg === "--eval" && next) {
3508
4110
  args.evalIds.push(...next.split(","));
@@ -3534,6 +4136,10 @@ function parseArgs(argv) {
3534
4136
  */
3535
4137
  async function runCli(argv) {
3536
4138
  const args = parseArgs(argv);
4139
+ if (args.loadEnv && !loadWorkspaceEnv()) {
4140
+ process.exit(1);
4141
+ return;
4142
+ }
3537
4143
  if (args.showHelp) {
3538
4144
  if (args.unknownHelpTarget !== void 0) {
3539
4145
  console.error(`No help found for "${args.unknownHelpTarget}".`);
@@ -3564,6 +4170,18 @@ async function runCli(argv) {
3564
4170
  function isCliCommand(command) {
3565
4171
  return command === "app" || command === "list" || command === "run" || command === "cache" || command === "help";
3566
4172
  }
4173
+ function loadWorkspaceEnv() {
4174
+ const envPath = resolve(process.cwd(), ".env");
4175
+ if (!existsSync(envPath)) return true;
4176
+ const loadResult = resultify(() => {
4177
+ process.loadEnvFile(envPath);
4178
+ });
4179
+ if (loadResult.error) {
4180
+ console.error(`Failed to load .env at ${envPath}: ${loadResult.error.message}`);
4181
+ return false;
4182
+ }
4183
+ return true;
4184
+ }
3567
4185
  const currentDir = dirname(fileURLToPath(import.meta.url));
3568
4186
  const repoRoot = resolve(currentDir, "../../..");
3569
4187
  const pnpmCommand = process.platform === "win32" ? "pnpm.cmd" : "pnpm";
@@ -3612,8 +4230,8 @@ async function commandApp(args) {
3612
4230
  const { serve } = await import("@hono/node-server");
3613
4231
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
3614
4232
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
3615
- const appModule = await import("./app-CljutWb7.mjs");
3616
- const runnerModule = await import("./runner-CsSJwWE4.mjs");
4233
+ const appModule = await import("./app-7qDBq_ub.mjs");
4234
+ const runnerModule = await import("./runner-uzzY8kk1.mjs");
3617
4235
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
3618
4236
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
3619
4237
  await runnerModule.initRunner();
@@ -3714,7 +4332,8 @@ async function commandCache(args) {
3714
4332
  for (const entry of entries) {
3715
4333
  console.info(` ${entry.namespace}`);
3716
4334
  console.info(` key: ${entry.key}`);
3717
- console.info(` span: ${entry.spanName} (${entry.spanKind})`);
4335
+ const operationLabel = entry.operationType === "span" ? `${entry.operationName} (span ${entry.spanKind ?? "unknown"})` : `${entry.operationName} (value)`;
4336
+ console.info(` operation: ${operationLabel}`);
3718
4337
  console.info(` stored: ${entry.storedAt}`);
3719
4338
  console.info(` size: ${String(entry.sizeBytes)} bytes`);
3720
4339
  console.info("");
@@ -3769,6 +4388,7 @@ Usage:
3769
4388
 
3770
4389
  Flags:
3771
4390
  --port <n> Server port (default: 4100)
4391
+ --no-env Disable automatic .env loading
3772
4392
  --help, -h Show this help
3773
4393
  `);
3774
4394
  return;
@@ -3781,6 +4401,7 @@ Usage:
3781
4401
  agent-evals list [flags]
3782
4402
 
3783
4403
  Flags:
4404
+ --no-env Disable automatic .env loading
3784
4405
  --help, -h Show this help
3785
4406
  `);
3786
4407
  return;
@@ -3801,6 +4422,7 @@ Flags:
3801
4422
  --no-cache Shortcut for --cache bypass
3802
4423
  --refresh-cache Shortcut for --cache refresh
3803
4424
  --clear-cache Clear the cache before starting the run
4425
+ --no-env Disable automatic .env loading
3804
4426
  --help, -h Show this help
3805
4427
  `);
3806
4428
  return;
@@ -3818,6 +4440,7 @@ Flags:
3818
4440
  --eval <id> Clear entries for specific eval(s) (comma-separated)
3819
4441
  --all Confirm clearing every cached entry
3820
4442
  --json Output cache listing as JSON
4443
+ --no-env Disable automatic .env loading
3821
4444
  --help, -h Show this help
3822
4445
  `);
3823
4446
  return;
@@ -3844,8 +4467,9 @@ Options:
3844
4467
  --no-cache Shortcut for --cache bypass
3845
4468
  --refresh-cache Shortcut for --cache refresh
3846
4469
  --clear-cache Clear the cache before starting the run
4470
+ --no-env Disable automatic .env loading
3847
4471
  --help, -h Show help
3848
4472
  `);
3849
4473
  }
3850
4474
  //#endregion
3851
- export { fileRefSchema as $, evalSummarySchema as A, evalChartsConfigSchema as B, assertionFailureSchema as C, evalStatAggregateSchema as D, evalFreshnessStatusSchema as E, evalChartColorSchema as F, traceDisplayConfigSchema as G, traceAttributeDisplayInputSchema as H, evalChartConfigSchema as I, traceSpanSchema as J, traceDisplayInputConfigSchema as K, evalChartMetricSchema as L, evalChartAggregateSchema as M, evalChartAxisSchema as N, evalStatItemSchema as O, evalChartBuiltinMetricSchema as P, columnKindSchema as Q, evalChartTooltipExtraSchema as R, spanCacheOptionsSchema as S, caseRowSchema as T, traceAttributeDisplayPlacementSchema as U, traceAttributeDisplayFormatSchema as V, traceAttributeDisplaySchema as W, columnDefSchema as X, cellValueSchema as Y, columnFormatSchema as Z, cacheListItemSchema as _, repoFile as _t, sseEnvelopeSchema as a, evalSpan as at, cacheRecordingSchema as b, deriveScopedSummaryFromCases as c, hashCacheKeySync as ct, runManifestSchema as d, getCurrentScope as dt, jsonCellSchema as et, runSummarySchema as f, incrementEvalOutput as ft, cacheFileSchema as g, setScopeCacheContext as gt, cacheEntrySchema as h, setEvalOutput as ht, updateManualScoreRequestSchema as i, buildTraceTree as it, scoreTraceSchema as j, evalStatsConfigSchema as k, deriveStatusFromCaseRows as l, EvalAssertionError as lt, trialSelectionModeSchema as m, runInEvalScope as mt, createRunner as n, repoFileRefSchema as nt, getEvalTitle as o, evalTracer as ot, agentEvalsConfigSchema as p, isInEvalScope as pt, traceSpanKindSchema as q, createRunRequestSchema as r, runArtifactRefSchema as rt, getEvalDisplayStatus as s, hashCacheKey as st, runCli as t, numberDisplayOptionsSchema as tt, deriveStatusFromChildStatuses as u, evalAssert as ut, cacheModeSchema as v, defineEval as vt, caseDetailSchema as w, serializedCacheSpanSchema as x, cacheRecordingOpSchema as y, getEvalRegistry as yt, evalChartTypeSchema as z };
4475
+ export { columnDefSchema as $, evalStatsConfigSchema as A, evalChartTypeSchema as B, spanCacheOptionsSchema as C, setEvalOutput as Ct, evalFreshnessStatusSchema as D, getEvalRegistry as Dt, caseRowSchema as E, defineEval as Et, evalChartBuiltinMetricSchema as F, traceAttributeDisplaySchema as G, traceAttributeDisplayFormatSchema as H, evalChartColorSchema as I, traceSpanErrorSchema as J, traceDisplayConfigSchema as K, evalChartConfigSchema as L, scoreTraceSchema as M, evalChartAggregateSchema as N, evalStatAggregateSchema as O, evalChartAxisSchema as P, cellValueSchema as Q, evalChartMetricSchema as R, serializedCacheSpanSchema as S, runInEvalScope as St, caseDetailSchema as T, repoFile as Tt, traceAttributeDisplayInputSchema as U, evalChartsConfigSchema as V, traceAttributeDisplayPlacementSchema as W, traceSpanSchema as X, traceSpanKindSchema as Y, traceSpanWarningSchema as Z, cacheListItemSchema as _, getCurrentScope as _t, sseEnvelopeSchema as a, repoFileRefSchema as at, cacheRecordingOpSchema as b, isInEvalScope as bt, deriveScopedSummaryFromCases as c, buildTraceTree as ct, runManifestSchema as d, evalTracer as dt, columnFormatSchema as et, runSummarySchema as f, hashCacheKey as ft, cacheFileSchema as g, evalAssert as gt, cacheEntrySchema as h, appendToEvalOutput as ht, updateManualScoreRequestSchema as i, numberDisplayOptionsSchema as it, evalSummarySchema as j, evalStatItemSchema as k, deriveStatusFromCaseRows as l, captureEvalSpanError as lt, trialSelectionModeSchema as m, EvalAssertionError as mt, createRunner as n, fileRefSchema as nt, getEvalTitle as o, runArtifactRefSchema as ot, agentEvalsConfigSchema as p, hashCacheKeySync as pt, traceDisplayInputConfigSchema as q, createRunRequestSchema as r, jsonCellSchema as rt, getEvalDisplayStatus as s, z$1 as st, runCli as t, columnKindSchema as tt, deriveStatusFromChildStatuses as u, evalSpan as ut, cacheModeSchema as v, getEvalCaseInput as vt, assertionFailureSchema as w, setScopeCacheContext as wt, 
cacheRecordingSchema as x, mergeEvalOutput as xt, cacheOperationTypeSchema as y, incrementEvalOutput as yt, evalChartTooltipExtraSchema as z };