@ls-stack/agent-eval 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,10 @@
1
1
  import { createHash } from "node:crypto";
2
2
  import { mkdir, readFile, readdir, rename, rm, stat, writeFile } from "node:fs/promises";
3
3
  import { dirname, extname, join, relative, resolve } from "node:path";
4
+ import { z, z as z$1 } from "zod/v4";
4
5
  import { AsyncLocalStorage } from "node:async_hooks";
5
6
  import { Buffer as Buffer$1 } from "node:buffer";
6
7
  import { getCompositeKey } from "@ls-stack/utils/getCompositeKey";
7
- import { z } from "zod/v4";
8
8
  import { watch } from "chokidar";
9
9
  import { glob } from "glob";
10
10
  import { existsSync } from "node:fs";
@@ -70,6 +70,27 @@ function getCurrentScope() {
70
70
/** Report whether `getCurrentScope()` currently yields a scope. */
function isInEvalScope() {
  const scope = getCurrentScope();
  return scope !== undefined;
}
73
/**
 * Check whether a value is a non-null object (arrays included).
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isObjectLike(value) {
  if (value === null) return false;
  return typeof value === "object";
}
76
/**
 * Check whether a value is a non-null, non-array object.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isObjectRecord(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
79
/**
 * Return a shallow copy of an array; items are shared by reference.
 *
 * @param {Array<unknown>} value
 * @returns {Array<unknown>}
 */
function copyArray$1(value) {
  const identity = (item) => item;
  return value.map(identity);
}
82
/**
 * Read the current case scope's `input`, optionally following a dot path.
 *
 * Returns `undefined` when there is no current scope, when `path` is empty,
 * when any path segment is empty, or when a non-object value is reached
 * before the path is exhausted. Without `path` the whole input is returned.
 *
 * @param {string} [path] Dot-separated property path, e.g. `"user.name"`.
 */
function getEvalCaseInput(path = void 0) {
  const scope = getCurrentScope();
  if (!scope) return undefined;
  if (path === undefined) return scope.input;
  if (path.length === 0) return undefined;
  const segments = path.split(".");
  let cursor = scope.input;
  for (let index = 0; index < segments.length; index += 1) {
    const segment = segments[index];
    const canDescend = segment.length !== 0 && isObjectLike(cursor);
    if (!canDescend) return undefined;
    cursor = cursor[segment];
  }
  return cursor;
}
73
94
  /**
74
95
  * Attach cache context (adapter, mode, eval id, fingerprint) to a scope.
75
96
  *
@@ -86,6 +107,7 @@ function setScopeCacheContext(scope, context) {
86
107
  async function runInEvalScope(caseId, fn, options = {}) {
87
108
  const scope = {
88
109
  caseId,
110
+ input: options.input,
89
111
  outputs: {},
90
112
  assertionFailures: [],
91
113
  spans: [],
@@ -145,6 +167,58 @@ function setEvalOutput(key, value) {
145
167
  });
146
168
  }
147
169
/**
 * Append an item to an output array in the current case scope.
 *
 * Missing values become `[value]`, existing arrays receive the item, and
 * existing scalar/object values are preserved as `[existing, value]`. A new
 * array is always stored; a previously stored array is not mutated in place.
 * Calls made when there is no current scope are ignored.
 */
function appendToEvalOutput(key, value) {
  const scope = getCurrentScope();
  if (!scope) return;
  const existing = scope.outputs[key];
  let next;
  if (existing === void 0) next = [value];
  // Spreading already yields a fresh array, so no separate copy is needed.
  else if (Array.isArray(existing)) next = [...existing, value];
  else next = [existing, value];
  scope.outputs[key] = next;
  recordOpIfActive(scope, {
    kind: "appendOutput",
    key,
    value
  });
}
188
/**
 * Shallow-merge object fields into an output value in the current case scope.
 *
 * Missing values become a copy of `patch`. Non-object existing values
 * (including arrays and `null`) are recorded as assertion failures instead of
 * being replaced, and no op is recorded for them. Calls made when there is no
 * current scope are ignored.
 */
function mergeEvalOutput(key, patch) {
  const scope = getCurrentScope();
  if (!scope) return;
  const existing = scope.outputs[key];
  if (existing !== void 0 && !isObjectRecord(existing)) {
    // `typeof null` is "object"; name it explicitly so the message is not
    // the contradictory "existing value is object, expected object".
    const kind = existing === null ? "null" : Array.isArray(existing) ? "array" : typeof existing;
    scope.assertionFailures.push(toAssertionFailure$1(`mergeEvalOutput("${key}"): existing value is ${kind}, expected object`));
    return;
  }
  // Spreading `undefined` contributes nothing, so this covers the missing case too.
  scope.outputs[key] = {
    ...existing,
    ...patch
  };
  recordOpIfActive(scope, {
    kind: "mergeOutput",
    key,
    patch
  });
}
221
+ /**
148
222
  * Add a numeric delta to an output value in the current case scope.
149
223
  *
150
224
  * If the existing value is non-numeric, the operation is recorded as an
@@ -189,18 +263,267 @@ function evalAssert(condition, message) {
189
263
  throw error;
190
264
  }
191
265
  //#endregion
192
- //#region ../sdk/src/tracer.ts
193
- let spanIdCounter = 0;
266
+ //#region ../sdk/src/cacheKey.ts
267
/**
 * Wrapper marking a value as an already-serialized cache key fragment;
 * `stringifyCacheKeyValue` returns the wrapped `value` as-is.
 */
var SerializedCacheKeyValue = class {
  /** The serialized representation. */
  value;

  constructor(serialized) {
    this.value = serialized;
  }
};
273
/**
 * Hash the components of a cache key into a deterministic hex digest.
 *
 * Native `Blob` and `File` values are read asynchronously and hashed by
 * content. Use `hashCacheKeySync` only when the key contains no async values.
 */
async function hashCacheKey(input) {
  const materialized = await materializeAsyncCacheKeyValue(input);
  return hashCacheKeySyncMaterialized(materialized);
}
282
/**
 * Synchronously hash cache key components. This supports JSON-like data and
 * in-memory binary values such as `Buffer`, `ArrayBuffer`, and typed arrays,
 * but cannot content-hash native `Blob` or `File` values.
 */
function hashCacheKeySync(input) {
  const digest = hashCacheKeySyncMaterialized(input);
  return digest;
}
290
/**
 * Build a composite key string for `input` (using `stringifyCacheKeyValue`
 * as the custom stringifier) and return its SHA-256 hex digest.
 */
function hashCacheKeySyncMaterialized(input) {
  const compositeKey = getCompositeKey(input, { stringify: stringifyCacheKeyValue });
  const hasher = createHash("sha256");
  hasher.update(compositeKey);
  return hasher.digest("hex");
}
293
/**
 * Custom stringifier for cache key values.
 *
 * Returns a tagged string for pre-serialized wrappers, binary containers
 * (hashed by content), and `File`/`Blob` values (metadata only; bytes are
 * not read here). Returns `undefined` for every other value.
 */
function stringifyCacheKeyValue(value) {
  if (value instanceof SerializedCacheKeyValue) {
    return value.value;
  }
  // Buffer is tested before the generic view check so it keeps its own tag.
  if (Buffer$1.isBuffer(value)) {
    return `$buffer:${hashBytes(value)}`;
  }
  if (isArrayBuffer(value)) {
    const bytes = new Uint8Array(value);
    return `$arrayBuffer:${hashBytes(bytes)}`;
  }
  if (isSharedArrayBuffer(value)) {
    const bytes = new Uint8Array(value);
    return `$sharedArrayBuffer:${hashBytes(bytes)}`;
  }
  if (isArrayBufferView(value)) {
    // Hash only the window of the underlying buffer that this view covers.
    const { buffer, byteOffset, byteLength } = value;
    const digest = hashBytes(new Uint8Array(buffer, byteOffset, byteLength));
    return `$${value.constructor.name}:${digest}`;
  }
  // File is tested before Blob so files keep their name/lastModified metadata.
  if (isFile$1(value)) {
    const fileKey = getCompositeKey({
      lastModified: value.lastModified,
      name: value.name,
      size: value.size,
      type: value.type
    });
    return `$file:${fileKey}`;
  }
  if (isBlob$1(value)) {
    const blobKey = getCompositeKey({
      size: value.size,
      type: value.type
    });
    return `$blob:${blobKey}`;
  }
  return undefined;
}
313
/**
 * Recursively replace values that need asynchronous serialization with
 * `SerializedCacheKeyValue` wrappers so the result can be hashed
 * synchronously.
 *
 * Values that `stringifyCacheKeyValue` already handles and non-object values
 * are returned unchanged. Arrays and plain objects are rebuilt with their
 * members materialized.
 *
 * @param {unknown} value Value to materialize.
 * @param {WeakSet<object>} [refs] Containers currently being traversed; used
 *   to detect cycles.
 * @returns {Promise<unknown>}
 * @throws {Error} "Circular reference detected" when an array or object
 *   contains itself, directly or indirectly.
 */
async function materializeAsyncCacheKeyValue(value, refs = new WeakSet()) {
  const serialized = await stringifyAsyncCacheKeyValue(value);
  if (serialized !== void 0) return new SerializedCacheKeyValue(serialized);
  if (stringifyCacheKeyValue(value) !== void 0) return value;
  if (!value || typeof value !== "object") return value;
  // Arrays are tracked as well as objects: a self-containing array would
  // otherwise recurse without end.
  if (refs.has(value)) throw new Error("Circular reference detected");
  refs.add(value);
  try {
    if (Array.isArray(value)) {
      const items = [];
      for (const item of value) items.push(await materializeAsyncCacheKeyValue(item, refs));
      return items;
    }
    const entries = [];
    for (const [key, entryValue] of Object.entries(value)) {
      entries.push([key, await materializeAsyncCacheKeyValue(entryValue, refs)]);
    }
    return Object.fromEntries(entries);
  } finally {
    // Only ancestors stay tracked, so a value shared by siblings is not a cycle.
    refs.delete(value);
  }
}
330
/**
 * Serialize `File`/`Blob` values including a hash of their bytes.
 *
 * Resolves to a `$file:`/`$blob:` tagged string, or to `undefined` for any
 * other value.
 */
async function stringifyAsyncCacheKeyValue(value) {
  // File is tested first so files keep their name/lastModified metadata.
  if (isFile$1(value)) {
    const bytes = await hashBlobBytes(value);
    const fileKey = getCompositeKey({
      bytes,
      lastModified: value.lastModified,
      name: value.name,
      size: value.size,
      type: value.type
    });
    return `$file:${fileKey}`;
  }
  if (isBlob$1(value)) {
    const bytes = await hashBlobBytes(value);
    const blobKey = getCompositeKey({
      bytes,
      size: value.size,
      type: value.type
    });
    return `$blob:${blobKey}`;
  }
  return undefined;
}
344
/** Read a value's bytes via `arrayBuffer()` and return their `hashBytes` digest. */
async function hashBlobBytes(value) {
  const buffer = await value.arrayBuffer();
  const bytes = new Uint8Array(buffer);
  return hashBytes(bytes);
}
347
/**
 * Return the SHA-256 digest of the given data as a hex string.
 *
 * @param {string | Uint8Array} value
 * @returns {string}
 */
function hashBytes(value) {
  const hasher = createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex");
}
350
/**
 * Check whether a value is an `ArrayBuffer` instance.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isArrayBuffer(value) {
  const matches = value instanceof ArrayBuffer;
  return matches;
}
353
/**
 * Check whether a value is a `SharedArrayBuffer` instance.
 *
 * Returns `false` instead of throwing a `ReferenceError` when the
 * `SharedArrayBuffer` global is not defined in the current runtime.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isSharedArrayBuffer(value) {
  return typeof SharedArrayBuffer !== "undefined" && value instanceof SharedArrayBuffer;
}
356
/**
 * Check whether a value is an ArrayBuffer view, per `ArrayBuffer.isView`.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isArrayBufferView(value) {
  const isView = ArrayBuffer.isView(value);
  return isView;
}
359
/**
 * Check whether a value is a `Blob` instance.
 *
 * Returns `false` instead of throwing a `ReferenceError` when the `Blob`
 * global is not defined in the current runtime.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isBlob$1(value) {
  return typeof Blob !== "undefined" && value instanceof Blob;
}
362
/**
 * Check whether a value is a `File` instance.
 *
 * The `File` global is not available in every runtime (for example older
 * Node.js releases). An unguarded `instanceof File` would throw a
 * `ReferenceError` there, so a missing global is treated as "not a file".
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isFile$1(value) {
  return typeof File !== "undefined" && value instanceof File;
}
365
/**
 * Produce a JSON round-tripped copy of a value.
 *
 * Returns `undefined` when the value is `undefined` or has no JSON
 * representation at the top level (functions, symbols, or a `toJSON` that
 * yields `undefined`). Errors thrown by `JSON.stringify` itself (for example
 * on circular structures or BigInt values) still propagate.
 *
 * @param {unknown} value
 * @returns {unknown}
 */
function toJsonSafe(value) {
  if (value === void 0) return void 0;
  const text = JSON.stringify(value);
  // JSON.stringify returns undefined rather than a string for such values;
  // passing that to JSON.parse would throw a SyntaxError.
  if (text === void 0) return void 0;
  return JSON.parse(text);
}
370
+ //#endregion
371
+ //#region ../sdk/src/cacheRecording.ts
372
/**
 * Shallow-merge `attributes` into `span.attributes`, replacing the attribute
 * object with a fresh one. Keys in `attributes` win on conflict.
 */
function mergeSpanAttributes$1(span, attributes) {
  const merged = { ...span.attributes, ...attributes };
  span.attributes = merged;
}
378
/**
 * Check whether a value is a non-null, non-array object.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecordLike$1(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
381
/**
 * Describe a value's kind for diagnostic messages.
 *
 * Returns `"array"` for arrays and `"null"` for `null`; otherwise the
 * `typeof` result. `null` is named explicitly because `typeof null` is
 * `"object"`, which would make messages read "existing value is object,
 * expected object".
 *
 * @param {unknown} value
 * @returns {string}
 */
function valueKind$1(value) {
  if (value === null) return "null";
  return Array.isArray(value) ? "array" : typeof value;
}
384
/**
 * Return a shallow copy of an array; items are shared by reference.
 *
 * @param {Array<unknown>} value
 * @returns {Array<unknown>}
 */
function copyArray(value) {
  const identity = (item) => item;
  return value.map(identity);
}
387
/**
 * Copy an attribute object, omitting every key that starts with `"cache."`.
 * A falsy argument yields an empty object.
 */
function stripCacheAttributes(attributes) {
  if (!attributes) return {};
  const kept = {};
  Object.entries(attributes).forEach(([key, value]) => {
    if (key.startsWith("cache.")) return;
    kept[key] = value;
  });
  return kept;
}
393
/**
 * Take a JSON round-tripped snapshot of a span's non-`cache.*` attributes.
 * Falls back to an empty object when the snapshot is not a record.
 */
function snapshotNonCacheAttributes(span) {
  const nonCache = stripCacheAttributes(span?.attributes);
  const snapshot = toJsonSafe(nonCache);
  if (isRecordLike$1(snapshot)) return snapshot;
  return {};
}
397
/**
 * Collect the entries of `after` whose values differ from `before` (as
 * judged by `cacheAttributeValuesEqual`). Keys present only in `before` are
 * not reported.
 */
function diffNonCacheAttributes(before, after) {
  const changed = {};
  Object.entries(after).forEach(([key, value]) => {
    const unchanged = cacheAttributeValuesEqual(before[key], value);
    if (unchanged) return;
    changed[key] = value;
  });
  return changed;
}
402
/**
 * Compare two attribute values: identical per `Object.is`, or equal in their
 * `JSON.stringify` output. Values that cannot be stringified compare as
 * unequal.
 */
function cacheAttributeValuesEqual(left, right) {
  if (Object.is(left, right)) return true;
  let leftText;
  let rightText;
  try {
    leftText = JSON.stringify(left);
    rightText = JSON.stringify(right);
  } catch {
    // A stringify failure (e.g. a circular structure) counts as "different".
    return false;
  }
  return leftText === rightText;
}
410
/**
 * Append `ref` to the span's `"cache.refs"` attribute, starting a new list
 * when the attribute is missing or not an array. No-op without a span.
 */
function appendCacheRef(span, ref) {
  if (span === undefined) return;
  const existing = span.attributes?.["cache.refs"];
  const previousRefs = Array.isArray(existing) ? copyArray(existing) : [];
  const nextRefs = [...previousRefs, ref];
  mergeSpanAttributes$1(span, { "cache.refs": nextRefs });
}
415
/**
 * Serialize the span with id `spanId`, and recursively every span in
 * `scope.spans` whose `parentId` points at it, into a nested tree.
 *
 * An unknown id yields a placeholder node (`kind: "custom"`,
 * `name: "unknown"`, `status: "ok"`, no children).
 */
function serializeSubSpanTree(scope, spanId) {
  const source = scope.spans.find((span) => span.id === spanId);
  if (!source) {
    return {
      kind: "custom",
      name: "unknown",
      attributes: undefined,
      status: "ok",
      error: undefined,
      errors: undefined,
      warning: undefined,
      warnings: undefined,
      children: []
    };
  }
  const children = [];
  for (const candidate of scope.spans) {
    if (candidate.parentId !== spanId) continue;
    children.push(serializeSubSpanTree(scope, candidate.id));
  }
  const { kind, name, attributes, status, error, errors, warning, warnings } = source;
  return { kind, name, attributes, status, error, errors, warning, warnings, children };
}
441
/**
 * For every span at index `frame.baseSpanIndex` or later in `scope.spans`
 * whose `parentId` equals `frame.replayParentSpanId`, push a `"subSpan"` op
 * holding its serialized subtree onto `frame.ops`.
 */
function appendSubSpanOps(scope, frame) {
  let index = frame.baseSpanIndex;
  while (index < scope.spans.length) {
    const candidate = scope.spans[index];
    const isDirectChild = candidate?.parentId === frame.replayParentSpanId;
    if (isDirectChild) {
      const span = serializeSubSpanTree(scope, candidate.id);
      frame.ops.push({ kind: "subSpan", span });
    }
    index += 1;
  }
}
450
/**
 * Re-apply a cached recording to the scope: run each recorded op, then copy
 * the recording's final attributes, error(s) and warning(s) onto
 * `parentSpan` when one is given.
 *
 * `scope.replayingDepth` is incremented for the duration of the replay and
 * always restored, even if applying an op throws.
 */
function replayRecording(scope, parentSpan, recording, options) {
  scope.replayingDepth++;
  try {
    for (const op of recording.ops) {
      applyRecordingOp(scope, parentSpan, op, options);
    }
    if (parentSpan === undefined) return;
    const hasFinalAttributes = Object.keys(recording.finalAttributes).length > 0;
    if (hasFinalAttributes) mergeSpanAttributes$1(parentSpan, recording.finalAttributes);
    if (recording.finalError !== undefined) parentSpan.error = recording.finalError;
    if (recording.finalErrors !== undefined) parentSpan.errors = recording.finalErrors;
    if (recording.finalWarning !== undefined) parentSpan.warning = recording.finalWarning;
    if (recording.finalWarnings !== undefined) parentSpan.warnings = recording.finalWarnings;
  } finally {
    scope.replayingDepth--;
  }
}
463
/**
 * Apply one recorded op to the scope.
 *
 * Output ops (`setOutput`, `appendOutput`, `mergeOutput`, `incrementOutput`)
 * update `scope.outputs`; type mismatches on merge/increment are pushed onto
 * `scope.assertionFailures` instead. `checkpoint` stores data in
 * `scope.checkpoints`. Any other op is treated as a serialized sub-span and
 * replayed under `parentSpan` (or at the root when there is none).
 */
function applyRecordingOp(scope, parentSpan, op, options) {
  switch (op.kind) {
    case "setOutput": {
      scope.outputs[op.key] = op.value;
      return;
    }
    case "appendOutput": {
      const existing = scope.outputs[op.key];
      if (existing === undefined) {
        scope.outputs[op.key] = [op.value];
      } else if (Array.isArray(existing)) {
        scope.outputs[op.key] = [...copyArray(existing), op.value];
      } else {
        scope.outputs[op.key] = [existing, op.value];
      }
      return;
    }
    case "mergeOutput": {
      const existing = scope.outputs[op.key];
      if (existing === undefined) {
        scope.outputs[op.key] = { ...op.patch };
      } else if (isRecordLike$1(existing)) {
        scope.outputs[op.key] = { ...existing, ...op.patch };
      } else {
        scope.assertionFailures.push({ message: `replay mergeEvalOutput("${op.key}"): existing value is ${valueKind$1(existing)}, expected object` });
      }
      return;
    }
    case "incrementOutput": {
      const existing = scope.outputs[op.key];
      if (existing === undefined) {
        scope.outputs[op.key] = op.delta;
      } else if (typeof existing === "number") {
        scope.outputs[op.key] = existing + op.delta;
      } else {
        scope.assertionFailures.push({ message: `replay incrementEvalOutput("${op.key}"): existing value is ${valueKind$1(existing)}, expected number` });
      }
      return;
    }
    case "checkpoint": {
      scope.checkpoints.set(op.name, op.data);
      return;
    }
    default: {
      const parentId = parentSpan?.id ?? null;
      replaySerializedSpan(scope, parentId, op.span, options);
    }
  }
}
498
/**
 * Recreate a serialized span tree inside `scope.spans`.
 *
 * Each node gets a fresh id from `options.generateSpanId()` and a single
 * timestamp (taken at replay time) for both `startedAt` and `endedAt`.
 * Children are replayed depth-first with the new id as their parent.
 */
function replaySerializedSpan(scope, parentId, serialized, options) {
  const id = options.generateSpanId();
  const timestamp = new Date().toISOString();
  const { kind, name, status, attributes, error, errors, warning, warnings } = serialized;
  scope.spans.push({
    id,
    parentId,
    caseId: scope.caseId,
    kind,
    name,
    startedAt: timestamp,
    endedAt: timestamp,
    status,
    attributes,
    error,
    errors,
    warning,
    warnings
  });
  for (const child of serialized.children) {
    replaySerializedSpan(scope, id, child, options);
  }
}
519
+ //#endregion
520
+ //#region ../sdk/src/traceDiagnostics.ts
194
521
// Error fields excluded when collecting an error's extra own fields.
const errorCoreFields = new Set(["name", "message", "stack", "capturedAt"]);
200
- function generateSpanId() {
201
- spanIdCounter++;
202
- return `span_${String(Date.now())}_${String(spanIdCounter)}`;
203
- }
204
527
/**
 * Check whether a value is a non-null, non-array object.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord$2(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
@@ -221,33 +544,6 @@ function formatUnknownErrorMessage(error) {
221
544
/**
 * Collect the own enumerable fields of `error` whose keys are not listed in
 * `errorCoreFields`.
 */
function getErrorExtraFields(error) {
  const extraEntries = Object.entries(error).filter(([key]) => !errorCoreFields.has(key));
  return Object.fromEntries(extraEntries);
}
224
- function updateCurrentSpan(update) {
225
- const currentSpan = getCurrentScope()?.activeSpanStack.at(-1);
226
- if (!currentSpan) return;
227
- update(currentSpan);
228
- }
229
- function noopActiveSpan() {
230
- return {
231
- setName() {},
232
- setAttribute() {},
233
- setAttributes() {}
234
- };
235
- }
236
- function noopExternalSpan(id) {
237
- return {
238
- id,
239
- setName() {},
240
- setAttribute() {},
241
- setAttributes() {},
242
- end() {}
243
- };
244
- }
245
- function mergeSpanAttributes(span, attributes) {
246
- span.attributes = {
247
- ...span.attributes,
248
- ...attributes
249
- };
250
- }
251
547
  function normalizeTraceError(error, capturedAt = void 0) {
252
548
  if (error instanceof Error) return {
253
549
  ...getErrorExtraFields(error),
@@ -277,6 +573,34 @@ function normalizeTraceError(error, capturedAt = void 0) {
277
573
/**
 * Normalize one or more errors via `normalizeTraceError`.
 *
 * When `additionalErrors` is non-empty, `errorOrErrors` is treated as a
 * single item followed by the additional ones; otherwise an array argument
 * is used as the list and a non-array argument becomes a one-item list.
 */
function normalizeTraceErrors(errorOrErrors, additionalErrors, capturedAt) {
  let collected;
  if (additionalErrors.length > 0) {
    collected = [errorOrErrors, ...additionalErrors];
  } else if (Array.isArray(errorOrErrors)) {
    collected = errorOrErrors;
  } else {
    collected = [errorOrErrors];
  }
  return collected.map((error) => normalizeTraceError(error, capturedAt));
}
576
/**
 * Normalize one or more warnings via `normalizeTraceError`.
 *
 * When `additionalWarnings` is non-empty, `warningOrWarnings` is treated as
 * a single item followed by the additional ones; otherwise an array argument
 * is used as the list and a non-array argument becomes a one-item list.
 */
function normalizeTraceWarnings(warningOrWarnings, additionalWarnings, capturedAt) {
  let collected;
  if (additionalWarnings.length > 0) {
    collected = [warningOrWarnings, ...additionalWarnings];
  } else if (Array.isArray(warningOrWarnings)) {
    collected = warningOrWarnings;
  } else {
    collected = [warningOrWarnings];
  }
  return collected.map((warning) => normalizeTraceError(warning, capturedAt));
}
579
/**
 * Check whether a value looks like a capture-options object: a record whose
 * only own enumerable key is `level`, with `level` either `undefined` or a
 * valid level.
 */
function isCaptureEvalSpanErrorOptions(value) {
  if (!isRecord$2(value)) return false;
  const keys = Object.keys(value);
  const hasOnlyLevelKey = keys.length > 0 && keys.every((key) => key === "level");
  if (!hasOnlyLevelKey) return false;
  const { level } = value;
  return level === undefined || isCaptureEvalSpanErrorLevel(level);
}
586
/**
 * Check whether a value is one of the level strings `"error"` or `"warning"`.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isCaptureEvalSpanErrorLevel(value) {
  switch (value) {
    case "error":
    case "warning":
      return true;
    default:
      return false;
  }
}
589
/**
 * Split trailing rest arguments into additional errors and options.
 *
 * When the last argument is a level string or an options object, it is
 * removed from the error list and returned as `options`; otherwise all
 * arguments are errors and `options` is empty.
 */
function splitCaptureEvalSpanErrorArgs(additionalErrorsOrOptions) {
  const trailing = additionalErrorsOrOptions.at(-1);
  const withoutTrailing = () => additionalErrorsOrOptions.slice(0, -1);
  if (isCaptureEvalSpanErrorLevel(trailing)) {
    return { additionalErrors: withoutTrailing(), options: { level: trailing } };
  }
  if (isCaptureEvalSpanErrorOptions(trailing)) {
    return { additionalErrors: withoutTrailing(), options: trailing };
  }
  return { additionalErrors: additionalErrorsOrOptions, options: {} };
}
280
604
  function appendSpanErrors(span, errors) {
281
605
  if (errors.length === 0) return;
282
606
  const latestError = errors.at(-1);
@@ -285,8 +609,194 @@ function appendSpanErrors(span, errors) {
285
609
  span.error = latestError;
286
610
  span.status = "error";
287
611
  }
288
- function hasSpanError(span) {
289
- return span.error !== void 0 || (span.errors?.length ?? 0) > 0;
612
/**
 * Append warnings to `span.warnings` and set `span.warning` to the latest
 * one. The span's status is left untouched. Does nothing when the list is
 * empty or its last item is `undefined`.
 */
function appendSpanWarnings(span, warnings) {
  // `at(-1)` is undefined for an empty list, so one guard covers both cases.
  const latestWarning = warnings.at(-1);
  if (latestWarning === undefined) return;
  const previous = span.warnings ?? [];
  span.warnings = [...previous, ...warnings];
  span.warning = latestWarning;
}
619
/**
 * Check whether a span carries an `error` or a non-empty `errors` list.
 *
 * @returns {boolean}
 */
function hasSpanError(span) {
  if (span.error !== undefined) return true;
  const errorCount = span.errors?.length ?? 0;
  return errorCount > 0;
}
622
+ //#endregion
623
+ //#region ../sdk/src/valueCache.ts
624
/**
 * Build the value-cache function `traceCache(info, fn)`.
 *
 * `traceCache` runs `fn` directly when there is no current scope, no cache
 * context, or a replay is in progress. Otherwise it hashes a key from the
 * namespace, code fingerprint and `info.key`, and then:
 * - mode `"use"`: on a hit, replays the stored recording and returns its
 *   stored return value without calling `fn`; on a miss, runs `fn`.
 * - any other mode: runs `fn`.
 * After `fn` runs, its recorded ops are written through the adapter unless
 * the mode is `"bypass"`. Each call appends a `cache.refs` entry (hit, miss,
 * refresh or bypass) to the active span, if there is one.
 *
 * @param {() => string} generateSpanId Id factory used when replaying spans.
 */
function createTraceCache(generateSpanId) {
  return async function traceCache(info, fn) {
    const scope = getCurrentScope();
    if (!scope) return await fn();
    const cacheCtx = scope.cacheContext;
    if (cacheCtx === undefined || scope.replayingDepth > 0) return await fn();

    const namespace = info.namespace ?? `${cacheCtx.evalId}__${info.name}`;
    const keyHash = await hashCacheKey({
      namespace,
      codeFingerprint: cacheCtx.codeFingerprint,
      key: info.key
    });
    const activeSpan = scope.activeSpanStack.at(-1);

    // All cache refs share the same leading fields; only status/extras differ.
    const recordRef = (status, extra = {}) => {
      appendCacheRef(activeSpan, {
        type: "value",
        name: info.name,
        namespace,
        key: keyHash,
        status,
        ...extra
      });
    };

    if (cacheCtx.mode === "use") {
      const hit = await cacheCtx.adapter.lookup(namespace, keyHash);
      if (hit) {
        const storedAt = hit.storedAt;
        const age = Date.now() - new Date(storedAt).getTime();
        recordRef("hit", { storedAt, age });
        replayRecording(scope, activeSpan, hit.recording, { generateSpanId });
        return hit.recording.returnValue;
      }
      recordRef("miss");
    } else if (cacheCtx.mode === "refresh") {
      recordRef("refresh");
    } else {
      recordRef("bypass");
    }

    const beforeAttributes = snapshotNonCacheAttributes(activeSpan);
    const frame = {
      baseSpanIndex: scope.spans.length,
      replayParentSpanId: activeSpan?.id ?? null,
      ops: []
    };
    scope.recordingStack.push(frame);
    let bodyResult;
    try {
      bodyResult = await fn();
    } finally {
      // Always unwind the recording stack, even when the body throws.
      scope.recordingStack.pop();
    }
    appendSubSpanOps(scope, frame);

    if (cacheCtx.mode !== "bypass") {
      const afterAttributes = snapshotNonCacheAttributes(activeSpan);
      const recording = {
        returnValue: toJsonSafe(bodyResult),
        finalAttributes: diffNonCacheAttributes(beforeAttributes, afterAttributes),
        ops: frame.ops
      };
      await cacheCtx.adapter.write({
        version: 1,
        key: keyHash,
        namespace,
        operationType: "value",
        operationName: info.name,
        storedAt: new Date().toISOString(),
        codeFingerprint: cacheCtx.codeFingerprint,
        recording
      });
    }
    return bodyResult;
  };
}
710
+ //#endregion
711
+ //#region ../sdk/src/tracer.ts
712
// Module-level sequence number used to build span ids.
let spanIdCounter = 0;

/**
 * Generate a span id of the form `span_<epoch ms>_<sequence>`, incrementing
 * the module-level sequence on every call.
 *
 * @returns {string}
 */
function generateSpanId() {
  spanIdCounter += 1;
  return `span_${Date.now()}_${spanIdCounter}`;
}
717
/**
 * Invoke `update` with the span on top of the current scope's active span
 * stack. No-op when there is no scope or no active span.
 */
function updateCurrentSpan(update) {
  const scope = getCurrentScope();
  const currentSpan = scope?.activeSpanStack.at(-1);
  if (currentSpan) update(currentSpan);
}
722
/** Create an active-span handle whose methods all do nothing. */
function noopActiveSpan() {
  const handle = {
    setName() {},
    setAttribute() {},
    setAttributes() {},
    incrementAttribute() {},
    appendToAttribute() {},
    mergeAttribute() {}
  };
  return handle;
}
732
/**
 * Create an external-span handle that carries `id` but whose methods all do
 * nothing.
 */
function noopExternalSpan(id) {
  const handle = {
    id,
    setName() {},
    setAttribute() {},
    setAttributes() {},
    incrementAttribute() {},
    appendToAttribute() {},
    mergeAttribute() {},
    end() {}
  };
  return handle;
}
744
/**
 * Shallow-merge `attributes` into `span.attributes`, replacing the attribute
 * object with a fresh one. Keys in `attributes` win on conflict.
 */
function mergeSpanAttributes(span, attributes) {
  const merged = { ...span.attributes, ...attributes };
  span.attributes = merged;
}
750
/**
 * Check whether a value is a non-null, non-array object.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecordLike(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
753
/**
 * Describe a value's kind for diagnostic messages.
 *
 * Returns `"array"` for arrays and `"null"` for `null`; otherwise the
 * `typeof` result. `null` is named explicitly because `typeof null` is
 * `"object"`, which would make messages read "existing value is object,
 * expected object".
 *
 * @param {unknown} value
 * @returns {string}
 */
function valueKind(value) {
  if (value === null) return "null";
  return Array.isArray(value) ? "array" : typeof value;
}
756
/**
 * Push `{ message }` onto the current scope's assertion failures. No-op when
 * there is no current scope.
 */
function recordSpanAttributeAssertion(message) {
  const scope = getCurrentScope();
  if (scope) scope.assertionFailures.push({ message });
}
761
/**
 * Add `delta` to a numeric span attribute. A missing attribute is set to
 * `delta`; a non-numeric existing value is left unchanged and reported via
 * `recordSpanAttributeAssertion`.
 */
function incrementSpanAttribute(span, key, delta) {
  const existing = span.attributes?.[key];
  const isMissing = existing === undefined;
  if (!isMissing && typeof existing !== "number") {
    recordSpanAttributeAssertion(`evalSpan.incrementAttribute("${key}"): existing value is ${valueKind(existing)}, expected number`);
    return;
  }
  const next = isMissing ? delta : existing + delta;
  mergeSpanAttributes(span, { [key]: next });
}
773
/**
 * Append `value` to a span attribute list. A missing attribute becomes
 * `[value]`, an existing array gets the item appended (as a new array), and
 * any other existing value is kept as `[existing, value]`.
 */
function appendToSpanAttribute(span, key, value) {
  const existing = span.attributes?.[key];
  let next;
  if (existing === undefined) {
    next = [value];
  } else if (Array.isArray(existing)) {
    next = [...existing, value];
  } else {
    next = [existing, value];
  }
  mergeSpanAttributes(span, { [key]: next });
}
786
/**
 * Shallow-merge `patch` into an object-valued span attribute. A missing
 * attribute becomes a copy of `patch`; a non-object existing value is left
 * unchanged and reported via `recordSpanAttributeAssertion`.
 */
function mergeSpanAttribute(span, key, patch) {
  const existing = span.attributes?.[key];
  if (existing !== undefined && !isRecordLike(existing)) {
    recordSpanAttributeAssertion(`evalSpan.mergeAttribute("${key}"): existing value is ${valueKind(existing)}, expected object`);
    return;
  }
  // Spreading `undefined` contributes nothing, so this covers the missing case too.
  const next = { ...existing, ...patch };
  mergeSpanAttributes(span, { [key]: next });
}
291
801
  function finishSpanWithoutThrownError(span) {
292
802
  span.status = hasSpanError(span) ? "error" : "ok";
@@ -302,9 +812,25 @@ function createSpanHandle(span) {
302
812
  },
303
813
  setAttributes(value) {
304
814
  mergeSpanAttributes(span, value);
815
+ },
816
+ incrementAttribute(key, delta) {
817
+ incrementSpanAttribute(span, key, delta);
818
+ },
819
+ appendToAttribute(key, value) {
820
+ appendToSpanAttribute(span, key, value);
821
+ },
822
+ mergeAttribute(key, patch) {
823
+ mergeSpanAttribute(span, key, patch);
305
824
  }
306
825
  };
307
826
  }
827
/**
 * Look up the span with the given id in the current scope (via `findSpan`)
 * and pass it to `update`. No-op when there is no scope or no such span.
 */
function updateExternalSpanRecord(id, update) {
  const scope = getCurrentScope();
  if (!scope) return;
  const span = findSpan(scope, id);
  if (span) update(span);
}
308
834
  function createExternalSpanHandle(id) {
309
835
  return {
310
836
  id,
@@ -326,6 +852,21 @@ function createExternalSpanHandle(id) {
326
852
  attributes: value
327
853
  });
328
854
  },
855
+ incrementAttribute(key, delta) {
856
+ updateExternalSpanRecord(id, (span) => {
857
+ incrementSpanAttribute(span, key, delta);
858
+ });
859
+ },
860
+ appendToAttribute(key, value) {
861
+ updateExternalSpanRecord(id, (span) => {
862
+ appendToSpanAttribute(span, key, value);
863
+ });
864
+ },
865
+ mergeAttribute(key, patch) {
866
+ updateExternalSpanRecord(id, (span) => {
867
+ mergeSpanAttribute(span, key, patch);
868
+ });
869
+ },
329
870
  end(info = {}) {
330
871
  endExternalSpan({
331
872
  ...info,
@@ -382,6 +923,8 @@ function updateExternalSpan(info) {
382
923
  if (info.name !== void 0) span.name = info.name;
383
924
  if (info.status !== void 0) span.status = info.status;
384
925
  if (info.error !== void 0) span.error = info.error;
926
+ if (info.warning !== void 0) span.warning = info.warning;
927
+ if (info.warnings !== void 0) span.warnings = info.warnings;
385
928
  if (info.attributes !== void 0) mergeSpanAttributes(span, info.attributes);
386
929
  }
387
930
  function endExternalSpan(info) {
@@ -410,6 +953,8 @@ function recordExternalSpan(info) {
410
953
  existing.status = status;
411
954
  existing.attributes = info.attributes;
412
955
  existing.error = info.error;
956
+ existing.warning = info.warning;
957
+ existing.warnings = info.warnings;
413
958
  return id;
414
959
  }
415
960
  scope.spans.push({
@@ -422,7 +967,9 @@ function recordExternalSpan(info) {
422
967
  endedAt,
423
968
  status,
424
969
  attributes: info.attributes,
425
- error: info.error
970
+ error: info.error,
971
+ warning: info.warning,
972
+ warnings: info.warnings
426
973
  });
427
974
  return id;
428
975
  }
@@ -446,16 +993,42 @@ const evalSpan = {
446
993
  updateCurrentSpan((currentSpan) => {
447
994
  mergeSpanAttributes(currentSpan, value);
448
995
  });
996
+ },
997
+ incrementAttribute(key, delta) {
998
+ updateCurrentSpan((currentSpan) => {
999
+ incrementSpanAttribute(currentSpan, key, delta);
1000
+ });
1001
+ },
1002
+ appendToAttribute(key, value) {
1003
+ updateCurrentSpan((currentSpan) => {
1004
+ appendToSpanAttribute(currentSpan, key, value);
1005
+ });
1006
+ },
1007
+ mergeAttribute(key, patch) {
1008
+ updateCurrentSpan((currentSpan) => {
1009
+ mergeSpanAttribute(currentSpan, key, patch);
1010
+ });
449
1011
  }
450
1012
  };
451
1013
  /**
452
1014
  * Attach one or more recoverable errors to the active eval span.
453
1015
  *
454
- * The active span is marked as `error` even if its callback later completes
455
- * without throwing. Calls outside `evalTracer.span(...)` are ignored.
1016
+ * By default the active span is marked as `error` even if its callback later
1017
+ * completes without throwing. Pass `'warning'` or `{ level: 'warning' }` as the
1018
+ * final argument to record the diagnostic without changing span status. Calls
1019
+ * outside `evalTracer.span(...)` are ignored.
456
1020
  */
457
- function captureEvalSpanError(errorOrErrors, ...additionalErrors) {
458
- const errors = normalizeTraceErrors(errorOrErrors, additionalErrors, (/* @__PURE__ */ new Date()).toISOString());
1021
+ function captureEvalSpanError(errorOrErrors, ...additionalErrorsOrOptions) {
1022
+ const { additionalErrors, options } = splitCaptureEvalSpanErrorArgs(additionalErrorsOrOptions);
1023
+ const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
1024
+ if ((options.level ?? "error") === "warning") {
1025
+ const warnings = normalizeTraceWarnings(errorOrErrors, additionalErrors, capturedAt);
1026
+ updateCurrentSpan((currentSpan) => {
1027
+ appendSpanWarnings(currentSpan, warnings);
1028
+ });
1029
+ return;
1030
+ }
1031
+ const errors = normalizeTraceErrors(errorOrErrors, additionalErrors, capturedAt);
459
1032
  updateCurrentSpan((currentSpan) => {
460
1033
  appendSpanErrors(currentSpan, errors);
461
1034
  });
@@ -503,7 +1076,7 @@ async function traceSpan(info, fn) {
503
1076
  "cache.storedAt": storedAt,
504
1077
  "cache.age": Date.now() - new Date(storedAt).getTime()
505
1078
  });
506
- replayRecording(scope, spanRecord, hit.recording);
1079
+ replayRecording(scope, spanRecord, hit.recording, { generateSpanId });
507
1080
  spanRecord.status = hit.recording.finalStatus ?? (hasSpanError(spanRecord) ? "error" : "ok");
508
1081
  spanRecord.endedAt = (/* @__PURE__ */ new Date()).toISOString();
509
1082
  return hit.recording.returnValue;
@@ -513,7 +1086,7 @@ async function traceSpan(info, fn) {
513
1086
  else mergeSpanAttributes(spanRecord, { "cache.status": "bypass" });
514
1087
  const frame = {
515
1088
  baseSpanIndex: scope.spans.length,
516
- cachedSpanId: id,
1089
+ replayParentSpanId: id,
517
1090
  ops: []
518
1091
  };
519
1092
  scope.recordingStack.push(frame);
@@ -532,12 +1105,16 @@ async function traceSpan(info, fn) {
532
1105
  finalStatus: spanRecord.status,
533
1106
  finalError: spanRecord.error,
534
1107
  finalErrors: spanRecord.errors,
1108
+ finalWarning: spanRecord.warning,
1109
+ finalWarnings: spanRecord.warnings,
535
1110
  ops: frame.ops
536
1111
  };
537
1112
  const entry = {
538
1113
  version: 1,
539
1114
  key: keyHash,
540
1115
  namespace,
1116
+ operationType: "span",
1117
+ operationName: info.name,
541
1118
  spanName: info.name,
542
1119
  spanKind: info.kind,
543
1120
  storedAt: (/* @__PURE__ */ new Date()).toISOString(),
@@ -569,6 +1146,13 @@ const evalTracer = {
569
1146
  /** Run a callback inside a new trace span and record its lifecycle. */
570
1147
  span: traceSpan,
571
1148
  /**
1149
+ * Cache a pure value without creating a trace span.
1150
+ *
1151
+ * When called inside an active span, the span receives a `cache.refs` entry
1152
+ * describing the value cache status for this run.
1153
+ */
1154
+ cache: createTraceCache(generateSpanId),
1155
+ /**
572
1156
  * Start a span whose lifecycle is controlled by an external tracer/exporter.
573
1157
  *
574
1158
  * Calls are no-ops outside an eval case scope, except that a generated or
@@ -649,194 +1233,6 @@ function buildTraceTree(spans, checkpoints) {
649
1233
  checkpoints
650
1234
  };
651
1235
  }
652
- var SerializedCacheKeyValue = class {
653
- value;
654
- constructor(value) {
655
- this.value = value;
656
- }
657
- };
658
- /**
659
- * Hash the components of a cache key into a deterministic hex digest.
660
- *
661
- * Native `Blob` and `File` values are read asynchronously and hashed by
662
- * content. Use `hashCacheKeySync` only when the key contains no async values.
663
- */
664
- async function hashCacheKey(input) {
665
- return hashCacheKeySyncMaterialized(await materializeAsyncCacheKeyValue(input));
666
- }
667
- /**
668
- * Synchronously hash cache key components. This supports JSON-like data and
669
- * in-memory binary values such as `Buffer`, `ArrayBuffer`, and typed arrays,
670
- * but cannot content-hash native `Blob` or `File` values.
671
- */
672
- function hashCacheKeySync(input) {
673
- return hashCacheKeySyncMaterialized(input);
674
- }
675
- function hashCacheKeySyncMaterialized(input) {
676
- return createHash("sha256").update(getCompositeKey(input, { stringify: stringifyCacheKeyValue })).digest("hex");
677
- }
678
- function stringifyCacheKeyValue(value) {
679
- if (value instanceof SerializedCacheKeyValue) return value.value;
680
- if (Buffer$1.isBuffer(value)) return `$buffer:${hashBytes(value)}`;
681
- if (isArrayBuffer(value)) return `$arrayBuffer:${hashBytes(new Uint8Array(value))}`;
682
- if (isSharedArrayBuffer(value)) return `$sharedArrayBuffer:${hashBytes(new Uint8Array(value))}`;
683
- if (isArrayBufferView(value)) {
684
- const bytes = new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
685
- return `$${value.constructor.name}:${hashBytes(bytes)}`;
686
- }
687
- if (isFile$1(value)) return `$file:${getCompositeKey({
688
- lastModified: value.lastModified,
689
- name: value.name,
690
- size: value.size,
691
- type: value.type
692
- })}`;
693
- if (isBlob$1(value)) return `$blob:${getCompositeKey({
694
- size: value.size,
695
- type: value.type
696
- })}`;
697
- }
698
- async function materializeAsyncCacheKeyValue(value, refs = /* @__PURE__ */ new WeakSet()) {
699
- const serialized = await stringifyAsyncCacheKeyValue(value);
700
- if (serialized !== void 0) return new SerializedCacheKeyValue(serialized);
701
- if (stringifyCacheKeyValue(value) !== void 0) return value;
702
- if (!value || typeof value !== "object") return value;
703
- if (Array.isArray(value)) {
704
- const items = [];
705
- for (const item of value) items.push(await materializeAsyncCacheKeyValue(item, refs));
706
- return items;
707
- }
708
- if (refs.has(value)) throw new Error("Circular reference detected");
709
- refs.add(value);
710
- const entries = [];
711
- for (const [key, entryValue] of Object.entries(value)) entries.push([key, await materializeAsyncCacheKeyValue(entryValue, refs)]);
712
- refs.delete(value);
713
- return Object.fromEntries(entries);
714
- }
715
- async function stringifyAsyncCacheKeyValue(value) {
716
- if (isFile$1(value)) return `$file:${getCompositeKey({
717
- bytes: await hashBlobBytes(value),
718
- lastModified: value.lastModified,
719
- name: value.name,
720
- size: value.size,
721
- type: value.type
722
- })}`;
723
- if (isBlob$1(value)) return `$blob:${getCompositeKey({
724
- bytes: await hashBlobBytes(value),
725
- size: value.size,
726
- type: value.type
727
- })}`;
728
- }
729
- async function hashBlobBytes(value) {
730
- return hashBytes(new Uint8Array(await value.arrayBuffer()));
731
- }
732
- function hashBytes(value) {
733
- return createHash("sha256").update(value).digest("hex");
734
- }
735
- function isArrayBuffer(value) {
736
- return value instanceof ArrayBuffer;
737
- }
738
- function isSharedArrayBuffer(value) {
739
- return value instanceof SharedArrayBuffer;
740
- }
741
- function isArrayBufferView(value) {
742
- return ArrayBuffer.isView(value);
743
- }
744
- function isBlob$1(value) {
745
- return value instanceof Blob;
746
- }
747
- function isFile$1(value) {
748
- return value instanceof File;
749
- }
750
- function toJsonSafe(value) {
751
- if (value === void 0) return void 0;
752
- const text = JSON.stringify(value);
753
- return JSON.parse(text);
754
- }
755
- function stripCacheAttributes(attributes) {
756
- if (!attributes) return {};
757
- const result = {};
758
- for (const [key, value] of Object.entries(attributes)) if (!key.startsWith("cache.")) result[key] = value;
759
- return result;
760
- }
761
- function serializeSubSpanTree(scope, spanId) {
762
- const original = scope.spans.find((s) => s.id === spanId);
763
- if (!original) return {
764
- kind: "custom",
765
- name: "unknown",
766
- attributes: void 0,
767
- status: "ok",
768
- error: void 0,
769
- errors: void 0,
770
- children: []
771
- };
772
- const children = scope.spans.filter((s) => s.parentId === spanId).map((child) => serializeSubSpanTree(scope, child.id));
773
- return {
774
- kind: original.kind,
775
- name: original.name,
776
- attributes: original.attributes,
777
- status: original.status,
778
- error: original.error,
779
- errors: original.errors,
780
- children
781
- };
782
- }
783
- function appendSubSpanOps(scope, frame) {
784
- for (let i = frame.baseSpanIndex; i < scope.spans.length; i++) {
785
- const candidate = scope.spans[i];
786
- if (candidate?.parentId === frame.cachedSpanId) frame.ops.push({
787
- kind: "subSpan",
788
- span: serializeSubSpanTree(scope, candidate.id)
789
- });
790
- }
791
- }
792
- function replayRecording(scope, parentSpan, recording) {
793
- scope.replayingDepth++;
794
- try {
795
- for (const op of recording.ops) applyRecordingOp(scope, parentSpan, op);
796
- if (Object.keys(recording.finalAttributes).length > 0) mergeSpanAttributes(parentSpan, recording.finalAttributes);
797
- if (recording.finalError !== void 0) parentSpan.error = recording.finalError;
798
- if (recording.finalErrors !== void 0) parentSpan.errors = recording.finalErrors;
799
- } finally {
800
- scope.replayingDepth--;
801
- }
802
- }
803
- function applyRecordingOp(scope, parentSpan, op) {
804
- if (op.kind === "setOutput") {
805
- scope.outputs[op.key] = op.value;
806
- return;
807
- }
808
- if (op.kind === "incrementOutput") {
809
- const existing = scope.outputs[op.key];
810
- if (existing === void 0) scope.outputs[op.key] = op.delta;
811
- else if (typeof existing === "number") scope.outputs[op.key] = existing + op.delta;
812
- else scope.assertionFailures.push({ message: `replay incrementEvalOutput("${op.key}"): existing value is ${typeof existing}, expected number` });
813
- return;
814
- }
815
- if (op.kind === "checkpoint") {
816
- scope.checkpoints.set(op.name, op.data);
817
- return;
818
- }
819
- replaySerializedSpan(scope, parentSpan.id, op.span);
820
- }
821
- function replaySerializedSpan(scope, parentId, serialized) {
822
- const id = generateSpanId();
823
- const now = (/* @__PURE__ */ new Date()).toISOString();
824
- const replayed = {
825
- id,
826
- parentId,
827
- caseId: scope.caseId,
828
- kind: serialized.kind,
829
- name: serialized.name,
830
- startedAt: now,
831
- endedAt: now,
832
- status: serialized.status,
833
- attributes: serialized.attributes,
834
- error: serialized.error,
835
- errors: serialized.errors
836
- };
837
- scope.spans.push(replayed);
838
- for (const child of serialized.children) replaySerializedSpan(scope, id, child);
839
- }
840
1236
  //#endregion
841
1237
  //#region ../shared/src/schemas/display.ts
842
1238
  const scalarCellSchema = z.union([
@@ -976,6 +1372,8 @@ const traceSpanErrorSchema = z.object({
976
1372
  stack: z.string().optional(),
977
1373
  capturedAt: z.string().optional()
978
1374
  }).catchall(z.unknown());
/**
 * Schema for a warning attached to a trace span.
 *
 * This is the same schema object as `traceSpanErrorSchema`, so a warning is
 * validated with exactly the rules applied to a span error.
 */
const traceSpanWarningSchema = traceSpanErrorSchema;
979
1377
  /** Schema for a persisted trace span captured during case execution. */
980
1378
  const traceSpanSchema = z.object({
981
1379
  id: z.string(),
@@ -993,7 +1391,9 @@ const traceSpanSchema = z.object({
993
1391
  ]),
994
1392
  attributes: z.record(z.string(), z.unknown()).optional(),
995
1393
  error: traceSpanErrorSchema.optional(),
996
- errors: z.array(traceSpanErrorSchema).optional()
1394
+ errors: z.array(traceSpanErrorSchema).optional(),
1395
+ warning: traceSpanWarningSchema.optional(),
1396
+ warnings: z.array(traceSpanWarningSchema).optional()
997
1397
  });
998
1398
  //#endregion
999
1399
  //#region ../shared/src/schemas/chart.ts
@@ -1254,12 +1654,16 @@ const spanCacheOptionsSchema = z.object({
1254
1654
  /** Override the default namespace (`${evalId}__${spanName}`). */
1255
1655
  namespace: z.string().optional()
1256
1656
  });
/**
 * Category of operation stored in the eval cache.
 *
 * Accepts exactly two string literals: `"span"` and `"value"`.
 */
const cacheOperationTypeSchema = z.enum([
  "span",
  "value"
]);
1257
1659
  /** Summary of a single persisted cache entry, used by list/delete endpoints. */
1258
1660
  const cacheListItemSchema = z.object({
1259
1661
  key: z.string(),
1260
1662
  namespace: z.string(),
1261
- spanName: z.string(),
1262
- spanKind: traceSpanKindSchema,
1663
+ operationType: cacheOperationTypeSchema,
1664
+ operationName: z.string(),
1665
+ spanName: z.string().optional(),
1666
+ spanKind: traceSpanKindSchema.optional(),
1263
1667
  storedAt: z.string(),
1264
1668
  codeFingerprint: z.string(),
1265
1669
  sizeBytes: z.number()
@@ -1276,7 +1680,9 @@ const serializedCacheSpanSchema = z.object({
1276
1680
  "cancelled"
1277
1681
  ]),
1278
1682
  error: traceSpanErrorSchema.optional(),
1279
- errors: z.array(traceSpanErrorSchema).optional()
1683
+ errors: z.array(traceSpanErrorSchema).optional(),
1684
+ warning: traceSpanWarningSchema.optional(),
1685
+ warnings: z.array(traceSpanWarningSchema).optional()
1280
1686
  }).extend({ children: z.lazy(() => z.array(serializedCacheSpanSchema)) });
1281
1687
  /**
1282
1688
  * One captured operation performed while a cached span's body executed.
@@ -1290,6 +1696,16 @@ const cacheRecordingOpSchema = z.discriminatedUnion("kind", [
1290
1696
  key: z.string(),
1291
1697
  value: z.unknown()
1292
1698
  }),
1699
+ z.object({
1700
+ kind: z.literal("appendOutput"),
1701
+ key: z.string(),
1702
+ value: z.unknown()
1703
+ }),
1704
+ z.object({
1705
+ kind: z.literal("mergeOutput"),
1706
+ key: z.string(),
1707
+ patch: z.record(z.string(), z.unknown())
1708
+ }),
1293
1709
  z.object({
1294
1710
  kind: z.literal("incrementOutput"),
1295
1711
  key: z.string(),
@@ -1317,6 +1733,8 @@ const cacheRecordingSchema = z.object({
1317
1733
  ]).optional(),
1318
1734
  finalError: traceSpanErrorSchema.optional(),
1319
1735
  finalErrors: z.array(traceSpanErrorSchema).optional(),
1736
+ finalWarning: traceSpanWarningSchema.optional(),
1737
+ finalWarnings: z.array(traceSpanWarningSchema).optional(),
1320
1738
  ops: z.array(cacheRecordingOpSchema)
1321
1739
  });
1322
1740
  /** Persisted cache file containing metadata and a recording. */
@@ -1324,8 +1742,10 @@ const cacheEntrySchema = z.object({
1324
1742
  version: z.literal(1),
1325
1743
  key: z.string(),
1326
1744
  namespace: z.string(),
1327
- spanName: z.string(),
1328
- spanKind: traceSpanKindSchema,
1745
+ operationType: cacheOperationTypeSchema.optional(),
1746
+ operationName: z.string().optional(),
1747
+ spanName: z.string().optional(),
1748
+ spanKind: traceSpanKindSchema.optional(),
1329
1749
  storedAt: z.string(),
1330
1750
  codeFingerprint: z.string(),
1331
1751
  recording: cacheRecordingSchema
@@ -1635,15 +2055,21 @@ function createFsCacheStore(options) {
1635
2055
  if (fileStatResult.error || !fileStatResult.value.isFile()) continue;
1636
2056
  const cacheFile = await readCacheFilePath(filePath);
1637
2057
  if (cacheFile === null) continue;
1638
- for (const entry of Object.values(cacheFile.entries)) items.push({
1639
- key: entry.key,
1640
- namespace: entry.namespace,
1641
- spanName: entry.spanName,
1642
- spanKind: entry.spanKind,
1643
- storedAt: entry.storedAt,
1644
- codeFingerprint: entry.codeFingerprint,
1645
- sizeBytes: Buffer.byteLength(JSON.stringify(entry), "utf8")
1646
- });
2058
+ for (const entry of Object.values(cacheFile.entries)) {
2059
+ const operationType = entry.operationType ?? "span";
2060
+ const operationName = entry.operationName ?? entry.spanName ?? entry.namespace;
2061
+ items.push({
2062
+ key: entry.key,
2063
+ namespace: entry.namespace,
2064
+ operationType,
2065
+ operationName,
2066
+ spanName: entry.spanName,
2067
+ spanKind: entry.spanKind,
2068
+ storedAt: entry.storedAt,
2069
+ codeFingerprint: entry.codeFingerprint,
2070
+ sizeBytes: Buffer.byteLength(JSON.stringify(entry), "utf8")
2071
+ });
2072
+ }
1647
2073
  }
1648
2074
  items.sort((a, b) => a.storedAt < b.storedAt ? 1 : -1);
1649
2075
  return items;
@@ -2534,12 +2960,15 @@ async function runCase(params) {
2534
2960
  input: evalCase.input,
2535
2961
  signal
2536
2962
  }]);
2537
- }, { cacheContext: cacheAdapter ? {
2538
- adapter: cacheAdapter,
2539
- mode: cacheMode,
2540
- evalId,
2541
- codeFingerprint
2542
- } : void 0 });
2963
+ }, {
2964
+ input: evalCase.input,
2965
+ cacheContext: cacheAdapter ? {
2966
+ adapter: cacheAdapter,
2967
+ mode: cacheMode,
2968
+ evalId,
2969
+ codeFingerprint
2970
+ } : void 0
2971
+ });
2543
2972
  const traceTree = buildTraceTree(scope.spans, scope.checkpoints);
2544
2973
  const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
2545
2974
  if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) scope.assertionFailures.push(toAssertionFailure(executeError.message, executeError));
@@ -2555,20 +2984,31 @@ async function runCase(params) {
2555
2984
  const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
2556
2985
  scope.assertionFailures.push(toAssertionFailure(message, e instanceof Error ? e : void 0));
2557
2986
  }
2987
+ if (!nonAssertError && evalDef.outputsSchema) {
2988
+ const parsedOutputs = evalDef.outputsSchema.safeParse(getOutputsSchemaInput(evalDef.outputsSchema, scope.outputs));
2989
+ if (parsedOutputs.success) scope.outputs = {
2990
+ ...scope.outputs,
2991
+ ...parsedOutputs.data
2992
+ };
2993
+ else scope.assertionFailures.push(toAssertionFailure(formatOutputsSchemaError(parsedOutputs.error)));
2994
+ }
2558
2995
  const scoreResults = /* @__PURE__ */ new Map();
2559
2996
  const scoringTraces = {};
2560
- if (!nonAssertError && evalDef.scores) for (const [key, def] of Object.entries(evalDef.scores)) {
2997
+ if (!nonAssertError && scope.assertionFailures.length === 0 && evalDef.scores) for (const [key, def] of Object.entries(evalDef.scores)) {
2561
2998
  const { compute, passThreshold, label } = normalizeScoreDef(def);
2562
2999
  const scoreRun = await runInEvalScope(evalCase.id, async () => await callWithUnknownResult(compute, [{
2563
3000
  input: evalCase.input,
2564
3001
  outputs: { ...scope.outputs },
2565
3002
  case: evalCase
2566
- }]), { cacheContext: cacheAdapter ? {
2567
- adapter: cacheAdapter,
2568
- mode: cacheMode,
2569
- evalId: `${evalId}__score__${key}`,
2570
- codeFingerprint
2571
- } : void 0 });
3003
+ }]), {
3004
+ input: evalCase.input,
3005
+ cacheContext: cacheAdapter ? {
3006
+ adapter: cacheAdapter,
3007
+ mode: cacheMode,
3008
+ evalId: `${evalId}__score__${key}`,
3009
+ codeFingerprint
3010
+ } : void 0
3011
+ });
2572
3012
  const { trace, traceDisplay } = resolveTracePresentation(scoreRun.scope.spans, globalTraceDisplay, evalDef.traceDisplay);
2573
3013
  if (trace.length > 0) scoringTraces[key] = {
2574
3014
  trace,
@@ -2659,6 +3099,19 @@ function isRecord(value) {
/**
 * Report whether `value` is an instance of the global `Blob` class.
 *
 * @param value - any value to inspect.
 * @returns `true` when `value instanceof Blob` holds, otherwise `false`.
 */
function isBlob(value) {
  const matchesBlob = value instanceof Blob;
  return matchesBlob;
}
/**
 * Select the part of the collected outputs that should be handed to the
 * outputs schema for validation.
 *
 * When `schema` is a `z.ZodObject`, only the keys declared in `schema.shape`
 * are copied, so outputs the schema does not describe are left out of the
 * validation input. Any other schema receives `outputs` unchanged.
 *
 * @param schema - the schema to validate the outputs against.
 * @param outputs - the outputs object to select from.
 * @returns `outputs` itself for non-object schemas, otherwise a new object
 *   holding only the declared keys that `outputs` actually owns.
 */
function getOutputsSchemaInput(schema, outputs) {
  if (!(schema instanceof z.ZodObject)) return outputs;
  const configuredOutputs = {};
  for (const key of Object.keys(schema.shape)) {
    // Own-property check on purpose: `key in outputs` also matches inherited
    // members (`constructor`, `toString`, ...), which would inject
    // Object.prototype functions into the validation input for schema keys
    // with those names.
    if (Object.hasOwn(outputs, key)) configuredOutputs[key] = outputs[key];
  }
  return configuredOutputs;
}
/**
 * Build the assertion-failure message for an outputs schema validation error.
 *
 * Each issue becomes one `path: message` line, where the path segments are
 * joined with `.` and an empty path is shown as `<root>`.
 *
 * @param error - validation error exposing an `issues` array whose items
 *   carry a `path` array and a `message` string.
 * @returns a single-line message when there are no issues, otherwise a
 *   header line followed by one line per issue.
 */
function formatOutputsSchemaError(error) {
  const issueLines = error.issues.map(({ path, message }) => {
    // Stringify each segment explicitly: Array.prototype.join throws a
    // TypeError when a segment is a symbol.
    const location = path.length > 0 ? path.map((segment) => String(segment)).join(".") : "<root>";
    return `${location}: ${message}`;
  });
  if (issueLines.length === 0) return "outputsSchema validation failed";
  return `outputsSchema validation failed:\n${issueLines.join("\n")}`;
}
2662
3115
  function toAssertionFailure(message, error = void 0) {
2663
3116
  return error?.stack ? {
2664
3117
  message,
@@ -3614,6 +4067,7 @@ function createRunner({ watchForChanges = true } = {}) {
3614
4067
  //#endregion
3615
4068
  //#region src/cli.ts
3616
4069
  function parseArgs(argv) {
4070
+ const normalizedArgv = argv.filter((arg) => arg !== "--no-env");
3617
4071
  const args = {
3618
4072
  command: "help",
3619
4073
  subcommand: void 0,
@@ -3627,9 +4081,10 @@ function parseArgs(argv) {
3627
4081
  port: 4100,
3628
4082
  cacheMode: "use",
3629
4083
  clearCache: false,
3630
- all: false
4084
+ all: false,
4085
+ loadEnv: normalizedArgv.length === argv.length
3631
4086
  };
3632
- const command = argv[0];
4087
+ const command = normalizedArgv[0];
3633
4088
  if (command === "--help" || command === "-h") {
3634
4089
  args.showHelp = true;
3635
4090
  return args;
@@ -3640,16 +4095,16 @@ function parseArgs(argv) {
3640
4095
  } else if (command !== void 0 && !command.startsWith("-")) args.unknownHelpTarget = command;
3641
4096
  let cursor = 1;
3642
4097
  if (args.command === "cache") {
3643
- const sub = argv[cursor];
4098
+ const sub = normalizedArgv[cursor];
3644
4099
  if (sub === "list" || sub === "clear") {
3645
4100
  args.subcommand = sub;
3646
4101
  args.helpTopic = `cache ${sub}`;
3647
4102
  cursor++;
3648
4103
  } else if (sub !== void 0 && !sub.startsWith("-")) args.unknownHelpTarget = `cache ${sub}`;
3649
4104
  }
3650
- for (let i = cursor; i < argv.length; i++) {
3651
- const arg = argv[i];
3652
- const next = argv[i + 1];
4105
+ for (let i = cursor; i < normalizedArgv.length; i++) {
4106
+ const arg = normalizedArgv[i];
4107
+ const next = normalizedArgv[i + 1];
3653
4108
  if (arg === "--help" || arg === "-h") args.showHelp = true;
3654
4109
  else if (arg === "--eval" && next) {
3655
4110
  args.evalIds.push(...next.split(","));
@@ -3681,6 +4136,10 @@ function parseArgs(argv) {
3681
4136
  */
3682
4137
  async function runCli(argv) {
3683
4138
  const args = parseArgs(argv);
4139
+ if (args.loadEnv && !loadWorkspaceEnv()) {
4140
+ process.exit(1);
4141
+ return;
4142
+ }
3684
4143
  if (args.showHelp) {
3685
4144
  if (args.unknownHelpTarget !== void 0) {
3686
4145
  console.error(`No help found for "${args.unknownHelpTarget}".`);
@@ -3711,6 +4170,18 @@ async function runCli(argv) {
/**
 * Check whether `command` is one of the CLI's recognised top-level commands.
 *
 * @param command - candidate command token.
 * @returns `true` for `"app"`, `"list"`, `"run"`, `"cache"` or `"help"`;
 *   `false` for anything else.
 */
function isCliCommand(command) {
  switch (command) {
    case "app":
    case "list":
    case "run":
    case "cache":
    case "help":
      return true;
    default:
      return false;
  }
}
/**
 * Load the `.env` file from the current working directory, if one exists.
 *
 * The load is run through `resultify`; when the returned result carries an
 * `error`, the failure is logged with `console.error` and reported to the
 * caller. (Presumably `resultify` captures exceptions thrown by the
 * callback — confirm against its definition.)
 *
 * @returns `true` when there is no `.env` file or it loaded without error,
 *   `false` when loading failed.
 */
function loadWorkspaceEnv() {
  const envPath = resolve(process.cwd(), ".env");
  if (existsSync(envPath)) {
    const { error } = resultify(() => {
      process.loadEnvFile(envPath);
    });
    if (error) {
      console.error(`Failed to load .env at ${envPath}: ${error.message}`);
      return false;
    }
  }
  return true;
}
3714
4185
  const currentDir = dirname(fileURLToPath(import.meta.url));
3715
4186
  const repoRoot = resolve(currentDir, "../../..");
3716
4187
  const pnpmCommand = process.platform === "win32" ? "pnpm.cmd" : "pnpm";
@@ -3759,8 +4230,8 @@ async function commandApp(args) {
3759
4230
  const { serve } = await import("@hono/node-server");
3760
4231
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
3761
4232
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
3762
- const appModule = await import("./app-C5CJ1sX6.mjs");
3763
- const runnerModule = await import("./runner-Cdlvk56X.mjs");
4233
+ const appModule = await import("./app-7qDBq_ub.mjs");
4234
+ const runnerModule = await import("./runner-uzzY8kk1.mjs");
3764
4235
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
3765
4236
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
3766
4237
  await runnerModule.initRunner();
@@ -3861,7 +4332,8 @@ async function commandCache(args) {
3861
4332
  for (const entry of entries) {
3862
4333
  console.info(` ${entry.namespace}`);
3863
4334
  console.info(` key: ${entry.key}`);
3864
- console.info(` span: ${entry.spanName} (${entry.spanKind})`);
4335
+ const operationLabel = entry.operationType === "span" ? `${entry.operationName} (span ${entry.spanKind ?? "unknown"})` : `${entry.operationName} (value)`;
4336
+ console.info(` operation: ${operationLabel}`);
3865
4337
  console.info(` stored: ${entry.storedAt}`);
3866
4338
  console.info(` size: ${String(entry.sizeBytes)} bytes`);
3867
4339
  console.info("");
@@ -3916,6 +4388,7 @@ Usage:
3916
4388
 
3917
4389
  Flags:
3918
4390
  --port <n> Server port (default: 4100)
4391
+ --no-env Disable automatic .env loading
3919
4392
  --help, -h Show this help
3920
4393
  `);
3921
4394
  return;
@@ -3928,6 +4401,7 @@ Usage:
3928
4401
  agent-evals list [flags]
3929
4402
 
3930
4403
  Flags:
4404
+ --no-env Disable automatic .env loading
3931
4405
  --help, -h Show this help
3932
4406
  `);
3933
4407
  return;
@@ -3948,6 +4422,7 @@ Flags:
3948
4422
  --no-cache Shortcut for --cache bypass
3949
4423
  --refresh-cache Shortcut for --cache refresh
3950
4424
  --clear-cache Clear the cache before starting the run
4425
+ --no-env Disable automatic .env loading
3951
4426
  --help, -h Show this help
3952
4427
  `);
3953
4428
  return;
@@ -3965,6 +4440,7 @@ Flags:
3965
4440
  --eval <id> Clear entries for specific eval(s) (comma-separated)
3966
4441
  --all Confirm clearing every cached entry
3967
4442
  --json Output cache listing as JSON
4443
+ --no-env Disable automatic .env loading
3968
4444
  --help, -h Show this help
3969
4445
  `);
3970
4446
  return;
@@ -3991,8 +4467,9 @@ Options:
3991
4467
  --no-cache Shortcut for --cache bypass
3992
4468
  --refresh-cache Shortcut for --cache refresh
3993
4469
  --clear-cache Clear the cache before starting the run
4470
+ --no-env Disable automatic .env loading
3994
4471
  --help, -h Show help
3995
4472
  `);
3996
4473
  }
3997
4474
  //#endregion
3998
- export { columnKindSchema as $, evalSummarySchema as A, evalChartsConfigSchema as B, assertionFailureSchema as C, evalStatAggregateSchema as D, evalFreshnessStatusSchema as E, evalChartColorSchema as F, traceDisplayConfigSchema as G, traceAttributeDisplayInputSchema as H, evalChartConfigSchema as I, traceSpanKindSchema as J, traceDisplayInputConfigSchema as K, evalChartMetricSchema as L, evalChartAggregateSchema as M, evalChartAxisSchema as N, evalStatItemSchema as O, evalChartBuiltinMetricSchema as P, columnFormatSchema as Q, evalChartTooltipExtraSchema as R, spanCacheOptionsSchema as S, caseRowSchema as T, traceAttributeDisplayPlacementSchema as U, traceAttributeDisplayFormatSchema as V, traceAttributeDisplaySchema as W, cellValueSchema as X, traceSpanSchema as Y, columnDefSchema as Z, cacheListItemSchema as _, setEvalOutput as _t, sseEnvelopeSchema as a, buildTraceTree as at, cacheRecordingSchema as b, defineEval as bt, deriveScopedSummaryFromCases as c, evalTracer as ct, runManifestSchema as d, EvalAssertionError as dt, fileRefSchema as et, runSummarySchema as f, evalAssert as ft, cacheFileSchema as g, runInEvalScope as gt, cacheEntrySchema as h, isInEvalScope as ht, updateManualScoreRequestSchema as i, runArtifactRefSchema as it, scoreTraceSchema as j, evalStatsConfigSchema as k, deriveStatusFromCaseRows as l, hashCacheKey as lt, trialSelectionModeSchema as m, incrementEvalOutput as mt, createRunner as n, numberDisplayOptionsSchema as nt, getEvalTitle as o, captureEvalSpanError as ot, agentEvalsConfigSchema as p, getCurrentScope as pt, traceSpanErrorSchema as q, createRunRequestSchema as r, repoFileRefSchema as rt, getEvalDisplayStatus as s, evalSpan as st, runCli as t, jsonCellSchema as tt, deriveStatusFromChildStatuses as u, hashCacheKeySync as ut, cacheModeSchema as v, setScopeCacheContext as vt, caseDetailSchema as w, serializedCacheSpanSchema as x, getEvalRegistry as xt, cacheRecordingOpSchema as y, repoFile as yt, evalChartTypeSchema as z };
4475
+ export { columnDefSchema as $, evalStatsConfigSchema as A, evalChartTypeSchema as B, spanCacheOptionsSchema as C, setEvalOutput as Ct, evalFreshnessStatusSchema as D, getEvalRegistry as Dt, caseRowSchema as E, defineEval as Et, evalChartBuiltinMetricSchema as F, traceAttributeDisplaySchema as G, traceAttributeDisplayFormatSchema as H, evalChartColorSchema as I, traceSpanErrorSchema as J, traceDisplayConfigSchema as K, evalChartConfigSchema as L, scoreTraceSchema as M, evalChartAggregateSchema as N, evalStatAggregateSchema as O, evalChartAxisSchema as P, cellValueSchema as Q, evalChartMetricSchema as R, serializedCacheSpanSchema as S, runInEvalScope as St, caseDetailSchema as T, repoFile as Tt, traceAttributeDisplayInputSchema as U, evalChartsConfigSchema as V, traceAttributeDisplayPlacementSchema as W, traceSpanSchema as X, traceSpanKindSchema as Y, traceSpanWarningSchema as Z, cacheListItemSchema as _, getCurrentScope as _t, sseEnvelopeSchema as a, repoFileRefSchema as at, cacheRecordingOpSchema as b, isInEvalScope as bt, deriveScopedSummaryFromCases as c, buildTraceTree as ct, runManifestSchema as d, evalTracer as dt, columnFormatSchema as et, runSummarySchema as f, hashCacheKey as ft, cacheFileSchema as g, evalAssert as gt, cacheEntrySchema as h, appendToEvalOutput as ht, updateManualScoreRequestSchema as i, numberDisplayOptionsSchema as it, evalSummarySchema as j, evalStatItemSchema as k, deriveStatusFromCaseRows as l, captureEvalSpanError as lt, trialSelectionModeSchema as m, EvalAssertionError as mt, createRunner as n, fileRefSchema as nt, getEvalTitle as o, runArtifactRefSchema as ot, agentEvalsConfigSchema as p, hashCacheKeySync as pt, traceDisplayInputConfigSchema as q, createRunRequestSchema as r, jsonCellSchema as rt, getEvalDisplayStatus as s, z$1 as st, runCli as t, columnKindSchema as tt, deriveStatusFromChildStatuses as u, evalSpan as ut, cacheModeSchema as v, getEvalCaseInput as vt, assertionFailureSchema as w, setScopeCacheContext as wt, 
cacheRecordingSchema as x, mergeEvalOutput as xt, cacheOperationTypeSchema as y, incrementEvalOutput as yt, evalChartTooltipExtraSchema as z };