@ls-stack/agent-eval 0.16.1 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -666,6 +666,46 @@ declare const assertionFailureSchema: z$1.ZodObject<{
666
666
  }, z$1.core.$strip>;
667
667
  /** Assertion failure metadata captured for one case run. */
668
668
  type AssertionFailure = z$1.infer<typeof assertionFailureSchema>;
669
+ /** Severity level for one log captured during a case run. */
670
+ declare const runLogLevelSchema: z$1.ZodEnum<{
671
+ error: "error";
672
+ log: "log";
673
+ info: "info";
674
+ warn: "warn";
675
+ }>;
676
+ /** Severity level for one log captured during a case run. */
677
+ type RunLogLevel = z$1.infer<typeof runLogLevelSchema>;
678
+ /** Eval runner phase that emitted a captured case log. */
679
+ declare const runLogPhaseSchema: z$1.ZodEnum<{
680
+ eval: "eval";
681
+ derive: "derive";
682
+ outputsSchema: "outputsSchema";
683
+ scorer: "scorer";
684
+ }>;
685
+ /** Eval runner phase that emitted a captured case log. */
686
+ type RunLogPhase = z$1.infer<typeof runLogPhaseSchema>;
687
+ /** Schema for one persisted log entry captured during a case run. */
688
+ declare const runLogEntrySchema: z$1.ZodObject<{
689
+ timestamp: z$1.ZodString;
690
+ level: z$1.ZodEnum<{
691
+ error: "error";
692
+ log: "log";
693
+ info: "info";
694
+ warn: "warn";
695
+ }>;
696
+ phase: z$1.ZodEnum<{
697
+ eval: "eval";
698
+ derive: "derive";
699
+ outputsSchema: "outputsSchema";
700
+ scorer: "scorer";
701
+ }>;
702
+ message: z$1.ZodString;
703
+ args: z$1.ZodDefault<z$1.ZodArray<z$1.ZodUnknown>>;
704
+ truncated: z$1.ZodDefault<z$1.ZodBoolean>;
705
+ source: z$1.ZodOptional<z$1.ZodString>;
706
+ }, z$1.core.$strip>;
707
+ /** Persisted log entry captured during a case run. */
708
+ type RunLogEntry = z$1.infer<typeof runLogEntrySchema>;
669
709
  /** Trace payload captured while computing one score for a case. */
670
710
  declare const scoreTraceSchema: z$1.ZodObject<{
671
711
  trace: z$1.ZodArray<z$1.ZodObject<{
@@ -907,6 +947,25 @@ declare const caseDetailSchema: z$1.ZodObject<{
907
947
  message: string;
908
948
  stack?: string | undefined;
909
949
  }, string>>]>>;
950
+ logs: z$1.ZodDefault<z$1.ZodArray<z$1.ZodObject<{
951
+ timestamp: z$1.ZodString;
952
+ level: z$1.ZodEnum<{
953
+ error: "error";
954
+ log: "log";
955
+ info: "info";
956
+ warn: "warn";
957
+ }>;
958
+ phase: z$1.ZodEnum<{
959
+ eval: "eval";
960
+ derive: "derive";
961
+ outputsSchema: "outputsSchema";
962
+ scorer: "scorer";
963
+ }>;
964
+ message: z$1.ZodString;
965
+ args: z$1.ZodDefault<z$1.ZodArray<z$1.ZodUnknown>>;
966
+ truncated: z$1.ZodDefault<z$1.ZodBoolean>;
967
+ source: z$1.ZodOptional<z$1.ZodString>;
968
+ }, z$1.core.$strip>>>;
910
969
  error: z$1.ZodNullable<z$1.ZodObject<{
911
970
  name: z$1.ZodOptional<z$1.ZodString>;
912
971
  message: z$1.ZodString;
@@ -1564,6 +1623,12 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
1564
1623
  }, z$1.core.$strip>;
1565
1624
  /** Authored API calls config accepted from `agent-evals.config.ts`. */
1566
1625
  type ApiCallsConfigInput = z$1.infer<typeof apiCallsConfigSchema>;
1626
+ /** Schema for workspace-level run log capture options. */
1627
+ declare const runLogsConfigSchema: z$1.ZodObject<{
1628
+ captureConsole: z$1.ZodOptional<z$1.ZodBoolean>;
1629
+ }, z$1.core.$strip>;
1630
+ /** Workspace-level run log capture options. */
1631
+ type RunLogsConfigInput = z$1.infer<typeof runLogsConfigSchema>;
1567
1632
  /** Resolved LLM-calls config sent to the UI with all defaults applied. */
1568
1633
  type ResolvedLlmCallsConfig = {
1569
1634
  kinds: string[];
@@ -1737,6 +1802,16 @@ type AgentEvalsConfig = {
1737
1802
  * ```
1738
1803
  */
1739
1804
  apiCalls?: ApiCallsConfigInput;
1805
+ /**
1806
+ * Configuration for case run logs.
1807
+ *
1808
+ * Console capture is enabled by default and stores `console.log`,
1809
+ * `console.info`, `console.warn`, and `console.error` calls made during
1810
+ * active case-owned phases. Set `captureConsole: false` to keep console
1811
+ * output visible in the terminal without persisting it to case details.
1812
+ * Manual `evalLog(...)` calls are still persisted.
1813
+ */
1814
+ runLogs?: RunLogsConfigInput;
1740
1815
  /**
1741
1816
  * Optional controls for the operation cache. When omitted, the cache is
1742
1817
  * enabled and stored under `<workspaceRoot>/.agent-evals/cache`.
@@ -1872,6 +1947,9 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
1872
1947
  }>>>;
1873
1948
  }, z$1.core.$strip>>>;
1874
1949
  }, z$1.core.$strip>>;
1950
+ runLogs: z$1.ZodOptional<z$1.ZodObject<{
1951
+ captureConsole: z$1.ZodOptional<z$1.ZodBoolean>;
1952
+ }, z$1.core.$strip>>;
1875
1953
  cache: z$1.ZodOptional<z$1.ZodObject<{
1876
1954
  enabled: z$1.ZodOptional<z$1.ZodBoolean>;
1877
1955
  dir: z$1.ZodOptional<z$1.ZodString>;
@@ -2247,6 +2325,110 @@ declare const cacheEntrySchema: z$1.ZodObject<{
2247
2325
  }, z$1.core.$strip>;
2248
2326
  /** Persisted cache file contents. */
2249
2327
  type CacheEntry = z$1.infer<typeof cacheEntrySchema>;
2328
+ /** Debug-only raw key metadata stored outside the reusable cache entry. */
2329
+ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
2330
+ version: z$1.ZodLiteral<1>;
2331
+ key: z$1.ZodString;
2332
+ namespace: z$1.ZodString;
2333
+ operationType: z$1.ZodEnum<{
2334
+ span: "span";
2335
+ value: "value";
2336
+ }>;
2337
+ operationName: z$1.ZodString;
2338
+ storedAt: z$1.ZodString;
2339
+ codeFingerprint: z$1.ZodString;
2340
+ rawKey: z$1.ZodUnknown;
2341
+ }, z$1.core.$strip>;
2342
+ /** Debug-only raw cache key entry. May contain sensitive prompt/input data. */
2343
+ type CacheDebugKeyEntry = z$1.infer<typeof cacheDebugKeyEntrySchema>;
2344
+ /** Cache lookup response with optional debug-only raw key data. */
2345
+ declare const cacheEntryWithDebugKeySchema: z$1.ZodObject<{
2346
+ version: z$1.ZodLiteral<1>;
2347
+ key: z$1.ZodString;
2348
+ namespace: z$1.ZodString;
2349
+ operationType: z$1.ZodOptional<z$1.ZodEnum<{
2350
+ span: "span";
2351
+ value: "value";
2352
+ }>>;
2353
+ operationName: z$1.ZodOptional<z$1.ZodString>;
2354
+ spanName: z$1.ZodOptional<z$1.ZodString>;
2355
+ spanKind: z$1.ZodOptional<z$1.ZodString>;
2356
+ storedAt: z$1.ZodString;
2357
+ codeFingerprint: z$1.ZodString;
2358
+ recording: z$1.ZodObject<{
2359
+ returnValue: z$1.ZodUnknown;
2360
+ finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
2361
+ finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
2362
+ error: "error";
2363
+ running: "running";
2364
+ ok: "ok";
2365
+ cancelled: "cancelled";
2366
+ }>>;
2367
+ finalError: z$1.ZodOptional<z$1.ZodObject<{
2368
+ name: z$1.ZodOptional<z$1.ZodString>;
2369
+ message: z$1.ZodString;
2370
+ stack: z$1.ZodOptional<z$1.ZodString>;
2371
+ capturedAt: z$1.ZodOptional<z$1.ZodString>;
2372
+ }, z$1.core.$catchall<z$1.ZodUnknown>>>;
2373
+ finalErrors: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
2374
+ name: z$1.ZodOptional<z$1.ZodString>;
2375
+ message: z$1.ZodString;
2376
+ stack: z$1.ZodOptional<z$1.ZodString>;
2377
+ capturedAt: z$1.ZodOptional<z$1.ZodString>;
2378
+ }, z$1.core.$catchall<z$1.ZodUnknown>>>>;
2379
+ finalWarning: z$1.ZodOptional<z$1.ZodObject<{
2380
+ name: z$1.ZodOptional<z$1.ZodString>;
2381
+ message: z$1.ZodString;
2382
+ stack: z$1.ZodOptional<z$1.ZodString>;
2383
+ capturedAt: z$1.ZodOptional<z$1.ZodString>;
2384
+ }, z$1.core.$catchall<z$1.ZodUnknown>>>;
2385
+ finalWarnings: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
2386
+ name: z$1.ZodOptional<z$1.ZodString>;
2387
+ message: z$1.ZodString;
2388
+ stack: z$1.ZodOptional<z$1.ZodString>;
2389
+ capturedAt: z$1.ZodOptional<z$1.ZodString>;
2390
+ }, z$1.core.$catchall<z$1.ZodUnknown>>>>;
2391
+ ops: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2392
+ kind: z$1.ZodLiteral<"setOutput">;
2393
+ key: z$1.ZodString;
2394
+ value: z$1.ZodUnknown;
2395
+ }, z$1.core.$strip>, z$1.ZodObject<{
2396
+ kind: z$1.ZodLiteral<"appendOutput">;
2397
+ key: z$1.ZodString;
2398
+ value: z$1.ZodUnknown;
2399
+ }, z$1.core.$strip>, z$1.ZodObject<{
2400
+ kind: z$1.ZodLiteral<"mergeOutput">;
2401
+ key: z$1.ZodString;
2402
+ patch: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
2403
+ }, z$1.core.$strip>, z$1.ZodObject<{
2404
+ kind: z$1.ZodLiteral<"incrementOutput">;
2405
+ key: z$1.ZodString;
2406
+ delta: z$1.ZodNumber;
2407
+ }, z$1.core.$strip>, z$1.ZodObject<{
2408
+ kind: z$1.ZodLiteral<"checkpoint">;
2409
+ name: z$1.ZodString;
2410
+ data: z$1.ZodUnknown;
2411
+ }, z$1.core.$strip>, z$1.ZodObject<{
2412
+ kind: z$1.ZodLiteral<"subSpan">;
2413
+ span: z$1.ZodType<SerializedCacheSpan, unknown, z$1.core.$ZodTypeInternals<SerializedCacheSpan, unknown>>;
2414
+ }, z$1.core.$strip>], "kind">>;
2415
+ }, z$1.core.$strip>;
2416
+ debugKey: z$1.ZodOptional<z$1.ZodObject<{
2417
+ version: z$1.ZodLiteral<1>;
2418
+ key: z$1.ZodString;
2419
+ namespace: z$1.ZodString;
2420
+ operationType: z$1.ZodEnum<{
2421
+ span: "span";
2422
+ value: "value";
2423
+ }>;
2424
+ operationName: z$1.ZodString;
2425
+ storedAt: z$1.ZodString;
2426
+ codeFingerprint: z$1.ZodString;
2427
+ rawKey: z$1.ZodUnknown;
2428
+ }, z$1.core.$strip>>;
2429
+ }, z$1.core.$strip>;
2430
+ /** Cache lookup response returned by cache APIs when raw-key debug data exists. */
2431
+ type CacheEntryWithDebugKey = z$1.infer<typeof cacheEntryWithDebugKeySchema>;
2250
2432
  /** Persisted per-owner cache file containing multiple cache entries. */
2251
2433
  declare const cacheFileSchema: z$1.ZodObject<{
2252
2434
  version: z$1.ZodLiteral<1>;
@@ -2326,6 +2508,26 @@ declare const cacheFileSchema: z$1.ZodObject<{
2326
2508
  }, z$1.core.$strip>;
2327
2509
  /** Persisted per-owner cache file contents. */
2328
2510
  type CacheFile = z$1.infer<typeof cacheFileSchema>;
2511
+ /** Persisted per-owner debug file containing raw cache key metadata. */
2512
+ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
2513
+ version: z$1.ZodLiteral<1>;
2514
+ owner: z$1.ZodString;
2515
+ entries: z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
2516
+ version: z$1.ZodLiteral<1>;
2517
+ key: z$1.ZodString;
2518
+ namespace: z$1.ZodString;
2519
+ operationType: z$1.ZodEnum<{
2520
+ span: "span";
2521
+ value: "value";
2522
+ }>;
2523
+ operationName: z$1.ZodString;
2524
+ storedAt: z$1.ZodString;
2525
+ codeFingerprint: z$1.ZodString;
2526
+ rawKey: z$1.ZodUnknown;
2527
+ }, z$1.core.$strip>>;
2528
+ }, z$1.core.$strip>;
2529
+ /** Persisted per-owner raw cache key debug file contents. */
2530
+ type CacheDebugKeyFile = z$1.infer<typeof cacheDebugKeyFileSchema>;
2329
2531
  //#endregion
2330
2532
  //#region ../shared/src/utils/extractCacheHits.d.ts
2331
2533
  /**
@@ -2673,6 +2875,19 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
2673
2875
  declare function repoFile(path: string, mimeType?: string): RepoFileRef;
2674
2876
  //#endregion
2675
2877
  //#region ../sdk/src/runtime.d.ts
2878
+ /**
2879
+ * Raw-key debug payload passed alongside cache writes.
2880
+ *
2881
+ * `rawKey` may include prompt text, user input, or other sensitive material.
2882
+ * Runners store it outside the reusable cache so projects can gitignore the
2883
+ * debug folder while keeping hash-only cache entries shareable.
2884
+ */
2885
+ type CacheDebugKeyWrite = {
2886
+ rawKey: unknown;
2887
+ operationType: CacheOperationType;
2888
+ operationName: string;
2889
+ codeFingerprint: string;
2890
+ };
2676
2891
  /**
2677
2892
  * Adapter used by the SDK to read and write cache entries.
2678
2893
  *
@@ -2680,8 +2895,15 @@ declare function repoFile(path: string, mimeType?: string): RepoFileRef;
2680
2895
  * starts executing.
2681
2896
  */
2682
2897
  type CacheAdapter = {
2683
- /** Return the stored entry for `keyHash` under `namespace`, or `null`. */lookup(namespace: string, keyHash: string): Promise<CacheEntry | null>; /** Persist a cache entry. Must be safe under concurrent calls. */
2684
- write(entry: CacheEntry): Promise<void>;
2898
+ /** Return the stored entry for `keyHash` under `namespace`, or `null`. */lookup(namespace: string, keyHash: string): Promise<CacheEntry | null>;
2899
+ /**
2900
+ * Persist a cache entry. Must be safe under concurrent calls.
2901
+ *
2902
+ * `debugKey` is optional and contains the authored raw key value for
2903
+ * debugging. It may contain sensitive prompt/input data and should be stored
2904
+ * separately from reusable cache files.
2905
+ */
2906
+ write(entry: CacheEntry, debugKey?: CacheDebugKeyWrite): Promise<void>;
2685
2907
  };
2686
2908
  /** Runner-supplied cache context attached to an eval case scope. */
2687
2909
  type CacheScopeContext = {
@@ -2703,7 +2925,8 @@ type EvalCaseScope = {
2703
2925
  nextEvalIdCounter: number; /** Authored input for the current case, when provided by the runner. */
2704
2926
  input?: unknown;
2705
2927
  outputs: Record<string, unknown>; /** Structured assertion failures recorded for the current case. */
2706
- assertionFailures: AssertionFailure[];
2928
+ assertionFailures: AssertionFailure[]; /** Logs captured from manual `evalLog(...)` calls and enabled console calls. */
2929
+ logs: RunLogEntry[];
2707
2930
  spans: EvalTraceSpan[];
2708
2931
  checkpoints: Map<string, unknown>;
2709
2932
  spanStack: string[];
@@ -2735,6 +2958,7 @@ type EvalCaseScope = {
2735
2958
  * modules imported while a run is being prepared.
2736
2959
  */
2737
2960
  type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'outputsSchema' | 'scorer';
2961
+ type EvalLogLevelInput = RunLogLevel | 'warning';
2738
2962
  /** Error thrown when an eval assertion fails during case execution. */
2739
2963
  declare class EvalAssertionError extends Error {
2740
2964
  constructor(message: string);
@@ -2750,6 +2974,14 @@ declare function getCurrentScope(): EvalCaseScope | undefined;
2750
2974
  * while validating outputs, and `scorer` while computing scores.
2751
2975
  */
2752
2976
  declare function isInEvalScope(): EvalRuntimeScope | null;
2977
+ /**
2978
+ * Record a manual log entry on the active eval case.
2979
+ *
2980
+ * Values are formatted with Node-style console formatting and capped before
2981
+ * persistence so a single log cannot make run artifacts unbounded. Calls made
2982
+ * outside active case-owned eval phases are ignored.
2983
+ */
2984
+ declare function evalLog(level: EvalLogLevelInput, ...args: unknown[]): void;
2753
2985
  /**
2754
2986
  * Register background work that should settle before eval finalization.
2755
2987
  *
@@ -2845,6 +3077,352 @@ declare function incrementEvalOutput(key: string, delta: number): void;
2845
3077
  */
2846
3078
  declare function evalAssert(condition: boolean, message: string): void;
2847
3079
  //#endregion
3080
+ //#region ../../node_modules/.pnpm/seroval@1.5.2/node_modules/seroval/dist/types/core/constants.d.ts
3081
+ declare const enum SerovalConstant {
3082
+ Null = 0,
3083
+ Undefined = 1,
3084
+ True = 2,
3085
+ False = 3,
3086
+ NegZero = 4,
3087
+ Inf = 5,
3088
+ NegInf = 6,
3089
+ Nan = 7
3090
+ }
3091
+ declare const enum SerovalNodeType {
3092
+ Number = 0,
3093
+ String = 1,
3094
+ Constant = 2,
3095
+ BigInt = 3,
3096
+ IndexedValue = 4,
3097
+ Date = 5,
3098
+ RegExp = 6,
3099
+ Set = 7,
3100
+ Map = 8,
3101
+ Array = 9,
3102
+ Object = 10,
3103
+ NullConstructor = 11,
3104
+ Promise = 12,
3105
+ Error = 13,
3106
+ AggregateError = 14,
3107
+ TypedArray = 15,
3108
+ BigIntTypedArray = 16,
3109
+ WKSymbol = 17,
3110
+ Reference = 18,
3111
+ ArrayBuffer = 19,
3112
+ DataView = 20,
3113
+ Boxed = 21,
3114
+ PromiseConstructor = 22,
3115
+ PromiseSuccess = 23,
3116
+ PromiseFailure = 24,
3117
+ Plugin = 25,
3118
+ SpecialReference = 26,
3119
+ IteratorFactory = 27,
3120
+ IteratorFactoryInstance = 28,
3121
+ AsyncIteratorFactory = 29,
3122
+ AsyncIteratorFactoryInstance = 30,
3123
+ StreamConstructor = 31,
3124
+ StreamNext = 32,
3125
+ StreamThrow = 33,
3126
+ StreamReturn = 34,
3127
+ Sequence = 35
3128
+ }
3129
+ declare const enum SerovalObjectFlags {
3130
+ None = 0,
3131
+ NonExtensible = 1,
3132
+ Sealed = 2,
3133
+ Frozen = 3
3134
+ }
3135
+ declare const enum Symbols {
3136
+ AsyncIterator = 0,
3137
+ HasInstance = 1,
3138
+ IsConcatSpreadable = 2,
3139
+ Iterator = 3,
3140
+ Match = 4,
3141
+ MatchAll = 5,
3142
+ Replace = 6,
3143
+ Search = 7,
3144
+ Species = 8,
3145
+ Split = 9,
3146
+ ToPrimitive = 10,
3147
+ ToStringTag = 11,
3148
+ Unscopables = 12
3149
+ }
3150
+ declare const enum ErrorConstructorTag {
3151
+ Error = 0,
3152
+ EvalError = 1,
3153
+ RangeError = 2,
3154
+ ReferenceError = 3,
3155
+ SyntaxError = 4,
3156
+ TypeError = 5,
3157
+ URIError = 6
3158
+ }
3159
+ //#endregion
3160
+ //#region ../../node_modules/.pnpm/seroval@1.5.2/node_modules/seroval/dist/types/core/special-reference.d.ts
3161
+ declare const enum SpecialReference {
3162
+ MapSentinel = 0,
3163
+ PromiseConstructor = 1,
3164
+ PromiseSuccess = 2,
3165
+ PromiseFailure = 3,
3166
+ StreamConstructor = 4,
3167
+ ArrayBufferConstructor = 5
3168
+ }
3169
+ //#endregion
3170
+ //#region ../../node_modules/.pnpm/seroval@1.5.2/node_modules/seroval/dist/types/core/types.d.ts
3171
+ interface SerovalBaseNode {
3172
+ t: SerovalNodeType;
3173
+ i: number | undefined;
3174
+ s: unknown;
3175
+ c: string | undefined;
3176
+ m: string | undefined;
3177
+ p: SerovalObjectRecordNode | undefined;
3178
+ e: SerovalMapRecordNode | undefined;
3179
+ a: (SerovalNode | 0)[] | undefined;
3180
+ f: SerovalNode | undefined;
3181
+ b: number | undefined;
3182
+ o: SerovalObjectFlags | undefined;
3183
+ l: number | undefined;
3184
+ }
3185
+ type SerovalObjectRecordKey = string | SerovalNode;
3186
+ interface SerovalObjectRecordNode {
3187
+ k: SerovalObjectRecordKey[];
3188
+ v: SerovalNode[];
3189
+ }
3190
+ interface SerovalMapRecordNode {
3191
+ k: SerovalNode[];
3192
+ v: SerovalNode[];
3193
+ }
3194
+ interface SerovalNumberNode extends SerovalBaseNode {
3195
+ t: SerovalNodeType.Number;
3196
+ s: number;
3197
+ }
3198
+ interface SerovalStringNode extends SerovalBaseNode {
3199
+ t: SerovalNodeType.String;
3200
+ s: string;
3201
+ }
3202
+ interface SerovalConstantNode extends SerovalBaseNode {
3203
+ t: SerovalNodeType.Constant;
3204
+ s: SerovalConstant;
3205
+ }
3206
+ type SerovalPrimitiveNode = SerovalNumberNode | SerovalStringNode | SerovalConstantNode;
3207
+ interface SerovalIndexedValueNode extends SerovalBaseNode {
3208
+ t: SerovalNodeType.IndexedValue;
3209
+ i: number;
3210
+ }
3211
+ interface SerovalBigIntNode extends SerovalBaseNode {
3212
+ t: SerovalNodeType.BigInt;
3213
+ s: string;
3214
+ }
3215
+ interface SerovalDateNode extends SerovalBaseNode {
3216
+ t: SerovalNodeType.Date;
3217
+ i: number;
3218
+ s: string;
3219
+ }
3220
+ interface SerovalRegExpNode extends SerovalBaseNode {
3221
+ t: SerovalNodeType.RegExp;
3222
+ i: number;
3223
+ c: string;
3224
+ m: string;
3225
+ }
3226
+ interface SerovalArrayBufferNode extends SerovalBaseNode {
3227
+ t: SerovalNodeType.ArrayBuffer;
3228
+ i: number;
3229
+ s: string;
3230
+ f: SerovalNodeWithID;
3231
+ }
3232
+ interface SerovalTypedArrayNode extends SerovalBaseNode {
3233
+ t: SerovalNodeType.TypedArray;
3234
+ i: number;
3235
+ c: string;
3236
+ f: SerovalNode;
3237
+ b: number;
3238
+ l: number;
3239
+ }
3240
+ interface SerovalBigIntTypedArrayNode extends SerovalBaseNode {
3241
+ t: SerovalNodeType.BigIntTypedArray;
3242
+ i: number;
3243
+ c: string;
3244
+ f: SerovalNode;
3245
+ b: number;
3246
+ l: number;
3247
+ }
3248
+ type SerovalSemiPrimitiveNode = SerovalBigIntNode | SerovalDateNode | SerovalRegExpNode | SerovalTypedArrayNode | SerovalBigIntTypedArrayNode;
3249
+ interface SerovalSetNode extends SerovalBaseNode {
3250
+ t: SerovalNodeType.Set;
3251
+ i: number;
3252
+ a: SerovalNode[];
3253
+ }
3254
+ interface SerovalMapNode extends SerovalBaseNode {
3255
+ t: SerovalNodeType.Map;
3256
+ i: number;
3257
+ e: SerovalMapRecordNode;
3258
+ f: SerovalNodeWithID;
3259
+ }
3260
+ interface SerovalArrayNode extends SerovalBaseNode {
3261
+ t: SerovalNodeType.Array;
3262
+ a: (SerovalNode | 0)[];
3263
+ i: number;
3264
+ o: SerovalObjectFlags;
3265
+ }
3266
+ interface SerovalObjectNode extends SerovalBaseNode {
3267
+ t: SerovalNodeType.Object;
3268
+ p: SerovalObjectRecordNode;
3269
+ i: number;
3270
+ o: SerovalObjectFlags;
3271
+ }
3272
+ interface SerovalNullConstructorNode extends SerovalBaseNode {
3273
+ t: SerovalNodeType.NullConstructor;
3274
+ p: SerovalObjectRecordNode;
3275
+ i: number;
3276
+ o: SerovalObjectFlags;
3277
+ }
3278
+ interface SerovalPromiseNode extends SerovalBaseNode {
3279
+ t: SerovalNodeType.Promise;
3280
+ s: 0 | 1;
3281
+ f: SerovalNode;
3282
+ i: number;
3283
+ }
3284
+ interface SerovalErrorNode extends SerovalBaseNode {
3285
+ t: SerovalNodeType.Error;
3286
+ s: ErrorConstructorTag;
3287
+ m: string;
3288
+ p: SerovalObjectRecordNode | undefined;
3289
+ i: number;
3290
+ }
3291
+ interface SerovalAggregateErrorNode extends SerovalBaseNode {
3292
+ t: SerovalNodeType.AggregateError;
3293
+ i: number;
3294
+ m: string;
3295
+ p: SerovalObjectRecordNode | undefined;
3296
+ }
3297
+ interface SerovalWKSymbolNode extends SerovalBaseNode {
3298
+ t: SerovalNodeType.WKSymbol;
3299
+ i: number;
3300
+ s: Symbols;
3301
+ }
3302
+ interface SerovalReferenceNode extends SerovalBaseNode {
3303
+ t: SerovalNodeType.Reference;
3304
+ i: number;
3305
+ s: string;
3306
+ }
3307
+ interface SerovalDataViewNode extends SerovalBaseNode {
3308
+ t: SerovalNodeType.DataView;
3309
+ i: number;
3310
+ f: SerovalNode;
3311
+ b: number;
3312
+ l: number;
3313
+ }
3314
+ interface SerovalBoxedNode extends SerovalBaseNode {
3315
+ t: SerovalNodeType.Boxed;
3316
+ i: number;
3317
+ f: SerovalNode;
3318
+ }
3319
+ interface SerovalPromiseConstructorNode extends SerovalBaseNode {
3320
+ t: SerovalNodeType.PromiseConstructor;
3321
+ i: number;
3322
+ s: number;
3323
+ f: SerovalNodeWithID;
3324
+ }
3325
+ interface SerovalPromiseResolveNode extends SerovalBaseNode {
3326
+ t: SerovalNodeType.PromiseSuccess;
3327
+ i: number;
3328
+ a: [resolver: SerovalNodeWithID, resolved: SerovalNode];
3329
+ }
3330
+ interface SerovalPromiseRejectNode extends SerovalBaseNode {
3331
+ t: SerovalNodeType.PromiseFailure;
3332
+ i: number;
3333
+ a: [resolver: SerovalNodeWithID, resolved: SerovalNode];
3334
+ }
3335
+ interface SerovalPluginNode extends SerovalBaseNode {
3336
+ t: SerovalNodeType.Plugin;
3337
+ i: number;
3338
+ s: Record<string, SerovalNode>;
3339
+ c: string;
3340
+ }
3341
+ /**
3342
+ * Represents special values as placeholders
3343
+ */
3344
+ interface SerovalSpecialReferenceNode extends SerovalBaseNode {
3345
+ t: SerovalNodeType.SpecialReference;
3346
+ i: number;
3347
+ s: SpecialReference;
3348
+ }
3349
+ interface SerovalIteratorFactoryNode extends SerovalBaseNode {
3350
+ t: SerovalNodeType.IteratorFactory;
3351
+ i: number;
3352
+ f: SerovalNodeWithID;
3353
+ }
3354
+ interface SerovalIteratorFactoryInstanceNode extends SerovalBaseNode {
3355
+ t: SerovalNodeType.IteratorFactoryInstance;
3356
+ a: [instance: SerovalNodeWithID, sequence: SerovalNodeWithID];
3357
+ }
3358
+ interface SerovalAsyncIteratorFactoryNode extends SerovalBaseNode {
3359
+ t: SerovalNodeType.AsyncIteratorFactory;
3360
+ i: number;
3361
+ a: [promise: SerovalNodeWithID, symbol: SerovalNodeWithID];
3362
+ }
3363
+ interface SerovalAsyncIteratorFactoryInstanceNode extends SerovalBaseNode {
3364
+ t: SerovalNodeType.AsyncIteratorFactoryInstance;
3365
+ a: [instance: SerovalNodeWithID, sequence: SerovalNodeWithID];
3366
+ }
3367
+ interface SerovalStreamConstructorNode extends SerovalBaseNode {
3368
+ t: SerovalNodeType.StreamConstructor;
3369
+ i: number;
3370
+ a: SerovalNode[];
3371
+ f: SerovalNodeWithID;
3372
+ }
3373
+ interface SerovalStreamNextNode extends SerovalBaseNode {
3374
+ t: SerovalNodeType.StreamNext;
3375
+ i: number;
3376
+ f: SerovalNode;
3377
+ }
3378
+ interface SerovalStreamThrowNode extends SerovalBaseNode {
3379
+ t: SerovalNodeType.StreamThrow;
3380
+ i: number;
3381
+ f: SerovalNode;
3382
+ }
3383
+ interface SerovalStreamReturnNode extends SerovalBaseNode {
3384
+ t: SerovalNodeType.StreamReturn;
3385
+ i: number;
3386
+ f: SerovalNode;
3387
+ }
3388
+ interface SerovalSequenceNode extends SerovalBaseNode {
3389
+ t: SerovalNodeType.Sequence;
3390
+ i: number;
3391
+ s: number;
3392
+ a: SerovalNode[];
3393
+ l: number;
3394
+ }
3395
+ type SerovalSyncNode = SerovalPrimitiveNode | SerovalIndexedValueNode | SerovalSemiPrimitiveNode | SerovalSetNode | SerovalMapNode | SerovalArrayNode | SerovalObjectNode | SerovalNullConstructorNode | SerovalPromiseNode | SerovalErrorNode | SerovalAggregateErrorNode | SerovalWKSymbolNode | SerovalReferenceNode | SerovalArrayBufferNode | SerovalDataViewNode | SerovalBoxedNode | SerovalPluginNode | SerovalSpecialReferenceNode | SerovalIteratorFactoryNode | SerovalIteratorFactoryInstanceNode | SerovalAsyncIteratorFactoryNode | SerovalAsyncIteratorFactoryInstanceNode | SerovalSequenceNode;
3396
+ type SerovalAsyncNode = SerovalPromiseNode | SerovalPromiseConstructorNode | SerovalPromiseResolveNode | SerovalPromiseRejectNode | SerovalStreamConstructorNode | SerovalStreamNextNode | SerovalStreamThrowNode | SerovalStreamReturnNode;
3397
+ type SerovalNode = SerovalSyncNode | SerovalAsyncNode;
3398
+ type SerovalNodeWithID = Extract<SerovalNode, {
3399
+ i: number;
3400
+ }>;
3401
+ //#endregion
3402
+ //#region ../../node_modules/.pnpm/seroval@1.5.2/node_modules/seroval/dist/types/core/tree/index.d.ts
3403
+ interface SerovalJSON {
3404
+ t: SerovalNode;
3405
+ f: number;
3406
+ m: number[];
3407
+ }
3408
+ //#endregion
3409
+ //#region ../sdk/src/cacheSerialization.d.ts
3410
+ declare const serializedCacheValueMarker = "__agentEvalsCacheSerialization";
3411
+ declare const serializedCacheValueVersion = "seroval-web-v1";
3412
+ /** Seroval-backed persisted representation for one cached value. */
3413
+ type SerializedCacheValue = {
3414
+ [serializedCacheValueMarker]: typeof serializedCacheValueVersion;
3415
+ value: SerovalJSON;
3416
+ };
3417
+ /** Serialize one cached value with Seroval plus the Web API plugin set. */
3418
+ declare function serializeCacheValue(value: unknown): Promise<unknown>;
3419
+ /** Revive one cached value, while preserving legacy JSON-round-tripped data. */
3420
+ declare function deserializeCacheValue(value: unknown): unknown;
3421
+ /** Serialize all rich values captured in a cache recording before persistence. */
3422
+ declare function serializeCacheRecording(recording: CacheRecording): Promise<CacheRecording>;
3423
+ /** Revive all rich values captured in a cache recording after lookup. */
3424
+ declare function deserializeCacheRecording(recording: CacheRecording): CacheRecording;
3425
+ //#endregion
2848
3426
  //#region ../sdk/src/traceDiagnostics.d.ts
2849
3427
  /** Severity used when attaching a recoverable diagnostic to an active span. */
2850
3428
  type CaptureEvalSpanErrorLevel = 'error' | 'warning';
@@ -3110,11 +3688,11 @@ type EvalRunner = {
3110
3688
  listCache(): Promise<CacheListItem[]>;
3111
3689
  /**
3112
3690
  * Return the full persisted cache entry for `namespace` + `key`, including
3113
- * its recording. Returns `null` when no entry matches. Used by the case
3114
- * drawer's Cache hits tab to lazily fetch the cached return value when a
3115
- * row is expanded.
3691
+ * its recording and optional raw-key debug metadata. Returns `null` when no
3692
+ * entry matches. Used by the case drawer's Cache hits tab to lazily fetch
3693
+ * the cached return value when a row is expanded.
3116
3694
  */
3117
- getCacheEntry(namespace: string, key: string): Promise<CacheEntry | null>;
3695
+ getCacheEntry(namespace: string, key: string): Promise<CacheEntryWithDebugKey | null>;
3118
3696
  /**
3119
3697
  * Remove cache entries matching `filter`, or all entries when no filter is
3120
3698
  * supplied.
@@ -3169,4 +3747,4 @@ declare function createRunner({
3169
3747
  */
3170
3748
  declare function runCli(argv: string[]): Promise<void>;
3171
3749
  //#endregion
3172
- export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
3750
+ export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };