@ls-stack/agent-eval 0.16.1 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-B8e-oWYc.mjs → app-DTotEBoY.mjs} +3 -3
- package/dist/apps/web/dist/assets/index-C5IRkeUz.js +118 -0
- package/dist/apps/web/dist/assets/{index-MARPw1bH.css → index-Cn9WoTj5.css} +1 -1
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-BmrtjQj_.mjs → cli-CULTt3Xp.mjs} +64 -13
- package/dist/index.d.mts +586 -8
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +2 -1
- package/dist/{runOrchestration-BDyNrRQT.mjs → runOrchestration-D2okEB3I.mjs} +514 -125
- package/dist/{runner-CsZqhbiA.mjs → runner-BSXZiQIi.mjs} +2 -2
- package/dist/{runner-DABFPXkx.mjs → runner-DyM0Gp8G.mjs} +1 -1
- package/dist/src-CNf3xwVw.mjs +3 -0
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +17 -4
- package/dist/apps/web/dist/assets/index-BZ1TdyEg.js +0 -117
- package/dist/src-CEAJYN_X.mjs +0 -3
|
@@ -4,6 +4,7 @@ import { mkdir, readFile, readdir, rename, rm, stat, writeFile } from "node:fs/p
|
|
|
4
4
|
import { extname, isAbsolute, join, relative, resolve } from "node:path";
|
|
5
5
|
import { z, z as z$1 } from "zod/v4";
|
|
6
6
|
import { AsyncLocalStorage } from "node:async_hooks";
|
|
7
|
+
import { formatWithOptions } from "node:util";
|
|
7
8
|
import { Buffer as Buffer$1 } from "node:buffer";
|
|
8
9
|
import { getCompositeKey } from "@ls-stack/utils/getCompositeKey";
|
|
9
10
|
import { existsSync } from "node:fs";
|
|
@@ -49,6 +50,25 @@ const scopeStorage = new AsyncLocalStorage();
|
|
|
49
50
|
const runtimeScopeStorage = new AsyncLocalStorage();
|
|
50
51
|
let activeEvalScopeCount = 0;
|
|
51
52
|
let activeEvalRuntimeScopeCount = 0;
|
|
53
|
+
/** Toggle read by the patched console methods before they record a call. */
let consoleCaptureEnabled = true;
/** Upper bound on the length of a formatted log preview message. */
const maxLogMessageLength = 20000;
/** Upper bound on the length of a single string inside captured log args. */
const maxLogStringLength = 10000;
/** Maximum number of array items kept when converting log args. */
const maxLogArrayLength = 100;
/** Maximum number of object entries kept when converting log args. */
const maxLogObjectEntries = 100;
/** Nesting depth at which log arg conversion stops descending. */
const maxLogValueDepth = 5;
/** Console methods that get wrapped for capture. */
const consoleCaptureMethods = ["log", "info", "warn", "error"];
const runtimeConsole = globalThis.console;
/** Bound references to the console methods as they exist at this point. */
const originalConsoleMethods = Object.fromEntries(
	consoleCaptureMethods.map((method) => [method, runtimeConsole[method].bind(runtimeConsole)])
);
|
|
52
72
|
/** Error thrown when an eval assertion fails during case execution. */
|
|
53
73
|
var EvalAssertionError = class extends Error {
|
|
54
74
|
constructor(message) {
|
|
@@ -73,6 +93,155 @@ function isInEvalScope() {
|
|
|
73
93
|
if (activeEvalRuntimeScopeCount === 0) return null;
|
|
74
94
|
return runtimeScopeStorage.getStore() ?? null;
|
|
75
95
|
}
|
|
96
|
+
/**
 * Map the `"warning"` alias onto `"warn"`; every other level is returned as-is.
 */
function normalizeLogLevel(level) {
	if (level === "warning") return "warn";
	return level;
}
|
|
99
|
+
/**
 * Return the current runtime scope when it is one of the phases that may emit
 * case logs ("eval", "derive", "outputsSchema", "scorer"); otherwise `null`.
 */
function getCurrentLogPhase() {
	const activeScope = runtimeScopeStorage.getStore();
	switch (activeScope) {
		case "eval":
		case "derive":
		case "outputsSchema":
		case "scorer":
			return activeScope;
		default:
			return null;
	}
}
|
|
104
|
+
/**
 * Format log arguments into one preview string with Node's console-style
 * formatter, capped at `maxLogMessageLength` characters.
 *
 * @param {unknown[]} args Values as passed to the console/log call.
 * @returns {{ message: string, truncated: boolean }} The preview and whether
 *   it had to be cut (a cut message ends with "...").
 */
function formatLogArgs(args) {
	const formatted = formatWithOptions({
		depth: 2,
		// Use the shared caps instead of repeating their literal values here.
		maxArrayLength: maxLogArrayLength,
		maxStringLength: maxLogStringLength,
		breakLength: 80,
		compact: 3
	}, ...args);
	if (formatted.length <= maxLogMessageLength) return {
		message: formatted,
		truncated: false
	};
	// Back off by one unit if the cut would land between the two halves of a
	// surrogate pair, so the preview never ends in a lone high surrogate.
	let end = maxLogMessageLength;
	const lastUnit = formatted.charCodeAt(end - 1);
	if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
	return {
		message: `${formatted.slice(0, end)}...`,
		truncated: true
	};
}
|
|
121
|
+
/**
 * Cap a string at `maxLogStringLength` characters. When the string is cut,
 * "..." is appended and `ctx.truncated` is set to true.
 */
function truncateLogString(value, ctx) {
	const exceedsLimit = value.length > maxLogStringLength;
	if (exceedsLimit) {
		ctx.truncated = true;
		return `${value.slice(0, maxLogStringLength)}...`;
	}
	return value;
}
|
|
126
|
+
/**
 * Convert a non-object value into a JSON-safe representation.
 *
 * @param {unknown} value Value to convert.
 * @param {{ truncated: boolean }} ctx Conversion context; flagged when a
 *   string is cut by `truncateLogString`.
 * @returns {{ handled: boolean, value: unknown }} `handled: true` with the
 *   converted value for strings, numbers, booleans, null, undefined, bigints,
 *   symbols and functions; `handled: false` for anything else.
 */
function primitiveToLogValue(value, ctx) {
	const done = (converted) => ({
		handled: true,
		value: converted
	});
	switch (typeof value) {
		case "string": return done(truncateLogString(value, ctx));
		// NaN and +/-Infinity are not representable in JSON (they serialize as
		// null), so keep them readable as strings.
		case "number": return done(Number.isFinite(value) ? value : String(value));
		case "boolean": return done(value);
		case "undefined": return done("[undefined]");
		case "bigint": return done(`${value.toString()}n`);
		case "symbol": return done(String(value));
		case "function": {
			// `name` can be redefined to a non-string (even null); never throw here.
			const name = typeof value.name === "string" ? value.name : "";
			return done(`[Function${name.length > 0 ? `: ${name}` : ""}]`);
		}
		default: return value === null ? done(null) : {
			handled: false,
			value: null
		};
	}
}
|
|
156
|
+
/**
 * Convert an object into a bounded, JSON-safe value.
 *
 * Dates become ISO strings (or "Invalid Date"), Errors become
 * `{ name, message, stack }`, objects already being converted higher up the
 * tree become "[Circular]", and anything at `maxLogValueDepth` or deeper is
 * replaced by a "[Array]" / "[Object]" placeholder. Arrays and plain objects
 * are capped at `maxLogArrayLength` / `maxLogObjectEntries` entries.
 *
 * @param {object} value Object to convert.
 * @param {{ seen: WeakSet<object>, truncated: boolean }} ctx Shared state;
 *   `truncated` is set whenever something is dropped.
 * @param {number} depth Current nesting depth (0 for a top-level argument).
 * @returns {unknown} The converted value.
 */
function objectToLogValue(value, ctx, depth) {
	if (value instanceof Date) {
		// toISOString() throws a RangeError for invalid dates; logging one must not throw.
		return Number.isNaN(value.getTime()) ? "Invalid Date" : value.toISOString();
	}
	if (value instanceof Error) return {
		name: value.name,
		message: value.message,
		stack: value.stack
	};
	if (ctx.seen.has(value)) return "[Circular]";
	if (depth >= maxLogValueDepth) {
		ctx.truncated = true;
		return Array.isArray(value) ? "[Array]" : "[Object]";
	}
	ctx.seen.add(value);
	try {
		if (Array.isArray(value)) {
			const limited = value.slice(0, maxLogArrayLength).map((item) => toLogJsonValue(item, ctx, depth + 1));
			if (value.length > maxLogArrayLength) {
				ctx.truncated = true;
				limited.push(`[... ${String(value.length - maxLogArrayLength)} more]`);
			}
			return limited;
		}
		const entries = Object.entries(value);
		const result = {};
		for (const [key, entryValue] of entries.slice(0, maxLogObjectEntries)) {
			// Define the property instead of assigning it: `result["__proto__"] = x`
			// would replace the prototype and silently drop the entry.
			Object.defineProperty(result, key, {
				value: toLogJsonValue(entryValue, ctx, depth + 1),
				enumerable: true,
				writable: true,
				configurable: true
			});
		}
		if (entries.length > maxLogObjectEntries) {
			ctx.truncated = true;
			result.__truncated = `${String(entries.length - maxLogObjectEntries)} more properties`;
		}
		return result;
	} finally {
		// Only ancestors count as circular; siblings may repeat the same object.
		ctx.seen.delete(value);
	}
}
|
|
190
|
+
/**
 * Convert any value to its JSON-safe log form: primitives via
 * `primitiveToLogValue`, objects via `objectToLogValue`, and anything left
 * over via `String(value)`.
 */
function toLogJsonValue(value, ctx, depth) {
	const { handled, value: converted } = primitiveToLogValue(value, ctx);
	if (handled) return converted;
	const isObjectLike = value !== null && typeof value === "object";
	return isObjectLike ? objectToLogValue(value, ctx, depth) : String(value);
}
|
|
196
|
+
/**
 * Convert every log argument to a JSON-safe value, sharing one conversion
 * context so that truncation anywhere in the arguments is reported once.
 */
function toLogJsonArgs(args) {
	const ctx = {
		seen: /* @__PURE__ */ new WeakSet(),
		truncated: false
	};
	const converted = args.map((value) => toLogJsonValue(value, ctx, 0));
	// Read the flag only after all arguments have been converted.
	const { truncated } = ctx;
	return {
		args: converted,
		truncated
	};
}
|
|
206
|
+
/**
 * Append one log entry to the current eval scope's `logs`.
 *
 * Does nothing unless both a current scope and a log phase are available.
 * The entry's `truncated` flag is set when either the preview message or the
 * JSON args were capped.
 */
function recordEvalLog(level, args) {
	const activeScope = getCurrentScope();
	const activePhase = getCurrentLogPhase();
	if (activeScope && activePhase) {
		const { message, truncated: messageCut } = formatLogArgs(args);
		const { args: capturedArgs, truncated: argsCut } = toLogJsonArgs(args);
		const entry = {
			timestamp: (/* @__PURE__ */ new Date()).toISOString(),
			level: normalizeLogLevel(level),
			phase: activePhase,
			message,
			args: capturedArgs,
			truncated: messageCut || argsCut
		};
		activeScope.logs.push(entry);
	}
}
|
|
221
|
+
// Replace each captured console method with a wrapper that records the call
// (when capture is enabled) and then forwards it to the original method.
for (const method of consoleCaptureMethods) runtimeConsole[method] = (...args) => {
	try {
		if (consoleCaptureEnabled) recordEvalLog(method, args);
	} finally {
		// Forward even if recording throws, so a capture failure never
		// suppresses the actual console output.
		originalConsoleMethods[method](...args);
	}
};
|
|
225
|
+
/**
 * Configure whether console methods are captured as eval case logs.
 *
 * Runner-internal helper. When disabled, console output still prints normally;
 * only automatic persistence to `caseDetail.logs` is skipped. Manual
 * `evalLog(...)` calls are unaffected.
 *
 * @param {{ captureConsole?: boolean }} options Capture settings. An omitted
 *   `captureConsole` keeps capture enabled, matching the documented default
 *   of the `runLogs.captureConsole` config option.
 */
function configureEvalRunLogs(options) {
	consoleCaptureEnabled = options.captureConsole ?? true;
}
|
|
235
|
+
/**
 * Manually add a log entry to the eval case that is currently running.
 *
 * The values are rendered with Node-style console formatting and capped
 * before being stored, so one log cannot grow run artifacts without bound.
 * Calls made outside an active case-owned eval phase are ignored.
 */
function evalLog(level, ...args) {
	const loggedValues = args;
	recordEvalLog(level, loggedValues);
}
|
|
76
245
|
function registerBackgroundJobInScope(scope, promise) {
|
|
77
246
|
const trackedPromise = promise.then(() => {
|
|
78
247
|
scope.pendingBackgroundJobs.delete(trackedPromise);
|
|
@@ -164,6 +333,7 @@ async function runInEvalScope(caseId, fn, options = {}) {
|
|
|
164
333
|
input: options.input,
|
|
165
334
|
outputs: {},
|
|
166
335
|
assertionFailures: [],
|
|
336
|
+
logs: [],
|
|
167
337
|
spans: [],
|
|
168
338
|
checkpoints: /* @__PURE__ */ new Map(),
|
|
169
339
|
spanStack: [],
|
|
@@ -332,107 +502,6 @@ function evalAssert(condition, message) {
|
|
|
332
502
|
throw error;
|
|
333
503
|
}
|
|
334
504
|
//#endregion
|
|
335
|
-
//#region ../sdk/src/cacheKey.ts
|
|
336
|
-
var SerializedCacheKeyValue = class {
|
|
337
|
-
value;
|
|
338
|
-
constructor(value) {
|
|
339
|
-
this.value = value;
|
|
340
|
-
}
|
|
341
|
-
};
|
|
342
|
-
/**
|
|
343
|
-
* Hash the components of a cache key into a deterministic hex digest.
|
|
344
|
-
*
|
|
345
|
-
* Native `Blob` and `File` values use stable metadata by default. Pass
|
|
346
|
-
* `serializeFileBytes: true` to read them asynchronously and include their byte
|
|
347
|
-
* hash in the key.
|
|
348
|
-
*/
|
|
349
|
-
async function hashCacheKey(input, options = {}) {
|
|
350
|
-
return hashCacheKeySyncMaterialized(options.serializeFileBytes === true ? await materializeAsyncCacheKeyValue(input) : input);
|
|
351
|
-
}
|
|
352
|
-
/**
|
|
353
|
-
* Synchronously hash cache key components. This supports JSON-like data and
|
|
354
|
-
* in-memory binary values such as `Buffer`, `ArrayBuffer`, and typed arrays,
|
|
355
|
-
* plus stable metadata for native `Blob` and `File` values.
|
|
356
|
-
*/
|
|
357
|
-
function hashCacheKeySync(input) {
|
|
358
|
-
return hashCacheKeySyncMaterialized(input);
|
|
359
|
-
}
|
|
360
|
-
function hashCacheKeySyncMaterialized(input) {
|
|
361
|
-
return createHash("sha256").update(getCompositeKey(input, { stringify: stringifyCacheKeyValue })).digest("hex");
|
|
362
|
-
}
|
|
363
|
-
function stringifyCacheKeyValue(value) {
|
|
364
|
-
if (value instanceof SerializedCacheKeyValue) return value.value;
|
|
365
|
-
if (Buffer$1.isBuffer(value)) return `$buffer:${hashBytes(value)}`;
|
|
366
|
-
if (isArrayBuffer(value)) return `$arrayBuffer:${hashBytes(new Uint8Array(value))}`;
|
|
367
|
-
if (isSharedArrayBuffer(value)) return `$sharedArrayBuffer:${hashBytes(new Uint8Array(value))}`;
|
|
368
|
-
if (isArrayBufferView(value)) {
|
|
369
|
-
const bytes = new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
|
|
370
|
-
return `$${value.constructor.name}:${hashBytes(bytes)}`;
|
|
371
|
-
}
|
|
372
|
-
if (isFile$1(value)) return `$file:${getCompositeKey({
|
|
373
|
-
lastModified: value.lastModified,
|
|
374
|
-
name: value.name,
|
|
375
|
-
size: value.size,
|
|
376
|
-
type: value.type
|
|
377
|
-
})}`;
|
|
378
|
-
if (isBlob$1(value)) return `$blob:${getCompositeKey({
|
|
379
|
-
size: value.size,
|
|
380
|
-
type: value.type
|
|
381
|
-
})}`;
|
|
382
|
-
}
|
|
383
|
-
async function materializeAsyncCacheKeyValue(value, refs = /* @__PURE__ */ new WeakSet()) {
|
|
384
|
-
const serialized = await stringifyAsyncCacheKeyValue(value);
|
|
385
|
-
if (serialized !== void 0) return new SerializedCacheKeyValue(serialized);
|
|
386
|
-
if (stringifyCacheKeyValue(value) !== void 0) return value;
|
|
387
|
-
if (!value || typeof value !== "object") return value;
|
|
388
|
-
if (Array.isArray(value)) {
|
|
389
|
-
const items = [];
|
|
390
|
-
for (const item of value) items.push(await materializeAsyncCacheKeyValue(item, refs));
|
|
391
|
-
return items;
|
|
392
|
-
}
|
|
393
|
-
if (refs.has(value)) throw new Error("Circular reference detected");
|
|
394
|
-
refs.add(value);
|
|
395
|
-
const entries = [];
|
|
396
|
-
for (const [key, entryValue] of Object.entries(value)) entries.push([key, await materializeAsyncCacheKeyValue(entryValue, refs)]);
|
|
397
|
-
refs.delete(value);
|
|
398
|
-
return Object.fromEntries(entries);
|
|
399
|
-
}
|
|
400
|
-
async function stringifyAsyncCacheKeyValue(value) {
|
|
401
|
-
if (isFile$1(value)) return `$file:${getCompositeKey({
|
|
402
|
-
bytes: await hashBlobBytes(value),
|
|
403
|
-
lastModified: value.lastModified,
|
|
404
|
-
name: value.name,
|
|
405
|
-
size: value.size,
|
|
406
|
-
type: value.type
|
|
407
|
-
})}`;
|
|
408
|
-
if (isBlob$1(value)) return `$blob:${getCompositeKey({
|
|
409
|
-
bytes: await hashBlobBytes(value),
|
|
410
|
-
size: value.size,
|
|
411
|
-
type: value.type
|
|
412
|
-
})}`;
|
|
413
|
-
}
|
|
414
|
-
async function hashBlobBytes(value) {
|
|
415
|
-
return hashBytes(new Uint8Array(await value.arrayBuffer()));
|
|
416
|
-
}
|
|
417
|
-
function hashBytes(value) {
|
|
418
|
-
return createHash("sha256").update(value).digest("hex");
|
|
419
|
-
}
|
|
420
|
-
function isArrayBuffer(value) {
|
|
421
|
-
return value instanceof ArrayBuffer;
|
|
422
|
-
}
|
|
423
|
-
function isSharedArrayBuffer(value) {
|
|
424
|
-
return value instanceof SharedArrayBuffer;
|
|
425
|
-
}
|
|
426
|
-
function isArrayBufferView(value) {
|
|
427
|
-
return ArrayBuffer.isView(value);
|
|
428
|
-
}
|
|
429
|
-
function isBlob$1(value) {
|
|
430
|
-
return value instanceof Blob;
|
|
431
|
-
}
|
|
432
|
-
function isFile$1(value) {
|
|
433
|
-
return value instanceof File;
|
|
434
|
-
}
|
|
435
|
-
//#endregion
|
|
436
505
|
//#region ../../node_modules/.pnpm/seroval@1.5.2/node_modules/seroval/dist/esm/production/index.mjs
|
|
437
506
|
var L$1 = ((i) => (i[i.AggregateError = 1] = "AggregateError", i[i.ArrowFunction = 2] = "ArrowFunction", i[i.ErrorPrototypeStack = 4] = "ErrorPrototypeStack", i[i.ObjectAssign = 8] = "ObjectAssign", i[i.BigIntTypedArray = 16] = "BigIntTypedArray", i[i.RegExp = 32] = "RegExp", i))(L$1 || {});
|
|
438
507
|
var v$1 = Symbol.asyncIterator, mr = Symbol.hasInstance, R = Symbol.isConcatSpreadable, C = Symbol.iterator, pr = Symbol.match, dr = Symbol.matchAll, gr = Symbol.replace, yr = Symbol.search, Nr = Symbol.species, br = Symbol.split, vr = Symbol.toPrimitive, P$1 = Symbol.toStringTag, Cr = Symbol.unscopables, ve = {
|
|
@@ -2223,6 +2292,107 @@ function deserializeCacheRecording(recording) {
|
|
|
2223
2292
|
};
|
|
2224
2293
|
}
|
|
2225
2294
|
//#endregion
|
|
2295
|
+
//#region ../sdk/src/cacheKey.ts
|
|
2296
|
+
/**
 * Wrapper around an already-serialized cache key fragment;
 * `stringifyCacheKeyValue` returns the wrapped string unchanged.
 */
var SerializedCacheKeyValue = class {
	value;
	constructor(serialized) {
		this.value = serialized;
	}
};
|
|
2302
|
+
/**
 * Hash cache key components into a deterministic hex digest.
 *
 * By default native `Blob` and `File` values contribute only stable metadata.
 * With `serializeFileBytes: true` they are read asynchronously first so that
 * a hash of their bytes becomes part of the key.
 */
async function hashCacheKey(input, options = {}) {
	if (options.serializeFileBytes === true) {
		const materialized = await materializeAsyncCacheKeyValue(input);
		return hashCacheKeySyncMaterialized(materialized);
	}
	return hashCacheKeySyncMaterialized(input);
}
|
|
2312
|
+
/**
 * Synchronous variant of cache key hashing. Handles JSON-like data, in-memory
 * binary values (`Buffer`, `ArrayBuffer`, typed arrays) and stable metadata of
 * native `Blob` / `File` values.
 */
function hashCacheKeySync(input) {
	const digest = hashCacheKeySyncMaterialized(input);
	return digest;
}
|
|
2320
|
+
/**
 * Build the composite key string for `input` (using `stringifyCacheKeyValue`
 * as the custom stringifier) and return its SHA-256 hex digest.
 */
function hashCacheKeySyncMaterialized(input) {
	const hasher = createHash("sha256");
	const compositeKey = getCompositeKey(input, { stringify: stringifyCacheKeyValue });
	hasher.update(compositeKey);
	return hasher.digest("hex");
}
|
|
2323
|
+
/**
 * Custom stringifier for cache key values.
 *
 * Returns a tagged string for pre-serialized wrappers, binary values (tag
 * plus a hash of the bytes) and `File` / `Blob` values (tag plus a composite
 * key of their metadata). Returns `undefined` for every other value.
 */
function stringifyCacheKeyValue(value) {
	if (value instanceof SerializedCacheKeyValue) return value.value;
	// Buffers are checked before the generic view branch so they get their own tag.
	if (Buffer$1.isBuffer(value)) return `$buffer:${hashBytes(value)}`;
	if (isArrayBuffer(value)) {
		const bytes = new Uint8Array(value);
		return `$arrayBuffer:${hashBytes(bytes)}`;
	}
	if (isSharedArrayBuffer(value)) {
		const bytes = new Uint8Array(value);
		return `$sharedArrayBuffer:${hashBytes(bytes)}`;
	}
	if (isArrayBufferView(value)) {
		// Hash only the window of the underlying buffer that the view covers.
		const { buffer, byteOffset, byteLength } = value;
		const bytes = new Uint8Array(buffer, byteOffset, byteLength);
		return `$${value.constructor.name}:${hashBytes(bytes)}`;
	}
	if (isFile$1(value)) {
		const metadata = {
			lastModified: value.lastModified,
			name: value.name,
			size: value.size,
			type: value.type
		};
		return `$file:${getCompositeKey(metadata)}`;
	}
	if (isBlob$1(value)) {
		const metadata = {
			size: value.size,
			type: value.type
		};
		return `$blob:${getCompositeKey(metadata)}`;
	}
	return void 0;
}
|
|
2343
|
+
/**
 * Recursively resolve values that need async serialization into
 * `SerializedCacheKeyValue` wrappers, so the result can be hashed
 * synchronously afterwards.
 *
 * @param {unknown} value Value (possibly nested) to materialize.
 * @param {WeakSet<object>} [refs] Containers currently being walked; used to
 *   detect circular references. Callers normally omit it.
 * @returns {Promise<unknown>} A copy of arrays/objects with async-serializable
 *   leaves wrapped; values the sync stringifier handles, and primitives, are
 *   returned as-is.
 * @throws {Error} "Circular reference detected" when an array or object
 *   contains itself, directly or indirectly.
 */
async function materializeAsyncCacheKeyValue(value, refs = /* @__PURE__ */ new WeakSet()) {
	const serialized = await stringifyAsyncCacheKeyValue(value);
	if (serialized !== void 0) return new SerializedCacheKeyValue(serialized);
	if (stringifyCacheKeyValue(value) !== void 0) return value;
	if (!value || typeof value !== "object") return value;
	// Track arrays as well as objects: a self-containing array would otherwise
	// recurse without end.
	if (refs.has(value)) throw new Error("Circular reference detected");
	refs.add(value);
	try {
		if (Array.isArray(value)) {
			const items = [];
			for (const item of value) items.push(await materializeAsyncCacheKeyValue(item, refs));
			return items;
		}
		const entries = [];
		for (const [key, entryValue] of Object.entries(value)) entries.push([key, await materializeAsyncCacheKeyValue(entryValue, refs)]);
		return Object.fromEntries(entries);
	} finally {
		// Remove on every exit path so repeated (non-circular) references stay legal.
		refs.delete(value);
	}
}
|
|
2360
|
+
/**
 * Serialize `File` / `Blob` values including a hash of their bytes.
 * Resolves to `undefined` for any other value.
 */
async function stringifyAsyncCacheKeyValue(value) {
	if (isFile$1(value)) {
		const bytes = await hashBlobBytes(value);
		const descriptor = {
			bytes,
			lastModified: value.lastModified,
			name: value.name,
			size: value.size,
			type: value.type
		};
		return `$file:${getCompositeKey(descriptor)}`;
	}
	if (isBlob$1(value)) {
		const bytes = await hashBlobBytes(value);
		const descriptor = {
			bytes,
			size: value.size,
			type: value.type
		};
		return `$blob:${getCompositeKey(descriptor)}`;
	}
	return void 0;
}
|
|
2374
|
+
/** Read the value's full contents via `arrayBuffer()` and hash the bytes. */
async function hashBlobBytes(value) {
	const contents = await value.arrayBuffer();
	const bytes = new Uint8Array(contents);
	return hashBytes(bytes);
}
|
|
2377
|
+
/** Return the SHA-256 hex digest of the given data. */
function hashBytes(value) {
	const hasher = createHash("sha256");
	hasher.update(value);
	return hasher.digest("hex");
}
|
|
2380
|
+
/** True when `value` is an `ArrayBuffer`. */
function isArrayBuffer(value) {
	return value instanceof ArrayBuffer;
}
/**
 * True when `value` is a `SharedArrayBuffer`. Returns false, instead of
 * throwing a ReferenceError, where that global is not defined.
 */
function isSharedArrayBuffer(value) {
	return typeof SharedArrayBuffer !== "undefined" && value instanceof SharedArrayBuffer;
}
/** True when `value` is a typed array or `DataView`. */
function isArrayBufferView(value) {
	return ArrayBuffer.isView(value);
}
/**
 * True when `value` is a native `Blob` (which includes `File`). Returns false
 * where the `Blob` global is not defined.
 */
function isBlob$1(value) {
	return typeof Blob !== "undefined" && value instanceof Blob;
}
/**
 * True when `value` is a native `File`. Returns false, instead of throwing a
 * ReferenceError, where the `File` global is not defined.
 */
function isFile$1(value) {
	return typeof File !== "undefined" && value instanceof File;
}
|
|
2395
|
+
//#endregion
|
|
2226
2396
|
//#region ../sdk/src/cacheRecording.ts
|
|
2227
2397
|
function mergeSpanAttributes$1(span, attributes) {
|
|
2228
2398
|
span.attributes = {
|
|
@@ -2571,6 +2741,11 @@ function createTraceCache(generateSpanId) {
|
|
|
2571
2741
|
storedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2572
2742
|
codeFingerprint: cacheCtx.codeFingerprint,
|
|
2573
2743
|
recording: await serializeCacheRecording(recording)
|
|
2744
|
+
}, {
|
|
2745
|
+
rawKey: info.key,
|
|
2746
|
+
operationType: "value",
|
|
2747
|
+
operationName: info.name,
|
|
2748
|
+
codeFingerprint: cacheCtx.codeFingerprint
|
|
2574
2749
|
});
|
|
2575
2750
|
}
|
|
2576
2751
|
return bodyResult;
|
|
@@ -2996,7 +3171,12 @@ async function traceSpanInternal(info, fn) {
|
|
|
2996
3171
|
codeFingerprint: ctx.codeFingerprint,
|
|
2997
3172
|
recording: await serializeCacheRecording(recording)
|
|
2998
3173
|
};
|
|
2999
|
-
await ctx.adapter.write(entry
|
|
3174
|
+
await ctx.adapter.write(entry, {
|
|
3175
|
+
rawKey: cacheOpts.key,
|
|
3176
|
+
operationType: "span",
|
|
3177
|
+
operationName: info.name,
|
|
3178
|
+
codeFingerprint: ctx.codeFingerprint
|
|
3179
|
+
});
|
|
3000
3180
|
}
|
|
3001
3181
|
return bodyResult;
|
|
3002
3182
|
}
|
|
@@ -3415,12 +3595,31 @@ const cacheEntrySchema = z.object({
|
|
|
3415
3595
|
codeFingerprint: z.string(),
|
|
3416
3596
|
recording: cacheRecordingSchema
|
|
3417
3597
|
});
|
|
3598
|
+
/**
 * Schema for one debug record holding the raw (unhashed) cache key. These
 * records are debug-only and live outside the reusable cache entry.
 */
const cacheDebugKeyEntrySchema = z.object({
	/** Format version; only `1` is accepted. */
	version: z.literal(1),
	key: z.string(),
	namespace: z.string(),
	operationType: cacheOperationTypeSchema,
	operationName: z.string(),
	storedAt: z.string(),
	codeFingerprint: z.string(),
	/** Raw key value; accepted as-is without validation. */
	rawKey: z.unknown()
});
|
|
3609
|
+
/** Cache entry schema extended with an optional, debug-only `debugKey` record. */
const cacheEntryWithDebugKeySchema = cacheEntrySchema.extend({
	debugKey: cacheDebugKeyEntrySchema.optional()
});
|
|
3418
3611
|
/** Schema of the persisted per-owner cache file, which holds multiple cache entries. */
const cacheFileSchema = z.object({
	/** Format version; only `1` is accepted. */
	version: z.literal(1),
	owner: z.string(),
	/** Cache entries stored as a string-keyed record. */
	entries: z.record(z.string(), cacheEntrySchema)
});
|
|
3617
|
+
/** Schema of the persisted per-owner debug file, which holds raw cache key metadata. */
const cacheDebugKeyFileSchema = z.object({
	/** Format version; only `1` is accepted. */
	version: z.literal(1),
	owner: z.string(),
	/** Debug key records stored as a string-keyed record. */
	entries: z.record(z.string(), cacheDebugKeyEntrySchema)
});
|
|
3424
3623
|
//#endregion
|
|
3425
3624
|
//#region ../shared/src/schemas/chart.ts
|
|
3426
3625
|
/** Chart type rendered for a single eval history chart. */
|
|
@@ -3623,6 +3822,40 @@ const assertionFailureSchema = z.object({
|
|
|
3623
3822
|
stack: z.string().optional()
|
|
3624
3823
|
});
|
|
3625
3824
|
const legacyAssertionFailureSchema = z.string().transform((message) => ({ message }));
|
|
3825
|
+
/** Allowed severity levels for a log captured during a case run. */
const runLogLevelSchema = z.enum(["log", "info", "warn", "error"]);
|
|
3832
|
+
/** Eval runner phases that can emit a captured case log. */
const runLogPhaseSchema = z.enum(["eval", "derive", "outputsSchema", "scorer"]);
|
|
3839
|
+
/** Schema for one persisted log entry captured during a case run. */
const runLogEntrySchema = z.object({
	/** ISO timestamp for when the log was captured. */
	timestamp: z.string(),
	/** Normalized log level. */
	level: runLogLevelSchema,
	/** Case-owned runner phase that emitted the log. */
	phase: runLogPhaseSchema,
	/** Human-readable preview formatted from the original log arguments. */
	message: z.string(),
	/** JSON-safe captured log arguments rendered in the UI. */
	args: z.array(z.unknown()).default([]),
	/** Whether `message` or any value in `args` was capped before persistence. */
	truncated: z.boolean().default(false),
	/**
	 * Optional source label for logs emitted from a nested case-owned activity,
	 * such as a score key.
	 */
	source: z.string().optional()
});
|
|
3626
3859
|
/** Trace payload captured while computing one score for a case. */
|
|
3627
3860
|
const scoreTraceSchema = z.object({
|
|
3628
3861
|
trace: z.array(traceSpanSchema),
|
|
@@ -3651,6 +3884,8 @@ const caseDetailSchema = z.object({
|
|
|
3651
3884
|
scoringTraces: z.record(z.string(), scoreTraceSchema).optional(),
|
|
3652
3885
|
columns: z.record(z.string(), cellValueSchema),
|
|
3653
3886
|
assertionFailures: z.array(z.union([assertionFailureSchema, legacyAssertionFailureSchema])),
|
|
3887
|
+
/** Logs captured from manual `evalLog(...)` calls and enabled console calls. */
|
|
3888
|
+
logs: z.array(runLogEntrySchema).default([]),
|
|
3654
3889
|
error: z.object({
|
|
3655
3890
|
name: z.string().optional(),
|
|
3656
3891
|
message: z.string(),
|
|
@@ -3802,6 +4037,14 @@ const apiCallsConfigSchema = z.object({
|
|
|
3802
4037
|
/** Custom user-defined metrics surfaced on each API call. */
|
|
3803
4038
|
metrics: z.array(apiCallMetricSchema).optional()
|
|
3804
4039
|
});
|
|
4040
|
+
/** Schema for the workspace-level options that control run log capture. */
const runLogsConfigSchema = z.object({
	/**
	 * Whether `console.log`, `console.info`, `console.warn` and `console.error`
	 * calls made inside active eval case scopes are captured. Defaults to
	 * `true`; manual `evalLog(...)` calls are always captured.
	 */
	captureConsole: z.boolean().optional()
});
|
|
3805
4048
|
/** Default LLM-calls config the UI uses before the workspace fetch resolves. */
|
|
3806
4049
|
const DEFAULT_LLM_CALLS_CONFIG = {
|
|
3807
4050
|
kinds: ["llm"],
|
|
@@ -3917,6 +4160,7 @@ const agentEvalsConfigSchema = z.object({
|
|
|
3917
4160
|
traceDisplay: traceDisplayInputConfigSchema.optional(),
|
|
3918
4161
|
llmCalls: llmCallsConfigSchema.optional(),
|
|
3919
4162
|
apiCalls: apiCallsConfigSchema.optional(),
|
|
4163
|
+
runLogs: runLogsConfigSchema.optional(),
|
|
3920
4164
|
cache: z.object({
|
|
3921
4165
|
enabled: z.boolean().optional(),
|
|
3922
4166
|
dir: z.string().optional(),
|
|
@@ -4490,15 +4734,33 @@ const defaultMaxEntriesPerNamespace = 100;
|
|
|
4490
4734
|
*/
|
|
4491
4735
|
function createFsCacheStore(options) {
|
|
4492
4736
|
const cacheDir = resolve(options.workspaceRoot, options.dir ?? ".agent-evals/cache");
|
|
4737
|
+
const debugDir = resolve(options.workspaceRoot, options.debugDir ?? ".agent-evals/cache-debug");
|
|
4493
4738
|
const defaultMaxEntries = normalizeMaxEntries(options.maxEntriesPerNamespace);
|
|
4494
4739
|
return {
|
|
4495
4740
|
dir() {
|
|
4496
4741
|
return cacheDir;
|
|
4497
4742
|
},
|
|
4743
|
+
debugDir() {
|
|
4744
|
+
return debugDir;
|
|
4745
|
+
},
|
|
4498
4746
|
async lookup(namespace, keyHash) {
|
|
4499
4747
|
return (await readCacheFile(cacheDir, ownerFromNamespace(namespace)))?.entries[keyHash] ?? null;
|
|
4500
4748
|
},
|
|
4501
|
-
async
|
|
4749
|
+
async lookupWithDebug(namespace, keyHash) {
|
|
4750
|
+
const owner = ownerFromNamespace(namespace);
|
|
4751
|
+
const entry = (await readCacheFile(cacheDir, owner))?.entries[keyHash] ?? null;
|
|
4752
|
+
if (entry === null) return null;
|
|
4753
|
+
const debugKey = (await readDebugKeyFile(debugDir, owner))?.entries[keyHash];
|
|
4754
|
+
const deserializedEntry = {
|
|
4755
|
+
...entry,
|
|
4756
|
+
recording: deserializeCacheRecording(entry.recording)
|
|
4757
|
+
};
|
|
4758
|
+
return debugKey === void 0 ? deserializedEntry : {
|
|
4759
|
+
...deserializedEntry,
|
|
4760
|
+
debugKey
|
|
4761
|
+
};
|
|
4762
|
+
},
|
|
4763
|
+
async write(entry, debugKey) {
|
|
4502
4764
|
const owner = ownerFromNamespace(entry.namespace);
|
|
4503
4765
|
const filePath = ownerPath(cacheDir, owner);
|
|
4504
4766
|
await mkdir(cacheDir, { recursive: true });
|
|
@@ -4512,6 +4774,17 @@ function createFsCacheStore(options) {
|
|
|
4512
4774
|
}, entry.namespace, maxEntriesForNamespace(entry.namespace, defaultMaxEntries, options.maxEntriesByNamespace), entry.key)
|
|
4513
4775
|
});
|
|
4514
4776
|
});
|
|
4777
|
+
if (debugKey !== void 0) {
|
|
4778
|
+
if ((await resultify(() => writeDebugKeyEntry({
|
|
4779
|
+
debugDir,
|
|
4780
|
+
entry,
|
|
4781
|
+
debugKey,
|
|
4782
|
+
maxEntries: maxEntriesForNamespace(entry.namespace, defaultMaxEntries, options.maxEntriesByNamespace)
|
|
4783
|
+
}))).error) await resultify(() => clearDebugEntries(debugDir, {
|
|
4784
|
+
namespace: entry.namespace,
|
|
4785
|
+
key: entry.key
|
|
4786
|
+
}));
|
|
4787
|
+
}
|
|
4515
4788
|
},
|
|
4516
4789
|
async list() {
|
|
4517
4790
|
if (!existsSync(cacheDir)) return [];
|
|
@@ -4544,17 +4817,21 @@ function createFsCacheStore(options) {
|
|
|
4544
4817
|
return items;
|
|
4545
4818
|
},
|
|
4546
4819
|
async clear(filter) {
|
|
4547
|
-
if (!existsSync(cacheDir)) return;
|
|
4548
4820
|
if (!filter || filter.namespace === void 0 && filter.key === void 0) {
|
|
4549
4821
|
await rm(cacheDir, {
|
|
4550
4822
|
recursive: true,
|
|
4551
4823
|
force: true
|
|
4552
4824
|
});
|
|
4825
|
+
await rm(debugDir, {
|
|
4826
|
+
recursive: true,
|
|
4827
|
+
force: true
|
|
4828
|
+
});
|
|
4553
4829
|
return;
|
|
4554
4830
|
}
|
|
4555
4831
|
if (filter.namespace !== void 0) {
|
|
4556
4832
|
const owner = ownerFromNamespace(filter.namespace);
|
|
4557
|
-
|
|
4833
|
+
const filePath = ownerPath(cacheDir, owner);
|
|
4834
|
+
if (existsSync(cacheDir)) await withCacheFileLock(filePath, async () => {
|
|
4558
4835
|
const cacheFile = await readCacheFile(cacheDir, owner);
|
|
4559
4836
|
if (cacheFile === null) return;
|
|
4560
4837
|
await writeOrRemoveCacheFile(cacheDir, {
|
|
@@ -4566,23 +4843,27 @@ function createFsCacheStore(options) {
|
|
|
4566
4843
|
}))
|
|
4567
4844
|
});
|
|
4568
4845
|
});
|
|
4846
|
+
await clearDebugEntries(debugDir, filter);
|
|
4569
4847
|
return;
|
|
4570
4848
|
}
|
|
4571
|
-
|
|
4572
|
-
|
|
4573
|
-
|
|
4574
|
-
|
|
4575
|
-
|
|
4576
|
-
|
|
4577
|
-
|
|
4578
|
-
|
|
4579
|
-
|
|
4580
|
-
|
|
4581
|
-
|
|
4582
|
-
|
|
4849
|
+
if (existsSync(cacheDir)) {
|
|
4850
|
+
const files = await readdir(cacheDir);
|
|
4851
|
+
for (const fileName of files) {
|
|
4852
|
+
if (!fileName.endsWith(".json")) continue;
|
|
4853
|
+
const filePath = join(cacheDir, fileName);
|
|
4854
|
+
await withCacheFileLock(filePath, async () => {
|
|
4855
|
+
const cacheFile = await readCacheFilePath(filePath);
|
|
4856
|
+
if (cacheFile === null) return;
|
|
4857
|
+
const entries = Object.fromEntries(Object.entries(cacheFile.entries).filter(([key]) => key !== filter.key));
|
|
4858
|
+
await writeOrRemoveCacheFile(cacheDir, {
|
|
4859
|
+
version: 1,
|
|
4860
|
+
owner: cacheFile.owner,
|
|
4861
|
+
entries
|
|
4862
|
+
});
|
|
4583
4863
|
});
|
|
4584
|
-
}
|
|
4864
|
+
}
|
|
4585
4865
|
}
|
|
4866
|
+
await clearDebugEntries(debugDir, filter);
|
|
4586
4867
|
}
|
|
4587
4868
|
};
|
|
4588
4869
|
}
|
|
@@ -4598,18 +4879,21 @@ function createBufferedCacheStore(backingStore) {
|
|
|
4598
4879
|
return {
|
|
4599
4880
|
async lookup(namespace, keyHash) {
|
|
4600
4881
|
const buffered = pendingEntries.get(toPendingKey(namespace, keyHash));
|
|
4601
|
-
if (buffered !== void 0) return buffered;
|
|
4882
|
+
if (buffered !== void 0) return buffered.entry;
|
|
4602
4883
|
return backingStore.lookup(namespace, keyHash);
|
|
4603
4884
|
},
|
|
4604
|
-
write(entry) {
|
|
4605
|
-
pendingEntries.set(toPendingKey(entry.namespace, entry.key),
|
|
4885
|
+
write(entry, debugKey) {
|
|
4886
|
+
pendingEntries.set(toPendingKey(entry.namespace, entry.key), {
|
|
4887
|
+
entry,
|
|
4888
|
+
debugKey
|
|
4889
|
+
});
|
|
4606
4890
|
return Promise.resolve();
|
|
4607
4891
|
},
|
|
4608
4892
|
async commit() {
|
|
4609
|
-
for (const
|
|
4893
|
+
for (const pending of pendingEntries.values()) await backingStore.write(pending.entry, pending.debugKey);
|
|
4610
4894
|
},
|
|
4611
4895
|
getPendingEntries() {
|
|
4612
|
-
return [...pendingEntries.values()];
|
|
4896
|
+
return [...pendingEntries.values()].map((pending) => pending.entry);
|
|
4613
4897
|
}
|
|
4614
4898
|
};
|
|
4615
4899
|
}
|
|
@@ -4661,6 +4945,94 @@ async function writeCacheFile(cacheDir, cacheFile) {
|
|
|
4661
4945
|
await writeFile(tmpPath, JSON.stringify(cacheFile, null, 2));
|
|
4662
4946
|
await rename(tmpPath, filePath);
|
|
4663
4947
|
}
|
|
4948
|
+
async function readDebugKeyFile(debugDir, owner) {
|
|
4949
|
+
return readDebugKeyFilePath(ownerPath(debugDir, owner));
|
|
4950
|
+
}
|
|
4951
|
+
async function readDebugKeyFilePath(filePath) {
|
|
4952
|
+
if (!existsSync(filePath)) return null;
|
|
4953
|
+
const rawResult = await resultify(() => readFile(filePath, "utf-8"));
|
|
4954
|
+
if (rawResult.error) return null;
|
|
4955
|
+
const json = safeJsonParse(rawResult.value);
|
|
4956
|
+
if (json === null) return null;
|
|
4957
|
+
const parsed = cacheDebugKeyFileSchema.safeParse(json);
|
|
4958
|
+
if (!parsed.success) return null;
|
|
4959
|
+
return parsed.data;
|
|
4960
|
+
}
|
|
4961
|
+
async function writeDebugKeyEntry(params) {
|
|
4962
|
+
const { debugDir, entry, debugKey, maxEntries } = params;
|
|
4963
|
+
const owner = ownerFromNamespace(entry.namespace);
|
|
4964
|
+
const filePath = ownerPath(debugDir, owner);
|
|
4965
|
+
await mkdir(debugDir, { recursive: true });
|
|
4966
|
+
await withCacheFileLock(filePath, async () => {
|
|
4967
|
+
const entries = (await readDebugKeyFile(debugDir, owner))?.entries ?? {};
|
|
4968
|
+
const debugEntry = {
|
|
4969
|
+
version: 1,
|
|
4970
|
+
key: entry.key,
|
|
4971
|
+
namespace: entry.namespace,
|
|
4972
|
+
operationType: debugKey.operationType,
|
|
4973
|
+
operationName: debugKey.operationName,
|
|
4974
|
+
storedAt: entry.storedAt,
|
|
4975
|
+
codeFingerprint: debugKey.codeFingerprint,
|
|
4976
|
+
rawKey: debugKey.rawKey
|
|
4977
|
+
};
|
|
4978
|
+
await writeDebugKeyFile(debugDir, {
|
|
4979
|
+
version: 1,
|
|
4980
|
+
owner,
|
|
4981
|
+
entries: pruneDebugKeyEntries({
|
|
4982
|
+
...entries,
|
|
4983
|
+
[entry.key]: debugEntry
|
|
4984
|
+
}, entry.namespace, maxEntries, entry.key)
|
|
4985
|
+
});
|
|
4986
|
+
});
|
|
4987
|
+
}
|
|
4988
|
+
async function clearDebugEntries(debugDir, filter) {
|
|
4989
|
+
if (!existsSync(debugDir)) return;
|
|
4990
|
+
if (filter.namespace !== void 0) {
|
|
4991
|
+
const owner = ownerFromNamespace(filter.namespace);
|
|
4992
|
+
await withCacheFileLock(ownerPath(debugDir, owner), async () => {
|
|
4993
|
+
const debugFile = await readDebugKeyFile(debugDir, owner);
|
|
4994
|
+
if (debugFile === null) return;
|
|
4995
|
+
await writeOrRemoveDebugKeyFile(debugDir, {
|
|
4996
|
+
version: 1,
|
|
4997
|
+
owner,
|
|
4998
|
+
entries: Object.fromEntries(Object.entries(debugFile.entries).filter(([key, entry]) => {
|
|
4999
|
+
if (filter.key !== void 0) return key !== filter.key;
|
|
5000
|
+
return entry.namespace !== filter.namespace;
|
|
5001
|
+
}))
|
|
5002
|
+
});
|
|
5003
|
+
});
|
|
5004
|
+
return;
|
|
5005
|
+
}
|
|
5006
|
+
const files = await readdir(debugDir);
|
|
5007
|
+
for (const fileName of files) {
|
|
5008
|
+
if (!fileName.endsWith(".json")) continue;
|
|
5009
|
+
const filePath = join(debugDir, fileName);
|
|
5010
|
+
await withCacheFileLock(filePath, async () => {
|
|
5011
|
+
const debugFile = await readDebugKeyFilePath(filePath);
|
|
5012
|
+
if (debugFile === null) return;
|
|
5013
|
+
const entries = Object.fromEntries(Object.entries(debugFile.entries).filter(([key]) => key !== filter.key));
|
|
5014
|
+
await writeOrRemoveDebugKeyFile(debugDir, {
|
|
5015
|
+
version: 1,
|
|
5016
|
+
owner: debugFile.owner,
|
|
5017
|
+
entries
|
|
5018
|
+
});
|
|
5019
|
+
});
|
|
5020
|
+
}
|
|
5021
|
+
}
|
|
5022
|
+
async function writeOrRemoveDebugKeyFile(debugDir, debugFile) {
|
|
5023
|
+
if (Object.keys(debugFile.entries).length === 0) {
|
|
5024
|
+
await rm(ownerPath(debugDir, debugFile.owner), { force: true });
|
|
5025
|
+
return;
|
|
5026
|
+
}
|
|
5027
|
+
await writeDebugKeyFile(debugDir, debugFile);
|
|
5028
|
+
}
|
|
5029
|
+
async function writeDebugKeyFile(debugDir, debugFile) {
|
|
5030
|
+
await mkdir(debugDir, { recursive: true });
|
|
5031
|
+
const filePath = ownerPath(debugDir, debugFile.owner);
|
|
5032
|
+
const tmpPath = `${filePath}.${process.pid.toString()}.tmp`;
|
|
5033
|
+
await writeFile(tmpPath, JSON.stringify(debugFile, null, 2));
|
|
5034
|
+
await rename(tmpPath, filePath);
|
|
5035
|
+
}
|
|
4664
5036
|
function pruneEntries(entries, namespace, maxEntries, protectedKey) {
|
|
4665
5037
|
const sorted = Object.values(entries).filter((entry) => entry.namespace === namespace).toSorted((a, b) => a.storedAt < b.storedAt ? 1 : -1);
|
|
4666
5038
|
const kept = /* @__PURE__ */ new Map();
|
|
@@ -4672,6 +5044,17 @@ function pruneEntries(entries, namespace, maxEntries, protectedKey) {
|
|
|
4672
5044
|
}
|
|
4673
5045
|
return Object.fromEntries(Object.values(entries).filter((entry) => entry.namespace !== namespace || kept.has(entry.key)).toSorted((a, b) => a.key < b.key ? -1 : 1).map((entry) => [entry.key, entry]));
|
|
4674
5046
|
}
|
|
5047
|
+
function pruneDebugKeyEntries(entries, namespace, maxEntries, protectedKey) {
|
|
5048
|
+
const sorted = Object.values(entries).filter((entry) => entry.namespace === namespace).toSorted((a, b) => a.storedAt < b.storedAt ? 1 : -1);
|
|
5049
|
+
const kept = /* @__PURE__ */ new Map();
|
|
5050
|
+
const protectedEntry = entries[protectedKey];
|
|
5051
|
+
if (protectedEntry?.namespace === namespace) kept.set(protectedEntry.key, protectedEntry);
|
|
5052
|
+
for (const entry of sorted) {
|
|
5053
|
+
if (kept.size >= maxEntries) break;
|
|
5054
|
+
kept.set(entry.key, entry);
|
|
5055
|
+
}
|
|
5056
|
+
return Object.fromEntries(Object.values(entries).filter((entry) => entry.namespace !== namespace || kept.has(entry.key)).toSorted((a, b) => a.key < b.key ? -1 : 1).map((entry) => [entry.key, entry]));
|
|
5057
|
+
}
|
|
4675
5058
|
async function withCacheFileLock(filePath, fn) {
|
|
4676
5059
|
const lockPath = `${filePath}.lock`;
|
|
4677
5060
|
await acquireLock(lockPath);
|
|
@@ -4893,7 +5276,8 @@ const defaultConfig = {
|
|
|
4893
5276
|
label: "Output",
|
|
4894
5277
|
format: "json",
|
|
4895
5278
|
placements: ["section"]
|
|
4896
|
-
}] }
|
|
5279
|
+
}] },
|
|
5280
|
+
runLogs: { captureConsole: true }
|
|
4897
5281
|
};
|
|
4898
5282
|
async function loadConfig() {
|
|
4899
5283
|
const configPath = resolve(process.cwd(), "agent-evals.config.ts");
|
|
@@ -5657,6 +6041,10 @@ async function runCase(params) {
|
|
|
5657
6041
|
} : void 0
|
|
5658
6042
|
});
|
|
5659
6043
|
const { trace, traceDisplay } = resolveTracePresentation(scoreRun.scope.spans, globalTraceDisplay, evalDef.traceDisplay);
|
|
6044
|
+
scope.logs.push(...scoreRun.scope.logs.map((entry) => ({
|
|
6045
|
+
...entry,
|
|
6046
|
+
source: key
|
|
6047
|
+
})));
|
|
5660
6048
|
if (trace.length > 0) scoringTraces[key] = {
|
|
5661
6049
|
trace,
|
|
5662
6050
|
traceDisplay
|
|
@@ -5727,6 +6115,7 @@ async function runCase(params) {
|
|
|
5727
6115
|
traceDisplay,
|
|
5728
6116
|
columns,
|
|
5729
6117
|
assertionFailures: scope.assertionFailures,
|
|
6118
|
+
logs: scope.logs,
|
|
5730
6119
|
error: errorInfo,
|
|
5731
6120
|
trial,
|
|
5732
6121
|
cacheRefs: scope.caseCacheRefs
|
|
@@ -6142,4 +6531,4 @@ function toLastRunStatus(status) {
|
|
|
6142
6531
|
return status === "pending" ? null : status;
|
|
6143
6532
|
}
|
|
6144
6533
|
//#endregion
|
|
6145
|
-
export {
|
|
6534
|
+
export { caseDetailSchema as $, buildTraceTree as $t, getEvalTitle as A, serializedCacheSpanSchema as At, apiCallMetricFormatSchema as B, traceSpanKindSchema as Bt, createRunRequestSchema as C, setEvalOutput as Cn, cacheFileSchema as Ct, extractApiCalls as D, defineEval as Dn, cacheRecordingOpSchema as Dt, extractCacheHits as E, repoFile as En, cacheOperationTypeSchema as Et, runManifestSchema as F, traceAttributeDisplayPlacementSchema as Ft, llmCallMetricPlacementSchema as G, columnFormatSchema as Gt, apiCallMetricSchema as H, traceSpanWarningSchema as Ht, runSummarySchema as I, traceAttributeDisplaySchema as It, resolveApiCallsConfig as J, jsonCellSchema as Jt, llmCallMetricSchema as K, columnKindSchema as Kt, DEFAULT_API_CALLS_CONFIG as L, traceDisplayConfigSchema as Lt, deriveScopedSummaryFromCases as M, traceCacheRefSchema as Mt, deriveStatusFromCaseRows as N, traceAttributeDisplayFormatSchema as Nt, extractLlmCalls as O, getEvalRegistry as On, cacheRecordingSchema as Ot, deriveStatusFromChildStatuses as P, traceAttributeDisplayInputSchema as Pt, assertionFailureSchema as Q, z$1 as Qt, DEFAULT_LLM_CALLS_CONFIG as R, traceDisplayInputConfigSchema as Rt, createFsCacheStore as S, runInExistingEvalScope as Sn, cacheEntryWithDebugKeySchema as St, sseEnvelopeSchema as T, startEvalBackgroundJob as Tn, cacheModeSchema as Tt, apiCallsConfigSchema as U, cellValueSchema as Ut, apiCallMetricPlacementSchema as V, traceSpanSchema as Vt, llmCallMetricFormatSchema as W, columnDefSchema as Wt, runLogsConfigSchema as X, repoFileRefSchema as Xt, resolveLlmCallsConfig as Y, numberDisplayOptionsSchema as Yt, trialSelectionModeSchema as Z, runArtifactRefSchema as Zt, loadEvalModule as _, isInEvalScope as _n, evalChartTypeSchema as _t, loadPersistedRunSnapshot as a, deserializeCacheRecording as an, evalSummarySchema as at, buildDeclaredColumnDefs as b, runInEvalRuntimeScope as bn, cacheDebugKeyFileSchema as bt, persistCaseDetail as c, serializeCacheValue as cn, runLogPhaseSchema as 
ct, recomputePersistedCaseStatus as d, configureEvalRunLogs as dn, evalChartAxisSchema as dt, captureEvalSpanError as en, caseRowSchema as et, runTouchesEval as f, evalAssert as fn, evalChartBuiltinMetricSchema as ft, setLatestRunInfoMap as g, incrementEvalOutput as gn, evalChartTooltipExtraSchema as gt, getTargetEvalIds as h, getEvalCaseInput as hn, evalChartMetricSchema as ht, getLatestRunInfos as i, hashCacheKeySync as in, evalStatsConfigSchema as it, getEvalDisplayStatus as j, spanCacheOptionsSchema as jt, getNestedAttribute as k, cacheStatusSchema as kt, persistRunState as l, EvalAssertionError as ln, scoreTraceSchema as lt, buildEvalSummary as m, getCurrentScope as mn, evalChartConfigSchema as mt, generateRunId as n, evalTracer as nn, evalStatAggregateSchema as nt, loadPersistedRunSnapshots as o, deserializeCacheValue as on, runLogEntrySchema as ot, resolveArtifactPath as p, evalLog as pn, evalChartColorSchema as pt, llmCallsConfigSchema as q, fileRefSchema as qt, getLastRunStatuses as r, hashCacheKey as rn, evalStatItemSchema as rt, nextShortIdFromSnapshots as s, serializeCacheRecording as sn, runLogLevelSchema as st, executeRun as t, evalSpan as tn, evalFreshnessStatusSchema as tt, recomputeEvalStatusesInRuns as u, appendToEvalOutput as un, evalChartAggregateSchema as ut, parseEvalMetas as v, mergeEvalOutput as vn, evalChartsConfigSchema as vt, updateManualScoreRequestSchema as w, setScopeCacheContext as wn, cacheListItemSchema as wt, normalizeScoreDef as x, runInEvalScope as xn, cacheEntrySchema as xt, loadConfig as y, nextEvalId as yn, cacheDebugKeyEntrySchema as yt, agentEvalsConfigSchema as z, traceSpanErrorSchema as zt };
|