@ls-stack/agent-eval 0.55.0 → 0.55.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-CunZ8Dku.mjs → app-NI4to6lp.mjs} +87 -4
- package/dist/apps/web/dist/assets/{index-CvsPmlHl.js → index-C7QjETk8.js} +54 -54
- package/dist/apps/web/dist/index.html +1 -1
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +167 -0
- package/dist/{cli-rvPrUj6S.mjs → cli-Bu9347r1.mjs} +60 -12
- package/dist/index.d.mts +151 -138
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +3 -1
- package/dist/{runOrchestration-BWyE5lRX.mjs → runExecution-C31dpemR.mjs} +394 -1609
- package/dist/runOrchestration-3RoHLW4U.mjs +1596 -0
- package/dist/{runner-CFQ8LZmY.mjs → runner-B4EfMn1d.mjs} +2 -2
- package/dist/{runner-C2fvjKZP.mjs → runner-CTp9zHbM.mjs} +1 -1
- package/dist/{src-DEENkbkn.mjs → src-FR60ZR_4.mjs} +2 -2
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +19 -8
|
@@ -10,7 +10,7 @@ import { createHash, randomUUID } from "node:crypto";
|
|
|
10
10
|
import { getCompositeKey } from "@ls-stack/utils/getCompositeKey";
|
|
11
11
|
import { existsSync } from "node:fs";
|
|
12
12
|
import { brotliCompressSync, brotliDecompressSync } from "node:zlib";
|
|
13
|
-
import {
|
|
13
|
+
import { resultify } from "t-result";
|
|
14
14
|
import { fileURLToPath, pathToFileURL } from "node:url";
|
|
15
15
|
//#region ../sdk/src/defineEval.ts
|
|
16
16
|
const evalRegistry = /* @__PURE__ */ new Map();
|
|
@@ -275,12 +275,15 @@ const traceCacheRefSchema = z.object({
|
|
|
275
275
|
z.object({
|
|
276
276
|
key: z.string(),
|
|
277
277
|
namespace: z.string(),
|
|
278
|
-
operationType: cacheOperationTypeSchema,
|
|
279
|
-
operationName: z.string(),
|
|
280
|
-
spanName: z.string().optional(),
|
|
281
|
-
spanKind: traceSpanKindSchema.optional(),
|
|
282
278
|
storedAt: z.string(),
|
|
283
|
-
|
|
279
|
+
lastAccessedAt: z.string()
|
|
280
|
+
});
|
|
281
|
+
z.object({
|
|
282
|
+
removedCacheFiles: z.number(),
|
|
283
|
+
removedDebugFiles: z.number(),
|
|
284
|
+
removedBlobFiles: z.number(),
|
|
285
|
+
removedIndexRows: z.number(),
|
|
286
|
+
rewrittenIndexes: z.number()
|
|
284
287
|
});
|
|
285
288
|
/** Zod schema for `SerializedCacheSpan`, defined lazily for recursion. */
|
|
286
289
|
const serializedCacheSpanSchema = z.object({
|
|
@@ -1377,6 +1380,7 @@ const agentEvalsConfigSchema = z.object({
|
|
|
1377
1380
|
dir: z.string().optional(),
|
|
1378
1381
|
maxEntriesPerNamespace: z.preprocess((value) => typeof value === "number" && Number.isFinite(value) ? value : void 0, z.number().optional()),
|
|
1379
1382
|
maxEntriesByNamespace: z.record(z.string(), z.number()).optional(),
|
|
1383
|
+
pruneIdleDelayMs: z.preprocess((value) => typeof value === "number" && Number.isFinite(value) ? value : void 0, z.number().optional()),
|
|
1380
1384
|
maxEntriesPerEval: z.preprocess((value) => typeof value === "number" && Number.isFinite(value) ? value : void 0, z.number().optional())
|
|
1381
1385
|
}).optional()
|
|
1382
1386
|
});
|
|
@@ -1692,10 +1696,10 @@ function evaluateTagExpression(expression, tags) {
|
|
|
1692
1696
|
}
|
|
1693
1697
|
function tagMatchesPattern(tag, pattern) {
|
|
1694
1698
|
if (!pattern.includes("*")) return tag === pattern;
|
|
1695
|
-
const source = pattern.split("*").map(escapeRegex
|
|
1699
|
+
const source = pattern.split("*").map(escapeRegex).join(".*");
|
|
1696
1700
|
return new RegExp(`^${source}$`).test(tag);
|
|
1697
1701
|
}
|
|
1698
|
-
function escapeRegex
|
|
1702
|
+
function escapeRegex(value) {
|
|
1699
1703
|
return value.replace(/[|\\{}()[\]^$+?.]/g, "\\$&");
|
|
1700
1704
|
}
|
|
1701
1705
|
//#endregion
|
|
@@ -4926,8 +4930,13 @@ const cacheSerializationMarker = "__aecs";
|
|
|
4926
4930
|
const supportedCacheSerializationPrefix = "v1:";
|
|
4927
4931
|
const externalJsonCacheSerializationMarker = "v1:ExternalJson";
|
|
4928
4932
|
const externalJsonBlobExtension = ".json.br";
|
|
4933
|
+
const externalJsonBlobDirName = "cache-blobs";
|
|
4929
4934
|
const cacheEntryExtension = ".json.br";
|
|
4930
4935
|
const debugEntryExtension = ".json";
|
|
4936
|
+
const cacheIndexFilePrefix = ".index-";
|
|
4937
|
+
async function commitPendingCacheWrites(params) {
|
|
4938
|
+
for (const pendingWrite of params.pendingWrites) await params.backingStore.write(pendingWrite.entry, pendingWrite.debugKey);
|
|
4939
|
+
}
|
|
4931
4940
|
/**
|
|
4932
4941
|
* Create a filesystem-backed cache adapter rooted at `<workspaceRoot>/<dir>`.
|
|
4933
4942
|
*
|
|
@@ -4938,8 +4947,14 @@ const debugEntryExtension = ".json";
|
|
|
4938
4947
|
function createFsCacheStore(options) {
|
|
4939
4948
|
const cacheDir = resolve(options.workspaceRoot, options.dir ?? ".agent-evals/cache");
|
|
4940
4949
|
const debugDir = resolve(options.workspaceRoot, options.debugDir ?? ".agent-evals/cache-debug");
|
|
4941
|
-
const blobDir = resolve(options.workspaceRoot, options.blobDir
|
|
4942
|
-
const
|
|
4950
|
+
const blobDir = options.blobDir === void 0 ? join(cacheDir, externalJsonBlobDirName) : resolve(options.workspaceRoot, options.blobDir);
|
|
4951
|
+
const legacyBlobDir = resolve(options.workspaceRoot, ".agent-evals/cache-blobs");
|
|
4952
|
+
const fallbackBlobDirs = options.blobDir === void 0 && legacyBlobDir !== blobDir ? [legacyBlobDir] : [];
|
|
4953
|
+
const blobDirs = [blobDir, ...fallbackBlobDirs];
|
|
4954
|
+
const externalJsonStore = createExternalJsonBlobStore({
|
|
4955
|
+
fallbackDirs: fallbackBlobDirs,
|
|
4956
|
+
primaryDir: blobDir
|
|
4957
|
+
});
|
|
4943
4958
|
const defaultMaxEntries = normalizeMaxEntries(options.maxEntriesPerNamespace);
|
|
4944
4959
|
return {
|
|
4945
4960
|
externalJsonStore,
|
|
@@ -4953,11 +4968,22 @@ function createFsCacheStore(options) {
|
|
|
4953
4968
|
return blobDir;
|
|
4954
4969
|
},
|
|
4955
4970
|
async lookup(namespace, keyHash) {
|
|
4956
|
-
const entry = await
|
|
4957
|
-
|
|
4971
|
+
const entry = await readIndexedCacheEntry({
|
|
4972
|
+
cacheDir,
|
|
4973
|
+
key: keyHash,
|
|
4974
|
+
namespace
|
|
4975
|
+
});
|
|
4976
|
+
if (entry === null) return null;
|
|
4977
|
+
const materialized = await materializeExternalJsonCacheEntryOrNull(entry, externalJsonStore);
|
|
4978
|
+
if (materialized !== null) await updateCacheIndexLastAccessedAt(cacheDir, namespace, keyHash);
|
|
4979
|
+
return materialized;
|
|
4958
4980
|
},
|
|
4959
4981
|
async lookupWithDebug(namespace, keyHash) {
|
|
4960
|
-
const rawEntry = await
|
|
4982
|
+
const rawEntry = await readIndexedCacheEntry({
|
|
4983
|
+
cacheDir,
|
|
4984
|
+
key: keyHash,
|
|
4985
|
+
namespace
|
|
4986
|
+
});
|
|
4961
4987
|
if (rawEntry === null) return null;
|
|
4962
4988
|
const entry = await materializeExternalJsonCacheEntryOrNull(rawEntry, externalJsonStore);
|
|
4963
4989
|
if (entry === null) return null;
|
|
@@ -4972,8 +4998,17 @@ function createFsCacheStore(options) {
|
|
|
4972
4998
|
};
|
|
4973
4999
|
},
|
|
4974
5000
|
async write(entry, debugKey) {
|
|
4975
|
-
|
|
4976
|
-
|
|
5001
|
+
await withCacheFileLock(namespaceLockPath(cacheDir, entry.namespace), async () => {
|
|
5002
|
+
await writeCompressedCacheEntry(cacheDir, entry);
|
|
5003
|
+
if (!usesSupportedCacheSerialization(entry.recording)) return;
|
|
5004
|
+
const index = await readNamespaceIndex(cacheDir, entry.namespace);
|
|
5005
|
+
index.entries[entry.key] = {
|
|
5006
|
+
storedAt: entry.storedAt,
|
|
5007
|
+
lastAccessedAt: entry.storedAt,
|
|
5008
|
+
blobRefs: await collectExternalJsonBlobRefs(entry, blobDirs)
|
|
5009
|
+
};
|
|
5010
|
+
await writeNamespaceIndex(cacheDir, index);
|
|
5011
|
+
});
|
|
4977
5012
|
if (debugKey !== void 0) {
|
|
4978
5013
|
if ((await resultify(() => writeDebugKeyEntry({
|
|
4979
5014
|
debugDir,
|
|
@@ -4984,36 +5019,11 @@ function createFsCacheStore(options) {
|
|
|
4984
5019
|
key: entry.key
|
|
4985
5020
|
}));
|
|
4986
5021
|
}
|
|
4987
|
-
await pruneEntriesForNamespace({
|
|
4988
|
-
cacheDir,
|
|
4989
|
-
debugDir,
|
|
4990
|
-
namespace: entry.namespace,
|
|
4991
|
-
maxEntries,
|
|
4992
|
-
protectedKey: entry.key
|
|
4993
|
-
});
|
|
4994
|
-
await pruneExternalJsonBlobs(cacheDir, blobDir);
|
|
4995
5022
|
},
|
|
4996
5023
|
async list() {
|
|
4997
|
-
const files = await listCacheEntryFiles(cacheDir);
|
|
4998
5024
|
const items = [];
|
|
4999
|
-
for (const
|
|
5000
|
-
|
|
5001
|
-
if (fileEntry === null || !entryMatchesPath(filePath, fileEntry.entry)) continue;
|
|
5002
|
-
const entry = fileEntry.entry;
|
|
5003
|
-
const operationType = entry.operationType ?? "span";
|
|
5004
|
-
const operationName = entry.operationName ?? entry.spanName ?? entry.namespace;
|
|
5005
|
-
items.push({
|
|
5006
|
-
key: entry.key,
|
|
5007
|
-
namespace: entry.namespace,
|
|
5008
|
-
operationType,
|
|
5009
|
-
operationName,
|
|
5010
|
-
spanName: entry.spanName,
|
|
5011
|
-
spanKind: entry.spanKind,
|
|
5012
|
-
storedAt: entry.storedAt,
|
|
5013
|
-
sizeBytes: fileEntry.sizeBytes
|
|
5014
|
-
});
|
|
5015
|
-
}
|
|
5016
|
-
items.sort((a, b) => a.storedAt < b.storedAt ? 1 : -1);
|
|
5025
|
+
for (const index of await listCacheIndexes(cacheDir)) for (const [key, entry] of Object.entries(index.entries)) items.push(toCacheListItem(index.namespace, key, entry));
|
|
5026
|
+
items.sort((a, b) => a.lastAccessedAt < b.lastAccessedAt ? 1 : -1);
|
|
5017
5027
|
return items;
|
|
5018
5028
|
},
|
|
5019
5029
|
async clear(filter) {
|
|
@@ -5026,21 +5036,46 @@ function createFsCacheStore(options) {
|
|
|
5026
5036
|
recursive: true,
|
|
5027
5037
|
force: true
|
|
5028
5038
|
});
|
|
5029
|
-
await rm(
|
|
5039
|
+
await Promise.all(blobDirs.map((dir) => rm(dir, {
|
|
5030
5040
|
recursive: true,
|
|
5031
5041
|
force: true
|
|
5032
|
-
});
|
|
5042
|
+
})));
|
|
5033
5043
|
return;
|
|
5034
5044
|
}
|
|
5035
5045
|
if (filter.namespace !== void 0) {
|
|
5036
5046
|
await clearCacheEntries(cacheDir, filter);
|
|
5037
5047
|
await clearDebugEntries(debugDir, filter);
|
|
5038
|
-
await
|
|
5048
|
+
await pruneUnreferencedExternalJsonBlobs(cacheDir, blobDirs);
|
|
5039
5049
|
return;
|
|
5040
5050
|
}
|
|
5041
5051
|
await clearCacheEntries(cacheDir, filter);
|
|
5042
5052
|
await clearDebugEntries(debugDir, filter);
|
|
5043
|
-
await
|
|
5053
|
+
await pruneUnreferencedExternalJsonBlobs(cacheDir, blobDirs);
|
|
5054
|
+
},
|
|
5055
|
+
async pruneExternalJsonBlobs() {
|
|
5056
|
+
await pruneUnreferencedExternalJsonBlobs(cacheDir, blobDirs);
|
|
5057
|
+
},
|
|
5058
|
+
async pruneRetention() {
|
|
5059
|
+
for (const index_ of await listCacheIndexes(cacheDir)) {
|
|
5060
|
+
const namespace = index_.namespace;
|
|
5061
|
+
const maxEntries = maxEntriesForNamespace(namespace, defaultMaxEntries, options.maxEntriesByNamespace);
|
|
5062
|
+
const keptKeys = await withCacheFileLock(namespaceLockPath(cacheDir, namespace), async () => {
|
|
5063
|
+
return pruneCacheEntriesForNamespace({
|
|
5064
|
+
cacheDir,
|
|
5065
|
+
index: await readNamespaceIndex(cacheDir, namespace),
|
|
5066
|
+
maxEntries
|
|
5067
|
+
});
|
|
5068
|
+
});
|
|
5069
|
+
await withCacheFileLock(namespaceLockPath(debugDir, namespace), () => pruneDebugEntriesForNamespace(debugDir, namespace, keptKeys));
|
|
5070
|
+
}
|
|
5071
|
+
await pruneUnreferencedExternalJsonBlobs(cacheDir, blobDirs);
|
|
5072
|
+
},
|
|
5073
|
+
async repair() {
|
|
5074
|
+
return repairIndexedCache({
|
|
5075
|
+
blobDirs,
|
|
5076
|
+
cacheDir,
|
|
5077
|
+
debugDir
|
|
5078
|
+
});
|
|
5044
5079
|
}
|
|
5045
5080
|
};
|
|
5046
5081
|
}
|
|
@@ -5068,10 +5103,16 @@ function createBufferedCacheStore(backingStore) {
|
|
|
5068
5103
|
return Promise.resolve();
|
|
5069
5104
|
},
|
|
5070
5105
|
async commit() {
|
|
5071
|
-
|
|
5106
|
+
await commitPendingCacheWrites({
|
|
5107
|
+
backingStore,
|
|
5108
|
+
pendingWrites: [...pendingEntries.values()].map((pending) => ({ ...pending }))
|
|
5109
|
+
});
|
|
5072
5110
|
},
|
|
5073
5111
|
getPendingEntries() {
|
|
5074
5112
|
return [...pendingEntries.values()].map((pending) => pending.entry);
|
|
5113
|
+
},
|
|
5114
|
+
getPendingWrites() {
|
|
5115
|
+
return [...pendingEntries.values()].map((pending) => ({ ...pending }));
|
|
5075
5116
|
}
|
|
5076
5117
|
};
|
|
5077
5118
|
}
|
|
@@ -5117,11 +5158,31 @@ function entryPath(params) {
|
|
|
5117
5158
|
if (filePath !== namespaceDir && !filePath.startsWith(`${namespaceDir}${sep}`)) throw new Error(`Cache entry key escapes namespace directory: ${params.key}`);
|
|
5118
5159
|
return filePath;
|
|
5119
5160
|
}
|
|
5120
|
-
|
|
5121
|
-
return (
|
|
5122
|
-
|
|
5123
|
-
|
|
5124
|
-
|
|
5161
|
+
function cacheIndexPath(cacheDir, namespace) {
|
|
5162
|
+
return join(namespaceDirPath(cacheDir, namespace), `${cacheIndexFilePrefix}${hashNamespace(namespace)}${debugEntryExtension}`);
|
|
5163
|
+
}
|
|
5164
|
+
async function readIndexedCacheEntry(params) {
|
|
5165
|
+
return withCacheFileLock(namespaceLockPath(params.cacheDir, params.namespace), async () => {
|
|
5166
|
+
if ((await readNamespaceIndex(params.cacheDir, params.namespace)).entries[params.key] === void 0) return null;
|
|
5167
|
+
const fileEntry = await readCacheEntryFilePath(cacheEntryPath(params.cacheDir, params.namespace, params.key), {
|
|
5168
|
+
namespace: params.namespace,
|
|
5169
|
+
key: params.key
|
|
5170
|
+
});
|
|
5171
|
+
if (fileEntry === null) return null;
|
|
5172
|
+
return fileEntry.entry;
|
|
5173
|
+
});
|
|
5174
|
+
}
|
|
5175
|
+
async function updateCacheIndexLastAccessedAt(cacheDir, namespace, key) {
|
|
5176
|
+
await withCacheFileLock(namespaceLockPath(cacheDir, namespace), async () => {
|
|
5177
|
+
const index = await readNamespaceIndex(cacheDir, namespace);
|
|
5178
|
+
const entry = index.entries[key];
|
|
5179
|
+
if (entry === void 0) return;
|
|
5180
|
+
index.entries[key] = {
|
|
5181
|
+
...entry,
|
|
5182
|
+
lastAccessedAt: new Date(getRealDateNowMs()).toISOString()
|
|
5183
|
+
};
|
|
5184
|
+
await writeNamespaceIndex(cacheDir, index);
|
|
5185
|
+
});
|
|
5125
5186
|
}
|
|
5126
5187
|
async function readCacheEntryFilePath(filePath, expected) {
|
|
5127
5188
|
if (!existsSync(filePath)) return null;
|
|
@@ -5136,10 +5197,7 @@ async function readCacheEntryFilePath(filePath, expected) {
|
|
|
5136
5197
|
const entry = parsed.data;
|
|
5137
5198
|
if (!usesSupportedCacheSerialization(entry.recording)) return null;
|
|
5138
5199
|
if (expected !== void 0 && (entry.namespace !== expected.namespace || entry.key !== expected.key)) return null;
|
|
5139
|
-
return {
|
|
5140
|
-
entry,
|
|
5141
|
-
sizeBytes: compressedResult.value.byteLength
|
|
5142
|
-
};
|
|
5200
|
+
return { entry };
|
|
5143
5201
|
}
|
|
5144
5202
|
async function writeCompressedCacheEntry(cacheDir, entry) {
|
|
5145
5203
|
const filePath = cacheEntryPath(cacheDir, entry.namespace, entry.key);
|
|
@@ -5188,23 +5246,132 @@ async function writeAtomicFile(filePath, contents) {
|
|
|
5188
5246
|
await writeFile(tmpPath, contents);
|
|
5189
5247
|
await rename(tmpPath, filePath);
|
|
5190
5248
|
}
|
|
5249
|
+
const emptyCacheIndex = (namespace) => ({
|
|
5250
|
+
version: 1,
|
|
5251
|
+
namespace,
|
|
5252
|
+
entries: {}
|
|
5253
|
+
});
|
|
5254
|
+
async function readNamespaceIndex(cacheDir, namespace) {
|
|
5255
|
+
const indexPath = cacheIndexPath(cacheDir, namespace);
|
|
5256
|
+
if (!existsSync(indexPath)) return emptyCacheIndex(namespace);
|
|
5257
|
+
const rawResult = await resultify(() => readFile(indexPath, "utf8"));
|
|
5258
|
+
if (rawResult.error) return emptyCacheIndex(namespace);
|
|
5259
|
+
return parseCacheIndexFile(safeJsonParse(rawResult.value), namespace) ?? emptyCacheIndex(namespace);
|
|
5260
|
+
}
|
|
5261
|
+
async function writeNamespaceIndex(cacheDir, index) {
|
|
5262
|
+
const entries = Object.entries(index.entries);
|
|
5263
|
+
if (entries.length === 0) {
|
|
5264
|
+
await rm(cacheIndexPath(cacheDir, index.namespace), { force: true });
|
|
5265
|
+
await removeDirIfEmpty(namespaceDirPath(cacheDir, index.namespace));
|
|
5266
|
+
return;
|
|
5267
|
+
}
|
|
5268
|
+
const sortedEntries = entries.toSorted(([a], [b]) => a < b ? -1 : 1);
|
|
5269
|
+
const normalizedEntries = Object.fromEntries(sortedEntries.map(([key, entry]) => [key, entry]));
|
|
5270
|
+
await writeAtomicFile(cacheIndexPath(cacheDir, index.namespace), JSON.stringify({
|
|
5271
|
+
...index,
|
|
5272
|
+
entries: normalizedEntries
|
|
5273
|
+
}, null, 2));
|
|
5274
|
+
}
|
|
5275
|
+
async function listCacheIndexes(cacheDir) {
|
|
5276
|
+
if (!existsSync(cacheDir)) return [];
|
|
5277
|
+
const entriesResult = await resultify(() => readdir(cacheDir, { withFileTypes: true }));
|
|
5278
|
+
if (entriesResult.error) return [];
|
|
5279
|
+
const records = [];
|
|
5280
|
+
for (const entry of entriesResult.value) {
|
|
5281
|
+
if (!entry.isDirectory()) continue;
|
|
5282
|
+
const namespaceDir = join(cacheDir, entry.name);
|
|
5283
|
+
for (const indexFilePath of await listCacheIndexFiles(namespaceDir)) {
|
|
5284
|
+
const rawResult = await resultify(() => readFile(indexFilePath, "utf8"));
|
|
5285
|
+
if (rawResult.error) continue;
|
|
5286
|
+
const parsed = parseCacheIndexFile(safeJsonParse(rawResult.value));
|
|
5287
|
+
if (parsed === null) continue;
|
|
5288
|
+
records.push(parsed);
|
|
5289
|
+
}
|
|
5290
|
+
}
|
|
5291
|
+
return records;
|
|
5292
|
+
}
|
|
5293
|
+
function hashNamespace(namespace) {
|
|
5294
|
+
return createHash("sha256").update(namespace).digest("hex");
|
|
5295
|
+
}
|
|
5296
|
+
function parseCacheIndexFile(value, expectedNamespace) {
|
|
5297
|
+
if (!isRecordLike(value)) return null;
|
|
5298
|
+
if (value.version !== 1 || typeof value.namespace !== "string") return null;
|
|
5299
|
+
if (expectedNamespace !== void 0 && value.namespace !== expectedNamespace) return null;
|
|
5300
|
+
if (!isRecordLike(value.entries)) return null;
|
|
5301
|
+
const entries = {};
|
|
5302
|
+
for (const [key, entryValue] of Object.entries(value.entries)) {
|
|
5303
|
+
const entry = parseCacheIndexEntry(entryValue);
|
|
5304
|
+
if (entry === null) return null;
|
|
5305
|
+
entries[key] = entry;
|
|
5306
|
+
}
|
|
5307
|
+
return {
|
|
5308
|
+
version: 1,
|
|
5309
|
+
namespace: value.namespace,
|
|
5310
|
+
entries
|
|
5311
|
+
};
|
|
5312
|
+
}
|
|
5313
|
+
function parseCacheIndexEntry(value) {
|
|
5314
|
+
if (!isRecordLike(value)) return null;
|
|
5315
|
+
if (typeof value.storedAt !== "string" || typeof value.lastAccessedAt !== "string") return null;
|
|
5316
|
+
if (!Array.isArray(value.blobRefs)) return null;
|
|
5317
|
+
const blobRefs = [];
|
|
5318
|
+
for (const blobRef of value.blobRefs) {
|
|
5319
|
+
if (typeof blobRef !== "string") return null;
|
|
5320
|
+
blobRefs.push(blobRef);
|
|
5321
|
+
}
|
|
5322
|
+
return {
|
|
5323
|
+
storedAt: value.storedAt,
|
|
5324
|
+
lastAccessedAt: value.lastAccessedAt,
|
|
5325
|
+
blobRefs
|
|
5326
|
+
};
|
|
5327
|
+
}
|
|
5328
|
+
function toCacheListItem(namespace, key, entry) {
|
|
5329
|
+
return {
|
|
5330
|
+
key,
|
|
5331
|
+
namespace,
|
|
5332
|
+
storedAt: entry.storedAt,
|
|
5333
|
+
lastAccessedAt: entry.lastAccessedAt
|
|
5334
|
+
};
|
|
5335
|
+
}
|
|
5336
|
+
function keyFromEntryFilePath(filePath, extension) {
|
|
5337
|
+
const name = basename(filePath);
|
|
5338
|
+
if (!name.endsWith(extension)) return null;
|
|
5339
|
+
return name.slice(0, -extension.length);
|
|
5340
|
+
}
|
|
5341
|
+
function debugNamespaceFromPath(debugDir, filePath) {
|
|
5342
|
+
return basename(dirname(resolve(debugDir, relative(debugDir, filePath))));
|
|
5343
|
+
}
|
|
5191
5344
|
async function clearCacheEntries(cacheDir, filter) {
|
|
5192
|
-
const
|
|
5193
|
-
for (const
|
|
5194
|
-
const
|
|
5195
|
-
|
|
5196
|
-
|
|
5197
|
-
|
|
5198
|
-
|
|
5345
|
+
const indexes = filter.namespace === void 0 ? await listCacheIndexes(cacheDir) : [await readNamespaceIndex(cacheDir, filter.namespace)];
|
|
5346
|
+
for (const record of indexes) {
|
|
5347
|
+
const namespace = record.namespace;
|
|
5348
|
+
await withCacheFileLock(namespaceLockPath(cacheDir, namespace), async () => {
|
|
5349
|
+
const index = await readNamespaceIndex(cacheDir, namespace);
|
|
5350
|
+
const matchingKeys = Object.keys(index.entries).filter((key) => {
|
|
5351
|
+
return index.entries[key] !== void 0 && entryMatchesFilter({
|
|
5352
|
+
namespace,
|
|
5353
|
+
key
|
|
5354
|
+
}, filter);
|
|
5355
|
+
});
|
|
5356
|
+
for (const key of matchingKeys) {
|
|
5357
|
+
await rm(cacheEntryPath(cacheDir, namespace, key), { force: true });
|
|
5358
|
+
delete index.entries[key];
|
|
5359
|
+
}
|
|
5360
|
+
await writeNamespaceIndex(cacheDir, index);
|
|
5361
|
+
});
|
|
5199
5362
|
}
|
|
5200
|
-
if (filter.namespace !== void 0) await removeDirIfEmpty(namespaceDirPath(cacheDir, filter.namespace));
|
|
5201
5363
|
}
|
|
5202
5364
|
async function clearDebugEntries(debugDir, filter) {
|
|
5203
|
-
const files = filter.namespace === void 0 ?
|
|
5365
|
+
const files = await listDebugEntryFiles(filter.namespace === void 0 ? debugDir : namespaceDirPath(debugDir, filter.namespace));
|
|
5204
5366
|
for (const filePath of files) {
|
|
5205
|
-
const
|
|
5206
|
-
|
|
5207
|
-
|
|
5367
|
+
const namespace = filter.namespace === void 0 ? debugNamespaceFromPath(debugDir, filePath) : filter.namespace;
|
|
5368
|
+
const key = keyFromEntryFilePath(filePath, debugEntryExtension);
|
|
5369
|
+
if (key === null) continue;
|
|
5370
|
+
if (!entryMatchesFilter({
|
|
5371
|
+
namespace,
|
|
5372
|
+
key
|
|
5373
|
+
}, filter)) continue;
|
|
5374
|
+
await withCacheFileLock(namespaceLockPath(debugDir, namespace), () => rm(filePath, { force: true }));
|
|
5208
5375
|
}
|
|
5209
5376
|
if (filter.namespace !== void 0) await removeDirIfEmpty(namespaceDirPath(debugDir, filter.namespace));
|
|
5210
5377
|
}
|
|
@@ -5212,49 +5379,86 @@ function entryMatchesFilter(entry, filter) {
|
|
|
5212
5379
|
if (filter.namespace !== void 0 && entry.namespace !== filter.namespace) return false;
|
|
5213
5380
|
return filter.key === void 0 || entry.key === filter.key;
|
|
5214
5381
|
}
|
|
5215
|
-
async function
|
|
5216
|
-
const { cacheDir,
|
|
5217
|
-
|
|
5218
|
-
|
|
5219
|
-
await withCacheFileLock(namespaceLockPath(debugDir, namespace), () => pruneDebugEntriesForNamespace(debugDir, namespace, keptKeys));
|
|
5220
|
-
});
|
|
5221
|
-
}
|
|
5222
|
-
async function pruneCacheEntriesForNamespace(cacheDir, namespace, maxEntries, protectedKey) {
|
|
5223
|
-
const entries = await listCacheEntriesForNamespace(cacheDir, namespace);
|
|
5224
|
-
const sorted = entries.toSorted((a, b) => a.entry.storedAt < b.entry.storedAt ? 1 : -1);
|
|
5382
|
+
async function pruneCacheEntriesForNamespace(params) {
|
|
5383
|
+
const { cacheDir, index, maxEntries } = params;
|
|
5384
|
+
const entries = Object.entries(index.entries);
|
|
5385
|
+
const sorted = entries.toSorted(([, a], [, b]) => a.lastAccessedAt < b.lastAccessedAt ? 1 : -1);
|
|
5225
5386
|
const keptKeys = /* @__PURE__ */ new Set();
|
|
5226
|
-
|
|
5227
|
-
if (protectedEntry !== void 0) keptKeys.add(protectedEntry.entry.key);
|
|
5228
|
-
for (const item of sorted) {
|
|
5387
|
+
for (const [key] of sorted) {
|
|
5229
5388
|
if (keptKeys.size >= maxEntries) break;
|
|
5230
|
-
keptKeys.add(
|
|
5389
|
+
keptKeys.add(key);
|
|
5390
|
+
}
|
|
5391
|
+
for (const [key] of entries) if (!keptKeys.has(key)) {
|
|
5392
|
+
await rm(cacheEntryPath(cacheDir, index.namespace, key), { force: true });
|
|
5393
|
+
delete index.entries[key];
|
|
5231
5394
|
}
|
|
5232
|
-
|
|
5233
|
-
await removeDirIfEmpty(namespaceDirPath(cacheDir, namespace));
|
|
5395
|
+
await writeNamespaceIndex(cacheDir, index);
|
|
5234
5396
|
return keptKeys;
|
|
5235
5397
|
}
|
|
5236
5398
|
async function pruneDebugEntriesForNamespace(debugDir, namespace, keptKeys) {
|
|
5237
5399
|
const files = await listDebugEntryFiles(namespaceDirPath(debugDir, namespace));
|
|
5238
5400
|
for (const filePath of files) {
|
|
5239
|
-
const
|
|
5240
|
-
if (
|
|
5401
|
+
const key = keyFromEntryFilePath(filePath, debugEntryExtension);
|
|
5402
|
+
if (key !== null && !keptKeys.has(key)) await rm(filePath, { force: true });
|
|
5241
5403
|
}
|
|
5242
5404
|
await removeDirIfEmpty(namespaceDirPath(debugDir, namespace));
|
|
5243
5405
|
}
|
|
5244
|
-
async function
|
|
5245
|
-
const
|
|
5246
|
-
|
|
5247
|
-
|
|
5248
|
-
|
|
5249
|
-
|
|
5250
|
-
|
|
5251
|
-
|
|
5406
|
+
async function repairIndexedCache(params) {
|
|
5407
|
+
const summary = {
|
|
5408
|
+
removedCacheFiles: 0,
|
|
5409
|
+
removedDebugFiles: 0,
|
|
5410
|
+
removedBlobFiles: 0,
|
|
5411
|
+
removedIndexRows: 0,
|
|
5412
|
+
rewrittenIndexes: 0
|
|
5413
|
+
};
|
|
5414
|
+
for (const index_ of await listCacheIndexes(params.cacheDir)) {
|
|
5415
|
+
const result = await withCacheFileLock(namespaceLockPath(params.cacheDir, index_.namespace), async () => {
|
|
5416
|
+
const index = await readNamespaceIndex(params.cacheDir, index_.namespace);
|
|
5417
|
+
let removedRows = 0;
|
|
5418
|
+
for (const key of Object.keys(index.entries)) if (!existsSync(cacheEntryPath(params.cacheDir, index.namespace, key))) {
|
|
5419
|
+
delete index.entries[key];
|
|
5420
|
+
removedRows++;
|
|
5421
|
+
}
|
|
5422
|
+
if (removedRows === 0) return {
|
|
5423
|
+
removedRows,
|
|
5424
|
+
rewritten: false
|
|
5425
|
+
};
|
|
5426
|
+
await writeNamespaceIndex(params.cacheDir, index);
|
|
5427
|
+
return {
|
|
5428
|
+
removedRows,
|
|
5429
|
+
rewritten: true
|
|
5430
|
+
};
|
|
5252
5431
|
});
|
|
5432
|
+
summary.removedIndexRows += result.removedRows;
|
|
5433
|
+
if (result.rewritten) summary.rewrittenIndexes++;
|
|
5434
|
+
}
|
|
5435
|
+
const indexes = await listCacheIndexes(params.cacheDir);
|
|
5436
|
+
const indexedCacheFiles = /* @__PURE__ */ new Set();
|
|
5437
|
+
const indexedDebugFiles = /* @__PURE__ */ new Set();
|
|
5438
|
+
const indexedBlobRefs = /* @__PURE__ */ new Set();
|
|
5439
|
+
for (const index_ of indexes) for (const [key, entry] of Object.entries(index_.entries)) {
|
|
5440
|
+
indexedCacheFiles.add(cacheEntryPath(params.cacheDir, index_.namespace, key));
|
|
5441
|
+
indexedDebugFiles.add(debugEntryPath(params.debugDir, index_.namespace, key));
|
|
5442
|
+
for (const blobRef of entry.blobRefs) indexedBlobRefs.add(blobRef);
|
|
5443
|
+
}
|
|
5444
|
+
for (const filePath of await listCacheEntryFiles(params.cacheDir, "allNamespaces")) if (!indexedCacheFiles.has(filePath)) {
|
|
5445
|
+
await rm(filePath, { force: true });
|
|
5446
|
+
summary.removedCacheFiles++;
|
|
5447
|
+
await removeDirIfEmpty(dirname(filePath));
|
|
5448
|
+
}
|
|
5449
|
+
for (const filePath of await listDebugEntryFiles(params.debugDir)) if (!indexedDebugFiles.has(filePath)) {
|
|
5450
|
+
await rm(filePath, { force: true });
|
|
5451
|
+
summary.removedDebugFiles++;
|
|
5452
|
+
await removeDirIfEmpty(dirname(filePath));
|
|
5453
|
+
}
|
|
5454
|
+
for (const blobDir of params.blobDirs) {
|
|
5455
|
+
if (!existsSync(blobDir)) continue;
|
|
5456
|
+
for (const blobRef of await listExternalJsonBlobPaths(blobDir)) if (!indexedBlobRefs.has(blobRef)) {
|
|
5457
|
+
await rm(resolveStorePath(blobDir, blobRef), { force: true });
|
|
5458
|
+
summary.removedBlobFiles++;
|
|
5459
|
+
}
|
|
5253
5460
|
}
|
|
5254
|
-
return
|
|
5255
|
-
}
|
|
5256
|
-
function entryMatchesPath(filePath, entry) {
|
|
5257
|
-
return basename(filePath) === `${entry.key}${cacheEntryExtension}` && basename(dirname(filePath)) === sanitizeSegment$1(entry.namespace);
|
|
5461
|
+
return summary;
|
|
5258
5462
|
}
|
|
5259
5463
|
function usesSupportedCacheSerialization(value) {
|
|
5260
5464
|
if (Array.isArray(value)) return value.every(usesSupportedCacheSerialization);
|
|
@@ -5262,14 +5466,14 @@ function usesSupportedCacheSerialization(value) {
|
|
|
5262
5466
|
if (Object.hasOwn(value, cacheSerializationMarker) && (typeof value[cacheSerializationMarker] !== "string" || !value[cacheSerializationMarker].startsWith(supportedCacheSerializationPrefix))) return false;
|
|
5263
5467
|
return Object.values(value).every(usesSupportedCacheSerialization);
|
|
5264
5468
|
}
|
|
5265
|
-
function createExternalJsonBlobStore(
|
|
5469
|
+
function createExternalJsonBlobStore(params) {
|
|
5266
5470
|
return {
|
|
5267
5471
|
async write(rawJson) {
|
|
5268
5472
|
const rawBytes = Buffer.from(rawJson, "utf8");
|
|
5269
5473
|
const hash = hashExternalJson(rawBytes);
|
|
5270
5474
|
const path = externalJsonBlobPath(hash);
|
|
5271
5475
|
const compressed = brotliCompressSync(rawBytes);
|
|
5272
|
-
const filePath = resolveStorePath(
|
|
5476
|
+
const filePath = resolveStorePath(params.primaryDir, path);
|
|
5273
5477
|
if (!existsSync(filePath)) await writeAtomicFile(filePath, compressed);
|
|
5274
5478
|
return {
|
|
5275
5479
|
compressedLength: compressed.byteLength,
|
|
@@ -5279,10 +5483,15 @@ function createExternalJsonBlobStore(blobDir) {
|
|
|
5279
5483
|
};
|
|
5280
5484
|
},
|
|
5281
5485
|
async read(ref) {
|
|
5282
|
-
const
|
|
5283
|
-
|
|
5284
|
-
|
|
5285
|
-
|
|
5486
|
+
for (const dir of [params.primaryDir, ...params.fallbackDirs]) {
|
|
5487
|
+
const compressedResult = await resultify(() => readFile(resolveStorePath(dir, ref.path)));
|
|
5488
|
+
if (compressedResult.error) continue;
|
|
5489
|
+
const rawBytesResult = resultify(() => brotliDecompressSync(compressedResult.value));
|
|
5490
|
+
if (rawBytesResult.error) continue;
|
|
5491
|
+
const rawBytes = rawBytesResult.value;
|
|
5492
|
+
if (rawBytes.byteLength === ref.length && hashExternalJson(rawBytes) === ref.hash) return rawBytes.toString("utf8");
|
|
5493
|
+
}
|
|
5494
|
+
throw new Error(`External cache blob failed integrity check: ${ref.hash}`);
|
|
5286
5495
|
}
|
|
5287
5496
|
};
|
|
5288
5497
|
}
|
|
@@ -5308,28 +5517,55 @@ async function materializeExternalJsonCacheEntryOrNull(entry, store) {
|
|
|
5308
5517
|
const result = await resultify(() => materializeExternalJsonCacheEntry(entry, store));
|
|
5309
5518
|
return result.error ? null : result.value;
|
|
5310
5519
|
}
|
|
5311
|
-
async function
|
|
5312
|
-
if (!existsSync(blobDir)) return;
|
|
5520
|
+
async function pruneUnreferencedExternalJsonBlobs(cacheDir, blobDirs) {
|
|
5313
5521
|
const referenced = await collectReferencedExternalJsonBlobPaths(cacheDir);
|
|
5314
|
-
for (const
|
|
5522
|
+
for (const blobDir of blobDirs) {
|
|
5523
|
+
if (!existsSync(blobDir)) continue;
|
|
5524
|
+
for (const path of await listExternalJsonBlobPaths(blobDir)) if (!referenced.has(path)) await rm(resolveStorePath(blobDir, path), { force: true });
|
|
5525
|
+
}
|
|
5315
5526
|
}
|
|
5316
5527
|
async function collectReferencedExternalJsonBlobPaths(cacheDir) {
|
|
5317
5528
|
const paths = /* @__PURE__ */ new Set();
|
|
5318
|
-
for (const
|
|
5319
|
-
const fileEntry = await readCacheEntryFilePath(filePath);
|
|
5320
|
-
if (fileEntry === null || !entryMatchesPath(filePath, fileEntry.entry)) continue;
|
|
5321
|
-
collectExternalJsonBlobPaths(fileEntry.entry, paths);
|
|
5322
|
-
}
|
|
5529
|
+
for (const index_ of await listCacheIndexes(cacheDir)) for (const entry of Object.values(index_.entries)) for (const blobRef of entry.blobRefs) paths.add(blobRef);
|
|
5323
5530
|
return paths;
|
|
5324
5531
|
}
|
|
5325
|
-
function
|
|
5532
|
+
async function collectExternalJsonBlobRefs(value, blobDirs) {
|
|
5533
|
+
const paths = /* @__PURE__ */ new Set();
|
|
5534
|
+
const pendingBlobPaths = [];
|
|
5535
|
+
collectExternalJsonBlobPaths(value, paths, pendingBlobPaths);
|
|
5536
|
+
while (pendingBlobPaths.length > 0) {
|
|
5537
|
+
const blobPath = pendingBlobPaths.pop();
|
|
5538
|
+
if (blobPath === void 0) continue;
|
|
5539
|
+
const rawJson = await readExternalJsonBlobByPath(blobDirs, blobPath);
|
|
5540
|
+
if (rawJson === null) continue;
|
|
5541
|
+
const json = safeJsonParse(rawJson);
|
|
5542
|
+
if (json === null) continue;
|
|
5543
|
+
collectExternalJsonBlobPaths(json, paths, pendingBlobPaths);
|
|
5544
|
+
}
|
|
5545
|
+
return [...paths].sort();
|
|
5546
|
+
}
|
|
5547
|
+
function collectExternalJsonBlobPaths(value, paths, pendingBlobPaths) {
|
|
5326
5548
|
if (Array.isArray(value)) {
|
|
5327
|
-
for (const item of value) collectExternalJsonBlobPaths(item, paths);
|
|
5549
|
+
for (const item of value) collectExternalJsonBlobPaths(item, paths, pendingBlobPaths);
|
|
5328
5550
|
return;
|
|
5329
5551
|
}
|
|
5330
5552
|
if (!isRecordLike(value)) return;
|
|
5331
|
-
if (value[cacheSerializationMarker] === externalJsonCacheSerializationMarker && typeof value.path === "string")
|
|
5332
|
-
|
|
5553
|
+
if (value[cacheSerializationMarker] === externalJsonCacheSerializationMarker && typeof value.path === "string") {
|
|
5554
|
+
if (!paths.has(value.path)) {
|
|
5555
|
+
paths.add(value.path);
|
|
5556
|
+
pendingBlobPaths.push(value.path);
|
|
5557
|
+
}
|
|
5558
|
+
}
|
|
5559
|
+
for (const entryValue of Object.values(value)) collectExternalJsonBlobPaths(entryValue, paths, pendingBlobPaths);
|
|
5560
|
+
}
|
|
5561
|
+
async function readExternalJsonBlobByPath(blobDirs, path) {
|
|
5562
|
+
for (const blobDir of blobDirs) {
|
|
5563
|
+
const compressedResult = await resultify(() => readFile(resolveStorePath(blobDir, path)));
|
|
5564
|
+
if (compressedResult.error) continue;
|
|
5565
|
+
const rawResult = resultify(() => brotliDecompressSync(compressedResult.value).toString("utf8"));
|
|
5566
|
+
if (!rawResult.error) return rawResult.value;
|
|
5567
|
+
}
|
|
5568
|
+
return null;
|
|
5333
5569
|
}
|
|
5334
5570
|
async function listExternalJsonBlobPaths(blobDir) {
|
|
5335
5571
|
const paths = [];
|
|
@@ -5348,12 +5584,33 @@ async function collectExternalJsonBlobFilePaths(root, dir, paths) {
|
|
|
5348
5584
|
if (entry.isFile() && entry.name.endsWith(externalJsonBlobExtension)) paths.push(relative(root, path));
|
|
5349
5585
|
}
|
|
5350
5586
|
}
|
|
5351
|
-
async function listCacheEntryFiles(rootDir) {
|
|
5352
|
-
return
|
|
5587
|
+
async function listCacheEntryFiles(rootDir, scope) {
|
|
5588
|
+
if (scope === "namespace") return listDirectFilesWithExtension(rootDir, cacheEntryExtension);
|
|
5589
|
+
if (!existsSync(rootDir)) return [];
|
|
5590
|
+
const entriesResult = await resultify(() => readdir(rootDir, { withFileTypes: true }));
|
|
5591
|
+
if (entriesResult.error) return [];
|
|
5592
|
+
const files = [];
|
|
5593
|
+
for (const entry of entriesResult.value) {
|
|
5594
|
+
if (!entry.isDirectory()) continue;
|
|
5595
|
+
files.push(...await listDirectFilesWithExtension(join(rootDir, entry.name), cacheEntryExtension));
|
|
5596
|
+
}
|
|
5597
|
+
return files;
|
|
5353
5598
|
}
|
|
5354
5599
|
async function listDebugEntryFiles(rootDir) {
|
|
5355
5600
|
return listFilesWithExtension(rootDir, debugEntryExtension);
|
|
5356
5601
|
}
|
|
5602
|
+
async function listCacheIndexFiles(rootDir) {
|
|
5603
|
+
if (!existsSync(rootDir)) return [];
|
|
5604
|
+
const entriesResult = await resultify(() => readdir(rootDir, { withFileTypes: true }));
|
|
5605
|
+
if (entriesResult.error) return [];
|
|
5606
|
+
return entriesResult.value.filter((entry) => entry.isFile() && entry.name.startsWith(cacheIndexFilePrefix) && entry.name.endsWith(debugEntryExtension)).map((entry) => join(rootDir, entry.name));
|
|
5607
|
+
}
|
|
5608
|
+
async function listDirectFilesWithExtension(rootDir, extension) {
|
|
5609
|
+
if (!existsSync(rootDir)) return [];
|
|
5610
|
+
const entriesResult = await resultify(() => readdir(rootDir, { withFileTypes: true }));
|
|
5611
|
+
if (entriesResult.error) return [];
|
|
5612
|
+
return entriesResult.value.filter((entry) => entry.isFile() && entry.name.endsWith(extension)).map((entry) => join(rootDir, entry.name));
|
|
5613
|
+
}
|
|
5357
5614
|
async function listFilesWithExtension(rootDir, extension) {
|
|
5358
5615
|
if (!existsSync(rootDir)) return [];
|
|
5359
5616
|
const entriesResult = await resultify(() => readdir(rootDir, { withFileTypes: true }));
|
|
@@ -5387,6 +5644,7 @@ async function withCacheFileLock(filePath, fn) {
|
|
|
5387
5644
|
force: true
|
|
5388
5645
|
});
|
|
5389
5646
|
if (result.error) throw result.error;
|
|
5647
|
+
return result.value;
|
|
5390
5648
|
}
|
|
5391
5649
|
async function acquireLock(lockPath) {
|
|
5392
5650
|
const startedAt = Date.now();
|
|
@@ -5412,80 +5670,6 @@ function isRecordLike(value) {
|
|
|
5412
5670
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
5413
5671
|
}
|
|
5414
5672
|
//#endregion
|
|
5415
|
-
//#region ../runner/src/chartValidation.ts
|
|
5416
|
-
function isValidColumnMetric(metric, columnsByKey, evalId, warnings) {
|
|
5417
|
-
const columnDef = columnsByKey.get(metric.key);
|
|
5418
|
-
if (!columnDef) {
|
|
5419
|
-
warnings.push(`[${evalId}] chart metric references unknown column "${metric.key}" — dropped`);
|
|
5420
|
-
return false;
|
|
5421
|
-
}
|
|
5422
|
-
if (metric.aggregate === "passThresholdRate") {
|
|
5423
|
-
if (columnDef.isScore !== true || typeof columnDef.passThreshold !== "number") {
|
|
5424
|
-
warnings.push(`[${evalId}] chart metric "${metric.key}" uses "passThresholdRate" but the column is not a score with passThreshold — dropped`);
|
|
5425
|
-
return false;
|
|
5426
|
-
}
|
|
5427
|
-
}
|
|
5428
|
-
return true;
|
|
5429
|
-
}
|
|
5430
|
-
function isValidTooltipExtra(extra, columnsByKey, evalId, warnings) {
|
|
5431
|
-
const columnDef = columnsByKey.get(extra.key);
|
|
5432
|
-
if (!columnDef) {
|
|
5433
|
-
warnings.push(`[${evalId}] chart tooltip extra references unknown column "${extra.key}" — dropped`);
|
|
5434
|
-
return false;
|
|
5435
|
-
}
|
|
5436
|
-
if (extra.aggregate === "passThresholdRate") {
|
|
5437
|
-
if (columnDef.isScore !== true || typeof columnDef.passThreshold !== "number") {
|
|
5438
|
-
warnings.push(`[${evalId}] chart tooltip extra "${extra.key}" uses "passThresholdRate" but the column is not a score with passThreshold — dropped`);
|
|
5439
|
-
return false;
|
|
5440
|
-
}
|
|
5441
|
-
}
|
|
5442
|
-
return true;
|
|
5443
|
-
}
|
|
5444
|
-
function sanitizeChart(chart, columnsByKey, evalId, warnings) {
|
|
5445
|
-
const metrics = chart.metrics.filter((metric) => {
|
|
5446
|
-
if (metric.source === "builtin") return true;
|
|
5447
|
-
return isValidColumnMetric(metric, columnsByKey, evalId, warnings);
|
|
5448
|
-
});
|
|
5449
|
-
if (metrics.length === 0) {
|
|
5450
|
-
warnings.push(`[${evalId}] chart had no valid metrics after validation — chart dropped`);
|
|
5451
|
-
return null;
|
|
5452
|
-
}
|
|
5453
|
-
const tooltipExtras = chart.tooltipExtras?.filter((extra) => {
|
|
5454
|
-
if (extra.source === "builtin") return true;
|
|
5455
|
-
return isValidTooltipExtra(extra, columnsByKey, evalId, warnings);
|
|
5456
|
-
});
|
|
5457
|
-
return {
|
|
5458
|
-
...chart,
|
|
5459
|
-
metrics,
|
|
5460
|
-
tooltipExtras: tooltipExtras?.length ? tooltipExtras : void 0
|
|
5461
|
-
};
|
|
5462
|
-
}
|
|
5463
|
-
/**
|
|
5464
|
-
* Validate and sanitize an authored `charts` config against the eval's
|
|
5465
|
-
* declared columns. Drops metrics/extras that reference unknown columns or
|
|
5466
|
-
* misuse `passThresholdRate`, and drops entire charts whose metrics are all
|
|
5467
|
-
* invalid. Returns `charts: undefined` when nothing valid remains so the UI
|
|
5468
|
-
* falls back to rendering no chart (matching the opt-in default).
|
|
5469
|
-
*/
|
|
5470
|
-
function validateCharts(params) {
|
|
5471
|
-
const { charts, columnDefs, evalId } = params;
|
|
5472
|
-
if (!charts || charts.length === 0) return {
|
|
5473
|
-
charts: void 0,
|
|
5474
|
-
warnings: []
|
|
5475
|
-
};
|
|
5476
|
-
const columnsByKey = new Map(columnDefs.map((def) => [def.key, def]));
|
|
5477
|
-
const warnings = [];
|
|
5478
|
-
const sanitized = [];
|
|
5479
|
-
for (const chart of charts) {
|
|
5480
|
-
const result = sanitizeChart(chart, columnsByKey, evalId, warnings);
|
|
5481
|
-
if (result) sanitized.push(result);
|
|
5482
|
-
}
|
|
5483
|
-
return {
|
|
5484
|
-
charts: sanitized.length > 0 ? sanitized : void 0,
|
|
5485
|
-
warnings
|
|
5486
|
-
};
|
|
5487
|
-
}
|
|
5488
|
-
//#endregion
|
|
5489
5673
|
//#region ../runner/src/columnBuilder.ts
|
|
5490
5674
|
/**
|
|
5491
5675
|
* Normalize a user-provided score definition (either a function or an
|
|
@@ -5991,112 +6175,6 @@ function addDefaultOutputs(params) {
|
|
|
5991
6175
|
});
|
|
5992
6176
|
}
|
|
5993
6177
|
//#endregion
|
|
5994
|
-
//#region ../runner/src/discovery.ts
|
|
5995
|
-
const evalIdMatchRegex = /\bid\s*:\s*['"]([^'"]+)['"]/;
|
|
5996
|
-
const evalTitleMatchRegex = /\btitle\s*:\s*['"]([^'"]+)['"]/;
|
|
5997
|
-
/** Parse static eval metadata and discovery issues from one eval file. */
|
|
5998
|
-
function parseEvalDiscovery(filePath, content) {
|
|
5999
|
-
const metas = [];
|
|
6000
|
-
let searchIndex = 0;
|
|
6001
|
-
while (searchIndex < content.length) {
|
|
6002
|
-
const defineEvalIndex = content.indexOf("defineEval", searchIndex);
|
|
6003
|
-
if (defineEvalIndex === -1) break;
|
|
6004
|
-
const extracted = extractDefineEvalObject(content, defineEvalIndex);
|
|
6005
|
-
if (!extracted) {
|
|
6006
|
-
searchIndex = defineEvalIndex + 10;
|
|
6007
|
-
continue;
|
|
6008
|
-
}
|
|
6009
|
-
const id = evalIdMatchRegex.exec(extracted.objectText)?.[1];
|
|
6010
|
-
if (id !== void 0) {
|
|
6011
|
-
const result = {
|
|
6012
|
-
filePath,
|
|
6013
|
-
id
|
|
6014
|
-
};
|
|
6015
|
-
const title = evalTitleMatchRegex.exec(extracted.objectText)?.[1];
|
|
6016
|
-
if (title !== void 0) result.title = title;
|
|
6017
|
-
metas.push(result);
|
|
6018
|
-
}
|
|
6019
|
-
searchIndex = extracted.nextIndex;
|
|
6020
|
-
}
|
|
6021
|
-
const countsById = /* @__PURE__ */ new Map();
|
|
6022
|
-
for (const meta of metas) countsById.set(meta.id, (countsById.get(meta.id) ?? 0) + 1);
|
|
6023
|
-
const duplicateIds = new Set([...countsById].filter(([, count]) => count > 1).map(([id]) => id));
|
|
6024
|
-
const issues = [...duplicateIds].map((evalId) => ({
|
|
6025
|
-
type: "duplicate-eval-id",
|
|
6026
|
-
severity: "error",
|
|
6027
|
-
filePath,
|
|
6028
|
-
evalId,
|
|
6029
|
-
message: `Duplicate eval id "${evalId}" in ${filePath}. Eval ids must be unique within one file.`
|
|
6030
|
-
}));
|
|
6031
|
-
return {
|
|
6032
|
-
metas: metas.filter((meta) => !duplicateIds.has(meta.id)),
|
|
6033
|
-
issues
|
|
6034
|
-
};
|
|
6035
|
-
}
|
|
6036
|
-
function extractDefineEvalObject(content, defineEvalIndex) {
|
|
6037
|
-
const openParenIndex = content.indexOf("(", defineEvalIndex);
|
|
6038
|
-
if (openParenIndex === -1) return void 0;
|
|
6039
|
-
const objectStartIndex = content.indexOf("{", openParenIndex);
|
|
6040
|
-
if (objectStartIndex === -1) return void 0;
|
|
6041
|
-
let depth = 0;
|
|
6042
|
-
let quote;
|
|
6043
|
-
let inBlockComment = false;
|
|
6044
|
-
let inLineComment = false;
|
|
6045
|
-
let isEscaped = false;
|
|
6046
|
-
for (let index = objectStartIndex; index < content.length; index++) {
|
|
6047
|
-
const currentChar = content[index];
|
|
6048
|
-
const nextChar = content[index + 1];
|
|
6049
|
-
if (inLineComment) {
|
|
6050
|
-
if (currentChar === "\n") inLineComment = false;
|
|
6051
|
-
continue;
|
|
6052
|
-
}
|
|
6053
|
-
if (inBlockComment) {
|
|
6054
|
-
if (currentChar === "*" && nextChar === "/") {
|
|
6055
|
-
inBlockComment = false;
|
|
6056
|
-
index++;
|
|
6057
|
-
}
|
|
6058
|
-
continue;
|
|
6059
|
-
}
|
|
6060
|
-
if (quote) {
|
|
6061
|
-
if (isEscaped) {
|
|
6062
|
-
isEscaped = false;
|
|
6063
|
-
continue;
|
|
6064
|
-
}
|
|
6065
|
-
if (currentChar === "\\") {
|
|
6066
|
-
isEscaped = true;
|
|
6067
|
-
continue;
|
|
6068
|
-
}
|
|
6069
|
-
if (currentChar === quote) quote = void 0;
|
|
6070
|
-
continue;
|
|
6071
|
-
}
|
|
6072
|
-
if (currentChar === "/" && nextChar === "/") {
|
|
6073
|
-
inLineComment = true;
|
|
6074
|
-
index++;
|
|
6075
|
-
continue;
|
|
6076
|
-
}
|
|
6077
|
-
if (currentChar === "/" && nextChar === "*") {
|
|
6078
|
-
inBlockComment = true;
|
|
6079
|
-
index++;
|
|
6080
|
-
continue;
|
|
6081
|
-
}
|
|
6082
|
-
if (currentChar === "\"" || currentChar === "'" || currentChar === "`") {
|
|
6083
|
-
quote = currentChar;
|
|
6084
|
-
continue;
|
|
6085
|
-
}
|
|
6086
|
-
if (currentChar === "{") {
|
|
6087
|
-
depth++;
|
|
6088
|
-
continue;
|
|
6089
|
-
}
|
|
6090
|
-
if (currentChar === "}") {
|
|
6091
|
-
depth--;
|
|
6092
|
-
if (depth === 0) return {
|
|
6093
|
-
nextIndex: index + 1,
|
|
6094
|
-
objectText: content.slice(objectStartIndex, index + 1)
|
|
6095
|
-
};
|
|
6096
|
-
}
|
|
6097
|
-
}
|
|
6098
|
-
}
|
|
6099
|
-
//#endregion
|
|
6100
6178
|
//#region ../runner/src/evalModuleLoader.ts
|
|
6101
6179
|
/**
|
|
6102
6180
|
* Import one eval module with a cache key derived from its current source so
|
|
@@ -6121,6 +6199,7 @@ const agentPackageUrlBySpecifier = new Map([
|
|
|
6121
6199
|
"@agent-evals/sdk",
|
|
6122
6200
|
"@agent-evals/shared",
|
|
6123
6201
|
"@agent-evals/runner",
|
|
6202
|
+
"@agent-evals/runner/case-child",
|
|
6124
6203
|
"@agent-evals/runner/run-child"
|
|
6125
6204
|
].flatMap((specifier) => {
|
|
6126
6205
|
try {
|
|
@@ -6146,7 +6225,8 @@ function isIsolatableFilePath(filePath, workspaceRoot) {
|
|
|
6146
6225
|
if (isAgentEvalsPackageFilePath(filePath)) return false;
|
|
6147
6226
|
const relativePath = relative(workspaceRoot, filePath);
|
|
6148
6227
|
if (relativePath === "" || relativePath.startsWith("..") || isAbsolute(relativePath)) return false;
|
|
6149
|
-
|
|
6228
|
+
const segments = relativePath.split(pathSegmentSeparatorPattern);
|
|
6229
|
+
return !segments.includes(".agent-evals") && !segments.includes("node_modules");
|
|
6150
6230
|
}
|
|
6151
6231
|
function isAgentEvalsPackageFilePath(filePath) {
|
|
6152
6232
|
return agentPackageDirectoryPaths.some((packageDirectoryPath) => {
|
|
@@ -6184,6 +6264,9 @@ function registerModuleIsolationHooks() {
|
|
|
6184
6264
|
};
|
|
6185
6265
|
} });
|
|
6186
6266
|
}
|
|
6267
|
+
function registerAgentEvalsPackageResolutionHooks() {
|
|
6268
|
+
registerModuleIsolationHooks();
|
|
6269
|
+
}
|
|
6187
6270
|
function clearWorkspaceRequireCacheOnce(context) {
|
|
6188
6271
|
if (clearedRequireCacheKeys.has(context.key)) return;
|
|
6189
6272
|
clearedRequireCacheKeys.add(context.key);
|
|
@@ -6205,432 +6288,6 @@ async function runWithModuleIsolation(context, fn) {
|
|
|
6205
6288
|
return await isolationStorage.run(context, fn);
|
|
6206
6289
|
}
|
|
6207
6290
|
//#endregion
|
|
6208
|
-
//#region ../runner/src/evalRegistryLoader.ts
|
|
6209
|
-
async function loadIsolatedEvalRegistry(params) {
|
|
6210
|
-
return await runWithEvalRegistry(async (registry) => {
|
|
6211
|
-
await runWithModuleIsolation(params.moduleIsolation, async () => {
|
|
6212
|
-
await runInEvalRuntimeScope(params.runtimeScope, async () => {
|
|
6213
|
-
await loadEvalModule(params.evalFilePath, params.sourceFingerprint);
|
|
6214
|
-
});
|
|
6215
|
-
});
|
|
6216
|
-
return registry;
|
|
6217
|
-
});
|
|
6218
|
-
}
|
|
6219
|
-
async function useIsolatedEvalDefinition(params) {
|
|
6220
|
-
const entry = (await loadIsolatedEvalRegistry(params)).get(params.evalId);
|
|
6221
|
-
if (entry === void 0) throw new Error(`Eval "${params.evalId}" was not registered after importing ${params.evalFilePath}`);
|
|
6222
|
-
return await entry.use(async (evalDef) => {
|
|
6223
|
-
return await params.use(evalDef);
|
|
6224
|
-
});
|
|
6225
|
-
}
|
|
6226
|
-
//#endregion
|
|
6227
|
-
//#region ../runner/src/freshness.ts
|
|
6228
|
-
/**
|
|
6229
|
-
* Derive eval freshness from the latest run, current eval-file fingerprint,
|
|
6230
|
-
* current git commit, and an age threshold.
|
|
6231
|
-
*/
|
|
6232
|
-
function deriveEvalFreshness(params) {
|
|
6233
|
-
const { latestRun, gitState, currentEvalSourceFingerprint, staleAfterDays, now = /* @__PURE__ */ new Date() } = params;
|
|
6234
|
-
const stale = latestRun?.evalSourceFingerprint !== void 0 && latestRun.evalSourceFingerprint !== null && currentEvalSourceFingerprint !== null && currentEvalSourceFingerprint !== latestRun.evalSourceFingerprint;
|
|
6235
|
-
const latestRunCommitSha = latestRun?.commitSha;
|
|
6236
|
-
if (latestRunCommitSha === void 0 || latestRunCommitSha === null) return {
|
|
6237
|
-
freshnessStatus: stale ? "stale" : "fresh",
|
|
6238
|
-
stale,
|
|
6239
|
-
outdated: false
|
|
6240
|
-
};
|
|
6241
|
-
if (gitState.commitSha === null) return {
|
|
6242
|
-
freshnessStatus: stale ? "stale" : "fresh",
|
|
6243
|
-
stale,
|
|
6244
|
-
outdated: false
|
|
6245
|
-
};
|
|
6246
|
-
if (latestRunCommitSha === gitState.commitSha) return {
|
|
6247
|
-
freshnessStatus: stale ? "stale" : "fresh",
|
|
6248
|
-
stale,
|
|
6249
|
-
outdated: false
|
|
6250
|
-
};
|
|
6251
|
-
const latestRunStartedAt = new Date(latestRun?.startedAt ?? "").getTime();
|
|
6252
|
-
if (!Number.isFinite(latestRunStartedAt)) return {
|
|
6253
|
-
freshnessStatus: stale ? "stale" : "fresh",
|
|
6254
|
-
stale,
|
|
6255
|
-
outdated: false
|
|
6256
|
-
};
|
|
6257
|
-
const outdated = now.getTime() - latestRunStartedAt >= staleAfterDays * 24 * 60 * 60 * 1e3;
|
|
6258
|
-
return {
|
|
6259
|
-
freshnessStatus: stale ? "stale" : outdated ? "outdated" : "fresh",
|
|
6260
|
-
stale,
|
|
6261
|
-
outdated
|
|
6262
|
-
};
|
|
6263
|
-
}
|
|
6264
|
-
/** Return the timestamp used when ordering and displaying a run recency. */
|
|
6265
|
-
function getRunFreshnessTimestamp(manifest) {
|
|
6266
|
-
return manifest.endedAt ?? manifest.startedAt;
|
|
6267
|
-
}
|
|
6268
|
-
//#endregion
|
|
6269
|
-
//#region ../runner/src/manualInput/walker.ts
|
|
6270
|
-
function isObject(value) {
|
|
6271
|
-
return typeof value === "object" && value !== null;
|
|
6272
|
-
}
|
|
6273
|
-
function getZodDef(schema) {
|
|
6274
|
-
if (!isObject(schema)) return null;
|
|
6275
|
-
const zodHolder = schema._zod;
|
|
6276
|
-
if (!isObject(zodHolder)) return null;
|
|
6277
|
-
const def = zodHolder.def;
|
|
6278
|
-
if (!isObject(def)) return null;
|
|
6279
|
-
if (typeof def.type !== "string") return null;
|
|
6280
|
-
return {
|
|
6281
|
-
...def,
|
|
6282
|
-
type: def.type
|
|
6283
|
-
};
|
|
6284
|
-
}
|
|
6285
|
-
function getDescription(schema) {
|
|
6286
|
-
if (!isObject(schema)) return void 0;
|
|
6287
|
-
const description = schema.description;
|
|
6288
|
-
return typeof description === "string" ? description : void 0;
|
|
6289
|
-
}
|
|
6290
|
-
function getInnerSchema(def) {
|
|
6291
|
-
return def.innerType;
|
|
6292
|
-
}
|
|
6293
|
-
function getChecks(def) {
|
|
6294
|
-
const checks = def.checks;
|
|
6295
|
-
if (!Array.isArray(checks)) return [];
|
|
6296
|
-
const out = [];
|
|
6297
|
-
for (const check of checks) {
|
|
6298
|
-
if (!isObject(check)) continue;
|
|
6299
|
-
const zodHolder = check._zod;
|
|
6300
|
-
if (!isObject(zodHolder)) continue;
|
|
6301
|
-
const checkDef = zodHolder.def;
|
|
6302
|
-
if (!isObject(checkDef)) continue;
|
|
6303
|
-
if (typeof checkDef.check !== "string") continue;
|
|
6304
|
-
out.push({
|
|
6305
|
-
...checkDef,
|
|
6306
|
-
check: checkDef.check
|
|
6307
|
-
});
|
|
6308
|
-
}
|
|
6309
|
-
return out;
|
|
6310
|
-
}
|
|
6311
|
-
function findCheck(checks, name) {
|
|
6312
|
-
return checks.find((check) => check.check === name);
|
|
6313
|
-
}
|
|
6314
|
-
function unwrap(schema) {
|
|
6315
|
-
let current = schema;
|
|
6316
|
-
let required = true;
|
|
6317
|
-
let defaultValue = void 0;
|
|
6318
|
-
for (let depth = 0; depth < 8; depth += 1) {
|
|
6319
|
-
const def = getZodDef(current);
|
|
6320
|
-
if (!def) return null;
|
|
6321
|
-
if (def.type === "optional" || def.type === "nullable") {
|
|
6322
|
-
required = false;
|
|
6323
|
-
current = getInnerSchema(def);
|
|
6324
|
-
continue;
|
|
6325
|
-
}
|
|
6326
|
-
if (def.type === "nullish") {
|
|
6327
|
-
required = false;
|
|
6328
|
-
current = getInnerSchema(def);
|
|
6329
|
-
continue;
|
|
6330
|
-
}
|
|
6331
|
-
if (def.type === "default" || def.type === "prefault") {
|
|
6332
|
-
const raw = def.defaultValue;
|
|
6333
|
-
if (typeof raw === "function") defaultValue = Reflect.apply(raw, void 0, []);
|
|
6334
|
-
else defaultValue = raw;
|
|
6335
|
-
current = getInnerSchema(def);
|
|
6336
|
-
continue;
|
|
6337
|
-
}
|
|
6338
|
-
if (def.type === "readonly" || def.type === "pipe") {
|
|
6339
|
-
current = getInnerSchema(def) ?? def.in;
|
|
6340
|
-
continue;
|
|
6341
|
-
}
|
|
6342
|
-
return {
|
|
6343
|
-
schema: current,
|
|
6344
|
-
def,
|
|
6345
|
-
required,
|
|
6346
|
-
defaultValue
|
|
6347
|
-
};
|
|
6348
|
-
}
|
|
6349
|
-
return null;
|
|
6350
|
-
}
|
|
6351
|
-
function humaniseKey(key) {
|
|
6352
|
-
const spaced = key.replace(/([a-z0-9])([A-Z])/g, "$1 $2").replace(/[_-]+/g, " ").trim();
|
|
6353
|
-
if (!spaced) return key;
|
|
6354
|
-
const lowered = spaced.toLowerCase();
|
|
6355
|
-
return lowered.charAt(0).toUpperCase() + lowered.slice(1);
|
|
6356
|
-
}
|
|
6357
|
-
function normaliseSelectOptions(raw) {
|
|
6358
|
-
if (!raw) return void 0;
|
|
6359
|
-
return raw.map((entry) => {
|
|
6360
|
-
if (typeof entry === "string") return {
|
|
6361
|
-
value: entry,
|
|
6362
|
-
label: entry
|
|
6363
|
-
};
|
|
6364
|
-
return {
|
|
6365
|
-
value: entry.value,
|
|
6366
|
-
label: entry.label ?? entry.value
|
|
6367
|
-
};
|
|
6368
|
-
});
|
|
6369
|
-
}
|
|
6370
|
-
function enumOptionsFromEntries(def) {
|
|
6371
|
-
const entries = def.entries;
|
|
6372
|
-
if (!isObject(entries)) return null;
|
|
6373
|
-
const out = [];
|
|
6374
|
-
for (const [label, value] of Object.entries(entries)) if (typeof value === "string") out.push({
|
|
6375
|
-
value,
|
|
6376
|
-
label
|
|
6377
|
-
});
|
|
6378
|
-
else if (typeof value === "number") out.push({
|
|
6379
|
-
value: String(value),
|
|
6380
|
-
label
|
|
6381
|
-
});
|
|
6382
|
-
else return null;
|
|
6383
|
-
return out;
|
|
6384
|
-
}
|
|
6385
|
-
function literalUnionOptions(def) {
|
|
6386
|
-
const options = def.options;
|
|
6387
|
-
if (!Array.isArray(options)) return null;
|
|
6388
|
-
const out = [];
|
|
6389
|
-
for (const option of options) {
|
|
6390
|
-
const optDef = getZodDef(option);
|
|
6391
|
-
if (optDef?.type !== "literal") return null;
|
|
6392
|
-
const values = optDef.values;
|
|
6393
|
-
if (!Array.isArray(values) || values.length !== 1) return null;
|
|
6394
|
-
const value = values[0];
|
|
6395
|
-
if (typeof value === "string") out.push({
|
|
6396
|
-
value,
|
|
6397
|
-
label: value
|
|
6398
|
-
});
|
|
6399
|
-
else if (typeof value === "number") {
|
|
6400
|
-
const stringValue = String(value);
|
|
6401
|
-
out.push({
|
|
6402
|
-
value: stringValue,
|
|
6403
|
-
label: stringValue
|
|
6404
|
-
});
|
|
6405
|
-
} else return null;
|
|
6406
|
-
}
|
|
6407
|
-
return out.length > 0 ? out : null;
|
|
6408
|
-
}
|
|
6409
|
-
function literalSelectOptions(def) {
|
|
6410
|
-
const values = def.values;
|
|
6411
|
-
if (!Array.isArray(values)) return null;
|
|
6412
|
-
const out = [];
|
|
6413
|
-
for (const value of values) if (typeof value === "string") out.push({
|
|
6414
|
-
value,
|
|
6415
|
-
label: value
|
|
6416
|
-
});
|
|
6417
|
-
else if (typeof value === "number") {
|
|
6418
|
-
const stringValue = String(value);
|
|
6419
|
-
out.push({
|
|
6420
|
-
value: stringValue,
|
|
6421
|
-
label: stringValue
|
|
6422
|
-
});
|
|
6423
|
-
} else return null;
|
|
6424
|
-
return out;
|
|
6425
|
-
}
|
|
6426
|
-
function readStringChecks(def) {
|
|
6427
|
-
const checks = getChecks(def);
|
|
6428
|
-
const out = {};
|
|
6429
|
-
const min = findCheck(checks, "min_length");
|
|
6430
|
-
if (min && typeof min.minimum === "number") out.minLength = min.minimum;
|
|
6431
|
-
const max = findCheck(checks, "max_length");
|
|
6432
|
-
if (max && typeof max.maximum === "number") out.maxLength = max.maximum;
|
|
6433
|
-
return out;
|
|
6434
|
-
}
|
|
6435
|
-
const integerNumberFormats = new Set([
|
|
6436
|
-
"int",
|
|
6437
|
-
"safeint",
|
|
6438
|
-
"int32",
|
|
6439
|
-
"uint32",
|
|
6440
|
-
"int64",
|
|
6441
|
-
"uint64"
|
|
6442
|
-
]);
|
|
6443
|
-
function readNumberChecks(def) {
|
|
6444
|
-
const checks = getChecks(def);
|
|
6445
|
-
const out = {};
|
|
6446
|
-
const gt = findCheck(checks, "greater_than");
|
|
6447
|
-
if (gt && typeof gt.value === "number" && gt.inclusive === true) out.min = gt.value;
|
|
6448
|
-
const lt = findCheck(checks, "less_than");
|
|
6449
|
-
if (lt && typeof lt.value === "number" && lt.inclusive === true) out.max = lt.value;
|
|
6450
|
-
const format = findCheck(checks, "number_format");
|
|
6451
|
-
if (format && typeof format.format === "string" && integerNumberFormats.has(format.format)) out.integer = true;
|
|
6452
|
-
return out;
|
|
6453
|
-
}
|
|
6454
|
-
function buildField(key, fieldSchema, override) {
|
|
6455
|
-
const unwrapped = unwrap(fieldSchema);
|
|
6456
|
-
if (!unwrapped) return Result.err(/* @__PURE__ */ new Error(`manualInput: field "${key}" uses an unsupported Zod schema (could not introspect)`));
|
|
6457
|
-
const inner = unwrapped.def;
|
|
6458
|
-
const description = override?.description ?? getDescription(unwrapped.schema);
|
|
6459
|
-
const base = {
|
|
6460
|
-
key,
|
|
6461
|
-
label: override?.label ?? humaniseKey(key),
|
|
6462
|
-
description,
|
|
6463
|
-
placeholder: override?.placeholder,
|
|
6464
|
-
required: unwrapped.required,
|
|
6465
|
-
defaultValue: override?.defaultValue !== void 0 ? override.defaultValue : unwrapped.defaultValue
|
|
6466
|
-
};
|
|
6467
|
-
if (override?.asJson === true) {
|
|
6468
|
-
const rows = override.rows;
|
|
6469
|
-
return Result.ok({
|
|
6470
|
-
...base,
|
|
6471
|
-
kind: "json",
|
|
6472
|
-
rows
|
|
6473
|
-
});
|
|
6474
|
-
}
|
|
6475
|
-
if (override?.asFile === true) return Result.ok({
|
|
6476
|
-
...base,
|
|
6477
|
-
kind: "file",
|
|
6478
|
-
accept: override.accept,
|
|
6479
|
-
maxSizeBytes: override.maxSizeBytes
|
|
6480
|
-
});
|
|
6481
|
-
const overrideOptions = normaliseSelectOptions(override?.options);
|
|
6482
|
-
if (overrideOptions) return Result.ok({
|
|
6483
|
-
...base,
|
|
6484
|
-
kind: "select",
|
|
6485
|
-
options: overrideOptions
|
|
6486
|
-
});
|
|
6487
|
-
switch (inner.type) {
|
|
6488
|
-
case "string": {
|
|
6489
|
-
const checks = readStringChecks(inner);
|
|
6490
|
-
if (override?.multiline === true) return Result.ok({
|
|
6491
|
-
...base,
|
|
6492
|
-
kind: "multiline",
|
|
6493
|
-
rows: override.rows,
|
|
6494
|
-
minLength: checks.minLength,
|
|
6495
|
-
maxLength: checks.maxLength
|
|
6496
|
-
});
|
|
6497
|
-
return Result.ok({
|
|
6498
|
-
...base,
|
|
6499
|
-
kind: "text",
|
|
6500
|
-
minLength: checks.minLength,
|
|
6501
|
-
maxLength: checks.maxLength
|
|
6502
|
-
});
|
|
6503
|
-
}
|
|
6504
|
-
case "number":
|
|
6505
|
-
case "int":
|
|
6506
|
-
case "bigint": {
|
|
6507
|
-
const checks = readNumberChecks(inner);
|
|
6508
|
-
return Result.ok({
|
|
6509
|
-
...base,
|
|
6510
|
-
kind: "number",
|
|
6511
|
-
min: checks.min,
|
|
6512
|
-
max: checks.max,
|
|
6513
|
-
integer: checks.integer
|
|
6514
|
-
});
|
|
6515
|
-
}
|
|
6516
|
-
case "boolean": return Result.ok({
|
|
6517
|
-
...base,
|
|
6518
|
-
kind: "boolean"
|
|
6519
|
-
});
|
|
6520
|
-
case "enum": {
|
|
6521
|
-
const options = enumOptionsFromEntries(inner);
|
|
6522
|
-
if (options) return Result.ok({
|
|
6523
|
-
...base,
|
|
6524
|
-
kind: "select",
|
|
6525
|
-
options
|
|
6526
|
-
});
|
|
6527
|
-
return Result.ok({
|
|
6528
|
-
...base,
|
|
6529
|
-
kind: "json",
|
|
6530
|
-
rows: override?.rows
|
|
6531
|
-
});
|
|
6532
|
-
}
|
|
6533
|
-
case "literal": {
|
|
6534
|
-
const options = literalSelectOptions(inner);
|
|
6535
|
-
if (options && options.length > 0) return Result.ok({
|
|
6536
|
-
...base,
|
|
6537
|
-
kind: "select",
|
|
6538
|
-
options
|
|
6539
|
-
});
|
|
6540
|
-
return Result.ok({
|
|
6541
|
-
...base,
|
|
6542
|
-
kind: "json",
|
|
6543
|
-
rows: override?.rows
|
|
6544
|
-
});
|
|
6545
|
-
}
|
|
6546
|
-
case "union": {
|
|
6547
|
-
const options = literalUnionOptions(inner);
|
|
6548
|
-
if (options) return Result.ok({
|
|
6549
|
-
...base,
|
|
6550
|
-
kind: "select",
|
|
6551
|
-
options
|
|
6552
|
-
});
|
|
6553
|
-
return Result.ok({
|
|
6554
|
-
...base,
|
|
6555
|
-
kind: "json",
|
|
6556
|
-
rows: override?.rows
|
|
6557
|
-
});
|
|
6558
|
-
}
|
|
6559
|
-
default: return Result.ok({
|
|
6560
|
-
...base,
|
|
6561
|
-
kind: "json",
|
|
6562
|
-
rows: override?.rows
|
|
6563
|
-
});
|
|
6564
|
-
}
|
|
6565
|
-
}
|
|
6566
|
-
function getObjectShape(schema) {
|
|
6567
|
-
const def = getZodDef(schema);
|
|
6568
|
-
if (!def) return null;
|
|
6569
|
-
if (def.type !== "object") return null;
|
|
6570
|
-
const shape = def.shape;
|
|
6571
|
-
if (!isObject(shape)) return null;
|
|
6572
|
-
return shape;
|
|
6573
|
-
}
|
|
6574
|
-
/**
|
|
6575
|
-
* Walk an eval's `manualInput` configuration and produce the wire-format
|
|
6576
|
-
* descriptor consumed by the web UI. The schema must resolve to a top-level
|
|
6577
|
-
* `z.object(...)`; nested objects, arrays, unions, and other unsupported
|
|
6578
|
-
* shapes inside fields fall back to the JSON textarea widget.
|
|
6579
|
-
*
|
|
6580
|
-
* Returns a `Result` so the caller (eval discovery) can surface a discovery
|
|
6581
|
-
* issue without throwing when the schema is incompatible.
|
|
6582
|
-
*/
|
|
6583
|
-
function buildManualInputDescriptor(config) {
|
|
6584
|
-
const shape = getObjectShape(config.schema);
|
|
6585
|
-
if (!shape) return Result.err(/* @__PURE__ */ new Error("manualInput.schema must be a top-level z.object(...). Wrap nested types in an object schema."));
|
|
6586
|
-
const overrides = {};
|
|
6587
|
-
const rawOverrides = config.fields;
|
|
6588
|
-
if (rawOverrides) {
|
|
6589
|
-
for (const [key, override] of Object.entries(rawOverrides)) if (override) overrides[key] = override;
|
|
6590
|
-
}
|
|
6591
|
-
const fields = [];
|
|
6592
|
-
for (const [key, fieldSchema] of Object.entries(shape)) {
|
|
6593
|
-
const fieldResult = buildField(key, fieldSchema, overrides[key]);
|
|
6594
|
-
if (fieldResult.error) return fieldResult.errorResult();
|
|
6595
|
-
fields.push(fieldResult.value);
|
|
6596
|
-
}
|
|
6597
|
-
return Result.ok({
|
|
6598
|
-
title: config.title,
|
|
6599
|
-
description: config.description,
|
|
6600
|
-
submitLabel: config.submitLabel,
|
|
6601
|
-
fields
|
|
6602
|
-
});
|
|
6603
|
-
}
|
|
6604
|
-
/**
|
|
6605
|
-
* Resolve an eval's `manualInput` Zod schema against a raw user submission.
|
|
6606
|
-
* Returns the parsed value typed against the eval's `TInput` generic, or a
|
|
6607
|
-
* structured `Error` carrying the Zod issues for the caller to surface.
|
|
6608
|
-
*/
|
|
6609
|
-
function parseManualInputValues(config, raw) {
|
|
6610
|
-
const parsed = config.schema.safeParse(raw);
|
|
6611
|
-
if (!parsed.success) return Result.err(new ManualInputValidationError(parsed.error.issues.map(formatIssue)));
|
|
6612
|
-
return Result.ok(parsed.data);
|
|
6613
|
-
}
|
|
6614
|
-
/**
|
|
6615
|
-
* Error thrown / returned when manual-input values fail validation against
|
|
6616
|
-
* the eval's `manualInput.schema`. Carries the structured Zod issues so the
|
|
6617
|
-
* CLI and HTTP layers can surface them per-field.
|
|
6618
|
-
*/
|
|
6619
|
-
var ManualInputValidationError = class extends Error {
|
|
6620
|
-
issues;
|
|
6621
|
-
constructor(issues) {
|
|
6622
|
-
super(issues.length === 0 ? "manualInput validation failed" : `manualInput validation failed: ${issues.map((issue) => issue.path ? `${issue.path}: ${issue.message}` : issue.message).join("; ")}`);
|
|
6623
|
-
this.name = "ManualInputValidationError";
|
|
6624
|
-
this.issues = issues;
|
|
6625
|
-
}
|
|
6626
|
-
};
|
|
6627
|
-
/**
 * Flatten a Zod-style issue into `{ path, message }`.
 *
 * String and number path segments are joined with "."; segments of any other
 * type, and empty-string segments, are dropped.
 */
function formatIssue(issue) {
	const segments = [];
	for (const segment of issue.path) {
		if (typeof segment !== "string" && typeof segment !== "number") continue;
		const text = String(segment);
		if (text !== "") segments.push(text);
	}
	return {
		path: segments.join("."),
		message: issue.message
	};
}
|
|
6633
|
-
//#endregion
|
|
6634
6291
|
//#region ../runner/src/outputArtifacts.ts
|
|
6635
6292
|
const mimeTypeExtensionMap = {
|
|
6636
6293
|
"application/json": ".json",
|
|
@@ -6776,254 +6433,23 @@ function resolveTracePresentation(spans, globalTraceDisplay, evalTraceDisplay) {
|
|
|
6776
6433
|
};
|
|
6777
6434
|
}
|
|
6778
6435
|
//#endregion
|
|
6779
|
-
//#region ../runner/src/runMaintenance.ts
|
|
6780
|
-
/**
 * Write a run's state into `runState.runDir`: `summary.json` and `run.json`
 * as pretty-printed JSON, then `cases.jsonl` with one JSON document per case
 * row. The three files are written sequentially.
 */
async function persistRunState(runState) {
	const writeArtifact = (fileName, contents) => writeFile(join(runState.runDir, fileName), contents);
	await writeArtifact("summary.json", JSON.stringify(runState.summary, null, 2));
	await writeArtifact("run.json", JSON.stringify(runState.manifest, null, 2));
	const serializedRows = runState.cases.map((caseRow) => JSON.stringify(caseRow));
	await writeArtifact("cases.jsonl", serializedRows.join("\n"));
}
|
|
6786
|
-
/**
 * Derive the status a persisted case should have under the given score
 * thresholds.
 *
 * Precedence: a cancelled row stays cancelled; a recorded detail error yields
 * "error"; any assertion failure yields "fail"; any numeric score below its
 * threshold yields "fail". Non-numeric scores are ignored. Otherwise an
 * "error" row stays "error" and everything else becomes "pass".
 *
 * @param caseRow - Persisted row (`status`, `columns`).
 * @param caseDetail - Optional detail (`error`, `assertionFailures`, `columns`).
 * @param scoreThresholds - Iterable of `[scoreKey, passThreshold]` pairs.
 */
function recomputePersistedCaseStatus(caseRow, caseDetail, scoreThresholds) {
	if (caseRow.status === "cancelled") return "cancelled";
	if (caseDetail?.error != null) return "error";
	const assertionFailureCount = caseDetail?.assertionFailures.length ?? 0;
	if (assertionFailureCount > 0) return "fail";
	for (const [scoreKey, threshold] of scoreThresholds) {
		// The row value wins; the detail value is only a fallback.
		const value = caseRow.columns[scoreKey] ?? caseDetail?.columns[scoreKey];
		if (typeof value === "number" && value < threshold) return "fail";
	}
	if (caseRow.status === "error") return "error";
	return "pass";
}
|
|
6805
|
-
/**
 * Decide whether a run involves the eval identified by `params.evalKey`.
 *
 * True when any case row belongs to the eval. Otherwise an "all" target
 * defers to `params.evalExists`, an "evalIds" target checks
 * `target.evalKeys`, and any other mode is false.
 */
function runTouchesEval(params) {
	const { caseRows, evalKey, target } = params;
	const hasMatchingRow = caseRows.some((caseRow) => caseRow.evalKey === evalKey);
	if (hasMatchingRow) return true;
	switch (target.mode) {
		case "all": return params.evalExists;
		case "evalIds": return target.evalKeys?.includes(evalKey) ?? false;
		default: return false;
	}
}
|
|
6811
|
-
/**
 * Remove every run whose manifest is flagged `temporary === true`.
 *
 * A temporary run that is still "running" is first marked cancelled (manifest
 * and summary), given an end time and duration, and passed to
 * `params.cancelRunningRun`. Each temporary run is then deleted from
 * `params.runs` and its `runDir` removed recursively.
 *
 * @returns The number of runs deleted.
 */
async function deleteTemporaryRuns(params) {
	let removedCount = 0;
	// Iterate over a snapshot because entries are deleted during the loop.
	const snapshot = [...params.runs];
	for (const [runId, run] of snapshot) {
		const { manifest, summary } = run;
		if (manifest.temporary !== true) continue;
		if (manifest.status === "running") {
			const cancelledAt = new Date();
			manifest.status = "cancelled";
			manifest.endedAt = cancelledAt.toISOString();
			summary.status = "cancelled";
			summary.totalDurationMs = cancelledAt.getTime() - new Date(manifest.startedAt).getTime();
			params.cancelRunningRun(run);
		}
		params.runs.delete(runId);
		await rm(run.runDir, {
			recursive: true,
			force: true
		});
		removedCount += 1;
	}
	return removedCount;
}
|
|
6832
|
-
/**
 * Re-derive case statuses for one eval across the given runs.
 *
 * Runs that do not touch the eval, or that are still "running", are skipped.
 * For each case row of the eval whose recomputed status differs, the row (and
 * its detail, when present) is updated and the detail persisted through
 * `params.persistCaseDetail`. Runs with at least one change get their summary
 * counters refreshed from the case rows and are persisted.
 *
 * @returns The number of runs that were modified.
 */
async function recomputeEvalStatusesInRuns(params) {
	const { evalKey, evalExists, scoreThresholds } = params;
	const summaryCounters = ["totalCases", "passedCases", "failedCases", "errorCases", "cancelledCases"];
	let updatedRuns = 0;
	for (const run of params.runs) {
		const touchesEval = runTouchesEval({
			target: run.manifest.target,
			caseRows: run.cases,
			evalKey,
			evalExists
		});
		if (!touchesEval || run.manifest.status === "running") continue;
		let changed = false;
		for (const caseRow of run.cases) {
			if (caseRow.evalKey !== evalKey) continue;
			const caseDetail = run.caseDetails.get(getCaseRowCaseKey(caseRow));
			const nextStatus = recomputePersistedCaseStatus(caseRow, caseDetail, scoreThresholds);
			if (nextStatus === caseRow.status) continue;
			caseRow.status = nextStatus;
			if (caseDetail) {
				caseDetail.status = nextStatus;
				await params.persistCaseDetail(run.runDir, caseDetail);
			}
			changed = true;
		}
		if (!changed) continue;
		const derivedSummary = deriveScopedSummaryFromCases({ caseRows: run.cases });
		for (const counter of summaryCounters) run.summary[counter] = derivedSummary[counter];
		await persistRunState(run);
		updatedRuns += 1;
	}
	return updatedRuns;
}
|
|
6867
|
-
//#endregion
|
|
6868
|
-
//#region ../runner/src/runPersistence.ts
|
|
6869
|
-
const SHORT_ID_PATTERN = /^r(\d+)$/;
|
|
6870
|
-
/**
 * Build a run id of the form `YYYY-MM-DDTHH-MM-SSZ_<suffix>` from the current
 * UTC time plus a short base-36 suffix taken from `Math.random()`.
 */
function generateRunId() {
	const now = new Date();
	const twoDigits = (value) => String(value).padStart(2, "0");
	const datePart = [
		String(now.getUTCFullYear()),
		twoDigits(now.getUTCMonth() + 1),
		twoDigits(now.getUTCDate())
	].join("-");
	const timePart = [
		twoDigits(now.getUTCHours()),
		twoDigits(now.getUTCMinutes()),
		twoDigits(now.getUTCSeconds())
	].join("-");
	const suffix = Math.random().toString(36).slice(2, 8);
	return `${datePart}T${timePart}Z_${suffix}`;
}
|
|
6879
|
-
/**
 * Extract the numeric part of a short id matching `SHORT_ID_PATTERN`.
 *
 * @returns The parsed number, or `null` when `shortId` is undefined, does not
 * match the pattern, or parses to a non-finite number.
 */
function parseShortIdNum(shortId) {
	if (shortId === void 0) return null;
	const digits = SHORT_ID_PATTERN.exec(shortId)?.[1];
	if (digits === void 0) return null;
	const parsed = Number(digits);
	return Number.isFinite(parsed) ? parsed : null;
}
|
|
6887
|
-
/**
 * Compute the next short-id number: one more than the highest number parsed
 * from `snapshot.manifest.shortId` across the snapshots. Short ids that
 * `parseShortIdNum` rejects are ignored; with no usable ids the result is 0.
 */
function nextShortIdFromSnapshots(snapshots) {
	let highest = -1;
	for (const { manifest } of snapshots) {
		const parsed = parseShortIdNum(manifest.shortId);
		if (parsed === null) continue;
		if (parsed > highest) highest = parsed;
	}
	return highest + 1;
}
|
|
6900
|
-
/**
 * Load every run snapshot stored under `<localStateDir>/runs`.
 *
 * Sub-directories are visited in sorted path order, one at a time; those for
 * which `loadPersistedRunSnapshot` yields nothing are skipped. If the runs
 * directory cannot be listed, an empty array is returned.
 */
async function loadPersistedRunSnapshots(localStateDir) {
	const runsDir = join(localStateDir, "runs");
	const listing = await resultify(() => readdir(runsDir, { withFileTypes: true }));
	if (listing.error) return [];
	const sortedRunDirs = listing.value.flatMap((entry) => entry.isDirectory() ? [join(runsDir, entry.name)] : []).toSorted();
	const loaded = [];
	for (const runDir of sortedRunDirs) {
		const snapshot = await loadPersistedRunSnapshot(runDir);
		if (snapshot) loaded.push(snapshot);
	}
	return loaded;
}
|
|
6913
|
-
/**
 * Write a case detail as pretty-printed JSON to
 * `<runDir>/case-details/<encoded fileId>.json`.
 *
 * @param fileId - File identifier; defaults to `caseDetail.caseId`.
 */
async function persistCaseDetail(runDir, caseDetail, fileId = caseDetail.caseId) {
	const fileName = `${encodeCaseDetailFileName(fileId)}.json`;
	const targetPath = join(runDir, "case-details", fileName);
	await writeFile(targetPath, JSON.stringify(caseDetail, null, 2));
}
|
|
6916
|
-
/**
 * Map each eval to the `status` of its latest run info, as computed by
 * `getLatestRunInfos`.
 */
function getLastRunStatuses(params) {
	const statuses = new Map();
	for (const [evalId, info] of getLatestRunInfos(params)) statuses.set(evalId, info.status);
	return statuses;
}
|
|
6920
|
-
/**
 * Build a map from eval key to metadata about the freshest run touching that
 * eval.
 *
 * Runs are processed from oldest to newest freshness timestamp, so a later
 * run overwrites the entry of an earlier one. Each entry carries the eval's
 * status for that run, the run's freshness timestamp, its commit SHA (or
 * `null`), and the eval's source fingerprint recorded in the manifest (or
 * `null`).
 */
function getLatestRunInfos(params) {
	const { runs, knownEvals } = params;
	const knownEvalMetas = [...knownEvals];
	const manualScoreKeysByEval = new Map();
	for (const evalMeta of knownEvalMetas) {
		const manualKeys = evalMeta.columnDefs.filter((columnDef) => columnDef.isManualScore === true).map((columnDef) => columnDef.key);
		manualScoreKeysByEval.set(evalMeta.key, manualKeys);
	}
	const freshnessMs = (run) => new Date(getRunFreshnessTimestamp(run.manifest)).getTime();
	const oldestFirst = [...runs].toSorted((a, b) => freshnessMs(a) - freshnessMs(b));
	const latestRunInfos = new Map();
	for (const run of oldestFirst) {
		for (const evalKey of getRunEvalKeys(run, knownEvalMetas)) {
			latestRunInfos.set(evalKey, {
				status: getEvalStatusForRun(run, evalKey, manualScoreKeysByEval.get(evalKey) ?? []),
				startedAt: getRunFreshnessTimestamp(run.manifest),
				commitSha: run.manifest.commitSha ?? null,
				evalSourceFingerprint: run.manifest.evalSourceFingerprints[evalKey] ?? null
			});
		}
	}
	return latestRunInfos;
}
|
|
6938
|
-
/** Convert a derived status to a last-run status: "pending" becomes `null`. */
function toLastRunStatus$1(status) {
	if (status === "pending") return null;
	return status;
}
|
|
6941
|
-
/**
 * Load one run directory into a snapshot object.
 *
 * `run.json` and `summary.json` must both read and validate against their
 * schemas, otherwise `null` is returned. Case rows and case details are then
 * loaded, in that order.
 */
async function loadPersistedRunSnapshot(runDir) {
	const readValidated = (fileName, schema) => readParsedJsonFile(join(runDir, fileName), { safeParse: schema.safeParse.bind(schema) });
	const manifest = await readValidated("run.json", runManifestSchema);
	if (!manifest) return null;
	const summary = await readValidated("summary.json", runSummarySchema);
	if (!summary) return null;
	const cases = await readCaseRows(runDir);
	const caseDetails = await readCaseDetails(runDir);
	return {
		runDir,
		manifest,
		summary,
		cases,
		caseDetails
	};
}
|
|
6954
|
-
/**
 * Read a UTF-8 file, parse it as JSON and validate it with
 * `schema.safeParse`.
 *
 * @returns The validated data, or `null` when reading, JSON parsing, or
 * validation fails.
 */
async function readParsedJsonFile(filePath, schema) {
	const contents = await resultify(() => readFile(filePath, "utf-8"));
	if (contents.error) return null;
	const decoded = resultify(() => JSON.parse(contents.value));
	if (decoded.error) return null;
	const validation = schema.safeParse(decoded.value);
	return validation.success ? validation.data : null;
}
|
|
6963
|
-
/**
 * Read `<runDir>/cases.jsonl` into an array of validated case rows.
 *
 * Blank lines, lines that are not valid JSON, and lines that fail
 * `caseRowSchema` are skipped. An unreadable file yields an empty array.
 */
async function readCaseRows(runDir) {
	const contents = await resultify(() => readFile(join(runDir, "cases.jsonl"), "utf-8"));
	if (contents.error) return [];
	const nonEmptyLines = contents.value.split("\n").map((rawLine) => rawLine.trim()).filter((line) => line.length !== 0);
	const rows = [];
	for (const line of nonEmptyLines) {
		const decoded = resultify(() => JSON.parse(line));
		if (decoded.error) continue;
		const validated = caseRowSchema.safeParse(decoded.value);
		if (validated.success) rows.push(validated.data);
	}
	return rows;
}
|
|
6978
|
-
/**
 * Load every `*.json` file in `<runDir>/case-details` into a Map keyed by
 * `detail.caseKey`, falling back to `detail.caseId`.
 *
 * Files that cannot be read or validated against `caseDetailSchema` are
 * skipped. An unlistable directory yields an empty Map.
 */
async function readCaseDetails(runDir) {
	const detailsDir = join(runDir, "case-details");
	const caseDetails = new Map();
	const listing = await resultify(() => readdir(detailsDir, { withFileTypes: true }));
	if (listing.error) return caseDetails;
	const jsonFiles = listing.value.filter((entry) => entry.isFile() && entry.name.endsWith(".json"));
	for (const { name } of jsonFiles) {
		const detail = await readParsedJsonFile(join(detailsDir, name), { safeParse: caseDetailSchema.safeParse.bind(caseDetailSchema) });
		if (detail) caseDetails.set(detail.caseKey ?? detail.caseId, detail);
	}
	return caseDetails;
}
|
|
6991
|
-
/**
 * List the distinct eval keys a run relates to.
 *
 * Keys come from the run's case rows. An "evalIds" target additionally
 * contributes `target.evalKeys`. An "all" target with no case-row keys falls
 * back to the keys of all known evals.
 */
function getRunEvalKeys(run, knownEvals) {
	const evalMetas = [...knownEvals];
	const collected = new Set();
	for (const { evalKey } of run.cases) {
		if (evalKey !== void 0) collected.add(evalKey);
	}
	const { target } = run.manifest;
	if (target.mode === "evalIds") {
		for (const evalKey of target.evalKeys ?? []) collected.add(evalKey);
	} else if (target.mode === "all" && collected.size === 0) {
		for (const evalMeta of evalMetas) collected.add(evalMeta.key);
	}
	return [...collected];
}
|
|
6999
|
-
/**
 * Compute the last-run status of one eval within a run.
 *
 * With case rows for the eval: "unscored" if any manual score is still
 * pending, otherwise the status derived from those rows. Without case rows:
 * the status derived from the run's lifecycle status alone.
 */
function getEvalStatusForRun(run, evalKey, manualScoreKeys) {
	const scopedRows = run.cases.filter((caseRow) => caseRow.evalKey === evalKey);
	if (scopedRows.length === 0) {
		return toLastRunStatus$1(deriveStatusFromChildStatuses({
			statuses: [],
			lifecycleStatus: run.manifest.status
		}));
	}
	if (hasPendingManualScores(scopedRows, manualScoreKeys)) return "unscored";
	return toLastRunStatus$1(deriveStatusFromCaseRows({ caseRows: scopedRows }));
}
|
|
7010
|
-
/**
 * Report whether any case row lacks a finite numeric value for at least one
 * of the manual score keys. Always false when there are no manual score keys.
 */
function hasPendingManualScores(caseRows, manualScoreKeys) {
	if (manualScoreKeys.length === 0) return false;
	// Number.isFinite does not coerce, so non-numbers count as missing too.
	const isScored = (caseRow) => manualScoreKeys.every((key) => Number.isFinite(caseRow.columns[key]));
	return caseRows.some((caseRow) => !isScored(caseRow));
}
|
|
7017
|
-
/** Percent-encode a case id (via `encodeURIComponent`) for use in a file name. */
function encodeCaseDetailFileName(caseId) {
	const encoded = encodeURIComponent(caseId);
	return encoded;
}
|
|
7020
|
-
//#endregion
|
|
7021
6436
|
//#region ../runner/src/stackFormatting.ts
|
|
7022
6437
|
/** Matches bracketed SGR-style sequences such as `[31m`, `[1;32m` or `[m`. */
const orphanedAnsiSgrPattern = /\[(?:\d{1,3}(?:;\d{1,3})*)?m/g;
/**
 * Strip terminal control codes from a string: first with Node's
 * `stripVTControlCharacters`, then by removing any remaining text matching
 * `orphanedAnsiSgrPattern`.
 */
function stripTerminalControlCodes(value) {
	const withoutEscapes = stripVTControlCharacters(value);
	return withoutEscapes.replaceAll(orphanedAnsiSgrPattern, "");
}
|
|
7026
6441
|
//#endregion
|
|
6442
|
+
//#region ../runner/src/caseChildProtocol.ts
|
|
6443
|
+
/**
 * Type guard for a parent-to-child message: a non-null object whose `type`
 * is "start" and which has a `context` property.
 */
function isCaseChildParentMessage(value) {
	if (typeof value !== "object" || value === null) return false;
	if (!("type" in value) || value.type !== "start") return false;
	return "context" in value;
}
|
|
6446
|
+
/**
 * Type guard for a child-to-parent message: either `{ type: "done", result }`
 * or `{ type: "error", message }`. Only the presence of the payload property
 * is checked, not its value.
 */
function isCaseChildMessage(value) {
	if (value === null || typeof value !== "object") return false;
	if (!("type" in value)) return false;
	switch (value.type) {
		case "done": return "result" in value;
		case "error": return "message" in value;
		default: return false;
	}
}
|
|
6452
|
+
//#endregion
|
|
7027
6453
|
//#region ../runner/src/runExecution.ts
|
|
7028
6454
|
function filterEvalCases(cases, caseIds) {
|
|
7029
6455
|
if (!caseIds || caseIds.length === 0) return cases;
|
|
@@ -7335,645 +6761,4 @@ function toAssertionFailure(message, error = void 0, nameOverride = void 0) {
|
|
|
7335
6761
|
};
|
|
7336
6762
|
}
|
|
7337
6763
|
//#endregion
|
|
7338
|
-
|
|
7339
|
-
/**
 * Run the queued cases with up to `concurrency` workers pulling from a shared
 * cursor.
 *
 * When a case throws, the error is recorded, that worker stops, and the other
 * workers stop before taking another case. After all workers settle, the
 * recorded error (if any) is rethrown.
 */
async function executeQueuedCases(params) {
	const { queuedCases, concurrency, globalTraceDisplay } = params;
	let cursor = 0;
	let failure = void 0;
	const runWorker = async () => {
		for (;;) {
			if (failure !== void 0) return;
			const queuedCase = queuedCases[cursor];
			cursor += 1;
			if (queuedCase === void 0) return;
			try {
				await executeQueuedCase({
					queuedCase,
					globalTraceDisplay
				});
			} catch (error) {
				failure = error instanceof Error ? error : new Error(String(error));
				return;
			}
		}
	};
	const workerCount = Math.min(concurrency, queuedCases.length);
	await Promise.all(Array.from({ length: workerCount }, runWorker));
	if (failure === void 0) return;
	if (failure instanceof Error) throw failure;
	throw new Error(typeof failure === "string" ? failure : typeof failure === "number" || typeof failure === "boolean" || typeof failure === "bigint" ? String(failure) : failure === null ? "null" : "Unknown queue worker error");
}
|
|
7364
|
-
/**
 * Execute one queued case and hand its result to the case's `onComplete`.
 * `execute` receives the global trace display and the start time (ms since
 * epoch) captured just before the call.
 */
async function executeQueuedCase(params) {
	const { queuedCase, globalTraceDisplay } = params;
	const executionContext = {
		globalTraceDisplay,
		startTime: Date.now()
	};
	const outcome = await queuedCase.execute(executionContext);
	await queuedCase.onComplete(outcome);
}
|
|
7373
|
-
//#endregion
|
|
7374
|
-
//#region ../runner/src/tags.ts
|
|
7375
|
-
/**
 * Collect one message per tag in `params.tags` that `validateEvalTagName`
 * rejects. Each message is prefixed with `params.source`. Missing tags are
 * treated as an empty list.
 */
function getInvalidTagMessages(params) {
	const messages = [];
	for (const tag of params.tags ?? []) {
		const validation = validateEvalTagName(tag);
		if (!validation.ok) messages.push(`${params.source} tag "${tag}" is invalid: ${validation.message}`);
	}
	return messages;
}
|
|
7381
|
-
/**
 * Resolve the effective tags of one eval and report tag problems.
 *
 * Effective tags are the config tags minus the eval's `removeTags`, followed
 * by the eval's own tags, de-duplicated. Issues cover invalid tag names in
 * the config, eval and `removeTags` lists, plus `removeTags` entries that are
 * not present in the config tags.
 *
 * @returns `{ tags, issues }`, where each issue has type "invalid-tags" and
 * severity "error".
 */
function resolveEvalTags(params) {
	const { evalDef, evalId, filePath } = params;
	const configTags = params.configTags ?? [];
	const removeTags = evalDef.removeTags ?? [];
	const messages = [
		{ tags: configTags, source: "config" },
		{ tags: evalDef.tags, source: "eval" },
		{ tags: removeTags, source: "removeTags" }
	].flatMap((entry) => getInvalidTagMessages(entry));
	const knownConfigTags = new Set(configTags);
	for (const tag of removeTags) {
		if (knownConfigTags.has(tag)) continue;
		messages.push(`removeTags tag "${tag}" is not defined in AgentEvalsConfig.tags.`);
	}
	const removedTags = new Set(removeTags);
	const inheritedTags = configTags.filter((tag) => !removedTags.has(tag));
	const tags = dedupeEvalTags([...inheritedTags, ...evalDef.tags ?? []]);
	const issues = messages.map((message) => ({
		type: "invalid-tags",
		severity: "error",
		filePath,
		evalId,
		message: `Invalid tags for eval "${evalId}" in ${filePath}: ${message}`
	}));
	return {
		tags,
		issues
	};
}
|
|
7413
|
-
/**
 * Compute a case's effective tags: the eval-level tags followed by the case's
 * own tags, de-duplicated.
 *
 * @throws {Error} When any of the case's own tags is invalid.
 */
function resolveCaseTags(params) {
	const { evalCase, evalId, evalTags, filePath } = params;
	const problems = getInvalidTagMessages({
		tags: evalCase.tags,
		source: `case "${evalCase.id}"`
	});
	if (problems.length > 0) {
		throw new Error(`Invalid tags for case "${evalCase.id}" in ${filePath}#${evalId}: ${problems.join("; ")}`);
	}
	return dedupeEvalTags([...evalTags, ...evalCase.tags ?? []]);
}
|
|
7422
|
-
/**
 * Validate tag filter expressions in order.
 *
 * @returns The message for the first invalid expression, or `null` when all
 * expressions are valid or no filters were given.
 */
function validateTagsFilters(filters) {
	for (const expression of filters ?? []) {
		const problem = validateTagsFilterExpression(expression);
		if (problem === null) continue;
		return `Invalid --tags-filter "${expression}": ${problem}`;
	}
	return null;
}
|
|
7430
|
-
/**
 * Keep only the cases whose tags satisfy the tag filter expressions.
 * With no filters, a shallow copy of all cases is returned.
 */
function filterEvalCasesByTags(cases, tagsFilter) {
	const hasFilter = tagsFilter !== void 0 && tagsFilter.length !== 0;
	if (!hasFilter) return [...cases];
	return cases.filter(({ tags }) => matchesTagsFilter({
		tags,
		filters: tagsFilter
	}));
}
|
|
7438
|
-
/** Check `params.tags` against `params.tagsFilter` using `matchesTagsFilter`. */
function evalTagsMatchFilter(params) {
	const { tags, tagsFilter: filters } = params;
	return matchesTagsFilter({
		tags,
		filters
	});
}
|
|
7445
|
-
//#endregion
|
|
7446
|
-
//#region ../runner/src/targeting.ts
|
|
7447
|
-
/**
 * Backslash-escape the regex metacharacters `| \ { } ( ) [ ] ^ $ + ? .` in a
 * string. `*` and `-` are left untouched.
 */
function escapeRegex(value) {
	const specialCharacters = /[|\\{}()[\]^$+?.]/g;
	return value.replace(specialCharacters, (match) => `\\${match}`);
}
|
|
7450
|
-
/**
 * Translate a glob pattern into an anchored RegExp.
 *
 * Backslashes are normalized to "/". `**` matches anything (including "/"),
 * `*` matches any run of non-"/" characters, `?` matches one non-"/"
 * character, and every other character is matched literally.
 */
function globToRegex(pattern) {
	const normalized = pattern.replaceAll("\\", "/");
	const parts = ["^"];
	let index = 0;
	while (index < normalized.length) {
		const char = normalized[index];
		if (char === "*") {
			const isDoubleStar = normalized[index + 1] === "*";
			parts.push(isDoubleStar ? ".*" : "[^/]*");
			index += isDoubleStar ? 2 : 1;
			continue;
		}
		parts.push(char === "?" ? "[^/]" : escapeRegex(char ?? ""));
		index += 1;
	}
	parts.push("$");
	return new RegExp(parts.join(""));
}
|
|
7466
|
-
/**
 * Test a file path against a pattern: an exact match after normalizing the
 * pattern's backslashes to "/", or a glob match via `globToRegex`.
 */
function fileMatches(pattern, filePath) {
	const normalizedPattern = pattern.replaceAll("\\", "/");
	return normalizedPattern === filePath || globToRegex(normalizedPattern).test(filePath);
}
|
|
7471
|
-
/**
 * True when no file patterns are given, or when the eval's `filePath`
 * matches at least one of them.
 */
function matchesFiles(evalMeta, files) {
	const unrestricted = files === void 0 || files.length === 0;
	return unrestricted || files.some((pattern) => fileMatches(pattern, evalMeta.filePath));
}
|
|
7475
|
-
/** True when no eval ids are given, or when `evalMeta.id` is among them. */
function matchesEvalIds(evalMeta, evalIds) {
	const unrestricted = evalIds === void 0 || evalIds.length === 0;
	return unrestricted || evalIds.includes(evalMeta.id);
}
|
|
7479
|
-
/** True when no eval keys are given, or when `evalMeta.key` is among them. */
function matchesEvalKeys(evalMeta, evalKeys) {
	const unrestricted = evalKeys === void 0 || evalKeys.length === 0;
	return unrestricted || evalKeys.includes(evalMeta.key);
}
|
|
7483
|
-
/**
 * Select the evals matching a run target's `evalKeys`, `evalIds` and `files`
 * constraints, sorted by `filePath` using `localeCompare`.
 */
function getTargetEvals(params) {
	const { evalKeys, evalIds, files } = params.request.target;
	const isTargeted = (evalMeta) => matchesEvalKeys(evalMeta, evalKeys) && matchesEvalIds(evalMeta, evalIds) && matchesFiles(evalMeta, files);
	return [...params.evals].filter(isTargeted).toSorted((a, b) => a.filePath.localeCompare(b.filePath));
}
|
|
7488
|
-
/**
 * Return the `key` of every eval in `params.sortedEvals` that
 * `getTargetEvals` selects for `params.request`.
 */
function getTargetEvalKeys(params) {
	const { sortedEvals: evals, request } = params;
	const keys = [];
	for (const evalMeta of getTargetEvals({ evals, request })) keys.push(evalMeta.key);
	return keys;
}
|
|
7495
|
-
//#endregion
|
|
7496
|
-
//#region ../runner/src/runOrchestration.ts
|
|
7497
|
-
/** Turn an empty fingerprint string into `undefined`; pass others through. */
function toOptionalSourceFingerprint(sourceFingerprint) {
	if (sourceFingerprint.length > 0) return sourceFingerprint;
	return void 0;
}
|
|
7500
|
-
/**
 * Build the module-isolation descriptor for one case trial. The key is
 * `<runId>:<evalKey>:<caseId>:trial-<trial>`.
 */
function buildCaseModuleIsolation(params) {
	const { runId, evalKey, caseId, trial, workspaceRoot } = params;
	const keyParts = [runId, evalKey, caseId, `trial-${String(trial)}`];
	return {
		key: keyParts.join(":"),
		workspaceRoot
	};
}
|
|
7511
|
-
/**
 * Build the module-isolation descriptor used while preparing an eval. The
 * key is `<runId>:<evalKey>:prepare`.
 */
function buildEvalPreparationModuleIsolation(params) {
	const { runId, evalKey, workspaceRoot } = params;
	const keyParts = [runId, evalKey, "prepare"];
	return {
		key: keyParts.join(":"),
		workspaceRoot
	};
}
|
|
7521
|
-
/**
 * Rank a case status from worst to best: "error" is 0, "pass" is 2, and every
 * other status ranks 1 alongside "fail".
 */
function statusRank(status) {
	switch (status) {
		case "pass": return 2;
		case "error": return 0;
		default: return 1;
	}
}
|
|
7532
|
-
/**
 * Find the smallest finite numeric value among the given score columns of a
 * case row.
 *
 * @returns The minimum, or `-Infinity` when no listed column holds a finite
 * number.
 */
function minScoreValue(caseRow, scoreKeys) {
	let lowest = Number.POSITIVE_INFINITY;
	for (const key of scoreKeys) {
		const candidate = caseRow.columns[key];
		// Number.isFinite rejects non-numbers without coercion.
		if (!Number.isFinite(candidate)) continue;
		if (candidate < lowest) lowest = candidate;
	}
	return lowest === Number.POSITIVE_INFINITY ? Number.NEGATIVE_INFINITY : lowest;
}
|
|
7547
|
-
/**
 * Comparator ordering trial results from worst to best.
 *
 * Trials are compared by status rank first, then by their minimum score
 * value, then by trial number (lower first).
 *
 * @param left - Trial result exposing `caseRow.status`, `caseRow.columns`, `caseRow.trial`.
 * @param right - Trial result of the same shape.
 * @param scoreKeys - Score column keys considered for the minimum score.
 * @returns Negative when `left` sorts first, positive when `right` does, 0 on a full tie.
 */
function compareTrialResults(left, right, scoreKeys) {
	const statusDiff = statusRank(left.caseRow.status) - statusRank(right.caseRow.status);
	if (statusDiff !== 0) return statusDiff;
	const leftScore = minScoreValue(left.caseRow, scoreKeys);
	const rightScore = minScoreValue(right.caseRow, scoreKeys);
	// Compare before subtracting: when neither trial has a numeric score both
	// values are -Infinity, and (-Infinity) - (-Infinity) is NaN, which would
	// be returned as-is and skip the trial-number tiebreak below.
	if (leftScore !== rightScore) return leftScore - rightScore;
	return left.caseRow.trial - right.caseRow.trial;
}
|
|
7554
|
-
/**
 * Choose the trial attempt that represents a case.
 *
 * Attempts are ordered with `compareTrialResults`. The "lowestScore" strategy
 * takes the first attempt; any other strategy takes the lower-median attempt.
 *
 * @throws {Error} When there are no attempts.
 */
function pickWinningTrial(params) {
	const ranked = [...params.attempts].sort((left, right) => compareTrialResults(left, right, params.scoreKeys));
	const winnerIndex = params.strategy === "lowestScore" ? 0 : Math.floor((ranked.length - 1) / 2);
	const winner = ranked[winnerIndex];
	if (winner === void 0) throw new Error("Expected at least one trial attempt");
	return winner;
}
|
|
7565
|
-
/**
 * Render an arbitrary thrown value as text: an Error's stack (or its message
 * when there is no stack), a string unchanged, anything else via `String`.
 */
function formatUnknownErrorDetails(error) {
	if (!(error instanceof Error)) return typeof error === "string" ? error : String(error);
	return error.stack ?? error.message;
}
|
|
7570
|
-
function findDuplicateCaseIds(cases) {
|
|
7571
|
-
const counts = /* @__PURE__ */ new Map();
|
|
7572
|
-
for (const evalCase of cases) counts.set(evalCase.id, (counts.get(evalCase.id) ?? 0) + 1);
|
|
7573
|
-
return [...counts].filter(([, count]) => count > 1).map(([caseId]) => caseId).toSorted();
|
|
7574
|
-
}
|
|
7575
|
-
/**
 * Throw when any discovery issues exist.
 *
 * @throws {Error} With all issue messages joined by newlines.
 */
function throwIfDiscoveryIssues(issues) {
	if (issues.length !== 0) throw new Error(issues.map(({ message }) => message).join("\n"));
}
|
|
7579
|
-
/**
 * Find case ids that are owned by more than one prepared eval.
 *
 * @returns One string per ambiguous id, formatted as
 * `<caseId> (<filePath>#<evalId>, ...)`.
 */
function findAmbiguousTargetCaseIds(preparedEvals) {
	const ownersByCaseId = new Map();
	for (const preparedEval of preparedEvals) {
		for (const { caseId } of preparedEval.preparedCases) {
			let owners = ownersByCaseId.get(caseId);
			if (owners === void 0) {
				owners = new Set();
				ownersByCaseId.set(caseId, owners);
			}
			owners.add(`${preparedEval.evalMeta.filePath}#${preparedEval.evalMeta.id}`);
		}
	}
	const ambiguous = [];
	for (const [caseId, owners] of ownersByCaseId) {
		if (owners.size > 1) ambiguous.push(`${caseId} (${[...owners].join(", ")})`);
	}
	return ambiguous;
}
|
|
7588
|
-
/**
 * Format per-eval errors into one multi-line message.
 *
 * Each entry becomes `[<evalId>] <first line>`, followed on new lines by the
 * trimmed remainder of its details when there is any. Entries are joined with
 * newlines.
 *
 * @param errors - Entries with `evalId` and a `details` string.
 * @returns The combined message; an empty string for no errors.
 */
function buildRunErrorMessage(errors) {
	return errors.map((entry) => {
		const [firstLine, ...detailLines] = entry.details.split("\n");
		// `split` always yields a first element, so `??` never reached the
		// fallback; `||` also covers an empty or whitespace-only first line.
		const messageLine = firstLine?.trim() || "Unknown error";
		const details = detailLines.join("\n").trim();
		if (details.length === 0) return `[${entry.evalId}] ${messageLine}`;
		return `[${entry.evalId}] ${messageLine}\n${details}`;
	}).join("\n");
}
|
|
7597
|
-
/**
 * Finalize a prepared case once its trials are done.
 *
 * Does nothing when the case is already finalized or has no trial results.
 * Otherwise it picks the winning trial, commits that trial's buffered cache
 * store (when present), records the winning row and detail in the run state,
 * bumps the matching summary counter, writes the trace and case-detail files,
 * notifies `onCaseFinished`, emits a "case.finished" event, and appends the
 * row to the eval's case rows.
 */
async function finalizePreparedCase(params) {
	const { runState, runDir, preparedEval, preparedCase, onCaseFinished, emitEvent } = params;
	const nothingToFinalize = preparedCase.finalized || preparedCase.trialResults.length === 0;
	if (nothingToFinalize) return;
	preparedCase.finalized = true;
	const winner = pickWinningTrial({
		strategy: runState.manifest.trialSelection,
		attempts: preparedCase.trialResults,
		scoreKeys: preparedEval.scoreKeys
	});
	if (winner.bufferedCacheStore !== null) await winner.bufferedCacheStore.commit();
	const { caseRow, caseDetail } = winner;
	// Resolve the artifact id before the row joins `runState.cases`, because
	// the lookup inspects the rows already recorded.
	const artifactFileId = getCaseArtifactFileId(runState, caseRow);
	runState.cases.push(caseRow);
	runState.caseDetails.set(getCaseRowCaseKey(caseRow), caseDetail);
	switch (caseRow.status) {
		case "pass":
			runState.summary.passedCases++;
			break;
		case "error":
			runState.summary.errorCases++;
			break;
		default:
			runState.summary.failedCases++;
	}
	const tracePath = join(runDir, "traces", `${encodeURIComponent(artifactFileId)}.json`);
	await writeFile(tracePath, JSON.stringify(caseDetail.trace, null, 2));
	await persistCaseDetail(runDir, caseDetail, artifactFileId);
	onCaseFinished?.(caseDetail, caseRow);
	emitEvent(runState, {
		type: "case.finished",
		runId: runState.manifest.id,
		timestamp: new Date().toISOString(),
		payload: caseRow
	});
	preparedEval.evalCaseRows.push(caseRow);
}
|
|
7624
|
-
/**
 * Build the ordering key for a case row: its `evalKey` (or `evalId` when the
 * key is missing) and `caseId`, separated by a NUL character.
 */
function getPreparedCaseOrderKey(caseRow) {
	const owner = caseRow.evalKey ?? caseRow.evalId;
	return `${owner}\u0000${caseRow.caseId}`;
}
|
|
7627
|
-
/**
 * Choose the file id for a case's artifacts: the plain `caseId`, unless the
 * run already holds a row with the same `caseId` but a different case key, in
 * which case the case key is used.
 */
function getCaseArtifactFileId(runState, caseRow) {
	const caseKey = getCaseRowCaseKey(caseRow);
	const { caseId } = caseRow;
	const collides = runState.cases.some((existing) => existing.caseId === caseId && getCaseRowCaseKey(existing) !== caseKey);
	return collides ? caseKey : caseId;
}
|
|
7631
|
-
/**
 * Sort `caseRows` in place so they follow the order in which cases appear
 * across `preparedEvals`. Rows with no matching prepared case sort last.
 */
function sortCaseRowsByPreparedOrder(caseRows, preparedEvals) {
	const positions = new Map();
	let nextPosition = 0;
	for (const preparedEval of preparedEvals) {
		for (const { caseId } of preparedEval.preparedCases) {
			positions.set(`${preparedEval.evalMeta.key}\u0000${caseId}`, nextPosition);
			nextPosition++;
		}
	}
	const positionOf = (caseRow) => positions.get(getPreparedCaseOrderKey(caseRow)) ?? Number.MAX_SAFE_INTEGER;
	caseRows.sort((left, right) => positionOf(left) - positionOf(right));
}
|
|
7642
|
-
/**
 * Drives a single run from start to finish.
 *
 * Steps, in order:
 *  1. Validate the tags filter and emit `run.started`.
 *  2. For every targeted eval: fingerprint its source file, load it in an
 *     isolated registry, resolve its cases, and queue one task per
 *     case/trial pair.
 *  3. Execute the queued tasks (unless the `--case` target is ambiguous).
 *  4. Finalize prepared cases, update the last-run / latest-run maps, sort
 *     case rows, persist the run state, and emit the closing events.
 *
 * Errors raised while preparing one eval are collected in `evalErrors` and do
 * not stop the other evals. Errors thrown elsewhere in the main body are
 * caught by the outer `catch`, recorded on `runState`, and reported through a
 * `run.error` event.
 *
 * @param {object} options
 * @param options.runState - Run state mutated in place (`manifest`, `summary`, `cases`).
 * @param options.request - Run request; reads `target.tagsFilter`, `target.caseIds`,
 *   `manualInputs` and `trials`.
 * @param options.runDir - Run directory; artifacts go to `<runDir>/artifacts`.
 * @param options.config - Loaded config (cache, tags, columns, stats, tracing options, ...).
 * @param options.cacheStore - Cache store handed to cases, directly or via a buffered wrapper.
 * @param options.lastRunStatusMap - Map of eval key -> last run status; updated here.
 * @param options.latestRunInfoMap - Map of eval key -> latest run info; updated here.
 * @param options.emitEvent - Called as `emitEvent(runState, event)` for run lifecycle events.
 * @param options.emitDiscoveryEvent - Called once at the end of both the success and failure paths.
 * @param options.workspaceRoot - Passed through to module isolation and `runCase`.
 * @param options.getSourceFingerprint - Maps eval file contents to a fingerprint string.
 * @param options.getConfiguredConcurrency - Supplies the concurrency for executing queued cases.
 * @param options.getSortedEvalMetas - Supplies the sorted eval metas used to resolve target eval keys.
 * @param options.getTargetEvals - Resolves the eval metas targeted by `request`.
 * @param options.onCaseFinished - Forwarded to `finalizePreparedCase`.
 * @returns {Promise<void>}
 */
async function executeRun({ runState, request, runDir, config, cacheStore, lastRunStatusMap, latestRunInfoMap, emitEvent, emitDiscoveryEvent, workspaceRoot, getSourceFingerprint, getConfiguredConcurrency, getSortedEvalMetas, getTargetEvals, onCaseFinished }) {
	try {
		// An invalid tags filter aborts the whole run; the throw lands in the outer catch.
		const tagsFilterError = validateTagsFilters(request.target.tagsFilter);
		if (tagsFilterError !== null) throw new Error(tagsFilterError);
		const targetEvals = getTargetEvals(request);
		emitEvent(runState, {
			type: "run.started",
			runId: runState.manifest.id,
			timestamp: (/* @__PURE__ */ new Date()).toISOString(),
			payload: runState.manifest
		});
		// Per-eval failures are accumulated here instead of aborting the run.
		const evalErrors = [];
		const queuedCases = [];
		const preparedEvals = [];
		// Cache mode falls back to "use" when the manifest does not set one.
		const cacheMode = runState.manifest.cacheMode ?? "use";
		// Caching is on unless `config.cache.enabled` is explicitly false.
		const cacheEnabled = config.cache?.enabled !== false;
		const llmCallsConfig = resolveLlmCallsConfig(config.llmCalls);
		const apiCallsConfig = resolveApiCallsConfig(config.apiCalls);
		for (const evalMeta of targetEvals) {
			const evalFilePath = evalMeta.sourceFilePath;
			const evalModuleIsolation = buildEvalPreparationModuleIsolation({
				runId: runState.manifest.id,
				evalKey: evalMeta.key,
				workspaceRoot
			});
			let sourceFingerprint = "";
			try {
				sourceFingerprint = getSourceFingerprint(await readFile(evalFilePath, "utf-8"));
			} catch {
				// Best effort: a failed read or fingerprint leaves the fingerprint empty.
				sourceFingerprint = "";
			}
			// Keep the manifest and the eval meta in sync with the fingerprint outcome.
			if (sourceFingerprint.length > 0) {
				runState.manifest.evalSourceFingerprints[evalMeta.key] = sourceFingerprint;
				evalMeta.sourceFingerprint = sourceFingerprint;
			} else {
				delete runState.manifest.evalSourceFingerprints[evalMeta.key];
				evalMeta.sourceFingerprint = null;
			}
			try {
				const entry = (await loadIsolatedEvalRegistry({
					evalFilePath,
					sourceFingerprint: toOptionalSourceFingerprint(sourceFingerprint),
					moduleIsolation: evalModuleIsolation,
					runtimeScope: "env"
				})).get(evalMeta.id);
				if (!entry) {
					// The file loaded but did not register this eval id: record it and move on.
					evalErrors.push({
						evalId: evalMeta.id,
						details: `Eval "${evalMeta.id}" was not registered after importing ${evalFilePath}`
					});
					continue;
				}
				await runWithModuleIsolation(evalModuleIsolation, async () => {
					await runInEvalRuntimeScope("cases", async () => {
						await entry.use(async (evalDef) => {
							const evalTagsResult = resolveEvalTags({
								configTags: config.tags,
								evalDef,
								evalId: evalMeta.id,
								filePath: evalMeta.filePath
							});
							throwIfDiscoveryIssues(evalTagsResult.issues);
							evalMeta.tags = evalTagsResult.tags;
							// `cases` and `manualInput` are mutually exclusive on an eval definition.
							if (evalDef.manualInput && evalDef.cases !== void 0) throw new Error(`Eval "${evalMeta.id}" cannot declare both "cases" and "manualInput". Remove one of them.`);
							let manualInputCase = null;
							if (evalDef.manualInput) {
								const manualTags = evalTagsResult.tags;
								// Probe the tags filter with a placeholder case; if it is filtered out,
								// record the single manual case on the meta and skip this eval.
								if (!filterEvalCasesByTags([{
									id: `${evalMeta.id}-manual`,
									input: {},
									tags: manualTags
								}], request.target.tagsFilter).length) {
									evalMeta.caseCount = 1;
									evalMeta.caseIds = [`${evalMeta.id}-manual`];
									return;
								}
								const rawValue = request.manualInputs?.[evalMeta.key];
								if (rawValue === void 0) throw new Error(`Eval "${evalMeta.id}" requires manual input. Provide it via the run modal in the web UI or "--input" / "--input-file" on the CLI.`);
								const parsed = parseManualInputValues(evalDef.manualInput, rawValue);
								if (parsed.error) {
									// Flatten the validation issues into one "path: message; ..." string.
									const formatted = parsed.error.issues.map((issue) => issue.path ? `${issue.path}: ${issue.message}` : issue.message).join("; ");
									throw new Error(`Invalid manual input for eval "${evalMeta.id}": ${formatted}`);
								}
								manualInputCase = {
									id: `${evalMeta.id}-manual`,
									input: parsed.value,
									tags: manualTags
								};
							}
							// Case source, in priority order: the manual-input case; nothing when
							// `cases` is a function and the eval tags do not match the filter (the
							// function is then not invoked); otherwise the declared cases, resolved
							// under the eval clock.
							const evalCases = manualInputCase ? [manualInputCase] : typeof evalDef.cases === "function" && !evalTagsMatchFilter({
								tags: evalTagsResult.tags,
								tagsFilter: request.target.tagsFilter
							}) ? [] : await runWithEvalClock(evalDef.startTime, async () => typeof evalDef.cases === "function" ? await evalDef.cases() : evalDef.cases ?? [], { freezeTime: evalDef.freezeTime });
							// The manual case bypasses `resolveRunnableEvalCases`; every case gets its tags resolved.
							const runnableCases = (manualInputCase ? evalCases : resolveRunnableEvalCases({
								cases: evalCases,
								evalId: evalMeta.id
							})).map((evalCase) => ({
								...evalCase,
								tags: resolveCaseTags({
									evalTags: evalTagsResult.tags,
									evalCase,
									evalId: evalMeta.id,
									filePath: evalMeta.filePath
								})
							}));
							const duplicateCaseIds = findDuplicateCaseIds(runnableCases);
							if (duplicateCaseIds.length > 0) throw new Error(`Duplicate case id${duplicateCaseIds.length === 1 ? "" : "s"} in ${evalMeta.filePath}#${evalMeta.id}: ${duplicateCaseIds.join(", ")}`);
							// `cases` is the subset selected by the request's case ids and tags filter.
							const cases = filterEvalCasesByTags(filterEvalCases(runnableCases, request.target.caseIds), request.target.tagsFilter);
							// Meta reflects all runnable cases; the run total counts only the selected ones.
							evalMeta.caseCount = runnableCases.length;
							evalMeta.caseIds = runnableCases.map((evalCase) => evalCase.id);
							runState.summary.totalCases += cases.length;
							const defaultConfig = resolveEvalDefaultConfig({
								evalDef,
								globalColumns: config.columns,
								globalStats: config.stats,
								globalDefaultStatAggregate: config.defaultStatAggregate,
								globalRemove: config.removeDefaultConfig
							});
							const declaredColumnDefs = buildDeclaredColumnDefs(defaultConfig.columns, evalDef.scores, evalDef.manualScores);
							const validatedCharts = validateCharts({
								charts: defaultConfig.charts,
								columnDefs: declaredColumnDefs,
								evalId: evalMeta.id
							});
							// Chart validation warnings are surfaced on the console but are not fatal.
							for (const warning of validatedCharts.warnings) console.warn(warning);
							evalMeta.columnDefs = declaredColumnDefs;
							evalMeta.stats = defaultConfig.stats;
							evalMeta.defaultStatAggregate = defaultConfig.defaultStatAggregate;
							evalMeta.charts = validatedCharts.charts;
							const evalCaseRows = [];
							const preparedCases = [];
							const scoreKeys = Object.freeze(Object.keys(evalDef.scores ?? {}));
							const manualScoreKeys = Object.freeze(Object.keys(evalDef.manualScores ?? {}));
							// Shared by reference with the queued closures below; `preparedCases`
							// and `evalCaseRows` start empty.
							const preparedEval = {
								evalMeta,
								evalCaseRows,
								preparedCases,
								scoreKeys: Object.freeze([...scoreKeys, ...manualScoreKeys])
							};
							preparedEvals.push(preparedEval);
							for (const evalCase of cases) {
								// Collects one entry per completed trial of this case.
								const trialResults = [];
								const preparedCase = {
									caseId: evalCase.id,
									trialResults,
									finalized: false
								};
								preparedCases.push(preparedCase);
								for (let trial = 0; trial < request.trials; trial++) {
									// Each trial gets its own buffered cache store, unless caching is
									// disabled or the cache mode is "bypass".
									const bufferedCacheStore = cacheEnabled && cacheMode !== "bypass" ? createBufferedCacheStore(cacheStore) : null;
									const caseModuleIsolation = buildCaseModuleIsolation({
										runId: runState.manifest.id,
										evalKey: evalMeta.key,
										caseId: evalCase.id,
										trial,
										workspaceRoot
									});
									queuedCases.push({
										// Runs the case against a freshly isolated copy of the eval definition.
										execute: async ({ startTime, globalTraceDisplay }) => {
											const { caseDetail, caseRowUpdate } = await useIsolatedEvalDefinition({
												evalId: evalMeta.id,
												evalFilePath,
												sourceFingerprint: toOptionalSourceFingerprint(sourceFingerprint),
												moduleIsolation: caseModuleIsolation,
												runtimeScope: "env",
												use: async (isolatedEvalDef) => await runCase({
													evalDef: isolatedEvalDef,
													evalId: evalMeta.id,
													evalKey: evalMeta.key,
													evalCase,
													globalTraceDisplay,
													globalColumns: config.columns,
													globalDeriveFromTracing: config.deriveFromTracing,
													llmCallsConfig,
													apiCallsConfig,
													globalRemoveDefaultConfig: config.removeDefaultConfig,
													trial,
													startTime,
													// Buffered store when present; otherwise the raw store if caching is enabled, else null.
													cacheAdapter: bufferedCacheStore ?? (cacheEnabled ? cacheStore : null),
													cacheMode,
													moduleIsolation: caseModuleIsolation,
													evalFilePath,
													evalFileRelativePath: evalMeta.filePath,
													workspaceRoot,
													artifactDir: join(runDir, "artifacts"),
													runId: runState.manifest.id
												})
											});
											// Build the full case row, defaulting any field the update omitted.
											return {
												caseDetail,
												caseRow: {
													caseId: evalCase.id,
													evalId: evalMeta.id,
													evalKey: evalMeta.key,
													caseKey: caseDetail.caseKey,
													tags: caseDetail.tags,
													status: caseRowUpdate.status ?? "pending",
													durationMs: caseRowUpdate.durationMs ?? null,
													cacheHits: caseRowUpdate.cacheHits ?? 0,
													cacheOperations: caseRowUpdate.cacheOperations ?? 0,
													columns: caseRowUpdate.columns ?? {},
													trial
												}
											};
										},
										onComplete: async ({ caseDetail, caseRow }) => {
											trialResults.push({
												caseDetail,
												caseRow,
												bufferedCacheStore
											});
											// Finalize only once every trial of this case has reported.
											if (trialResults.length !== request.trials) return;
											await finalizePreparedCase({
												runState,
												runDir,
												preparedEval,
												preparedCase,
												onCaseFinished,
												emitEvent
											});
										}
									});
								}
							}
						});
					});
				});
			} catch (error) {
				// A failure while preparing one eval is recorded and the loop continues with the next eval.
				console.error(`Error running eval ${evalMeta.id}:`, error);
				evalErrors.push({
					evalId: evalMeta.id,
					details: formatUnknownErrorDetails(error)
				});
				lastRunStatusMap.set(evalMeta.key, "error");
				latestRunInfoMap.set(evalMeta.key, {
					status: "error",
					startedAt: runState.manifest.endedAt ?? runState.manifest.startedAt,
					commitSha: runState.manifest.commitSha ?? null,
					evalSourceFingerprint: runState.manifest.evalSourceFingerprints[evalMeta.key] ?? null
				});
			}
		}
		// Ambiguity is only checked when the request targets explicit case ids.
		const ambiguousCaseTargets = request.target.caseIds && request.target.caseIds.length > 0 ? findAmbiguousTargetCaseIds(preparedEvals) : [];
		if (ambiguousCaseTargets.length > 0) {
			// Discard all queued work and report the ambiguity as a run-level error.
			queuedCases.length = 0;
			evalErrors.push({
				evalId: "target",
				details: `Ambiguous --case target. Narrow it with --file and/or --eval: ${ambiguousCaseTargets.join("; ")}`
			});
		} else await executeQueuedCases({
			queuedCases,
			concurrency: getConfiguredConcurrency(),
			globalTraceDisplay: config.traceDisplay
		});
		for (const preparedEval of preparedEvals) {
			// Every prepared case is passed to finalizePreparedCase again here.
			// NOTE(review): presumably a no-op for cases already finalized in onComplete
			// (see the `finalized` flag) — confirm in finalizePreparedCase.
			for (const preparedCase of preparedEval.preparedCases) await finalizePreparedCase({
				runState,
				runDir,
				preparedEval,
				preparedCase,
				onCaseFinished,
				emitEvent
			});
			lastRunStatusMap.set(preparedEval.evalMeta.key, toLastRunStatus(deriveStatusFromCaseRows({ caseRows: preparedEval.evalCaseRows })));
			const latestStatus = lastRunStatusMap.get(preparedEval.evalMeta.key) ?? null;
			latestRunInfoMap.set(preparedEval.evalMeta.key, {
				status: latestStatus,
				startedAt: runState.manifest.endedAt ?? runState.manifest.startedAt,
				commitSha: runState.manifest.commitSha ?? null,
				evalSourceFingerprint: runState.manifest.evalSourceFingerprints[preparedEval.evalMeta.key] ?? null
			});
		}
		// Put the run-level rows and each eval's rows back into preparation order.
		sortCaseRowsByPreparedOrder(runState.cases, preparedEvals);
		for (const preparedEval of preparedEvals) sortCaseRowsByPreparedOrder(preparedEval.evalCaseRows, preparedEvals);
		const endTime = /* @__PURE__ */ new Date();
		runState.summary.totalDurationMs = endTime.getTime() - new Date(runState.manifest.startedAt).getTime();
		// Any collected eval error marks the whole run as "error".
		const finalStatus = evalErrors.length > 0 ? "error" : "completed";
		runState.summary.status = finalStatus;
		runState.manifest.status = finalStatus;
		const completedRunAt = endTime.toISOString();
		runState.manifest.endedAt = completedRunAt;
		runState.summary.errorMessage = evalErrors.length > 0 ? buildRunErrorMessage(evalErrors) : null;
		// Refresh the latest-run info of every targeted eval key with the completion
		// timestamp. Keys with no recorded status (absent or null) get one derived
		// from the run's lifecycle status.
		for (const evalKey of getTargetEvalKeys({
			request,
			sortedEvals: getSortedEvalMetas()
		})) {
			const latestStatus = lastRunStatusMap.get(evalKey) ?? toLastRunStatus(deriveStatusFromCaseRows({
				caseRows: [],
				lifecycleStatus: runState.manifest.status
			}));
			latestRunInfoMap.set(evalKey, {
				status: latestStatus,
				startedAt: completedRunAt,
				commitSha: runState.manifest.commitSha ?? null,
				evalSourceFingerprint: runState.manifest.evalSourceFingerprints[evalKey] ?? null
			});
		}
		await persistRunState(runState);
		// `run.summary` is always emitted, followed by `run.error` or `run.finished`.
		emitEvent(runState, {
			type: "run.summary",
			runId: runState.manifest.id,
			timestamp: (/* @__PURE__ */ new Date()).toISOString(),
			payload: runState.summary
		});
		if (finalStatus === "error") emitEvent(runState, {
			type: "run.error",
			runId: runState.manifest.id,
			timestamp: (/* @__PURE__ */ new Date()).toISOString(),
			payload: { message: buildRunErrorMessage(evalErrors) }
		});
		else emitEvent(runState, {
			type: "run.finished",
			runId: runState.manifest.id,
			timestamp: (/* @__PURE__ */ new Date()).toISOString(),
			payload: runState.summary
		});
		emitDiscoveryEvent();
	} catch (error) {
		// Failure outside the per-eval handling: mark the run as errored, persist it,
		// and emit `run.error`. No `run.summary` is emitted on this path.
		const message = formatUnknownErrorDetails(error);
		runState.manifest.status = "error";
		runState.manifest.endedAt = (/* @__PURE__ */ new Date()).toISOString();
		runState.summary.status = "error";
		runState.summary.errorMessage = message;
		await persistRunState(runState);
		emitEvent(runState, {
			type: "run.error",
			runId: runState.manifest.id,
			timestamp: (/* @__PURE__ */ new Date()).toISOString(),
			payload: { message }
		});
		emitDiscoveryEvent();
	}
}
|
|
7975
|
-
/**
 * Converts a derived status into the value stored as an eval's last-run status.
 *
 * @param status - Derived status value.
 * @returns `null` when `status` is `"pending"`; otherwise `status` unchanged.
 */
function toLastRunStatus(status) {
	// "pending" is stored as null rather than as a status string.
	if (status === "pending") return null;
	return status;
}
|
|
7978
|
-
//#endregion
|
|
7979
|
-
export { getCurrentScope as $, validateCharts as A, buildEvalKey as At, deserializeCacheValue as B, deriveEvalFreshness as C, getEvalDisplayStatus as Ct, loadConfig as D, runSummarySchema as Dt, resolveEvalDefaultConfig as E, runManifestSchema as Et, evalSpan as F, evalChartsConfigSchema as Ft, readManualInputFile as G, serializeCacheValue as H, evalTracer as I, columnDefSchema as It, appendToEvalOutput as J, evalExpect as K, hashCacheKey as L, defineEval as Lt, z$1 as M, evalStatAggregateSchema as Mt, buildTraceTree as N, evalStatsConfigSchema as Nt, buildDeclaredColumnDefs as O, resolveApiCallsConfig as Ot, captureEvalSpanError as P, manualInputDescriptorSchema as Pt, evalTime as Q, hashCacheKeySync as R, getEvalRegistry as Rt, parseManualInputValues as S, getEvalTitle as St, parseEvalDiscovery as T, matchesTagsFilter as Tt, repoFile as U, serializeCacheRecording as V, manualInputFileValueSchema as W, evalAssert as X, configureEvalRunLogs as Y, evalLog as Z, recomputePersistedCaseStatus as _, extractLlmCalls as _t, validateTagsFilters as a, nextEvalId as at, resolveArtifactPath as b, applyDerivedCallAttributes as bt, getLastRunStatuses as c, runInExistingEvalScope as ct, loadPersistedRunSnapshots as d, startEvalBackgroundJob as dt, getEvalCaseInput as et, nextShortIdFromSnapshots as f, createRunRequestSchema as ft, recomputeEvalStatusesInRuns as g, extractApiCalls as gt, persistRunState as h, extractCacheHits as ht, resolveEvalTags as i, mergeEvalOutput as it, createFsCacheStore as j, getCaseRowCaseKey as jt, normalizeScoreDef as k, resolveLlmCallsConfig as kt, getLatestRunInfos as l, setEvalOutput as lt, deleteTemporaryRuns as m, extractCacheEntries as mt, getTargetEvalKeys as n, isInEvalScope as nt, stripTerminalControlCodes as o, runInEvalRuntimeScope as ot, persistCaseDetail as p, updateManualScoreRequestSchema as pt, EvalAssertionError as q, getTargetEvals as r, matchesEvalTags as rt, generateRunId as s, runInEvalScope as st, executeRun as t, 
incrementEvalOutput as tt, loadPersistedRunSnapshot as u, setScopeCacheContext as ut, runTouchesEval as v, simulateLlmCallCost as vt, loadIsolatedEvalRegistry as w, deriveScopedSummaryFromCases as wt, buildManualInputDescriptor as x, getNestedAttribute as xt, resolveTracePresentation as y, simulateTokenAllocation as yt, deserializeCacheRecording as z };
|
|
6764
|
+
export { startEvalBackgroundJob as $, repoFile as A, evalChartsConfigSchema as At, getCurrentScope as B, evalTracer as C, buildEvalKey as Ct, deserializeCacheValue as D, evalStatAggregateSchema as Dt, deserializeCacheRecording as E, caseRowSchema as Et, appendToEvalOutput as F, mergeEvalOutput as G, incrementEvalOutput as H, configureEvalRunLogs as I, runInEvalScope as J, nextEvalId as K, evalAssert as L, readManualInputFile as M, defineEval as Mt, evalExpect as N, getEvalRegistry as Nt, serializeCacheRecording as O, evalStatsConfigSchema as Ot, EvalAssertionError as P, runWithEvalRegistry as Pt, setScopeCacheContext as Q, evalLog as R, evalSpan as S, resolveLlmCallsConfig as St, hashCacheKeySync as T, caseDetailSchema as Tt, isInEvalScope as U, getEvalCaseInput as V, matchesEvalTags as W, runWithEvalClock as X, runInExistingEvalScope as Y, setEvalOutput as Z, createBufferedCacheStore as _, validateEvalTagName as _t, isCaseChildParentMessage as a, extractLlmCalls as at, buildTraceTree as b, runSummarySchema as bt, resolveArtifactPath as c, applyDerivedCallAttributes as ct, loadEvalModule as d, getEvalDisplayStatus as dt, createRunRequestSchema as et, resolveEvalDefaultConfig as f, deriveScopedSummaryFromCases as ft, commitPendingCacheWrites as g, matchesTagsFilter as gt, normalizeScoreDef as h, dedupeEvalTags as ht, isCaseChildMessage as i, extractApiCalls as it, manualInputFileValueSchema as j, columnDefSchema as jt, serializeCacheValue as k, manualInputDescriptorSchema as kt, registerAgentEvalsPackageResolutionHooks as l, getNestedAttribute as lt, buildDeclaredColumnDefs as m, deriveStatusFromChildStatuses as mt, resolveRunnableEvalCases as n, extractCacheEntries as nt, stripTerminalControlCodes as o, simulateLlmCallCost as ot, loadConfig as p, deriveStatusFromCaseRows as pt, runInEvalRuntimeScope as q, runCase as r, extractCacheHits as rt, resolveTracePresentation as s, simulateTokenAllocation as st, filterEvalCases as t, updateManualScoreRequestSchema as tt, 
runWithModuleIsolation as u, getEvalTitle as ut, createFsCacheStore as v, validateTagsFilterExpression as vt, hashCacheKey as w, getCaseRowCaseKey as wt, captureEvalSpanError as x, resolveApiCallsConfig as xt, z$1 as y, runManifestSchema as yt, evalTime as z };
|