@ls-stack/agent-eval 0.36.0 → 0.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-BlNzXWDM.mjs → app-DD-8kx5H.mjs} +50 -6
- package/dist/apps/web/dist/assets/index-C6PgBOfP.css +1 -0
- package/dist/apps/web/dist/assets/index-CO86PsY-.js +140 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +14 -3
- package/dist/{cli-Dg3abrOv.mjs → cli-BUX6tr9J.mjs} +106 -25
- package/dist/index.d.mts +159 -150
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +73 -2
- package/dist/{runOrchestration-V1TxX8es.mjs → runOrchestration-BhUFWvq9.mjs} +293 -121
- package/dist/{runner-BCs5rzej.mjs → runner-B1wfPKNH.mjs} +2 -2
- package/dist/{runner-znY6PY1M.mjs → runner-CoRf7Vef.mjs} +1 -1
- package/dist/src-BwJ5tod2.mjs +3 -0
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +18 -6
- package/dist/apps/web/dist/assets/index-BYtcGddU.js +0 -140
- package/dist/apps/web/dist/assets/index-D0rC5MSS.css +0 -1
- package/dist/src-DBypR4TV.mjs +0 -3
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
import { createRequire, registerHooks } from "node:module";
|
|
2
2
|
import { createHash, randomUUID } from "node:crypto";
|
|
3
3
|
import { mkdir, readFile, readdir, rename, rm, stat, writeFile } from "node:fs/promises";
|
|
4
|
-
import { extname, isAbsolute, join, relative, resolve } from "node:path";
|
|
4
|
+
import { dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
|
|
5
5
|
import { formatWithOptions, isDeepStrictEqual, stripVTControlCharacters } from "node:util";
|
|
6
6
|
import { AsyncLocalStorage } from "node:async_hooks";
|
|
7
7
|
import { z, z as z$1 } from "zod/v4";
|
|
8
8
|
import { Blob as Blob$1, Buffer as Buffer$1, File as File$1 } from "node:buffer";
|
|
9
|
-
import { gunzipSync, gzipSync } from "node:zlib";
|
|
10
9
|
import { getCompositeKey } from "@ls-stack/utils/getCompositeKey";
|
|
11
10
|
import { existsSync } from "node:fs";
|
|
11
|
+
import { brotliCompressSync, brotliDecompressSync } from "node:zlib";
|
|
12
12
|
import { Result, resultify } from "t-result";
|
|
13
13
|
import { fileURLToPath, pathToFileURL } from "node:url";
|
|
14
14
|
//#region ../sdk/src/defineEval.ts
|
|
@@ -867,33 +867,52 @@ function repoFile(path, mimeType) {
|
|
|
867
867
|
//#endregion
|
|
868
868
|
//#region ../sdk/src/cacheSerialization.ts
|
|
869
869
|
const serializedCacheValueMarker = "__aecs";
|
|
870
|
-
const
|
|
871
|
-
const jsonSafeCacheValueVersion = "json-safe-v1";
|
|
870
|
+
const jsonSafeCacheValueVersion = "v1";
|
|
872
871
|
const packedNumberArrayMinLength = 128;
|
|
873
|
-
const
|
|
874
|
-
const
|
|
875
|
-
const
|
|
872
|
+
const maxPackedNumberArraySizeRatio = .8;
|
|
873
|
+
const externalJsonMinChars = 10 * 1024;
|
|
874
|
+
const jsonSafeCacheValueTypes = new Set("ArrayBuffer BigInt Blob Date Error ExternalJson File Float64Array Headers Map Number Object RegExp Set URL URLSearchParams Undefined".split(" "));
|
|
876
875
|
function isRecordLike$3(value) {
|
|
877
876
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
878
877
|
}
|
|
879
878
|
function isJsonSafeSerializedCacheValue(value) {
|
|
880
|
-
return isRecordLike$3(value) &&
|
|
879
|
+
return isRecordLike$3(value) && jsonSafeValueType(value) !== void 0;
|
|
881
880
|
}
|
|
882
881
|
function jsonSafeValue(type, value) {
|
|
883
|
-
return value === void 0 ? {
|
|
884
|
-
[serializedCacheValueMarker]:
|
|
885
|
-
type
|
|
886
|
-
} : {
|
|
887
|
-
[serializedCacheValueMarker]: jsonSafeCacheValueVersion,
|
|
888
|
-
type,
|
|
882
|
+
return value === void 0 ? { [serializedCacheValueMarker]: jsonSafeMarker(type) } : {
|
|
883
|
+
[serializedCacheValueMarker]: jsonSafeMarker(type),
|
|
889
884
|
value
|
|
890
885
|
};
|
|
891
886
|
}
|
|
892
887
|
function hasSerializationMarkerKey(value) {
|
|
893
|
-
return Object.hasOwn(value, serializedCacheValueMarker)
|
|
888
|
+
return Object.hasOwn(value, serializedCacheValueMarker);
|
|
889
|
+
}
|
|
890
|
+
function jsonSafeMarker(type) {
|
|
891
|
+
return `${jsonSafeCacheValueVersion}:${type}`;
|
|
892
|
+
}
|
|
893
|
+
function jsonSafeValueType(value) {
|
|
894
|
+
const marker = value[serializedCacheValueMarker];
|
|
895
|
+
if (typeof marker !== "string") return void 0;
|
|
896
|
+
if (!marker.startsWith(`${jsonSafeCacheValueVersion}:`)) return void 0;
|
|
897
|
+
const type = marker.slice(3);
|
|
898
|
+
return isJsonSafeCacheValueType(type) ? type : void 0;
|
|
899
|
+
}
|
|
900
|
+
function isJsonSafeCacheValueType(value) {
|
|
901
|
+
return jsonSafeCacheValueTypes.has(value);
|
|
902
|
+
}
|
|
903
|
+
function externalJsonRefFromWrapper(value) {
|
|
904
|
+
const hash = typeof value.hash === "string" ? toExternalJsonHash(value.hash) : void 0;
|
|
905
|
+
if (hash === void 0 || typeof value.length !== "number" || typeof value.compressedLength !== "number" || typeof value.path !== "string") return;
|
|
906
|
+
return {
|
|
907
|
+
compressedLength: value.compressedLength,
|
|
908
|
+
hash,
|
|
909
|
+
length: value.length,
|
|
910
|
+
path: value.path
|
|
911
|
+
};
|
|
894
912
|
}
|
|
895
|
-
function
|
|
896
|
-
|
|
913
|
+
function toExternalJsonHash(value) {
|
|
914
|
+
if (!value.startsWith("sha256:")) return void 0;
|
|
915
|
+
return `sha256:${value.slice(7)}`;
|
|
897
916
|
}
|
|
898
917
|
/**
|
|
899
918
|
* Serialize one cached value while keeping plain JSON as plain JSON.
|
|
@@ -902,50 +921,57 @@ function serializationMarkerValue(value) {
|
|
|
902
921
|
* by default; pass `preserveUndefined: true` to round-trip them explicitly.
|
|
903
922
|
*/
|
|
904
923
|
async function serializeCacheValue(value, options = void 0) {
|
|
905
|
-
return serializeJsonSafeValue(value, /* @__PURE__ */ new WeakSet(), 0, normalizeCacheSerializationOptions(options));
|
|
906
|
-
}
|
|
907
|
-
/** Revive one cached value, while preserving legacy JSON-round-tripped data. */
|
|
908
|
-
function deserializeCacheValue(value) {
|
|
909
|
-
return deserializeJsonSafeValue(value);
|
|
910
|
-
}
|
|
911
|
-
/** Clone one value through the same serialization path used for cache data. */
|
|
912
|
-
async function cloneCacheValue(value, options = void 0) {
|
|
913
|
-
return deserializeCacheValue(await serializeCacheValue(value, options));
|
|
924
|
+
return (await serializeJsonSafeValue(value, /* @__PURE__ */ new WeakSet(), 0, normalizeCacheSerializationOptions(options))).value;
|
|
914
925
|
}
|
|
915
|
-
function
|
|
926
|
+
function serializedResult(value, jsonLength = jsonLengthOfSerializedValue(value)) {
|
|
916
927
|
return {
|
|
917
|
-
|
|
918
|
-
|
|
928
|
+
value,
|
|
929
|
+
jsonLength
|
|
919
930
|
};
|
|
920
931
|
}
|
|
932
|
+
function jsonLengthOfSerializedValue(value) {
|
|
933
|
+
if (value === void 0) return 0;
|
|
934
|
+
if (value === null) return 4;
|
|
935
|
+
if (typeof value === "string") return approximateJsonStringLength(value);
|
|
936
|
+
return JSON.stringify(value).length;
|
|
937
|
+
}
|
|
938
|
+
function approximateJsonStringLength(value) {
|
|
939
|
+
return value.length + 2;
|
|
940
|
+
}
|
|
941
|
+
function jsonArrayLength(itemLengths) {
|
|
942
|
+
return 2 + itemLengths.reduce((total, itemLength) => total + itemLength, 0) + Math.max(itemLengths.length - 1, 0);
|
|
943
|
+
}
|
|
944
|
+
function jsonObjectLength(entries) {
|
|
945
|
+
return 2 + entries.reduce((total, [key, valueLength]) => total + approximateJsonStringLength(key) + 1 + valueLength, 0) + Math.max(entries.length - 1, 0);
|
|
946
|
+
}
|
|
921
947
|
async function serializeJsonSafeValue(value, refs, depth, config) {
|
|
922
|
-
if (value === void 0) return config.preserveUndefined ? jsonSafeValue("Undefined") : void 0;
|
|
923
|
-
if (typeof value === "bigint") return jsonSafeValue("BigInt", value.toString());
|
|
924
|
-
if (typeof value === "number") return serializeNumber(value);
|
|
925
|
-
if (typeof value === "string") return
|
|
926
|
-
if (value instanceof Date) return jsonSafeValue("Date", value.toISOString());
|
|
948
|
+
if (value === void 0) return config.preserveUndefined ? serializedResult(jsonSafeValue("Undefined")) : serializedResult(void 0);
|
|
949
|
+
if (typeof value === "bigint") return serializedResult(jsonSafeValue("BigInt", value.toString()));
|
|
950
|
+
if (typeof value === "number") return serializedResult(serializeNumber(value));
|
|
951
|
+
if (typeof value === "string") return await externalizeNestedJsonValue(serializedResult(value, approximateJsonStringLength(value)), depth, config);
|
|
952
|
+
if (value instanceof Date) return serializedResult(jsonSafeValue("Date", value.toISOString()));
|
|
927
953
|
if (value instanceof Map) return serializeMap(value, refs, depth, config);
|
|
928
954
|
if (value instanceof Set) return serializeSet(value, refs, depth, config);
|
|
929
|
-
if (value instanceof RegExp) return jsonSafeValue("RegExp", {
|
|
955
|
+
if (value instanceof RegExp) return serializedResult(jsonSafeValue("RegExp", {
|
|
930
956
|
flags: value.flags,
|
|
931
957
|
source: value.source
|
|
932
|
-
});
|
|
933
|
-
if (value instanceof URL) return jsonSafeValue("URL", value.toString());
|
|
934
|
-
if (value instanceof URLSearchParams) return jsonSafeValue("URLSearchParams", value.toString());
|
|
935
|
-
if (value instanceof Headers) return jsonSafeValue("Headers", [...value.entries()]);
|
|
936
|
-
if (value instanceof File) return jsonSafeValue("File", {
|
|
958
|
+
}));
|
|
959
|
+
if (value instanceof URL) return serializedResult(jsonSafeValue("URL", value.toString()));
|
|
960
|
+
if (value instanceof URLSearchParams) return serializedResult(jsonSafeValue("URLSearchParams", value.toString()));
|
|
961
|
+
if (value instanceof Headers) return serializedResult(jsonSafeValue("Headers", [...value.entries()]));
|
|
962
|
+
if (value instanceof File) return serializedResult(jsonSafeValue("File", {
|
|
937
963
|
bytes: await blobToBase64(value),
|
|
938
964
|
lastModified: value.lastModified,
|
|
939
965
|
name: value.name,
|
|
940
966
|
type: value.type
|
|
941
|
-
});
|
|
942
|
-
if (value instanceof Blob) return jsonSafeValue("Blob", {
|
|
967
|
+
}));
|
|
968
|
+
if (value instanceof Blob) return serializedResult(jsonSafeValue("Blob", {
|
|
943
969
|
bytes: await blobToBase64(value),
|
|
944
970
|
type: value.type
|
|
945
|
-
});
|
|
946
|
-
if (value instanceof ArrayBuffer) return jsonSafeValue("ArrayBuffer", bytesToBase64(new Uint8Array(value)));
|
|
971
|
+
}));
|
|
972
|
+
if (value instanceof ArrayBuffer) return serializedResult(jsonSafeValue("ArrayBuffer", bytesToBase64(new Uint8Array(value))));
|
|
947
973
|
if (value instanceof Error) return serializeError(value, refs, depth, config);
|
|
948
|
-
if (!value || typeof value !== "object") return value;
|
|
974
|
+
if (!value || typeof value !== "object") return serializedResult(value);
|
|
949
975
|
if (refs.has(value)) throw new Error("Circular cache values are not supported");
|
|
950
976
|
refs.add(value);
|
|
951
977
|
if (Array.isArray(value)) {
|
|
@@ -953,25 +979,59 @@ async function serializeJsonSafeValue(value, refs, depth, config) {
|
|
|
953
979
|
const packed = packNumberArray(value);
|
|
954
980
|
if (packed !== void 0) {
|
|
955
981
|
refs.delete(value);
|
|
956
|
-
return packed;
|
|
982
|
+
return serializedResult(packed);
|
|
957
983
|
}
|
|
958
984
|
}
|
|
959
985
|
const items = [];
|
|
986
|
+
const itemLengths = [];
|
|
960
987
|
for (const item of value) {
|
|
961
988
|
const serializedItem = await serializeJsonSafeValue(item, refs, depth + 1, config);
|
|
962
|
-
if (serializedItem !== void 0)
|
|
989
|
+
if (serializedItem.value !== void 0) {
|
|
990
|
+
items.push(serializedItem.value);
|
|
991
|
+
itemLengths.push(serializedItem.jsonLength);
|
|
992
|
+
}
|
|
963
993
|
}
|
|
964
994
|
refs.delete(value);
|
|
965
|
-
return
|
|
995
|
+
return await externalizeNestedJsonValue(serializedResult(items, jsonArrayLength(itemLengths)), depth, config);
|
|
966
996
|
}
|
|
967
997
|
const entries = [];
|
|
998
|
+
const entryLengths = [];
|
|
968
999
|
for (const [key, entryValue] of Object.entries(value)) {
|
|
969
1000
|
const serializedEntryValue = await serializeJsonSafeValue(entryValue, refs, depth + 1, config);
|
|
970
|
-
if (serializedEntryValue !== void 0)
|
|
1001
|
+
if (serializedEntryValue.value !== void 0) {
|
|
1002
|
+
entries.push([key, serializedEntryValue.value]);
|
|
1003
|
+
entryLengths.push([key, serializedEntryValue.jsonLength]);
|
|
1004
|
+
}
|
|
971
1005
|
}
|
|
972
1006
|
refs.delete(value);
|
|
973
|
-
|
|
974
|
-
return
|
|
1007
|
+
if (hasSerializationMarkerKey(value)) return await externalizeNestedJsonValue(serializedResult(jsonSafeValue("Object", entries)), depth, config);
|
|
1008
|
+
return await externalizeNestedJsonValue(serializedResult(Object.fromEntries(entries), jsonObjectLength(entryLengths)), depth, config);
|
|
1009
|
+
}
|
|
1010
|
+
/** Revive one cached value, while preserving legacy JSON-round-tripped data. */
|
|
1011
|
+
function deserializeCacheValue(value) {
|
|
1012
|
+
return deserializeJsonSafeValue(value);
|
|
1013
|
+
}
|
|
1014
|
+
/** Replace external JSON blob refs with their parsed serialized payloads. */
|
|
1015
|
+
async function materializeExternalJsonValues(value, store) {
|
|
1016
|
+
if (isJsonSafeSerializedCacheValue(value) && jsonSafeValueType(value) === "ExternalJson") {
|
|
1017
|
+
const ref = externalJsonRefFromWrapper(value);
|
|
1018
|
+
if (ref === void 0) return value;
|
|
1019
|
+
return materializeExternalJsonValues(JSON.parse(await store.read(ref)), store);
|
|
1020
|
+
}
|
|
1021
|
+
if (Array.isArray(value)) return Promise.all(value.map((item) => materializeExternalJsonValues(item, store)));
|
|
1022
|
+
if (!isRecordLike$3(value)) return value;
|
|
1023
|
+
return Object.fromEntries(await Promise.all(Object.entries(value).map(async ([key, entryValue]) => [key, await materializeExternalJsonValues(entryValue, store)])));
|
|
1024
|
+
}
|
|
1025
|
+
/** Clone one value through the same serialization path used for cache data. */
|
|
1026
|
+
async function cloneCacheValue(value, options = void 0) {
|
|
1027
|
+
return deserializeCacheValue(await serializeCacheValue(value, options));
|
|
1028
|
+
}
|
|
1029
|
+
function normalizeCacheSerializationOptions(options) {
|
|
1030
|
+
return {
|
|
1031
|
+
compress: options?.compress !== false,
|
|
1032
|
+
externalJsonStore: options?.externalJsonStore,
|
|
1033
|
+
preserveUndefined: options?.preserveUndefined === true
|
|
1034
|
+
};
|
|
975
1035
|
}
|
|
976
1036
|
function serializeNumber(value) {
|
|
977
1037
|
if (Number.isNaN(value)) return jsonSafeValue("Number", "NaN");
|
|
@@ -980,11 +1040,6 @@ function serializeNumber(value) {
|
|
|
980
1040
|
if (Object.is(value, -0)) return jsonSafeValue("Number", "-0");
|
|
981
1041
|
return value;
|
|
982
1042
|
}
|
|
983
|
-
function serializeString(value, depth, config) {
|
|
984
|
-
if (depth === 0) return value;
|
|
985
|
-
if (!config.compress) return value;
|
|
986
|
-
return compressNestedStringValue(value) ?? value;
|
|
987
|
-
}
|
|
988
1043
|
function isDenseNumberArray(value) {
|
|
989
1044
|
for (let index = 0; index < value.length; index++) if (typeof value[index] !== "number") return false;
|
|
990
1045
|
return true;
|
|
@@ -997,48 +1052,32 @@ function encodeFloat64Array(value) {
|
|
|
997
1052
|
}
|
|
998
1053
|
function packNumberArray(value) {
|
|
999
1054
|
const serialized = {
|
|
1000
|
-
[serializedCacheValueMarker]:
|
|
1055
|
+
[serializedCacheValueMarker]: jsonSafeMarker("Float64Array"),
|
|
1001
1056
|
length: value.length,
|
|
1002
|
-
type: "Float64Array",
|
|
1003
1057
|
value: encodeFloat64Array(value)
|
|
1004
1058
|
};
|
|
1005
|
-
return
|
|
1059
|
+
return JSON.stringify(serialized).length < JSON.stringify(value).length * maxPackedNumberArraySizeRatio ? serialized : void 0;
|
|
1006
1060
|
}
|
|
1007
1061
|
function decodeFloat64Array(value, length) {
|
|
1008
1062
|
const bytes = base64ToArrayBuffer(value);
|
|
1009
1063
|
const view = new DataView(bytes);
|
|
1010
1064
|
return Array.from({ length }, (_, index) => view.getFloat64(index * 8, true));
|
|
1011
1065
|
}
|
|
1012
|
-
function
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
codec: "gzip",
|
|
1019
|
-
length: Buffer$1.byteLength(value),
|
|
1020
|
-
type: "CompressedString",
|
|
1021
|
-
value: compressed.toString("base64")
|
|
1066
|
+
async function externalizeNestedJsonValue(result, depth, config) {
|
|
1067
|
+
if (depth === 0 || !config.compress || config.externalJsonStore === void 0 || result.jsonLength < externalJsonMinChars) return result;
|
|
1068
|
+
const raw = JSON.stringify(result.value);
|
|
1069
|
+
if (raw.length < externalJsonMinChars) return {
|
|
1070
|
+
...result,
|
|
1071
|
+
jsonLength: raw.length
|
|
1022
1072
|
};
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
const serialized = {
|
|
1032
|
-
[serializedCacheValueMarker]: jsonSafeCacheValueVersion,
|
|
1033
|
-
codec: "gzip",
|
|
1034
|
-
length: rawSize,
|
|
1035
|
-
type: "CompressedJson",
|
|
1036
|
-
value: gzipSync(raw).toString("base64")
|
|
1037
|
-
};
|
|
1038
|
-
return compressionIsWorthIt(serialized, rawSize) ? serialized : void 0;
|
|
1039
|
-
}
|
|
1040
|
-
function compressionIsWorthIt(value, rawSize) {
|
|
1041
|
-
return Buffer$1.byteLength(JSON.stringify(value)) < rawSize * maxCompressedSizeRatio;
|
|
1073
|
+
const ref = await config.externalJsonStore.write(raw);
|
|
1074
|
+
return serializedResult({
|
|
1075
|
+
[serializedCacheValueMarker]: jsonSafeMarker("ExternalJson"),
|
|
1076
|
+
compressedLength: ref.compressedLength,
|
|
1077
|
+
hash: ref.hash,
|
|
1078
|
+
length: ref.length,
|
|
1079
|
+
path: ref.path
|
|
1080
|
+
});
|
|
1042
1081
|
}
|
|
1043
1082
|
async function serializeMap(value, refs, depth, config) {
|
|
1044
1083
|
if (refs.has(value)) throw new Error("Circular cache values are not supported");
|
|
@@ -1047,10 +1086,10 @@ async function serializeMap(value, refs, depth, config) {
|
|
|
1047
1086
|
for (const [key, entryValue] of value.entries()) {
|
|
1048
1087
|
const serializedKey = await serializeJsonSafeValue(key, refs, depth + 1, config);
|
|
1049
1088
|
const serializedEntryValue = await serializeJsonSafeValue(entryValue, refs, depth + 1, config);
|
|
1050
|
-
if (serializedKey !== void 0 && serializedEntryValue !== void 0) entries.push([serializedKey, serializedEntryValue]);
|
|
1089
|
+
if (serializedKey.value !== void 0 && serializedEntryValue.value !== void 0) entries.push([serializedKey.value, serializedEntryValue.value]);
|
|
1051
1090
|
}
|
|
1052
1091
|
refs.delete(value);
|
|
1053
|
-
return jsonSafeValue("Map", entries);
|
|
1092
|
+
return serializedResult(jsonSafeValue("Map", entries));
|
|
1054
1093
|
}
|
|
1055
1094
|
async function serializeSet(value, refs, depth, config) {
|
|
1056
1095
|
if (refs.has(value)) throw new Error("Circular cache values are not supported");
|
|
@@ -1058,10 +1097,10 @@ async function serializeSet(value, refs, depth, config) {
|
|
|
1058
1097
|
const items = [];
|
|
1059
1098
|
for (const item of value.values()) {
|
|
1060
1099
|
const serializedItem = await serializeJsonSafeValue(item, refs, depth + 1, config);
|
|
1061
|
-
if (serializedItem !== void 0) items.push(serializedItem);
|
|
1100
|
+
if (serializedItem.value !== void 0) items.push(serializedItem.value);
|
|
1062
1101
|
}
|
|
1063
1102
|
refs.delete(value);
|
|
1064
|
-
return jsonSafeValue("Set", items);
|
|
1103
|
+
return serializedResult(jsonSafeValue("Set", items));
|
|
1065
1104
|
}
|
|
1066
1105
|
async function serializeError(value, refs, depth, config) {
|
|
1067
1106
|
if (refs.has(value)) throw new Error("Circular cache values are not supported");
|
|
@@ -1070,17 +1109,17 @@ async function serializeError(value, refs, depth, config) {
|
|
|
1070
1109
|
for (const [key, entryValue] of Object.entries(value)) {
|
|
1071
1110
|
if (key === "cause") continue;
|
|
1072
1111
|
const serializedEntryValue = await serializeJsonSafeValue(entryValue, refs, depth + 1, config);
|
|
1073
|
-
if (serializedEntryValue !== void 0) props.push([key, serializedEntryValue]);
|
|
1112
|
+
if (serializedEntryValue.value !== void 0) props.push([key, serializedEntryValue.value]);
|
|
1074
1113
|
}
|
|
1075
1114
|
const serialized = jsonSafeValue("Error", {
|
|
1076
|
-
cause: "cause" in value ? await serializeJsonSafeValue(value.cause, refs, depth + 1, config) : void 0,
|
|
1115
|
+
cause: "cause" in value ? (await serializeJsonSafeValue(value.cause, refs, depth + 1, config)).value : void 0,
|
|
1077
1116
|
message: value.message,
|
|
1078
1117
|
name: value.name,
|
|
1079
1118
|
props,
|
|
1080
1119
|
stack: value.stack
|
|
1081
1120
|
});
|
|
1082
1121
|
refs.delete(value);
|
|
1083
|
-
return serialized;
|
|
1122
|
+
return serializedResult(serialized);
|
|
1084
1123
|
}
|
|
1085
1124
|
async function blobToBase64(value) {
|
|
1086
1125
|
return bytesToBase64(new Uint8Array(await value.arrayBuffer()));
|
|
@@ -1101,14 +1140,13 @@ function deserializeJsonSafeValue(value) {
|
|
|
1101
1140
|
return Object.fromEntries(Object.entries(value).map(([key, entryValue]) => [key, deserializeJsonSafeValue(entryValue)]));
|
|
1102
1141
|
}
|
|
1103
1142
|
function deserializeJsonSafeWrapper(value) {
|
|
1104
|
-
switch (value
|
|
1143
|
+
switch (jsonSafeValueType(value)) {
|
|
1105
1144
|
case "ArrayBuffer": return deserializeArrayBuffer(value.value);
|
|
1106
1145
|
case "BigInt": return typeof value.value === "string" ? BigInt(value.value) : value.value;
|
|
1107
1146
|
case "Blob": return deserializeBlob(value.value);
|
|
1108
|
-
case "CompressedJson": return deserializeCompressedJson(value.value);
|
|
1109
|
-
case "CompressedString": return deserializeCompressedString(value.value);
|
|
1110
1147
|
case "Date": return typeof value.value === "string" ? new Date(value.value) : value.value;
|
|
1111
1148
|
case "Error": return deserializeError(value.value);
|
|
1149
|
+
case "ExternalJson": return value;
|
|
1112
1150
|
case "File": return deserializeFile(value.value);
|
|
1113
1151
|
case "Float64Array": return deserializeFloat64Array(value.value, value.length);
|
|
1114
1152
|
case "Headers": return new Headers(deserializeStringPairArray(value.value));
|
|
@@ -1120,6 +1158,7 @@ function deserializeJsonSafeWrapper(value) {
|
|
|
1120
1158
|
case "URL": return typeof value.value === "string" ? new URL(value.value) : value.value;
|
|
1121
1159
|
case "URLSearchParams": return typeof value.value === "string" ? new URLSearchParams(value.value) : value.value;
|
|
1122
1160
|
case "Undefined": return;
|
|
1161
|
+
default: return value;
|
|
1123
1162
|
}
|
|
1124
1163
|
}
|
|
1125
1164
|
function deserializeNumber(value) {
|
|
@@ -1129,14 +1168,6 @@ function deserializeNumber(value) {
|
|
|
1129
1168
|
if (value === "-0") return -0;
|
|
1130
1169
|
return value;
|
|
1131
1170
|
}
|
|
1132
|
-
function deserializeCompressedString(value) {
|
|
1133
|
-
if (typeof value !== "string") return value;
|
|
1134
|
-
return gunzipSync(Buffer$1.from(value, "base64")).toString("utf8");
|
|
1135
|
-
}
|
|
1136
|
-
function deserializeCompressedJson(value) {
|
|
1137
|
-
if (typeof value !== "string") return value;
|
|
1138
|
-
return deserializeJsonSafeValue(JSON.parse(gunzipSync(Buffer$1.from(value, "base64")).toString("utf8")));
|
|
1139
|
-
}
|
|
1140
1171
|
function deserializeFloat64Array(value, length) {
|
|
1141
1172
|
if (typeof value !== "string" || typeof length !== "number") return value;
|
|
1142
1173
|
return decodeFloat64Array(value, length);
|
|
@@ -1766,7 +1797,7 @@ function createTraceCache(generateSpanId) {
|
|
|
1766
1797
|
operationType: "value",
|
|
1767
1798
|
operationName: info.name,
|
|
1768
1799
|
storedAt: new Date(getRealDateNowMs()).toISOString(),
|
|
1769
|
-
recording: await serializeCacheRecording(recording)
|
|
1800
|
+
recording: await serializeCacheRecording(recording, { externalJsonStore: cacheCtx.adapter.externalJsonStore })
|
|
1770
1801
|
}, {
|
|
1771
1802
|
rawKey: info.key,
|
|
1772
1803
|
operationType: "value",
|
|
@@ -2209,7 +2240,7 @@ async function traceSpanInternal(info, fn) {
|
|
|
2209
2240
|
spanName: info.name,
|
|
2210
2241
|
spanKind: info.kind,
|
|
2211
2242
|
storedAt: new Date(getRealDateNowMs()).toISOString(),
|
|
2212
|
-
recording: await serializeCacheRecording(recording)
|
|
2243
|
+
recording: await serializeCacheRecording(recording, { externalJsonStore: ctx.adapter.externalJsonStore })
|
|
2213
2244
|
};
|
|
2214
2245
|
await ctx.adapter.write(entry, {
|
|
2215
2246
|
rawKey: cacheOpts.key,
|
|
@@ -2980,6 +3011,8 @@ const evalSummarySchema = z.object({
|
|
|
2980
3011
|
currentCommitSha: z.string().nullable(),
|
|
2981
3012
|
columnDefs: z.array(columnDefSchema),
|
|
2982
3013
|
caseCount: z.number().nullable(),
|
|
3014
|
+
/** Authored case ids discovered for this eval, when case generation has run. */
|
|
3015
|
+
caseIds: z.array(z.string()).optional(),
|
|
2983
3016
|
lastRunStatus: z.enum([
|
|
2984
3017
|
"pass",
|
|
2985
3018
|
"fail",
|
|
@@ -3626,6 +3659,11 @@ const runManifestSchema = z.object({
|
|
|
3626
3659
|
"cancelled",
|
|
3627
3660
|
"error"
|
|
3628
3661
|
]),
|
|
3662
|
+
/**
|
|
3663
|
+
* Temporary runs are persisted like normal runs, but are deleted before the
|
|
3664
|
+
* next run starts. Older persisted runs default to durable history.
|
|
3665
|
+
*/
|
|
3666
|
+
temporary: z.boolean().optional().default(false),
|
|
3629
3667
|
startedAt: z.string(),
|
|
3630
3668
|
endedAt: z.string().nullable(),
|
|
3631
3669
|
/**
|
|
@@ -4521,6 +4559,11 @@ const createRunRequestSchema = z.object({
|
|
|
4521
4559
|
}),
|
|
4522
4560
|
trials: z.number().min(1),
|
|
4523
4561
|
/**
|
|
4562
|
+
* Persist this run as temporary history. Temporary runs are visible while
|
|
4563
|
+
* present, then deleted before the next run of any kind starts.
|
|
4564
|
+
*/
|
|
4565
|
+
temporary: z.boolean().optional(),
|
|
4566
|
+
/**
|
|
4524
4567
|
* Optional cache controls for the run. When omitted, the cache is used in
|
|
4525
4568
|
* its default read-through / write-on-miss mode.
|
|
4526
4569
|
*/
|
|
@@ -4539,8 +4582,9 @@ const updateManualScoreRequestSchema = z.object({ value: z.number().min(0).max(1
|
|
|
4539
4582
|
//#region ../runner/src/cacheStore.ts
|
|
4540
4583
|
const defaultMaxEntriesPerNamespace = 100;
|
|
4541
4584
|
const cacheSerializationMarker = "__aecs";
|
|
4542
|
-
const
|
|
4543
|
-
const
|
|
4585
|
+
const supportedCacheSerializationPrefix = "v1:";
|
|
4586
|
+
const externalJsonCacheSerializationMarker = "v1:ExternalJson";
|
|
4587
|
+
const externalJsonBlobExtension = ".json.br";
|
|
4544
4588
|
/**
|
|
4545
4589
|
* Create a filesystem-backed cache adapter rooted at `<workspaceRoot>/<dir>`.
|
|
4546
4590
|
*
|
|
@@ -4551,20 +4595,28 @@ const supportedCacheSerializationVersion = "json-safe-v1";
|
|
|
4551
4595
|
function createFsCacheStore(options) {
|
|
4552
4596
|
const cacheDir = resolve(options.workspaceRoot, options.dir ?? ".agent-evals/cache");
|
|
4553
4597
|
const debugDir = resolve(options.workspaceRoot, options.debugDir ?? ".agent-evals/cache-debug");
|
|
4598
|
+
const blobDir = resolve(options.workspaceRoot, options.blobDir ?? ".agent-evals/cache-blobs");
|
|
4599
|
+
const externalJsonStore = createExternalJsonBlobStore(blobDir);
|
|
4554
4600
|
const defaultMaxEntries = normalizeMaxEntries(options.maxEntriesPerNamespace);
|
|
4555
4601
|
return {
|
|
4602
|
+
externalJsonStore,
|
|
4556
4603
|
dir() {
|
|
4557
4604
|
return cacheDir;
|
|
4558
4605
|
},
|
|
4559
4606
|
debugDir() {
|
|
4560
4607
|
return debugDir;
|
|
4561
4608
|
},
|
|
4609
|
+
blobDir() {
|
|
4610
|
+
return blobDir;
|
|
4611
|
+
},
|
|
4562
4612
|
async lookup(namespace, keyHash) {
|
|
4563
|
-
|
|
4613
|
+
const entry = (await readCacheFile(cacheDir, ownerFromNamespace(namespace)))?.entries[keyHash] ?? null;
|
|
4614
|
+
return entry === null ? null : await materializeExternalJsonCacheEntry(entry, externalJsonStore);
|
|
4564
4615
|
},
|
|
4565
4616
|
async lookupWithDebug(namespace, keyHash) {
|
|
4566
4617
|
const owner = ownerFromNamespace(namespace);
|
|
4567
|
-
const
|
|
4618
|
+
const rawEntry = (await readCacheFile(cacheDir, owner))?.entries[keyHash] ?? null;
|
|
4619
|
+
const entry = rawEntry === null ? null : await materializeExternalJsonCacheEntry(rawEntry, externalJsonStore);
|
|
4568
4620
|
if (entry === null) return null;
|
|
4569
4621
|
const debugKey = (await readDebugKeyFile(debugDir, owner))?.entries[keyHash];
|
|
4570
4622
|
const deserializedEntry = {
|
|
@@ -4590,6 +4642,7 @@ function createFsCacheStore(options) {
|
|
|
4590
4642
|
}, entry.namespace, maxEntriesForNamespace(entry.namespace, defaultMaxEntries, options.maxEntriesByNamespace), entry.key)
|
|
4591
4643
|
});
|
|
4592
4644
|
});
|
|
4645
|
+
await pruneExternalJsonBlobs(cacheDir, blobDir);
|
|
4593
4646
|
if (debugKey !== void 0) {
|
|
4594
4647
|
if ((await resultify(() => writeDebugKeyEntry({
|
|
4595
4648
|
debugDir,
|
|
@@ -4641,6 +4694,10 @@ function createFsCacheStore(options) {
|
|
|
4641
4694
|
recursive: true,
|
|
4642
4695
|
force: true
|
|
4643
4696
|
});
|
|
4697
|
+
await rm(blobDir, {
|
|
4698
|
+
recursive: true,
|
|
4699
|
+
force: true
|
|
4700
|
+
});
|
|
4644
4701
|
return;
|
|
4645
4702
|
}
|
|
4646
4703
|
if (filter.namespace !== void 0) {
|
|
@@ -4659,6 +4716,7 @@ function createFsCacheStore(options) {
|
|
|
4659
4716
|
});
|
|
4660
4717
|
});
|
|
4661
4718
|
await clearDebugEntries(debugDir, filter);
|
|
4719
|
+
await pruneExternalJsonBlobs(cacheDir, blobDir);
|
|
4662
4720
|
return;
|
|
4663
4721
|
}
|
|
4664
4722
|
if (existsSync(cacheDir)) {
|
|
@@ -4679,6 +4737,7 @@ function createFsCacheStore(options) {
|
|
|
4679
4737
|
}
|
|
4680
4738
|
}
|
|
4681
4739
|
await clearDebugEntries(debugDir, filter);
|
|
4740
|
+
await pruneExternalJsonBlobs(cacheDir, blobDir);
|
|
4682
4741
|
}
|
|
4683
4742
|
};
|
|
4684
4743
|
}
|
|
@@ -4692,9 +4751,10 @@ function createFsCacheStore(options) {
|
|
|
4692
4751
|
function createBufferedCacheStore(backingStore) {
|
|
4693
4752
|
const pendingEntries = /* @__PURE__ */ new Map();
|
|
4694
4753
|
return {
|
|
4754
|
+
externalJsonStore: backingStore.externalJsonStore,
|
|
4695
4755
|
async lookup(namespace, keyHash) {
|
|
4696
4756
|
const buffered = pendingEntries.get(toPendingKey(namespace, keyHash));
|
|
4697
|
-
if (buffered !== void 0) return buffered.entry;
|
|
4757
|
+
if (buffered !== void 0) return backingStore.externalJsonStore === void 0 ? buffered.entry : await materializeExternalJsonCacheEntry(buffered.entry, backingStore.externalJsonStore);
|
|
4698
4758
|
return backingStore.lookup(namespace, keyHash);
|
|
4699
4759
|
},
|
|
4700
4760
|
write(entry, debugKey) {
|
|
@@ -4733,6 +4793,95 @@ function toPendingKey(namespace, keyHash) {
|
|
|
4733
4793
|
function sanitizeSegment$1(segment) {
|
|
4734
4794
|
return segment.replace(/[^a-zA-Z0-9_.-]/g, "_");
|
|
4735
4795
|
}
|
|
4796
|
+
/**
 * Creates a store for externalized JSON payloads rooted at `blobDir`.
 *
 * Blobs are addressed by the hash of their uncompressed UTF-8 bytes and kept
 * on disk brotli-compressed.
 *
 * @param {string} blobDir - Directory that holds the blob files.
 * @returns {{ write(rawJson: string): Promise<object>, read(ref: object): Promise<string> }}
 *   `write` persists a JSON string and returns `{ compressedLength, hash, length, path }`;
 *   `read` loads the blob named by `ref.path` and returns the original JSON string.
 */
function createExternalJsonBlobStore(blobDir) {
	return {
		/**
		 * Persists `rawJson` (if a blob with the same hash is not already on disk)
		 * and returns the reference describing it.
		 */
		async write(rawJson) {
			const rawBytes = Buffer.from(rawJson, "utf8");
			const hash = hashExternalJson(rawBytes);
			const path = externalJsonBlobPath(hash);
			// Compression always runs: compressedLength is part of the returned reference.
			const compressed = brotliCompressSync(rawBytes);
			const filePath = resolveStorePath(blobDir, path);
			if (!existsSync(filePath)) {
				await mkdir(dirname(filePath), { recursive: true });
				// The temp name must be unique per call, not just per process: two
				// concurrent writes of identical content would otherwise share one temp
				// file, and the second rename would fail once the first had moved it.
				const tmpPath = `${filePath}.${process.pid.toString()}.${randomUUID()}.tmp`;
				try {
					await writeFile(tmpPath, compressed);
					await rename(tmpPath, filePath);
				} catch (error) {
					// Do not leave a partial temp file behind when the write or rename fails.
					await rm(tmpPath, { force: true });
					throw error;
				}
			}
			return {
				compressedLength: compressed.byteLength,
				hash,
				length: rawBytes.byteLength,
				path
			};
		},
		/**
		 * Reads and decompresses the blob at `ref.path`, verifying its byte length
		 * and hash against `ref`.
		 *
		 * @throws {Error} When the decompressed bytes do not match `ref.length` or `ref.hash`.
		 */
		async read(ref) {
			const compressed = await readFile(resolveStorePath(blobDir, ref.path));
			const rawBytes = brotliDecompressSync(compressed);
			if (rawBytes.byteLength !== ref.length || hashExternalJson(rawBytes) !== ref.hash) throw new Error(`External cache blob failed integrity check: ${ref.hash}`);
			return rawBytes.toString("utf8");
		}
	};
}
|
|
4825
|
+
/**
 * Computes the SHA-256 digest of `rawBytes` and formats it as `sha256:<hex>`.
 *
 * @param {Buffer|Uint8Array|string} rawBytes - Data to hash.
 * @returns {string} The hex digest prefixed with `sha256:`.
 */
function hashExternalJson(rawBytes) {
	const hasher = createHash("sha256");
	hasher.update(rawBytes);
	const hexDigest = hasher.digest("hex");
	return `sha256:${hexDigest}`;
}
|
|
4828
|
+
/**
 * Derives the store-relative file path for a blob hash.
 *
 * The result is `sha256/<first two digest characters>/<digest><externalJsonBlobExtension>`.
 *
 * @param {string} hash - Hash string; its first 7 characters are dropped
 *   (the length of the `sha256:` prefix that `hashExternalJson` emits).
 * @returns {string} Relative path of the blob file.
 */
function externalJsonBlobPath(hash) {
	const digest = hash.slice(7);
	const shardDir = digest.slice(0, 2);
	const fileName = `${digest}${externalJsonBlobExtension}`;
	return join("sha256", shardDir, fileName);
}
|
|
4832
|
+
/**
 * Resolves `relativePath` against `root` and rejects any result outside `root`.
 *
 * The root is normalized with `resolve()` before the containment check, so a
 * relative root or one with a trailing separator is handled the same way as
 * its absolute, normalized form.
 *
 * @param {string} root - Store root directory.
 * @param {string} relativePath - Path expected to stay inside `root`.
 * @returns {string} The absolute resolved path.
 * @throws {Error} When the resolved path is neither `root` itself nor inside it.
 */
function resolveStorePath(root, relativePath) {
	const resolvedRoot = resolve(root);
	const path = resolve(resolvedRoot, relativePath);
	// resolve() keeps a trailing separator only for a filesystem root; avoid doubling it.
	const rootPrefix = resolvedRoot.endsWith(sep) ? resolvedRoot : `${resolvedRoot}${sep}`;
	if (path !== resolvedRoot && !path.startsWith(rootPrefix)) throw new Error(`External cache blob path escapes store: ${relativePath}`);
	return path;
}
|
|
4837
|
+
/**
 * Returns a shallow copy of `entry` whose `recording` has been passed through
 * `materializeExternalJsonValues` and then validated with `cacheRecordingSchema`.
 *
 * @param {object} entry - Cache entry with a `recording` property.
 * @param {object} store - External JSON store handed to `materializeExternalJsonValues`.
 * @returns {Promise<object>} The copied entry with the parsed recording.
 */
async function materializeExternalJsonCacheEntry(entry, store) {
	// Copy first so the snapshot of the entry is taken before the await.
	const materializedEntry = { ...entry };
	const resolvedRecording = await materializeExternalJsonValues(entry.recording, store);
	materializedEntry.recording = cacheRecordingSchema.parse(resolvedRecording);
	return materializedEntry;
}
|
|
4843
|
+
/**
 * Deletes blob files under `blobDir` whose relative path is not referenced by
 * any cache file in `cacheDir`. Does nothing when `blobDir` does not exist.
 *
 * @param {string} cacheDir - Directory containing the cache JSON files.
 * @param {string} blobDir - Directory containing the external blob files.
 * @returns {Promise<void>}
 */
async function pruneExternalJsonBlobs(cacheDir, blobDir) {
	if (!existsSync(blobDir)) return;
	const referencedPaths = await collectReferencedExternalJsonBlobPaths(cacheDir);
	const storedPaths = await listExternalJsonBlobPaths(blobDir);
	for (const storedPath of storedPaths) {
		if (referencedPaths.has(storedPath)) continue;
		await rm(resolveStorePath(blobDir, storedPath), { force: true });
	}
}
|
|
4848
|
+
/**
 * Gathers the external blob paths referenced by the `.json` files directly
 * inside `cacheDir`.
 *
 * Files for which `readCacheFilePath` returns `null` are skipped.
 *
 * @param {string} cacheDir - Directory containing the cache JSON files.
 * @returns {Promise<Set<string>>} Referenced blob paths; empty when `cacheDir` does not exist.
 */
async function collectReferencedExternalJsonBlobPaths(cacheDir) {
	const referencedPaths = /* @__PURE__ */ new Set();
	if (existsSync(cacheDir)) {
		const jsonFileNames = (await readdir(cacheDir)).filter((name) => name.endsWith(".json"));
		for (const name of jsonFileNames) {
			const parsedFile = await readCacheFilePath(join(cacheDir, name));
			if (parsedFile !== null) collectExternalJsonBlobPaths(parsedFile, referencedPaths);
		}
	}
	return referencedPaths;
}
|
|
4860
|
+
/**
 * Recursively walks `value` and adds to `paths` the `path` of every record
 * whose `cacheSerializationMarker` property equals
 * `externalJsonCacheSerializationMarker` and whose `path` is a string.
 *
 * @param {unknown} value - Value to scan; arrays and record-like values are descended into.
 * @param {Set<string>} paths - Accumulator that receives the discovered paths.
 * @returns {void}
 */
function collectExternalJsonBlobPaths(value, paths) {
	let children;
	if (Array.isArray(value)) {
		children = value;
	} else if (isRecordLike(value)) {
		const isExternalRef = value[cacheSerializationMarker] === externalJsonCacheSerializationMarker;
		if (isExternalRef && typeof value.path === "string") paths.add(value.path);
		// A reference record is still descended into, like any other record.
		children = Object.values(value);
	} else {
		return;
	}
	for (const child of children) collectExternalJsonBlobPaths(child, paths);
}
|
|
4869
|
+
/**
 * Lists the blob files found under `blobDir`, as paths relative to `blobDir`.
 *
 * @param {string} blobDir - Root directory of the blob store.
 * @returns {Promise<string[]>} Relative paths collected by `collectExternalJsonBlobFilePaths`.
 */
async function listExternalJsonBlobPaths(blobDir) {
	const collectedPaths = [];
	// The store root is both the base for relative paths and the first directory scanned.
	await collectExternalJsonBlobFilePaths(blobDir, blobDir, collectedPaths);
	return collectedPaths;
}
|
|
4874
|
+
/**
 * Recursively scans `dir` and appends to `paths` the path, relative to `root`,
 * of every regular file whose name ends with `externalJsonBlobExtension`.
 *
 * @param {string} root - Directory the pushed paths are made relative to.
 * @param {string} dir - Directory scanned by this call.
 * @param {string[]} paths - Accumulator that receives the relative paths.
 * @returns {Promise<void>}
 */
async function collectExternalJsonBlobFilePaths(root, dir, paths) {
	for (const dirent of await readdir(dir, { withFileTypes: true })) {
		const entryPath = join(dir, dirent.name);
		if (dirent.isDirectory()) {
			await collectExternalJsonBlobFilePaths(root, entryPath, paths);
		} else if (dirent.isFile() && dirent.name.endsWith(externalJsonBlobExtension)) {
			paths.push(relative(root, entryPath));
		}
	}
}
|
|
4736
4885
|
/**
 * Reads the cache file for `owner` inside `cacheDir`.
 *
 * @param {string} cacheDir - Directory containing the cache files.
 * @param {*} owner - Owner identifier, turned into a file path by `ownerPath`.
 * @returns {Promise<*>} Whatever `readCacheFilePath` yields for that path.
 */
async function readCacheFile(cacheDir, owner) {
	const ownerFilePath = ownerPath(cacheDir, owner);
	return readCacheFilePath(ownerFilePath);
}
|
|
@@ -4752,7 +4901,7 @@ async function readCacheFilePath(filePath) {
|
|
|
4752
4901
|
/**
 * Checks recursively that every serialization marker found in `value` is supported.
 *
 * A record fails the check when it owns a `cacheSerializationMarker` property
 * that is not a string starting with `supportedCacheSerializationPrefix`.
 * Values that are neither arrays nor record-like always pass.
 *
 * @param {unknown} value - Value to validate.
 * @returns {boolean} `true` when no unsupported marker is found.
 */
function usesSupportedCacheSerialization(value) {
	if (Array.isArray(value)) return value.every((item) => usesSupportedCacheSerialization(item));
	if (!isRecordLike(value)) return true;
	if (Object.hasOwn(value, cacheSerializationMarker)) {
		const marker = value[cacheSerializationMarker];
		const isSupported = typeof marker === "string" && marker.startsWith(supportedCacheSerializationPrefix);
		if (!isSupported) return false;
	}
	return Object.values(value).every((child) => usesSupportedCacheSerialization(child));
}
|
|
4758
4907
|
async function writeOrRemoveCacheFile(cacheDir, cacheFile) {
|
|
@@ -6188,6 +6337,27 @@ function runTouchesEval(params) {
|
|
|
6188
6337
|
if (params.target.mode === "evalIds") return params.target.evalKeys?.includes(params.evalKey) ?? params.target.evalIds?.includes(params.evalId ?? params.evalKey) ?? false;
|
|
6189
6338
|
return false;
|
|
6190
6339
|
}
|
|
6340
|
+
/**
 * Removes every run whose manifest has `temporary === true`.
 *
 * A temporary run that is still `"running"` is first marked cancelled (status,
 * end time, and total duration are updated) and passed to
 * `params.cancelRunningRun`. Each temporary run is then deleted from
 * `params.runs` and its `runDir` is removed from disk.
 *
 * @param {{ runs: Map<string, object>, cancelRunningRun: (run: object) => void }} params
 * @returns {Promise<number>} Number of runs deleted.
 */
async function deleteTemporaryRuns(params) {
	// Iterate over a snapshot because entries are deleted from the map inside the loop.
	const snapshot = Array.from(params.runs);
	let deletedRuns = 0;
	for (const [runId, run] of snapshot) {
		const { manifest } = run;
		if (manifest.temporary !== true) continue;
		if (manifest.status === "running") {
			const endedAt = /* @__PURE__ */ new Date();
			const startedAtMs = new Date(manifest.startedAt).getTime();
			manifest.status = "cancelled";
			manifest.endedAt = endedAt.toISOString();
			run.summary.status = "cancelled";
			run.summary.totalDurationMs = endedAt.getTime() - startedAtMs;
			params.cancelRunningRun(run);
		}
		params.runs.delete(runId);
		await rm(run.runDir, { recursive: true, force: true });
		deletedRuns += 1;
	}
	return deletedRuns;
}
|
|
6191
6361
|
async function recomputeEvalStatusesInRuns(params) {
|
|
6192
6362
|
let updatedRuns = 0;
|
|
6193
6363
|
for (const run of params.runs) {
|
|
@@ -6384,6 +6554,12 @@ function encodeCaseDetailFileName(caseId) {
|
|
|
6384
6554
|
return encodeURIComponent(caseId);
|
|
6385
6555
|
}
|
|
6386
6556
|
//#endregion
|
|
6557
|
+
//#region ../runner/src/stackFormatting.ts
|
|
6558
|
+
// Matches SGR-style fragments such as `[0m` or `[1;31m` that have no leading escape character.
const orphanedAnsiSgrPattern = /\[(?:\d{1,3}(?:;\d{1,3})*)?m/g;
/**
 * Removes terminal control sequences from `value`.
 *
 * `stripVTControlCharacters` runs first; any remaining fragments matching
 * `orphanedAnsiSgrPattern` are then removed as well.
 *
 * @param {string} value - Text that may contain terminal control codes.
 * @returns {string} The text without those codes.
 */
function stripTerminalControlCodes(value) {
	const withoutEscapeSequences = stripVTControlCharacters(value);
	return withoutEscapeSequences.replace(orphanedAnsiSgrPattern, "");
}
|
|
6562
|
+
//#endregion
|
|
6387
6563
|
//#region ../runner/src/moduleIsolation.ts
|
|
6388
6564
|
const isolationParam = "agent-evals-isolate";
|
|
6389
6565
|
const pathSegmentSeparatorPattern = /[\\/]+/;
|
|
@@ -6474,12 +6650,6 @@ async function runWithModuleIsolation(context, fn) {
|
|
|
6474
6650
|
return await isolationStorage.run(context, fn);
|
|
6475
6651
|
}
|
|
6476
6652
|
//#endregion
|
|
6477
|
-
//#region ../runner/src/stackFormatting.ts
|
|
6478
|
-
const orphanedAnsiSgrPattern = /\[(?:\d{1,3}(?:;\d{1,3})*)?m/g;
|
|
6479
|
-
function stripTerminalControlCodes(value) {
|
|
6480
|
-
return stripVTControlCharacters(value).replaceAll(orphanedAnsiSgrPattern, "");
|
|
6481
|
-
}
|
|
6482
|
-
//#endregion
|
|
6483
6653
|
//#region ../runner/src/runExecution.ts
|
|
6484
6654
|
function filterEvalCases(cases, caseIds) {
|
|
6485
6655
|
if (!caseIds || caseIds.length === 0) return cases;
|
|
@@ -7061,6 +7231,8 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
|
|
|
7061
7231
|
const duplicateCaseIds = findDuplicateCaseIds(runnableCases);
|
|
7062
7232
|
if (duplicateCaseIds.length > 0) throw new Error(`Duplicate case id${duplicateCaseIds.length === 1 ? "" : "s"} in ${evalMeta.filePath}#${evalMeta.id}: ${duplicateCaseIds.join(", ")}`);
|
|
7063
7233
|
const cases = filterEvalCases(runnableCases, request.target.caseIds);
|
|
7234
|
+
evalMeta.caseCount = runnableCases.length;
|
|
7235
|
+
evalMeta.caseIds = runnableCases.map((evalCase) => evalCase.id);
|
|
7064
7236
|
runState.summary.totalCases += cases.length;
|
|
7065
7237
|
const defaultConfig = resolveEvalDefaultConfig({
|
|
7066
7238
|
evalDef,
|
|
@@ -7269,4 +7441,4 @@ function toLastRunStatus(status) {
|
|
|
7269
7441
|
return status === "pending" ? null : status;
|
|
7270
7442
|
}
|
|
7271
7443
|
//#endregion
|
|
7272
|
-
export {
|
|
7444
|
+
export { apiCallMetricFormatSchema as $, evalAssert as $n, evalChartTypeSchema as $t, configReloadStateSchema as A, jsonCellSchema as An, evalStatsConfigSchema as At, simulateTokenAllocation as B, hashCacheKeySync as Bn, manualInputJsonFieldSchema as Bt, parseEvalDiscovery as C, traceSpanSchema as Cn, assertionFailureSchema as Ct, normalizeScoreDef as D, columnFormatSchema as Dn, evalFreshnessStatusSchema as Dt, buildDeclaredColumnDefs as E, columnDefSchema as En, discoveryIssueSchema as Et, extractCacheEntries as F, buildTraceTree as Fn, runLogPhaseSchema as Ft, deriveScopedSummaryFromCases as G, repoFile as Gn, manualInputTextFieldSchema as Gt, getNestedAttribute as H, deserializeCacheValue as Hn, manualInputNumberFieldSchema as Ht, extractCacheHits as I, captureEvalSpanError as In, scoreTraceSchema as It, runManifestSchema as J, evalExpect as Jn, evalChartBuiltinMetricSchema as Jt, deriveStatusFromCaseRows as K, manualInputFileValueSchema as Kn, evalChartAggregateSchema as Kt, extractApiCalls as L, evalSpan as Ln, manualInputBooleanFieldSchema as Lt, createRunRequestSchema as M, repoFileRefSchema as Mn, runLogEntrySchema as Mt, updateManualScoreRequestSchema as N, runArtifactRefSchema as Nn, runLogLevelSchema as Nt, validateCharts as O, columnKindSchema as On, evalStatAggregateSchema as Ot, sseEnvelopeSchema as P, z$1 as Pn, runLogLocationSchema as Pt, agentEvalsConfigSchema as Q, configureEvalRunLogs as Qn, evalChartTooltipExtraSchema as Qt, extractLlmCalls as R, evalTracer as Rn, manualInputDescriptorSchema as Rt, loadEvalModule as S, traceSpanKindSchema as Sn, getCaseRowEvalKey as St, loadConfig as T, cellValueSchema as Tn, caseRowSchema as Tt, getEvalTitle as U, serializeCacheRecording as Un, manualInputSelectFieldSchema as Ut, applyDerivedCallAttributes as V, deserializeCacheRecording as Vn, manualInputMultilineFieldSchema as Vt, getEvalDisplayStatus as W, serializeCacheValue as Wn, manualInputSelectOptionSchema as Wt, DEFAULT_API_CALLS_CONFIG as X, 
advanceEvalTime as Xn, evalChartConfigSchema as Xt, runSummarySchema as Y, EvalAssertionError as Yn, evalChartColorSchema as Yt, DEFAULT_LLM_CALLS_CONFIG as Z, appendToEvalOutput as Zn, evalChartMetricSchema as Zt, resolveTracePresentation as _, traceAttributeDisplayPlacementSchema as _n, runLogsConfigSchema as _t, generateRunId as a, cacheFileSchema as an, isInEvalScope as ar, evalColumnsSchema as at, parseManualInputValues as b, traceDisplayInputConfigSchema as bn, buildEvalKey as bt, loadPersistedRunSnapshot as c, cacheOperationTypeSchema as cn, runInEvalRuntimeScope as cr, llmCallMetricFormatSchema as ct, persistCaseDetail as d, cacheStatusSchema as dn, setEvalOutput as dr, llmCallPricingRateSchema as dt, evalChartsConfigSchema as en, evalLog as er, apiCallMetricPlacementSchema as et, deleteTemporaryRuns as f, serializedCacheSpanSchema as fn, setScopeCacheContext as fr, llmCallPricingSchema as ft, runTouchesEval as g, traceAttributeDisplayInputSchema as gn, resolveLlmCallsConfig as gt, recomputePersistedCaseStatus as h, traceAttributeDisplayFormatSchema as hn, getEvalRegistry as hr, resolveApiCallsConfig as ht, stripTerminalControlCodes as i, cacheEntryWithDebugKeySchema as in, incrementEvalOutput as ir, evalColumnOverrideSchema as it, configReloadStatusSchema as j, numberDisplayOptionsSchema as jn, evalSummarySchema as jt, createFsCacheStore as k, fileRefSchema as kn, evalStatItemSchema as kt, loadPersistedRunSnapshots as l, cacheRecordingOpSchema as ln, runInEvalScope as lr, llmCallMetricPlacementSchema as lt, recomputeEvalStatusesInRuns as m, traceCacheRefSchema as mn, defineEval as mr, removeDefaultConfigSchema as mt, getTargetEvalKeys as n, cacheDebugKeyFileSchema as nn, getEvalCaseInput as nr, apiCallsConfigSchema as nt, getLastRunStatuses as o, cacheListItemSchema as on, mergeEvalOutput as or, evalDeriveConfigSchema as ot, persistRunState as p, spanCacheOptionsSchema as pn, startEvalBackgroundJob as pr, llmCallsConfigSchema as pt, 
deriveStatusFromChildStatuses as q, readManualInputFile as qn, evalChartAxisSchema as qt, getTargetEvals as r, cacheEntrySchema as rn, getEvalStartTime as rr, defaultConfigKeySchema as rt, getLatestRunInfos as s, cacheModeSchema as sn, nextEvalId as sr, llmCallCostCurrencySchema as st, executeRun as t, cacheDebugKeyEntrySchema as tn, getCurrentScope as tr, apiCallMetricSchema as tt, nextShortIdFromSnapshots as u, cacheRecordingSchema as un, runInExistingEvalScope as ur, llmCallMetricSchema as ut, resolveArtifactPath as v, traceAttributeDisplaySchema as vn, trialSelectionModeSchema as vt, resolveEvalDefaultConfig as w, traceSpanWarningSchema as wn, caseDetailSchema as wt, deriveEvalFreshness as x, traceSpanErrorSchema as xn, getCaseRowCaseKey as xt, buildManualInputDescriptor as y, traceDisplayConfigSchema as yn, buildCaseKey as yt, simulateLlmCallCost as z, hashCacheKey as zn, manualInputFieldDescriptorSchema as zt };
|