@ls-stack/agent-eval 0.50.0 → 0.51.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-DR9WPMA4.mjs → app-CzLj4ZX0.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-C5SveD-X.css +1 -0
- package/dist/apps/web/dist/assets/{index-BkXnL_y8.js → index-DwgyYZgf.js} +40 -40
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-R7_V6YWa.mjs → cli-Cvs7tc2v.mjs} +3 -3
- package/dist/index.d.mts +59 -61
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-CokPQet7.mjs → runOrchestration-o38J7uZO.mjs} +112 -106
- package/dist/{runner-Coc9wBWz.mjs → runner-LdMiDmAN.mjs} +2 -2
- package/dist/{runner-B8dLVAyM.mjs → runner-iWtmKx9z.mjs} +1 -1
- package/dist/{src-B43qR0Ea.mjs → src-Jahivm6d.mjs} +2 -2
- package/package.json +3 -3
- package/dist/apps/web/dist/assets/index-BQY_snr3.css +0 -1
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-BkXnL_y8.js"></script>
|
|
29
|
-
<link rel="stylesheet" crossorigin href="/assets/index-BQY_snr3.css">
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-DwgyYZgf.js"></script>
|
|
29
|
+
<link rel="stylesheet" crossorigin href="/assets/index-C5SveD-X.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
|
32
32
|
<div id="root"></div>
|
package/dist/bin.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-CokPQet7.mjs";
|
|
1
|
+
import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-o38J7uZO.mjs";
|
|
2
2
|
import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
3
3
|
import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
|
|
4
4
|
import { createHash, randomUUID } from "node:crypto";
|
|
@@ -2095,8 +2095,8 @@ async function commandApp(args) {
|
|
|
2095
2095
|
const { serve } = await import("@hono/node-server");
|
|
2096
2096
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
2097
2097
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
2098
|
-
const appModule = await import("./app-DR9WPMA4.mjs");
|
|
2099
|
-
const runnerModule = await import("./runner-B8dLVAyM.mjs");
|
|
2098
|
+
const appModule = await import("./app-CzLj4ZX0.mjs");
|
|
2099
|
+
const runnerModule = await import("./runner-iWtmKx9z.mjs");
|
|
2100
2100
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
2101
2101
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
2102
2102
|
await runnerModule.initRunner();
|
package/dist/index.d.mts
CHANGED
|
@@ -1308,8 +1308,6 @@ type EvalCaseScope = {
|
|
|
1308
1308
|
logs: RunLogEntry$1[];
|
|
1309
1309
|
spans: EvalTraceSpan$2[];
|
|
1310
1310
|
checkpoints: Map<string, unknown>;
|
|
1311
|
-
spanStack: string[];
|
|
1312
|
-
activeSpanStack: EvalTraceSpan$2[];
|
|
1313
1311
|
/**
|
|
1314
1312
|
* Stack of active cache recorders. Ops are written to the top-most frame
|
|
1315
1313
|
* when it exists and `replayingDepth === 0`.
|
|
@@ -1837,9 +1835,9 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
|
|
|
1837
1835
|
subtree: "subtree";
|
|
1838
1836
|
}>>;
|
|
1839
1837
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1840
|
-
sum: "sum";
|
|
1841
|
-
last: "last";
|
|
1842
1838
|
all: "all";
|
|
1839
|
+
last: "last";
|
|
1840
|
+
sum: "sum";
|
|
1843
1841
|
}>>;
|
|
1844
1842
|
}, z$1.core.$strip>;
|
|
1845
1843
|
/**
|
|
@@ -1873,9 +1871,9 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
|
|
|
1873
1871
|
subtree: "subtree";
|
|
1874
1872
|
}>>;
|
|
1875
1873
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1876
|
-
sum: "sum";
|
|
1877
|
-
last: "last";
|
|
1878
1874
|
all: "all";
|
|
1875
|
+
last: "last";
|
|
1876
|
+
sum: "sum";
|
|
1879
1877
|
}>>;
|
|
1880
1878
|
}, z$1.core.$strip>>>;
|
|
1881
1879
|
}, z$1.core.$strip>;
|
|
@@ -1913,9 +1911,9 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
|
|
|
1913
1911
|
subtree: "subtree";
|
|
1914
1912
|
}>>;
|
|
1915
1913
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1916
|
-
sum: "sum";
|
|
1917
|
-
last: "last";
|
|
1918
1914
|
all: "all";
|
|
1915
|
+
last: "last";
|
|
1916
|
+
sum: "sum";
|
|
1919
1917
|
}>>;
|
|
1920
1918
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
1921
1919
|
}, z$1.core.$strip>;
|
|
@@ -1951,9 +1949,9 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
|
|
|
1951
1949
|
subtree: "subtree";
|
|
1952
1950
|
}>>;
|
|
1953
1951
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1954
|
-
sum: "sum";
|
|
1955
|
-
last: "last";
|
|
1956
1952
|
all: "all";
|
|
1953
|
+
last: "last";
|
|
1954
|
+
sum: "sum";
|
|
1957
1955
|
}>>;
|
|
1958
1956
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
1959
1957
|
}, z$1.core.$strip>>>;
|
|
@@ -1990,8 +1988,8 @@ declare const traceSpanSchema$1: z$1.ZodObject<{
|
|
|
1990
1988
|
status: z$1.ZodEnum<{
|
|
1991
1989
|
error: "error";
|
|
1992
1990
|
running: "running";
|
|
1993
|
-
ok: "ok";
|
|
1994
1991
|
cancelled: "cancelled";
|
|
1992
|
+
ok: "ok";
|
|
1995
1993
|
}>;
|
|
1996
1994
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
1997
1995
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -2032,11 +2030,11 @@ declare const evalFreshnessStatusSchema: z$1.ZodEnum<{
|
|
|
2032
2030
|
type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
|
|
2033
2031
|
/** Reducer used to collapse a column's per-case values into a single stat. */
|
|
2034
2032
|
declare const evalStatAggregateSchema: z$1.ZodEnum<{
|
|
2033
|
+
last: "last";
|
|
2034
|
+
sum: "sum";
|
|
2035
2035
|
avg: "avg";
|
|
2036
2036
|
min: "min";
|
|
2037
2037
|
max: "max";
|
|
2038
|
-
sum: "sum";
|
|
2039
|
-
last: "last";
|
|
2040
2038
|
}>;
|
|
2041
2039
|
/** Reducer used to collapse a column's per-case values into a single stat. */
|
|
2042
2040
|
type EvalStatAggregate = z$1.infer<typeof evalStatAggregateSchema>;
|
|
@@ -2065,11 +2063,11 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2065
2063
|
key: z$1.ZodString;
|
|
2066
2064
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2067
2065
|
aggregate: z$1.ZodEnum<{
|
|
2066
|
+
last: "last";
|
|
2067
|
+
sum: "sum";
|
|
2068
2068
|
avg: "avg";
|
|
2069
2069
|
min: "min";
|
|
2070
2070
|
max: "max";
|
|
2071
|
-
sum: "sum";
|
|
2072
|
-
last: "last";
|
|
2073
2071
|
}>;
|
|
2074
2072
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2075
2073
|
number: "number";
|
|
@@ -2110,11 +2108,11 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2110
2108
|
key: z$1.ZodString;
|
|
2111
2109
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2112
2110
|
aggregate: z$1.ZodEnum<{
|
|
2111
|
+
last: "last";
|
|
2112
|
+
sum: "sum";
|
|
2113
2113
|
avg: "avg";
|
|
2114
2114
|
min: "min";
|
|
2115
2115
|
max: "max";
|
|
2116
|
-
sum: "sum";
|
|
2117
|
-
last: "last";
|
|
2118
2116
|
}>;
|
|
2119
2117
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2120
2118
|
number: "number";
|
|
@@ -2216,11 +2214,11 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2216
2214
|
key: z$1.ZodString;
|
|
2217
2215
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2218
2216
|
aggregate: z$1.ZodEnum<{
|
|
2217
|
+
last: "last";
|
|
2218
|
+
sum: "sum";
|
|
2219
2219
|
avg: "avg";
|
|
2220
2220
|
min: "min";
|
|
2221
2221
|
max: "max";
|
|
2222
|
-
sum: "sum";
|
|
2223
|
-
last: "last";
|
|
2224
2222
|
}>;
|
|
2225
2223
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2226
2224
|
number: "number";
|
|
@@ -2258,9 +2256,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2258
2256
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2259
2257
|
success: "success";
|
|
2260
2258
|
error: "error";
|
|
2259
|
+
warning: "warning";
|
|
2261
2260
|
accent: "accent";
|
|
2262
2261
|
accentDim: "accentDim";
|
|
2263
|
-
warning: "warning";
|
|
2264
2262
|
textMuted: "textMuted";
|
|
2265
2263
|
}>>;
|
|
2266
2264
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -2271,10 +2269,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2271
2269
|
source: z$1.ZodLiteral<"column">;
|
|
2272
2270
|
key: z$1.ZodString;
|
|
2273
2271
|
aggregate: z$1.ZodEnum<{
|
|
2272
|
+
sum: "sum";
|
|
2274
2273
|
avg: "avg";
|
|
2275
2274
|
min: "min";
|
|
2276
2275
|
max: "max";
|
|
2277
|
-
sum: "sum";
|
|
2278
2276
|
latest: "latest";
|
|
2279
2277
|
passThresholdRate: "passThresholdRate";
|
|
2280
2278
|
}>;
|
|
@@ -2282,9 +2280,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2282
2280
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2283
2281
|
success: "success";
|
|
2284
2282
|
error: "error";
|
|
2283
|
+
warning: "warning";
|
|
2285
2284
|
accent: "accent";
|
|
2286
2285
|
accentDim: "accentDim";
|
|
2287
|
-
warning: "warning";
|
|
2288
2286
|
textMuted: "textMuted";
|
|
2289
2287
|
}>>;
|
|
2290
2288
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -2313,10 +2311,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2313
2311
|
source: z$1.ZodLiteral<"column">;
|
|
2314
2312
|
key: z$1.ZodString;
|
|
2315
2313
|
aggregate: z$1.ZodEnum<{
|
|
2314
|
+
sum: "sum";
|
|
2316
2315
|
avg: "avg";
|
|
2317
2316
|
min: "min";
|
|
2318
2317
|
max: "max";
|
|
2319
|
-
sum: "sum";
|
|
2320
2318
|
latest: "latest";
|
|
2321
2319
|
passThresholdRate: "passThresholdRate";
|
|
2322
2320
|
}>;
|
|
@@ -2413,11 +2411,11 @@ declare const caseRowSchema$1: z$1.ZodObject<{
|
|
|
2413
2411
|
tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2414
2412
|
status: z$1.ZodEnum<{
|
|
2415
2413
|
error: "error";
|
|
2414
|
+
pending: "pending";
|
|
2416
2415
|
running: "running";
|
|
2417
2416
|
cancelled: "cancelled";
|
|
2418
2417
|
pass: "pass";
|
|
2419
2418
|
fail: "fail";
|
|
2420
|
-
pending: "pending";
|
|
2421
2419
|
}>;
|
|
2422
2420
|
durationMs: z$1.ZodNullable<z$1.ZodNumber>;
|
|
2423
2421
|
cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
|
|
@@ -2513,8 +2511,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2513
2511
|
status: z$1.ZodEnum<{
|
|
2514
2512
|
error: "error";
|
|
2515
2513
|
running: "running";
|
|
2516
|
-
ok: "ok";
|
|
2517
2514
|
cancelled: "cancelled";
|
|
2515
|
+
ok: "ok";
|
|
2518
2516
|
}>;
|
|
2519
2517
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
2520
2518
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -2564,9 +2562,9 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2564
2562
|
subtree: "subtree";
|
|
2565
2563
|
}>>;
|
|
2566
2564
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2567
|
-
sum: "sum";
|
|
2568
|
-
last: "last";
|
|
2569
2565
|
all: "all";
|
|
2566
|
+
last: "last";
|
|
2567
|
+
sum: "sum";
|
|
2570
2568
|
}>>;
|
|
2571
2569
|
}, z$1.core.$strip>>>;
|
|
2572
2570
|
}, z$1.core.$strip>;
|
|
@@ -2582,11 +2580,11 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2582
2580
|
tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2583
2581
|
status: z$1.ZodEnum<{
|
|
2584
2582
|
error: "error";
|
|
2583
|
+
pending: "pending";
|
|
2585
2584
|
running: "running";
|
|
2586
2585
|
cancelled: "cancelled";
|
|
2587
2586
|
pass: "pass";
|
|
2588
2587
|
fail: "fail";
|
|
2589
|
-
pending: "pending";
|
|
2590
2588
|
}>;
|
|
2591
2589
|
input: z$1.ZodUnknown;
|
|
2592
2590
|
trace: z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -2600,8 +2598,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2600
2598
|
status: z$1.ZodEnum<{
|
|
2601
2599
|
error: "error";
|
|
2602
2600
|
running: "running";
|
|
2603
|
-
ok: "ok";
|
|
2604
2601
|
cancelled: "cancelled";
|
|
2602
|
+
ok: "ok";
|
|
2605
2603
|
}>;
|
|
2606
2604
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
2607
2605
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -2651,9 +2649,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2651
2649
|
subtree: "subtree";
|
|
2652
2650
|
}>>;
|
|
2653
2651
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2654
|
-
sum: "sum";
|
|
2655
|
-
last: "last";
|
|
2656
2652
|
all: "all";
|
|
2653
|
+
last: "last";
|
|
2654
|
+
sum: "sum";
|
|
2657
2655
|
}>>;
|
|
2658
2656
|
}, z$1.core.$strip>>>;
|
|
2659
2657
|
}, z$1.core.$strip>;
|
|
@@ -2669,8 +2667,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2669
2667
|
status: z$1.ZodEnum<{
|
|
2670
2668
|
error: "error";
|
|
2671
2669
|
running: "running";
|
|
2672
|
-
ok: "ok";
|
|
2673
2670
|
cancelled: "cancelled";
|
|
2671
|
+
ok: "ok";
|
|
2674
2672
|
}>;
|
|
2675
2673
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
2676
2674
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -2720,9 +2718,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2720
2718
|
subtree: "subtree";
|
|
2721
2719
|
}>>;
|
|
2722
2720
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2723
|
-
sum: "sum";
|
|
2724
|
-
last: "last";
|
|
2725
2721
|
all: "all";
|
|
2722
|
+
last: "last";
|
|
2723
|
+
sum: "sum";
|
|
2726
2724
|
}>>;
|
|
2727
2725
|
}, z$1.core.$strip>>>;
|
|
2728
2726
|
}, z$1.core.$strip>;
|
|
@@ -2839,10 +2837,10 @@ declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
|
|
|
2839
2837
|
type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
|
|
2840
2838
|
/** Reducer applied to a numeric column across all cases of a single run. */
|
|
2841
2839
|
declare const evalChartAggregateSchema: z$1.ZodEnum<{
|
|
2840
|
+
sum: "sum";
|
|
2842
2841
|
avg: "avg";
|
|
2843
2842
|
min: "min";
|
|
2844
2843
|
max: "max";
|
|
2845
|
-
sum: "sum";
|
|
2846
2844
|
latest: "latest";
|
|
2847
2845
|
passThresholdRate: "passThresholdRate";
|
|
2848
2846
|
}>;
|
|
@@ -2855,9 +2853,9 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
|
|
|
2855
2853
|
declare const evalChartColorSchema: z$1.ZodEnum<{
|
|
2856
2854
|
success: "success";
|
|
2857
2855
|
error: "error";
|
|
2856
|
+
warning: "warning";
|
|
2858
2857
|
accent: "accent";
|
|
2859
2858
|
accentDim: "accentDim";
|
|
2860
|
-
warning: "warning";
|
|
2861
2859
|
textMuted: "textMuted";
|
|
2862
2860
|
}>;
|
|
2863
2861
|
/** Semantic color token resolved to a theme color by the web UI. */
|
|
@@ -2884,9 +2882,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2884
2882
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2885
2883
|
success: "success";
|
|
2886
2884
|
error: "error";
|
|
2885
|
+
warning: "warning";
|
|
2887
2886
|
accent: "accent";
|
|
2888
2887
|
accentDim: "accentDim";
|
|
2889
|
-
warning: "warning";
|
|
2890
2888
|
textMuted: "textMuted";
|
|
2891
2889
|
}>>;
|
|
2892
2890
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -2897,10 +2895,10 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2897
2895
|
source: z$1.ZodLiteral<"column">;
|
|
2898
2896
|
key: z$1.ZodString;
|
|
2899
2897
|
aggregate: z$1.ZodEnum<{
|
|
2898
|
+
sum: "sum";
|
|
2900
2899
|
avg: "avg";
|
|
2901
2900
|
min: "min";
|
|
2902
2901
|
max: "max";
|
|
2903
|
-
sum: "sum";
|
|
2904
2902
|
latest: "latest";
|
|
2905
2903
|
passThresholdRate: "passThresholdRate";
|
|
2906
2904
|
}>;
|
|
@@ -2908,9 +2906,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2908
2906
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2909
2907
|
success: "success";
|
|
2910
2908
|
error: "error";
|
|
2909
|
+
warning: "warning";
|
|
2911
2910
|
accent: "accent";
|
|
2912
2911
|
accentDim: "accentDim";
|
|
2913
|
-
warning: "warning";
|
|
2914
2912
|
textMuted: "textMuted";
|
|
2915
2913
|
}>>;
|
|
2916
2914
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -2932,10 +2930,10 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
|
|
|
2932
2930
|
source: z$1.ZodLiteral<"column">;
|
|
2933
2931
|
key: z$1.ZodString;
|
|
2934
2932
|
aggregate: z$1.ZodEnum<{
|
|
2933
|
+
sum: "sum";
|
|
2935
2934
|
avg: "avg";
|
|
2936
2935
|
min: "min";
|
|
2937
2936
|
max: "max";
|
|
2938
|
-
sum: "sum";
|
|
2939
2937
|
latest: "latest";
|
|
2940
2938
|
passThresholdRate: "passThresholdRate";
|
|
2941
2939
|
}>;
|
|
@@ -2967,9 +2965,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
2967
2965
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2968
2966
|
success: "success";
|
|
2969
2967
|
error: "error";
|
|
2968
|
+
warning: "warning";
|
|
2970
2969
|
accent: "accent";
|
|
2971
2970
|
accentDim: "accentDim";
|
|
2972
|
-
warning: "warning";
|
|
2973
2971
|
textMuted: "textMuted";
|
|
2974
2972
|
}>>;
|
|
2975
2973
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -2980,10 +2978,10 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
2980
2978
|
source: z$1.ZodLiteral<"column">;
|
|
2981
2979
|
key: z$1.ZodString;
|
|
2982
2980
|
aggregate: z$1.ZodEnum<{
|
|
2981
|
+
sum: "sum";
|
|
2983
2982
|
avg: "avg";
|
|
2984
2983
|
min: "min";
|
|
2985
2984
|
max: "max";
|
|
2986
|
-
sum: "sum";
|
|
2987
2985
|
latest: "latest";
|
|
2988
2986
|
passThresholdRate: "passThresholdRate";
|
|
2989
2987
|
}>;
|
|
@@ -2991,9 +2989,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
2991
2989
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2992
2990
|
success: "success";
|
|
2993
2991
|
error: "error";
|
|
2992
|
+
warning: "warning";
|
|
2994
2993
|
accent: "accent";
|
|
2995
2994
|
accentDim: "accentDim";
|
|
2996
|
-
warning: "warning";
|
|
2997
2995
|
textMuted: "textMuted";
|
|
2998
2996
|
}>>;
|
|
2999
2997
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3022,10 +3020,10 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3022
3020
|
source: z$1.ZodLiteral<"column">;
|
|
3023
3021
|
key: z$1.ZodString;
|
|
3024
3022
|
aggregate: z$1.ZodEnum<{
|
|
3023
|
+
sum: "sum";
|
|
3025
3024
|
avg: "avg";
|
|
3026
3025
|
min: "min";
|
|
3027
3026
|
max: "max";
|
|
3028
|
-
sum: "sum";
|
|
3029
3027
|
latest: "latest";
|
|
3030
3028
|
passThresholdRate: "passThresholdRate";
|
|
3031
3029
|
}>;
|
|
@@ -3057,9 +3055,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3057
3055
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3058
3056
|
success: "success";
|
|
3059
3057
|
error: "error";
|
|
3058
|
+
warning: "warning";
|
|
3060
3059
|
accent: "accent";
|
|
3061
3060
|
accentDim: "accentDim";
|
|
3062
|
-
warning: "warning";
|
|
3063
3061
|
textMuted: "textMuted";
|
|
3064
3062
|
}>>;
|
|
3065
3063
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3070,10 +3068,10 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3070
3068
|
source: z$1.ZodLiteral<"column">;
|
|
3071
3069
|
key: z$1.ZodString;
|
|
3072
3070
|
aggregate: z$1.ZodEnum<{
|
|
3071
|
+
sum: "sum";
|
|
3073
3072
|
avg: "avg";
|
|
3074
3073
|
min: "min";
|
|
3075
3074
|
max: "max";
|
|
3076
|
-
sum: "sum";
|
|
3077
3075
|
latest: "latest";
|
|
3078
3076
|
passThresholdRate: "passThresholdRate";
|
|
3079
3077
|
}>;
|
|
@@ -3081,9 +3079,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3081
3079
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3082
3080
|
success: "success";
|
|
3083
3081
|
error: "error";
|
|
3082
|
+
warning: "warning";
|
|
3084
3083
|
accent: "accent";
|
|
3085
3084
|
accentDim: "accentDim";
|
|
3086
|
-
warning: "warning";
|
|
3087
3085
|
textMuted: "textMuted";
|
|
3088
3086
|
}>>;
|
|
3089
3087
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3112,10 +3110,10 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3112
3110
|
source: z$1.ZodLiteral<"column">;
|
|
3113
3111
|
key: z$1.ZodString;
|
|
3114
3112
|
aggregate: z$1.ZodEnum<{
|
|
3113
|
+
sum: "sum";
|
|
3115
3114
|
avg: "avg";
|
|
3116
3115
|
min: "min";
|
|
3117
3116
|
max: "max";
|
|
3118
|
-
sum: "sum";
|
|
3119
3117
|
latest: "latest";
|
|
3120
3118
|
passThresholdRate: "passThresholdRate";
|
|
3121
3119
|
}>;
|
|
@@ -3131,10 +3129,10 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3131
3129
|
shortId: z$1.ZodString;
|
|
3132
3130
|
status: z$1.ZodEnum<{
|
|
3133
3131
|
error: "error";
|
|
3134
|
-
running: "running";
|
|
3135
|
-
cancelled: "cancelled";
|
|
3136
3132
|
pending: "pending";
|
|
3133
|
+
running: "running";
|
|
3137
3134
|
completed: "completed";
|
|
3135
|
+
cancelled: "cancelled";
|
|
3138
3136
|
}>;
|
|
3139
3137
|
temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
|
|
3140
3138
|
startedAt: z$1.ZodString;
|
|
@@ -3143,9 +3141,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3143
3141
|
evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
|
|
3144
3142
|
target: z$1.ZodObject<{
|
|
3145
3143
|
mode: z$1.ZodEnum<{
|
|
3146
|
-
caseIds: "caseIds";
|
|
3147
3144
|
all: "all";
|
|
3148
3145
|
evalIds: "evalIds";
|
|
3146
|
+
caseIds: "caseIds";
|
|
3149
3147
|
}>;
|
|
3150
3148
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
3151
3149
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -3171,10 +3169,10 @@ declare const runSummarySchema$1: z$1.ZodObject<{
|
|
|
3171
3169
|
runId: z$1.ZodString;
|
|
3172
3170
|
status: z$1.ZodEnum<{
|
|
3173
3171
|
error: "error";
|
|
3174
|
-
running: "running";
|
|
3175
|
-
cancelled: "cancelled";
|
|
3176
3172
|
pending: "pending";
|
|
3173
|
+
running: "running";
|
|
3177
3174
|
completed: "completed";
|
|
3175
|
+
cancelled: "cancelled";
|
|
3178
3176
|
}>;
|
|
3179
3177
|
totalCases: z$1.ZodNumber;
|
|
3180
3178
|
passedCases: z$1.ZodNumber;
|
|
@@ -4186,8 +4184,8 @@ declare const cacheRecordingSchema: z$1.ZodObject<{
|
|
|
4186
4184
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4187
4185
|
error: "error";
|
|
4188
4186
|
running: "running";
|
|
4189
|
-
ok: "ok";
|
|
4190
4187
|
cancelled: "cancelled";
|
|
4188
|
+
ok: "ok";
|
|
4191
4189
|
}>>;
|
|
4192
4190
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4193
4191
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4259,8 +4257,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
|
|
|
4259
4257
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4260
4258
|
error: "error";
|
|
4261
4259
|
running: "running";
|
|
4262
|
-
ok: "ok";
|
|
4263
4260
|
cancelled: "cancelled";
|
|
4261
|
+
ok: "ok";
|
|
4264
4262
|
}>>;
|
|
4265
4263
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4266
4264
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4349,8 +4347,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
4349
4347
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4350
4348
|
error: "error";
|
|
4351
4349
|
running: "running";
|
|
4352
|
-
ok: "ok";
|
|
4353
4350
|
cancelled: "cancelled";
|
|
4351
|
+
ok: "ok";
|
|
4354
4352
|
}>>;
|
|
4355
4353
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4356
4354
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4428,8 +4426,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4428
4426
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4429
4427
|
error: "error";
|
|
4430
4428
|
running: "running";
|
|
4431
|
-
ok: "ok";
|
|
4432
4429
|
cancelled: "cancelled";
|
|
4430
|
+
ok: "ok";
|
|
4433
4431
|
}>>;
|
|
4434
4432
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4435
4433
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4509,8 +4507,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4509
4507
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4510
4508
|
error: "error";
|
|
4511
4509
|
running: "running";
|
|
4512
|
-
ok: "ok";
|
|
4513
4510
|
cancelled: "cancelled";
|
|
4511
|
+
ok: "ok";
|
|
4514
4512
|
}>>;
|
|
4515
4513
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4516
4514
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4588,8 +4586,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
4588
4586
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4589
4587
|
error: "error";
|
|
4590
4588
|
running: "running";
|
|
4591
|
-
ok: "ok";
|
|
4592
4589
|
cancelled: "cancelled";
|
|
4590
|
+
ok: "ok";
|
|
4593
4591
|
}>>;
|
|
4594
4592
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4595
4593
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4677,8 +4675,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
4677
4675
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4678
4676
|
error: "error";
|
|
4679
4677
|
running: "running";
|
|
4680
|
-
ok: "ok";
|
|
4681
4678
|
cancelled: "cancelled";
|
|
4679
|
+
ok: "ok";
|
|
4682
4680
|
}>>;
|
|
4683
4681
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4684
4682
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4834,9 +4832,9 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema$1>;
|
|
|
4834
4832
|
declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
4835
4833
|
target: z$1.ZodObject<{
|
|
4836
4834
|
mode: z$1.ZodEnum<{
|
|
4837
|
-
caseIds: "caseIds";
|
|
4838
4835
|
all: "all";
|
|
4839
4836
|
evalIds: "evalIds";
|
|
4837
|
+
caseIds: "caseIds";
|
|
4840
4838
|
}>;
|
|
4841
4839
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
4842
4840
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, Lt as getEvalRegistry, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-CokPQet7.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-R7_V6YWa.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-B43qR0Ea.mjs";
|
|
1
|
+
import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, Lt as getEvalRegistry, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-o38J7uZO.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Cvs7tc2v.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-Jahivm6d.mjs";
|
|
4
4
|
export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as columnDefSchema, Mt as evalStatsConfigSchema, Nt as manualInputDescriptorSchema, Pt as evalChartsConfigSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-CokPQet7.mjs";
|
|
1
|
+
import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as columnDefSchema, Mt as evalStatsConfigSchema, Nt as manualInputDescriptorSchema, Pt as evalChartsConfigSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-o38J7uZO.mjs";
|
|
2
2
|
import { z } from "zod/v4";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { relative } from "node:path";
|