@ls-stack/agent-eval 0.58.0 → 0.58.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-L9GdY28I.mjs → app-DhMIbjlE.mjs} +4 -4
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +1 -1
- package/dist/{cli-Cf37PZKi.mjs → cli-_g2qOMK6.mjs} +4 -4
- package/dist/index.d.mts +60 -63
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +2 -2
- package/dist/{runExecution-C4kAOhC1.mjs → runExecution-d42Lm0i5.mjs} +63 -68
- package/dist/{runOrchestration-5xEiQxiS.mjs → runOrchestration-CvmFeOmT.mjs} +1 -1
- package/dist/{runner-JIykMlve.mjs → runner-BKogjiYd.mjs} +1 -1
- package/dist/{runner-bjd_UB9i.mjs → runner-MSr8sAWm.mjs} +2 -2
- package/dist/{src-303BocMW.mjs → src-CdZsOn6y.mjs} +2 -2
- package/package.json +3 -3
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { et as createRunRequestSchema, nt as extractCacheEntries, tt as updateManualScoreRequestSchema, ut as getEvalTitle } from "./runExecution-C4kAOhC1.mjs";
|
|
2
|
-
import { o as stageManualInputFile } from "./cli-Cf37PZKi.mjs";
|
|
3
|
-
import "./src-303BocMW.mjs";
|
|
4
|
-
import { t as getRunnerInstance } from "./runner-bjd_UB9i.mjs";
|
|
1
|
+
import { et as createRunRequestSchema, nt as extractCacheEntries, tt as updateManualScoreRequestSchema, ut as getEvalTitle } from "./runExecution-d42Lm0i5.mjs";
|
|
2
|
+
import { o as stageManualInputFile } from "./cli-_g2qOMK6.mjs";
|
|
3
|
+
import "./src-CdZsOn6y.mjs";
|
|
4
|
+
import { t as getRunnerInstance } from "./runner-MSr8sAWm.mjs";
|
|
5
5
|
import { z } from "zod/v4";
|
|
6
6
|
import { readFile } from "node:fs/promises";
|
|
7
7
|
import { dirname, isAbsolute, join, relative, resolve, sep } from "node:path";
|
package/dist/bin.mjs
CHANGED
package/dist/caseChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { I as configureEvalRunLogs, Pt as runWithEvalRegistry, St as resolveLlmCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as createFsCacheStore, xt as resolveApiCallsConfig } from "./runExecution-C4kAOhC1.mjs";
|
|
1
|
+
import { I as configureEvalRunLogs, Pt as runWithEvalRegistry, St as resolveLlmCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as createFsCacheStore, xt as resolveApiCallsConfig } from "./runExecution-d42Lm0i5.mjs";
|
|
2
2
|
//#region ../runner/src/caseChild.ts
|
|
3
3
|
let fatalErrorReported = false;
|
|
4
4
|
let disconnectExpected = false;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Ct as buildEvalKey, Nt as getEvalRegistry, St as resolveLlmCallsConfig, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as normalizeScoreDef, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as createFsCacheStore, wt as getCaseRowCaseKey, xt as resolveApiCallsConfig } from "./runExecution-C4kAOhC1.mjs";
|
|
2
|
-
import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-5xEiQxiS.mjs";
|
|
1
|
+
import { Ct as buildEvalKey, Nt as getEvalRegistry, St as resolveLlmCallsConfig, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as normalizeScoreDef, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as createFsCacheStore, wt as getCaseRowCaseKey, xt as resolveApiCallsConfig } from "./runExecution-d42Lm0i5.mjs";
|
|
2
|
+
import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-CvmFeOmT.mjs";
|
|
3
3
|
import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
|
|
5
5
|
import { createHash, randomUUID } from "node:crypto";
|
|
@@ -2172,8 +2172,8 @@ async function commandApp(args) {
|
|
|
2172
2172
|
const { serve } = await import("@hono/node-server");
|
|
2173
2173
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
2174
2174
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
2175
|
-
const appModule = await import("./app-L9GdY28I.mjs");
|
|
2176
|
-
const runnerModule = await import("./runner-JIykMlve.mjs");
|
|
2175
|
+
const appModule = await import("./app-DhMIbjlE.mjs");
|
|
2176
|
+
const runnerModule = await import("./runner-BKogjiYd.mjs");
|
|
2177
2177
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
2178
2178
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
2179
2179
|
await runnerModule.initRunner();
|
package/dist/index.d.mts
CHANGED
|
@@ -1455,7 +1455,9 @@ type CacheScopeContext = {
|
|
|
1455
1455
|
/** Active recording frame captured while a cached operation body executes. */
|
|
1456
1456
|
type CacheRecordingFrame = {
|
|
1457
1457
|
/** Length of `scope.spans` immediately before the cached body started. */baseSpanIndex: number; /** Parent id used when recording and replaying direct child spans. */
|
|
1458
|
-
replayParentSpanId: string | null; /**
|
|
1458
|
+
replayParentSpanId: string | null; /** Spans created by this cache body's async execution branch. */
|
|
1459
|
+
spanIds: Set<string>; /** Non-cache attributes written to the replay parent by this async branch. */
|
|
1460
|
+
finalAttributes: Record<string, unknown>; /** Ordered observable effects recorded during the cached body. */
|
|
1459
1461
|
ops: CacheRecordingOp$1[];
|
|
1460
1462
|
};
|
|
1461
1463
|
/** Mutable per-case runtime state stored in async local storage. */
|
|
@@ -1480,11 +1482,6 @@ type EvalCaseScope = {
|
|
|
1480
1482
|
logs: RunLogEntry$1[];
|
|
1481
1483
|
spans: EvalTraceSpan$2[];
|
|
1482
1484
|
checkpoints: Map<string, unknown>;
|
|
1483
|
-
/**
|
|
1484
|
-
* Stack of active cache recorders. Ops are written to the top-most frame
|
|
1485
|
-
* when it exists and `replayingDepth === 0`.
|
|
1486
|
-
*/
|
|
1487
|
-
recordingStack: CacheRecordingFrame[];
|
|
1488
1485
|
/**
|
|
1489
1486
|
* Incremented while replaying a cached operation, so nested SDK calls do not
|
|
1490
1487
|
* accidentally double-record ops into outer recorders.
|
|
@@ -2017,8 +2014,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
|
|
|
2017
2014
|
subtree: "subtree";
|
|
2018
2015
|
}>>;
|
|
2019
2016
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2020
|
-
all: "all";
|
|
2021
2017
|
sum: "sum";
|
|
2018
|
+
all: "all";
|
|
2022
2019
|
last: "last";
|
|
2023
2020
|
}>>;
|
|
2024
2021
|
}, z$1.core.$strip>;
|
|
@@ -2053,8 +2050,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
|
|
|
2053
2050
|
subtree: "subtree";
|
|
2054
2051
|
}>>;
|
|
2055
2052
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2056
|
-
all: "all";
|
|
2057
2053
|
sum: "sum";
|
|
2054
|
+
all: "all";
|
|
2058
2055
|
last: "last";
|
|
2059
2056
|
}>>;
|
|
2060
2057
|
}, z$1.core.$strip>>>;
|
|
@@ -2093,8 +2090,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
|
|
|
2093
2090
|
subtree: "subtree";
|
|
2094
2091
|
}>>;
|
|
2095
2092
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2096
|
-
all: "all";
|
|
2097
2093
|
sum: "sum";
|
|
2094
|
+
all: "all";
|
|
2098
2095
|
last: "last";
|
|
2099
2096
|
}>>;
|
|
2100
2097
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
@@ -2131,8 +2128,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
|
|
|
2131
2128
|
subtree: "subtree";
|
|
2132
2129
|
}>>;
|
|
2133
2130
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2134
|
-
all: "all";
|
|
2135
2131
|
sum: "sum";
|
|
2132
|
+
all: "all";
|
|
2136
2133
|
last: "last";
|
|
2137
2134
|
}>>;
|
|
2138
2135
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
@@ -2217,9 +2214,9 @@ type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
|
|
|
2217
2214
|
*/
|
|
2218
2215
|
declare const evalStatAggregateSchema: z$1.ZodEnum<{
|
|
2219
2216
|
avg: "avg";
|
|
2220
|
-
sum: "sum";
|
|
2221
2217
|
min: "min";
|
|
2222
2218
|
max: "max";
|
|
2219
|
+
sum: "sum";
|
|
2223
2220
|
best: "best";
|
|
2224
2221
|
worst: "worst";
|
|
2225
2222
|
}>;
|
|
@@ -2249,9 +2246,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2249
2246
|
kind: z$1.ZodLiteral<"duration">;
|
|
2250
2247
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2251
2248
|
avg: "avg";
|
|
2252
|
-
sum: "sum";
|
|
2253
2249
|
min: "min";
|
|
2254
2250
|
max: "max";
|
|
2251
|
+
sum: "sum";
|
|
2255
2252
|
best: "best";
|
|
2256
2253
|
worst: "worst";
|
|
2257
2254
|
}>>;
|
|
@@ -2260,9 +2257,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2260
2257
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2261
2258
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2262
2259
|
avg: "avg";
|
|
2263
|
-
sum: "sum";
|
|
2264
2260
|
min: "min";
|
|
2265
2261
|
max: "max";
|
|
2262
|
+
sum: "sum";
|
|
2266
2263
|
best: "best";
|
|
2267
2264
|
worst: "worst";
|
|
2268
2265
|
}>>;
|
|
@@ -2273,9 +2270,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2273
2270
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2274
2271
|
aggregate: z$1.ZodEnum<{
|
|
2275
2272
|
avg: "avg";
|
|
2276
|
-
sum: "sum";
|
|
2277
2273
|
min: "min";
|
|
2278
2274
|
max: "max";
|
|
2275
|
+
sum: "sum";
|
|
2279
2276
|
best: "best";
|
|
2280
2277
|
worst: "worst";
|
|
2281
2278
|
}>;
|
|
@@ -2313,9 +2310,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2313
2310
|
kind: z$1.ZodLiteral<"duration">;
|
|
2314
2311
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2315
2312
|
avg: "avg";
|
|
2316
|
-
sum: "sum";
|
|
2317
2313
|
min: "min";
|
|
2318
2314
|
max: "max";
|
|
2315
|
+
sum: "sum";
|
|
2319
2316
|
best: "best";
|
|
2320
2317
|
worst: "worst";
|
|
2321
2318
|
}>>;
|
|
@@ -2324,9 +2321,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2324
2321
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2325
2322
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2326
2323
|
avg: "avg";
|
|
2327
|
-
sum: "sum";
|
|
2328
2324
|
min: "min";
|
|
2329
2325
|
max: "max";
|
|
2326
|
+
sum: "sum";
|
|
2330
2327
|
best: "best";
|
|
2331
2328
|
worst: "worst";
|
|
2332
2329
|
}>>;
|
|
@@ -2337,9 +2334,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2337
2334
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2338
2335
|
aggregate: z$1.ZodEnum<{
|
|
2339
2336
|
avg: "avg";
|
|
2340
|
-
sum: "sum";
|
|
2341
2337
|
min: "min";
|
|
2342
2338
|
max: "max";
|
|
2339
|
+
sum: "sum";
|
|
2343
2340
|
best: "best";
|
|
2344
2341
|
worst: "worst";
|
|
2345
2342
|
}>;
|
|
@@ -2422,10 +2419,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2422
2419
|
caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2423
2420
|
lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
|
|
2424
2421
|
error: "error";
|
|
2425
|
-
running: "running";
|
|
2426
|
-
cancelled: "cancelled";
|
|
2427
2422
|
pass: "pass";
|
|
2428
2423
|
fail: "fail";
|
|
2424
|
+
running: "running";
|
|
2425
|
+
cancelled: "cancelled";
|
|
2429
2426
|
unscored: "unscored";
|
|
2430
2427
|
}>>;
|
|
2431
2428
|
stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
@@ -2440,9 +2437,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2440
2437
|
kind: z$1.ZodLiteral<"duration">;
|
|
2441
2438
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2442
2439
|
avg: "avg";
|
|
2443
|
-
sum: "sum";
|
|
2444
2440
|
min: "min";
|
|
2445
2441
|
max: "max";
|
|
2442
|
+
sum: "sum";
|
|
2446
2443
|
best: "best";
|
|
2447
2444
|
worst: "worst";
|
|
2448
2445
|
}>>;
|
|
@@ -2451,9 +2448,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2451
2448
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2452
2449
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2453
2450
|
avg: "avg";
|
|
2454
|
-
sum: "sum";
|
|
2455
2451
|
min: "min";
|
|
2456
2452
|
max: "max";
|
|
2453
|
+
sum: "sum";
|
|
2457
2454
|
best: "best";
|
|
2458
2455
|
worst: "worst";
|
|
2459
2456
|
}>>;
|
|
@@ -2464,9 +2461,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2464
2461
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2465
2462
|
aggregate: z$1.ZodEnum<{
|
|
2466
2463
|
avg: "avg";
|
|
2467
|
-
sum: "sum";
|
|
2468
2464
|
min: "min";
|
|
2469
2465
|
max: "max";
|
|
2466
|
+
sum: "sum";
|
|
2470
2467
|
best: "best";
|
|
2471
2468
|
worst: "worst";
|
|
2472
2469
|
}>;
|
|
@@ -2491,9 +2488,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2491
2488
|
}, z$1.core.$strip>], "kind">>>;
|
|
2492
2489
|
defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2493
2490
|
avg: "avg";
|
|
2494
|
-
sum: "sum";
|
|
2495
2491
|
min: "min";
|
|
2496
2492
|
max: "max";
|
|
2493
|
+
sum: "sum";
|
|
2497
2494
|
best: "best";
|
|
2498
2495
|
worst: "worst";
|
|
2499
2496
|
}>>;
|
|
@@ -2530,9 +2527,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2530
2527
|
key: z$1.ZodString;
|
|
2531
2528
|
aggregate: z$1.ZodEnum<{
|
|
2532
2529
|
avg: "avg";
|
|
2533
|
-
sum: "sum";
|
|
2534
2530
|
min: "min";
|
|
2535
2531
|
max: "max";
|
|
2532
|
+
sum: "sum";
|
|
2536
2533
|
latest: "latest";
|
|
2537
2534
|
passThresholdRate: "passThresholdRate";
|
|
2538
2535
|
}>;
|
|
@@ -2572,9 +2569,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2572
2569
|
key: z$1.ZodString;
|
|
2573
2570
|
aggregate: z$1.ZodEnum<{
|
|
2574
2571
|
avg: "avg";
|
|
2575
|
-
sum: "sum";
|
|
2576
2572
|
min: "min";
|
|
2577
2573
|
max: "max";
|
|
2574
|
+
sum: "sum";
|
|
2578
2575
|
latest: "latest";
|
|
2579
2576
|
passThresholdRate: "passThresholdRate";
|
|
2580
2577
|
}>;
|
|
@@ -2671,11 +2668,11 @@ declare const caseRowSchema$1: z$1.ZodObject<{
|
|
|
2671
2668
|
tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2672
2669
|
status: z$1.ZodEnum<{
|
|
2673
2670
|
error: "error";
|
|
2674
|
-
pending: "pending";
|
|
2675
|
-
running: "running";
|
|
2676
|
-
cancelled: "cancelled";
|
|
2677
2671
|
pass: "pass";
|
|
2678
2672
|
fail: "fail";
|
|
2673
|
+
running: "running";
|
|
2674
|
+
cancelled: "cancelled";
|
|
2675
|
+
pending: "pending";
|
|
2679
2676
|
}>;
|
|
2680
2677
|
durationMs: z$1.ZodNullable<z$1.ZodNumber>;
|
|
2681
2678
|
cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
|
|
@@ -2862,8 +2859,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2862
2859
|
subtree: "subtree";
|
|
2863
2860
|
}>>;
|
|
2864
2861
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2865
|
-
all: "all";
|
|
2866
2862
|
sum: "sum";
|
|
2863
|
+
all: "all";
|
|
2867
2864
|
last: "last";
|
|
2868
2865
|
}>>;
|
|
2869
2866
|
}, z$1.core.$strip>>>;
|
|
@@ -2874,10 +2871,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2874
2871
|
namespace: z$1.ZodString;
|
|
2875
2872
|
key: z$1.ZodString;
|
|
2876
2873
|
status: z$1.ZodEnum<{
|
|
2877
|
-
bypass: "bypass";
|
|
2878
|
-
refresh: "refresh";
|
|
2879
2874
|
hit: "hit";
|
|
2880
2875
|
miss: "miss";
|
|
2876
|
+
refresh: "refresh";
|
|
2877
|
+
bypass: "bypass";
|
|
2881
2878
|
}>;
|
|
2882
2879
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2883
2880
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2896,11 +2893,11 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2896
2893
|
tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2897
2894
|
status: z$1.ZodEnum<{
|
|
2898
2895
|
error: "error";
|
|
2899
|
-
pending: "pending";
|
|
2900
|
-
running: "running";
|
|
2901
|
-
cancelled: "cancelled";
|
|
2902
2896
|
pass: "pass";
|
|
2903
2897
|
fail: "fail";
|
|
2898
|
+
running: "running";
|
|
2899
|
+
cancelled: "cancelled";
|
|
2900
|
+
pending: "pending";
|
|
2904
2901
|
}>;
|
|
2905
2902
|
input: z$1.ZodUnknown;
|
|
2906
2903
|
trace: z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -2965,8 +2962,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2965
2962
|
subtree: "subtree";
|
|
2966
2963
|
}>>;
|
|
2967
2964
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2968
|
-
all: "all";
|
|
2969
2965
|
sum: "sum";
|
|
2966
|
+
all: "all";
|
|
2970
2967
|
last: "last";
|
|
2971
2968
|
}>>;
|
|
2972
2969
|
}, z$1.core.$strip>>>;
|
|
@@ -3034,8 +3031,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3034
3031
|
subtree: "subtree";
|
|
3035
3032
|
}>>;
|
|
3036
3033
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3037
|
-
all: "all";
|
|
3038
3034
|
sum: "sum";
|
|
3035
|
+
all: "all";
|
|
3039
3036
|
last: "last";
|
|
3040
3037
|
}>>;
|
|
3041
3038
|
}, z$1.core.$strip>>>;
|
|
@@ -3046,10 +3043,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3046
3043
|
namespace: z$1.ZodString;
|
|
3047
3044
|
key: z$1.ZodString;
|
|
3048
3045
|
status: z$1.ZodEnum<{
|
|
3049
|
-
bypass: "bypass";
|
|
3050
|
-
refresh: "refresh";
|
|
3051
3046
|
hit: "hit";
|
|
3052
3047
|
miss: "miss";
|
|
3048
|
+
refresh: "refresh";
|
|
3049
|
+
bypass: "bypass";
|
|
3053
3050
|
}>;
|
|
3054
3051
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
3055
3052
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -3166,10 +3163,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3166
3163
|
namespace: z$1.ZodString;
|
|
3167
3164
|
key: z$1.ZodString;
|
|
3168
3165
|
status: z$1.ZodEnum<{
|
|
3169
|
-
bypass: "bypass";
|
|
3170
|
-
refresh: "refresh";
|
|
3171
3166
|
hit: "hit";
|
|
3172
3167
|
miss: "miss";
|
|
3168
|
+
refresh: "refresh";
|
|
3169
|
+
bypass: "bypass";
|
|
3173
3170
|
}>;
|
|
3174
3171
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
3175
3172
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -3223,9 +3220,9 @@ type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
|
|
|
3223
3220
|
/** Reducer applied to a numeric column across all cases of a single run. */
|
|
3224
3221
|
declare const evalChartAggregateSchema: z$1.ZodEnum<{
|
|
3225
3222
|
avg: "avg";
|
|
3226
|
-
sum: "sum";
|
|
3227
3223
|
min: "min";
|
|
3228
3224
|
max: "max";
|
|
3225
|
+
sum: "sum";
|
|
3229
3226
|
latest: "latest";
|
|
3230
3227
|
passThresholdRate: "passThresholdRate";
|
|
3231
3228
|
}>;
|
|
@@ -3281,9 +3278,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
3281
3278
|
key: z$1.ZodString;
|
|
3282
3279
|
aggregate: z$1.ZodEnum<{
|
|
3283
3280
|
avg: "avg";
|
|
3284
|
-
sum: "sum";
|
|
3285
3281
|
min: "min";
|
|
3286
3282
|
max: "max";
|
|
3283
|
+
sum: "sum";
|
|
3287
3284
|
latest: "latest";
|
|
3288
3285
|
passThresholdRate: "passThresholdRate";
|
|
3289
3286
|
}>;
|
|
@@ -3316,9 +3313,9 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
|
|
|
3316
3313
|
key: z$1.ZodString;
|
|
3317
3314
|
aggregate: z$1.ZodEnum<{
|
|
3318
3315
|
avg: "avg";
|
|
3319
|
-
sum: "sum";
|
|
3320
3316
|
min: "min";
|
|
3321
3317
|
max: "max";
|
|
3318
|
+
sum: "sum";
|
|
3322
3319
|
latest: "latest";
|
|
3323
3320
|
passThresholdRate: "passThresholdRate";
|
|
3324
3321
|
}>;
|
|
@@ -3364,9 +3361,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3364
3361
|
key: z$1.ZodString;
|
|
3365
3362
|
aggregate: z$1.ZodEnum<{
|
|
3366
3363
|
avg: "avg";
|
|
3367
|
-
sum: "sum";
|
|
3368
3364
|
min: "min";
|
|
3369
3365
|
max: "max";
|
|
3366
|
+
sum: "sum";
|
|
3370
3367
|
latest: "latest";
|
|
3371
3368
|
passThresholdRate: "passThresholdRate";
|
|
3372
3369
|
}>;
|
|
@@ -3406,9 +3403,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3406
3403
|
key: z$1.ZodString;
|
|
3407
3404
|
aggregate: z$1.ZodEnum<{
|
|
3408
3405
|
avg: "avg";
|
|
3409
|
-
sum: "sum";
|
|
3410
3406
|
min: "min";
|
|
3411
3407
|
max: "max";
|
|
3408
|
+
sum: "sum";
|
|
3412
3409
|
latest: "latest";
|
|
3413
3410
|
passThresholdRate: "passThresholdRate";
|
|
3414
3411
|
}>;
|
|
@@ -3454,9 +3451,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3454
3451
|
key: z$1.ZodString;
|
|
3455
3452
|
aggregate: z$1.ZodEnum<{
|
|
3456
3453
|
avg: "avg";
|
|
3457
|
-
sum: "sum";
|
|
3458
3454
|
min: "min";
|
|
3459
3455
|
max: "max";
|
|
3456
|
+
sum: "sum";
|
|
3460
3457
|
latest: "latest";
|
|
3461
3458
|
passThresholdRate: "passThresholdRate";
|
|
3462
3459
|
}>;
|
|
@@ -3496,9 +3493,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3496
3493
|
key: z$1.ZodString;
|
|
3497
3494
|
aggregate: z$1.ZodEnum<{
|
|
3498
3495
|
avg: "avg";
|
|
3499
|
-
sum: "sum";
|
|
3500
3496
|
min: "min";
|
|
3501
3497
|
max: "max";
|
|
3498
|
+
sum: "sum";
|
|
3502
3499
|
latest: "latest";
|
|
3503
3500
|
passThresholdRate: "passThresholdRate";
|
|
3504
3501
|
}>;
|
|
@@ -3514,10 +3511,10 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3514
3511
|
shortId: z$1.ZodString;
|
|
3515
3512
|
status: z$1.ZodEnum<{
|
|
3516
3513
|
error: "error";
|
|
3517
|
-
pending: "pending";
|
|
3518
3514
|
running: "running";
|
|
3519
|
-
completed: "completed";
|
|
3520
3515
|
cancelled: "cancelled";
|
|
3516
|
+
pending: "pending";
|
|
3517
|
+
completed: "completed";
|
|
3521
3518
|
}>;
|
|
3522
3519
|
temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
|
|
3523
3520
|
startedAt: z$1.ZodString;
|
|
@@ -3526,9 +3523,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3526
3523
|
evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
|
|
3527
3524
|
target: z$1.ZodObject<{
|
|
3528
3525
|
mode: z$1.ZodEnum<{
|
|
3526
|
+
caseIds: "caseIds";
|
|
3529
3527
|
all: "all";
|
|
3530
3528
|
evalIds: "evalIds";
|
|
3531
|
-
caseIds: "caseIds";
|
|
3532
3529
|
}>;
|
|
3533
3530
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
3534
3531
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -3542,9 +3539,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3542
3539
|
median: "median";
|
|
3543
3540
|
}>>>;
|
|
3544
3541
|
cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3545
|
-
use: "use";
|
|
3546
|
-
bypass: "bypass";
|
|
3547
3542
|
refresh: "refresh";
|
|
3543
|
+
bypass: "bypass";
|
|
3544
|
+
use: "use";
|
|
3548
3545
|
}>>;
|
|
3549
3546
|
}, z$1.core.$strip>;
|
|
3550
3547
|
/** Persisted lifecycle metadata for a single eval run. */
|
|
@@ -3554,10 +3551,10 @@ declare const runSummarySchema$1: z$1.ZodObject<{
|
|
|
3554
3551
|
runId: z$1.ZodString;
|
|
3555
3552
|
status: z$1.ZodEnum<{
|
|
3556
3553
|
error: "error";
|
|
3557
|
-
pending: "pending";
|
|
3558
3554
|
running: "running";
|
|
3559
|
-
completed: "completed";
|
|
3560
3555
|
cancelled: "cancelled";
|
|
3556
|
+
pending: "pending";
|
|
3557
|
+
completed: "completed";
|
|
3561
3558
|
}>;
|
|
3562
3559
|
totalCases: z$1.ZodNumber;
|
|
3563
3560
|
passedCases: z$1.ZodNumber;
|
|
@@ -4469,9 +4466,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
|
|
|
4469
4466
|
* - `refresh`: never read, always write (forces re-execution and overwrites).
|
|
4470
4467
|
*/
|
|
4471
4468
|
declare const cacheModeSchema: z$1.ZodEnum<{
|
|
4472
|
-
use: "use";
|
|
4473
|
-
bypass: "bypass";
|
|
4474
4469
|
refresh: "refresh";
|
|
4470
|
+
bypass: "bypass";
|
|
4471
|
+
use: "use";
|
|
4475
4472
|
}>;
|
|
4476
4473
|
/** Mode controlling how cached spans behave during a run. */
|
|
4477
4474
|
type CacheMode = z$1.infer<typeof cacheModeSchema>;
|
|
@@ -4492,10 +4489,10 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
|
|
|
4492
4489
|
type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
|
|
4493
4490
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4494
4491
|
declare const cacheStatusSchema: z$1.ZodEnum<{
|
|
4495
|
-
bypass: "bypass";
|
|
4496
|
-
refresh: "refresh";
|
|
4497
4492
|
hit: "hit";
|
|
4498
4493
|
miss: "miss";
|
|
4494
|
+
refresh: "refresh";
|
|
4495
|
+
bypass: "bypass";
|
|
4499
4496
|
}>;
|
|
4500
4497
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4501
4498
|
type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
|
|
@@ -4512,10 +4509,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
|
|
|
4512
4509
|
namespace: z$1.ZodString;
|
|
4513
4510
|
key: z$1.ZodString;
|
|
4514
4511
|
status: z$1.ZodEnum<{
|
|
4515
|
-
bypass: "bypass";
|
|
4516
|
-
refresh: "refresh";
|
|
4517
4512
|
hit: "hit";
|
|
4518
4513
|
miss: "miss";
|
|
4514
|
+
refresh: "refresh";
|
|
4515
|
+
bypass: "bypass";
|
|
4519
4516
|
}>;
|
|
4520
4517
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
4521
4518
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -5467,9 +5464,9 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema$1>;
|
|
|
5467
5464
|
declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
5468
5465
|
target: z$1.ZodObject<{
|
|
5469
5466
|
mode: z$1.ZodEnum<{
|
|
5467
|
+
caseIds: "caseIds";
|
|
5470
5468
|
all: "all";
|
|
5471
5469
|
evalIds: "evalIds";
|
|
5472
|
-
caseIds: "caseIds";
|
|
5473
5470
|
}>;
|
|
5474
5471
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
5475
5472
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -5481,9 +5478,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
|
5481
5478
|
temporary: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5482
5479
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
5483
5480
|
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
5484
|
-
use: "use";
|
|
5485
|
-
bypass: "bypass";
|
|
5486
5481
|
refresh: "refresh";
|
|
5482
|
+
bypass: "bypass";
|
|
5483
|
+
use: "use";
|
|
5487
5484
|
}>>;
|
|
5488
5485
|
}, z$1.core.$strip>>;
|
|
5489
5486
|
manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-C4kAOhC1.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Cf37PZKi.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-303BocMW.mjs";
|
|
1
|
+
import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-d42Lm0i5.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-_g2qOMK6.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-CdZsOn6y.mjs";
|
|
4
4
|
export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-C4kAOhC1.mjs";
|
|
2
|
-
import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-5xEiQxiS.mjs";
|
|
1
|
+
import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-d42Lm0i5.mjs";
|
|
2
|
+
import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-CvmFeOmT.mjs";
|
|
3
3
|
import { z } from "zod/v4";
|
|
4
4
|
import { readFile } from "node:fs/promises";
|
|
5
5
|
import { relative } from "node:path";
|
|
@@ -2651,6 +2651,7 @@ const scopeStorage = new AsyncLocalStorage();
|
|
|
2651
2651
|
const runtimeScopeStorage = new AsyncLocalStorage();
|
|
2652
2652
|
const evalClockStorage = new AsyncLocalStorage();
|
|
2653
2653
|
const activeSpanStackStorage = new AsyncLocalStorage();
|
|
2654
|
+
const recordingStackStorage = new AsyncLocalStorage();
|
|
2654
2655
|
let activeEvalScopeCount = 0;
|
|
2655
2656
|
let activeEvalRuntimeScopeCount = 0;
|
|
2656
2657
|
let consoleCaptureEnabled = true;
|
|
@@ -2803,6 +2804,20 @@ async function runWithActiveSpan(span, fn) {
|
|
|
2803
2804
|
const currentStack = activeSpanStackStorage.getStore() ?? [];
|
|
2804
2805
|
return await activeSpanStackStorage.run([...currentStack, span], fn);
|
|
2805
2806
|
}
|
|
2807
|
+
/** Execute a callback with a cache recording frame scoped to this async branch. */
|
|
2808
|
+
async function runWithCacheRecordingFrame(frame, fn) {
|
|
2809
|
+
const currentStack = recordingStackStorage.getStore() ?? [];
|
|
2810
|
+
return await recordingStackStorage.run([...currentStack, frame], fn);
|
|
2811
|
+
}
|
|
2812
|
+
function getCurrentCacheRecordingFrame(scope) {
|
|
2813
|
+
if (scope.replayingDepth > 0) return void 0;
|
|
2814
|
+
return recordingStackStorage.getStore()?.at(-1);
|
|
2815
|
+
}
|
|
2816
|
+
/** Mark a span as created by the active cache recorder, when one exists. */
|
|
2817
|
+
function recordSpanForActiveCacheRecording(scope, spanId) {
|
|
2818
|
+
if (scope.replayingDepth > 0) return;
|
|
2819
|
+
for (const frame of recordingStackStorage.getStore() ?? []) frame.spanIds.add(spanId);
|
|
2820
|
+
}
|
|
2806
2821
|
/**
|
|
2807
2822
|
* Return the current eval runner phase for this async execution.
|
|
2808
2823
|
*
|
|
@@ -3110,7 +3125,6 @@ async function runInEvalScope(caseId, fn, options = {}) {
|
|
|
3110
3125
|
logs: [],
|
|
3111
3126
|
spans: [],
|
|
3112
3127
|
checkpoints: /* @__PURE__ */ new Map(),
|
|
3113
|
-
recordingStack: [],
|
|
3114
3128
|
replayingDepth: 0,
|
|
3115
3129
|
cacheContext: options.cacheContext,
|
|
3116
3130
|
caseCacheRefs: [],
|
|
@@ -3150,10 +3164,16 @@ function nextEvalId() {
|
|
|
3150
3164
|
scope.nextEvalIdCounter++;
|
|
3151
3165
|
return `${scope.idPrefix}-${scope.nextEvalIdCounter}`;
|
|
3152
3166
|
}
|
|
3153
|
-
function
|
|
3154
|
-
|
|
3155
|
-
|
|
3156
|
-
|
|
3167
|
+
function recordCacheRecordingOpIfActive(scope, op) {
|
|
3168
|
+
getCurrentCacheRecordingFrame(scope)?.ops.push(op);
|
|
3169
|
+
}
|
|
3170
|
+
function recordCacheRecordingAttributesIfActive(scope, span, attributes) {
|
|
3171
|
+
const frames = recordingStackStorage.getStore();
|
|
3172
|
+
if (scope.replayingDepth > 0 || frames === void 0) return;
|
|
3173
|
+
for (const [key, value] of Object.entries(attributes)) {
|
|
3174
|
+
if (key.startsWith("cache.")) continue;
|
|
3175
|
+
for (const frame of frames) if (span.id === frame.replayParentSpanId) frame.finalAttributes[key] = value;
|
|
3176
|
+
}
|
|
3157
3177
|
}
|
|
3158
3178
|
function normalizeEvalOutputOptions(options) {
|
|
3159
3179
|
if (options === void 0) return void 0;
|
|
@@ -3185,7 +3205,7 @@ function setEvalOutput(key, value, options = void 0) {
|
|
|
3185
3205
|
scope.outputs[key] = value;
|
|
3186
3206
|
const column = normalizeEvalOutputOptions(options);
|
|
3187
3207
|
if (column !== void 0) scope.outputColumnOverrides[key] = column;
|
|
3188
|
-
|
|
3208
|
+
recordCacheRecordingOpIfActive(scope, {
|
|
3189
3209
|
kind: "setOutput",
|
|
3190
3210
|
key,
|
|
3191
3211
|
value,
|
|
@@ -3205,7 +3225,7 @@ function appendToEvalOutput(key, value) {
|
|
|
3205
3225
|
if (existing === void 0) scope.outputs[key] = [value];
|
|
3206
3226
|
else if (Array.isArray(existing)) scope.outputs[key] = [...copyArray$1(existing), value];
|
|
3207
3227
|
else scope.outputs[key] = [existing, value];
|
|
3208
|
-
|
|
3228
|
+
recordCacheRecordingOpIfActive(scope, {
|
|
3209
3229
|
kind: "appendOutput",
|
|
3210
3230
|
key,
|
|
3211
3231
|
value
|
|
@@ -3223,7 +3243,7 @@ function mergeEvalOutput(key, patch) {
|
|
|
3223
3243
|
const existing = scope.outputs[key];
|
|
3224
3244
|
if (existing === void 0) {
|
|
3225
3245
|
scope.outputs[key] = { ...patch };
|
|
3226
|
-
|
|
3246
|
+
recordCacheRecordingOpIfActive(scope, {
|
|
3227
3247
|
kind: "mergeOutput",
|
|
3228
3248
|
key,
|
|
3229
3249
|
patch
|
|
@@ -3238,7 +3258,7 @@ function mergeEvalOutput(key, patch) {
|
|
|
3238
3258
|
...existing,
|
|
3239
3259
|
...patch
|
|
3240
3260
|
};
|
|
3241
|
-
|
|
3261
|
+
recordCacheRecordingOpIfActive(scope, {
|
|
3242
3262
|
kind: "mergeOutput",
|
|
3243
3263
|
key,
|
|
3244
3264
|
patch
|
|
@@ -3256,7 +3276,7 @@ function incrementEvalOutput(key, delta) {
|
|
|
3256
3276
|
const existing = scope.outputs[key];
|
|
3257
3277
|
if (existing === void 0) {
|
|
3258
3278
|
scope.outputs[key] = delta;
|
|
3259
|
-
|
|
3279
|
+
recordCacheRecordingOpIfActive(scope, {
|
|
3260
3280
|
kind: "incrementOutput",
|
|
3261
3281
|
key,
|
|
3262
3282
|
delta
|
|
@@ -3268,7 +3288,7 @@ function incrementEvalOutput(key, delta) {
|
|
|
3268
3288
|
return;
|
|
3269
3289
|
}
|
|
3270
3290
|
scope.outputs[key] = existing + delta;
|
|
3271
|
-
|
|
3291
|
+
recordCacheRecordingOpIfActive(scope, {
|
|
3272
3292
|
kind: "incrementOutput",
|
|
3273
3293
|
key,
|
|
3274
3294
|
delta
|
|
@@ -3675,10 +3695,6 @@ async function materializeExternalJsonValues(value, store) {
|
|
|
3675
3695
|
if (!isRecordLike$3(value)) return value;
|
|
3676
3696
|
return Object.fromEntries(await Promise.all(Object.entries(value).map(async ([key, entryValue]) => [key, await materializeExternalJsonValues(entryValue, store)])));
|
|
3677
3697
|
}
|
|
3678
|
-
/** Clone one value through the same serialization path used for cache data. */
|
|
3679
|
-
async function cloneCacheValue(value, options = void 0) {
|
|
3680
|
-
return deserializeCacheValue(await serializeCacheValue(value, options));
|
|
3681
|
-
}
|
|
3682
3698
|
function normalizeCacheSerializationOptions(options) {
|
|
3683
3699
|
return {
|
|
3684
3700
|
compress: options?.compress !== false,
|
|
@@ -4109,29 +4125,6 @@ function valueKind$1(value) {
|
|
|
4109
4125
|
function copyArray(value) {
|
|
4110
4126
|
return value.map((item) => item);
|
|
4111
4127
|
}
|
|
4112
|
-
function stripCacheAttributes(attributes) {
|
|
4113
|
-
if (!attributes) return {};
|
|
4114
|
-
const result = {};
|
|
4115
|
-
for (const [key, value] of Object.entries(attributes)) if (!key.startsWith("cache.")) result[key] = value;
|
|
4116
|
-
return result;
|
|
4117
|
-
}
|
|
4118
|
-
async function snapshotNonCacheAttributes(span) {
|
|
4119
|
-
const snapshot = await cloneCacheValue(stripCacheAttributes(span?.attributes));
|
|
4120
|
-
return isRecordLike$2(snapshot) ? snapshot : {};
|
|
4121
|
-
}
|
|
4122
|
-
function diffNonCacheAttributes(before, after) {
|
|
4123
|
-
const result = {};
|
|
4124
|
-
for (const [key, value] of Object.entries(after)) if (!cacheAttributeValuesEqual(before[key], value)) result[key] = value;
|
|
4125
|
-
return result;
|
|
4126
|
-
}
|
|
4127
|
-
function cacheAttributeValuesEqual(left, right) {
|
|
4128
|
-
if (Object.is(left, right)) return true;
|
|
4129
|
-
try {
|
|
4130
|
-
return JSON.stringify(left) === JSON.stringify(right);
|
|
4131
|
-
} catch {
|
|
4132
|
-
return false;
|
|
4133
|
-
}
|
|
4134
|
-
}
|
|
4135
4128
|
function appendCacheRef(span, ref) {
|
|
4136
4129
|
if (span === void 0) return;
|
|
4137
4130
|
const existing = span.attributes?.["cache.refs"];
|
|
@@ -4150,7 +4143,7 @@ function recordCacheRef(scope, span, ref) {
|
|
|
4150
4143
|
}
|
|
4151
4144
|
scope.caseCacheRefs.push(ref);
|
|
4152
4145
|
}
|
|
4153
|
-
function serializeSubSpanTree(scope, spanId) {
|
|
4146
|
+
function serializeSubSpanTree(scope, spanId, spanIds) {
|
|
4154
4147
|
const original = scope.spans.find((s) => s.id === spanId);
|
|
4155
4148
|
if (!original) return {
|
|
4156
4149
|
kind: "custom",
|
|
@@ -4163,7 +4156,7 @@ function serializeSubSpanTree(scope, spanId) {
|
|
|
4163
4156
|
warnings: void 0,
|
|
4164
4157
|
children: []
|
|
4165
4158
|
};
|
|
4166
|
-
const children = scope.spans.filter((s) => s.parentId === spanId).map((child) => serializeSubSpanTree(scope, child.id));
|
|
4159
|
+
const children = scope.spans.filter((s) => s.parentId === spanId && spanIds.has(s.id)).map((child) => serializeSubSpanTree(scope, child.id, spanIds));
|
|
4167
4160
|
return {
|
|
4168
4161
|
kind: original.kind,
|
|
4169
4162
|
name: original.name,
|
|
@@ -4179,9 +4172,9 @@ function serializeSubSpanTree(scope, spanId) {
|
|
|
4179
4172
|
function appendSubSpanOps(scope, frame) {
|
|
4180
4173
|
for (let i = frame.baseSpanIndex; i < scope.spans.length; i++) {
|
|
4181
4174
|
const candidate = scope.spans[i];
|
|
4182
|
-
if (candidate?.parentId === frame.replayParentSpanId) frame.ops.push({
|
|
4175
|
+
if (candidate?.parentId === frame.replayParentSpanId && frame.spanIds.has(candidate.id)) frame.ops.push({
|
|
4183
4176
|
kind: "subSpan",
|
|
4184
|
-
span: serializeSubSpanTree(scope, candidate.id)
|
|
4177
|
+
span: serializeSubSpanTree(scope, candidate.id, frame.spanIds)
|
|
4185
4178
|
});
|
|
4186
4179
|
}
|
|
4187
4180
|
}
|
|
@@ -4437,25 +4430,21 @@ function createTraceCache(generateSpanId) {
|
|
|
4437
4430
|
key: keyHash,
|
|
4438
4431
|
status: "bypass"
|
|
4439
4432
|
});
|
|
4440
|
-
const beforeAttributes = await snapshotNonCacheAttributes(activeSpan);
|
|
4441
4433
|
const frame = {
|
|
4442
4434
|
baseSpanIndex: scope.spans.length,
|
|
4443
4435
|
replayParentSpanId: activeSpan?.id ?? null,
|
|
4436
|
+
spanIds: /* @__PURE__ */ new Set(),
|
|
4437
|
+
finalAttributes: {},
|
|
4444
4438
|
ops: []
|
|
4445
4439
|
};
|
|
4446
|
-
|
|
4447
|
-
|
|
4448
|
-
|
|
4449
|
-
bodyResult = await fn();
|
|
4450
|
-
} finally {
|
|
4451
|
-
scope.recordingStack.pop();
|
|
4452
|
-
}
|
|
4440
|
+
const bodyResult = await runWithCacheRecordingFrame(frame, async () => {
|
|
4441
|
+
return await fn();
|
|
4442
|
+
});
|
|
4453
4443
|
appendSubSpanOps(scope, frame);
|
|
4454
4444
|
if (canStore) {
|
|
4455
|
-
const finalAttributes = diffNonCacheAttributes(beforeAttributes, await snapshotNonCacheAttributes(activeSpan));
|
|
4456
4445
|
const recording = {
|
|
4457
4446
|
returnValue: bodyResult,
|
|
4458
|
-
finalAttributes,
|
|
4447
|
+
finalAttributes: frame.finalAttributes,
|
|
4459
4448
|
ops: frame.ops
|
|
4460
4449
|
};
|
|
4461
4450
|
await cacheCtx.adapter.write({
|
|
@@ -4514,6 +4503,13 @@ function mergeSpanAttributes(span, attributes) {
|
|
|
4514
4503
|
...span.attributes,
|
|
4515
4504
|
...attributes
|
|
4516
4505
|
};
|
|
4506
|
+
const scope = getCurrentScope();
|
|
4507
|
+
if (scope !== void 0) recordCacheRecordingAttributesIfActive(scope, span, attributes);
|
|
4508
|
+
}
|
|
4509
|
+
function copyNonCacheAttributes(attributes) {
|
|
4510
|
+
const result = {};
|
|
4511
|
+
for (const [key, value] of Object.entries(attributes ?? {})) if (!key.startsWith("cache.")) result[key] = value;
|
|
4512
|
+
return result;
|
|
4517
4513
|
}
|
|
4518
4514
|
function isRecordLike$1(value) {
|
|
4519
4515
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -4688,6 +4684,7 @@ function startExternalSpan(info) {
|
|
|
4688
4684
|
status: "running",
|
|
4689
4685
|
attributes: info.attributes
|
|
4690
4686
|
});
|
|
4687
|
+
recordSpanForActiveCacheRecording(scope, id);
|
|
4691
4688
|
return createExternalSpanHandle(id);
|
|
4692
4689
|
}
|
|
4693
4690
|
function updateExternalSpan(info) {
|
|
@@ -4746,6 +4743,7 @@ function recordExternalSpan(info) {
|
|
|
4746
4743
|
warning: info.warning,
|
|
4747
4744
|
warnings: info.warnings
|
|
4748
4745
|
});
|
|
4746
|
+
recordSpanForActiveCacheRecording(scope, id);
|
|
4749
4747
|
return id;
|
|
4750
4748
|
}
|
|
4751
4749
|
/**
|
|
@@ -4831,6 +4829,7 @@ async function traceSpanInternal(info, fn) {
|
|
|
4831
4829
|
attributes: info.attributes
|
|
4832
4830
|
};
|
|
4833
4831
|
scope.spans.push(spanRecord);
|
|
4832
|
+
recordSpanForActiveCacheRecording(scope, id);
|
|
4834
4833
|
const activeSpan = createSpanHandle(spanRecord);
|
|
4835
4834
|
return await runWithActiveSpan(spanRecord, async () => {
|
|
4836
4835
|
try {
|
|
@@ -4880,21 +4879,19 @@ async function traceSpanInternal(info, fn) {
|
|
|
4880
4879
|
const frame = {
|
|
4881
4880
|
baseSpanIndex: scope.spans.length,
|
|
4882
4881
|
replayParentSpanId: id,
|
|
4882
|
+
spanIds: /* @__PURE__ */ new Set(),
|
|
4883
|
+
finalAttributes: copyNonCacheAttributes(spanRecord.attributes),
|
|
4883
4884
|
ops: []
|
|
4884
4885
|
};
|
|
4885
|
-
|
|
4886
|
-
|
|
4887
|
-
|
|
4888
|
-
bodyResult = await fn(activeSpan);
|
|
4889
|
-
} finally {
|
|
4890
|
-
scope.recordingStack.pop();
|
|
4891
|
-
}
|
|
4886
|
+
const bodyResult = await runWithCacheRecordingFrame(frame, async () => {
|
|
4887
|
+
return await fn(activeSpan);
|
|
4888
|
+
});
|
|
4892
4889
|
appendSubSpanOps(scope, frame);
|
|
4893
4890
|
finishSpanWithoutThrownError(spanRecord, realStartedAt);
|
|
4894
4891
|
if (canStore) {
|
|
4895
4892
|
const recording = {
|
|
4896
4893
|
returnValue: bodyResult,
|
|
4897
|
-
finalAttributes:
|
|
4894
|
+
finalAttributes: frame.finalAttributes,
|
|
4898
4895
|
finalStatus: spanRecord.status,
|
|
4899
4896
|
finalError: spanRecord.error,
|
|
4900
4897
|
finalErrors: spanRecord.errors,
|
|
@@ -4998,14 +4995,12 @@ const evalTracer = {
|
|
|
4998
4995
|
status: "ok",
|
|
4999
4996
|
attributes: { value: data }
|
|
5000
4997
|
});
|
|
5001
|
-
|
|
5002
|
-
|
|
5003
|
-
|
|
5004
|
-
|
|
5005
|
-
|
|
5006
|
-
|
|
5007
|
-
});
|
|
5008
|
-
}
|
|
4998
|
+
recordSpanForActiveCacheRecording(scope, id);
|
|
4999
|
+
recordCacheRecordingOpIfActive(scope, {
|
|
5000
|
+
kind: "checkpoint",
|
|
5001
|
+
name,
|
|
5002
|
+
data
|
|
5003
|
+
});
|
|
5009
5004
|
}
|
|
5010
5005
|
};
|
|
5011
5006
|
/** Build a queryable trace tree helper from a flat span list and checkpoints. */
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Et as caseRowSchema, Pt as runWithEvalRegistry, Tt as caseDetailSchema, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, g as commitPendingCacheWrites, gt as matchesTagsFilter, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, wt as getCaseRowCaseKey, yt as runManifestSchema } from "./runExecution-
|
|
1
|
+
import { Et as caseRowSchema, Pt as runWithEvalRegistry, Tt as caseDetailSchema, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, g as commitPendingCacheWrites, gt as matchesTagsFilter, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, wt as getCaseRowCaseKey, yt as runManifestSchema } from "./runExecution-d42Lm0i5.mjs";
|
|
2
2
|
import { readFile, readdir, rm, writeFile } from "node:fs/promises";
|
|
3
3
|
import { dirname, join } from "node:path";
|
|
4
4
|
import { existsSync } from "node:fs";
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-MSr8sAWm.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-
|
|
2
|
-
import "./src-
|
|
1
|
+
import { n as createRunner } from "./cli-_g2qOMK6.mjs";
|
|
2
|
+
import "./src-CdZsOn6y.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-
|
|
2
|
-
import "./cli-
|
|
1
|
+
import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-d42Lm0i5.mjs";
|
|
2
|
+
import "./cli-_g2qOMK6.mjs";
|
|
3
3
|
//#region src/index.ts
|
|
4
4
|
/** Register an eval definition with typed tag support. */
|
|
5
5
|
function defineEval(definition) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ls-stack/agent-eval",
|
|
3
|
-
"version": "0.58.
|
|
3
|
+
"version": "0.58.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"agent-evals": "./dist/bin.mjs"
|
|
@@ -33,8 +33,8 @@
|
|
|
33
33
|
"@types/node": "^24.7.2",
|
|
34
34
|
"typescript": "^5.9.2",
|
|
35
35
|
"@agent-evals/runner": "0.0.1",
|
|
36
|
-
"@agent-evals/
|
|
37
|
-
"@agent-evals/
|
|
36
|
+
"@agent-evals/shared": "0.0.1",
|
|
37
|
+
"@agent-evals/sdk": "0.0.1"
|
|
38
38
|
},
|
|
39
39
|
"scripts": {
|
|
40
40
|
"build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",
|