@ls-stack/agent-eval 0.60.4 → 0.61.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-gg10KvzS.mjs → app-Dm_9ZTVa.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-CM_zUhl_.css +1 -0
- package/dist/apps/web/dist/assets/{index-CM6MDNqo.js → index-DxZsizjg.js} +76 -76
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +1 -1
- package/dist/{cli-OLZIjQpx.mjs → cli-CPBIcMP-.mjs} +4 -4
- package/dist/index.d.mts +47 -38
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +2 -2
- package/dist/{runExecution-Bu9yfdUS.mjs → runExecution-D-CnSRYy.mjs} +17 -1
- package/dist/{runOrchestration-mpgZmEZ6.mjs → runOrchestration-Basvyp4u.mjs} +1 -1
- package/dist/{runner-C4Y0lWb1.mjs → runner-B6UT1K7L.mjs} +1 -1
- package/dist/{runner-SxtKn-Xh.mjs → runner-DwNb5TCb.mjs} +2 -2
- package/dist/{src-Cy3OxoZW.mjs → src-SixIk0b7.mjs} +2 -2
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +76 -432
- package/dist/apps/web/dist/assets/index-CqWfzcFb.css +0 -1
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-CM6MDNqo.js"></script>
|
|
29
|
-
<link rel="stylesheet" crossorigin href="/assets/index-CqWfzcFb.css">
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-DxZsizjg.js"></script>
|
|
29
|
+
<link rel="stylesheet" crossorigin href="/assets/index-CM_zUhl_.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
|
32
32
|
<div id="root"></div>
|
package/dist/bin.mjs
CHANGED
package/dist/caseChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Ft as runWithEvalRegistry, I as configureEvalRunLogs, St as resolveLlmCallsConfig, _ as createFsCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, g as createBufferedCacheStore, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as getCacheRetentionOptions, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
|
|
1
|
+
import { Ft as runWithEvalRegistry, I as configureEvalRunLogs, St as resolveLlmCallsConfig, _ as createFsCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, g as createBufferedCacheStore, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as getCacheRetentionOptions, xt as resolveApiCallsConfig } from "./runExecution-D-CnSRYy.mjs";
|
|
2
2
|
//#region ../runner/src/caseChild.ts
|
|
3
3
|
let fatalErrorReported = false;
|
|
4
4
|
let disconnectExpected = false;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Dt as caseRowSchema, Pt as getEvalRegistry, St as resolveLlmCallsConfig, Tt as getCaseRowCaseKey, _ as createFsCacheStore, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as getCacheRetentionOptions, wt as buildEvalKey, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
|
-
import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-mpgZmEZ6.mjs";
|
|
1
|
+
import { Dt as caseRowSchema, Pt as getEvalRegistry, St as resolveLlmCallsConfig, Tt as getCaseRowCaseKey, _ as createFsCacheStore, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as getCacheRetentionOptions, wt as buildEvalKey, xt as resolveApiCallsConfig } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
+
import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-Basvyp4u.mjs";
|
|
3
3
|
import { parseEnv } from "node:util";
|
|
4
4
|
import { resultify } from "t-result";
|
|
5
5
|
import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
@@ -2243,8 +2243,8 @@ async function commandApp(args) {
|
|
|
2243
2243
|
const { serve } = await import("@hono/node-server");
|
|
2244
2244
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
2245
2245
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
2246
|
-
const appModule = await import("./app-gg10KvzS.mjs");
|
|
2247
|
-
const runnerModule = await import("./runner-C4Y0lWb1.mjs");
|
|
2246
|
+
const appModule = await import("./app-Dm_9ZTVa.mjs");
|
|
2247
|
+
const runnerModule = await import("./runner-B6UT1K7L.mjs");
|
|
2248
2248
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
2249
2249
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
2250
2250
|
await runnerModule.initRunner({ loadEnv: args.loadEnv });
|
package/dist/index.d.mts
CHANGED
|
@@ -2061,9 +2061,9 @@ declare const traceAttributeDisplaySchema: z.ZodObject<{
|
|
|
2061
2061
|
subtree: "subtree";
|
|
2062
2062
|
}>>;
|
|
2063
2063
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
2064
|
-
sum: "sum";
|
|
2065
2064
|
all: "all";
|
|
2066
2065
|
last: "last";
|
|
2066
|
+
sum: "sum";
|
|
2067
2067
|
}>>;
|
|
2068
2068
|
}, z.core.$strip>;
|
|
2069
2069
|
/**
|
|
@@ -2097,9 +2097,9 @@ declare const traceDisplayConfigSchema: z.ZodObject<{
|
|
|
2097
2097
|
subtree: "subtree";
|
|
2098
2098
|
}>>;
|
|
2099
2099
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
2100
|
-
sum: "sum";
|
|
2101
2100
|
all: "all";
|
|
2102
2101
|
last: "last";
|
|
2102
|
+
sum: "sum";
|
|
2103
2103
|
}>>;
|
|
2104
2104
|
}, z.core.$strip>>>;
|
|
2105
2105
|
}, z.core.$strip>;
|
|
@@ -2137,9 +2137,9 @@ declare const traceAttributeDisplayInputSchema: z.ZodObject<{
|
|
|
2137
2137
|
subtree: "subtree";
|
|
2138
2138
|
}>>;
|
|
2139
2139
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
2140
|
-
sum: "sum";
|
|
2141
2140
|
all: "all";
|
|
2142
2141
|
last: "last";
|
|
2142
|
+
sum: "sum";
|
|
2143
2143
|
}>>;
|
|
2144
2144
|
transform: z.ZodOptional<z.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
2145
2145
|
}, z.core.$strip>;
|
|
@@ -2175,9 +2175,9 @@ declare const traceDisplayInputConfigSchema: z.ZodObject<{
|
|
|
2175
2175
|
subtree: "subtree";
|
|
2176
2176
|
}>>;
|
|
2177
2177
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
2178
|
-
sum: "sum";
|
|
2179
2178
|
all: "all";
|
|
2180
2179
|
last: "last";
|
|
2180
|
+
sum: "sum";
|
|
2181
2181
|
}>>;
|
|
2182
2182
|
transform: z.ZodOptional<z.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
2183
2183
|
}, z.core.$strip>>>;
|
|
@@ -2260,10 +2260,10 @@ type EvalFreshnessStatus = z.infer<typeof evalFreshnessStatusSchema>;
|
|
|
2260
2260
|
* `best` selects the highest finite value and `worst` selects the lowest.
|
|
2261
2261
|
*/
|
|
2262
2262
|
declare const evalStatAggregateSchema: z.ZodEnum<{
|
|
2263
|
+
sum: "sum";
|
|
2263
2264
|
avg: "avg";
|
|
2264
2265
|
min: "min";
|
|
2265
2266
|
max: "max";
|
|
2266
|
-
sum: "sum";
|
|
2267
2267
|
best: "best";
|
|
2268
2268
|
worst: "worst";
|
|
2269
2269
|
}>;
|
|
@@ -2292,10 +2292,10 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
2292
2292
|
hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
|
|
2293
2293
|
kind: z.ZodLiteral<"duration">;
|
|
2294
2294
|
aggregate: z.ZodOptional<z.ZodEnum<{
|
|
2295
|
+
sum: "sum";
|
|
2295
2296
|
avg: "avg";
|
|
2296
2297
|
min: "min";
|
|
2297
2298
|
max: "max";
|
|
2298
|
-
sum: "sum";
|
|
2299
2299
|
best: "best";
|
|
2300
2300
|
worst: "worst";
|
|
2301
2301
|
}>>;
|
|
@@ -2303,10 +2303,10 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
2303
2303
|
hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
|
|
2304
2304
|
kind: z.ZodLiteral<"cacheHits">;
|
|
2305
2305
|
aggregate: z.ZodOptional<z.ZodEnum<{
|
|
2306
|
+
sum: "sum";
|
|
2306
2307
|
avg: "avg";
|
|
2307
2308
|
min: "min";
|
|
2308
2309
|
max: "max";
|
|
2309
|
-
sum: "sum";
|
|
2310
2310
|
best: "best";
|
|
2311
2311
|
worst: "worst";
|
|
2312
2312
|
}>>;
|
|
@@ -2316,10 +2316,10 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
2316
2316
|
key: z.ZodString;
|
|
2317
2317
|
label: z.ZodOptional<z.ZodString>;
|
|
2318
2318
|
aggregate: z.ZodEnum<{
|
|
2319
|
+
sum: "sum";
|
|
2319
2320
|
avg: "avg";
|
|
2320
2321
|
min: "min";
|
|
2321
2322
|
max: "max";
|
|
2322
|
-
sum: "sum";
|
|
2323
2323
|
best: "best";
|
|
2324
2324
|
worst: "worst";
|
|
2325
2325
|
}>;
|
|
@@ -2356,10 +2356,10 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
|
|
|
2356
2356
|
hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
|
|
2357
2357
|
kind: z.ZodLiteral<"duration">;
|
|
2358
2358
|
aggregate: z.ZodOptional<z.ZodEnum<{
|
|
2359
|
+
sum: "sum";
|
|
2359
2360
|
avg: "avg";
|
|
2360
2361
|
min: "min";
|
|
2361
2362
|
max: "max";
|
|
2362
|
-
sum: "sum";
|
|
2363
2363
|
best: "best";
|
|
2364
2364
|
worst: "worst";
|
|
2365
2365
|
}>>;
|
|
@@ -2367,10 +2367,10 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
|
|
|
2367
2367
|
hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
|
|
2368
2368
|
kind: z.ZodLiteral<"cacheHits">;
|
|
2369
2369
|
aggregate: z.ZodOptional<z.ZodEnum<{
|
|
2370
|
+
sum: "sum";
|
|
2370
2371
|
avg: "avg";
|
|
2371
2372
|
min: "min";
|
|
2372
2373
|
max: "max";
|
|
2373
|
-
sum: "sum";
|
|
2374
2374
|
best: "best";
|
|
2375
2375
|
worst: "worst";
|
|
2376
2376
|
}>>;
|
|
@@ -2380,10 +2380,10 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
|
|
|
2380
2380
|
key: z.ZodString;
|
|
2381
2381
|
label: z.ZodOptional<z.ZodString>;
|
|
2382
2382
|
aggregate: z.ZodEnum<{
|
|
2383
|
+
sum: "sum";
|
|
2383
2384
|
avg: "avg";
|
|
2384
2385
|
min: "min";
|
|
2385
2386
|
max: "max";
|
|
2386
|
-
sum: "sum";
|
|
2387
2387
|
best: "best";
|
|
2388
2388
|
worst: "worst";
|
|
2389
2389
|
}>;
|
|
@@ -2483,10 +2483,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2483
2483
|
hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
|
|
2484
2484
|
kind: z.ZodLiteral<"duration">;
|
|
2485
2485
|
aggregate: z.ZodOptional<z.ZodEnum<{
|
|
2486
|
+
sum: "sum";
|
|
2486
2487
|
avg: "avg";
|
|
2487
2488
|
min: "min";
|
|
2488
2489
|
max: "max";
|
|
2489
|
-
sum: "sum";
|
|
2490
2490
|
best: "best";
|
|
2491
2491
|
worst: "worst";
|
|
2492
2492
|
}>>;
|
|
@@ -2494,10 +2494,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2494
2494
|
hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
|
|
2495
2495
|
kind: z.ZodLiteral<"cacheHits">;
|
|
2496
2496
|
aggregate: z.ZodOptional<z.ZodEnum<{
|
|
2497
|
+
sum: "sum";
|
|
2497
2498
|
avg: "avg";
|
|
2498
2499
|
min: "min";
|
|
2499
2500
|
max: "max";
|
|
2500
|
-
sum: "sum";
|
|
2501
2501
|
best: "best";
|
|
2502
2502
|
worst: "worst";
|
|
2503
2503
|
}>>;
|
|
@@ -2507,10 +2507,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2507
2507
|
key: z.ZodString;
|
|
2508
2508
|
label: z.ZodOptional<z.ZodString>;
|
|
2509
2509
|
aggregate: z.ZodEnum<{
|
|
2510
|
+
sum: "sum";
|
|
2510
2511
|
avg: "avg";
|
|
2511
2512
|
min: "min";
|
|
2512
2513
|
max: "max";
|
|
2513
|
-
sum: "sum";
|
|
2514
2514
|
best: "best";
|
|
2515
2515
|
worst: "worst";
|
|
2516
2516
|
}>;
|
|
@@ -2534,10 +2534,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2534
2534
|
accent: z.ZodOptional<z.ZodBoolean>;
|
|
2535
2535
|
}, z.core.$strip>], "kind">>>;
|
|
2536
2536
|
defaultStatAggregate: z.ZodOptional<z.ZodEnum<{
|
|
2537
|
+
sum: "sum";
|
|
2537
2538
|
avg: "avg";
|
|
2538
2539
|
min: "min";
|
|
2539
2540
|
max: "max";
|
|
2540
|
-
sum: "sum";
|
|
2541
2541
|
best: "best";
|
|
2542
2542
|
worst: "worst";
|
|
2543
2543
|
}>>;
|
|
@@ -2560,9 +2560,9 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2560
2560
|
color: z.ZodOptional<z.ZodEnum<{
|
|
2561
2561
|
error: "error";
|
|
2562
2562
|
success: "success";
|
|
2563
|
+
warning: "warning";
|
|
2563
2564
|
accent: "accent";
|
|
2564
2565
|
accentDim: "accentDim";
|
|
2565
|
-
warning: "warning";
|
|
2566
2566
|
textMuted: "textMuted";
|
|
2567
2567
|
}>>;
|
|
2568
2568
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -2573,10 +2573,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2573
2573
|
source: z.ZodLiteral<"column">;
|
|
2574
2574
|
key: z.ZodString;
|
|
2575
2575
|
aggregate: z.ZodEnum<{
|
|
2576
|
+
sum: "sum";
|
|
2576
2577
|
avg: "avg";
|
|
2577
2578
|
min: "min";
|
|
2578
2579
|
max: "max";
|
|
2579
|
-
sum: "sum";
|
|
2580
2580
|
latest: "latest";
|
|
2581
2581
|
passThresholdRate: "passThresholdRate";
|
|
2582
2582
|
}>;
|
|
@@ -2584,9 +2584,9 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2584
2584
|
color: z.ZodOptional<z.ZodEnum<{
|
|
2585
2585
|
error: "error";
|
|
2586
2586
|
success: "success";
|
|
2587
|
+
warning: "warning";
|
|
2587
2588
|
accent: "accent";
|
|
2588
2589
|
accentDim: "accentDim";
|
|
2589
|
-
warning: "warning";
|
|
2590
2590
|
textMuted: "textMuted";
|
|
2591
2591
|
}>>;
|
|
2592
2592
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -2615,10 +2615,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2615
2615
|
source: z.ZodLiteral<"column">;
|
|
2616
2616
|
key: z.ZodString;
|
|
2617
2617
|
aggregate: z.ZodEnum<{
|
|
2618
|
+
sum: "sum";
|
|
2618
2619
|
avg: "avg";
|
|
2619
2620
|
min: "min";
|
|
2620
2621
|
max: "max";
|
|
2621
|
-
sum: "sum";
|
|
2622
2622
|
latest: "latest";
|
|
2623
2623
|
passThresholdRate: "passThresholdRate";
|
|
2624
2624
|
}>;
|
|
@@ -2908,9 +2908,9 @@ declare const scoreTraceSchema: z.ZodObject<{
|
|
|
2908
2908
|
subtree: "subtree";
|
|
2909
2909
|
}>>;
|
|
2910
2910
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
2911
|
-
sum: "sum";
|
|
2912
2911
|
all: "all";
|
|
2913
2912
|
last: "last";
|
|
2913
|
+
sum: "sum";
|
|
2914
2914
|
}>>;
|
|
2915
2915
|
}, z.core.$strip>>>;
|
|
2916
2916
|
}, z.core.$strip>;
|
|
@@ -3011,9 +3011,9 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3011
3011
|
subtree: "subtree";
|
|
3012
3012
|
}>>;
|
|
3013
3013
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
3014
|
-
sum: "sum";
|
|
3015
3014
|
all: "all";
|
|
3016
3015
|
last: "last";
|
|
3016
|
+
sum: "sum";
|
|
3017
3017
|
}>>;
|
|
3018
3018
|
}, z.core.$strip>>>;
|
|
3019
3019
|
}, z.core.$strip>;
|
|
@@ -3080,9 +3080,9 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3080
3080
|
subtree: "subtree";
|
|
3081
3081
|
}>>;
|
|
3082
3082
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
3083
|
-
sum: "sum";
|
|
3084
3083
|
all: "all";
|
|
3085
3084
|
last: "last";
|
|
3085
|
+
sum: "sum";
|
|
3086
3086
|
}>>;
|
|
3087
3087
|
}, z.core.$strip>>>;
|
|
3088
3088
|
}, z.core.$strip>;
|
|
@@ -3269,10 +3269,10 @@ declare const evalChartBuiltinMetricSchema: z.ZodEnum<{
|
|
|
3269
3269
|
type EvalChartBuiltinMetric = z.infer<typeof evalChartBuiltinMetricSchema>;
|
|
3270
3270
|
/** Reducer applied to a numeric column across all cases of a single run. */
|
|
3271
3271
|
declare const evalChartAggregateSchema: z.ZodEnum<{
|
|
3272
|
+
sum: "sum";
|
|
3272
3273
|
avg: "avg";
|
|
3273
3274
|
min: "min";
|
|
3274
3275
|
max: "max";
|
|
3275
|
-
sum: "sum";
|
|
3276
3276
|
latest: "latest";
|
|
3277
3277
|
passThresholdRate: "passThresholdRate";
|
|
3278
3278
|
}>;
|
|
@@ -3285,9 +3285,9 @@ type EvalChartAggregate = z.infer<typeof evalChartAggregateSchema>;
|
|
|
3285
3285
|
declare const evalChartColorSchema: z.ZodEnum<{
|
|
3286
3286
|
error: "error";
|
|
3287
3287
|
success: "success";
|
|
3288
|
+
warning: "warning";
|
|
3288
3289
|
accent: "accent";
|
|
3289
3290
|
accentDim: "accentDim";
|
|
3290
|
-
warning: "warning";
|
|
3291
3291
|
textMuted: "textMuted";
|
|
3292
3292
|
}>;
|
|
3293
3293
|
/** Semantic color token resolved to a theme color by the web UI. */
|
|
@@ -3314,9 +3314,9 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3314
3314
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3315
3315
|
error: "error";
|
|
3316
3316
|
success: "success";
|
|
3317
|
+
warning: "warning";
|
|
3317
3318
|
accent: "accent";
|
|
3318
3319
|
accentDim: "accentDim";
|
|
3319
|
-
warning: "warning";
|
|
3320
3320
|
textMuted: "textMuted";
|
|
3321
3321
|
}>>;
|
|
3322
3322
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -3327,10 +3327,10 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3327
3327
|
source: z.ZodLiteral<"column">;
|
|
3328
3328
|
key: z.ZodString;
|
|
3329
3329
|
aggregate: z.ZodEnum<{
|
|
3330
|
+
sum: "sum";
|
|
3330
3331
|
avg: "avg";
|
|
3331
3332
|
min: "min";
|
|
3332
3333
|
max: "max";
|
|
3333
|
-
sum: "sum";
|
|
3334
3334
|
latest: "latest";
|
|
3335
3335
|
passThresholdRate: "passThresholdRate";
|
|
3336
3336
|
}>;
|
|
@@ -3338,9 +3338,9 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3338
3338
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3339
3339
|
error: "error";
|
|
3340
3340
|
success: "success";
|
|
3341
|
+
warning: "warning";
|
|
3341
3342
|
accent: "accent";
|
|
3342
3343
|
accentDim: "accentDim";
|
|
3343
|
-
warning: "warning";
|
|
3344
3344
|
textMuted: "textMuted";
|
|
3345
3345
|
}>>;
|
|
3346
3346
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -3362,10 +3362,10 @@ declare const evalChartTooltipExtraSchema: z.ZodDiscriminatedUnion<[z.ZodObject<
|
|
|
3362
3362
|
source: z.ZodLiteral<"column">;
|
|
3363
3363
|
key: z.ZodString;
|
|
3364
3364
|
aggregate: z.ZodEnum<{
|
|
3365
|
+
sum: "sum";
|
|
3365
3366
|
avg: "avg";
|
|
3366
3367
|
min: "min";
|
|
3367
3368
|
max: "max";
|
|
3368
|
-
sum: "sum";
|
|
3369
3369
|
latest: "latest";
|
|
3370
3370
|
passThresholdRate: "passThresholdRate";
|
|
3371
3371
|
}>;
|
|
@@ -3397,9 +3397,9 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3397
3397
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3398
3398
|
error: "error";
|
|
3399
3399
|
success: "success";
|
|
3400
|
+
warning: "warning";
|
|
3400
3401
|
accent: "accent";
|
|
3401
3402
|
accentDim: "accentDim";
|
|
3402
|
-
warning: "warning";
|
|
3403
3403
|
textMuted: "textMuted";
|
|
3404
3404
|
}>>;
|
|
3405
3405
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -3410,10 +3410,10 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3410
3410
|
source: z.ZodLiteral<"column">;
|
|
3411
3411
|
key: z.ZodString;
|
|
3412
3412
|
aggregate: z.ZodEnum<{
|
|
3413
|
+
sum: "sum";
|
|
3413
3414
|
avg: "avg";
|
|
3414
3415
|
min: "min";
|
|
3415
3416
|
max: "max";
|
|
3416
|
-
sum: "sum";
|
|
3417
3417
|
latest: "latest";
|
|
3418
3418
|
passThresholdRate: "passThresholdRate";
|
|
3419
3419
|
}>;
|
|
@@ -3421,9 +3421,9 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3421
3421
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3422
3422
|
error: "error";
|
|
3423
3423
|
success: "success";
|
|
3424
|
+
warning: "warning";
|
|
3424
3425
|
accent: "accent";
|
|
3425
3426
|
accentDim: "accentDim";
|
|
3426
|
-
warning: "warning";
|
|
3427
3427
|
textMuted: "textMuted";
|
|
3428
3428
|
}>>;
|
|
3429
3429
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -3452,10 +3452,10 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3452
3452
|
source: z.ZodLiteral<"column">;
|
|
3453
3453
|
key: z.ZodString;
|
|
3454
3454
|
aggregate: z.ZodEnum<{
|
|
3455
|
+
sum: "sum";
|
|
3455
3456
|
avg: "avg";
|
|
3456
3457
|
min: "min";
|
|
3457
3458
|
max: "max";
|
|
3458
|
-
sum: "sum";
|
|
3459
3459
|
latest: "latest";
|
|
3460
3460
|
passThresholdRate: "passThresholdRate";
|
|
3461
3461
|
}>;
|
|
@@ -3487,9 +3487,9 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3487
3487
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3488
3488
|
error: "error";
|
|
3489
3489
|
success: "success";
|
|
3490
|
+
warning: "warning";
|
|
3490
3491
|
accent: "accent";
|
|
3491
3492
|
accentDim: "accentDim";
|
|
3492
|
-
warning: "warning";
|
|
3493
3493
|
textMuted: "textMuted";
|
|
3494
3494
|
}>>;
|
|
3495
3495
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -3500,10 +3500,10 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3500
3500
|
source: z.ZodLiteral<"column">;
|
|
3501
3501
|
key: z.ZodString;
|
|
3502
3502
|
aggregate: z.ZodEnum<{
|
|
3503
|
+
sum: "sum";
|
|
3503
3504
|
avg: "avg";
|
|
3504
3505
|
min: "min";
|
|
3505
3506
|
max: "max";
|
|
3506
|
-
sum: "sum";
|
|
3507
3507
|
latest: "latest";
|
|
3508
3508
|
passThresholdRate: "passThresholdRate";
|
|
3509
3509
|
}>;
|
|
@@ -3511,9 +3511,9 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3511
3511
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3512
3512
|
error: "error";
|
|
3513
3513
|
success: "success";
|
|
3514
|
+
warning: "warning";
|
|
3514
3515
|
accent: "accent";
|
|
3515
3516
|
accentDim: "accentDim";
|
|
3516
|
-
warning: "warning";
|
|
3517
3517
|
textMuted: "textMuted";
|
|
3518
3518
|
}>>;
|
|
3519
3519
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -3542,10 +3542,10 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3542
3542
|
source: z.ZodLiteral<"column">;
|
|
3543
3543
|
key: z.ZodString;
|
|
3544
3544
|
aggregate: z.ZodEnum<{
|
|
3545
|
+
sum: "sum";
|
|
3545
3546
|
avg: "avg";
|
|
3546
3547
|
min: "min";
|
|
3547
3548
|
max: "max";
|
|
3548
|
-
sum: "sum";
|
|
3549
3549
|
latest: "latest";
|
|
3550
3550
|
passThresholdRate: "passThresholdRate";
|
|
3551
3551
|
}>;
|
|
@@ -3573,8 +3573,8 @@ declare const runManifestSchema$1: z.ZodObject<{
|
|
|
3573
3573
|
evalSourceFingerprints: z.ZodDefault<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>>;
|
|
3574
3574
|
target: z.ZodObject<{
|
|
3575
3575
|
mode: z.ZodEnum<{
|
|
3576
|
-
caseIds: "caseIds";
|
|
3577
3576
|
all: "all";
|
|
3577
|
+
caseIds: "caseIds";
|
|
3578
3578
|
evalIds: "evalIds";
|
|
3579
3579
|
}>;
|
|
3580
3580
|
evalKeys: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
@@ -4049,6 +4049,7 @@ declare const apiCallsConfigSchema: z.ZodObject<{
|
|
|
4049
4049
|
attributes: z.ZodOptional<z.ZodObject<{
|
|
4050
4050
|
method: z.ZodOptional<z.ZodString>;
|
|
4051
4051
|
url: z.ZodOptional<z.ZodString>;
|
|
4052
|
+
routeAlias: z.ZodOptional<z.ZodString>;
|
|
4052
4053
|
statusCode: z.ZodOptional<z.ZodString>;
|
|
4053
4054
|
request: z.ZodOptional<z.ZodString>;
|
|
4054
4055
|
response: z.ZodOptional<z.ZodString>;
|
|
@@ -4116,6 +4117,7 @@ type ResolvedApiCallsConfig = {
|
|
|
4116
4117
|
attributes: {
|
|
4117
4118
|
method: string;
|
|
4118
4119
|
url: string;
|
|
4120
|
+
routeAlias: string;
|
|
4119
4121
|
statusCode: string;
|
|
4120
4122
|
request: string;
|
|
4121
4123
|
response: string;
|
|
@@ -4302,6 +4304,7 @@ type AgentEvalsConfig$1 = {
|
|
|
4302
4304
|
* kinds: ['api', 'http.client', 'undici.request'],
|
|
4303
4305
|
* attributes: {
|
|
4304
4306
|
* statusCode: 'http.status_code',
|
|
4307
|
+
* routeAlias: 'http.route',
|
|
4305
4308
|
* },
|
|
4306
4309
|
* metrics: [
|
|
4307
4310
|
* { label: 'Retries', path: 'retryCount', format: 'number' },
|
|
@@ -4533,6 +4536,11 @@ type ApiCallEntry = {
|
|
|
4533
4536
|
status: EvalTraceSpan$1['status'];
|
|
4534
4537
|
method: string | null;
|
|
4535
4538
|
url: string | null;
|
|
4539
|
+
/**
|
|
4540
|
+
* Dynamic route alias read from the API span, such as `/v3/tabs/:id`.
|
|
4541
|
+
* The original `url` stays available for request details.
|
|
4542
|
+
*/
|
|
4543
|
+
routeAlias: string | null;
|
|
4536
4544
|
statusCode: number | null; /** Elapsed API call duration in milliseconds. */
|
|
4537
4545
|
durationMs: number | null;
|
|
4538
4546
|
request: unknown;
|
|
@@ -5565,8 +5573,8 @@ type ConfigReloadState = z.infer<typeof configReloadStateSchema$1>;
|
|
|
5565
5573
|
declare const createRunRequestSchema$1: z.ZodObject<{
|
|
5566
5574
|
target: z.ZodObject<{
|
|
5567
5575
|
mode: z.ZodEnum<{
|
|
5568
|
-
caseIds: "caseIds";
|
|
5569
5576
|
all: "all";
|
|
5577
|
+
caseIds: "caseIds";
|
|
5570
5578
|
evalIds: "evalIds";
|
|
5571
5579
|
}>;
|
|
5572
5580
|
evalKeys: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
@@ -6682,6 +6690,7 @@ type ResolvedApiCallsConfig$1 = {
|
|
|
6682
6690
|
attributes: {
|
|
6683
6691
|
method: string;
|
|
6684
6692
|
url: string;
|
|
6693
|
+
routeAlias: string;
|
|
6685
6694
|
statusCode: string;
|
|
6686
6695
|
request: string;
|
|
6687
6696
|
response: string;
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-OLZIjQpx.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-Cy3OxoZW.mjs";
|
|
1
|
+
import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CPBIcMP-.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-SixIk0b7.mjs";
|
|
4
4
|
export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
|
-
import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-mpgZmEZ6.mjs";
|
|
1
|
+
import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
+
import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-Basvyp4u.mjs";
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import { readFile } from "node:fs/promises";
|
|
5
5
|
import { relative } from "node:path";
|
|
@@ -1214,6 +1214,7 @@ const apiCallsConfigSchema = z.object({
|
|
|
1214
1214
|
attributes: z.object({
|
|
1215
1215
|
method: z.string().optional(),
|
|
1216
1216
|
url: z.string().optional(),
|
|
1217
|
+
routeAlias: z.string().optional(),
|
|
1217
1218
|
statusCode: z.string().optional(),
|
|
1218
1219
|
request: z.string().optional(),
|
|
1219
1220
|
response: z.string().optional(),
|
|
@@ -1278,6 +1279,7 @@ const DEFAULT_API_CALLS_CONFIG = {
|
|
|
1278
1279
|
attributes: {
|
|
1279
1280
|
method: "method",
|
|
1280
1281
|
url: "url",
|
|
1282
|
+
routeAlias: "routeAlias",
|
|
1281
1283
|
statusCode: "statusCode",
|
|
1282
1284
|
request: "request",
|
|
1283
1285
|
response: "response",
|
|
@@ -2494,6 +2496,17 @@ function pickError(span) {
|
|
|
2494
2496
|
if (span.errors && span.errors.length > 0) return span.errors[0] ?? null;
|
|
2495
2497
|
return null;
|
|
2496
2498
|
}
|
|
2499
|
+
function stripSearchAndHash(value) {
|
|
2500
|
+
const endIndex = [value.indexOf("?"), value.indexOf("#")].filter((index) => index !== -1).toSorted((a, b) => a - b)[0];
|
|
2501
|
+
return endIndex === void 0 ? value : value.slice(0, endIndex);
|
|
2502
|
+
}
|
|
2503
|
+
function normalizeRouteAlias(routeAlias) {
|
|
2504
|
+
if (routeAlias === null) return null;
|
|
2505
|
+
const trimmed = routeAlias.trim();
|
|
2506
|
+
if (trimmed.length === 0) return null;
|
|
2507
|
+
const withoutSearch = stripSearchAndHash(trimmed);
|
|
2508
|
+
return withoutSearch.startsWith("/") ? withoutSearch : `/${withoutSearch}`;
|
|
2509
|
+
}
|
|
2497
2510
|
/**
|
|
2498
2511
|
* Filter `spans` down to API calls and project each one to the structured
|
|
2499
2512
|
* shape consumed by the API calls tab.
|
|
@@ -2525,13 +2538,16 @@ function extractApiCalls(spans, config) {
|
|
|
2525
2538
|
placements: metric.placements
|
|
2526
2539
|
});
|
|
2527
2540
|
}
|
|
2541
|
+
const url = readString$1(attrs, config.attributes.url);
|
|
2542
|
+
const routeAlias = normalizeRouteAlias(readString$1(attrs, config.attributes.routeAlias));
|
|
2528
2543
|
result.push({
|
|
2529
2544
|
id: span.id,
|
|
2530
2545
|
name: span.name,
|
|
2531
2546
|
kind: span.kind,
|
|
2532
2547
|
status: span.status,
|
|
2533
2548
|
method: readString$1(attrs, config.attributes.method),
|
|
2534
|
-
url: readString$1(attrs, config.attributes.url),
|
|
2549
|
+
url,
|
|
2550
|
+
routeAlias,
|
|
2535
2551
|
statusCode: readNumber$1(attrs, config.attributes.statusCode),
|
|
2536
2552
|
durationMs: readNumber$1(attrs, config.attributes.durationMs) ?? computeDurationMs(span),
|
|
2537
2553
|
request: getNestedAttribute(attrs, config.attributes.request),
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Ct as buildCaseKey, Dt as caseRowSchema, Et as caseDetailSchema, Ft as runWithEvalRegistry, Tt as getCaseRowCaseKey, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as commitPendingCacheWrites, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
|
|
1
|
+
import { Ct as buildCaseKey, Dt as caseRowSchema, Et as caseDetailSchema, Ft as runWithEvalRegistry, Tt as getCaseRowCaseKey, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as commitPendingCacheWrites, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, yt as runManifestSchema } from "./runExecution-D-CnSRYy.mjs";
|
|
2
2
|
import { Result, resultify } from "t-result";
|
|
3
3
|
import { readFile, readdir, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-SxtKn-Xh.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-DwNb5TCb.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-OLZIjQpx.mjs";
|
|
2
|
-
import "./src-Cy3OxoZW.mjs";
|
|
1
|
+
import { n as createRunner } from "./cli-CPBIcMP-.mjs";
|
|
2
|
+
import "./src-SixIk0b7.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance({ loadEnv = true } = {}) {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Nt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
|
-
import "./cli-OLZIjQpx.mjs";
|
|
1
|
+
import { Nt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
+
import "./cli-CPBIcMP-.mjs";
|
|
3
3
|
//#region src/index.ts
|
|
4
4
|
/** Register an eval definition with typed tag support. */
|
|
5
5
|
function defineEval(definition) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ls-stack/agent-eval",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.61.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"agent-evals": "./dist/bin.mjs"
|
|
@@ -32,9 +32,9 @@
|
|
|
32
32
|
"devDependencies": {
|
|
33
33
|
"@types/node": "^24.7.2",
|
|
34
34
|
"typescript": "^5.9.2",
|
|
35
|
+
"@agent-evals/runner": "0.0.1",
|
|
35
36
|
"@agent-evals/shared": "0.0.1",
|
|
36
|
-
"@agent-evals/sdk": "0.0.1"
|
|
37
|
-
"@agent-evals/runner": "0.0.1"
|
|
37
|
+
"@agent-evals/sdk": "0.0.1"
|
|
38
38
|
},
|
|
39
39
|
"scripts": {
|
|
40
40
|
"build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",
|