@ls-stack/agent-eval 0.60.4 → 0.61.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-CM6MDNqo.js"></script>
29
- <link rel="stylesheet" crossorigin href="/assets/index-CqWfzcFb.css">
28
+ <script type="module" crossorigin src="/assets/index-DxZsizjg.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-CM_zUhl_.css">
30
30
  </head>
31
31
  <body>
32
32
  <div id="root"></div>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-OLZIjQpx.mjs";
2
+ import { t as runCli } from "./cli-CPBIcMP-.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { Ft as runWithEvalRegistry, I as configureEvalRunLogs, St as resolveLlmCallsConfig, _ as createFsCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, g as createBufferedCacheStore, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as getCacheRetentionOptions, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
1
+ import { Ft as runWithEvalRegistry, I as configureEvalRunLogs, St as resolveLlmCallsConfig, _ as createFsCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, g as createBufferedCacheStore, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as getCacheRetentionOptions, xt as resolveApiCallsConfig } from "./runExecution-D-CnSRYy.mjs";
2
2
  //#region ../runner/src/caseChild.ts
3
3
  let fatalErrorReported = false;
4
4
  let disconnectExpected = false;
@@ -1,5 +1,5 @@
1
- import { Dt as caseRowSchema, Pt as getEvalRegistry, St as resolveLlmCallsConfig, Tt as getCaseRowCaseKey, _ as createFsCacheStore, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as getCacheRetentionOptions, wt as buildEvalKey, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
2
- import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-mpgZmEZ6.mjs";
1
+ import { Dt as caseRowSchema, Pt as getEvalRegistry, St as resolveLlmCallsConfig, Tt as getCaseRowCaseKey, _ as createFsCacheStore, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as getCacheRetentionOptions, wt as buildEvalKey, xt as resolveApiCallsConfig } from "./runExecution-D-CnSRYy.mjs";
2
+ import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-Basvyp4u.mjs";
3
3
  import { parseEnv } from "node:util";
4
4
  import { resultify } from "t-result";
5
5
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
@@ -2243,8 +2243,8 @@ async function commandApp(args) {
2243
2243
  const { serve } = await import("@hono/node-server");
2244
2244
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2245
2245
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2246
- const appModule = await import("./app-gg10KvzS.mjs");
2247
- const runnerModule = await import("./runner-C4Y0lWb1.mjs");
2246
+ const appModule = await import("./app-Dm_9ZTVa.mjs");
2247
+ const runnerModule = await import("./runner-B6UT1K7L.mjs");
2248
2248
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2249
2249
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2250
2250
  await runnerModule.initRunner({ loadEnv: args.loadEnv });
package/dist/index.d.mts CHANGED
@@ -2061,9 +2061,9 @@ declare const traceAttributeDisplaySchema: z.ZodObject<{
2061
2061
  subtree: "subtree";
2062
2062
  }>>;
2063
2063
  mode: z.ZodOptional<z.ZodEnum<{
2064
- sum: "sum";
2065
2064
  all: "all";
2066
2065
  last: "last";
2066
+ sum: "sum";
2067
2067
  }>>;
2068
2068
  }, z.core.$strip>;
2069
2069
  /**
@@ -2097,9 +2097,9 @@ declare const traceDisplayConfigSchema: z.ZodObject<{
2097
2097
  subtree: "subtree";
2098
2098
  }>>;
2099
2099
  mode: z.ZodOptional<z.ZodEnum<{
2100
- sum: "sum";
2101
2100
  all: "all";
2102
2101
  last: "last";
2102
+ sum: "sum";
2103
2103
  }>>;
2104
2104
  }, z.core.$strip>>>;
2105
2105
  }, z.core.$strip>;
@@ -2137,9 +2137,9 @@ declare const traceAttributeDisplayInputSchema: z.ZodObject<{
2137
2137
  subtree: "subtree";
2138
2138
  }>>;
2139
2139
  mode: z.ZodOptional<z.ZodEnum<{
2140
- sum: "sum";
2141
2140
  all: "all";
2142
2141
  last: "last";
2142
+ sum: "sum";
2143
2143
  }>>;
2144
2144
  transform: z.ZodOptional<z.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
2145
2145
  }, z.core.$strip>;
@@ -2175,9 +2175,9 @@ declare const traceDisplayInputConfigSchema: z.ZodObject<{
2175
2175
  subtree: "subtree";
2176
2176
  }>>;
2177
2177
  mode: z.ZodOptional<z.ZodEnum<{
2178
- sum: "sum";
2179
2178
  all: "all";
2180
2179
  last: "last";
2180
+ sum: "sum";
2181
2181
  }>>;
2182
2182
  transform: z.ZodOptional<z.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
2183
2183
  }, z.core.$strip>>>;
@@ -2260,10 +2260,10 @@ type EvalFreshnessStatus = z.infer<typeof evalFreshnessStatusSchema>;
2260
2260
  * `best` selects the highest finite value and `worst` selects the lowest.
2261
2261
  */
2262
2262
  declare const evalStatAggregateSchema: z.ZodEnum<{
2263
+ sum: "sum";
2263
2264
  avg: "avg";
2264
2265
  min: "min";
2265
2266
  max: "max";
2266
- sum: "sum";
2267
2267
  best: "best";
2268
2268
  worst: "worst";
2269
2269
  }>;
@@ -2292,10 +2292,10 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
2292
2292
  hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
2293
2293
  kind: z.ZodLiteral<"duration">;
2294
2294
  aggregate: z.ZodOptional<z.ZodEnum<{
2295
+ sum: "sum";
2295
2296
  avg: "avg";
2296
2297
  min: "min";
2297
2298
  max: "max";
2298
- sum: "sum";
2299
2299
  best: "best";
2300
2300
  worst: "worst";
2301
2301
  }>>;
@@ -2303,10 +2303,10 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
2303
2303
  hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
2304
2304
  kind: z.ZodLiteral<"cacheHits">;
2305
2305
  aggregate: z.ZodOptional<z.ZodEnum<{
2306
+ sum: "sum";
2306
2307
  avg: "avg";
2307
2308
  min: "min";
2308
2309
  max: "max";
2309
- sum: "sum";
2310
2310
  best: "best";
2311
2311
  worst: "worst";
2312
2312
  }>>;
@@ -2316,10 +2316,10 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
2316
2316
  key: z.ZodString;
2317
2317
  label: z.ZodOptional<z.ZodString>;
2318
2318
  aggregate: z.ZodEnum<{
2319
+ sum: "sum";
2319
2320
  avg: "avg";
2320
2321
  min: "min";
2321
2322
  max: "max";
2322
- sum: "sum";
2323
2323
  best: "best";
2324
2324
  worst: "worst";
2325
2325
  }>;
@@ -2356,10 +2356,10 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
2356
2356
  hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
2357
2357
  kind: z.ZodLiteral<"duration">;
2358
2358
  aggregate: z.ZodOptional<z.ZodEnum<{
2359
+ sum: "sum";
2359
2360
  avg: "avg";
2360
2361
  min: "min";
2361
2362
  max: "max";
2362
- sum: "sum";
2363
2363
  best: "best";
2364
2364
  worst: "worst";
2365
2365
  }>>;
@@ -2367,10 +2367,10 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
2367
2367
  hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
2368
2368
  kind: z.ZodLiteral<"cacheHits">;
2369
2369
  aggregate: z.ZodOptional<z.ZodEnum<{
2370
+ sum: "sum";
2370
2371
  avg: "avg";
2371
2372
  min: "min";
2372
2373
  max: "max";
2373
- sum: "sum";
2374
2374
  best: "best";
2375
2375
  worst: "worst";
2376
2376
  }>>;
@@ -2380,10 +2380,10 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
2380
2380
  key: z.ZodString;
2381
2381
  label: z.ZodOptional<z.ZodString>;
2382
2382
  aggregate: z.ZodEnum<{
2383
+ sum: "sum";
2383
2384
  avg: "avg";
2384
2385
  min: "min";
2385
2386
  max: "max";
2386
- sum: "sum";
2387
2387
  best: "best";
2388
2388
  worst: "worst";
2389
2389
  }>;
@@ -2483,10 +2483,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2483
2483
  hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
2484
2484
  kind: z.ZodLiteral<"duration">;
2485
2485
  aggregate: z.ZodOptional<z.ZodEnum<{
2486
+ sum: "sum";
2486
2487
  avg: "avg";
2487
2488
  min: "min";
2488
2489
  max: "max";
2489
- sum: "sum";
2490
2490
  best: "best";
2491
2491
  worst: "worst";
2492
2492
  }>>;
@@ -2494,10 +2494,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2494
2494
  hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
2495
2495
  kind: z.ZodLiteral<"cacheHits">;
2496
2496
  aggregate: z.ZodOptional<z.ZodEnum<{
2497
+ sum: "sum";
2497
2498
  avg: "avg";
2498
2499
  min: "min";
2499
2500
  max: "max";
2500
- sum: "sum";
2501
2501
  best: "best";
2502
2502
  worst: "worst";
2503
2503
  }>>;
@@ -2507,10 +2507,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2507
2507
  key: z.ZodString;
2508
2508
  label: z.ZodOptional<z.ZodString>;
2509
2509
  aggregate: z.ZodEnum<{
2510
+ sum: "sum";
2510
2511
  avg: "avg";
2511
2512
  min: "min";
2512
2513
  max: "max";
2513
- sum: "sum";
2514
2514
  best: "best";
2515
2515
  worst: "worst";
2516
2516
  }>;
@@ -2534,10 +2534,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2534
2534
  accent: z.ZodOptional<z.ZodBoolean>;
2535
2535
  }, z.core.$strip>], "kind">>>;
2536
2536
  defaultStatAggregate: z.ZodOptional<z.ZodEnum<{
2537
+ sum: "sum";
2537
2538
  avg: "avg";
2538
2539
  min: "min";
2539
2540
  max: "max";
2540
- sum: "sum";
2541
2541
  best: "best";
2542
2542
  worst: "worst";
2543
2543
  }>>;
@@ -2560,9 +2560,9 @@ declare const evalSummarySchema$1: z.ZodObject<{
2560
2560
  color: z.ZodOptional<z.ZodEnum<{
2561
2561
  error: "error";
2562
2562
  success: "success";
2563
+ warning: "warning";
2563
2564
  accent: "accent";
2564
2565
  accentDim: "accentDim";
2565
- warning: "warning";
2566
2566
  textMuted: "textMuted";
2567
2567
  }>>;
2568
2568
  axis: z.ZodOptional<z.ZodEnum<{
@@ -2573,10 +2573,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2573
2573
  source: z.ZodLiteral<"column">;
2574
2574
  key: z.ZodString;
2575
2575
  aggregate: z.ZodEnum<{
2576
+ sum: "sum";
2576
2577
  avg: "avg";
2577
2578
  min: "min";
2578
2579
  max: "max";
2579
- sum: "sum";
2580
2580
  latest: "latest";
2581
2581
  passThresholdRate: "passThresholdRate";
2582
2582
  }>;
@@ -2584,9 +2584,9 @@ declare const evalSummarySchema$1: z.ZodObject<{
2584
2584
  color: z.ZodOptional<z.ZodEnum<{
2585
2585
  error: "error";
2586
2586
  success: "success";
2587
+ warning: "warning";
2587
2588
  accent: "accent";
2588
2589
  accentDim: "accentDim";
2589
- warning: "warning";
2590
2590
  textMuted: "textMuted";
2591
2591
  }>>;
2592
2592
  axis: z.ZodOptional<z.ZodEnum<{
@@ -2615,10 +2615,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2615
2615
  source: z.ZodLiteral<"column">;
2616
2616
  key: z.ZodString;
2617
2617
  aggregate: z.ZodEnum<{
2618
+ sum: "sum";
2618
2619
  avg: "avg";
2619
2620
  min: "min";
2620
2621
  max: "max";
2621
- sum: "sum";
2622
2622
  latest: "latest";
2623
2623
  passThresholdRate: "passThresholdRate";
2624
2624
  }>;
@@ -2908,9 +2908,9 @@ declare const scoreTraceSchema: z.ZodObject<{
2908
2908
  subtree: "subtree";
2909
2909
  }>>;
2910
2910
  mode: z.ZodOptional<z.ZodEnum<{
2911
- sum: "sum";
2912
2911
  all: "all";
2913
2912
  last: "last";
2913
+ sum: "sum";
2914
2914
  }>>;
2915
2915
  }, z.core.$strip>>>;
2916
2916
  }, z.core.$strip>;
@@ -3011,9 +3011,9 @@ declare const caseDetailSchema$1: z.ZodObject<{
3011
3011
  subtree: "subtree";
3012
3012
  }>>;
3013
3013
  mode: z.ZodOptional<z.ZodEnum<{
3014
- sum: "sum";
3015
3014
  all: "all";
3016
3015
  last: "last";
3016
+ sum: "sum";
3017
3017
  }>>;
3018
3018
  }, z.core.$strip>>>;
3019
3019
  }, z.core.$strip>;
@@ -3080,9 +3080,9 @@ declare const caseDetailSchema$1: z.ZodObject<{
3080
3080
  subtree: "subtree";
3081
3081
  }>>;
3082
3082
  mode: z.ZodOptional<z.ZodEnum<{
3083
- sum: "sum";
3084
3083
  all: "all";
3085
3084
  last: "last";
3085
+ sum: "sum";
3086
3086
  }>>;
3087
3087
  }, z.core.$strip>>>;
3088
3088
  }, z.core.$strip>;
@@ -3269,10 +3269,10 @@ declare const evalChartBuiltinMetricSchema: z.ZodEnum<{
3269
3269
  type EvalChartBuiltinMetric = z.infer<typeof evalChartBuiltinMetricSchema>;
3270
3270
  /** Reducer applied to a numeric column across all cases of a single run. */
3271
3271
  declare const evalChartAggregateSchema: z.ZodEnum<{
3272
+ sum: "sum";
3272
3273
  avg: "avg";
3273
3274
  min: "min";
3274
3275
  max: "max";
3275
- sum: "sum";
3276
3276
  latest: "latest";
3277
3277
  passThresholdRate: "passThresholdRate";
3278
3278
  }>;
@@ -3285,9 +3285,9 @@ type EvalChartAggregate = z.infer<typeof evalChartAggregateSchema>;
3285
3285
  declare const evalChartColorSchema: z.ZodEnum<{
3286
3286
  error: "error";
3287
3287
  success: "success";
3288
+ warning: "warning";
3288
3289
  accent: "accent";
3289
3290
  accentDim: "accentDim";
3290
- warning: "warning";
3291
3291
  textMuted: "textMuted";
3292
3292
  }>;
3293
3293
  /** Semantic color token resolved to a theme color by the web UI. */
@@ -3314,9 +3314,9 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3314
3314
  color: z.ZodOptional<z.ZodEnum<{
3315
3315
  error: "error";
3316
3316
  success: "success";
3317
+ warning: "warning";
3317
3318
  accent: "accent";
3318
3319
  accentDim: "accentDim";
3319
- warning: "warning";
3320
3320
  textMuted: "textMuted";
3321
3321
  }>>;
3322
3322
  axis: z.ZodOptional<z.ZodEnum<{
@@ -3327,10 +3327,10 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3327
3327
  source: z.ZodLiteral<"column">;
3328
3328
  key: z.ZodString;
3329
3329
  aggregate: z.ZodEnum<{
3330
+ sum: "sum";
3330
3331
  avg: "avg";
3331
3332
  min: "min";
3332
3333
  max: "max";
3333
- sum: "sum";
3334
3334
  latest: "latest";
3335
3335
  passThresholdRate: "passThresholdRate";
3336
3336
  }>;
@@ -3338,9 +3338,9 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3338
3338
  color: z.ZodOptional<z.ZodEnum<{
3339
3339
  error: "error";
3340
3340
  success: "success";
3341
+ warning: "warning";
3341
3342
  accent: "accent";
3342
3343
  accentDim: "accentDim";
3343
- warning: "warning";
3344
3344
  textMuted: "textMuted";
3345
3345
  }>>;
3346
3346
  axis: z.ZodOptional<z.ZodEnum<{
@@ -3362,10 +3362,10 @@ declare const evalChartTooltipExtraSchema: z.ZodDiscriminatedUnion<[z.ZodObject<
3362
3362
  source: z.ZodLiteral<"column">;
3363
3363
  key: z.ZodString;
3364
3364
  aggregate: z.ZodEnum<{
3365
+ sum: "sum";
3365
3366
  avg: "avg";
3366
3367
  min: "min";
3367
3368
  max: "max";
3368
- sum: "sum";
3369
3369
  latest: "latest";
3370
3370
  passThresholdRate: "passThresholdRate";
3371
3371
  }>;
@@ -3397,9 +3397,9 @@ declare const evalChartConfigSchema: z.ZodObject<{
3397
3397
  color: z.ZodOptional<z.ZodEnum<{
3398
3398
  error: "error";
3399
3399
  success: "success";
3400
+ warning: "warning";
3400
3401
  accent: "accent";
3401
3402
  accentDim: "accentDim";
3402
- warning: "warning";
3403
3403
  textMuted: "textMuted";
3404
3404
  }>>;
3405
3405
  axis: z.ZodOptional<z.ZodEnum<{
@@ -3410,10 +3410,10 @@ declare const evalChartConfigSchema: z.ZodObject<{
3410
3410
  source: z.ZodLiteral<"column">;
3411
3411
  key: z.ZodString;
3412
3412
  aggregate: z.ZodEnum<{
3413
+ sum: "sum";
3413
3414
  avg: "avg";
3414
3415
  min: "min";
3415
3416
  max: "max";
3416
- sum: "sum";
3417
3417
  latest: "latest";
3418
3418
  passThresholdRate: "passThresholdRate";
3419
3419
  }>;
@@ -3421,9 +3421,9 @@ declare const evalChartConfigSchema: z.ZodObject<{
3421
3421
  color: z.ZodOptional<z.ZodEnum<{
3422
3422
  error: "error";
3423
3423
  success: "success";
3424
+ warning: "warning";
3424
3425
  accent: "accent";
3425
3426
  accentDim: "accentDim";
3426
- warning: "warning";
3427
3427
  textMuted: "textMuted";
3428
3428
  }>>;
3429
3429
  axis: z.ZodOptional<z.ZodEnum<{
@@ -3452,10 +3452,10 @@ declare const evalChartConfigSchema: z.ZodObject<{
3452
3452
  source: z.ZodLiteral<"column">;
3453
3453
  key: z.ZodString;
3454
3454
  aggregate: z.ZodEnum<{
3455
+ sum: "sum";
3455
3456
  avg: "avg";
3456
3457
  min: "min";
3457
3458
  max: "max";
3458
- sum: "sum";
3459
3459
  latest: "latest";
3460
3460
  passThresholdRate: "passThresholdRate";
3461
3461
  }>;
@@ -3487,9 +3487,9 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3487
3487
  color: z.ZodOptional<z.ZodEnum<{
3488
3488
  error: "error";
3489
3489
  success: "success";
3490
+ warning: "warning";
3490
3491
  accent: "accent";
3491
3492
  accentDim: "accentDim";
3492
- warning: "warning";
3493
3493
  textMuted: "textMuted";
3494
3494
  }>>;
3495
3495
  axis: z.ZodOptional<z.ZodEnum<{
@@ -3500,10 +3500,10 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3500
3500
  source: z.ZodLiteral<"column">;
3501
3501
  key: z.ZodString;
3502
3502
  aggregate: z.ZodEnum<{
3503
+ sum: "sum";
3503
3504
  avg: "avg";
3504
3505
  min: "min";
3505
3506
  max: "max";
3506
- sum: "sum";
3507
3507
  latest: "latest";
3508
3508
  passThresholdRate: "passThresholdRate";
3509
3509
  }>;
@@ -3511,9 +3511,9 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3511
3511
  color: z.ZodOptional<z.ZodEnum<{
3512
3512
  error: "error";
3513
3513
  success: "success";
3514
+ warning: "warning";
3514
3515
  accent: "accent";
3515
3516
  accentDim: "accentDim";
3516
- warning: "warning";
3517
3517
  textMuted: "textMuted";
3518
3518
  }>>;
3519
3519
  axis: z.ZodOptional<z.ZodEnum<{
@@ -3542,10 +3542,10 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3542
3542
  source: z.ZodLiteral<"column">;
3543
3543
  key: z.ZodString;
3544
3544
  aggregate: z.ZodEnum<{
3545
+ sum: "sum";
3545
3546
  avg: "avg";
3546
3547
  min: "min";
3547
3548
  max: "max";
3548
- sum: "sum";
3549
3549
  latest: "latest";
3550
3550
  passThresholdRate: "passThresholdRate";
3551
3551
  }>;
@@ -3573,8 +3573,8 @@ declare const runManifestSchema$1: z.ZodObject<{
3573
3573
  evalSourceFingerprints: z.ZodDefault<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>>;
3574
3574
  target: z.ZodObject<{
3575
3575
  mode: z.ZodEnum<{
3576
- caseIds: "caseIds";
3577
3576
  all: "all";
3577
+ caseIds: "caseIds";
3578
3578
  evalIds: "evalIds";
3579
3579
  }>;
3580
3580
  evalKeys: z.ZodOptional<z.ZodArray<z.ZodString>>;
@@ -4049,6 +4049,7 @@ declare const apiCallsConfigSchema: z.ZodObject<{
4049
4049
  attributes: z.ZodOptional<z.ZodObject<{
4050
4050
  method: z.ZodOptional<z.ZodString>;
4051
4051
  url: z.ZodOptional<z.ZodString>;
4052
+ routeAlias: z.ZodOptional<z.ZodString>;
4052
4053
  statusCode: z.ZodOptional<z.ZodString>;
4053
4054
  request: z.ZodOptional<z.ZodString>;
4054
4055
  response: z.ZodOptional<z.ZodString>;
@@ -4116,6 +4117,7 @@ type ResolvedApiCallsConfig = {
4116
4117
  attributes: {
4117
4118
  method: string;
4118
4119
  url: string;
4120
+ routeAlias: string;
4119
4121
  statusCode: string;
4120
4122
  request: string;
4121
4123
  response: string;
@@ -4302,6 +4304,7 @@ type AgentEvalsConfig$1 = {
4302
4304
  * kinds: ['api', 'http.client', 'undici.request'],
4303
4305
  * attributes: {
4304
4306
  * statusCode: 'http.status_code',
4307
+ * routeAlias: 'http.route',
4305
4308
  * },
4306
4309
  * metrics: [
4307
4310
  * { label: 'Retries', path: 'retryCount', format: 'number' },
@@ -4533,6 +4536,11 @@ type ApiCallEntry = {
4533
4536
  status: EvalTraceSpan$1['status'];
4534
4537
  method: string | null;
4535
4538
  url: string | null;
4539
+ /**
4540
+ * Dynamic route alias read from the API span, such as `/v3/tabs/:id`.
4541
+ * The original `url` stays available for request details.
4542
+ */
4543
+ routeAlias: string | null;
4536
4544
  statusCode: number | null; /** Elapsed API call duration in milliseconds. */
4537
4545
  durationMs: number | null;
4538
4546
  request: unknown;
@@ -5565,8 +5573,8 @@ type ConfigReloadState = z.infer<typeof configReloadStateSchema$1>;
5565
5573
  declare const createRunRequestSchema$1: z.ZodObject<{
5566
5574
  target: z.ZodObject<{
5567
5575
  mode: z.ZodEnum<{
5568
- caseIds: "caseIds";
5569
5576
  all: "all";
5577
+ caseIds: "caseIds";
5570
5578
  evalIds: "evalIds";
5571
5579
  }>;
5572
5580
  evalKeys: z.ZodOptional<z.ZodArray<z.ZodString>>;
@@ -6682,6 +6690,7 @@ type ResolvedApiCallsConfig$1 = {
6682
6690
  attributes: {
6683
6691
  method: string;
6684
6692
  url: string;
6693
+ routeAlias: string;
6685
6694
  statusCode: string;
6686
6695
  request: string;
6687
6696
  response: string;
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-Bu9yfdUS.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-OLZIjQpx.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-Cy3OxoZW.mjs";
1
+ import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-D-CnSRYy.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CPBIcMP-.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-SixIk0b7.mjs";
4
4
  export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
2
- import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-mpgZmEZ6.mjs";
1
+ import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-D-CnSRYy.mjs";
2
+ import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-Basvyp4u.mjs";
3
3
  import { z } from "zod";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";
@@ -1214,6 +1214,7 @@ const apiCallsConfigSchema = z.object({
1214
1214
  attributes: z.object({
1215
1215
  method: z.string().optional(),
1216
1216
  url: z.string().optional(),
1217
+ routeAlias: z.string().optional(),
1217
1218
  statusCode: z.string().optional(),
1218
1219
  request: z.string().optional(),
1219
1220
  response: z.string().optional(),
@@ -1278,6 +1279,7 @@ const DEFAULT_API_CALLS_CONFIG = {
1278
1279
  attributes: {
1279
1280
  method: "method",
1280
1281
  url: "url",
1282
+ routeAlias: "routeAlias",
1281
1283
  statusCode: "statusCode",
1282
1284
  request: "request",
1283
1285
  response: "response",
@@ -2494,6 +2496,17 @@ function pickError(span) {
2494
2496
  if (span.errors && span.errors.length > 0) return span.errors[0] ?? null;
2495
2497
  return null;
2496
2498
  }
2499
+ function stripSearchAndHash(value) {
2500
+ const endIndex = [value.indexOf("?"), value.indexOf("#")].filter((index) => index !== -1).toSorted((a, b) => a - b)[0];
2501
+ return endIndex === void 0 ? value : value.slice(0, endIndex);
2502
+ }
2503
+ function normalizeRouteAlias(routeAlias) {
2504
+ if (routeAlias === null) return null;
2505
+ const trimmed = routeAlias.trim();
2506
+ if (trimmed.length === 0) return null;
2507
+ const withoutSearch = stripSearchAndHash(trimmed);
2508
+ return withoutSearch.startsWith("/") ? withoutSearch : `/${withoutSearch}`;
2509
+ }
2497
2510
  /**
2498
2511
  * Filter `spans` down to API calls and project each one to the structured
2499
2512
  * shape consumed by the API calls tab.
@@ -2525,13 +2538,16 @@ function extractApiCalls(spans, config) {
2525
2538
  placements: metric.placements
2526
2539
  });
2527
2540
  }
2541
+ const url = readString$1(attrs, config.attributes.url);
2542
+ const routeAlias = normalizeRouteAlias(readString$1(attrs, config.attributes.routeAlias));
2528
2543
  result.push({
2529
2544
  id: span.id,
2530
2545
  name: span.name,
2531
2546
  kind: span.kind,
2532
2547
  status: span.status,
2533
2548
  method: readString$1(attrs, config.attributes.method),
2534
- url: readString$1(attrs, config.attributes.url),
2549
+ url,
2550
+ routeAlias,
2535
2551
  statusCode: readNumber$1(attrs, config.attributes.statusCode),
2536
2552
  durationMs: readNumber$1(attrs, config.attributes.durationMs) ?? computeDurationMs(span),
2537
2553
  request: getNestedAttribute(attrs, config.attributes.request),
@@ -1,4 +1,4 @@
1
- import { Ct as buildCaseKey, Dt as caseRowSchema, Et as caseDetailSchema, Ft as runWithEvalRegistry, Tt as getCaseRowCaseKey, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as commitPendingCacheWrites, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
1
+ import { Ct as buildCaseKey, Dt as caseRowSchema, Et as caseDetailSchema, Ft as runWithEvalRegistry, Tt as getCaseRowCaseKey, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as commitPendingCacheWrites, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, yt as runManifestSchema } from "./runExecution-D-CnSRYy.mjs";
2
2
  import { Result, resultify } from "t-result";
3
3
  import { readFile, readdir, rm, writeFile } from "node:fs/promises";
4
4
  import { dirname, join } from "node:path";
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-SxtKn-Xh.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-DwNb5TCb.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-OLZIjQpx.mjs";
2
- import "./src-Cy3OxoZW.mjs";
1
+ import { n as createRunner } from "./cli-CPBIcMP-.mjs";
2
+ import "./src-SixIk0b7.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance({ loadEnv = true } = {}) {
@@ -1,5 +1,5 @@
1
- import { Nt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-Bu9yfdUS.mjs";
2
- import "./cli-OLZIjQpx.mjs";
1
+ import { Nt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-D-CnSRYy.mjs";
2
+ import "./cli-CPBIcMP-.mjs";
3
3
  //#region src/index.ts
4
4
  /** Register an eval definition with typed tag support. */
5
5
  function defineEval(definition) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.60.4",
3
+ "version": "0.61.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -32,9 +32,9 @@
32
32
  "devDependencies": {
33
33
  "@types/node": "^24.7.2",
34
34
  "typescript": "^5.9.2",
35
+ "@agent-evals/runner": "0.0.1",
35
36
  "@agent-evals/shared": "0.0.1",
36
- "@agent-evals/sdk": "0.0.1",
37
- "@agent-evals/runner": "0.0.1"
37
+ "@agent-evals/sdk": "0.0.1"
38
38
  },
39
39
  "scripts": {
40
40
  "build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",