@ls-stack/agent-eval 0.60.4 → 0.61.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-CM6MDNqo.js"></script>
29
- <link rel="stylesheet" crossorigin href="/assets/index-CqWfzcFb.css">
28
+ <script type="module" crossorigin src="/assets/index-CwSehYad.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-CM_zUhl_.css">
30
30
  </head>
31
31
  <body>
32
32
  <div id="root"></div>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-OLZIjQpx.mjs";
2
+ import { t as runCli } from "./cli-CPBIcMP-.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { Ft as runWithEvalRegistry, I as configureEvalRunLogs, St as resolveLlmCallsConfig, _ as createFsCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, g as createBufferedCacheStore, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as getCacheRetentionOptions, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
1
+ import { Ft as runWithEvalRegistry, I as configureEvalRunLogs, St as resolveLlmCallsConfig, _ as createFsCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, g as createBufferedCacheStore, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as getCacheRetentionOptions, xt as resolveApiCallsConfig } from "./runExecution-D-CnSRYy.mjs";
2
2
  //#region ../runner/src/caseChild.ts
3
3
  let fatalErrorReported = false;
4
4
  let disconnectExpected = false;
@@ -1,5 +1,5 @@
1
- import { Dt as caseRowSchema, Pt as getEvalRegistry, St as resolveLlmCallsConfig, Tt as getCaseRowCaseKey, _ as createFsCacheStore, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as getCacheRetentionOptions, wt as buildEvalKey, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
2
- import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-mpgZmEZ6.mjs";
1
+ import { Dt as caseRowSchema, Pt as getEvalRegistry, St as resolveLlmCallsConfig, Tt as getCaseRowCaseKey, _ as createFsCacheStore, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as getCacheRetentionOptions, wt as buildEvalKey, xt as resolveApiCallsConfig } from "./runExecution-D-CnSRYy.mjs";
2
+ import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-Basvyp4u.mjs";
3
3
  import { parseEnv } from "node:util";
4
4
  import { resultify } from "t-result";
5
5
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
@@ -2243,8 +2243,8 @@ async function commandApp(args) {
2243
2243
  const { serve } = await import("@hono/node-server");
2244
2244
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2245
2245
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2246
- const appModule = await import("./app-gg10KvzS.mjs");
2247
- const runnerModule = await import("./runner-C4Y0lWb1.mjs");
2246
+ const appModule = await import("./app-Dm_9ZTVa.mjs");
2247
+ const runnerModule = await import("./runner-B6UT1K7L.mjs");
2248
2248
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2249
2249
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2250
2250
  await runnerModule.initRunner({ loadEnv: args.loadEnv });
package/dist/index.d.mts CHANGED
@@ -2214,8 +2214,8 @@ declare const traceSpanSchema$1: z.ZodObject<{
2214
2214
  status: z.ZodEnum<{
2215
2215
  error: "error";
2216
2216
  running: "running";
2217
- ok: "ok";
2218
2217
  cancelled: "cancelled";
2218
+ ok: "ok";
2219
2219
  }>;
2220
2220
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2221
2221
  error: z.ZodOptional<z.ZodObject<{
@@ -2466,10 +2466,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2466
2466
  caseIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
2467
2467
  lastRunStatus: z.ZodNullable<z.ZodEnum<{
2468
2468
  error: "error";
2469
- running: "running";
2470
- cancelled: "cancelled";
2471
2469
  pass: "pass";
2472
2470
  fail: "fail";
2471
+ running: "running";
2472
+ cancelled: "cancelled";
2473
2473
  unscored: "unscored";
2474
2474
  }>>;
2475
2475
  stats: z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -2558,8 +2558,8 @@ declare const evalSummarySchema$1: z.ZodObject<{
2558
2558
  }>;
2559
2559
  label: z.ZodOptional<z.ZodString>;
2560
2560
  color: z.ZodOptional<z.ZodEnum<{
2561
- error: "error";
2562
2561
  success: "success";
2562
+ error: "error";
2563
2563
  accent: "accent";
2564
2564
  accentDim: "accentDim";
2565
2565
  warning: "warning";
@@ -2582,8 +2582,8 @@ declare const evalSummarySchema$1: z.ZodObject<{
2582
2582
  }>;
2583
2583
  label: z.ZodOptional<z.ZodString>;
2584
2584
  color: z.ZodOptional<z.ZodEnum<{
2585
- error: "error";
2586
2585
  success: "success";
2586
+ error: "error";
2587
2587
  accent: "accent";
2588
2588
  accentDim: "accentDim";
2589
2589
  warning: "warning";
@@ -2715,10 +2715,10 @@ declare const caseRowSchema$1: z.ZodObject<{
2715
2715
  tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
2716
2716
  status: z.ZodEnum<{
2717
2717
  error: "error";
2718
- running: "running";
2719
- cancelled: "cancelled";
2720
2718
  pass: "pass";
2721
2719
  fail: "fail";
2720
+ running: "running";
2721
+ cancelled: "cancelled";
2722
2722
  pending: "pending";
2723
2723
  }>;
2724
2724
  durationMs: z.ZodNullable<z.ZodNumber>;
@@ -2857,8 +2857,8 @@ declare const scoreTraceSchema: z.ZodObject<{
2857
2857
  status: z.ZodEnum<{
2858
2858
  error: "error";
2859
2859
  running: "running";
2860
- ok: "ok";
2861
2860
  cancelled: "cancelled";
2861
+ ok: "ok";
2862
2862
  }>;
2863
2863
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2864
2864
  error: z.ZodOptional<z.ZodObject<{
@@ -2920,10 +2920,10 @@ declare const scoreTraceSchema: z.ZodObject<{
2920
2920
  namespace: z.ZodString;
2921
2921
  key: z.ZodString;
2922
2922
  status: z.ZodEnum<{
2923
- bypass: "bypass";
2924
- refresh: "refresh";
2925
2923
  hit: "hit";
2926
2924
  miss: "miss";
2925
+ refresh: "refresh";
2926
+ bypass: "bypass";
2927
2927
  }>;
2928
2928
  read: z.ZodOptional<z.ZodBoolean>;
2929
2929
  stored: z.ZodOptional<z.ZodBoolean>;
@@ -2942,10 +2942,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
2942
2942
  tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
2943
2943
  status: z.ZodEnum<{
2944
2944
  error: "error";
2945
- running: "running";
2946
- cancelled: "cancelled";
2947
2945
  pass: "pass";
2948
2946
  fail: "fail";
2947
+ running: "running";
2948
+ cancelled: "cancelled";
2949
2949
  pending: "pending";
2950
2950
  }>;
2951
2951
  input: z.ZodUnknown;
@@ -2960,8 +2960,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
2960
2960
  status: z.ZodEnum<{
2961
2961
  error: "error";
2962
2962
  running: "running";
2963
- ok: "ok";
2964
2963
  cancelled: "cancelled";
2964
+ ok: "ok";
2965
2965
  }>;
2966
2966
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2967
2967
  error: z.ZodOptional<z.ZodObject<{
@@ -3029,8 +3029,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
3029
3029
  status: z.ZodEnum<{
3030
3030
  error: "error";
3031
3031
  running: "running";
3032
- ok: "ok";
3033
3032
  cancelled: "cancelled";
3033
+ ok: "ok";
3034
3034
  }>;
3035
3035
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
3036
3036
  error: z.ZodOptional<z.ZodObject<{
@@ -3092,10 +3092,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
3092
3092
  namespace: z.ZodString;
3093
3093
  key: z.ZodString;
3094
3094
  status: z.ZodEnum<{
3095
- bypass: "bypass";
3096
- refresh: "refresh";
3097
3095
  hit: "hit";
3098
3096
  miss: "miss";
3097
+ refresh: "refresh";
3098
+ bypass: "bypass";
3099
3099
  }>;
3100
3100
  read: z.ZodOptional<z.ZodBoolean>;
3101
3101
  stored: z.ZodOptional<z.ZodBoolean>;
@@ -3213,10 +3213,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
3213
3213
  namespace: z.ZodString;
3214
3214
  key: z.ZodString;
3215
3215
  status: z.ZodEnum<{
3216
- bypass: "bypass";
3217
- refresh: "refresh";
3218
3216
  hit: "hit";
3219
3217
  miss: "miss";
3218
+ refresh: "refresh";
3219
+ bypass: "bypass";
3220
3220
  }>;
3221
3221
  read: z.ZodOptional<z.ZodBoolean>;
3222
3222
  stored: z.ZodOptional<z.ZodBoolean>;
@@ -3283,8 +3283,8 @@ type EvalChartAggregate = z.infer<typeof evalChartAggregateSchema>;
3283
3283
  * not emit raw hex so authored evals stay decoupled from the web theme.
3284
3284
  */
3285
3285
  declare const evalChartColorSchema: z.ZodEnum<{
3286
- error: "error";
3287
3286
  success: "success";
3287
+ error: "error";
3288
3288
  accent: "accent";
3289
3289
  accentDim: "accentDim";
3290
3290
  warning: "warning";
@@ -3312,8 +3312,8 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3312
3312
  }>;
3313
3313
  label: z.ZodOptional<z.ZodString>;
3314
3314
  color: z.ZodOptional<z.ZodEnum<{
3315
- error: "error";
3316
3315
  success: "success";
3316
+ error: "error";
3317
3317
  accent: "accent";
3318
3318
  accentDim: "accentDim";
3319
3319
  warning: "warning";
@@ -3336,8 +3336,8 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3336
3336
  }>;
3337
3337
  label: z.ZodOptional<z.ZodString>;
3338
3338
  color: z.ZodOptional<z.ZodEnum<{
3339
- error: "error";
3340
3339
  success: "success";
3340
+ error: "error";
3341
3341
  accent: "accent";
3342
3342
  accentDim: "accentDim";
3343
3343
  warning: "warning";
@@ -3395,8 +3395,8 @@ declare const evalChartConfigSchema: z.ZodObject<{
3395
3395
  }>;
3396
3396
  label: z.ZodOptional<z.ZodString>;
3397
3397
  color: z.ZodOptional<z.ZodEnum<{
3398
- error: "error";
3399
3398
  success: "success";
3399
+ error: "error";
3400
3400
  accent: "accent";
3401
3401
  accentDim: "accentDim";
3402
3402
  warning: "warning";
@@ -3419,8 +3419,8 @@ declare const evalChartConfigSchema: z.ZodObject<{
3419
3419
  }>;
3420
3420
  label: z.ZodOptional<z.ZodString>;
3421
3421
  color: z.ZodOptional<z.ZodEnum<{
3422
- error: "error";
3423
3422
  success: "success";
3423
+ error: "error";
3424
3424
  accent: "accent";
3425
3425
  accentDim: "accentDim";
3426
3426
  warning: "warning";
@@ -3485,8 +3485,8 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3485
3485
  }>;
3486
3486
  label: z.ZodOptional<z.ZodString>;
3487
3487
  color: z.ZodOptional<z.ZodEnum<{
3488
- error: "error";
3489
3488
  success: "success";
3489
+ error: "error";
3490
3490
  accent: "accent";
3491
3491
  accentDim: "accentDim";
3492
3492
  warning: "warning";
@@ -3509,8 +3509,8 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3509
3509
  }>;
3510
3510
  label: z.ZodOptional<z.ZodString>;
3511
3511
  color: z.ZodOptional<z.ZodEnum<{
3512
- error: "error";
3513
3512
  success: "success";
3513
+ error: "error";
3514
3514
  accent: "accent";
3515
3515
  accentDim: "accentDim";
3516
3516
  warning: "warning";
@@ -3589,9 +3589,9 @@ declare const runManifestSchema$1: z.ZodObject<{
3589
3589
  median: "median";
3590
3590
  }>>>;
3591
3591
  cacheMode: z.ZodOptional<z.ZodEnum<{
3592
- use: "use";
3593
- bypass: "bypass";
3594
3592
  refresh: "refresh";
3593
+ bypass: "bypass";
3594
+ use: "use";
3595
3595
  }>>;
3596
3596
  }, z.core.$strip>;
3597
3597
  /** Persisted lifecycle metadata for a single eval run. */
@@ -4049,6 +4049,7 @@ declare const apiCallsConfigSchema: z.ZodObject<{
4049
4049
  attributes: z.ZodOptional<z.ZodObject<{
4050
4050
  method: z.ZodOptional<z.ZodString>;
4051
4051
  url: z.ZodOptional<z.ZodString>;
4052
+ routeAlias: z.ZodOptional<z.ZodString>;
4052
4053
  statusCode: z.ZodOptional<z.ZodString>;
4053
4054
  request: z.ZodOptional<z.ZodString>;
4054
4055
  response: z.ZodOptional<z.ZodString>;
@@ -4116,6 +4117,7 @@ type ResolvedApiCallsConfig = {
4116
4117
  attributes: {
4117
4118
  method: string;
4118
4119
  url: string;
4120
+ routeAlias: string;
4119
4121
  statusCode: string;
4120
4122
  request: string;
4121
4123
  response: string;
@@ -4302,6 +4304,7 @@ type AgentEvalsConfig$1 = {
4302
4304
  * kinds: ['api', 'http.client', 'undici.request'],
4303
4305
  * attributes: {
4304
4306
  * statusCode: 'http.status_code',
4307
+ * routeAlias: 'http.route',
4305
4308
  * },
4306
4309
  * metrics: [
4307
4310
  * { label: 'Retries', path: 'retryCount', format: 'number' },
@@ -4533,6 +4536,11 @@ type ApiCallEntry = {
4533
4536
  status: EvalTraceSpan$1['status'];
4534
4537
  method: string | null;
4535
4538
  url: string | null;
4539
+ /**
4540
+ * Dynamic route alias read from the API span, such as `/v3/tabs/:id`.
4541
+ * The original `url` stays available for request details.
4542
+ */
4543
+ routeAlias: string | null;
4536
4544
  statusCode: number | null; /** Elapsed API call duration in milliseconds. */
4537
4545
  durationMs: number | null;
4538
4546
  request: unknown;
@@ -4567,9 +4575,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
4567
4575
  * - `refresh`: never read, always write (forces re-execution and overwrites).
4568
4576
  */
4569
4577
  declare const cacheModeSchema: z.ZodEnum<{
4570
- use: "use";
4571
- bypass: "bypass";
4572
4578
  refresh: "refresh";
4579
+ bypass: "bypass";
4580
+ use: "use";
4573
4581
  }>;
4574
4582
  /** Mode controlling how cached spans behave during a run. */
4575
4583
  type CacheMode = z.infer<typeof cacheModeSchema>;
@@ -4583,17 +4591,17 @@ declare const spanCacheOptionsSchema: z.ZodObject<{
4583
4591
  type SpanCacheOptions = z.infer<typeof spanCacheOptionsSchema>;
4584
4592
  /** Category of operation stored in the eval cache. */
4585
4593
  declare const cacheOperationTypeSchema: z.ZodEnum<{
4586
- span: "span";
4587
4594
  value: "value";
4595
+ span: "span";
4588
4596
  }>;
4589
4597
  /** Category of operation stored in the eval cache. */
4590
4598
  type CacheOperationType = z.infer<typeof cacheOperationTypeSchema>;
4591
4599
  /** Status of a cache lookup recorded on a span or case scope. */
4592
4600
  declare const cacheStatusSchema: z.ZodEnum<{
4593
- bypass: "bypass";
4594
- refresh: "refresh";
4595
4601
  hit: "hit";
4596
4602
  miss: "miss";
4603
+ refresh: "refresh";
4604
+ bypass: "bypass";
4597
4605
  }>;
4598
4606
  /** Status of a cache lookup recorded on a span or case scope. */
4599
4607
  type CacheStatus = z.infer<typeof cacheStatusSchema>;
@@ -4610,10 +4618,10 @@ declare const traceCacheRefSchema: z.ZodObject<{
4610
4618
  namespace: z.ZodString;
4611
4619
  key: z.ZodString;
4612
4620
  status: z.ZodEnum<{
4613
- bypass: "bypass";
4614
- refresh: "refresh";
4615
4621
  hit: "hit";
4616
4622
  miss: "miss";
4623
+ refresh: "refresh";
4624
+ bypass: "bypass";
4617
4625
  }>;
4618
4626
  read: z.ZodOptional<z.ZodBoolean>;
4619
4627
  stored: z.ZodOptional<z.ZodBoolean>;
@@ -4721,8 +4729,8 @@ declare const cacheRecordingSchema: z.ZodObject<{
4721
4729
  finalStatus: z.ZodOptional<z.ZodEnum<{
4722
4730
  error: "error";
4723
4731
  running: "running";
4724
- ok: "ok";
4725
4732
  cancelled: "cancelled";
4733
+ ok: "ok";
4726
4734
  }>>;
4727
4735
  finalError: z.ZodOptional<z.ZodObject<{
4728
4736
  name: z.ZodOptional<z.ZodString>;
@@ -4809,8 +4817,8 @@ declare const cacheEntrySchema: z.ZodObject<{
4809
4817
  key: z.ZodString;
4810
4818
  namespace: z.ZodString;
4811
4819
  operationType: z.ZodOptional<z.ZodEnum<{
4812
- span: "span";
4813
4820
  value: "value";
4821
+ span: "span";
4814
4822
  }>>;
4815
4823
  operationName: z.ZodOptional<z.ZodString>;
4816
4824
  spanName: z.ZodOptional<z.ZodString>;
@@ -4822,8 +4830,8 @@ declare const cacheEntrySchema: z.ZodObject<{
4822
4830
  finalStatus: z.ZodOptional<z.ZodEnum<{
4823
4831
  error: "error";
4824
4832
  running: "running";
4825
- ok: "ok";
4826
4833
  cancelled: "cancelled";
4834
+ ok: "ok";
4827
4835
  }>>;
4828
4836
  finalError: z.ZodOptional<z.ZodObject<{
4829
4837
  name: z.ZodOptional<z.ZodString>;
@@ -4916,8 +4924,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4916
4924
  key: z.ZodString;
4917
4925
  namespace: z.ZodString;
4918
4926
  operationType: z.ZodEnum<{
4919
- span: "span";
4920
4927
  value: "value";
4928
+ span: "span";
4921
4929
  }>;
4922
4930
  operationName: z.ZodString;
4923
4931
  storedAt: z.ZodString;
@@ -4927,8 +4935,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4927
4935
  key: z.ZodString;
4928
4936
  namespace: z.ZodString;
4929
4937
  operationType: z.ZodOptional<z.ZodEnum<{
4930
- span: "span";
4931
4938
  value: "value";
4939
+ span: "span";
4932
4940
  }>>;
4933
4941
  operationName: z.ZodOptional<z.ZodString>;
4934
4942
  spanName: z.ZodOptional<z.ZodString>;
@@ -4940,8 +4948,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4940
4948
  finalStatus: z.ZodOptional<z.ZodEnum<{
4941
4949
  error: "error";
4942
4950
  running: "running";
4943
- ok: "ok";
4944
4951
  cancelled: "cancelled";
4952
+ ok: "ok";
4945
4953
  }>>;
4946
4954
  finalError: z.ZodOptional<z.ZodObject<{
4947
4955
  name: z.ZodOptional<z.ZodString>;
@@ -5034,8 +5042,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5034
5042
  key: z.ZodString;
5035
5043
  namespace: z.ZodString;
5036
5044
  operationType: z.ZodOptional<z.ZodEnum<{
5037
- span: "span";
5038
5045
  value: "value";
5046
+ span: "span";
5039
5047
  }>>;
5040
5048
  operationName: z.ZodOptional<z.ZodString>;
5041
5049
  spanName: z.ZodOptional<z.ZodString>;
@@ -5047,8 +5055,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5047
5055
  finalStatus: z.ZodOptional<z.ZodEnum<{
5048
5056
  error: "error";
5049
5057
  running: "running";
5050
- ok: "ok";
5051
5058
  cancelled: "cancelled";
5059
+ ok: "ok";
5052
5060
  }>>;
5053
5061
  finalError: z.ZodOptional<z.ZodObject<{
5054
5062
  name: z.ZodOptional<z.ZodString>;
@@ -5132,8 +5140,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5132
5140
  key: z.ZodString;
5133
5141
  namespace: z.ZodString;
5134
5142
  operationType: z.ZodEnum<{
5135
- span: "span";
5136
5143
  value: "value";
5144
+ span: "span";
5137
5145
  }>;
5138
5146
  operationName: z.ZodString;
5139
5147
  storedAt: z.ZodString;
@@ -5143,8 +5151,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5143
5151
  key: z.ZodString;
5144
5152
  namespace: z.ZodString;
5145
5153
  operationType: z.ZodOptional<z.ZodEnum<{
5146
- span: "span";
5147
5154
  value: "value";
5155
+ span: "span";
5148
5156
  }>>;
5149
5157
  operationName: z.ZodOptional<z.ZodString>;
5150
5158
  spanName: z.ZodOptional<z.ZodString>;
@@ -5156,8 +5164,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5156
5164
  finalStatus: z.ZodOptional<z.ZodEnum<{
5157
5165
  error: "error";
5158
5166
  running: "running";
5159
- ok: "ok";
5160
5167
  cancelled: "cancelled";
5168
+ ok: "ok";
5161
5169
  }>>;
5162
5170
  finalError: z.ZodOptional<z.ZodObject<{
5163
5171
  name: z.ZodOptional<z.ZodString>;
@@ -5250,8 +5258,8 @@ declare const cacheFileSchema: z.ZodObject<{
5250
5258
  key: z.ZodString;
5251
5259
  namespace: z.ZodString;
5252
5260
  operationType: z.ZodOptional<z.ZodEnum<{
5253
- span: "span";
5254
5261
  value: "value";
5262
+ span: "span";
5255
5263
  }>>;
5256
5264
  operationName: z.ZodOptional<z.ZodString>;
5257
5265
  spanName: z.ZodOptional<z.ZodString>;
@@ -5263,8 +5271,8 @@ declare const cacheFileSchema: z.ZodObject<{
5263
5271
  finalStatus: z.ZodOptional<z.ZodEnum<{
5264
5272
  error: "error";
5265
5273
  running: "running";
5266
- ok: "ok";
5267
5274
  cancelled: "cancelled";
5275
+ ok: "ok";
5268
5276
  }>>;
5269
5277
  finalError: z.ZodOptional<z.ZodObject<{
5270
5278
  name: z.ZodOptional<z.ZodString>;
@@ -5356,8 +5364,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5356
5364
  key: z.ZodString;
5357
5365
  namespace: z.ZodString;
5358
5366
  operationType: z.ZodEnum<{
5359
- span: "span";
5360
5367
  value: "value";
5368
+ span: "span";
5361
5369
  }>;
5362
5370
  operationName: z.ZodString;
5363
5371
  storedAt: z.ZodString;
@@ -5367,8 +5375,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5367
5375
  key: z.ZodString;
5368
5376
  namespace: z.ZodString;
5369
5377
  operationType: z.ZodOptional<z.ZodEnum<{
5370
- span: "span";
5371
5378
  value: "value";
5379
+ span: "span";
5372
5380
  }>>;
5373
5381
  operationName: z.ZodOptional<z.ZodString>;
5374
5382
  spanName: z.ZodOptional<z.ZodString>;
@@ -5380,8 +5388,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5380
5388
  finalStatus: z.ZodOptional<z.ZodEnum<{
5381
5389
  error: "error";
5382
5390
  running: "running";
5383
- ok: "ok";
5384
5391
  cancelled: "cancelled";
5392
+ ok: "ok";
5385
5393
  }>>;
5386
5394
  finalError: z.ZodOptional<z.ZodObject<{
5387
5395
  name: z.ZodOptional<z.ZodString>;
@@ -5579,9 +5587,9 @@ declare const createRunRequestSchema$1: z.ZodObject<{
5579
5587
  temporary: z.ZodOptional<z.ZodBoolean>;
5580
5588
  cache: z.ZodOptional<z.ZodObject<{
5581
5589
  mode: z.ZodDefault<z.ZodEnum<{
5582
- use: "use";
5583
- bypass: "bypass";
5584
5590
  refresh: "refresh";
5591
+ bypass: "bypass";
5592
+ use: "use";
5585
5593
  }>>;
5586
5594
  }, z.core.$strip>>;
5587
5595
  manualInputs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
@@ -6682,6 +6690,7 @@ type ResolvedApiCallsConfig$1 = {
6682
6690
  attributes: {
6683
6691
  method: string;
6684
6692
  url: string;
6693
+ routeAlias: string;
6685
6694
  statusCode: string;
6686
6695
  request: string;
6687
6696
  response: string;
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-Bu9yfdUS.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-OLZIjQpx.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-Cy3OxoZW.mjs";
1
+ import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-D-CnSRYy.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CPBIcMP-.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-SixIk0b7.mjs";
4
4
  export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
2
- import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-mpgZmEZ6.mjs";
1
+ import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-D-CnSRYy.mjs";
2
+ import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-Basvyp4u.mjs";
3
3
  import { z } from "zod";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";
@@ -1214,6 +1214,7 @@ const apiCallsConfigSchema = z.object({
1214
1214
  attributes: z.object({
1215
1215
  method: z.string().optional(),
1216
1216
  url: z.string().optional(),
1217
+ routeAlias: z.string().optional(),
1217
1218
  statusCode: z.string().optional(),
1218
1219
  request: z.string().optional(),
1219
1220
  response: z.string().optional(),
@@ -1278,6 +1279,7 @@ const DEFAULT_API_CALLS_CONFIG = {
1278
1279
  attributes: {
1279
1280
  method: "method",
1280
1281
  url: "url",
1282
+ routeAlias: "routeAlias",
1281
1283
  statusCode: "statusCode",
1282
1284
  request: "request",
1283
1285
  response: "response",
@@ -2494,6 +2496,17 @@ function pickError(span) {
2494
2496
  if (span.errors && span.errors.length > 0) return span.errors[0] ?? null;
2495
2497
  return null;
2496
2498
  }
2499
+ function stripSearchAndHash(value) {
2500
+ const endIndex = [value.indexOf("?"), value.indexOf("#")].filter((index) => index !== -1).toSorted((a, b) => a - b)[0];
2501
+ return endIndex === void 0 ? value : value.slice(0, endIndex);
2502
+ }
2503
+ function normalizeRouteAlias(routeAlias) {
2504
+ if (routeAlias === null) return null;
2505
+ const trimmed = routeAlias.trim();
2506
+ if (trimmed.length === 0) return null;
2507
+ const withoutSearch = stripSearchAndHash(trimmed);
2508
+ return withoutSearch.startsWith("/") ? withoutSearch : `/${withoutSearch}`;
2509
+ }
2497
2510
  /**
2498
2511
  * Filter `spans` down to API calls and project each one to the structured
2499
2512
  * shape consumed by the API calls tab.
@@ -2525,13 +2538,16 @@ function extractApiCalls(spans, config) {
2525
2538
  placements: metric.placements
2526
2539
  });
2527
2540
  }
2541
+ const url = readString$1(attrs, config.attributes.url);
2542
+ const routeAlias = normalizeRouteAlias(readString$1(attrs, config.attributes.routeAlias));
2528
2543
  result.push({
2529
2544
  id: span.id,
2530
2545
  name: span.name,
2531
2546
  kind: span.kind,
2532
2547
  status: span.status,
2533
2548
  method: readString$1(attrs, config.attributes.method),
2534
- url: readString$1(attrs, config.attributes.url),
2549
+ url,
2550
+ routeAlias,
2535
2551
  statusCode: readNumber$1(attrs, config.attributes.statusCode),
2536
2552
  durationMs: readNumber$1(attrs, config.attributes.durationMs) ?? computeDurationMs(span),
2537
2553
  request: getNestedAttribute(attrs, config.attributes.request),
@@ -1,4 +1,4 @@
1
- import { Ct as buildCaseKey, Dt as caseRowSchema, Et as caseDetailSchema, Ft as runWithEvalRegistry, Tt as getCaseRowCaseKey, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as commitPendingCacheWrites, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
1
+ import { Ct as buildCaseKey, Dt as caseRowSchema, Et as caseDetailSchema, Ft as runWithEvalRegistry, Tt as getCaseRowCaseKey, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as commitPendingCacheWrites, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, yt as runManifestSchema } from "./runExecution-D-CnSRYy.mjs";
2
2
  import { Result, resultify } from "t-result";
3
3
  import { readFile, readdir, rm, writeFile } from "node:fs/promises";
4
4
  import { dirname, join } from "node:path";
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-SxtKn-Xh.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-DwNb5TCb.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-OLZIjQpx.mjs";
2
- import "./src-Cy3OxoZW.mjs";
1
+ import { n as createRunner } from "./cli-CPBIcMP-.mjs";
2
+ import "./src-SixIk0b7.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance({ loadEnv = true } = {}) {
@@ -1,5 +1,5 @@
1
- import { Nt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-Bu9yfdUS.mjs";
2
- import "./cli-OLZIjQpx.mjs";
1
+ import { Nt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-D-CnSRYy.mjs";
2
+ import "./cli-CPBIcMP-.mjs";
3
3
  //#region src/index.ts
4
4
  /** Register an eval definition with typed tag support. */
5
5
  function defineEval(definition) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.60.4",
3
+ "version": "0.61.1",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -32,9 +32,9 @@
32
32
  "devDependencies": {
33
33
  "@types/node": "^24.7.2",
34
34
  "typescript": "^5.9.2",
35
- "@agent-evals/shared": "0.0.1",
35
+ "@agent-evals/runner": "0.0.1",
36
36
  "@agent-evals/sdk": "0.0.1",
37
- "@agent-evals/runner": "0.0.1"
37
+ "@agent-evals/shared": "0.0.1"
38
38
  },
39
39
  "scripts": {
40
40
  "build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",