@ls-stack/agent-eval 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1506,7 +1506,7 @@ function createTraceCache(generateSpanId) {
1506
1506
  namespace,
1507
1507
  operationType: "value",
1508
1508
  operationName: info.name,
1509
- storedAt: (/* @__PURE__ */ new Date()).toISOString(),
1509
+ storedAt: new Date(getRealDateNowMs()).toISOString(),
1510
1510
  codeFingerprint: cacheCtx.codeFingerprint,
1511
1511
  recording: await serializeCacheRecording(recording)
1512
1512
  }, {
@@ -1940,7 +1940,7 @@ async function traceSpanInternal(info, fn) {
1940
1940
  operationName: info.name,
1941
1941
  spanName: info.name,
1942
1942
  spanKind: info.kind,
1943
- storedAt: (/* @__PURE__ */ new Date()).toISOString(),
1943
+ storedAt: new Date(getRealDateNowMs()).toISOString(),
1944
1944
  codeFingerprint: ctx.codeFingerprint,
1945
1945
  recording: await serializeCacheRecording(recording)
1946
1946
  };
@@ -2541,6 +2541,12 @@ const evalStatItemSchema = z.discriminatedUnion("kind", [
2541
2541
  const evalStatsConfigSchema = z.array(evalStatItemSchema);
2542
2542
  /** Schema summarizing a discovered eval for list and overview screens. */
2543
2543
  const evalSummarySchema = z.object({
2544
+ /**
2545
+ * Stable eval identity derived from the workspace-relative file path and
2546
+ * authored eval id. Older clients should display `id`; callers that need an
2547
+ * exact eval must use `key`.
2548
+ */
2549
+ key: z.string().default(""),
2544
2550
  id: z.string(),
2545
2551
  title: z.string().optional(),
2546
2552
  /** Eval file path relative to the active workspace root. */
@@ -2580,6 +2586,16 @@ const evalSummarySchema = z.object({
2580
2586
  });
2581
2587
  /** Schema for one case row in an eval run result table. */
2582
2588
  const caseRowSchema = z.object({
2589
+ /**
2590
+ * Stable eval identity for this case row. Legacy rows may omit it and fall
2591
+ * back to `evalId`.
2592
+ */
2593
+ evalKey: z.string().optional(),
2594
+ /**
2595
+ * Stable case identity derived from file path, eval id, and case id. Legacy
2596
+ * rows may omit it and fall back to `caseId`.
2597
+ */
2598
+ caseKey: z.string().optional(),
2583
2599
  caseId: z.string(),
2584
2600
  evalId: z.string(),
2585
2601
  status: z.enum([
@@ -2657,6 +2673,10 @@ const scoreTraceSchema = z.object({
2657
2673
  });
2658
2674
  /** Schema for the detailed payload shown when opening a specific case. */
2659
2675
  const caseDetailSchema = z.object({
2676
+ /** Stable eval identity for this case detail. */
2677
+ evalKey: z.string().optional(),
2678
+ /** Stable case identity for this case detail. */
2679
+ caseKey: z.string().optional(),
2660
2680
  caseId: z.string(),
2661
2681
  evalId: z.string(),
2662
2682
  status: z.enum([
@@ -2694,6 +2714,36 @@ const caseDetailSchema = z.object({
2694
2714
  */
2695
2715
  cacheRefs: z.array(traceCacheRefSchema).default([])
2696
2716
  });
2717
+ /** Schema for discovery problems that should be shown before running evals. */
2718
+ const discoveryIssueSchema = z.object({
2719
+ type: z.enum(["duplicate-eval-id"]),
2720
+ severity: z.enum(["error"]),
2721
+ filePath: z.string(),
2722
+ evalId: z.string(),
2723
+ message: z.string()
2724
+ });
2725
+ //#endregion
2726
+ //#region ../shared/src/evalIdentity.ts
2727
+ /** Build the stable identity for one eval inside a workspace. */
2728
+ function buildEvalKey(params) {
2729
+ return `${encodeURIComponent(params.filePath)}#${encodeURIComponent(params.evalId)}`;
2730
+ }
2731
+ /** Build the stable identity for one eval case inside a workspace. */
2732
+ function buildCaseKey(params) {
2733
+ return [
2734
+ encodeURIComponent(params.filePath),
2735
+ encodeURIComponent(params.evalId),
2736
+ encodeURIComponent(params.caseId)
2737
+ ].join("#");
2738
+ }
2739
+ /** Return the collision-safe eval key stored on a row, falling back for legacy data. */
2740
+ function getCaseRowEvalKey(row) {
2741
+ return row.evalKey ?? row.evalId;
2742
+ }
2743
+ /** Return the collision-safe case key stored on a row, falling back for legacy data. */
2744
+ function getCaseRowCaseKey(row) {
2745
+ return row.caseKey ?? row.caseId;
2746
+ }
2697
2747
  //#endregion
2698
2748
  //#region ../shared/src/schemas/config.ts
2699
2749
  /** Strategy used to collapse repeated trials into one stored case result. */
@@ -3072,8 +3122,8 @@ const runManifestSchema = z.object({
3072
3122
  */
3073
3123
  commitSha: z.string().nullable().optional().default(null),
3074
3124
  /**
3075
- * Eval-file fingerprints captured for this run, keyed by eval id. Older
3076
- * persisted runs may not include this field.
3125
+ * Eval-file fingerprints captured for this run, keyed by exact eval key.
3126
+ * Older persisted runs may use authored eval ids or omit this field.
3077
3127
  */
3078
3128
  evalSourceFingerprints: z.record(z.string(), z.string()).optional().default({}),
3079
3129
  target: z.object({
@@ -3082,6 +3132,10 @@ const runManifestSchema = z.object({
3082
3132
  "evalIds",
3083
3133
  "caseIds"
3084
3134
  ]),
3135
+ /** Exact stable eval identities (`filePath + evalId`) selected by UI/API callers. */
3136
+ evalKeys: z.array(z.string()).optional(),
3137
+ /** Workspace-relative file paths or glob patterns used to filter selected evals. */
3138
+ files: z.array(z.string()).optional(),
3085
3139
  evalIds: z.array(z.string()).optional(),
3086
3140
  caseIds: z.array(z.string()).optional()
3087
3141
  }),
@@ -3808,6 +3862,10 @@ const createRunRequestSchema = z.object({
3808
3862
  "evalIds",
3809
3863
  "caseIds"
3810
3864
  ]),
3865
+ /** Exact stable eval identities (`filePath + evalId`) selected by UI/API callers. */
3866
+ evalKeys: z.array(z.string()).optional(),
3867
+ /** Workspace-relative file paths or glob patterns used to filter selected evals. */
3868
+ files: z.array(z.string()).optional(),
3811
3869
  evalIds: z.array(z.string()).optional(),
3812
3870
  caseIds: z.array(z.string()).optional()
3813
3871
  }),
@@ -4778,7 +4836,8 @@ function addDefaultOutputs(params) {
4778
4836
  //#region ../runner/src/discovery.ts
4779
4837
  const evalIdMatchRegex = /\bid\s*:\s*['"]([^'"]+)['"]/;
4780
4838
  const evalTitleMatchRegex = /\btitle\s*:\s*['"]([^'"]+)['"]/;
4781
- function parseEvalMetas(filePath, content) {
4839
+ /** Parse static eval metadata and discovery issues from one eval file. */
4840
+ function parseEvalDiscovery(filePath, content) {
4782
4841
  const metas = [];
4783
4842
  let searchIndex = 0;
4784
4843
  while (searchIndex < content.length) {
@@ -4801,7 +4860,20 @@ function parseEvalMetas(filePath, content) {
4801
4860
  }
4802
4861
  searchIndex = extracted.nextIndex;
4803
4862
  }
4804
- return metas;
4863
+ const countsById = /* @__PURE__ */ new Map();
4864
+ for (const meta of metas) countsById.set(meta.id, (countsById.get(meta.id) ?? 0) + 1);
4865
+ const duplicateIds = new Set([...countsById].filter(([, count]) => count > 1).map(([id]) => id));
4866
+ const issues = [...duplicateIds].map((evalId) => ({
4867
+ type: "duplicate-eval-id",
4868
+ severity: "error",
4869
+ filePath,
4870
+ evalId,
4871
+ message: `Duplicate eval id "${evalId}" in ${filePath}. Eval ids must be unique within one file.`
4872
+ }));
4873
+ return {
4874
+ metas: metas.filter((meta) => !duplicateIds.has(meta.id)),
4875
+ issues
4876
+ };
4805
4877
  }
4806
4878
  function extractDefineEvalObject(content, defineEvalIndex) {
4807
4879
  const openParenIndex = content.indexOf("(", defineEvalIndex);
@@ -4920,40 +4992,6 @@ function getRunFreshnessTimestamp(manifest) {
4920
4992
  return manifest.endedAt ?? manifest.startedAt;
4921
4993
  }
4922
4994
  //#endregion
4923
- //#region ../runner/src/evalSummaries.ts
4924
- /** Build the API/UI summary payload for one discovered eval. */
4925
- function buildEvalSummary(params) {
4926
- const { meta, config, gitState, latestRun, lastRunStatus } = params;
4927
- const { sourceFingerprint, ...summaryMeta } = meta;
4928
- const freshness = deriveEvalFreshness({
4929
- latestRun,
4930
- gitState,
4931
- currentEvalSourceFingerprint: sourceFingerprint,
4932
- staleAfterDays: config.staleAfterDays ?? 14
4933
- });
4934
- return {
4935
- ...summaryMeta,
4936
- stale: freshness.stale,
4937
- outdated: freshness.outdated,
4938
- freshnessStatus: freshness.freshnessStatus,
4939
- latestRunAt: latestRun?.startedAt ?? null,
4940
- latestRunCommitSha: latestRun?.commitSha ?? null,
4941
- currentCommitSha: gitState.commitSha,
4942
- lastRunStatus
4943
- };
4944
- }
4945
- /** Resolve which eval ids a run request should mark as the latest run. */
4946
- function getTargetEvalIds(params) {
4947
- const { request, sortedEvalIds, knownEvalIds } = params;
4948
- if (request.target.evalIds && request.target.evalIds.length > 0) return request.target.evalIds.filter((evalId) => knownEvalIds.has(evalId));
4949
- return sortedEvalIds;
4950
- }
4951
- /** Write one latest-run snapshot to each targeted eval id. */
4952
- function setLatestRunInfoMap(params) {
4953
- const { latestRunInfoMap, evalIds, info } = params;
4954
- for (const evalId of evalIds) latestRunInfoMap.set(evalId, info);
4955
- }
4956
- //#endregion
4957
4995
  //#region ../runner/src/outputArtifacts.ts
4958
4996
  const mimeTypeExtensionMap = {
4959
4997
  "application/json": ".json",
@@ -5064,9 +5102,9 @@ function recomputePersistedCaseStatus(caseRow, caseDetail, scoreThresholds) {
5064
5102
  return caseRow.status === "error" ? "error" : "pass";
5065
5103
  }
5066
5104
  function runTouchesEval(params) {
5067
- if (params.caseRows.some((caseRow) => caseRow.evalId === params.evalId)) return true;
5105
+ if (params.caseRows.some((caseRow) => getCaseRowEvalKey(caseRow) === params.evalKey || caseRow.evalKey === void 0 && caseRow.evalId === params.evalId)) return true;
5068
5106
  if (params.target.mode === "all") return params.evalExists;
5069
- if (params.target.mode === "evalIds") return params.target.evalIds?.includes(params.evalId) ?? false;
5107
+ if (params.target.mode === "evalIds") return params.target.evalKeys?.includes(params.evalKey) ?? params.target.evalIds?.includes(params.evalId ?? params.evalKey) ?? false;
5070
5108
  return false;
5071
5109
  }
5072
5110
  async function recomputeEvalStatusesInRuns(params) {
@@ -5075,14 +5113,15 @@ async function recomputeEvalStatusesInRuns(params) {
5075
5113
  if (!runTouchesEval({
5076
5114
  target: run.manifest.target,
5077
5115
  caseRows: run.cases,
5116
+ evalKey: params.evalKey,
5078
5117
  evalId: params.evalId,
5079
5118
  evalExists: params.evalExists
5080
5119
  })) continue;
5081
5120
  if (run.manifest.status === "running") continue;
5082
5121
  let changed = false;
5083
5122
  for (const caseRow of run.cases) {
5084
- if (caseRow.evalId !== params.evalId) continue;
5085
- const caseDetail = run.caseDetails.get(caseRow.caseId);
5123
+ if (getCaseRowEvalKey(caseRow) !== params.evalKey && !(caseRow.evalKey === void 0 && caseRow.evalId === params.evalId)) continue;
5124
+ const caseDetail = run.caseDetails.get(getCaseRowCaseKey(caseRow));
5086
5125
  const nextStatus = recomputePersistedCaseStatus(caseRow, caseDetail, params.scoreThresholds);
5087
5126
  if (caseRow.status === nextStatus) continue;
5088
5127
  caseRow.status = nextStatus;
@@ -5150,8 +5189,8 @@ async function loadPersistedRunSnapshots(localStateDir) {
5150
5189
  }
5151
5190
  return snapshots;
5152
5191
  }
5153
- async function persistCaseDetail(runDir, caseDetail) {
5154
- await writeFile(join(runDir, "case-details", `${encodeCaseDetailFileName(caseDetail.caseId)}.json`), JSON.stringify(caseDetail, null, 2));
5192
+ async function persistCaseDetail(runDir, caseDetail, fileId = caseDetail.caseId) {
5193
+ await writeFile(join(runDir, "case-details", `${encodeCaseDetailFileName(fileId)}.json`), JSON.stringify(caseDetail, null, 2));
5155
5194
  }
5156
5195
  function getLastRunStatuses(params) {
5157
5196
  const latestRunInfos = getLatestRunInfos(params);
@@ -5164,14 +5203,15 @@ function getLastRunStatuses(params) {
5164
5203
  function getLatestRunInfos(params) {
5165
5204
  const { runs, knownEvals } = params;
5166
5205
  const knownEvalMetas = [...knownEvals];
5167
- const manualScoreKeysByEval = new Map(knownEvalMetas.map((evalMeta) => [evalMeta.id, evalMeta.columnDefs.filter((columnDef) => columnDef.isManualScore === true).map((columnDef) => columnDef.key)]));
5206
+ const evalIdByKey = new Map(knownEvalMetas.map((evalMeta) => [evalMeta.key, evalMeta.id]));
5207
+ const manualScoreKeysByEval = new Map(knownEvalMetas.map((evalMeta) => [evalMeta.key, evalMeta.columnDefs.filter((columnDef) => columnDef.isManualScore === true).map((columnDef) => columnDef.key)]));
5168
5208
  const orderedRuns = [...runs].toSorted((a, b) => new Date(getRunFreshnessTimestamp(a.manifest)).getTime() - new Date(getRunFreshnessTimestamp(b.manifest)).getTime());
5169
5209
  const latestRunInfos = /* @__PURE__ */ new Map();
5170
- for (const run of orderedRuns) for (const evalId of getRunEvalIds(run, knownEvalMetas.map((evalMeta) => evalMeta.id))) latestRunInfos.set(evalId, {
5171
- status: getEvalStatusForRun(run, evalId, manualScoreKeysByEval.get(evalId) ?? []),
5210
+ for (const run of orderedRuns) for (const evalKey of getRunEvalKeys(run, knownEvalMetas)) latestRunInfos.set(evalKey, {
5211
+ status: getEvalStatusForRun(run, evalKey, evalIdByKey.get(evalKey), manualScoreKeysByEval.get(evalKey) ?? []),
5172
5212
  startedAt: getRunFreshnessTimestamp(run.manifest),
5173
5213
  commitSha: run.manifest.commitSha ?? null,
5174
- evalSourceFingerprint: run.manifest.evalSourceFingerprints[evalId] ?? null
5214
+ evalSourceFingerprint: run.manifest.evalSourceFingerprints[evalKey] ?? run.manifest.evalSourceFingerprints[evalIdByKey.get(evalKey) ?? ""] ?? null
5175
5215
  });
5176
5216
  return latestRunInfos;
5177
5217
  }
@@ -5224,18 +5264,25 @@ async function readCaseDetails(runDir) {
5224
5264
  if (!entry.isFile() || !entry.name.endsWith(".json")) continue;
5225
5265
  const detail = await readParsedJsonFile(join(detailsDir, entry.name), { safeParse: caseDetailSchema.safeParse.bind(caseDetailSchema) });
5226
5266
  if (!detail) continue;
5227
- caseDetails.set(detail.caseId, detail);
5267
+ caseDetails.set(detail.caseKey ?? detail.caseId, detail);
5228
5268
  }
5229
5269
  return caseDetails;
5230
5270
  }
5231
- function getRunEvalIds(run, knownEvalIds) {
5232
- const evalIds = new Set(run.cases.map((caseRow) => caseRow.evalId));
5233
- if (run.manifest.target.mode === "evalIds") for (const evalId of run.manifest.target.evalIds ?? []) evalIds.add(evalId);
5234
- else if (run.manifest.target.mode === "all" && evalIds.size === 0) for (const evalId of knownEvalIds) evalIds.add(evalId);
5235
- return [...evalIds];
5236
- }
5237
- function getEvalStatusForRun(run, evalId, manualScoreKeys) {
5238
- const evalCases = run.cases.filter((caseRow) => caseRow.evalId === evalId);
5271
+ function getRunEvalKeys(run, knownEvals) {
5272
+ const knownEvalMetas = [...knownEvals];
5273
+ const evalKeys = new Set(run.cases.map(getCaseRowEvalKey));
5274
+ for (const caseRow of run.cases) {
5275
+ if (caseRow.evalKey !== void 0) continue;
5276
+ for (const evalMeta of knownEvalMetas) if (evalMeta.id === caseRow.evalId) evalKeys.add(evalMeta.key);
5277
+ }
5278
+ if (run.manifest.target.mode === "evalIds") {
5279
+ for (const evalKey of run.manifest.target.evalKeys ?? []) evalKeys.add(evalKey);
5280
+ for (const evalId of run.manifest.target.evalIds ?? []) for (const evalMeta of knownEvalMetas) if (evalMeta.id === evalId) evalKeys.add(evalMeta.key);
5281
+ } else if (run.manifest.target.mode === "all" && evalKeys.size === 0) for (const evalMeta of knownEvalMetas) evalKeys.add(evalMeta.key);
5282
+ return [...evalKeys];
5283
+ }
5284
+ function getEvalStatusForRun(run, evalKey, evalId, manualScoreKeys) {
5285
+ const evalCases = run.cases.filter((caseRow) => getCaseRowEvalKey(caseRow) === evalKey || caseRow.evalKey === void 0 && caseRow.evalId === evalId);
5239
5286
  if (evalCases.length > 0) {
5240
5287
  if (hasPendingManualScores(evalCases, manualScoreKeys)) return "unscored";
5241
5288
  return toLastRunStatus$1(deriveStatusFromCaseRows({ caseRows: evalCases }));
@@ -5406,8 +5453,7 @@ function resolveTracePresentation(spans, globalTraceDisplay, evalTraceDisplay) {
5406
5453
  }
5407
5454
  //#endregion
5408
5455
  //#region ../runner/src/runExecution.ts
5409
- function filterEvalCases(cases, evalIds, caseIds, evalId) {
5410
- if (evalIds && evalIds.length > 0 && !evalIds.includes(evalId)) return [];
5456
+ function filterEvalCases(cases, caseIds) {
5411
5457
  if (!caseIds || caseIds.length === 0) return cases;
5412
5458
  const selectedCaseIds = new Set(caseIds);
5413
5459
  return cases.filter((evalCase) => selectedCaseIds.has(evalCase.id));
@@ -5436,13 +5482,18 @@ async function callWithUnknownResult(fn, args) {
5436
5482
  return await Reflect.apply(fn, void 0, args);
5437
5483
  }
5438
5484
  async function runCase(params) {
5439
- const { evalDef, evalId, evalCase, globalTraceDisplay, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, codeFingerprint, moduleIsolation, evalFilePath, workspaceRoot, artifactDir, runId } = params;
5485
+ const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, codeFingerprint, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
5440
5486
  const scopedIdPrefix = buildScopedEvalIdPrefix({
5441
5487
  evalId,
5442
5488
  evalFilePath,
5443
5489
  caseId: evalCase.id,
5444
5490
  workspaceRoot
5445
5491
  });
5492
+ const caseKey = buildCaseKey({
5493
+ filePath: evalFileRelativePath,
5494
+ evalId,
5495
+ caseId: evalCase.id
5496
+ });
5446
5497
  const { scope, error: executeError } = await runInEvalScope(evalCase.id, async () => {
5447
5498
  const execute = async () => {
5448
5499
  await Reflect.apply(evalDef.execute, evalDef, [{
@@ -5608,6 +5659,8 @@ async function runCase(params) {
5608
5659
  stack: nonAssertError.stack
5609
5660
  } : null;
5610
5661
  const caseDetail = {
5662
+ evalKey,
5663
+ caseKey,
5611
5664
  caseId: evalCase.id,
5612
5665
  evalId,
5613
5666
  status,
@@ -5693,6 +5746,56 @@ async function executeQueuedCase(params) {
5693
5746
  await queuedCase.onComplete(result);
5694
5747
  }
5695
5748
  //#endregion
5749
+ //#region ../runner/src/targeting.ts
5750
+ function escapeRegex(value) {
5751
+ return value.replace(/[|\\{}()[\]^$+?.]/g, "\\$&");
5752
+ }
5753
+ function globToRegex(pattern) {
5754
+ const normalized = pattern.replaceAll("\\", "/");
5755
+ let regex = "^";
5756
+ for (let i = 0; i < normalized.length; i++) {
5757
+ const char = normalized[i];
5758
+ const next = normalized[i + 1];
5759
+ if (char === "*" && next === "*") {
5760
+ regex += ".*";
5761
+ i++;
5762
+ } else if (char === "*") regex += "[^/]*";
5763
+ else if (char === "?") regex += "[^/]";
5764
+ else regex += escapeRegex(char ?? "");
5765
+ }
5766
+ regex += "$";
5767
+ return new RegExp(regex);
5768
+ }
5769
+ function fileMatches(pattern, filePath) {
5770
+ const normalizedPattern = pattern.replaceAll("\\", "/");
5771
+ if (normalizedPattern === filePath) return true;
5772
+ return globToRegex(normalizedPattern).test(filePath);
5773
+ }
5774
+ function matchesFiles(evalMeta, files) {
5775
+ if (files === void 0 || files.length === 0) return true;
5776
+ return files.some((file) => fileMatches(file, evalMeta.filePath));
5777
+ }
5778
+ function matchesEvalIds(evalMeta, evalIds) {
5779
+ if (evalIds === void 0 || evalIds.length === 0) return true;
5780
+ return evalIds.includes(evalMeta.id);
5781
+ }
5782
+ function matchesEvalKeys(evalMeta, evalKeys) {
5783
+ if (evalKeys === void 0 || evalKeys.length === 0) return true;
5784
+ return evalKeys.includes(evalMeta.key);
5785
+ }
5786
+ /** Return the discovered evals selected by a run target. */
5787
+ function getTargetEvals(params) {
5788
+ const { target } = params.request;
5789
+ return [...params.evals].filter((evalMeta) => matchesEvalKeys(evalMeta, target.evalKeys)).filter((evalMeta) => matchesEvalIds(evalMeta, target.evalIds)).filter((evalMeta) => matchesFiles(evalMeta, target.files)).toSorted((a, b) => a.filePath.localeCompare(b.filePath));
5790
+ }
5791
+ /** Resolve which exact eval keys a run request can affect. */
5792
+ function getTargetEvalKeys(params) {
5793
+ return getTargetEvals({
5794
+ evals: params.sortedEvals,
5795
+ request: params.request
5796
+ }).map((evalMeta) => evalMeta.key);
5797
+ }
5798
+ //#endregion
5696
5799
  //#region ../runner/src/runOrchestration.ts
5697
5800
  /**
5698
5801
  * Ranks case statuses from worst to best. Used to order trial attempts so the
@@ -5743,6 +5846,20 @@ function formatUnknownErrorDetails(error) {
5743
5846
  if (typeof error === "string") return error;
5744
5847
  return String(error);
5745
5848
  }
5849
+ function findDuplicateCaseIds(cases) {
5850
+ const counts = /* @__PURE__ */ new Map();
5851
+ for (const evalCase of cases) counts.set(evalCase.id, (counts.get(evalCase.id) ?? 0) + 1);
5852
+ return [...counts].filter(([, count]) => count > 1).map(([caseId]) => caseId).toSorted();
5853
+ }
5854
+ function findAmbiguousTargetCaseIds(preparedEvals) {
5855
+ const ownersByCaseId = /* @__PURE__ */ new Map();
5856
+ for (const preparedEval of preparedEvals) for (const preparedCase of preparedEval.preparedCases) {
5857
+ const owners = ownersByCaseId.get(preparedCase.caseId) ?? /* @__PURE__ */ new Set();
5858
+ owners.add(`${preparedEval.evalMeta.filePath}#${preparedEval.evalMeta.id}`);
5859
+ ownersByCaseId.set(preparedCase.caseId, owners);
5860
+ }
5861
+ return [...ownersByCaseId].filter(([, owners]) => owners.size > 1).map(([caseId, owners]) => `${caseId} (${[...owners].join(", ")})`);
5862
+ }
5746
5863
  function buildRunErrorMessage(errors) {
5747
5864
  return errors.map((entry) => {
5748
5865
  const [firstLine, ...detailLines] = entry.details.split("\n");
@@ -5762,14 +5879,15 @@ async function finalizePreparedCase(params) {
5762
5879
  scoreKeys: preparedEval.scoreKeys
5763
5880
  });
5764
5881
  if (winningTrial.bufferedCacheStore !== null) await winningTrial.bufferedCacheStore.commit();
5882
+ const artifactFileId = getCaseArtifactFileId(runState, winningTrial.caseRow);
5765
5883
  runState.cases.push(winningTrial.caseRow);
5766
- runState.caseDetails.set(preparedCase.caseId, winningTrial.caseDetail);
5884
+ runState.caseDetails.set(getCaseRowCaseKey(winningTrial.caseRow), winningTrial.caseDetail);
5767
5885
  preparedEval.mergeColumns(winningTrial.caseDetail.columns);
5768
5886
  if (winningTrial.caseRow.status === "pass") runState.summary.passedCases++;
5769
5887
  else if (winningTrial.caseRow.status === "error") runState.summary.errorCases++;
5770
5888
  else runState.summary.failedCases++;
5771
- await writeFile(join(runDir, "traces", `${preparedCase.caseId}.json`), JSON.stringify(winningTrial.caseDetail.trace, null, 2));
5772
- await persistCaseDetail(runDir, winningTrial.caseDetail);
5889
+ await writeFile(join(runDir, "traces", `${encodeURIComponent(artifactFileId)}.json`), JSON.stringify(winningTrial.caseDetail.trace, null, 2));
5890
+ await persistCaseDetail(runDir, winningTrial.caseDetail, artifactFileId);
5773
5891
  onCaseFinished?.(winningTrial.caseDetail, winningTrial.caseRow);
5774
5892
  emitEvent(runState, {
5775
5893
  type: "case.finished",
@@ -5780,20 +5898,24 @@ async function finalizePreparedCase(params) {
5780
5898
  preparedEval.evalCaseRows.push(winningTrial.caseRow);
5781
5899
  }
5782
5900
  function getPreparedCaseOrderKey(caseRow) {
5783
- return `${caseRow.evalId}\u0000${caseRow.caseId}`;
5901
+ return `${caseRow.evalKey ?? caseRow.evalId}\u0000${caseRow.caseId}`;
5902
+ }
5903
+ function getCaseArtifactFileId(runState, caseRow) {
5904
+ const caseKey = getCaseRowCaseKey(caseRow);
5905
+ return runState.cases.some((existing) => existing.caseId === caseRow.caseId && getCaseRowCaseKey(existing) !== caseKey) ? caseKey : caseRow.caseId;
5784
5906
  }
5785
5907
  function sortCaseRowsByPreparedOrder(caseRows, preparedEvals) {
5786
5908
  const orderByCase = /* @__PURE__ */ new Map();
5787
5909
  let order = 0;
5788
5910
  for (const preparedEval of preparedEvals) for (const preparedCase of preparedEval.preparedCases) {
5789
- orderByCase.set(`${preparedEval.evalMeta.id}\u0000${preparedCase.caseId}`, order);
5911
+ orderByCase.set(`${preparedEval.evalMeta.key}\u0000${preparedCase.caseId}`, order);
5790
5912
  order++;
5791
5913
  }
5792
5914
  caseRows.sort((left, right) => {
5793
5915
  return (orderByCase.get(getPreparedCaseOrderKey(left)) ?? Number.MAX_SAFE_INTEGER) - (orderByCase.get(getPreparedCaseOrderKey(right)) ?? Number.MAX_SAFE_INTEGER);
5794
5916
  });
5795
5917
  }
5796
- async function executeRun({ runState, request, runDir, config, evals, cacheStore, lastRunStatusMap, latestRunInfoMap, emitEvent, emitDiscoveryEvent, workspaceRoot, getSourceFingerprint, getConfiguredConcurrency, getSortedEvalMetas, getTargetEvals, onCaseFinished }) {
5918
+ async function executeRun({ runState, request, runDir, config, cacheStore, lastRunStatusMap, latestRunInfoMap, emitEvent, emitDiscoveryEvent, workspaceRoot, getSourceFingerprint, getConfiguredConcurrency, getSortedEvalMetas, getTargetEvals, onCaseFinished }) {
5797
5919
  try {
5798
5920
  const targetEvals = getTargetEvals(request);
5799
5921
  emitEvent(runState, {
@@ -5822,10 +5944,10 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
5822
5944
  codeFingerprint = "";
5823
5945
  }
5824
5946
  if (codeFingerprint.length > 0) {
5825
- runState.manifest.evalSourceFingerprints[evalMeta.id] = codeFingerprint;
5947
+ runState.manifest.evalSourceFingerprints[evalMeta.key] = codeFingerprint;
5826
5948
  evalMeta.sourceFingerprint = codeFingerprint;
5827
5949
  } else {
5828
- delete runState.manifest.evalSourceFingerprints[evalMeta.id];
5950
+ delete runState.manifest.evalSourceFingerprints[evalMeta.key];
5829
5951
  evalMeta.sourceFingerprint = null;
5830
5952
  }
5831
5953
  try {
@@ -5846,10 +5968,13 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
5846
5968
  await runWithModuleIsolation(moduleIsolation, async () => {
5847
5969
  await runInEvalRuntimeScope("cases", async () => {
5848
5970
  await entry.use(async (evalDef) => {
5849
- const cases = filterEvalCases(resolveRunnableEvalCases({
5971
+ const runnableCases = resolveRunnableEvalCases({
5850
5972
  cases: await runWithEvalClock(evalDef.startTime, async () => typeof evalDef.cases === "function" ? await evalDef.cases() : evalDef.cases ?? [], { freezeTime: evalDef.freezeTime }),
5851
5973
  evalId: evalMeta.id
5852
- }), request.target.evalIds, request.target.caseIds, evalMeta.id);
5974
+ });
5975
+ const duplicateCaseIds = findDuplicateCaseIds(runnableCases);
5976
+ if (duplicateCaseIds.length > 0) throw new Error(`Duplicate case id${duplicateCaseIds.length === 1 ? "" : "s"} in ${evalMeta.filePath}#${evalMeta.id}: ${duplicateCaseIds.join(", ")}`);
5977
+ const cases = filterEvalCases(runnableCases, request.target.caseIds);
5853
5978
  runState.summary.totalCases += cases.length;
5854
5979
  const defaultConfig = resolveEvalDefaultConfig({
5855
5980
  evalDef,
@@ -5895,6 +6020,7 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
5895
6020
  const { caseDetail, caseRowUpdate } = await runCase({
5896
6021
  evalDef,
5897
6022
  evalId: evalMeta.id,
6023
+ evalKey: evalMeta.key,
5898
6024
  evalCase,
5899
6025
  globalTraceDisplay,
5900
6026
  llmCallsConfig,
@@ -5907,6 +6033,7 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
5907
6033
  codeFingerprint,
5908
6034
  moduleIsolation,
5909
6035
  evalFilePath,
6036
+ evalFileRelativePath: evalMeta.filePath,
5910
6037
  workspaceRoot,
5911
6038
  artifactDir: join(runDir, "artifacts"),
5912
6039
  runId: runState.manifest.id
@@ -5916,6 +6043,8 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
5916
6043
  caseRow: {
5917
6044
  caseId: evalCase.id,
5918
6045
  evalId: evalMeta.id,
6046
+ evalKey: evalMeta.key,
6047
+ caseKey: caseDetail.caseKey,
5919
6048
  status: caseRowUpdate.status ?? "pending",
5920
6049
  durationMs: caseRowUpdate.durationMs ?? null,
5921
6050
  columns: caseRowUpdate.columns ?? {},
@@ -5951,16 +6080,23 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
5951
6080
  evalId: evalMeta.id,
5952
6081
  details: formatUnknownErrorDetails(error)
5953
6082
  });
5954
- lastRunStatusMap.set(evalMeta.id, "error");
5955
- latestRunInfoMap.set(evalMeta.id, {
6083
+ lastRunStatusMap.set(evalMeta.key, "error");
6084
+ latestRunInfoMap.set(evalMeta.key, {
5956
6085
  status: "error",
5957
6086
  startedAt: runState.manifest.endedAt ?? runState.manifest.startedAt,
5958
6087
  commitSha: runState.manifest.commitSha ?? null,
5959
- evalSourceFingerprint: runState.manifest.evalSourceFingerprints[evalMeta.id] ?? null
6088
+ evalSourceFingerprint: runState.manifest.evalSourceFingerprints[evalMeta.key] ?? null
5960
6089
  });
5961
6090
  }
5962
6091
  }
5963
- await executeQueuedCases({
6092
+ const ambiguousCaseTargets = request.target.caseIds && request.target.caseIds.length > 0 ? findAmbiguousTargetCaseIds(preparedEvals) : [];
6093
+ if (ambiguousCaseTargets.length > 0) {
6094
+ queuedCases.length = 0;
6095
+ evalErrors.push({
6096
+ evalId: "target",
6097
+ details: `Ambiguous --case target. Narrow it with --file and/or --eval: ${ambiguousCaseTargets.join("; ")}`
6098
+ });
6099
+ } else await executeQueuedCases({
5964
6100
  queuedCases,
5965
6101
  concurrency: getConfiguredConcurrency(),
5966
6102
  globalTraceDisplay: config.traceDisplay
@@ -5975,13 +6111,13 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
5975
6111
  emitEvent
5976
6112
  });
5977
6113
  preparedEval.evalMeta.columnDefs = [...preparedEval.accumulatedColumns.values()];
5978
- lastRunStatusMap.set(preparedEval.evalMeta.id, toLastRunStatus(deriveStatusFromCaseRows({ caseRows: preparedEval.evalCaseRows })));
5979
- const latestStatus = lastRunStatusMap.get(preparedEval.evalMeta.id) ?? null;
5980
- latestRunInfoMap.set(preparedEval.evalMeta.id, {
6114
+ lastRunStatusMap.set(preparedEval.evalMeta.key, toLastRunStatus(deriveStatusFromCaseRows({ caseRows: preparedEval.evalCaseRows })));
6115
+ const latestStatus = lastRunStatusMap.get(preparedEval.evalMeta.key) ?? null;
6116
+ latestRunInfoMap.set(preparedEval.evalMeta.key, {
5981
6117
  status: latestStatus,
5982
6118
  startedAt: runState.manifest.endedAt ?? runState.manifest.startedAt,
5983
6119
  commitSha: runState.manifest.commitSha ?? null,
5984
- evalSourceFingerprint: runState.manifest.evalSourceFingerprints[preparedEval.evalMeta.id] ?? null
6120
+ evalSourceFingerprint: runState.manifest.evalSourceFingerprints[preparedEval.evalMeta.key] ?? null
5985
6121
  });
5986
6122
  }
5987
6123
  sortCaseRowsByPreparedOrder(runState.cases, preparedEvals);
@@ -5994,20 +6130,19 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
5994
6130
  const completedRunAt = endTime.toISOString();
5995
6131
  runState.manifest.endedAt = completedRunAt;
5996
6132
  runState.summary.errorMessage = evalErrors.length > 0 ? buildRunErrorMessage(evalErrors) : null;
5997
- for (const evalId of getTargetEvalIds({
6133
+ for (const evalKey of getTargetEvalKeys({
5998
6134
  request,
5999
- sortedEvalIds: getSortedEvalMetas().map((meta) => meta.id),
6000
- knownEvalIds: new Set(evals.keys())
6135
+ sortedEvals: getSortedEvalMetas()
6001
6136
  })) {
6002
- const latestStatus = lastRunStatusMap.get(evalId) ?? toLastRunStatus(deriveStatusFromCaseRows({
6137
+ const latestStatus = lastRunStatusMap.get(evalKey) ?? toLastRunStatus(deriveStatusFromCaseRows({
6003
6138
  caseRows: [],
6004
6139
  lifecycleStatus: runState.manifest.status
6005
6140
  }));
6006
- latestRunInfoMap.set(evalId, {
6141
+ latestRunInfoMap.set(evalKey, {
6007
6142
  status: latestStatus,
6008
6143
  startedAt: completedRunAt,
6009
6144
  commitSha: runState.manifest.commitSha ?? null,
6010
- evalSourceFingerprint: runState.manifest.evalSourceFingerprints[evalId] ?? null
6145
+ evalSourceFingerprint: runState.manifest.evalSourceFingerprints[evalKey] ?? null
6011
6146
  });
6012
6147
  }
6013
6148
  await persistRunState(runState);
@@ -6050,4 +6185,4 @@ function toLastRunStatus(status) {
6050
6185
  return status === "pending" ? null : status;
6051
6186
  }
6052
6187
  //#endregion
6053
- export { llmCallsConfigSchema as $, columnFormatSchema as $t, extractApiCalls as A, runInEvalRuntimeScope as An, cacheEntryWithDebugKeySchema as At, runSummarySchema as B, traceCacheRefSchema as Bt, validateCharts as C, getCurrentScope as Cn, evalChartMetricSchema as Ct, sseEnvelopeSchema as D, isInEvalScope as Dn, cacheDebugKeyEntrySchema as Dt, updateManualScoreRequestSchema as E, incrementEvalOutput as En, evalChartsConfigSchema as Et, getEvalDisplayStatus as F, startEvalBackgroundJob as Fn, cacheRecordingOpSchema as Ft, apiCallMetricPlacementSchema as G, traceDisplayConfigSchema as Gt, DEFAULT_LLM_CALLS_CONFIG as H, traceAttributeDisplayInputSchema as Ht, deriveScopedSummaryFromCases as I, repoFile as In, cacheRecordingSchema as It, defaultConfigKeySchema as J, traceSpanKindSchema as Jt, apiCallMetricSchema as K, traceDisplayInputConfigSchema as Kt, deriveStatusFromCaseRows as L, defineEval as Ln, cacheStatusSchema as Lt, applyDerivedCallAttributes as M, runInExistingEvalScope as Mn, cacheListItemSchema as Mt, getNestedAttribute as N, setEvalOutput as Nn, cacheModeSchema as Nt, extractCacheEntries as O, mergeEvalOutput as On, cacheDebugKeyFileSchema as Ot, getEvalTitle as P, setScopeCacheContext as Pn, cacheOperationTypeSchema as Pt, llmCallPricingSchema as Q, columnDefSchema as Qt, deriveStatusFromChildStatuses as R, getEvalRegistry as Rn, serializedCacheSpanSchema as Rt, normalizeScoreDef as S, evalLog as Sn, evalChartConfigSchema as St, createRunRequestSchema as T, getEvalStartTime as Tn, evalChartTypeSchema as Tt, agentEvalsConfigSchema as U, traceAttributeDisplayPlacementSchema as Ut, DEFAULT_API_CALLS_CONFIG as V, traceAttributeDisplayFormatSchema as Vt, apiCallMetricFormatSchema as W, traceAttributeDisplaySchema as Wt, llmCallMetricPlacementSchema as X, traceSpanWarningSchema as Xt, llmCallMetricFormatSchema as Y, traceSpanSchema as Yt, llmCallMetricSchema as Z, cellValueSchema as Zt, loadEvalModule as _, EvalAssertionError as _n, scoreTraceSchema as _t, loadPersistedRunSnapshot as a, runArtifactRefSchema as an, assertionFailureSchema as at, loadConfig as b, configureEvalRunLogs as bn, evalChartBuiltinMetricSchema as bt, persistCaseDetail as c, captureEvalSpanError as cn, evalFreshnessStatusSchema as ct, recomputePersistedCaseStatus as d, hashCacheKey as dn, evalStatsConfigSchema as dt, columnKindSchema as en, removeDefaultConfigSchema as et, runTouchesEval as f, hashCacheKeySync as fn, evalSummarySchema as ft, setLatestRunInfoMap as g, serializeCacheValue as gn, runLogPhaseSchema as gt, getTargetEvalIds as h, serializeCacheRecording as hn, runLogLocationSchema as ht, getLatestRunInfos as i, repoFileRefSchema as in, trialSelectionModeSchema as it, extractLlmCalls as j, runInEvalScope as jn, cacheFileSchema as jt, extractCacheHits as k, nextEvalId as kn, cacheEntrySchema as kt, persistRunState as l, evalSpan as ln, evalStatAggregateSchema as lt, buildEvalSummary as m, deserializeCacheValue as mn, runLogLevelSchema as mt, generateRunId as n, jsonCellSchema as nn, resolveLlmCallsConfig as nt, loadPersistedRunSnapshots as o, z$1 as on, caseDetailSchema as ot, resolveArtifactPath as p, deserializeCacheRecording as pn, runLogEntrySchema as pt, apiCallsConfigSchema as q, traceSpanErrorSchema as qt, getLastRunStatuses as r, numberDisplayOptionsSchema as rn, runLogsConfigSchema as rt, nextShortIdFromSnapshots as s, buildTraceTree as sn, caseRowSchema as st, executeRun as t, fileRefSchema as tn, resolveApiCallsConfig as tt, recomputeEvalStatusesInRuns as u, evalTracer as un, evalStatItemSchema as ut, parseEvalMetas as v, advanceEvalTime as vn, evalChartAggregateSchema as vt, createFsCacheStore as w, getEvalCaseInput as wn, evalChartTooltipExtraSchema as wt, buildDeclaredColumnDefs as x, evalAssert as xn, evalChartColorSchema as xt, resolveEvalDefaultConfig as y, appendToEvalOutput as yn, evalChartAxisSchema as yt, runManifestSchema as z, spanCacheOptionsSchema as zt };
6188
+ export { llmCallsConfigSchema as $, traceSpanKindSchema as $t, extractApiCalls as A, getEvalStartTime as An, evalChartTypeSchema as At, runSummarySchema as B, startEvalBackgroundJob as Bn, cacheRecordingOpSchema as Bt, validateCharts as C, advanceEvalTime as Cn, evalChartAggregateSchema as Ct, sseEnvelopeSchema as D, evalLog as Dn, evalChartConfigSchema as Dt, updateManualScoreRequestSchema as E, evalAssert as En, evalChartColorSchema as Et, getEvalDisplayStatus as F, runInEvalRuntimeScope as Fn, cacheEntryWithDebugKeySchema as Ft, apiCallMetricPlacementSchema as G, traceCacheRefSchema as Gt, DEFAULT_LLM_CALLS_CONFIG as H, defineEval as Hn, cacheStatusSchema as Ht, deriveScopedSummaryFromCases as I, runInEvalScope as In, cacheFileSchema as It, defaultConfigKeySchema as J, traceAttributeDisplayPlacementSchema as Jt, apiCallMetricSchema as K, traceAttributeDisplayFormatSchema as Kt, deriveStatusFromCaseRows as L, runInExistingEvalScope as Ln, cacheListItemSchema as Lt, applyDerivedCallAttributes as M, isInEvalScope as Mn, cacheDebugKeyEntrySchema as Mt, getNestedAttribute as N, mergeEvalOutput as Nn, cacheDebugKeyFileSchema as Nt, extractCacheEntries as O, getCurrentScope as On, evalChartMetricSchema as Ot, getEvalTitle as P, nextEvalId as Pn, cacheEntrySchema as Pt, llmCallPricingSchema as Q, traceSpanErrorSchema as Qt, deriveStatusFromChildStatuses as R, setEvalOutput as Rn, cacheModeSchema as Rt, normalizeScoreDef as S, EvalAssertionError as Sn, scoreTraceSchema as St, createRunRequestSchema as T, configureEvalRunLogs as Tn, evalChartBuiltinMetricSchema as Tt, agentEvalsConfigSchema as U, getEvalRegistry as Un, serializedCacheSpanSchema as Ut, DEFAULT_API_CALLS_CONFIG as V, repoFile as Vn, cacheRecordingSchema as Vt, apiCallMetricFormatSchema as W, spanCacheOptionsSchema as Wt, llmCallMetricPlacementSchema as X, traceDisplayConfigSchema as Xt, llmCallMetricFormatSchema as Y, traceAttributeDisplaySchema as Yt, llmCallMetricSchema as Z, traceDisplayInputConfigSchema as Zt, loadEvalModule as _, hashCacheKeySync as _n, evalSummarySchema as _t, getLastRunStatuses as a, columnKindSchema as an, buildCaseKey as at, loadConfig as b, serializeCacheRecording as bn, runLogLocationSchema as bt, loadPersistedRunSnapshots as c, numberDisplayOptionsSchema as cn, getCaseRowEvalKey as ct, persistRunState as d, z$1 as dn, caseRowSchema as dt, traceSpanSchema as en, removeDefaultConfigSchema as et, recomputeEvalStatusesInRuns as f, buildTraceTree as fn, discoveryIssueSchema as ft, deriveEvalFreshness as g, hashCacheKey as gn, evalStatsConfigSchema as gt, resolveArtifactPath as h, evalTracer as hn, evalStatItemSchema as ht, generateRunId as i, columnFormatSchema as in, trialSelectionModeSchema as it, extractLlmCalls as j, incrementEvalOutput as jn, evalChartsConfigSchema as jt, extractCacheHits as k, getEvalCaseInput as kn, evalChartTooltipExtraSchema as kt, nextShortIdFromSnapshots as l, repoFileRefSchema as ln, assertionFailureSchema as lt, runTouchesEval as m, evalSpan as mn, evalStatAggregateSchema as mt, getTargetEvalKeys as n, cellValueSchema as nn, resolveLlmCallsConfig as nt, getLatestRunInfos as o, fileRefSchema as on, buildEvalKey as ot, recomputePersistedCaseStatus as p, captureEvalSpanError as pn, evalFreshnessStatusSchema as pt, apiCallsConfigSchema as q, traceAttributeDisplayInputSchema as qt, getTargetEvals as r, columnDefSchema as rn, runLogsConfigSchema as rt, loadPersistedRunSnapshot as s, jsonCellSchema as sn, getCaseRowCaseKey as st, executeRun as t, traceSpanWarningSchema as tn, resolveApiCallsConfig as tt, persistCaseDetail as u, runArtifactRefSchema as un, caseDetailSchema as ut, parseEvalDiscovery as v, deserializeCacheRecording as vn, runLogEntrySchema as vt, createFsCacheStore as w, appendToEvalOutput as wn, evalChartAxisSchema as wt, buildDeclaredColumnDefs as x, serializeCacheValue as xn, runLogPhaseSchema as xt, resolveEvalDefaultConfig as y, deserializeCacheValue as yn, runLogLevelSchema as yt, runManifestSchema as z, setScopeCacheContext as zn, cacheOperationTypeSchema as zt };
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-B4SosWgD.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-cj1TkR-H.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-Be0x8CS3.mjs";
2
- import "./src-D6cettg0.mjs";
1
+ import { n as createRunner } from "./cli-ETfZ15RB.mjs";
2
+ import "./src-CyNb2ycA.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -0,0 +1,3 @@
1
+ import "./runOrchestration-B31SV_Bq.mjs";
2
+ import "./cli-ETfZ15RB.mjs";
3
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.24.0",
3
+ "version": "0.25.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -59,8 +59,8 @@
59
59
  "@types/node": "^24.7.2",
60
60
  "typescript": "^5.9.2",
61
61
  "@agent-evals/runner": "0.0.1",
62
- "@agent-evals/shared": "0.0.1",
63
- "@agent-evals/sdk": "0.0.1"
62
+ "@agent-evals/sdk": "0.0.1",
63
+ "@agent-evals/shared": "0.0.1"
64
64
  },
65
65
  "scripts": {
66
66
  "build": "pnpm --filter @agent-evals/web build && tsdown",