@ls-stack/agent-eval 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,7 +46,9 @@ function repoFile(path, mimeType) {
46
46
  //#endregion
47
47
  //#region ../sdk/src/runtime.ts
48
48
  const scopeStorage = new AsyncLocalStorage();
49
+ const runtimeScopeStorage = new AsyncLocalStorage();
49
50
  let activeEvalScopeCount = 0;
51
+ let activeEvalRuntimeScopeCount = 0;
50
52
  /** Error thrown when an eval assertion fails during case execution. */
51
53
  var EvalAssertionError = class extends Error {
52
54
  constructor(message) {
@@ -60,13 +62,16 @@ function getCurrentScope() {
60
62
  return scopeStorage.getStore();
61
63
  }
62
64
  /**
63
- * Return whether the current async execution is inside an active eval case.
65
+ * Return the current eval runner phase for this async execution.
64
66
  *
65
- * This is useful for shared workflow code that wants to branch on eval-only
66
- * behavior without importing or inspecting the full eval scope.
67
+ * Returns `null` outside eval-owned work, `env` while the runner is loading
68
+ * eval modules for a run, `cases` while generating cases, `eval` while running
69
+ * case `execute`, `derive` while deriving outputs from traces, `outputsSchema`
70
+ * while validating outputs, and `scorer` while computing scores.
67
71
  */
68
72
  function isInEvalScope() {
69
- return getCurrentScope() !== void 0;
73
+ if (activeEvalRuntimeScopeCount === 0) return null;
74
+ return runtimeScopeStorage.getStore() ?? null;
70
75
  }
71
76
  function registerBackgroundJobInScope(scope, promise) {
72
77
  const trackedPromise = promise.then(() => {
@@ -122,6 +127,31 @@ function getEvalCaseInput(path = void 0) {
122
127
  function setScopeCacheContext(scope, context) {
123
128
  scope.cacheContext = context;
124
129
  }
130
+ /** Execute a callback while `isInEvalScope()` reports a runner phase. */
131
+ async function runInEvalRuntimeScope(runtimeScope, fn) {
132
+ activeEvalRuntimeScopeCount++;
133
+ try {
134
+ return await runtimeScopeStorage.run(runtimeScope, fn);
135
+ } finally {
136
+ activeEvalRuntimeScopeCount--;
137
+ }
138
+ }
139
+ /**
140
+ * Execute a callback with an existing case scope and a specific runner phase.
141
+ *
142
+ * Runner-internal helper for post-execute phases that still need access to the
143
+ * completed case scope through output, trace, assertion, and input helpers.
144
+ */
145
+ async function runInExistingEvalScope(scope, runtimeScope, fn) {
146
+ activeEvalScopeCount++;
147
+ try {
148
+ return await scopeStorage.run(scope, async () => {
149
+ return await runInEvalRuntimeScope(runtimeScope, fn);
150
+ });
151
+ } finally {
152
+ activeEvalScopeCount--;
153
+ }
154
+ }
125
155
  /**
126
156
  * Execute a callback inside a fresh eval case scope and capture its outputs,
127
157
  * trace data, and terminal error state.
@@ -144,29 +174,24 @@ async function runInEvalScope(caseId, fn, options = {}) {
144
174
  caseCacheRefs: [],
145
175
  pendingBackgroundJobs: /* @__PURE__ */ new Set()
146
176
  };
147
- activeEvalScopeCount++;
148
- try {
149
- return await scopeStorage.run(scope, async () => {
150
- try {
151
- const result = await fn();
152
- if (options.waitForBackgroundJobs !== false) await drainBackgroundJobs(scope);
153
- return {
154
- result,
155
- scope,
156
- error: void 0
157
- };
158
- } catch (error) {
159
- if (options.waitForBackgroundJobs !== false) await drainBackgroundJobs(scope);
160
- return {
161
- result: void 0,
162
- scope,
163
- error: error instanceof Error ? error : new Error(String(error))
164
- };
165
- }
166
- });
167
- } finally {
168
- activeEvalScopeCount--;
169
- }
177
+ return await runInExistingEvalScope(scope, options.runtimeScope ?? "eval", async () => {
178
+ try {
179
+ const result = await fn();
180
+ if (options.waitForBackgroundJobs !== false) await drainBackgroundJobs(scope);
181
+ return {
182
+ result,
183
+ scope,
184
+ error: void 0
185
+ };
186
+ } catch (error) {
187
+ if (options.waitForBackgroundJobs !== false) await drainBackgroundJobs(scope);
188
+ return {
189
+ result: void 0,
190
+ scope,
191
+ error: error instanceof Error ? error : new Error(String(error))
192
+ };
193
+ }
194
+ });
170
195
  }
171
196
  /**
172
197
  * Return the next deterministic ID for the active eval case execution.
@@ -4886,6 +4911,98 @@ async function loadConfig() {
4886
4911
  }
4887
4912
  }
4888
4913
  //#endregion
4914
+ //#region ../runner/src/discovery.ts
4915
+ const evalIdMatchRegex = /\bid\s*:\s*['"]([^'"]+)['"]/;
4916
+ const evalTitleMatchRegex = /\btitle\s*:\s*['"]([^'"]+)['"]/;
4917
+ function parseEvalMetas(filePath, content) {
4918
+ const metas = [];
4919
+ let searchIndex = 0;
4920
+ while (searchIndex < content.length) {
4921
+ const defineEvalIndex = content.indexOf("defineEval", searchIndex);
4922
+ if (defineEvalIndex === -1) break;
4923
+ const extracted = extractDefineEvalObject(content, defineEvalIndex);
4924
+ if (!extracted) {
4925
+ searchIndex = defineEvalIndex + 10;
4926
+ continue;
4927
+ }
4928
+ const id = evalIdMatchRegex.exec(extracted.objectText)?.[1];
4929
+ if (id !== void 0) {
4930
+ const result = {
4931
+ filePath,
4932
+ id
4933
+ };
4934
+ const title = evalTitleMatchRegex.exec(extracted.objectText)?.[1];
4935
+ if (title !== void 0) result.title = title;
4936
+ metas.push(result);
4937
+ }
4938
+ searchIndex = extracted.nextIndex;
4939
+ }
4940
+ return metas;
4941
+ }
4942
+ function extractDefineEvalObject(content, defineEvalIndex) {
4943
+ const openParenIndex = content.indexOf("(", defineEvalIndex);
4944
+ if (openParenIndex === -1) return void 0;
4945
+ const objectStartIndex = content.indexOf("{", openParenIndex);
4946
+ if (objectStartIndex === -1) return void 0;
4947
+ let depth = 0;
4948
+ let quote;
4949
+ let inBlockComment = false;
4950
+ let inLineComment = false;
4951
+ let isEscaped = false;
4952
+ for (let index = objectStartIndex; index < content.length; index++) {
4953
+ const currentChar = content[index];
4954
+ const nextChar = content[index + 1];
4955
+ if (inLineComment) {
4956
+ if (currentChar === "\n") inLineComment = false;
4957
+ continue;
4958
+ }
4959
+ if (inBlockComment) {
4960
+ if (currentChar === "*" && nextChar === "/") {
4961
+ inBlockComment = false;
4962
+ index++;
4963
+ }
4964
+ continue;
4965
+ }
4966
+ if (quote) {
4967
+ if (isEscaped) {
4968
+ isEscaped = false;
4969
+ continue;
4970
+ }
4971
+ if (currentChar === "\\") {
4972
+ isEscaped = true;
4973
+ continue;
4974
+ }
4975
+ if (currentChar === quote) quote = void 0;
4976
+ continue;
4977
+ }
4978
+ if (currentChar === "/" && nextChar === "/") {
4979
+ inLineComment = true;
4980
+ index++;
4981
+ continue;
4982
+ }
4983
+ if (currentChar === "/" && nextChar === "*") {
4984
+ inBlockComment = true;
4985
+ index++;
4986
+ continue;
4987
+ }
4988
+ if (currentChar === "\"" || currentChar === "'" || currentChar === "`") {
4989
+ quote = currentChar;
4990
+ continue;
4991
+ }
4992
+ if (currentChar === "{") {
4993
+ depth++;
4994
+ continue;
4995
+ }
4996
+ if (currentChar === "}") {
4997
+ depth--;
4998
+ if (depth === 0) return {
4999
+ nextIndex: index + 1,
5000
+ objectText: content.slice(objectStartIndex, index + 1)
5001
+ };
5002
+ }
5003
+ }
5004
+ }
5005
+ //#endregion
4889
5006
  //#region ../runner/src/evalModuleLoader.ts
4890
5007
  /**
4891
5008
  * Import one eval module with a cache key derived from its current source so
@@ -5280,6 +5397,7 @@ const isolationParam = "agent-evals-isolate";
5280
5397
  const pathSegmentSeparatorPattern = /[\\/]+/;
5281
5398
  const isolationStorage = new AsyncLocalStorage();
5282
5399
  const activeIsolationRoots = /* @__PURE__ */ new Map();
5400
+ const clearedRequireCacheKeys = /* @__PURE__ */ new Set();
5283
5401
  let hooksRegistered = false;
5284
5402
  const requireFromRunner = createRequire(import.meta.url);
5285
5403
  const agentPackageUrlBySpecifier = new Map([
@@ -5305,7 +5423,10 @@ function getIsolationKeyFromParent(parentURL) {
5305
5423
  }
5306
5424
  function isWorkspaceFile(url, workspaceRoot) {
5307
5425
  if (url.protocol !== "file:") return false;
5308
- const relativePath = relative(workspaceRoot, fileURLToPath(url));
5426
+ return isWorkspaceFilePath(fileURLToPath(url), workspaceRoot);
5427
+ }
5428
+ function isWorkspaceFilePath(filePath, workspaceRoot) {
5429
+ const relativePath = relative(workspaceRoot, filePath);
5309
5430
  if (relativePath === "" || relativePath.startsWith("..") || isAbsolute(relativePath)) return false;
5310
5431
  const segments = relativePath.split(pathSegmentSeparatorPattern);
5311
5432
  return !segments.includes("node_modules") && !segments.includes(".agent-evals");
@@ -5340,15 +5461,23 @@ function registerModuleIsolationHooks() {
5340
5461
  };
5341
5462
  } });
5342
5463
  }
5464
+ function clearWorkspaceRequireCacheOnce(context) {
5465
+ if (clearedRequireCacheKeys.has(context.key)) return;
5466
+ clearedRequireCacheKeys.add(context.key);
5467
+ for (const filePath of Object.keys(requireFromRunner.cache)) if (isWorkspaceFilePath(filePath, context.workspaceRoot)) delete requireFromRunner.cache[filePath];
5468
+ }
5343
5469
  /**
5344
5470
  * Execute module loading and eval code with fresh workspace module URLs.
5345
5471
  *
5346
5472
  * Node does not expose an ESM cache reset API, so the runner appends a
5347
- * run-scoped query parameter to workspace file imports. Package imports are
5348
- * left alone so SDK singletons, such as the eval registry, remain shared.
5473
+ * run-scoped query parameter to workspace file imports. CommonJS modules use
5474
+ * `require.cache` behind ESM imports, so workspace entries are cleared once per
5475
+ * run. Package imports are left alone so SDK singletons, such as the eval
5476
+ * registry, remain shared.
5349
5477
  */
5350
5478
  async function runWithModuleIsolation(context, fn) {
5351
5479
  registerModuleIsolationHooks();
5480
+ clearWorkspaceRequireCacheOnce(context);
5352
5481
  activeIsolationRoots.set(context.key, context.workspaceRoot);
5353
5482
  return await isolationStorage.run(context, fn);
5354
5483
  }
@@ -5476,20 +5605,26 @@ async function runCase(params) {
5476
5605
  const traceTree = buildTraceTree(scope.spans, scope.checkpoints);
5477
5606
  const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
5478
5607
  if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) scope.assertionFailures.push(toAssertionFailure(executeError.message, executeError));
5479
- if (!nonAssertError && evalDef.deriveFromTracing) try {
5480
- const derived = await callWithUnknownResult(evalDef.deriveFromTracing, [{
5481
- trace: traceTree,
5482
- input: evalCase.input,
5483
- case: evalCase
5484
- }]);
5485
- if (!isRecord(derived)) throw new Error("deriveFromTracing must return an object");
5486
- for (const [key, value] of Object.entries(derived)) if (!(key in scope.outputs)) scope.outputs[key] = value;
5487
- } catch (e) {
5488
- const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
5489
- scope.assertionFailures.push(toAssertionFailure(message, e instanceof Error ? e : void 0));
5608
+ if (!nonAssertError && evalDef.deriveFromTracing) {
5609
+ const { deriveFromTracing } = evalDef;
5610
+ try {
5611
+ const derived = await runInExistingEvalScope(scope, "derive", async () => {
5612
+ return await callWithUnknownResult(deriveFromTracing, [{
5613
+ trace: traceTree,
5614
+ input: evalCase.input,
5615
+ case: evalCase
5616
+ }]);
5617
+ });
5618
+ if (!isRecord(derived)) throw new Error("deriveFromTracing must return an object");
5619
+ for (const [key, value] of Object.entries(derived)) if (!(key in scope.outputs)) scope.outputs[key] = value;
5620
+ } catch (e) {
5621
+ const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
5622
+ scope.assertionFailures.push(toAssertionFailure(message, e instanceof Error ? e : void 0));
5623
+ }
5490
5624
  }
5491
5625
  if (!nonAssertError && evalDef.outputsSchema) {
5492
- const parsedOutputs = evalDef.outputsSchema.safeParse(getOutputsSchemaInput(evalDef.outputsSchema, scope.outputs));
5626
+ const { outputsSchema } = evalDef;
5627
+ const parsedOutputs = await runInExistingEvalScope(scope, "outputsSchema", () => outputsSchema.safeParse(getOutputsSchemaInput(outputsSchema, scope.outputs)));
5493
5628
  if (parsedOutputs.success) scope.outputs = {
5494
5629
  ...scope.outputs,
5495
5630
  ...parsedOutputs.data
@@ -5511,6 +5646,7 @@ async function runCase(params) {
5511
5646
  }, {
5512
5647
  input: evalCase.input,
5513
5648
  idPrefix: `${scopedIdPrefix}-score-${toStableIdSegment(key)}`,
5649
+ runtimeScope: "scorer",
5514
5650
  cacheContext: cacheAdapter ? {
5515
5651
  adapter: cacheAdapter,
5516
5652
  mode: cacheMode,
@@ -5791,12 +5927,19 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
5791
5927
  } catch {
5792
5928
  codeFingerprint = "";
5793
5929
  }
5794
- if (codeFingerprint.length > 0) runState.manifest.evalSourceFingerprints[evalMeta.id] = codeFingerprint;
5795
- else delete runState.manifest.evalSourceFingerprints[evalMeta.id];
5930
+ if (codeFingerprint.length > 0) {
5931
+ runState.manifest.evalSourceFingerprints[evalMeta.id] = codeFingerprint;
5932
+ evalMeta.sourceFingerprint = codeFingerprint;
5933
+ } else {
5934
+ delete runState.manifest.evalSourceFingerprints[evalMeta.id];
5935
+ evalMeta.sourceFingerprint = null;
5936
+ }
5796
5937
  try {
5797
5938
  const registry = getEvalRegistry();
5798
5939
  await runWithModuleIsolation(moduleIsolation, async () => {
5799
- await loadEvalModule(evalFilePath, codeFingerprint);
5940
+ await runInEvalRuntimeScope("env", async () => {
5941
+ await loadEvalModule(evalFilePath, codeFingerprint);
5942
+ });
5800
5943
  });
5801
5944
  const entry = registry.get(evalMeta.id);
5802
5945
  if (!entry) {
@@ -5807,87 +5950,89 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
5807
5950
  continue;
5808
5951
  }
5809
5952
  await runWithModuleIsolation(moduleIsolation, async () => {
5810
- await entry.use(async (evalDef) => {
5811
- const cases = filterEvalCases(resolveRunnableEvalCases({
5812
- cases: typeof evalDef.cases === "function" ? await evalDef.cases() : evalDef.cases ?? [],
5813
- evalId: evalMeta.id
5814
- }), request.target.evalIds, request.target.caseIds, evalMeta.id);
5815
- runState.summary.totalCases += cases.length;
5816
- const accumulatedColumns = /* @__PURE__ */ new Map();
5817
- const evalCaseRows = [];
5818
- const preparedCases = [];
5819
- const scoreKeys = Object.freeze(Object.keys(evalDef.scores ?? {}));
5820
- const manualScoreKeys = Object.freeze(Object.keys(evalDef.manualScores ?? {}));
5821
- const preparedEval = {
5822
- evalMeta,
5823
- accumulatedColumns,
5824
- evalCaseRows,
5825
- preparedCases,
5826
- scoreKeys: Object.freeze([...scoreKeys, ...manualScoreKeys]),
5827
- mergeColumns: (columns) => {
5828
- mergeColumnDefs(accumulatedColumns, columns, evalDef.columns, evalDef.scores, evalDef.manualScores);
5829
- }
5830
- };
5831
- preparedEvals.push(preparedEval);
5832
- for (const evalCase of cases) {
5833
- const trialResults = [];
5834
- const preparedCase = {
5835
- caseId: evalCase.id,
5836
- trialResults,
5837
- finalized: false
5953
+ await runInEvalRuntimeScope("cases", async () => {
5954
+ await entry.use(async (evalDef) => {
5955
+ const cases = filterEvalCases(resolveRunnableEvalCases({
5956
+ cases: typeof evalDef.cases === "function" ? await evalDef.cases() : evalDef.cases ?? [],
5957
+ evalId: evalMeta.id
5958
+ }), request.target.evalIds, request.target.caseIds, evalMeta.id);
5959
+ runState.summary.totalCases += cases.length;
5960
+ const accumulatedColumns = /* @__PURE__ */ new Map();
5961
+ const evalCaseRows = [];
5962
+ const preparedCases = [];
5963
+ const scoreKeys = Object.freeze(Object.keys(evalDef.scores ?? {}));
5964
+ const manualScoreKeys = Object.freeze(Object.keys(evalDef.manualScores ?? {}));
5965
+ const preparedEval = {
5966
+ evalMeta,
5967
+ accumulatedColumns,
5968
+ evalCaseRows,
5969
+ preparedCases,
5970
+ scoreKeys: Object.freeze([...scoreKeys, ...manualScoreKeys]),
5971
+ mergeColumns: (columns) => {
5972
+ mergeColumnDefs(accumulatedColumns, columns, evalDef.columns, evalDef.scores, evalDef.manualScores);
5973
+ }
5838
5974
  };
5839
- preparedCases.push(preparedCase);
5840
- for (let trial = 0; trial < request.trials; trial++) {
5841
- const bufferedCacheStore = cacheEnabled && cacheMode !== "bypass" ? createBufferedCacheStore(cacheStore) : null;
5842
- queuedCases.push({
5843
- execute: async ({ startTime, globalTraceDisplay }) => {
5844
- const { caseDetail, caseRowUpdate } = await runCase({
5845
- evalDef,
5846
- evalId: evalMeta.id,
5847
- evalCase,
5848
- globalTraceDisplay,
5849
- trial,
5850
- startTime,
5851
- cacheAdapter: bufferedCacheStore ?? (cacheEnabled ? cacheStore : null),
5852
- cacheMode,
5853
- codeFingerprint,
5854
- moduleIsolation,
5855
- evalFilePath,
5856
- workspaceRoot,
5857
- artifactDir: join(runDir, "artifacts"),
5858
- runId: runState.manifest.id
5859
- });
5860
- return {
5861
- caseDetail,
5862
- caseRow: {
5863
- caseId: evalCase.id,
5975
+ preparedEvals.push(preparedEval);
5976
+ for (const evalCase of cases) {
5977
+ const trialResults = [];
5978
+ const preparedCase = {
5979
+ caseId: evalCase.id,
5980
+ trialResults,
5981
+ finalized: false
5982
+ };
5983
+ preparedCases.push(preparedCase);
5984
+ for (let trial = 0; trial < request.trials; trial++) {
5985
+ const bufferedCacheStore = cacheEnabled && cacheMode !== "bypass" ? createBufferedCacheStore(cacheStore) : null;
5986
+ queuedCases.push({
5987
+ execute: async ({ startTime, globalTraceDisplay }) => {
5988
+ const { caseDetail, caseRowUpdate } = await runCase({
5989
+ evalDef,
5864
5990
  evalId: evalMeta.id,
5865
- status: caseRowUpdate.status ?? "pending",
5866
- latencyMs: caseRowUpdate.latencyMs ?? null,
5867
- columns: caseRowUpdate.columns ?? {},
5868
- trial
5869
- }
5870
- };
5871
- },
5872
- onComplete: async ({ caseDetail, caseRow }) => {
5873
- trialResults.push({
5874
- caseDetail,
5875
- caseRow,
5876
- bufferedCacheStore
5877
- });
5878
- if (trialResults.length !== request.trials) return;
5879
- await finalizePreparedCase({
5880
- runState,
5881
- runDir,
5882
- preparedEval,
5883
- preparedCase,
5884
- onCaseFinished,
5885
- emitEvent
5886
- });
5887
- }
5888
- });
5991
+ evalCase,
5992
+ globalTraceDisplay,
5993
+ trial,
5994
+ startTime,
5995
+ cacheAdapter: bufferedCacheStore ?? (cacheEnabled ? cacheStore : null),
5996
+ cacheMode,
5997
+ codeFingerprint,
5998
+ moduleIsolation,
5999
+ evalFilePath,
6000
+ workspaceRoot,
6001
+ artifactDir: join(runDir, "artifacts"),
6002
+ runId: runState.manifest.id
6003
+ });
6004
+ return {
6005
+ caseDetail,
6006
+ caseRow: {
6007
+ caseId: evalCase.id,
6008
+ evalId: evalMeta.id,
6009
+ status: caseRowUpdate.status ?? "pending",
6010
+ latencyMs: caseRowUpdate.latencyMs ?? null,
6011
+ columns: caseRowUpdate.columns ?? {},
6012
+ trial
6013
+ }
6014
+ };
6015
+ },
6016
+ onComplete: async ({ caseDetail, caseRow }) => {
6017
+ trialResults.push({
6018
+ caseDetail,
6019
+ caseRow,
6020
+ bufferedCacheStore
6021
+ });
6022
+ if (trialResults.length !== request.trials) return;
6023
+ await finalizePreparedCase({
6024
+ runState,
6025
+ runDir,
6026
+ preparedEval,
6027
+ preparedCase,
6028
+ onCaseFinished,
6029
+ emitEvent
6030
+ });
6031
+ }
6032
+ });
6033
+ }
5889
6034
  }
5890
- }
6035
+ });
5891
6036
  });
5892
6037
  });
5893
6038
  } catch (error) {
@@ -5995,4 +6140,4 @@ function toLastRunStatus(status) {
5995
6140
  return status === "pending" ? null : status;
5996
6141
  }
5997
6142
  //#endregion
5998
- export { evalFreshnessStatusSchema as $, evalAssert as $t, getEvalDisplayStatus as A, traceDisplayInputConfigSchema as At, apiCallMetricPlacementSchema as B, jsonCellSchema as Bt, updateManualScoreRequestSchema as C, spanCacheOptionsSchema as Ct, extractLlmCalls as D, traceAttributeDisplayPlacementSchema as Dt, extractApiCalls as E, traceAttributeDisplayInputSchema as Et, runSummarySchema as F, cellValueSchema as Ft, llmCallMetricSchema as G, buildTraceTree as Gt, apiCallsConfigSchema as H, repoFileRefSchema as Ht, DEFAULT_API_CALLS_CONFIG as I, columnDefSchema as It, resolveLlmCallsConfig as J, evalTracer as Jt, llmCallsConfigSchema as K, captureEvalSpanError as Kt, DEFAULT_LLM_CALLS_CONFIG as L, columnFormatSchema as Lt, deriveStatusFromCaseRows as M, traceSpanKindSchema as Mt, deriveStatusFromChildStatuses as N, traceSpanSchema as Nt, getNestedAttribute as O, traceAttributeDisplaySchema as Ot, runManifestSchema as P, traceSpanWarningSchema as Pt, caseRowSchema as Q, appendToEvalOutput as Qt, agentEvalsConfigSchema as R, columnKindSchema as Rt, createRunRequestSchema as S, serializedCacheSpanSchema as St, extractCacheHits as T, traceAttributeDisplayFormatSchema as Tt, llmCallMetricFormatSchema as U, runArtifactRefSchema as Ut, apiCallMetricSchema as V, numberDisplayOptionsSchema as Vt, llmCallMetricPlacementSchema as W, z$1 as Wt, assertionFailureSchema as X, hashCacheKeySync as Xt, trialSelectionModeSchema as Y, hashCacheKey as Yt, caseDetailSchema as Z, EvalAssertionError as Zt, loadEvalModule as _, cacheModeSchema as _t, loadPersistedRunSnapshot as a, nextEvalId as an, evalChartAggregateSchema as at, normalizeScoreDef as b, cacheRecordingSchema as bt, persistCaseDetail as c, setScopeCacheContext as cn, evalChartColorSchema as ct, recomputePersistedCaseStatus as d, defineEval as dn, evalChartTooltipExtraSchema as dt, getCurrentScope as en, evalStatAggregateSchema as et, runTouchesEval as f, getEvalRegistry as fn, evalChartTypeSchema as ft, setLatestRunInfoMap as g, cacheListItemSchema as gt, getTargetEvalIds as h, cacheFileSchema as ht, getLatestRunInfos as i, mergeEvalOutput as in, scoreTraceSchema as it, deriveScopedSummaryFromCases as j, traceSpanErrorSchema as jt, getEvalTitle as k, traceDisplayConfigSchema as kt, persistRunState as l, startEvalBackgroundJob as ln, evalChartConfigSchema as lt, buildEvalSummary as m, cacheEntrySchema as mt, generateRunId as n, incrementEvalOutput as nn, evalStatsConfigSchema as nt, loadPersistedRunSnapshots as o, runInEvalScope as on, evalChartAxisSchema as ot, resolveArtifactPath as p, evalChartsConfigSchema as pt, resolveApiCallsConfig as q, evalSpan as qt, getLastRunStatuses as r, isInEvalScope as rn, evalSummarySchema as rt, nextShortIdFromSnapshots as s, setEvalOutput as sn, evalChartBuiltinMetricSchema as st, executeRun as t, getEvalCaseInput as tn, evalStatItemSchema as tt, recomputeEvalStatusesInRuns as u, repoFile as un, evalChartMetricSchema as ut, loadConfig as v, cacheOperationTypeSchema as vt, sseEnvelopeSchema as w, traceCacheRefSchema as wt, createFsCacheStore as x, cacheStatusSchema as xt, buildDeclaredColumnDefs as y, cacheRecordingOpSchema as yt, apiCallMetricFormatSchema as z, fileRefSchema as zt };
6143
+ export { caseRowSchema as $, appendToEvalOutput as $t, getEvalTitle as A, traceDisplayConfigSchema as At, apiCallMetricFormatSchema as B, fileRefSchema as Bt, createRunRequestSchema as C, serializedCacheSpanSchema as Ct, extractApiCalls as D, traceAttributeDisplayInputSchema as Dt, extractCacheHits as E, traceAttributeDisplayFormatSchema as Et, runManifestSchema as F, traceSpanWarningSchema as Ft, llmCallMetricPlacementSchema as G, z$1 as Gt, apiCallMetricSchema as H, numberDisplayOptionsSchema as Ht, runSummarySchema as I, cellValueSchema as It, resolveApiCallsConfig as J, evalSpan as Jt, llmCallMetricSchema as K, buildTraceTree as Kt, DEFAULT_API_CALLS_CONFIG as L, columnDefSchema as Lt, deriveScopedSummaryFromCases as M, traceSpanErrorSchema as Mt, deriveStatusFromCaseRows as N, traceSpanKindSchema as Nt, extractLlmCalls as O, traceAttributeDisplayPlacementSchema as Ot, deriveStatusFromChildStatuses as P, traceSpanSchema as Pt, caseDetailSchema as Q, EvalAssertionError as Qt, DEFAULT_LLM_CALLS_CONFIG as R, columnFormatSchema as Rt, createFsCacheStore as S, cacheStatusSchema as St, sseEnvelopeSchema as T, traceCacheRefSchema as Tt, apiCallsConfigSchema as U, repoFileRefSchema as Ut, apiCallMetricPlacementSchema as V, jsonCellSchema as Vt, llmCallMetricFormatSchema as W, runArtifactRefSchema as Wt, trialSelectionModeSchema as X, hashCacheKey as Xt, resolveLlmCallsConfig as Y, evalTracer as Yt, assertionFailureSchema as Z, hashCacheKeySync as Zt, loadEvalModule as _, cacheListItemSchema as _t, loadPersistedRunSnapshot as a, mergeEvalOutput as an, scoreTraceSchema as at, buildDeclaredColumnDefs as b, cacheRecordingOpSchema as bt, persistCaseDetail as c, runInEvalScope as cn, evalChartBuiltinMetricSchema as ct, recomputePersistedCaseStatus as d, setScopeCacheContext as dn, evalChartMetricSchema as dt, evalAssert as en, evalFreshnessStatusSchema as et, runTouchesEval as f, startEvalBackgroundJob as fn, evalChartTooltipExtraSchema as ft, setLatestRunInfoMap as g, cacheFileSchema as gt, getTargetEvalIds as h, getEvalRegistry as hn, cacheEntrySchema as ht, getLatestRunInfos as i, isInEvalScope as in, evalSummarySchema as it, getEvalDisplayStatus as j, traceDisplayInputConfigSchema as jt, getNestedAttribute as k, traceAttributeDisplaySchema as kt, persistRunState as l, runInExistingEvalScope as ln, evalChartColorSchema as lt, buildEvalSummary as m, defineEval as mn, evalChartsConfigSchema as mt, generateRunId as n, getEvalCaseInput as nn, evalStatItemSchema as nt, loadPersistedRunSnapshots as o, nextEvalId as on, evalChartAggregateSchema as ot, resolveArtifactPath as p, repoFile as pn, evalChartTypeSchema as pt, llmCallsConfigSchema as q, captureEvalSpanError as qt, getLastRunStatuses as r, incrementEvalOutput as rn, evalStatsConfigSchema as rt, nextShortIdFromSnapshots as s, runInEvalRuntimeScope as sn, evalChartAxisSchema as st, executeRun as t, getCurrentScope as tn, evalStatAggregateSchema as tt, recomputeEvalStatusesInRuns as u, setEvalOutput as un, evalChartConfigSchema as ut, parseEvalMetas as v, cacheModeSchema as vt, updateManualScoreRequestSchema as w, spanCacheOptionsSchema as wt, normalizeScoreDef as x, cacheRecordingSchema as xt, loadConfig as y, cacheOperationTypeSchema as yt, agentEvalsConfigSchema as z, columnKindSchema as zt };
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-CmbmfBG2.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-vunKoSBu.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-lOZdhO2D.mjs";
2
- import "./src-Btb9RCYD.mjs";
1
+ import { n as createRunner } from "./cli-B-sCTyz8.mjs";
2
+ import "./src-jaOlXwb5.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -0,0 +1,3 @@
1
+ import "./runOrchestration-B3fYtpKo.mjs";
2
+ import "./cli-B-sCTyz8.mjs";
3
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.13.0",
3
+ "version": "0.15.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -13,7 +13,8 @@
13
13
  }
14
14
  },
15
15
  "files": [
16
- "dist"
16
+ "dist",
17
+ "skills"
17
18
  ],
18
19
  "tsdown": {
19
20
  "clean": true,