@ls-stack/agent-eval 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as evalChartAxisSchema, A as runManifestSchema, At as evalTracer, B as cacheRecordingSchema, Bt as mergeEvalOutput, C as updateManualScoreRequestSchema, Ct as numberDisplayOptionsSchema, D as deriveScopedSummaryFromCases, Dt as buildTraceTree, E as getEvalDisplayStatus, Et as z, F as cacheFileSchema, Ft as evalAssert, G as caseRowSchema, Gt as defineEval, H as spanCacheOptionsSchema, Ht as setEvalOutput, I as cacheListItemSchema, It as getCurrentScope, J as evalStatItemSchema, K as evalFreshnessStatusSchema, Kt as getEvalRegistry, L as cacheModeSchema, Lt as getEvalCaseInput, M as agentEvalsConfigSchema, Mt as hashCacheKeySync, N as trialSelectionModeSchema, Nt as EvalAssertionError, O as deriveStatusFromCaseRows, Ot as captureEvalSpanError, P as cacheEntrySchema, Pt as appendToEvalOutput, Q as evalChartAggregateSchema, R as cacheOperationTypeSchema, Rt as incrementEvalOutput, S as createRunRequestSchema, St as jsonCellSchema, T as getEvalTitle, Tt as runArtifactRefSchema, U as assertionFailureSchema, Ut as setScopeCacheContext, V as serializedCacheSpanSchema, Vt as runInEvalScope, W as caseDetailSchema, Wt as repoFile, X as evalSummarySchema, Y as evalStatsConfigSchema, Z as scoreTraceSchema, _t as cellValueSchema, at as evalChartTypeSchema, bt as columnKindSchema, ct as traceAttributeDisplayInputSchema, dt as traceDisplayConfigSchema, et as evalChartBuiltinMetricSchema, ft as traceDisplayInputConfigSchema, gt as traceSpanWarningSchema, ht as traceSpanSchema, it as evalChartTooltipExtraSchema, j as runSummarySchema, jt as hashCacheKey, k as deriveStatusFromChildStatuses, kt as evalSpan, lt as traceAttributeDisplayPlacementSchema, mt as traceSpanKindSchema, nt as evalChartConfigSchema, ot as evalChartsConfigSchema, pt as traceSpanErrorSchema, q as evalStatAggregateSchema, rt as evalChartMetricSchema, st as traceAttributeDisplayFormatSchema, tt as evalChartColorSchema, ut as traceAttributeDisplaySchema, vt as columnDefSchema, w as sseEnvelopeSchema, wt as repoFileRefSchema, xt as fileRefSchema, yt as columnFormatSchema, z as cacheRecordingOpSchema, zt as isInEvalScope } from "./runOrchestration-HaMahl6b.mjs";
2
- import { n as createRunner, t as runCli } from "./cli-DQK5W0je.mjs";
3
- import "./src-r3FQAaw6.mjs";
4
- export { EvalAssertionError, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, mergeEvalOutput, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
1
+ import { $ as evalChartAxisSchema, $t as runInEvalScope, A as deriveScopedSummaryFromCases, At as columnFormatSchema, B as llmCallsConfigSchema, Bt as evalSpan, C as updateManualScoreRequestSchema, Ct as traceDisplayInputConfigSchema, D as getNestedAttribute, Dt as traceSpanWarningSchema, E as extractLlmCalls, Et as traceSpanSchema, F as DEFAULT_LLM_CALLS_CONFIG, Ft as repoFileRefSchema, G as caseRowSchema, Gt as appendToEvalOutput, H as trialSelectionModeSchema, Ht as hashCacheKey, I as agentEvalsConfigSchema, It as runArtifactRefSchema, J as evalStatItemSchema, Jt as getEvalCaseInput, K as evalFreshnessStatusSchema, Kt as evalAssert, L as llmCallMetricFormatSchema, Lt as z, M as deriveStatusFromChildStatuses, Mt as fileRefSchema, N as runManifestSchema, Nt as jsonCellSchema, O as getEvalTitle, Ot as cellValueSchema, P as runSummarySchema, Pt as numberDisplayOptionsSchema, Q as evalChartAggregateSchema, Qt as nextEvalId, R as llmCallMetricPlacementSchema, Rt as buildTraceTree, S as createRunRequestSchema, St as traceDisplayConfigSchema, T as extractCacheHits, Tt as traceSpanKindSchema, U as assertionFailureSchema, Ut as hashCacheKeySync, V as resolveLlmCallsConfig, Vt as evalTracer, W as caseDetailSchema, Wt as EvalAssertionError, X as evalSummarySchema, Xt as isInEvalScope, Y as evalStatsConfigSchema, Yt as incrementEvalOutput, Z as scoreTraceSchema, Zt as mergeEvalOutput, _t as traceCacheRefSchema, at as evalChartTypeSchema, bt as traceAttributeDisplayPlacementSchema, ct as cacheFileSchema, dt as cacheOperationTypeSchema, en as setEvalOutput, et as evalChartBuiltinMetricSchema, ft as cacheRecordingOpSchema, gt as spanCacheOptionsSchema, ht as serializedCacheSpanSchema, in as getEvalRegistry, it as evalChartTooltipExtraSchema, j as deriveStatusFromCaseRows, jt as columnKindSchema, k as getEvalDisplayStatus, kt as columnDefSchema, lt as cacheListItemSchema, mt as cacheStatusSchema, nn as repoFile, nt as evalChartConfigSchema, ot as evalChartsConfigSchema, pt as cacheRecordingSchema, q as evalStatAggregateSchema, qt as getCurrentScope, rn as defineEval, rt as evalChartMetricSchema, st as cacheEntrySchema, tn as setScopeCacheContext, tt as evalChartColorSchema, ut as cacheModeSchema, vt as traceAttributeDisplayFormatSchema, w as sseEnvelopeSchema, wt as traceSpanErrorSchema, xt as traceAttributeDisplaySchema, yt as traceAttributeDisplayInputSchema, z as llmCallMetricSchema, zt as captureEvalSpanError } from "./runOrchestration-DwqX9_T7.mjs";
2
+ import { n as createRunner, t as runCli } from "./cli-DLlRkyLH.mjs";
3
+ import "./src-Bx-CV6Wo.mjs";
4
+ export { DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { A as runManifestSchema, S as createRunRequestSchema, Y as evalStatsConfigSchema, j as runSummarySchema, ot as evalChartsConfigSchema, t as executeRun, v as loadConfig, vt as columnDefSchema, x as createFsCacheStore } from "./runOrchestration-HaMahl6b.mjs";
1
+ import { N as runManifestSchema, P as runSummarySchema, S as createRunRequestSchema, Y as evalStatsConfigSchema, kt as columnDefSchema, ot as evalChartsConfigSchema, t as executeRun, v as loadConfig, x as createFsCacheStore } from "./runOrchestration-DwqX9_T7.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { z } from "zod/v4";
@@ -51,7 +51,8 @@ async function main() {
51
51
  const cacheStore = createFsCacheStore({
52
52
  workspaceRoot: context.workspaceRoot,
53
53
  dir: config.cache?.dir,
54
- maxEntriesPerEval: config.cache?.maxEntriesPerEval
54
+ maxEntriesPerNamespace: config.cache?.maxEntriesPerNamespace ?? config.cache?.maxEntriesPerEval,
55
+ maxEntriesByNamespace: config.cache?.maxEntriesByNamespace
55
56
  });
56
57
  const evals = new Map(context.evals.map((evalMeta) => [evalMeta.id, evalMeta]));
57
58
  const lastRunStatusMap = /* @__PURE__ */ new Map();