npm - @ls-stack/agent-eval - Versions diffs - 0.58.1 → 0.58.2 - Mend

@ls-stack/agent-eval 0.58.1 → 0.58.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/dist/{app-DhMIbjlE.mjs → app-BxD6aHbp.mjs} +52 -7
package/dist/apps/web/dist/assets/index-BMWBZw_u.js +377 -0
package/dist/apps/web/dist/assets/index-CHH7m5Cv.css +1 -0
package/dist/apps/web/dist/index.html +2 -2
package/dist/bin.mjs +1 -1
package/dist/caseChild.mjs +2 -1
package/dist/{cli-_g2qOMK6.mjs → cli-HBwXIJsg.mjs} +31 -5
package/dist/index.d.mts +76 -17
package/dist/index.mjs +4 -4
package/dist/runChild.mjs +2 -2
package/dist/{runExecution-d42Lm0i5.mjs → runExecution-pHJ0_TzH.mjs} +125 -21
package/dist/{runOrchestration-CvmFeOmT.mjs → runOrchestration-ngVXShH4.mjs} +73 -6
package/dist/{runner-BKogjiYd.mjs → runner-BnZMGBla.mjs} +1 -1
package/dist/{runner-MSr8sAWm.mjs → runner-D_pz2NON.mjs} +2 -2
package/dist/{src-CdZsOn6y.mjs → src-AeXGBJ26.mjs} +2 -2
package/package.json +3 -3
package/skills/agent-eval/SKILL.md +18 -3
package/dist/apps/web/dist/assets/index-Cz9p4l-t.js +0 -377
package/dist/apps/web/dist/assets/index-DtARRwsS.css +0 -1

package/dist/index.d.mts CHANGED Viewed

@@ -268,6 +268,7 @@ declare const runLogEntrySchema$1: z$1.ZodObject<{
   phase: z$1.ZodEnum<{
     eval: "eval";
     derive: "derive";
+    tracingAssertions: "tracingAssertions";
     outputsSchema: "outputsSchema";
     scorer: "scorer";
   }>;
@@ -399,10 +400,17 @@ type EvalCase$1$1<TInput = unknown> = {
 };
 /** Query helpers built from the flattened trace recorded for one eval case. */
 type EvalTraceTree = {
-  spans: EvalTraceSpan$2[];
-  rootSpans: EvalTraceSpan$2[];
-  findSpan: (name: string) => EvalTraceSpan$2 | undefined;
-  findSpansByKind: (kind: string) => EvalTraceSpan$2[];
+  /** Flat span list in creation order. */spans: EvalTraceSpan$2[]; /** Top-level spans whose `parentId` is `null`. */
+  rootSpans: EvalTraceSpan$2[]; /** Return the first span whose name exactly matches `name`. */
+  findSpan: (name: string) => EvalTraceSpan$2 | undefined; /** Return every span whose name exactly matches `name`. */
+  findSpans: (name: string) => EvalTraceSpan$2[]; /** Return whether any span name exactly matches `name`. */
+  hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
+  findSpansByKind: (kind: string) => EvalTraceSpan$2[]; /** Return every span with `kind: 'tool'`. */
+  findToolCallSpans: () => EvalTraceSpan$2[]; /** Return the names of every span with `kind: 'tool'`. */
+  listToolCallSpanNames: () => string[]; /** Return whether a `kind: 'tool'` span has a name exactly matching `name`. */
+  hasToolCallSpan: (name: string) => boolean; /** Return span names in creation order, optionally filtered by kind. */
+  listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
+  listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
   flattenDfs: () => EvalTraceSpan$2[];
   checkpoints: Map<string, unknown>;
 };
@@ -421,6 +429,12 @@ type EvalDeriveMap<TInput = unknown> = Record<string, EvalDeriveValueFn<TInput>>
 type EvalDeriveFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => Record<string, unknown> | Promise<Record<string, unknown>>;
 /** Trace-derived output config accepted globally and on eval definitions. */
 type EvalDeriveConfig<TInput = unknown> = EvalDeriveMap<TInput> | EvalDeriveFn<TInput>;
+/** Function that records trace-derived assertions for one case. */
+type EvalTracingAssertionsFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => MaybePromise$1<void>;
+/** Keyed trace-derived assertion config for grouping related checks. */
+type EvalTracingAssertionsMap<TInput = unknown> = Record<string, EvalTracingAssertionsFn<TInput>>;
+/** Trace-derived assertion config accepted globally and on eval definitions. */
+type EvalTracingAssertionsConfig<TInput = unknown> = EvalTracingAssertionsMap<TInput> | EvalTracingAssertionsFn<TInput>;
 /** UI overrides for a derived or scored column emitted by an eval. */
 type EvalColumnOverride = {
   /** Display label shown for the column in tables and detail views. */label?: string;
@@ -1142,9 +1156,18 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
    *
    * Prefer the keyed map form when each key has one derivation. The
    * object-returning callback form is also supported. Derived values only fill
-   * keys not already recorded during execution.
+   * keys not already recorded during execution. Assertion helpers are not
+   * allowed here; use `tracingAssertions` for trace-derived pass/fail checks.
    */
   deriveFromTracing?: EvalDeriveConfig<TInput>;
+  /**
+   * Record assertions from the finished execution trace.
+   *
+   * Runs after `deriveFromTracing` and before output schema validation and
+   * scores. Use `evalAssert(...)` or `evalExpect(...)` inside the callback to
+   * write normal assertion results without creating score columns.
+   */
+  tracingAssertions?: EvalTracingAssertionsConfig<TInput>;
   /**
    * Computed score columns for each case.
    *
@@ -1503,12 +1526,16 @@ type EvalCaseScope = {
  * covers run-time module/environment loading, including top-level code in
  * modules imported while a run is being prepared.
  */
-type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'outputsSchema' | 'scorer';
+type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'tracingAssertions' | 'outputsSchema' | 'scorer';
 type EvalLogLevelInput = RunLogLevel$1 | 'warning';
 /** Error thrown when an eval assertion fails during case execution. */
 declare class EvalAssertionError extends Error {
   constructor(message: string);
 }
+/** Error thrown when an SDK helper is used in an unsupported runner phase. */
+declare class EvalRuntimeUsageError extends Error {
+  constructor(message: string);
+}
 /** Return the host process clock, bypassing the eval Date shim. */
 /**
  * Eval time helpers for reading and moving the active eval clock.
@@ -1539,8 +1566,10 @@ declare function getCurrentScope(): EvalCaseScope | undefined;
  *
  * Returns `null` outside eval-owned work, `env` while the runner is loading
  * eval modules for a run, `cases` while generating cases, `eval` while running
- * case `execute`, `derive` while deriving outputs from traces, `outputsSchema`
- * while validating outputs, and `scorer` while computing scores.
+ * case `execute`, `derive` while deriving outputs from traces,
+ * `tracingAssertions` while checking trace-derived assertions,
+ * `outputsSchema` while validating outputs, and `scorer` while computing
+ * scores.
  */
 declare function isInEvalScope(): EvalRuntimeScope | null;
 /**
@@ -1656,7 +1685,8 @@ declare function incrementEvalOutput(key: string, delta: number): void;
  * Calls made outside `runInEvalScope(...)` are ignored so shared workflow code
  * can safely reuse `evalAssert(...)` when it also runs outside an eval. The
  * TypeScript assertion signature still narrows the checked value after the
- * call.
+ * call. Calls inside `deriveFromTracing` throw because derivations must only
+ * write outputs; use `tracingAssertions` for trace-derived pass/fail checks.
  */
 declare function evalAssert(condition: unknown, message: string): asserts condition; //#endregion
 //#region src/valueCache.d.ts
@@ -2753,6 +2783,7 @@ type RunLogLevel = z$1.infer<typeof runLogLevelSchema>;
 declare const runLogPhaseSchema: z$1.ZodEnum<{
   eval: "eval";
   derive: "derive";
+  tracingAssertions: "tracingAssertions";
   outputsSchema: "outputsSchema";
   scorer: "scorer";
 }>;
@@ -2779,6 +2810,7 @@ declare const runLogEntrySchema: z$1.ZodObject<{
   phase: z$1.ZodEnum<{
     eval: "eval";
     derive: "derive";
+    tracingAssertions: "tracingAssertions";
     outputsSchema: "outputsSchema";
     scorer: "scorer";
   }>;
@@ -3137,6 +3169,7 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
     phase: z$1.ZodEnum<{
       eval: "eval";
       derive: "derive";
+      tracingAssertions: "tracingAssertions";
       outputsSchema: "outputsSchema";
       scorer: "scorer";
     }>;
@@ -3610,7 +3643,7 @@ type ScopedCaseSummary = {
 //#endregion
 //#region src/evalStatus.d.ts
 /** Display status used for eval, file, and folder UI surfaces. */
-type EvalDisplayStatus = DerivedStatus | 'stale' | 'outdated' | 'unscored';
+type EvalDisplayStatus = DerivedStatus | 'enqueued' | 'stale' | 'outdated' | 'unscored';
 /**
  * Derive the user-facing eval status from the raw latest run result plus
  * freshness state.
@@ -3658,10 +3691,17 @@ type EvalCase$1<TInput = unknown> = {
 };
 /** Query helpers built from the flattened trace recorded for one eval case. */
 type EvalTraceTree$1 = {
-  spans: EvalTraceSpan$1[];
-  rootSpans: EvalTraceSpan$1[];
-  findSpan: (name: string) => EvalTraceSpan$1 | undefined;
-  findSpansByKind: (kind: string) => EvalTraceSpan$1[];
+  /** Flat span list in creation order. */spans: EvalTraceSpan$1[]; /** Top-level spans whose `parentId` is `null`. */
+  rootSpans: EvalTraceSpan$1[]; /** Return the first span whose name exactly matches `name`. */
+  findSpan: (name: string) => EvalTraceSpan$1 | undefined; /** Return every span whose name exactly matches `name`. */
+  findSpans: (name: string) => EvalTraceSpan$1[]; /** Return whether any span name exactly matches `name`. */
+  hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
+  findSpansByKind: (kind: string) => EvalTraceSpan$1[]; /** Return every span with `kind: 'tool'`. */
+  findToolCallSpans: () => EvalTraceSpan$1[]; /** Return the names of every span with `kind: 'tool'`. */
+  listToolCallSpanNames: () => string[]; /** Return whether a `kind: 'tool'` span has a name exactly matching `name`. */
+  hasToolCallSpan: (name: string) => boolean; /** Return span names in creation order, optionally filtered by kind. */
+  listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
+  listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
   flattenDfs: () => EvalTraceSpan$1[];
   checkpoints: Map<string, unknown>;
 };
@@ -3681,6 +3721,13 @@ type EvalDeriveFn$1<TInput = unknown> = (ctx: EvalDeriveContext$1<TInput>) => Re
 /** Trace-derived output config accepted globally and on eval definitions. */
 type EvalDeriveConfig$1<TInput = unknown> = EvalDeriveMap$1<TInput> | EvalDeriveFn$1<TInput>;
 /** Schema for keyed or object-returning trace-derived output config. */
+/** Function that records trace-derived assertions for one case. */
+type EvalTracingAssertionsFn$1<TInput = unknown> = (ctx: EvalDeriveContext$1<TInput>) => MaybePromise<void>;
+/** Keyed trace-derived assertion config for grouping related checks. */
+type EvalTracingAssertionsMap$1<TInput = unknown> = Record<string, EvalTracingAssertionsFn$1<TInput>>;
+/** Trace-derived assertion config accepted globally and on eval definitions. */
+type EvalTracingAssertionsConfig$1<TInput = unknown> = EvalTracingAssertionsMap$1<TInput> | EvalTracingAssertionsFn$1<TInput>;
+/** Schema for function or keyed trace-derived assertion config. */
 /** UI overrides for a derived or scored column emitted by an eval. */
 type EvalColumnOverride$1 = {
   /** Display label shown for the column in tables and detail views. */label?: string;
@@ -4133,9 +4180,19 @@ type AgentEvalsConfig$1 = {
    * Prefer the keyed map form for shared metrics:
    * `{ toolCalls: ({ trace }) => trace.findSpansByKind('tool').length }`.
    * The object-returning function form is also supported. Derived outputs
-   * only fill keys that were not already recorded by eval execution.
+   * only fill keys that were not already recorded by eval execution. Do not
+   * call assertion helpers here; use `tracingAssertions` for trace-derived
+   * pass/fail checks.
    */
   deriveFromTracing?: EvalDeriveConfig$1;
+  /**
+   * Workspace-wide assertions derived from the finished execution trace.
+   *
+   * These run after `deriveFromTracing` and before output schema validation and
+   * scores. Use `evalAssert(...)` or `evalExpect(...)` inside the callback to
+   * record normal assertion results without creating fake score columns.
+   */
+  tracingAssertions?: EvalTracingAssertionsConfig$1;
   /**
    * Workspace-wide stats prepended to every eval's stats row.
    *
@@ -6366,6 +6423,7 @@ declare const caseDetailSchema: z$1.ZodObject<{
     phase: z$1.ZodEnum<{
       eval: "eval";
       derive: "derive";
+      tracingAssertions: "tracingAssertions";
       outputsSchema: "outputsSchema";
       scorer: "scorer";
     }>;
@@ -6992,7 +7050,8 @@ type EvalRunner = {
   getEvals(): EvalSummary$1[]; /** Look up one discovered eval by id. */
   getEval(id: string): EvalSummary$1 | undefined; /** Return discovery errors that should be shown before running evals. */
   getDiscoveryIssues(): DiscoveryIssue$1[]; /** Return current config-reload state for the long-running app server. */
-  getConfigReloadState(): ConfigReloadState$1; /** Re-scan configured eval files and emit a discovery update to listeners. */
+  getConfigReloadState(): ConfigReloadState$1; /** Return the effective per-run case concurrency after applying defaults. */
+  getConfiguredConcurrency(): number; /** Re-scan configured eval files and emit a discovery update to listeners. */
   refreshDiscovery(): Promise<void>;
   startRun(request: CreateRunRequest$1): Promise<{
     manifest: RunManifest$1;
@@ -7227,4 +7286,4 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
 /** Return whether the active eval case has tags matching the typed input. */
 declare function matchesEvalTags(input: EvalTagMatchInput): boolean;
 //#endregion
-export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, 
EvalTagMatchInput, type EvalTraceTree, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, 
getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
+export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, EvalRuntimeUsageError, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type 
EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type EvalTracingAssertionsConfig, type EvalTracingAssertionsFn, type EvalTracingAssertionsMap, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, 
extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };

package/dist/index.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-d42Lm0i5.mjs";
-import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-_g2qOMK6.mjs";
-import { n as matchesEvalTags, t as defineEval } from "./src-CdZsOn6y.mjs";
-export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
+import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalLog } from "./runExecution-pHJ0_TzH.mjs";
+import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-HBwXIJsg.mjs";
+import { n as matchesEvalTags, t as defineEval } from "./src-AeXGBJ26.mjs";
+export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };

package/dist/runChild.mjs CHANGED Viewed

@@ -1,5 +1,5 @@
-import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-d42Lm0i5.mjs";
-import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-CvmFeOmT.mjs";
+import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema } from "./runExecution-pHJ0_TzH.mjs";
+import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-ngVXShH4.mjs";
 import { z } from "zod/v4";
 import { readFile } from "node:fs/promises";
 import { relative } from "node:path";

package/dist/{runExecution-d42Lm0i5.mjs → runExecution-pHJ0_TzH.mjs} RENAMED Viewed

@@ -851,6 +851,7 @@ const runLogLevelSchema = z.enum([
 const runLogPhaseSchema = z.enum([
 	"eval",
 	"derive",
+	"tracingAssertions",
 	"outputsSchema",
 	"scorer"
 ]);
@@ -1008,6 +1009,9 @@ const removeDefaultConfigSchema = z.union([z.literal(true), z.array(defaultConfi
 const evalDeriveValueFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a derive output function" });
 /** Schema for keyed or object-returning trace-derived output config. */
 const evalDeriveConfigSchema = z.union([z.custom((value) => typeof value === "function", { message: "Expected a deriveFromTracing function" }), z.record(z.string().min(1), evalDeriveValueFnSchema)]);
+const evalTracingAssertionsFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a tracing assertions function" });
+/** Schema for function or keyed trace-derived assertion config. */
+const evalTracingAssertionsConfigSchema = z.union([z.custom((value) => typeof value === "function", { message: "Expected a tracingAssertions function" }), z.record(z.string().min(1), evalTracingAssertionsFnSchema)]);
 /** Schema for UI overrides on derived or scored columns. */
 const evalColumnOverrideSchema = z.object({
 	label: z.string().optional(),
@@ -1411,6 +1415,7 @@ const agentEvalsConfigSchema = z.object({
 	traceDisplay: traceDisplayInputConfigSchema.optional(),
 	columns: evalColumnsSchema.optional(),
 	deriveFromTracing: evalDeriveConfigSchema.optional(),
+	tracingAssertions: evalTracingAssertionsConfigSchema.optional(),
 	stats: evalStatsConfigSchema.optional(),
 	defaultStatAggregate: evalStatAggregateSchema.optional(),
 	llmCalls: llmCallsConfigSchema.optional(),
@@ -1847,8 +1852,9 @@ function deriveScopedSummaryFromCases(params) {
 * freshness state.
 */
 function getEvalDisplayStatus(params) {
-	const { stale, outdated, lastRunStatus, isRunning = false } = params;
+	const { stale, outdated, lastRunStatus, isRunning = false, isEnqueued = false } = params;
 	if (isRunning || lastRunStatus === "running") return "running";
+	if (isEnqueued) return "enqueued";
 	if (lastRunStatus === "pass") {
 		if (stale) return "stale";
 		if (outdated) return "outdated";
@@ -2718,6 +2724,17 @@ var EvalAssertionError = class extends Error {
 		this.name = "EvalAssertionError";
 	}
 };
+/** Error thrown when an SDK helper is used in an unsupported runner phase. */
+var EvalRuntimeUsageError = class extends Error {
+	constructor(message) {
+		super(message);
+		this.name = "EvalRuntimeUsageError";
+	}
+};
+/** Throw when assertion helpers are used in a runner phase that forbids them. */
+function assertEvalAssertionsAllowed(apiName) {
+	if (getCurrentScope() && runtimeScopeStorage.getStore() === "derive") throw new EvalRuntimeUsageError(`${apiName} cannot be used inside deriveFromTracing. Use tracingAssertions for trace-derived assertions.`);
+}
 function getEvalClockStateNowMs(state) {
 	const elapsedMs = state.frozen ? 0 : realDate.now() - state.realStartMs;
 	return state.startMs + elapsedMs + state.offsetMs;
@@ -2823,8 +2840,10 @@ function recordSpanForActiveCacheRecording(scope, spanId) {
 *
 * Returns `null` outside eval-owned work, `env` while the runner is loading
 * eval modules for a run, `cases` while generating cases, `eval` while running
-* case `execute`, `derive` while deriving outputs from traces, `outputsSchema`
-* while validating outputs, and `scorer` while computing scores.
+* case `execute`, `derive` while deriving outputs from traces,
+* `tracingAssertions` while checking trace-derived assertions,
+* `outputsSchema` while validating outputs, and `scorer` while computing
+* scores.
 */
 function isInEvalScope() {
 	if (activeEvalRuntimeScopeCount === 0) return null;
@@ -2845,7 +2864,7 @@ function normalizeLogLevel(level) {
 }
 function getCurrentLogPhase() {
 	const runtimeScope = runtimeScopeStorage.getStore();
-	if (runtimeScope === "eval" || runtimeScope === "derive" || runtimeScope === "outputsSchema" || runtimeScope === "scorer") return runtimeScope;
+	if (runtimeScope === "eval" || runtimeScope === "derive" || runtimeScope === "tracingAssertions" || runtimeScope === "outputsSchema" || runtimeScope === "scorer") return runtimeScope;
 	return null;
 }
 function formatLogArgs(args) {
@@ -3300,10 +3319,12 @@ function incrementEvalOutput(key, delta) {
 * Calls made outside `runInEvalScope(...)` are ignored so shared workflow code
 * can safely reuse `evalAssert(...)` when it also runs outside an eval. The
 * TypeScript assertion signature still narrows the checked value after the
-* call.
+* call. Calls inside `deriveFromTracing` throw because derivations must only
+* write outputs; use `tracingAssertions` for trace-derived pass/fail checks.
 */
 function evalAssert(condition, message) {
 	const scope = getCurrentScope();
+	assertEvalAssertionsAllowed("evalAssert(...)");
 	if (condition) {
 		if (scope) scope.assertions.push({
 			message,
@@ -3454,6 +3475,7 @@ var EvalExpectationImpl = class EvalExpectationImpl {
 * case scope is active, matching `evalAssert(...)`.
 */
 function evalExpect(value) {
+	assertEvalAssertionsAllowed("evalExpect(...)");
 	return new EvalExpectationImpl(value, false);
 }
 //#endregion
@@ -5005,25 +5027,53 @@ const evalTracer = {
 };
 /** Build a queryable trace tree helper from a flat span list and checkpoints. */
 function buildTraceTree(spans, checkpoints) {
+	const rootSpans = spans.filter((s) => s.parentId === null);
+	const flattenDfs = () => {
+		const result = [];
+		function visit(parentId) {
+			for (const childSpan of spans) if (childSpan.parentId === parentId) {
+				result.push(childSpan);
+				visit(childSpan.id);
+			}
+		}
+		visit(null);
+		return result;
+	};
+	const filterSpanNames = (sourceSpans, kind) => {
+		return sourceSpans.filter((span) => kind === void 0 || span.kind === kind).map((span) => span.name);
+	};
 	return {
 		spans,
-		rootSpans: spans.filter((s) => s.parentId === null),
+		rootSpans,
 		findSpan(name) {
 			return spans.find((s) => s.name === name);
 		},
+		findSpans(name) {
+			return spans.filter((s) => s.name === name);
+		},
+		hasSpan(name) {
+			return spans.some((s) => s.name === name);
+		},
 		findSpansByKind(kind) {
 			return spans.filter((s) => s.kind === kind);
 		},
+		findToolCallSpans() {
+			return spans.filter((s) => s.kind === "tool");
+		},
+		listToolCallSpanNames() {
+			return filterSpanNames(spans, "tool");
+		},
+		hasToolCallSpan(name) {
+			return spans.some((s) => s.kind === "tool" && s.name === name);
+		},
+		listSpanNames(kind) {
+			return filterSpanNames(spans, kind);
+		},
+		listSpanNamesDfs(kind) {
+			return filterSpanNames(flattenDfs(), kind);
+		},
 		flattenDfs() {
-			const result = [];
-			function visit(parentId) {
-				for (const childSpan of spans) if (childSpan.parentId === parentId) {
-					result.push(childSpan);
-					visit(childSpan.id);
-				}
-			}
-			visit(null);
-			return result;
+			return flattenDfs();
 		},
 		checkpoints
 	};
@@ -6665,7 +6715,7 @@ async function resolveDeriveFromTracingConfig(params) {
 	return derived;
 }
 async function runDeriveFromTracingConfig(params) {
-	if (params.deriveFromTracing === void 0) return;
+	if (params.deriveFromTracing === void 0) return null;
 	const { deriveFromTracing } = params;
 	try {
 		const derived = await runInExistingEvalScope(params.scope, "derive", async () => await resolveDeriveFromTracingConfig({
@@ -6677,13 +6727,53 @@ async function runDeriveFromTracingConfig(params) {
 			outputs: params.scope.outputs,
 			derived
 		});
+		return null;
 	} catch (e) {
+		if (e instanceof EvalRuntimeUsageError) return e;
 		const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
 		recordAssertionFailure(params.scope, toAssertionFailure(message, e instanceof Error ? e : void 0));
+		return null;
 	}
 }
+async function runOneTracingAssertion(params) {
+	const { label, tracingAssertion, scope, traceTree, evalCase } = params;
+	const failureCountBefore = scope.assertionFailures.length;
+	const ctx = {
+		trace: traceTree,
+		input: evalCase.input,
+		case: evalCase
+	};
+	try {
+		await runInExistingEvalScope(scope, "tracingAssertions", async () => {
+			await callUnknownFunction(tracingAssertion, [ctx]);
+		});
+	} catch (e) {
+		if (e instanceof EvalAssertionError && scope.assertionFailures.length > failureCountBefore) return;
+		recordAssertionFailure(scope, toAssertionFailure(`${label} threw: ${e instanceof Error ? e.message : String(e)}`, e instanceof Error ? e : void 0));
+	}
+}
+async function runTracingAssertionsConfig(params) {
+	if (params.tracingAssertions === void 0) return;
+	if (typeof params.tracingAssertions === "function") {
+		await runOneTracingAssertion({
+			label: "tracingAssertions",
+			tracingAssertion: params.tracingAssertions,
+			scope: params.scope,
+			traceTree: params.traceTree,
+			evalCase: params.evalCase
+		});
+		return;
+	}
+	for (const [key, tracingAssertion] of Object.entries(params.tracingAssertions)) await runOneTracingAssertion({
+		label: `tracingAssertions "${key}"`,
+		tracingAssertion,
+		scope: params.scope,
+		traceTree: params.traceTree,
+		evalCase: params.evalCase
+	});
+}
 async function runCase(params) {
-	const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, globalColumns, globalDeriveFromTracing, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
+	const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, globalColumns, globalDeriveFromTracing, globalTracingAssertions, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
 	const scopedIdPrefix = buildScopedEvalIdPrefix({
 		evalId,
 		evalFilePath,
@@ -6728,22 +6818,36 @@ async function runCase(params) {
 		apiCallsConfig
 	});
 	const traceTree = buildTraceTree(spansWithDerivedAttributes, scope.checkpoints);
-	const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
+	let nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
 	if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) recordAssertionFailure(scope, toAssertionFailure(executeError.message, executeError));
 	if (!nonAssertError) {
-		await runDeriveFromTracingConfig({
+		nonAssertError = await runDeriveFromTracingConfig({
 			deriveFromTracing: globalDeriveFromTracing,
 			scope,
 			traceTree,
 			evalCase
 		});
-		await runDeriveFromTracingConfig({
+		if (!nonAssertError) nonAssertError = await runDeriveFromTracingConfig({
 			deriveFromTracing: evalDef.deriveFromTracing,
 			scope,
 			traceTree,
 			evalCase
 		});
 	}
+	if (!nonAssertError) {
+		await runTracingAssertionsConfig({
+			tracingAssertions: globalTracingAssertions,
+			scope,
+			traceTree,
+			evalCase
+		});
+		await runTracingAssertionsConfig({
+			tracingAssertions: evalDef.tracingAssertions,
+			scope,
+			traceTree,
+			evalCase
+		});
+	}
 	if (!nonAssertError) addDefaultOutputs({
 		outputs: scope.outputs,
 		spans: spansWithDerivedAttributes,
@@ -6933,4 +7037,4 @@ function recordAssertionFailure(scope, failure) {
 	});
 }
 //#endregion
-export { startEvalBackgroundJob as $, repoFile as A, evalChartsConfigSchema as At, getCurrentScope as B, evalTracer as C, buildEvalKey as Ct, deserializeCacheValue as D, evalStatAggregateSchema as Dt, deserializeCacheRecording as E, caseRowSchema as Et, appendToEvalOutput as F, mergeEvalOutput as G, incrementEvalOutput as H, configureEvalRunLogs as I, runInEvalScope as J, nextEvalId as K, evalAssert as L, readManualInputFile as M, defineEval as Mt, evalExpect as N, getEvalRegistry as Nt, serializeCacheRecording as O, evalStatsConfigSchema as Ot, EvalAssertionError as P, runWithEvalRegistry as Pt, setScopeCacheContext as Q, evalLog as R, evalSpan as S, resolveLlmCallsConfig as St, hashCacheKeySync as T, caseDetailSchema as Tt, isInEvalScope as U, getEvalCaseInput as V, matchesEvalTags as W, runWithEvalClock as X, runInExistingEvalScope as Y, setEvalOutput as Z, createBufferedCacheStore as _, validateEvalTagName as _t, isCaseChildParentMessage as a, extractLlmCalls as at, buildTraceTree as b, runSummarySchema as bt, resolveArtifactPath as c, applyDerivedCallAttributes as ct, loadEvalModule as d, getEvalDisplayStatus as dt, createRunRequestSchema as et, resolveEvalDefaultConfig as f, deriveScopedSummaryFromCases as ft, commitPendingCacheWrites as g, matchesTagsFilter as gt, normalizeScoreDef as h, dedupeEvalTags as ht, isCaseChildMessage as i, extractApiCalls as it, manualInputFileValueSchema as j, columnDefSchema as jt, serializeCacheValue as k, manualInputDescriptorSchema as kt, registerAgentEvalsPackageResolutionHooks as l, getNestedAttribute as lt, buildDeclaredColumnDefs as m, deriveStatusFromChildStatuses as mt, resolveRunnableEvalCases as n, extractCacheEntries as nt, stripTerminalControlCodes as o, simulateLlmCallCost as ot, loadConfig as p, deriveStatusFromCaseRows as pt, runInEvalRuntimeScope as q, runCase as r, extractCacheHits as rt, resolveTracePresentation as s, simulateTokenAllocation as st, filterEvalCases as t, updateManualScoreRequestSchema as tt, 
runWithModuleIsolation as u, getEvalTitle as ut, createFsCacheStore as v, validateTagsFilterExpression as vt, hashCacheKey as w, getCaseRowCaseKey as wt, captureEvalSpanError as x, resolveApiCallsConfig as xt, z$1 as y, runManifestSchema as yt, evalTime as z };
+export { setScopeCacheContext as $, repoFile as A, evalStatsConfigSchema as At, evalTime as B, evalTracer as C, resolveLlmCallsConfig as Ct, deserializeCacheValue as D, caseDetailSchema as Dt, deserializeCacheRecording as E, getCaseRowCaseKey as Et, EvalRuntimeUsageError as F, getEvalRegistry as Ft, matchesEvalTags as G, getEvalCaseInput as H, appendToEvalOutput as I, runWithEvalRegistry as It, runInEvalRuntimeScope as J, mergeEvalOutput as K, configureEvalRunLogs as L, readManualInputFile as M, evalChartsConfigSchema as Mt, evalExpect as N, columnDefSchema as Nt, serializeCacheRecording as O, caseRowSchema as Ot, EvalAssertionError as P, defineEval as Pt, setEvalOutput as Q, evalAssert as R, evalSpan as S, resolveApiCallsConfig as St, hashCacheKeySync as T, buildEvalKey as Tt, incrementEvalOutput as U, getCurrentScope as V, isInEvalScope as W, runInExistingEvalScope as X, runInEvalScope as Y, runWithEvalClock as Z, createBufferedCacheStore as _, matchesTagsFilter as _t, isCaseChildParentMessage as a, extractApiCalls as at, buildTraceTree as b, runManifestSchema as bt, resolveArtifactPath as c, simulateTokenAllocation as ct, loadEvalModule as d, getEvalTitle as dt, startEvalBackgroundJob as et, resolveEvalDefaultConfig as f, getEvalDisplayStatus as ft, commitPendingCacheWrites as g, dedupeEvalTags as gt, normalizeScoreDef as h, deriveStatusFromChildStatuses as ht, isCaseChildMessage as i, extractCacheHits as it, manualInputFileValueSchema as j, manualInputDescriptorSchema as jt, serializeCacheValue as k, evalStatAggregateSchema as kt, registerAgentEvalsPackageResolutionHooks as l, applyDerivedCallAttributes as lt, buildDeclaredColumnDefs as m, deriveStatusFromCaseRows as mt, resolveRunnableEvalCases as n, updateManualScoreRequestSchema as nt, stripTerminalControlCodes as o, extractLlmCalls as ot, loadConfig as p, deriveScopedSummaryFromCases as pt, nextEvalId as q, runCase as r, extractCacheEntries as rt, resolveTracePresentation as s, simulateLlmCallCost as st, 
filterEvalCases as t, createRunRequestSchema as tt, runWithModuleIsolation as u, getNestedAttribute as ut, createFsCacheStore as v, validateEvalTagName as vt, hashCacheKey as w, buildCaseKey as wt, captureEvalSpanError as x, runSummarySchema as xt, z$1 as y, validateTagsFilterExpression as yt, evalLog as z };