@ls-stack/agent-eval 0.58.1 → 0.58.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -268,6 +268,7 @@ declare const runLogEntrySchema$1: z$1.ZodObject<{
268
268
  phase: z$1.ZodEnum<{
269
269
  eval: "eval";
270
270
  derive: "derive";
271
+ tracingAssertions: "tracingAssertions";
271
272
  outputsSchema: "outputsSchema";
272
273
  scorer: "scorer";
273
274
  }>;
@@ -399,10 +400,17 @@ type EvalCase$1$1<TInput = unknown> = {
399
400
  };
400
401
  /** Query helpers built from the flattened trace recorded for one eval case. */
401
402
  type EvalTraceTree = {
402
- spans: EvalTraceSpan$2[];
403
- rootSpans: EvalTraceSpan$2[];
404
- findSpan: (name: string) => EvalTraceSpan$2 | undefined;
405
- findSpansByKind: (kind: string) => EvalTraceSpan$2[];
403
+ /** Flat span list in creation order. */spans: EvalTraceSpan$2[]; /** Top-level spans whose `parentId` is `null`. */
404
+ rootSpans: EvalTraceSpan$2[]; /** Return the first span whose name exactly matches `name`. */
405
+ findSpan: (name: string) => EvalTraceSpan$2 | undefined; /** Return every span whose name exactly matches `name`. */
406
+ findSpans: (name: string) => EvalTraceSpan$2[]; /** Return whether any span name exactly matches `name`. */
407
+ hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
408
+ findSpansByKind: (kind: string) => EvalTraceSpan$2[]; /** Return every span with `kind: 'tool'`. */
409
+ findToolCallSpans: () => EvalTraceSpan$2[]; /** Return the names of every span with `kind: 'tool'`. */
410
+ listToolCallSpanNames: () => string[]; /** Return whether a `kind: 'tool'` span has a name exactly matching `name`. */
411
+ hasToolCallSpan: (name: string) => boolean; /** Return span names in creation order, optionally filtered by kind. */
412
+ listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
413
+ listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
406
414
  flattenDfs: () => EvalTraceSpan$2[];
407
415
  checkpoints: Map<string, unknown>;
408
416
  };
@@ -421,6 +429,12 @@ type EvalDeriveMap<TInput = unknown> = Record<string, EvalDeriveValueFn<TInput>>
421
429
  type EvalDeriveFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => Record<string, unknown> | Promise<Record<string, unknown>>;
422
430
  /** Trace-derived output config accepted globally and on eval definitions. */
423
431
  type EvalDeriveConfig<TInput = unknown> = EvalDeriveMap<TInput> | EvalDeriveFn<TInput>;
432
+ /** Function that records trace-derived assertions for one case. */
433
+ type EvalTracingAssertionsFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => MaybePromise$1<void>;
434
+ /** Keyed trace-derived assertion config for grouping related checks. */
435
+ type EvalTracingAssertionsMap<TInput = unknown> = Record<string, EvalTracingAssertionsFn<TInput>>;
436
+ /** Trace-derived assertion config accepted globally and on eval definitions. */
437
+ type EvalTracingAssertionsConfig<TInput = unknown> = EvalTracingAssertionsMap<TInput> | EvalTracingAssertionsFn<TInput>;
424
438
  /** UI overrides for a derived or scored column emitted by an eval. */
425
439
  type EvalColumnOverride = {
426
440
  /** Display label shown for the column in tables and detail views. */label?: string;
@@ -1142,9 +1156,18 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
1142
1156
  *
1143
1157
  * Prefer the keyed map form when each key has one derivation. The
1144
1158
  * object-returning callback form is also supported. Derived values only fill
1145
- * keys not already recorded during execution.
1159
+ * keys not already recorded during execution. Assertion helpers are not
1160
+ * allowed here; use `tracingAssertions` for trace-derived pass/fail checks.
1146
1161
  */
1147
1162
  deriveFromTracing?: EvalDeriveConfig<TInput>;
1163
+ /**
1164
+ * Record assertions from the finished execution trace.
1165
+ *
1166
+ * Runs after `deriveFromTracing` and before output schema validation and
1167
+ * scores. Use `evalAssert(...)` or `evalExpect(...)` inside the callback to
1168
+ * write normal assertion results without creating score columns.
1169
+ */
1170
+ tracingAssertions?: EvalTracingAssertionsConfig<TInput>;
1148
1171
  /**
1149
1172
  * Computed score columns for each case.
1150
1173
  *
@@ -1503,12 +1526,16 @@ type EvalCaseScope = {
1503
1526
  * covers run-time module/environment loading, including top-level code in
1504
1527
  * modules imported while a run is being prepared.
1505
1528
  */
1506
- type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'outputsSchema' | 'scorer';
1529
+ type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'tracingAssertions' | 'outputsSchema' | 'scorer';
1507
1530
  type EvalLogLevelInput = RunLogLevel$1 | 'warning';
1508
1531
  /** Error thrown when an eval assertion fails during case execution. */
1509
1532
  declare class EvalAssertionError extends Error {
1510
1533
  constructor(message: string);
1511
1534
  }
1535
+ /** Error thrown when an SDK helper is used in an unsupported runner phase. */
1536
+ declare class EvalRuntimeUsageError extends Error {
1537
+ constructor(message: string);
1538
+ }
1512
1539
  /** Return the host process clock, bypassing the eval Date shim. */
1513
1540
  /**
1514
1541
  * Eval time helpers for reading and moving the active eval clock.
@@ -1539,8 +1566,10 @@ declare function getCurrentScope(): EvalCaseScope | undefined;
1539
1566
  *
1540
1567
  * Returns `null` outside eval-owned work, `env` while the runner is loading
1541
1568
  * eval modules for a run, `cases` while generating cases, `eval` while running
1542
- * case `execute`, `derive` while deriving outputs from traces, `outputsSchema`
1543
- * while validating outputs, and `scorer` while computing scores.
1569
+ * case `execute`, `derive` while deriving outputs from traces,
1570
+ * `tracingAssertions` while checking trace-derived assertions,
1571
+ * `outputsSchema` while validating outputs, and `scorer` while computing
1572
+ * scores.
1544
1573
  */
1545
1574
  declare function isInEvalScope(): EvalRuntimeScope | null;
1546
1575
  /**
@@ -1656,7 +1685,8 @@ declare function incrementEvalOutput(key: string, delta: number): void;
1656
1685
  * Calls made outside `runInEvalScope(...)` are ignored so shared workflow code
1657
1686
  * can safely reuse `evalAssert(...)` when it also runs outside an eval. The
1658
1687
  * TypeScript assertion signature still narrows the checked value after the
1659
- * call.
1688
+ * call. Calls inside `deriveFromTracing` throw because derivations must only
1689
+ * write outputs; use `tracingAssertions` for trace-derived pass/fail checks.
1660
1690
  */
1661
1691
  declare function evalAssert(condition: unknown, message: string): asserts condition; //#endregion
1662
1692
  //#region src/valueCache.d.ts
@@ -2753,6 +2783,7 @@ type RunLogLevel = z$1.infer<typeof runLogLevelSchema>;
2753
2783
  declare const runLogPhaseSchema: z$1.ZodEnum<{
2754
2784
  eval: "eval";
2755
2785
  derive: "derive";
2786
+ tracingAssertions: "tracingAssertions";
2756
2787
  outputsSchema: "outputsSchema";
2757
2788
  scorer: "scorer";
2758
2789
  }>;
@@ -2779,6 +2810,7 @@ declare const runLogEntrySchema: z$1.ZodObject<{
2779
2810
  phase: z$1.ZodEnum<{
2780
2811
  eval: "eval";
2781
2812
  derive: "derive";
2813
+ tracingAssertions: "tracingAssertions";
2782
2814
  outputsSchema: "outputsSchema";
2783
2815
  scorer: "scorer";
2784
2816
  }>;
@@ -3137,6 +3169,7 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3137
3169
  phase: z$1.ZodEnum<{
3138
3170
  eval: "eval";
3139
3171
  derive: "derive";
3172
+ tracingAssertions: "tracingAssertions";
3140
3173
  outputsSchema: "outputsSchema";
3141
3174
  scorer: "scorer";
3142
3175
  }>;
@@ -3610,7 +3643,7 @@ type ScopedCaseSummary = {
3610
3643
  //#endregion
3611
3644
  //#region src/evalStatus.d.ts
3612
3645
  /** Display status used for eval, file, and folder UI surfaces. */
3613
- type EvalDisplayStatus = DerivedStatus | 'stale' | 'outdated' | 'unscored';
3646
+ type EvalDisplayStatus = DerivedStatus | 'enqueued' | 'stale' | 'outdated' | 'unscored';
3614
3647
  /**
3615
3648
  * Derive the user-facing eval status from the raw latest run result plus
3616
3649
  * freshness state.
@@ -3658,10 +3691,17 @@ type EvalCase$1<TInput = unknown> = {
3658
3691
  };
3659
3692
  /** Query helpers built from the flattened trace recorded for one eval case. */
3660
3693
  type EvalTraceTree$1 = {
3661
- spans: EvalTraceSpan$1[];
3662
- rootSpans: EvalTraceSpan$1[];
3663
- findSpan: (name: string) => EvalTraceSpan$1 | undefined;
3664
- findSpansByKind: (kind: string) => EvalTraceSpan$1[];
3694
+ /** Flat span list in creation order. */spans: EvalTraceSpan$1[]; /** Top-level spans whose `parentId` is `null`. */
3695
+ rootSpans: EvalTraceSpan$1[]; /** Return the first span whose name exactly matches `name`. */
3696
+ findSpan: (name: string) => EvalTraceSpan$1 | undefined; /** Return every span whose name exactly matches `name`. */
3697
+ findSpans: (name: string) => EvalTraceSpan$1[]; /** Return whether any span name exactly matches `name`. */
3698
+ hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
3699
+ findSpansByKind: (kind: string) => EvalTraceSpan$1[]; /** Return every span with `kind: 'tool'`. */
3700
+ findToolCallSpans: () => EvalTraceSpan$1[]; /** Return the names of every span with `kind: 'tool'`. */
3701
+ listToolCallSpanNames: () => string[]; /** Return whether a `kind: 'tool'` span has a name exactly matching `name`. */
3702
+ hasToolCallSpan: (name: string) => boolean; /** Return span names in creation order, optionally filtered by kind. */
3703
+ listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
3704
+ listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
3665
3705
  flattenDfs: () => EvalTraceSpan$1[];
3666
3706
  checkpoints: Map<string, unknown>;
3667
3707
  };
@@ -3681,6 +3721,13 @@ type EvalDeriveFn$1<TInput = unknown> = (ctx: EvalDeriveContext$1<TInput>) => Re
3681
3721
  /** Trace-derived output config accepted globally and on eval definitions. */
3682
3722
  type EvalDeriveConfig$1<TInput = unknown> = EvalDeriveMap$1<TInput> | EvalDeriveFn$1<TInput>;
3683
3723
  /** Schema for keyed or object-returning trace-derived output config. */
3724
+ /** Function that records trace-derived assertions for one case. */
3725
+ type EvalTracingAssertionsFn$1<TInput = unknown> = (ctx: EvalDeriveContext$1<TInput>) => MaybePromise<void>;
3726
+ /** Keyed trace-derived assertion config for grouping related checks. */
3727
+ type EvalTracingAssertionsMap$1<TInput = unknown> = Record<string, EvalTracingAssertionsFn$1<TInput>>;
3728
+ /** Trace-derived assertion config accepted globally and on eval definitions. */
3729
+ type EvalTracingAssertionsConfig$1<TInput = unknown> = EvalTracingAssertionsMap$1<TInput> | EvalTracingAssertionsFn$1<TInput>;
3730
+ /** Schema for function or keyed trace-derived assertion config. */
3684
3731
  /** UI overrides for a derived or scored column emitted by an eval. */
3685
3732
  type EvalColumnOverride$1 = {
3686
3733
  /** Display label shown for the column in tables and detail views. */label?: string;
@@ -4133,9 +4180,19 @@ type AgentEvalsConfig$1 = {
4133
4180
  * Prefer the keyed map form for shared metrics:
4134
4181
  * `{ toolCalls: ({ trace }) => trace.findSpansByKind('tool').length }`.
4135
4182
  * The object-returning function form is also supported. Derived outputs
4136
- * only fill keys that were not already recorded by eval execution.
4183
+ * only fill keys that were not already recorded by eval execution. Do not
4184
+ * call assertion helpers here; use `tracingAssertions` for trace-derived
4185
+ * pass/fail checks.
4137
4186
  */
4138
4187
  deriveFromTracing?: EvalDeriveConfig$1;
4188
+ /**
4189
+ * Workspace-wide assertions derived from the finished execution trace.
4190
+ *
4191
+ * These run after `deriveFromTracing` and before output schema validation and
4192
+ * scores. Use `evalAssert(...)` or `evalExpect(...)` inside the callback to
4193
+ * record normal assertion results without creating fake score columns.
4194
+ */
4195
+ tracingAssertions?: EvalTracingAssertionsConfig$1;
4139
4196
  /**
4140
4197
  * Workspace-wide stats prepended to every eval's stats row.
4141
4198
  *
@@ -6366,6 +6423,7 @@ declare const caseDetailSchema: z$1.ZodObject<{
6366
6423
  phase: z$1.ZodEnum<{
6367
6424
  eval: "eval";
6368
6425
  derive: "derive";
6426
+ tracingAssertions: "tracingAssertions";
6369
6427
  outputsSchema: "outputsSchema";
6370
6428
  scorer: "scorer";
6371
6429
  }>;
@@ -6992,7 +7050,8 @@ type EvalRunner = {
6992
7050
  getEvals(): EvalSummary$1[]; /** Look up one discovered eval by id. */
6993
7051
  getEval(id: string): EvalSummary$1 | undefined; /** Return discovery errors that should be shown before running evals. */
6994
7052
  getDiscoveryIssues(): DiscoveryIssue$1[]; /** Return current config-reload state for the long-running app server. */
6995
- getConfigReloadState(): ConfigReloadState$1; /** Re-scan configured eval files and emit a discovery update to listeners. */
7053
+ getConfigReloadState(): ConfigReloadState$1; /** Return the effective per-run case concurrency after applying defaults. */
7054
+ getConfiguredConcurrency(): number; /** Re-scan configured eval files and emit a discovery update to listeners. */
6996
7055
  refreshDiscovery(): Promise<void>;
6997
7056
  startRun(request: CreateRunRequest$1): Promise<{
6998
7057
  manifest: RunManifest$1;
@@ -7227,4 +7286,4 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
7227
7286
  /** Return whether the active eval case has tags matching the typed input. */
7228
7287
  declare function matchesEvalTags(input: EvalTagMatchInput): boolean;
7229
7288
  //#endregion
7230
- export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
7289
+ export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, EvalRuntimeUsageError, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type EvalTracingAssertionsConfig, type EvalTracingAssertionsFn, type EvalTracingAssertionsMap, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-d42Lm0i5.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-_g2qOMK6.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-CdZsOn6y.mjs";
4
- export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
1
+ import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalLog } from "./runExecution-pHJ0_TzH.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-HBwXIJsg.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-AeXGBJ26.mjs";
4
+ export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-d42Lm0i5.mjs";
2
- import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-CvmFeOmT.mjs";
1
+ import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema } from "./runExecution-pHJ0_TzH.mjs";
2
+ import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-ngVXShH4.mjs";
3
3
  import { z } from "zod/v4";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";
@@ -851,6 +851,7 @@ const runLogLevelSchema = z.enum([
851
851
  const runLogPhaseSchema = z.enum([
852
852
  "eval",
853
853
  "derive",
854
+ "tracingAssertions",
854
855
  "outputsSchema",
855
856
  "scorer"
856
857
  ]);
@@ -1008,6 +1009,9 @@ const removeDefaultConfigSchema = z.union([z.literal(true), z.array(defaultConfi
1008
1009
  const evalDeriveValueFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a derive output function" });
1009
1010
  /** Schema for keyed or object-returning trace-derived output config. */
1010
1011
  const evalDeriveConfigSchema = z.union([z.custom((value) => typeof value === "function", { message: "Expected a deriveFromTracing function" }), z.record(z.string().min(1), evalDeriveValueFnSchema)]);
1012
+ const evalTracingAssertionsFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a tracing assertions function" });
1013
+ /** Schema for function or keyed trace-derived assertion config. */
1014
+ const evalTracingAssertionsConfigSchema = z.union([z.custom((value) => typeof value === "function", { message: "Expected a tracingAssertions function" }), z.record(z.string().min(1), evalTracingAssertionsFnSchema)]);
1011
1015
  /** Schema for UI overrides on derived or scored columns. */
1012
1016
  const evalColumnOverrideSchema = z.object({
1013
1017
  label: z.string().optional(),
@@ -1411,6 +1415,7 @@ const agentEvalsConfigSchema = z.object({
1411
1415
  traceDisplay: traceDisplayInputConfigSchema.optional(),
1412
1416
  columns: evalColumnsSchema.optional(),
1413
1417
  deriveFromTracing: evalDeriveConfigSchema.optional(),
1418
+ tracingAssertions: evalTracingAssertionsConfigSchema.optional(),
1414
1419
  stats: evalStatsConfigSchema.optional(),
1415
1420
  defaultStatAggregate: evalStatAggregateSchema.optional(),
1416
1421
  llmCalls: llmCallsConfigSchema.optional(),
@@ -1847,8 +1852,9 @@ function deriveScopedSummaryFromCases(params) {
1847
1852
  * freshness state.
1848
1853
  */
1849
1854
  function getEvalDisplayStatus(params) {
1850
- const { stale, outdated, lastRunStatus, isRunning = false } = params;
1855
+ const { stale, outdated, lastRunStatus, isRunning = false, isEnqueued = false } = params;
1851
1856
  if (isRunning || lastRunStatus === "running") return "running";
1857
+ if (isEnqueued) return "enqueued";
1852
1858
  if (lastRunStatus === "pass") {
1853
1859
  if (stale) return "stale";
1854
1860
  if (outdated) return "outdated";
@@ -2718,6 +2724,17 @@ var EvalAssertionError = class extends Error {
2718
2724
  this.name = "EvalAssertionError";
2719
2725
  }
2720
2726
  };
2727
+ /** Error thrown when an SDK helper is used in an unsupported runner phase. */
2728
+ var EvalRuntimeUsageError = class extends Error {
2729
+ constructor(message) {
2730
+ super(message);
2731
+ this.name = "EvalRuntimeUsageError";
2732
+ }
2733
+ };
2734
+ /** Throw when assertion helpers are used in a runner phase that forbids them. */
2735
+ function assertEvalAssertionsAllowed(apiName) {
2736
+ if (getCurrentScope() && runtimeScopeStorage.getStore() === "derive") throw new EvalRuntimeUsageError(`${apiName} cannot be used inside deriveFromTracing. Use tracingAssertions for trace-derived assertions.`);
2737
+ }
2721
2738
  function getEvalClockStateNowMs(state) {
2722
2739
  const elapsedMs = state.frozen ? 0 : realDate.now() - state.realStartMs;
2723
2740
  return state.startMs + elapsedMs + state.offsetMs;
@@ -2823,8 +2840,10 @@ function recordSpanForActiveCacheRecording(scope, spanId) {
2823
2840
  *
2824
2841
  * Returns `null` outside eval-owned work, `env` while the runner is loading
2825
2842
  * eval modules for a run, `cases` while generating cases, `eval` while running
2826
- * case `execute`, `derive` while deriving outputs from traces, `outputsSchema`
2827
- * while validating outputs, and `scorer` while computing scores.
2843
+ * case `execute`, `derive` while deriving outputs from traces,
2844
+ * `tracingAssertions` while checking trace-derived assertions,
2845
+ * `outputsSchema` while validating outputs, and `scorer` while computing
2846
+ * scores.
2828
2847
  */
2829
2848
  function isInEvalScope() {
2830
2849
  if (activeEvalRuntimeScopeCount === 0) return null;
@@ -2845,7 +2864,7 @@ function normalizeLogLevel(level) {
2845
2864
  }
2846
2865
  function getCurrentLogPhase() {
2847
2866
  const runtimeScope = runtimeScopeStorage.getStore();
2848
- if (runtimeScope === "eval" || runtimeScope === "derive" || runtimeScope === "outputsSchema" || runtimeScope === "scorer") return runtimeScope;
2867
+ if (runtimeScope === "eval" || runtimeScope === "derive" || runtimeScope === "tracingAssertions" || runtimeScope === "outputsSchema" || runtimeScope === "scorer") return runtimeScope;
2849
2868
  return null;
2850
2869
  }
2851
2870
  function formatLogArgs(args) {
@@ -3300,10 +3319,12 @@ function incrementEvalOutput(key, delta) {
3300
3319
  * Calls made outside `runInEvalScope(...)` are ignored so shared workflow code
3301
3320
  * can safely reuse `evalAssert(...)` when it also runs outside an eval. The
3302
3321
  * TypeScript assertion signature still narrows the checked value after the
3303
- * call.
3322
+ * call. Calls inside `deriveFromTracing` throw because derivations must only
3323
+ * write outputs; use `tracingAssertions` for trace-derived pass/fail checks.
3304
3324
  */
3305
3325
  function evalAssert(condition, message) {
3306
3326
  const scope = getCurrentScope();
3327
+ assertEvalAssertionsAllowed("evalAssert(...)");
3307
3328
  if (condition) {
3308
3329
  if (scope) scope.assertions.push({
3309
3330
  message,
@@ -3454,6 +3475,7 @@ var EvalExpectationImpl = class EvalExpectationImpl {
3454
3475
  * case scope is active, matching `evalAssert(...)`.
3455
3476
  */
3456
3477
  function evalExpect(value) {
3478
+ assertEvalAssertionsAllowed("evalExpect(...)");
3457
3479
  return new EvalExpectationImpl(value, false);
3458
3480
  }
3459
3481
  //#endregion
@@ -5005,25 +5027,53 @@ const evalTracer = {
5005
5027
  };
5006
5028
  /** Build a queryable trace tree helper from a flat span list and checkpoints. */
5007
5029
  function buildTraceTree(spans, checkpoints) {
5030
+ const rootSpans = spans.filter((s) => s.parentId === null);
5031
+ const flattenDfs = () => {
5032
+ const result = [];
5033
+ function visit(parentId) {
5034
+ for (const childSpan of spans) if (childSpan.parentId === parentId) {
5035
+ result.push(childSpan);
5036
+ visit(childSpan.id);
5037
+ }
5038
+ }
5039
+ visit(null);
5040
+ return result;
5041
+ };
5042
+ const filterSpanNames = (sourceSpans, kind) => {
5043
+ return sourceSpans.filter((span) => kind === void 0 || span.kind === kind).map((span) => span.name);
5044
+ };
5008
5045
  return {
5009
5046
  spans,
5010
- rootSpans: spans.filter((s) => s.parentId === null),
5047
+ rootSpans,
5011
5048
  findSpan(name) {
5012
5049
  return spans.find((s) => s.name === name);
5013
5050
  },
5051
+ findSpans(name) {
5052
+ return spans.filter((s) => s.name === name);
5053
+ },
5054
+ hasSpan(name) {
5055
+ return spans.some((s) => s.name === name);
5056
+ },
5014
5057
  findSpansByKind(kind) {
5015
5058
  return spans.filter((s) => s.kind === kind);
5016
5059
  },
5060
+ findToolCallSpans() {
5061
+ return spans.filter((s) => s.kind === "tool");
5062
+ },
5063
+ listToolCallSpanNames() {
5064
+ return filterSpanNames(spans, "tool");
5065
+ },
5066
+ hasToolCallSpan(name) {
5067
+ return spans.some((s) => s.kind === "tool" && s.name === name);
5068
+ },
5069
+ listSpanNames(kind) {
5070
+ return filterSpanNames(spans, kind);
5071
+ },
5072
+ listSpanNamesDfs(kind) {
5073
+ return filterSpanNames(flattenDfs(), kind);
5074
+ },
5017
5075
  flattenDfs() {
5018
- const result = [];
5019
- function visit(parentId) {
5020
- for (const childSpan of spans) if (childSpan.parentId === parentId) {
5021
- result.push(childSpan);
5022
- visit(childSpan.id);
5023
- }
5024
- }
5025
- visit(null);
5026
- return result;
5076
+ return flattenDfs();
5027
5077
  },
5028
5078
  checkpoints
5029
5079
  };
@@ -6665,7 +6715,7 @@ async function resolveDeriveFromTracingConfig(params) {
6665
6715
  return derived;
6666
6716
  }
6667
6717
  async function runDeriveFromTracingConfig(params) {
6668
- if (params.deriveFromTracing === void 0) return;
6718
+ if (params.deriveFromTracing === void 0) return null;
6669
6719
  const { deriveFromTracing } = params;
6670
6720
  try {
6671
6721
  const derived = await runInExistingEvalScope(params.scope, "derive", async () => await resolveDeriveFromTracingConfig({
@@ -6677,13 +6727,53 @@ async function runDeriveFromTracingConfig(params) {
6677
6727
  outputs: params.scope.outputs,
6678
6728
  derived
6679
6729
  });
6730
+ return null;
6680
6731
  } catch (e) {
6732
+ if (e instanceof EvalRuntimeUsageError) return e;
6681
6733
  const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
6682
6734
  recordAssertionFailure(params.scope, toAssertionFailure(message, e instanceof Error ? e : void 0));
6735
+ return null;
6683
6736
  }
6684
6737
  }
6738
+ async function runOneTracingAssertion(params) {
6739
+ const { label, tracingAssertion, scope, traceTree, evalCase } = params;
6740
+ const failureCountBefore = scope.assertionFailures.length;
6741
+ const ctx = {
6742
+ trace: traceTree,
6743
+ input: evalCase.input,
6744
+ case: evalCase
6745
+ };
6746
+ try {
6747
+ await runInExistingEvalScope(scope, "tracingAssertions", async () => {
6748
+ await callUnknownFunction(tracingAssertion, [ctx]);
6749
+ });
6750
+ } catch (e) {
6751
+ if (e instanceof EvalAssertionError && scope.assertionFailures.length > failureCountBefore) return;
6752
+ recordAssertionFailure(scope, toAssertionFailure(`${label} threw: ${e instanceof Error ? e.message : String(e)}`, e instanceof Error ? e : void 0));
6753
+ }
6754
+ }
6755
+ async function runTracingAssertionsConfig(params) {
6756
+ if (params.tracingAssertions === void 0) return;
6757
+ if (typeof params.tracingAssertions === "function") {
6758
+ await runOneTracingAssertion({
6759
+ label: "tracingAssertions",
6760
+ tracingAssertion: params.tracingAssertions,
6761
+ scope: params.scope,
6762
+ traceTree: params.traceTree,
6763
+ evalCase: params.evalCase
6764
+ });
6765
+ return;
6766
+ }
6767
+ for (const [key, tracingAssertion] of Object.entries(params.tracingAssertions)) await runOneTracingAssertion({
6768
+ label: `tracingAssertions "${key}"`,
6769
+ tracingAssertion,
6770
+ scope: params.scope,
6771
+ traceTree: params.traceTree,
6772
+ evalCase: params.evalCase
6773
+ });
6774
+ }
6685
6775
  async function runCase(params) {
6686
- const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, globalColumns, globalDeriveFromTracing, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
6776
+ const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, globalColumns, globalDeriveFromTracing, globalTracingAssertions, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
6687
6777
  const scopedIdPrefix = buildScopedEvalIdPrefix({
6688
6778
  evalId,
6689
6779
  evalFilePath,
@@ -6728,22 +6818,36 @@ async function runCase(params) {
6728
6818
  apiCallsConfig
6729
6819
  });
6730
6820
  const traceTree = buildTraceTree(spansWithDerivedAttributes, scope.checkpoints);
6731
- const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
6821
+ let nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
6732
6822
  if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) recordAssertionFailure(scope, toAssertionFailure(executeError.message, executeError));
6733
6823
  if (!nonAssertError) {
6734
- await runDeriveFromTracingConfig({
6824
+ nonAssertError = await runDeriveFromTracingConfig({
6735
6825
  deriveFromTracing: globalDeriveFromTracing,
6736
6826
  scope,
6737
6827
  traceTree,
6738
6828
  evalCase
6739
6829
  });
6740
- await runDeriveFromTracingConfig({
6830
+ if (!nonAssertError) nonAssertError = await runDeriveFromTracingConfig({
6741
6831
  deriveFromTracing: evalDef.deriveFromTracing,
6742
6832
  scope,
6743
6833
  traceTree,
6744
6834
  evalCase
6745
6835
  });
6746
6836
  }
6837
+ if (!nonAssertError) {
6838
+ await runTracingAssertionsConfig({
6839
+ tracingAssertions: globalTracingAssertions,
6840
+ scope,
6841
+ traceTree,
6842
+ evalCase
6843
+ });
6844
+ await runTracingAssertionsConfig({
6845
+ tracingAssertions: evalDef.tracingAssertions,
6846
+ scope,
6847
+ traceTree,
6848
+ evalCase
6849
+ });
6850
+ }
6747
6851
  if (!nonAssertError) addDefaultOutputs({
6748
6852
  outputs: scope.outputs,
6749
6853
  spans: spansWithDerivedAttributes,
@@ -6933,4 +7037,4 @@ function recordAssertionFailure(scope, failure) {
6933
7037
  });
6934
7038
  }
6935
7039
  //#endregion
6936
- export { startEvalBackgroundJob as $, repoFile as A, evalChartsConfigSchema as At, getCurrentScope as B, evalTracer as C, buildEvalKey as Ct, deserializeCacheValue as D, evalStatAggregateSchema as Dt, deserializeCacheRecording as E, caseRowSchema as Et, appendToEvalOutput as F, mergeEvalOutput as G, incrementEvalOutput as H, configureEvalRunLogs as I, runInEvalScope as J, nextEvalId as K, evalAssert as L, readManualInputFile as M, defineEval as Mt, evalExpect as N, getEvalRegistry as Nt, serializeCacheRecording as O, evalStatsConfigSchema as Ot, EvalAssertionError as P, runWithEvalRegistry as Pt, setScopeCacheContext as Q, evalLog as R, evalSpan as S, resolveLlmCallsConfig as St, hashCacheKeySync as T, caseDetailSchema as Tt, isInEvalScope as U, getEvalCaseInput as V, matchesEvalTags as W, runWithEvalClock as X, runInExistingEvalScope as Y, setEvalOutput as Z, createBufferedCacheStore as _, validateEvalTagName as _t, isCaseChildParentMessage as a, extractLlmCalls as at, buildTraceTree as b, runSummarySchema as bt, resolveArtifactPath as c, applyDerivedCallAttributes as ct, loadEvalModule as d, getEvalDisplayStatus as dt, createRunRequestSchema as et, resolveEvalDefaultConfig as f, deriveScopedSummaryFromCases as ft, commitPendingCacheWrites as g, matchesTagsFilter as gt, normalizeScoreDef as h, dedupeEvalTags as ht, isCaseChildMessage as i, extractApiCalls as it, manualInputFileValueSchema as j, columnDefSchema as jt, serializeCacheValue as k, manualInputDescriptorSchema as kt, registerAgentEvalsPackageResolutionHooks as l, getNestedAttribute as lt, buildDeclaredColumnDefs as m, deriveStatusFromChildStatuses as mt, resolveRunnableEvalCases as n, extractCacheEntries as nt, stripTerminalControlCodes as o, simulateLlmCallCost as ot, loadConfig as p, deriveStatusFromCaseRows as pt, runInEvalRuntimeScope as q, runCase as r, extractCacheHits as rt, resolveTracePresentation as s, simulateTokenAllocation as st, filterEvalCases as t, updateManualScoreRequestSchema as tt, runWithModuleIsolation as u, getEvalTitle as ut, createFsCacheStore as v, validateTagsFilterExpression as vt, hashCacheKey as w, getCaseRowCaseKey as wt, captureEvalSpanError as x, resolveApiCallsConfig as xt, z$1 as y, runManifestSchema as yt, evalTime as z };
7040
+ export { setScopeCacheContext as $, repoFile as A, evalStatsConfigSchema as At, evalTime as B, evalTracer as C, resolveLlmCallsConfig as Ct, deserializeCacheValue as D, caseDetailSchema as Dt, deserializeCacheRecording as E, getCaseRowCaseKey as Et, EvalRuntimeUsageError as F, getEvalRegistry as Ft, matchesEvalTags as G, getEvalCaseInput as H, appendToEvalOutput as I, runWithEvalRegistry as It, runInEvalRuntimeScope as J, mergeEvalOutput as K, configureEvalRunLogs as L, readManualInputFile as M, evalChartsConfigSchema as Mt, evalExpect as N, columnDefSchema as Nt, serializeCacheRecording as O, caseRowSchema as Ot, EvalAssertionError as P, defineEval as Pt, setEvalOutput as Q, evalAssert as R, evalSpan as S, resolveApiCallsConfig as St, hashCacheKeySync as T, buildEvalKey as Tt, incrementEvalOutput as U, getCurrentScope as V, isInEvalScope as W, runInExistingEvalScope as X, runInEvalScope as Y, runWithEvalClock as Z, createBufferedCacheStore as _, matchesTagsFilter as _t, isCaseChildParentMessage as a, extractApiCalls as at, buildTraceTree as b, runManifestSchema as bt, resolveArtifactPath as c, simulateTokenAllocation as ct, loadEvalModule as d, getEvalTitle as dt, startEvalBackgroundJob as et, resolveEvalDefaultConfig as f, getEvalDisplayStatus as ft, commitPendingCacheWrites as g, dedupeEvalTags as gt, normalizeScoreDef as h, deriveStatusFromChildStatuses as ht, isCaseChildMessage as i, extractCacheHits as it, manualInputFileValueSchema as j, manualInputDescriptorSchema as jt, serializeCacheValue as k, evalStatAggregateSchema as kt, registerAgentEvalsPackageResolutionHooks as l, applyDerivedCallAttributes as lt, buildDeclaredColumnDefs as m, deriveStatusFromCaseRows as mt, resolveRunnableEvalCases as n, updateManualScoreRequestSchema as nt, stripTerminalControlCodes as o, extractLlmCalls as ot, loadConfig as p, deriveScopedSummaryFromCases as pt, nextEvalId as q, runCase as r, extractCacheEntries as rt, resolveTracePresentation as s, simulateLlmCallCost as st, filterEvalCases as t, createRunRequestSchema as tt, runWithModuleIsolation as u, getNestedAttribute as ut, createFsCacheStore as v, validateEvalTagName as vt, hashCacheKey as w, buildCaseKey as wt, captureEvalSpanError as x, runSummarySchema as xt, z$1 as y, validateTagsFilterExpression as yt, evalLog as z };