npm - @ls-stack/agent-eval - Versions diffs - 0.21.0 → 0.23.0 - Mend

@ls-stack/agent-eval 0.21.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/dist/{app-CmwmcUgG.mjs → app-Cw79dJDr.mjs} +3 -3
package/dist/apps/web/dist/assets/index-AUDD3rNB.js +118 -0
package/dist/apps/web/dist/index.html +1 -1
package/dist/bin.mjs +1 -1
package/dist/{cli-DumvanQI.mjs → cli-D3QNOcPN.mjs} +3 -3
package/dist/index.d.mts +102 -56
package/dist/index.mjs +4 -4
package/dist/runChild.mjs +1 -1
package/dist/{runOrchestration-zYAcAPtS.mjs → runOrchestration-CimthgI7.mjs} +248 -90
package/dist/{runner-Dy_PECaf.mjs → runner-4yNYRvmF.mjs} +2 -2
package/dist/{runner-BcwyX9CO.mjs → runner-B-SYzW8w.mjs} +1 -1
package/dist/src-CcXfWT4M.mjs +3 -0
package/package.json +1 -1
package/skills/agent-eval/SKILL.md +36 -17
package/dist/apps/web/dist/assets/index-EXO08yya.js +0 -118
package/dist/src-BoAJb4wC.mjs +0 -3

package/dist/apps/web/dist/index.html CHANGED Viewed

@@ -25,7 +25,7 @@
       href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
       rel="stylesheet"
     />
-    <script type="module" crossorigin src="/assets/index-EXO08yya.js"></script>
+    <script type="module" crossorigin src="/assets/index-AUDD3rNB.js"></script>
     <link rel="stylesheet" crossorigin href="/assets/index-r0dVFK0B.css">
   </head>
   <body>

package/dist/bin.mjs CHANGED Viewed

@@ -1,5 +1,5 @@
 #!/usr/bin/env node
-import { t as runCli } from "./cli-DumvanQI.mjs";
+import { t as runCli } from "./cli-D3QNOcPN.mjs";
 import { spawn } from "node:child_process";
 //#region src/bin.ts
 const moduleMocksFlag = "--experimental-test-module-mocks";

package/dist/{cli-DumvanQI.mjs → cli-D3QNOcPN.mjs} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { C as validateCharts, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, N as getEvalTitle, P as getEvalDisplayStatus, S as normalizeScoreDef, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as loadConfig, c as persistCaseDetail, d as recomputePersistedCaseStatus, et as resolveApiCallsConfig, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, tt as resolveLlmCallsConfig, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, w as createFsCacheStore, x as buildDeclaredColumnDefs, y as resolveEvalDefaultConfig, z as runSummarySchema } from "./runOrchestration-zYAcAPtS.mjs";
+import { C as validateCharts, F as deriveScopedSummaryFromCases, Ln as getEvalRegistry, N as getEvalTitle, P as getEvalDisplayStatus, S as normalizeScoreDef, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as loadConfig, c as persistCaseDetail, d as recomputePersistedCaseStatus, et as resolveApiCallsConfig, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, tt as resolveLlmCallsConfig, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, w as createFsCacheStore, x as buildDeclaredColumnDefs, y as resolveEvalDefaultConfig, z as runSummarySchema } from "./runOrchestration-CimthgI7.mjs";
 import { createHash } from "node:crypto";
 import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
 import { dirname, join, relative, resolve } from "node:path";
@@ -959,8 +959,8 @@ async function commandApp(args) {
 	const { serve } = await import("@hono/node-server");
 	const bundledWebDist = resolve(currentDir, "apps/web/dist");
 	if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
-	const appModule = await import("./app-CmwmcUgG.mjs");
-	const runnerModule = await import("./runner-BcwyX9CO.mjs");
+	const appModule = await import("./app-Cw79dJDr.mjs");
+	const runnerModule = await import("./runner-B-SYzW8w.mjs");
 	if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
 	if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
 	await runnerModule.initRunner();

package/dist/index.d.mts CHANGED Viewed

@@ -645,7 +645,7 @@ declare const caseRowSchema: z$1.ZodObject<{
     cancelled: "cancelled";
     pending: "pending";
   }>;
-  latencyMs: z$1.ZodNullable<z$1.ZodNumber>;
+  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
   costUsd: z$1.ZodOptional<z$1.ZodNullable<z$1.ZodNumber>>;
   columns: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnion<readonly [z$1.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z$1.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z$1.ZodUnion<readonly [z$1.ZodObject<{
     source: z$1.ZodLiteral<"repo">;
@@ -1486,7 +1486,7 @@ declare const defaultConfigKeySchema: z$1.ZodEnum<{
   cachedInputTokens: "cachedInputTokens";
   cacheCreationInputTokens: "cacheCreationInputTokens";
   reasoningTokens: "reasoningTokens";
-  llmLatencyMs: "llmLatencyMs";
+  llmDurationMs: "llmDurationMs";
 }>;
 /** Built-in eval-level output/column key. */
 type DefaultConfigKey = z$1.infer<typeof defaultConfigKeySchema>;
@@ -1501,7 +1501,7 @@ declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<t
   cachedInputTokens: "cachedInputTokens";
   cacheCreationInputTokens: "cacheCreationInputTokens";
   reasoningTokens: "reasoningTokens";
-  llmLatencyMs: "llmLatencyMs";
+  llmDurationMs: "llmDurationMs";
 }>>]>;
 /** Removal config for built-in eval-level outputs and UI metadata. */
 type RemoveDefaultConfig = z$1.infer<typeof removeDefaultConfigSchema>;
@@ -1595,7 +1595,7 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
 type ApiCallMetric = z$1.infer<typeof apiCallMetricSchema>;
 /**
  * Schema for one model/provider pricing entry used to derive LLM-call costs
- * from token counts when a span does not already record explicit USD costs.
+ * from token counts.
  */
 declare const llmCallPricingSchema: z$1.ZodObject<{
   model: z$1.ZodString;
@@ -1604,6 +1604,7 @@ declare const llmCallPricingSchema: z$1.ZodObject<{
   outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
   cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
   cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+  cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
   reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
 }, z$1.core.$strip>;
 /** Model/provider pricing entry authored in `agent-evals.config.ts`. */
@@ -1618,15 +1619,9 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
     outputTokens: z$1.ZodOptional<z$1.ZodString>;
     cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
     cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
+    cacheCreationInput1hTokens: z$1.ZodOptional<z$1.ZodString>;
     reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
-    totalTokens: z$1.ZodOptional<z$1.ZodString>;
-    tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
-    cost: z$1.ZodOptional<z$1.ZodString>;
-    inputCost: z$1.ZodOptional<z$1.ZodString>;
-    outputCost: z$1.ZodOptional<z$1.ZodString>;
-    cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
-    cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
-    reasoningCost: z$1.ZodOptional<z$1.ZodString>;
+    latencyMs: z$1.ZodOptional<z$1.ZodString>;
     steps: z$1.ZodOptional<z$1.ZodString>;
     finishReason: z$1.ZodOptional<z$1.ZodString>;
     input: z$1.ZodOptional<z$1.ZodString>;
@@ -1641,6 +1636,7 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
     outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
     cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
     cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+    cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
     reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
   }, z$1.core.$strip>>>;
   metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
@@ -1714,15 +1710,9 @@ type ResolvedLlmCallsConfig = {
     outputTokens: string;
     cachedInputTokens: string;
     cacheCreationInputTokens: string;
+    cacheCreationInput1hTokens: string;
     reasoningTokens: string;
-    totalTokens: string;
-    tokensPerSecond: string;
-    cost: string;
-    inputCost: string;
-    outputCost: string;
-    cachedInputCost: string;
-    cacheCreationInputCost: string;
-    reasoningCost: string;
+    latencyMs: string;
     steps: string;
     finishReason: string;
     input: string;
@@ -1776,6 +1766,7 @@ type ResolvedLlmCallPricing = {
   outputUsdPerMillion?: number;
   cachedInputUsdPerMillion?: number;
   cacheCreationInputUsdPerMillion?: number;
+  cacheCreationInput1hUsdPerMillion?: number;
   reasoningUsdPerMillion?: number;
 };
 /** Default LLM-calls config the UI uses before the workspace fetch resolves. */
@@ -1791,8 +1782,8 @@ declare const DEFAULT_API_CALLS_CONFIG: ResolvedApiCallsConfig;
  *   attribute path.
  * - Missing `metrics[].format` defaults to `'string'`.
  * - Missing `metrics[].placements` defaults to `['body']`.
- * - Missing `pricing` defaults to an empty registry; explicit span costs still
- *   take precedence over derived costs.
+ * - Missing `pricing` defaults to an empty registry; built-in costs are only
+ *   derived from configured pricing and token counts.
  */
 declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
 /**
@@ -1846,10 +1837,10 @@ type AgentEvalsConfig = {
    *
    * Determines which trace spans are treated as LLM calls (`kinds`), how
    * structured fields like `model` and `usage.inputTokens` are read from
-   * span attributes, and which custom user-defined metrics are surfaced on
-   * each call. All fields are optional and fall back to the documented
-   * defaults; the LLM calls tab is shown automatically when at least one
-   * matching span exists in a case run.
+   * span attributes, which pricing table derives built-in costs, and which
+   * custom user-defined metrics are surfaced on each call. All fields are
+   * optional and fall back to the documented defaults; the LLM calls tab is
+   * shown automatically when at least one matching span exists in a case run.
    *
    * @example
    * ```ts
@@ -1982,15 +1973,9 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
       outputTokens: z$1.ZodOptional<z$1.ZodString>;
       cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
       cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
+      cacheCreationInput1hTokens: z$1.ZodOptional<z$1.ZodString>;
       reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
-      totalTokens: z$1.ZodOptional<z$1.ZodString>;
-      tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
-      cost: z$1.ZodOptional<z$1.ZodString>;
-      inputCost: z$1.ZodOptional<z$1.ZodString>;
-      outputCost: z$1.ZodOptional<z$1.ZodString>;
-      cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
-      cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
-      reasoningCost: z$1.ZodOptional<z$1.ZodString>;
+      latencyMs: z$1.ZodOptional<z$1.ZodString>;
       steps: z$1.ZodOptional<z$1.ZodString>;
       finishReason: z$1.ZodOptional<z$1.ZodString>;
       input: z$1.ZodOptional<z$1.ZodString>;
@@ -2005,6 +1990,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
       outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
       cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
       cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+      cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
       reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
     }, z$1.core.$strip>>>;
     metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
@@ -2035,7 +2021,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
     cachedInputTokens: "cachedInputTokens";
     cacheCreationInputTokens: "cacheCreationInputTokens";
     reasoningTokens: "reasoningTokens";
-    llmLatencyMs: "llmLatencyMs";
+    llmDurationMs: "llmDurationMs";
   }>>]>>;
   apiCalls: z$1.ZodOptional<z$1.ZodObject<{
     kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -2104,7 +2090,8 @@ type LlmCallEntry = {
   cachedInputTokens: number | null;
   cacheCreationInputTokens: number | null;
   reasoningTokens: number | null;
-  totalTokens: number | null;
+  totalTokens: number | null; /** Time to first token for the LLM call in milliseconds, when reported by the span. */
+  latencyMs: number | null;
   tokensPerSecond: number | null;
   costUsd: number | null;
   inputCostUsd: number | null;
@@ -2114,8 +2101,8 @@ type LlmCallEntry = {
   reasoningCostUsd: number | null; /** Number of inference rounds. Derived from the array length when `stepDetails` is set. */
   stepCount: number | null; /** Per-step breakdown when the configured `steps` attribute resolves to an array. */
   stepDetails: unknown[] | null;
-  finishReason: string | null;
-  latencyMs: number | null;
+  finishReason: string | null; /** Elapsed LLM call span duration in milliseconds. */
+  durationMs: number | null;
   input: unknown;
   output: unknown;
   reasoning: unknown;
@@ -2129,16 +2116,22 @@ type LlmCallEntry = {
  * shape consumed by the LLM calls tab.
  *
  * Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
- * (`model`, token counts, explicit cost, etc.) are read via
+ * (`model`, token counts, latency, etc.) are read via
  * `getNestedAttribute` from the configured paths, with safe coercion to
- * `string | null` / `number | null`. When explicit USD costs are absent,
- * configured model pricing derives per-token-type costs from token counts.
- * `totalTokens` falls back to a sum of input + output + cached when no
- * explicit total attribute is present. The `steps` attribute path may resolve
- * to either a number (rendered as the inference-round count) or an array of
- * per-step detail objects (rendered as a Steps section in the body, with
- * `stepCount` derived from the array length). `latencyMs` is `null` while the
- * span is still running. User-defined `metrics` whose path resolves to
+ * `string | null` / `number | null`. `latencyMs` is an explicit
+ * time-to-first-token attribute; full span elapsed time is reported separately
+ * as `durationMs`. Built-in USD costs are derived only from configured model
+ * pricing and token counts. `totalTokens` is always derived from input +
+ * output tokens. Cached input and cache creation tokens are reported
+ * separately because they are subsets of input/output usage. The main cache
+ * creation token field is treated as the total write count; optional one-hour
+ * cache creation tokens only split that total for cost calculation. Base input
+ * cost uses input minus cache read/write tokens so cached tokens are not
+ * charged twice. Cache read/write costs still contribute to the total USD cost
+ * at their configured rates. The `steps` attribute path may resolve to an array
+ * of per-step detail objects, with `stepCount` derived from the array length.
+ * `durationMs` and `tokensPerSecond` are `null` while the span is still
+ * running. User-defined `metrics` whose path resolves to
  * `undefined` are dropped, but `null`, `0`, and `false` are preserved as
  * legitimate values worth displaying. Original span order is preserved so the
  * LLM calls tab matches the ordering in the Trace tab.
@@ -2163,8 +2156,8 @@ type ApiCallEntry = {
   status: EvalTraceSpan['status'];
   method: string | null;
   url: string | null;
-  statusCode: number | null;
-  latencyMs: number | null;
+  statusCode: number | null; /** Elapsed API call duration in milliseconds. */
+  durationMs: number | null;
   request: unknown;
   response: unknown;
   requestBody: unknown;
@@ -2181,10 +2174,10 @@ type ApiCallEntry = {
  *
  * Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
  * (`method`, `url`, `statusCode`, etc.) are read via `getNestedAttribute` from
- * the configured paths. `durationMs` takes precedence for latency, with a
- * fallback to the span start/end timestamps. User-defined `metrics` whose path
- * resolves to `undefined` are dropped, but `null`, `0`, and `false` are
- * preserved as legitimate values worth displaying. Original span order is
+ * the configured paths. An explicit `durationMs` attribute takes precedence,
+ * with a fallback to the span start/end timestamps. User-defined `metrics`
+ * whose path resolves to `undefined` are dropped, but `null`, `0`, and `false`
+ * are preserved as legitimate values worth displaying. Original span order is
  * preserved so the API calls tab matches the ordering in the Trace tab.
  */
 declare function extractApiCalls(spans: EvalTraceSpan[], config: ResolvedApiCallsConfig): ApiCallEntry[];
@@ -2765,6 +2758,12 @@ type EvalCase<TInput> = {
 };
 /** Runtime output values collected from output helpers and `deriveFromTracing`. */
 type EvalOutputs = Record<string, unknown>;
+/**
+ * Initial wall-clock time used by an eval's shifted Date clock.
+ *
+ * Pass `'now'` to opt one eval back into the real current clock.
+ */
+type EvalStartTime = Date | number | string;
 /**
  * Schema used to validate and type an eval's collected runtime outputs.
  *
@@ -2932,6 +2931,23 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
    * delay case finalization; late mutations are not guaranteed to persist.
    */
   waitForBackgroundJobs?: boolean;
+  /**
+   * Optional initial wall-clock time for this eval's runtime.
+   *
+   * When set, `new Date()` and `Date.now()` inside case generation, execution,
+   * tracing, derived outputs, and scorers start from this wall-clock value and
+   * then continue advancing with real elapsed time. The default is
+   * `2026-04-10T00:00:00.000Z`. Pass `'now'` to use the real current clock for
+   * this eval. Timers are not faked, so `setTimeout` and other asynchronous
+   * work still run normally.
+   */
+  startTime?: EvalStartTime;
+  /**
+   * Freeze the eval Date clock at `startTime` until `advanceEvalTime(...)`
+   * moves it manually. Defaults to `false`, so eval time advances with real
+   * elapsed time from the configured `startTime`.
+   */
+  freezeTime?: boolean;
   execute: (ctx: EvalExecuteContext<TInput, TOutputs>) => Promise<void> | void;
   deriveFromTracing?: (ctx: EvalDeriveContext<TInput>) => Partial<TOutputs> | Promise<Partial<TOutputs>>;
   scores?: Record<string, EvalScoreDef<TInput, TOutputs>>;
@@ -3025,6 +3041,9 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
 declare function repoFile(path: string, mimeType?: string): RepoFileRef;
 //#endregion
 //#region ../sdk/src/runtime.d.ts
+declare global {
+  var __agentEvalsRealDate: DateConstructor | undefined;
+}
 /**
  * Raw-key debug payload passed alongside cache writes.
  *
@@ -3070,7 +3089,15 @@ type CacheRecordingFrame = {
 };
 /** Mutable per-case runtime state stored in async local storage. */
 type EvalCaseScope = {
-  caseId: string; /** Stable prefix used by `nextEvalId()` for this eval case scope. */
+  caseId: string; /** Initial wall-clock time used by Date APIs inside this eval case. */
+  startTime: EvalStartTime | undefined; /** Mutable shifted wall-clock state shared across this eval case's phases. */
+  evalClockState: {
+    startMs: number;
+    realStartMs: number;
+    offsetMs: number;
+    frozen: boolean;
+    shifted: boolean;
+  }; /** Stable prefix used by `nextEvalId()` for this eval case scope. */
   idPrefix: string | undefined; /** Monotonic per-scope counter used by `nextEvalId()`. */
   nextEvalIdCounter: number; /** Authored input for the current case, when provided by the runner. */
   input?: unknown;
@@ -3108,11 +3135,28 @@ type EvalCaseScope = {
  * modules imported while a run is being prepared.
  */
 type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'outputsSchema' | 'scorer';
+/** Time unit accepted by `advanceEvalTime(unit, amount)`. */
+type EvalTimeUnit = 'millisecond' | 'milliseconds' | 'second' | 'seconds' | 'minute' | 'minutes' | 'hour' | 'hours' | 'day' | 'days';
 type EvalLogLevelInput = RunLogLevel | 'warning';
 /** Error thrown when an eval assertion fails during case execution. */
 declare class EvalAssertionError extends Error {
   constructor(message: string);
 }
+/**
+ * Return the wall-clock start time captured for the active eval.
+ *
+ * For `startTime: 'now'`, this is the real time captured when the eval clock
+ * context was created.
+ */
+declare function getEvalStartTime(): Date;
+/**
+ * Advance the active eval's shifted Date clock and return the new time.
+ *
+ * Throws outside an active shifted eval clock. Evals that set
+ * `startTime: 'now'` use the real current clock unless `freezeTime: true` is
+ * also set.
+ */
+declare function advanceEvalTime(unit: EvalTimeUnit, amount: number): Date;
 /** Return the current eval scope for the active async context, if any. */
 declare function getCurrentScope(): EvalCaseScope | undefined;
 /**
@@ -3162,7 +3206,9 @@ type RunInEvalScopeOptions = {
   idPrefix?: string; /** Cache adapter + mode attached to the scope before `fn` runs. */
   cacheContext?: CacheScopeContext; /** Whether registered background jobs should settle before scope finalizes. */
   waitForBackgroundJobs?: boolean; /** Eval runner phase exposed through `isInEvalScope()`. Defaults to `eval`. */
-  runtimeScope?: EvalRuntimeScope;
+  runtimeScope?: EvalRuntimeScope; /** Initial wall-clock time used by `new Date()` and `Date.now()` in this eval. */
+  startTime?: EvalStartTime; /** Whether Date APIs stay frozen until advanced manually. */
+  freezeTime?: boolean;
 };
 /** Execute a callback while `isInEvalScope()` reports a runner phase. */
 declare function runInEvalRuntimeScope<T>(runtimeScope: EvalRuntimeScope, fn: () => Promise<T> | T): Promise<T>;
@@ -3577,4 +3623,4 @@ declare function createRunner({
  */
 declare function runCli(argv: string[]): Promise<void>;
 //#endregion
-export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallsConfigInput, type NumberDisplayOptions, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
+export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallsConfigInput, type NumberDisplayOptions, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };

package/dist/index.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as setEvalOutput, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as incrementEvalOutput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as runInEvalRuntimeScope, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as nextEvalId, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as startEvalBackgroundJob, Mt as cacheModeSchema, N as getEvalTitle, Nn as repoFile, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as runInEvalScope, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as defineEval, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getEvalCaseInput, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as mergeEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as appendToEvalOutput, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalLog, bt as evalChartColorSchema, cn as evalSpan, ct as evalStatAggregateSchema, dn as hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as setScopeCacheContext, jt as cacheListItemSchema, k as extractCacheHits, kn as runInExistingEvalScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vt as evalChartAxisSchema, wn as isInEvalScope, wt as evalChartTypeSchema, xn as getCurrentScope, xt as evalChartConfigSchema, yn as evalAssert, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-zYAcAPtS.mjs";
-import { n as createRunner, t as runCli } from "./cli-DumvanQI.mjs";
-import "./src-BoAJb4wC.mjs";
-export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
+import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as runInEvalScope, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as getEvalCaseInput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as mergeEvalOutput, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as isInEvalScope, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as repoFile, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, In as defineEval, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Ln as getEvalRegistry, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as setEvalOutput, Mt as cacheModeSchema, N as getEvalTitle, Nn as setScopeCacheContext, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as nextEvalId, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as startEvalBackgroundJob, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getCurrentScope, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as incrementEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as advanceEvalTime, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalAssert, bt as evalChartColorSchema, cn as evalSpan, ct as evalStatAggregateSchema, dn as hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as runInExistingEvalScope, jt as cacheListItemSchema, k as extractCacheHits, kn as runInEvalRuntimeScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vn as appendToEvalOutput, vt as evalChartAxisSchema, wn as getEvalStartTime, wt as evalChartTypeSchema, xn as evalLog, xt as evalChartConfigSchema, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-CimthgI7.mjs";
+import { n as createRunner, t as runCli } from "./cli-D3QNOcPN.mjs";
+import "./src-CcXfWT4M.mjs";
+export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };

package/dist/runChild.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, vn as configureEvalRunLogs, w as createFsCacheStore, z as runSummarySchema } from "./runOrchestration-zYAcAPtS.mjs";
+import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, w as createFsCacheStore, yn as configureEvalRunLogs, z as runSummarySchema } from "./runOrchestration-CimthgI7.mjs";
 import { createHash } from "node:crypto";
 import { readFile } from "node:fs/promises";
 import { relative } from "node:path";