npm - @ls-stack/agent-eval - Versions diffs - 0.9.0 → 0.11.0 - Mend

@ls-stack/agent-eval 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/{app-hkNNN_jn.mjs → app-DI3IUGb_.mjs} +5 -4
package/dist/apps/web/dist/assets/index-BZ60j9UY.css +1 -0
package/dist/apps/web/dist/assets/index-CsSDwFI1.js +113 -0
package/dist/apps/web/dist/index.html +2 -2
package/dist/bin.mjs +1 -1
package/dist/{cli-DrPk66xh.mjs → cli-COzPxKg2.mjs} +8 -3
package/dist/index.d.mts +304 -25
package/dist/index.mjs +4 -4
package/dist/runChild.mjs +1 -1
package/dist/{runOrchestration-DA4Rh5g0.mjs → runOrchestration-COFhQvTJ.mjs} +257 -24
package/dist/{runner-DTP5Ui4_.mjs → runner-nQjuRZGC.mjs} +2 -2
package/dist/{runner-BzT3B9OF.mjs → runner-sMZXoDp3.mjs} +1 -1
package/dist/src-OZSs693X.mjs +3 -0
package/package.json +3 -3
package/dist/apps/web/dist/assets/index-ChgByJbI.css +0 -1
package/dist/apps/web/dist/assets/index-CmY0_D5Z.js +0 -113
package/dist/src-CfprG1RW.mjs +0 -3

package/dist/apps/web/dist/index.html CHANGED Viewed

@@ -25,8 +25,8 @@
       href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
       rel="stylesheet"
     />
-    <script type="module" crossorigin src="/assets/index-CmY0_D5Z.js"></script>
-    <link rel="stylesheet" crossorigin href="/assets/index-ChgByJbI.css">
+    <script type="module" crossorigin src="/assets/index-CsSDwFI1.js"></script>
+    <link rel="stylesheet" crossorigin href="/assets/index-BZ60j9UY.css">
   </head>
   <body>
     <div id="root"></div>

package/dist/bin.mjs CHANGED Viewed

@@ -1,5 +1,5 @@
 #!/usr/bin/env node
-import { t as runCli } from "./cli-DrPk66xh.mjs";
+import { t as runCli } from "./cli-COzPxKg2.mjs";
 import { spawn } from "node:child_process";
 //#region src/bin.ts
 const moduleMocksFlag = "--experimental-test-module-mocks";

package/dist/{cli-DrPk66xh.mjs → cli-COzPxKg2.mjs} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { A as deriveScopedSummaryFromCases, O as getEvalTitle, P as runSummarySchema, V as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as normalizeScoreDef, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, k as getEvalDisplayStatus, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, rn as getEvalRegistry, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as loadConfig, x as createFsCacheStore, y as buildDeclaredColumnDefs } from "./runOrchestration-DA4Rh5g0.mjs";
+import { A as getEvalDisplayStatus, F as runSummarySchema, J as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as normalizeScoreDef, c as persistCaseDetail, d as recomputePersistedCaseStatus, dn as getEvalRegistry, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, j as deriveScopedSummaryFromCases, k as getEvalTitle, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, q as resolveApiCallsConfig, r as getLastRunStatuses, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as loadConfig, x as createFsCacheStore, y as buildDeclaredColumnDefs } from "./runOrchestration-COFhQvTJ.mjs";
 import { createHash } from "node:crypto";
 import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
 import { dirname, join, relative, resolve } from "node:path";
@@ -418,6 +418,7 @@ function createRunner({ watchForChanges = true } = {}) {
 	let localStateDir;
 	let cacheStore;
 	let llmCallsConfig = resolveLlmCallsConfig(void 0);
+	let apiCallsConfig = resolveApiCallsConfig(void 0);
 	const evals = /* @__PURE__ */ new Map();
 	const runs = /* @__PURE__ */ new Map();
 	const lastRunStatusMap = /* @__PURE__ */ new Map();
@@ -441,6 +442,7 @@ function createRunner({ watchForChanges = true } = {}) {
 			workspaceRoot = config.workspaceRoot ?? process.cwd();
 			localStateDir = resolve(workspaceRoot, ".agent-evals");
 			llmCallsConfig = resolveLlmCallsConfig(config.llmCalls);
+			apiCallsConfig = resolveApiCallsConfig(config.apiCalls);
 			await mkdir(localStateDir, { recursive: true });
 			await mkdir(join(localStateDir, "runs"), { recursive: true });
 			cacheStore = createFsCacheStore({
@@ -812,6 +814,9 @@ function createRunner({ watchForChanges = true } = {}) {
 		getLlmCallsConfig() {
 			return llmCallsConfig;
 		},
+		getApiCallsConfig() {
+			return apiCallsConfig;
+		},
 		getArtifactPath(artifactId_) {
 			return resolveArtifactPath(join(localStateDir, "runs"), artifactId_);
 		}
@@ -1045,8 +1050,8 @@ async function commandApp(args) {
 	const { serve } = await import("@hono/node-server");
 	const bundledWebDist = resolve(currentDir, "apps/web/dist");
 	if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
-	const appModule = await import("./app-hkNNN_jn.mjs");
-	const runnerModule = await import("./runner-BzT3B9OF.mjs");
+	const appModule = await import("./app-DI3IUGb_.mjs");
+	const runnerModule = await import("./runner-sMZXoDp3.mjs");
 	if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
 	if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
 	await runnerModule.initRunner();

package/dist/index.d.mts CHANGED Viewed

@@ -1406,6 +1406,16 @@ declare const llmCallMetricFormatSchema: z$1.ZodEnum<{
 }>;
 /** Render format applied to an LLM-call metric value. */
 type LlmCallMetricFormat = z$1.infer<typeof llmCallMetricFormatSchema>;
+/** Render formats supported by an API-call metric in the UI. */
+declare const apiCallMetricFormatSchema: z$1.ZodEnum<{
+  string: "string";
+  number: "number";
+  boolean: "boolean";
+  duration: "duration";
+  json: "json";
+}>;
+/** Render format applied to an API-call metric value. */
+type ApiCallMetricFormat = z$1.infer<typeof apiCallMetricFormatSchema>;
 /** Where an LLM-call metric is rendered inside the LLM calls tab. */
 declare const llmCallMetricPlacementSchema: z$1.ZodEnum<{
   header: "header";
@@ -1413,6 +1423,13 @@ declare const llmCallMetricPlacementSchema: z$1.ZodEnum<{
 }>;
 /** Placement option for an LLM-call metric. */
 type LlmCallMetricPlacement = z$1.infer<typeof llmCallMetricPlacementSchema>;
+/** Where an API-call metric is rendered inside the API calls tab. */
+declare const apiCallMetricPlacementSchema: z$1.ZodEnum<{
+  header: "header";
+  body: "body";
+}>;
+/** Placement option for an API-call metric. */
+type ApiCallMetricPlacement = z$1.infer<typeof apiCallMetricPlacementSchema>;
 /**
  * Schema for a single user-defined metric attached to LLM call rows.
  *
@@ -1440,6 +1457,33 @@ declare const llmCallMetricSchema: z$1.ZodObject<{
 }, z$1.core.$strip>;
 /** User-defined metric authored in `agent-evals.config.ts`. */
 type LlmCallMetric = z$1.infer<typeof llmCallMetricSchema>;
+/**
+ * Schema for a single user-defined metric attached to API call rows.
+ *
+ * Each metric reads `path` from the span's `attributes` and renders the value
+ * with the configured `format` and `numberFormat`. `placements` controls
+ * whether the metric appears as a chip on the collapsed row header, as a row
+ * inside the expanded body, or both. Defaults to `['body']` when omitted.
+ */
+declare const apiCallMetricSchema: z$1.ZodObject<{
+  label: z$1.ZodString;
+  tooltip: z$1.ZodOptional<z$1.ZodString>;
+  path: z$1.ZodString;
+  format: z$1.ZodOptional<z$1.ZodEnum<{
+    string: "string";
+    number: "number";
+    boolean: "boolean";
+    duration: "duration";
+    json: "json";
+  }>>;
+  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
+  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
+    header: "header";
+    body: "body";
+  }>>>;
+}, z$1.core.$strip>;
+/** User-defined API-call metric authored in `agent-evals.config.ts`. */
+type ApiCallMetric = z$1.infer<typeof apiCallMetricSchema>;
 /** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
 declare const llmCallsConfigSchema: z$1.ZodObject<{
   kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -1485,6 +1529,41 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
 }, z$1.core.$strip>;
 /** Authored LLM calls config accepted from `agent-evals.config.ts`. */
 type LlmCallsConfigInput = z$1.infer<typeof llmCallsConfigSchema>;
+/** Schema for the global API calls config block in `agent-evals.config.ts`. */
+declare const apiCallsConfigSchema: z$1.ZodObject<{
+  kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
+  attributes: z$1.ZodOptional<z$1.ZodObject<{
+    method: z$1.ZodOptional<z$1.ZodString>;
+    url: z$1.ZodOptional<z$1.ZodString>;
+    statusCode: z$1.ZodOptional<z$1.ZodString>;
+    request: z$1.ZodOptional<z$1.ZodString>;
+    response: z$1.ZodOptional<z$1.ZodString>;
+    requestBody: z$1.ZodOptional<z$1.ZodString>;
+    responseBody: z$1.ZodOptional<z$1.ZodString>;
+    headers: z$1.ZodOptional<z$1.ZodString>;
+    durationMs: z$1.ZodOptional<z$1.ZodString>;
+    error: z$1.ZodOptional<z$1.ZodString>;
+  }, z$1.core.$strip>>;
+  metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
+    label: z$1.ZodString;
+    tooltip: z$1.ZodOptional<z$1.ZodString>;
+    path: z$1.ZodString;
+    format: z$1.ZodOptional<z$1.ZodEnum<{
+      string: "string";
+      number: "number";
+      boolean: "boolean";
+      duration: "duration";
+      json: "json";
+    }>>;
+    numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
+    placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
+      header: "header";
+      body: "body";
+    }>>>;
+  }, z$1.core.$strip>>>;
+}, z$1.core.$strip>;
+/** Authored API calls config accepted from `agent-evals.config.ts`. */
+type ApiCallsConfigInput = z$1.infer<typeof apiCallsConfigSchema>;
 /** Resolved LLM-calls config sent to the UI with all defaults applied. */
 type ResolvedLlmCallsConfig = {
   kinds: string[];
@@ -1512,6 +1591,23 @@ type ResolvedLlmCallsConfig = {
   };
   metrics: ResolvedLlmCallMetric[];
 };
+/** Resolved API-calls config sent to the UI with all defaults applied. */
+type ResolvedApiCallsConfig = {
+  kinds: string[];
+  attributes: {
+    method: string;
+    url: string;
+    statusCode: string;
+    request: string;
+    response: string;
+    requestBody: string;
+    responseBody: string;
+    headers: string;
+    durationMs: string;
+    error: string;
+  };
+  metrics: ResolvedApiCallMetric[];
+};
 /** Fully-resolved LLM-call metric used by the runner and UI. */
 type ResolvedLlmCallMetric = {
   label: string;
@@ -1521,8 +1617,19 @@ type ResolvedLlmCallMetric = {
   numberFormat?: NumberDisplayOptions;
   placements: LlmCallMetricPlacement[];
 };
+/** Fully-resolved API-call metric used by the runner and UI. */
+type ResolvedApiCallMetric = {
+  label: string;
+  tooltip?: string;
+  path: string;
+  format: ApiCallMetricFormat;
+  numberFormat?: NumberDisplayOptions;
+  placements: ApiCallMetricPlacement[];
+};
 /** Default LLM-calls config the UI uses before the workspace fetch resolves. */
 declare const DEFAULT_LLM_CALLS_CONFIG: ResolvedLlmCallsConfig;
+/** Default API-calls config the UI uses before the workspace fetch resolves. */
+declare const DEFAULT_API_CALLS_CONFIG: ResolvedApiCallsConfig;
 /**
  * Resolve the user-authored LLM-calls config to a fully-defaulted shape used
  * by the UI to derive the LLM calls tab.
@@ -1534,6 +1641,17 @@ declare const DEFAULT_LLM_CALLS_CONFIG: ResolvedLlmCallsConfig;
  * - Missing `metrics[].placements` defaults to `['body']`.
  */
 declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
+/**
+ * Resolve the user-authored API-calls config to a fully-defaulted shape used
+ * by the UI to derive the API calls tab.
+ *
+ * - Missing or empty `kinds` falls back to common API/HTTP span kinds.
+ * - Missing `attributes.<field>` falls back to the corresponding default
+ *   attribute path.
+ * - Missing `metrics[].format` defaults to `'string'`.
+ * - Missing `metrics[].placements` defaults to `['body']`.
+ */
+declare function resolveApiCallsConfig(input: ApiCallsConfigInput | undefined): ResolvedApiCallsConfig;
 /** Top-level config authored in `agent-evals.config.ts`. */
 type AgentEvalsConfig = {
   /** Root directory used to resolve all relative paths. Defaults to `process.cwd()`. */workspaceRoot?: string; /** Glob patterns (relative to `workspaceRoot`) used to discover eval files. */
@@ -1589,6 +1707,30 @@ type AgentEvalsConfig = {
    * ```
    */
   llmCalls?: LlmCallsConfigInput;
+  /**
+   * Configuration for the "API calls" tab in the case-run drawer.
+   *
+   * Determines which trace spans are treated as API calls (`kinds`), how
+   * structured fields like `method`, `url`, and `statusCode` are read from
+   * span attributes, and which custom user-defined metrics are surfaced on
+   * each call. All fields are optional and fall back to the documented
+   * defaults; the API calls tab is shown automatically when at least one
+   * matching span exists in a case run.
+   *
+   * @example
+   * ```ts
+   * apiCalls: {
+   *   kinds: ['api', 'http.client', 'undici.request'],
+   *   attributes: {
+   *     statusCode: 'http.status_code',
+   *   },
+   *   metrics: [
+   *     { label: 'Retries', path: 'retryCount', format: 'number' },
+   *   ],
+   * }
+   * ```
+   */
+  apiCalls?: ApiCallsConfigInput;
   /**
    * Optional controls for the operation cache. When omitted, the cache is
    * enabled and stored under `<workspaceRoot>/.agent-evals/cache`.
@@ -1691,6 +1833,38 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
       }>>>;
     }, z$1.core.$strip>>>;
   }, z$1.core.$strip>>;
+  apiCalls: z$1.ZodOptional<z$1.ZodObject<{
+    kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
+    attributes: z$1.ZodOptional<z$1.ZodObject<{
+      method: z$1.ZodOptional<z$1.ZodString>;
+      url: z$1.ZodOptional<z$1.ZodString>;
+      statusCode: z$1.ZodOptional<z$1.ZodString>;
+      request: z$1.ZodOptional<z$1.ZodString>;
+      response: z$1.ZodOptional<z$1.ZodString>;
+      requestBody: z$1.ZodOptional<z$1.ZodString>;
+      responseBody: z$1.ZodOptional<z$1.ZodString>;
+      headers: z$1.ZodOptional<z$1.ZodString>;
+      durationMs: z$1.ZodOptional<z$1.ZodString>;
+      error: z$1.ZodOptional<z$1.ZodString>;
+    }, z$1.core.$strip>>;
+    metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
+      label: z$1.ZodString;
+      tooltip: z$1.ZodOptional<z$1.ZodString>;
+      path: z$1.ZodString;
+      format: z$1.ZodOptional<z$1.ZodEnum<{
+        string: "string";
+        number: "number";
+        boolean: "boolean";
+        duration: "duration";
+        json: "json";
+      }>>;
+      numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
+      placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
+        header: "header";
+        body: "body";
+      }>>>;
+    }, z$1.core.$strip>>>;
+  }, z$1.core.$strip>>;
   cache: z$1.ZodOptional<z$1.ZodObject<{
     enabled: z$1.ZodOptional<z$1.ZodBoolean>;
     dir: z$1.ZodOptional<z$1.ZodString>;
@@ -1761,6 +1935,50 @@ type LlmCallEntry = {
  */
 declare function extractLlmCalls(spans: EvalTraceSpan[], config: ResolvedLlmCallsConfig): LlmCallEntry[];
 //#endregion
+//#region ../shared/src/utils/extractApiCalls.d.ts
+/** Resolved value for one user-defined metric on an API call row. */
+type ApiCallMetricValue = {
+  label: string;
+  tooltip: string | undefined;
+  rawValue: unknown;
+  format: ApiCallMetricFormat;
+  numberFormat: NumberDisplayOptions | undefined;
+  placements: ApiCallMetricPlacement[];
+};
+/** Single entry rendered as one expandable row in the API calls tab. */
+type ApiCallEntry = {
+  id: string;
+  name: string;
+  kind: string;
+  status: EvalTraceSpan['status'];
+  method: string | null;
+  url: string | null;
+  statusCode: number | null;
+  latencyMs: number | null;
+  request: unknown;
+  response: unknown;
+  requestBody: unknown;
+  responseBody: unknown;
+  headers: unknown;
+  errorPayload: unknown;
+  metrics: ApiCallMetricValue[];
+  warnings: EvalTraceSpanWarning[];
+  error: EvalTraceSpanError | null;
+};
+/**
+ * Filter `spans` down to API calls and project each one to the structured
+ * shape consumed by the API calls tab.
+ *
+ * Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
+ * (`method`, `url`, `statusCode`, etc.) are read via `getNestedAttribute` from
+ * the configured paths. `durationMs` takes precedence for latency, with a
+ * fallback to the span start/end timestamps. User-defined `metrics` whose path
+ * resolves to `undefined` are dropped, but `null`, `0`, and `false` are
+ * preserved as legitimate values worth displaying. Original span order is
+ * preserved so the API calls tab matches the ordering in the Trace tab.
+ */
+declare function extractApiCalls(spans: EvalTraceSpan[], config: ResolvedApiCallsConfig): ApiCallEntry[];
+//#endregion
 //#region ../shared/src/schemas/cache.d.ts
 /**
  * Mode that controls how the cache is consulted for a given run.
@@ -2196,7 +2414,7 @@ type EvalCase<TInput> = {
   input: TInput;
   tags?: string[];
 };
-/** Runtime output values collected from `setEvalOutput` and `deriveFromTracing`. */
+/** Runtime output values collected from output helpers and `deriveFromTracing`. */
 type EvalOutputs = Record<string, unknown>;
 /**
  * Schema used to validate and type an eval's collected runtime outputs.
@@ -2249,9 +2467,31 @@ type EvalTraceTree = {
   flattenDfs: () => EvalTraceSpan[];
   checkpoints: Map<string, unknown>;
 };
+/** Type-safe output writer passed to an eval's `execute` function. */
+type EvalSetOutput<TOutputs extends EvalOutputs = EvalOutputs> = <TKey extends Extract<keyof TOutputs, string>>(
+/**
+ * Output field to record. For narrowed output maps, this must be one of the
+ * known output keys.
+ */
+key: TKey,
+/**
+ * Value for the output field. For narrowed output maps, this must match the
+ * field's declared output type.
+ */
+value: TOutputs[TKey]) => void;
 /** Context passed to an eval's `execute` function for a single case run. */
-type EvalExecuteContext<TInput> = {
-  input: TInput;
+type EvalExecuteContext<TInput, TOutputs extends EvalOutputs = EvalOutputs> = {
+  /** Authored input for the active eval case. */input: TInput;
+  /**
+   * Record or replace an output value for the current case scope.
+   *
+   * When the eval has a narrowed outputs generic, keys and values are typed
+   * from that output map. The recorded values are still validated by
+   * `outputsSchema` before computed scores run.
+   */
+  setOutput: EvalSetOutput<TOutputs>;
 };
 /** Context passed to `deriveFromTracing` after execution has completed. */
 type EvalDeriveContext<TInput> = {
@@ -2293,8 +2533,31 @@ type EvalManualScoreDef = EvalColumnOverride & {
    */
   passThreshold?: number;
 };
-/** Complete authored eval definition consumed by `defineEval`. */
-type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs> = {
+type EvalDefinitionOutputSchemaConfig<TOutputs extends EvalOutputs> = [EvalOutputs] extends [TOutputs] ? {
+  /**
+   * Optional schema for runtime outputs collected through output helpers
+   * and `deriveFromTracing`.
+   *
+   * The runner validates configured output fields before scoring. For
+   * Zod object schemas, only declared keys are passed to the schema;
+   * parsed fields are merged back into the raw output map, so schema
+   * defaults and transforms apply to configured fields while
+   * unconfigured outputs are kept unchanged. Validation failures mark
+   * the case as failed and skip computed scores.
+   */
+  outputsSchema?: EvalOutputsSchema<TOutputs>;
+} : {
+  /**
+   * Required schema for typed runtime outputs collected through output
+   * helpers and `deriveFromTracing`.
+   *
+   * When `EvalDefinition` or `defineEval` receives an explicit narrowed
+   * outputs generic, this schema is required so scorer inputs are backed
+   * by runtime validation before computed scores run.
+   */
+  outputsSchema: EvalOutputsSchema<TOutputs>;
+};
+type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs> = {
   id: string;
   title?: string;
   /**
@@ -2304,17 +2567,6 @@ type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs
    * eval once using a synthetic case with empty object input.
    */
   cases?: EvalCase<TInput>[] | (() => Promise<EvalCase<TInput>[]>);
-  /**
-   * Optional schema for runtime outputs collected through `setEvalOutput` and
-   * `deriveFromTracing`.
-   *
-   * The runner validates configured output fields before scoring. For Zod
-   * object schemas, only declared keys are passed to the schema; parsed fields
-   * are merged back into the raw output map, so schema defaults and transforms
-   * apply to configured fields while unconfigured outputs are kept unchanged.
-   * Validation failures mark the case as failed and skip computed scores.
-   */
-  outputsSchema?: EvalOutputsSchema<TOutputs>;
   columns?: EvalColumns;
   /**
    * Per-eval trace attribute display rules for the UI.
@@ -2324,7 +2576,7 @@ type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs
    * `key` is provided.
    */
   traceDisplay?: TraceDisplayInputConfig;
-  execute: (ctx: EvalExecuteContext<TInput>) => Promise<void> | void;
+  execute: (ctx: EvalExecuteContext<TInput, TOutputs>) => Promise<void> | void;
   deriveFromTracing?: (ctx: EvalDeriveContext<TInput>) => Partial<TOutputs> | Promise<Partial<TOutputs>>;
   scores?: Record<string, EvalScoreDef<TInput, TOutputs>>;
   /**
@@ -2359,13 +2611,21 @@ type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs
    *
    * Each chart declares its `type` (`area | line | bar`) and one or more
    * `metrics`. Built-in metrics (`passRate`, `durationMs`) aggregate
-   * the run summary. Column metrics aggregate a score or numeric `setEvalOutput`
-   * column across the run using an `aggregate` reducer (`avg`, `sum`, `min`,
-   * `max`, `latest`, `passThresholdRate`). `passThresholdRate` requires a
-   * score column with `passThreshold`.
+   * the run summary. Column metrics aggregate a score or numeric output column
+   * across the run using an `aggregate` reducer (`avg`, `sum`, `min`, `max`,
+   * `latest`, `passThresholdRate`). `passThresholdRate` requires a score column
+   * with `passThreshold`.
    */
   charts?: EvalChartsConfig;
 };
+/**
+ * Complete authored eval definition consumed by `defineEval`.
+ *
+ * `outputsSchema` is optional for the default loose output map. When the
+ * `TOutputs` generic is narrowed, `outputsSchema` is required so the runtime
+ * validates collected outputs before exposing them as typed scorer inputs.
+ */
+type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs> = EvalDefinitionBase<TInput, TOutputs> & EvalDefinitionOutputSchemaConfig<TOutputs>;
 //#endregion
 //#region ../sdk/src/defineEval.d.ts
 /**
@@ -2423,7 +2683,9 @@ type CacheRecordingFrame = {
 };
 /** Mutable per-case runtime state stored in async local storage. */
 type EvalCaseScope = {
-  caseId: string; /** Authored input for the current case, when provided by the runner. */
+  caseId: string; /** Stable prefix used by `nextEvalId()` for this eval case scope. */
+  idPrefix: string | undefined; /** Monotonic per-scope counter used by `nextEvalId()`. */
+  nextEvalIdCounter: number; /** Authored input for the current case, when provided by the runner. */
   input?: unknown;
   outputs: Record<string, unknown>; /** Structured assertion failures recorded for the current case. */
   assertionFailures: AssertionFailure[];
@@ -2480,7 +2742,8 @@ declare function getEvalCaseInput(path: string): unknown;
 declare function setScopeCacheContext(scope: EvalCaseScope, context: CacheScopeContext): void;
 /** Optional inputs accepted when starting a new eval case scope. */
 type RunInEvalScopeOptions = {
-  /** Authored input for the active eval case. */input?: unknown; /** Cache adapter + mode attached to the scope before `fn` runs. */
+  /** Authored input for the active eval case. */input?: unknown; /** Stable prefix used when generating scoped IDs with `nextEvalId()`. */
+  idPrefix?: string; /** Cache adapter + mode attached to the scope before `fn` runs. */
   cacheContext?: CacheScopeContext;
 };
 /**
@@ -2492,6 +2755,15 @@ declare function runInEvalScope<T>(caseId: string, fn: () => Promise<T> | T, opt
   scope: EvalCaseScope;
   error: Error | undefined;
 }>;
+/**
+ * Return the next deterministic ID for the active eval case execution.
+ *
+ * The runner derives the ID prefix from the eval file, eval id, and case id,
+ * then this helper appends a per-scope sequence number. Calls outside an
+ * active eval case scope throw so accidental product-code usage is caught
+ * immediately.
+ */
+declare function nextEvalId(): string;
 /**
  * Record or replace an output value for the current case scope.
  *
@@ -2769,7 +3041,14 @@ type EvalRunner = {
    * Returns the workspace's `llmCalls` config block from
    * `agent-evals.config.ts` with all defaults applied.
    */
-  getLlmCallsConfig(): ResolvedLlmCallsConfig; /** Resolve a persisted artifact path when artifact storage is supported. */
+  getLlmCallsConfig(): ResolvedLlmCallsConfig;
+  /**
+   * Resolved API-calls config used by the UI to derive the API calls tab.
+   *
+   * Returns the workspace's `apiCalls` config block from
+   * `agent-evals.config.ts` with all defaults applied.
+   */
+  getApiCallsConfig(): ResolvedApiCallsConfig; /** Resolve a persisted artifact path when artifact storage is supported. */
   getArtifactPath(artifactId: string): string | undefined; /** Return summaries for every persisted cache entry in the workspace. */
   listCache(): Promise<CacheListItem[]>;
   /**
@@ -2833,4 +3112,4 @@ declare function createRunner({
  */
 declare function runCli(argv: string[]): Promise<void>;
 //#endregion
-export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type 
TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, 
traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
+export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, 
type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, 
runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };

package/dist/index.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { $ as evalChartAxisSchema, $t as setEvalOutput, A as deriveScopedSummaryFromCases, At as columnFormatSchema, B as llmCallsConfigSchema, Bt as evalSpan, C as updateManualScoreRequestSchema, Ct as traceDisplayInputConfigSchema, D as getNestedAttribute, Dt as traceSpanWarningSchema, E as extractLlmCalls, Et as traceSpanSchema, F as DEFAULT_LLM_CALLS_CONFIG, Ft as repoFileRefSchema, G as caseRowSchema, Gt as appendToEvalOutput, H as trialSelectionModeSchema, Ht as hashCacheKey, I as agentEvalsConfigSchema, It as runArtifactRefSchema, J as evalStatItemSchema, Jt as getEvalCaseInput, K as evalFreshnessStatusSchema, Kt as evalAssert, L as llmCallMetricFormatSchema, Lt as z, M as deriveStatusFromChildStatuses, Mt as fileRefSchema, N as runManifestSchema, Nt as jsonCellSchema, O as getEvalTitle, Ot as cellValueSchema, P as runSummarySchema, Pt as numberDisplayOptionsSchema, Q as evalChartAggregateSchema, Qt as runInEvalScope, R as llmCallMetricPlacementSchema, Rt as buildTraceTree, S as createRunRequestSchema, St as traceDisplayConfigSchema, T as extractCacheHits, Tt as traceSpanKindSchema, U as assertionFailureSchema, Ut as hashCacheKeySync, V as resolveLlmCallsConfig, Vt as evalTracer, W as caseDetailSchema, Wt as EvalAssertionError, X as evalSummarySchema, Xt as isInEvalScope, Y as evalStatsConfigSchema, Yt as incrementEvalOutput, Z as scoreTraceSchema, Zt as mergeEvalOutput, _t as traceCacheRefSchema, at as evalChartTypeSchema, bt as traceAttributeDisplayPlacementSchema, ct as cacheFileSchema, dt as cacheOperationTypeSchema, en as setScopeCacheContext, et as evalChartBuiltinMetricSchema, ft as cacheRecordingOpSchema, gt as spanCacheOptionsSchema, ht as serializedCacheSpanSchema, it as evalChartTooltipExtraSchema, j as deriveStatusFromCaseRows, jt as columnKindSchema, k as getEvalDisplayStatus, kt as columnDefSchema, lt as cacheListItemSchema, mt as cacheStatusSchema, nn as defineEval, nt as evalChartConfigSchema, ot as evalChartsConfigSchema, pt as 
cacheRecordingSchema, q as evalStatAggregateSchema, qt as getCurrentScope, rn as getEvalRegistry, rt as evalChartMetricSchema, st as cacheEntrySchema, tn as repoFile, tt as evalChartColorSchema, ut as cacheModeSchema, vt as traceAttributeDisplayFormatSchema, w as sseEnvelopeSchema, wt as traceSpanErrorSchema, xt as traceAttributeDisplaySchema, yt as traceAttributeDisplayInputSchema, z as llmCallMetricSchema, zt as captureEvalSpanError } from "./runOrchestration-DA4Rh5g0.mjs";
-import { n as createRunner, t as runCli } from "./cli-DrPk66xh.mjs";
-import "./src-CfprG1RW.mjs";
-export { DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
+import { $ as evalFreshnessStatusSchema, $t as evalAssert, A as getEvalDisplayStatus, At as traceDisplayInputConfigSchema, B as apiCallMetricPlacementSchema, Bt as jsonCellSchema, C as updateManualScoreRequestSchema, Ct as spanCacheOptionsSchema, D as extractLlmCalls, Dt as traceAttributeDisplayPlacementSchema, E as extractApiCalls, Et as traceAttributeDisplayInputSchema, F as runSummarySchema, Ft as cellValueSchema, G as llmCallMetricSchema, Gt as buildTraceTree, H as apiCallsConfigSchema, Ht as repoFileRefSchema, I as DEFAULT_API_CALLS_CONFIG, It as columnDefSchema, J as resolveLlmCallsConfig, Jt as evalTracer, K as llmCallsConfigSchema, Kt as captureEvalSpanError, L as DEFAULT_LLM_CALLS_CONFIG, Lt as columnFormatSchema, M as deriveStatusFromCaseRows, Mt as traceSpanKindSchema, N as deriveStatusFromChildStatuses, Nt as traceSpanSchema, O as getNestedAttribute, Ot as traceAttributeDisplaySchema, P as runManifestSchema, Pt as traceSpanWarningSchema, Q as caseRowSchema, Qt as appendToEvalOutput, R as agentEvalsConfigSchema, Rt as columnKindSchema, S as createRunRequestSchema, St as serializedCacheSpanSchema, T as extractCacheHits, Tt as traceAttributeDisplayFormatSchema, U as llmCallMetricFormatSchema, Ut as runArtifactRefSchema, V as apiCallMetricSchema, Vt as numberDisplayOptionsSchema, W as llmCallMetricPlacementSchema, Wt as z, X as assertionFailureSchema, Xt as hashCacheKeySync, Y as trialSelectionModeSchema, Yt as hashCacheKey, Z as caseDetailSchema, Zt as EvalAssertionError, _t as cacheModeSchema, an as nextEvalId, at as evalChartAggregateSchema, bt as cacheRecordingSchema, cn as setScopeCacheContext, ct as evalChartColorSchema, dn as getEvalRegistry, dt as evalChartTooltipExtraSchema, en as getCurrentScope, et as evalStatAggregateSchema, ft as evalChartTypeSchema, gt as cacheListItemSchema, ht as cacheFileSchema, in as mergeEvalOutput, it as scoreTraceSchema, j as deriveScopedSummaryFromCases, jt as traceSpanErrorSchema, k as getEvalTitle, kt as 
traceDisplayConfigSchema, ln as repoFile, lt as evalChartConfigSchema, mt as cacheEntrySchema, nn as incrementEvalOutput, nt as evalStatsConfigSchema, on as runInEvalScope, ot as evalChartAxisSchema, pt as evalChartsConfigSchema, q as resolveApiCallsConfig, qt as evalSpan, rn as isInEvalScope, rt as evalSummarySchema, sn as setEvalOutput, st as evalChartBuiltinMetricSchema, tn as getEvalCaseInput, tt as evalStatItemSchema, un as defineEval, ut as evalChartMetricSchema, vt as cacheOperationTypeSchema, w as sseEnvelopeSchema, wt as traceCacheRefSchema, xt as cacheStatusSchema, yt as cacheRecordingOpSchema, z as apiCallMetricFormatSchema, zt as fileRefSchema } from "./runOrchestration-COFhQvTJ.mjs";
+import { n as createRunner, t as runCli } from "./cli-COzPxKg2.mjs";
+import "./src-OZSs693X.mjs";
+export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, 
traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };

package/dist/runChild.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { N as runManifestSchema, P as runSummarySchema, S as createRunRequestSchema, Y as evalStatsConfigSchema, kt as columnDefSchema, ot as evalChartsConfigSchema, t as executeRun, v as loadConfig, x as createFsCacheStore } from "./runOrchestration-DA4Rh5g0.mjs";
+import { F as runSummarySchema, It as columnDefSchema, P as runManifestSchema, S as createRunRequestSchema, nt as evalStatsConfigSchema, pt as evalChartsConfigSchema, t as executeRun, v as loadConfig, x as createFsCacheStore } from "./runOrchestration-COFhQvTJ.mjs";
 import { createHash } from "node:crypto";
 import { readFile } from "node:fs/promises";
 import { z } from "zod/v4";