@ls-stack/agent-eval 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- import { C as updateManualScoreRequestSchema, S as createRunRequestSchema } from "./runOrchestration-DA4Rh5g0.mjs";
2
- import "./src-CfprG1RW.mjs";
3
- import { t as getRunnerInstance } from "./runner-DTP5Ui4_.mjs";
1
+ import { C as updateManualScoreRequestSchema, S as createRunRequestSchema } from "./runOrchestration-DwqX9_T7.mjs";
2
+ import "./src-Bx-CV6Wo.mjs";
3
+ import { t as getRunnerInstance } from "./runner-Gtlmvm3w.mjs";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { dirname, join, relative, resolve, sep } from "node:path";
6
6
  import { z } from "zod/v4";
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-DrPk66xh.mjs";
2
+ import { t as runCli } from "./cli-DLlRkyLH.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { A as deriveScopedSummaryFromCases, O as getEvalTitle, P as runSummarySchema, V as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as normalizeScoreDef, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, k as getEvalDisplayStatus, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, rn as getEvalRegistry, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as loadConfig, x as createFsCacheStore, y as buildDeclaredColumnDefs } from "./runOrchestration-DA4Rh5g0.mjs";
1
+ import { A as deriveScopedSummaryFromCases, O as getEvalTitle, P as runSummarySchema, V as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as normalizeScoreDef, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, in as getEvalRegistry, k as getEvalDisplayStatus, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as loadConfig, x as createFsCacheStore, y as buildDeclaredColumnDefs } from "./runOrchestration-DwqX9_T7.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { dirname, join, relative, resolve } from "node:path";
@@ -1045,8 +1045,8 @@ async function commandApp(args) {
1045
1045
  const { serve } = await import("@hono/node-server");
1046
1046
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
1047
1047
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
1048
- const appModule = await import("./app-hkNNN_jn.mjs");
1049
- const runnerModule = await import("./runner-BzT3B9OF.mjs");
1048
+ const appModule = await import("./app-sK9CjpNI.mjs");
1049
+ const runnerModule = await import("./runner-JrBz8ISs.mjs");
1050
1050
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
1051
1051
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
1052
1052
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -2196,7 +2196,7 @@ type EvalCase<TInput> = {
2196
2196
  input: TInput;
2197
2197
  tags?: string[];
2198
2198
  };
2199
- /** Runtime output values collected from `setEvalOutput` and `deriveFromTracing`. */
2199
+ /** Runtime output values collected from output helpers and `deriveFromTracing`. */
2200
2200
  type EvalOutputs = Record<string, unknown>;
2201
2201
  /**
2202
2202
  * Schema used to validate and type an eval's collected runtime outputs.
@@ -2249,9 +2249,31 @@ type EvalTraceTree = {
2249
2249
  flattenDfs: () => EvalTraceSpan[];
2250
2250
  checkpoints: Map<string, unknown>;
2251
2251
  };
2252
+ /** Type-safe output writer passed to an eval's `execute` function. */
2253
+ type EvalSetOutput<TOutputs extends EvalOutputs = EvalOutputs> = <TKey extends Extract<keyof TOutputs, string>>(
2254
+ /**
2255
+ * Output field to record. For narrowed output maps, this must be one of the
2256
+ * known output keys.
2257
+ */
2258
+
2259
+ key: TKey,
2260
+ /**
2261
+ * Value for the output field. For narrowed output maps, this must match the
2262
+ * field's declared output type.
2263
+ */
2264
+
2265
+ value: TOutputs[TKey]) => void;
2252
2266
  /** Context passed to an eval's `execute` function for a single case run. */
2253
- type EvalExecuteContext<TInput> = {
2254
- input: TInput;
2267
+ type EvalExecuteContext<TInput, TOutputs extends EvalOutputs = EvalOutputs> = {
2268
+ /** Authored input for the active eval case. */input: TInput;
2269
+ /**
2270
+ * Record or replace an output value for the current case scope.
2271
+ *
2272
+ * When the eval has a narrowed outputs generic, keys and values are typed
2273
+ * from that output map. The recorded values are still validated by
2274
+ * `outputsSchema` before computed scores run.
2275
+ */
2276
+ setOutput: EvalSetOutput<TOutputs>;
2255
2277
  };
2256
2278
  /** Context passed to `deriveFromTracing` after execution has completed. */
2257
2279
  type EvalDeriveContext<TInput> = {
@@ -2293,8 +2315,31 @@ type EvalManualScoreDef = EvalColumnOverride & {
2293
2315
  */
2294
2316
  passThreshold?: number;
2295
2317
  };
2296
- /** Complete authored eval definition consumed by `defineEval`. */
2297
- type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs> = {
2318
+ type EvalDefinitionOutputSchemaConfig<TOutputs extends EvalOutputs> = [EvalOutputs] extends [TOutputs] ? {
2319
+ /**
2320
+ * Optional schema for runtime outputs collected through output helpers
2321
+ * and `deriveFromTracing`.
2322
+ *
2323
+ * The runner validates configured output fields before scoring. For
2324
+ * Zod object schemas, only declared keys are passed to the schema;
2325
+ * parsed fields are merged back into the raw output map, so schema
2326
+ * defaults and transforms apply to configured fields while
2327
+ * unconfigured outputs are kept unchanged. Validation failures mark
2328
+ * the case as failed and skip computed scores.
2329
+ */
2330
+ outputsSchema?: EvalOutputsSchema<TOutputs>;
2331
+ } : {
2332
+ /**
2333
+ * Required schema for typed runtime outputs collected through output
2334
+ * helpers and `deriveFromTracing`.
2335
+ *
2336
+ * When `EvalDefinition` or `defineEval` receives an explicit narrowed
2337
+ * outputs generic, this schema is required so scorer inputs are backed
2338
+ * by runtime validation before computed scores run.
2339
+ */
2340
+ outputsSchema: EvalOutputsSchema<TOutputs>;
2341
+ };
2342
+ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs> = {
2298
2343
  id: string;
2299
2344
  title?: string;
2300
2345
  /**
@@ -2304,17 +2349,6 @@ type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs
2304
2349
  * eval once using a synthetic case with empty object input.
2305
2350
  */
2306
2351
  cases?: EvalCase<TInput>[] | (() => Promise<EvalCase<TInput>[]>);
2307
- /**
2308
- * Optional schema for runtime outputs collected through `setEvalOutput` and
2309
- * `deriveFromTracing`.
2310
- *
2311
- * The runner validates configured output fields before scoring. For Zod
2312
- * object schemas, only declared keys are passed to the schema; parsed fields
2313
- * are merged back into the raw output map, so schema defaults and transforms
2314
- * apply to configured fields while unconfigured outputs are kept unchanged.
2315
- * Validation failures mark the case as failed and skip computed scores.
2316
- */
2317
- outputsSchema?: EvalOutputsSchema<TOutputs>;
2318
2352
  columns?: EvalColumns;
2319
2353
  /**
2320
2354
  * Per-eval trace attribute display rules for the UI.
@@ -2324,7 +2358,7 @@ type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs
2324
2358
  * `key` is provided.
2325
2359
  */
2326
2360
  traceDisplay?: TraceDisplayInputConfig;
2327
- execute: (ctx: EvalExecuteContext<TInput>) => Promise<void> | void;
2361
+ execute: (ctx: EvalExecuteContext<TInput, TOutputs>) => Promise<void> | void;
2328
2362
  deriveFromTracing?: (ctx: EvalDeriveContext<TInput>) => Partial<TOutputs> | Promise<Partial<TOutputs>>;
2329
2363
  scores?: Record<string, EvalScoreDef<TInput, TOutputs>>;
2330
2364
  /**
@@ -2359,13 +2393,21 @@ type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs
2359
2393
  *
2360
2394
  * Each chart declares its `type` (`area | line | bar`) and one or more
2361
2395
  * `metrics`. Built-in metrics (`passRate`, `durationMs`) aggregate
2362
- * the run summary. Column metrics aggregate a score or numeric `setEvalOutput`
2363
- * column across the run using an `aggregate` reducer (`avg`, `sum`, `min`,
2364
- * `max`, `latest`, `passThresholdRate`). `passThresholdRate` requires a
2365
- * score column with `passThreshold`.
2396
+ * the run summary. Column metrics aggregate a score or numeric output column
2397
+ * across the run using an `aggregate` reducer (`avg`, `sum`, `min`, `max`,
2398
+ * `latest`, `passThresholdRate`). `passThresholdRate` requires a score column
2399
+ * with `passThreshold`.
2366
2400
  */
2367
2401
  charts?: EvalChartsConfig;
2368
2402
  };
2403
+ /**
2404
+ * Complete authored eval definition consumed by `defineEval`.
2405
+ *
2406
+ * `outputsSchema` is optional for the default loose output map. When the
2407
+ * `TOutputs` generic is narrowed, `outputsSchema` is required so the runtime
2408
+ * validates collected outputs before exposing them as typed scorer inputs.
2409
+ */
2410
+ type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs> = EvalDefinitionBase<TInput, TOutputs> & EvalDefinitionOutputSchemaConfig<TOutputs>;
2369
2411
  //#endregion
2370
2412
  //#region ../sdk/src/defineEval.d.ts
2371
2413
  /**
@@ -2423,7 +2465,9 @@ type CacheRecordingFrame = {
2423
2465
  };
2424
2466
  /** Mutable per-case runtime state stored in async local storage. */
2425
2467
  type EvalCaseScope = {
2426
- caseId: string; /** Authored input for the current case, when provided by the runner. */
2468
+ caseId: string; /** Stable prefix used by `nextEvalId()` for this eval case scope. */
2469
+ idPrefix: string | undefined; /** Monotonic per-scope counter used by `nextEvalId()`. */
2470
+ nextEvalIdCounter: number; /** Authored input for the current case, when provided by the runner. */
2427
2471
  input?: unknown;
2428
2472
  outputs: Record<string, unknown>; /** Structured assertion failures recorded for the current case. */
2429
2473
  assertionFailures: AssertionFailure[];
@@ -2480,7 +2524,8 @@ declare function getEvalCaseInput(path: string): unknown;
2480
2524
  declare function setScopeCacheContext(scope: EvalCaseScope, context: CacheScopeContext): void;
2481
2525
  /** Optional inputs accepted when starting a new eval case scope. */
2482
2526
  type RunInEvalScopeOptions = {
2483
- /** Authored input for the active eval case. */input?: unknown; /** Cache adapter + mode attached to the scope before `fn` runs. */
2527
+ /** Authored input for the active eval case. */input?: unknown; /** Stable prefix used when generating scoped IDs with `nextEvalId()`. */
2528
+ idPrefix?: string; /** Cache adapter + mode attached to the scope before `fn` runs. */
2484
2529
  cacheContext?: CacheScopeContext;
2485
2530
  };
2486
2531
  /**
@@ -2492,6 +2537,15 @@ declare function runInEvalScope<T>(caseId: string, fn: () => Promise<T> | T, opt
2492
2537
  scope: EvalCaseScope;
2493
2538
  error: Error | undefined;
2494
2539
  }>;
2540
+ /**
2541
+ * Return the next deterministic ID for the active eval case execution.
2542
+ *
2543
+ * The runner derives the ID prefix from the eval file, eval id, and case id,
2544
+ * then this helper appends a per-scope sequence number. Calls outside an
2545
+ * active eval case scope throw so accidental product-code usage is caught
2546
+ * immediately.
2547
+ */
2548
+ declare function nextEvalId(): string;
2495
2549
  /**
2496
2550
  * Record or replace an output value for the current case scope.
2497
2551
  *
@@ -2833,4 +2887,4 @@ declare function createRunner({
2833
2887
  */
2834
2888
  declare function runCli(argv: string[]): Promise<void>;
2835
2889
  //#endregion
2836
- export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
2890
+ export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as evalChartAxisSchema, $t as setEvalOutput, A as deriveScopedSummaryFromCases, At as columnFormatSchema, B as llmCallsConfigSchema, Bt as evalSpan, C as updateManualScoreRequestSchema, Ct as traceDisplayInputConfigSchema, D as getNestedAttribute, Dt as traceSpanWarningSchema, E as extractLlmCalls, Et as traceSpanSchema, F as DEFAULT_LLM_CALLS_CONFIG, Ft as repoFileRefSchema, G as caseRowSchema, Gt as appendToEvalOutput, H as trialSelectionModeSchema, Ht as hashCacheKey, I as agentEvalsConfigSchema, It as runArtifactRefSchema, J as evalStatItemSchema, Jt as getEvalCaseInput, K as evalFreshnessStatusSchema, Kt as evalAssert, L as llmCallMetricFormatSchema, Lt as z, M as deriveStatusFromChildStatuses, Mt as fileRefSchema, N as runManifestSchema, Nt as jsonCellSchema, O as getEvalTitle, Ot as cellValueSchema, P as runSummarySchema, Pt as numberDisplayOptionsSchema, Q as evalChartAggregateSchema, Qt as runInEvalScope, R as llmCallMetricPlacementSchema, Rt as buildTraceTree, S as createRunRequestSchema, St as traceDisplayConfigSchema, T as extractCacheHits, Tt as traceSpanKindSchema, U as assertionFailureSchema, Ut as hashCacheKeySync, V as resolveLlmCallsConfig, Vt as evalTracer, W as caseDetailSchema, Wt as EvalAssertionError, X as evalSummarySchema, Xt as isInEvalScope, Y as evalStatsConfigSchema, Yt as incrementEvalOutput, Z as scoreTraceSchema, Zt as mergeEvalOutput, _t as traceCacheRefSchema, at as evalChartTypeSchema, bt as traceAttributeDisplayPlacementSchema, ct as cacheFileSchema, dt as cacheOperationTypeSchema, en as setScopeCacheContext, et as evalChartBuiltinMetricSchema, ft as cacheRecordingOpSchema, gt as spanCacheOptionsSchema, ht as serializedCacheSpanSchema, it as evalChartTooltipExtraSchema, j as deriveStatusFromCaseRows, jt as columnKindSchema, k as getEvalDisplayStatus, kt as columnDefSchema, lt as cacheListItemSchema, mt as cacheStatusSchema, nn as defineEval, nt as evalChartConfigSchema, ot as evalChartsConfigSchema, pt as cacheRecordingSchema, q as evalStatAggregateSchema, qt as getCurrentScope, rn as getEvalRegistry, rt as evalChartMetricSchema, st as cacheEntrySchema, tn as repoFile, tt as evalChartColorSchema, ut as cacheModeSchema, vt as traceAttributeDisplayFormatSchema, w as sseEnvelopeSchema, wt as traceSpanErrorSchema, xt as traceAttributeDisplaySchema, yt as traceAttributeDisplayInputSchema, z as llmCallMetricSchema, zt as captureEvalSpanError } from "./runOrchestration-DA4Rh5g0.mjs";
2
- import { n as createRunner, t as runCli } from "./cli-DrPk66xh.mjs";
3
- import "./src-CfprG1RW.mjs";
4
- export { DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
1
+ import { $ as evalChartAxisSchema, $t as runInEvalScope, A as deriveScopedSummaryFromCases, At as columnFormatSchema, B as llmCallsConfigSchema, Bt as evalSpan, C as updateManualScoreRequestSchema, Ct as traceDisplayInputConfigSchema, D as getNestedAttribute, Dt as traceSpanWarningSchema, E as extractLlmCalls, Et as traceSpanSchema, F as DEFAULT_LLM_CALLS_CONFIG, Ft as repoFileRefSchema, G as caseRowSchema, Gt as appendToEvalOutput, H as trialSelectionModeSchema, Ht as hashCacheKey, I as agentEvalsConfigSchema, It as runArtifactRefSchema, J as evalStatItemSchema, Jt as getEvalCaseInput, K as evalFreshnessStatusSchema, Kt as evalAssert, L as llmCallMetricFormatSchema, Lt as z, M as deriveStatusFromChildStatuses, Mt as fileRefSchema, N as runManifestSchema, Nt as jsonCellSchema, O as getEvalTitle, Ot as cellValueSchema, P as runSummarySchema, Pt as numberDisplayOptionsSchema, Q as evalChartAggregateSchema, Qt as nextEvalId, R as llmCallMetricPlacementSchema, Rt as buildTraceTree, S as createRunRequestSchema, St as traceDisplayConfigSchema, T as extractCacheHits, Tt as traceSpanKindSchema, U as assertionFailureSchema, Ut as hashCacheKeySync, V as resolveLlmCallsConfig, Vt as evalTracer, W as caseDetailSchema, Wt as EvalAssertionError, X as evalSummarySchema, Xt as isInEvalScope, Y as evalStatsConfigSchema, Yt as incrementEvalOutput, Z as scoreTraceSchema, Zt as mergeEvalOutput, _t as traceCacheRefSchema, at as evalChartTypeSchema, bt as traceAttributeDisplayPlacementSchema, ct as cacheFileSchema, dt as cacheOperationTypeSchema, en as setEvalOutput, et as evalChartBuiltinMetricSchema, ft as cacheRecordingOpSchema, gt as spanCacheOptionsSchema, ht as serializedCacheSpanSchema, in as getEvalRegistry, it as evalChartTooltipExtraSchema, j as deriveStatusFromCaseRows, jt as columnKindSchema, k as getEvalDisplayStatus, kt as columnDefSchema, lt as cacheListItemSchema, mt as cacheStatusSchema, nn as repoFile, nt as evalChartConfigSchema, ot as evalChartsConfigSchema, pt as cacheRecordingSchema, q as evalStatAggregateSchema, qt as getCurrentScope, rn as defineEval, rt as evalChartMetricSchema, st as cacheEntrySchema, tn as setScopeCacheContext, tt as evalChartColorSchema, ut as cacheModeSchema, vt as traceAttributeDisplayFormatSchema, w as sseEnvelopeSchema, wt as traceSpanErrorSchema, xt as traceAttributeDisplaySchema, yt as traceAttributeDisplayInputSchema, z as llmCallMetricSchema, zt as captureEvalSpanError } from "./runOrchestration-DwqX9_T7.mjs";
2
+ import { n as createRunner, t as runCli } from "./cli-DLlRkyLH.mjs";
3
+ import "./src-Bx-CV6Wo.mjs";
4
+ export { DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { N as runManifestSchema, P as runSummarySchema, S as createRunRequestSchema, Y as evalStatsConfigSchema, kt as columnDefSchema, ot as evalChartsConfigSchema, t as executeRun, v as loadConfig, x as createFsCacheStore } from "./runOrchestration-DA4Rh5g0.mjs";
1
+ import { N as runManifestSchema, P as runSummarySchema, S as createRunRequestSchema, Y as evalStatsConfigSchema, kt as columnDefSchema, ot as evalChartsConfigSchema, t as executeRun, v as loadConfig, x as createFsCacheStore } from "./runOrchestration-DwqX9_T7.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { z } from "zod/v4";
@@ -105,6 +105,8 @@ function setScopeCacheContext(scope, context) {
105
105
  async function runInEvalScope(caseId, fn, options = {}) {
106
106
  const scope = {
107
107
  caseId,
108
+ idPrefix: options.idPrefix,
109
+ nextEvalIdCounter: 0,
108
110
  input: options.input,
109
111
  outputs: {},
110
112
  assertionFailures: [],
@@ -138,6 +140,21 @@ async function runInEvalScope(caseId, fn, options = {}) {
138
140
  activeEvalScopeCount--;
139
141
  }
140
142
  }
143
+ /**
144
+ * Return the next deterministic ID for the active eval case execution.
145
+ *
146
+ * The runner derives the ID prefix from the eval file, eval id, and case id,
147
+ * then this helper appends a per-scope sequence number. Calls outside an
148
+ * active eval case scope throw so accidental product-code usage is caught
149
+ * immediately.
150
+ */
151
+ function nextEvalId() {
152
+ const scope = getCurrentScope();
153
+ if (!scope) throw new Error("nextEvalId() must be called inside an active eval case");
154
+ if (scope.idPrefix === void 0) throw new Error("nextEvalId() requires a runner-provided eval id prefix");
155
+ scope.nextEvalIdCounter++;
156
+ return `${scope.idPrefix}-${scope.nextEvalIdCounter}`;
157
+ }
141
158
  function recordOpIfActive(scope, op) {
142
159
  if (scope.replayingDepth > 0) return;
143
160
  const top = scope.recordingStack.at(-1);
@@ -5186,14 +5203,35 @@ function resolveRunnableEvalCases(params) {
5186
5203
  input: {}
5187
5204
  }];
5188
5205
  }
5206
+ function toStableIdSegment(value) {
5207
+ const segment = value.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "");
5208
+ return segment.length > 0 ? segment : "id";
5209
+ }
5210
+ function buildScopedEvalIdPrefix(params) {
5211
+ const fileIdentity = relative(params.workspaceRoot, params.evalFilePath).replaceAll("\\", "/");
5212
+ return [
5213
+ toStableIdSegment(params.evalId),
5214
+ toStableIdSegment(fileIdentity),
5215
+ toStableIdSegment(params.caseId)
5216
+ ].join("-");
5217
+ }
5189
5218
  async function callWithUnknownResult(fn, args) {
5190
5219
  return await Reflect.apply(fn, void 0, args);
5191
5220
  }
5192
5221
  async function runCase(params) {
5193
- const { evalDef, evalId, evalCase, globalTraceDisplay, trial, startTime, cacheAdapter, cacheMode, codeFingerprint, moduleIsolation, artifactDir, runId } = params;
5222
+ const { evalDef, evalId, evalCase, globalTraceDisplay, trial, startTime, cacheAdapter, cacheMode, codeFingerprint, moduleIsolation, evalFilePath, workspaceRoot, artifactDir, runId } = params;
5223
+ const scopedIdPrefix = buildScopedEvalIdPrefix({
5224
+ evalId,
5225
+ evalFilePath,
5226
+ caseId: evalCase.id,
5227
+ workspaceRoot
5228
+ });
5194
5229
  const { scope, error: executeError } = await runInEvalScope(evalCase.id, async () => {
5195
5230
  const execute = async () => {
5196
- await Reflect.apply(evalDef.execute, evalDef, [{ input: evalCase.input }]);
5231
+ await Reflect.apply(evalDef.execute, evalDef, [{
5232
+ input: evalCase.input,
5233
+ setOutput: setEvalOutput
5234
+ }]);
5197
5235
  };
5198
5236
  if (moduleIsolation === void 0) {
5199
5237
  await execute();
@@ -5202,6 +5240,7 @@ async function runCase(params) {
5202
5240
  await runWithModuleIsolation(moduleIsolation, execute);
5203
5241
  }, {
5204
5242
  input: evalCase.input,
5243
+ idPrefix: scopedIdPrefix,
5205
5244
  cacheContext: cacheAdapter ? {
5206
5245
  adapter: cacheAdapter,
5207
5246
  mode: cacheMode,
@@ -5246,6 +5285,7 @@ async function runCase(params) {
5246
5285
  return await runWithModuleIsolation(moduleIsolation, computeScore);
5247
5286
  }, {
5248
5287
  input: evalCase.input,
5288
+ idPrefix: `${scopedIdPrefix}-score-${toStableIdSegment(key)}`,
5249
5289
  cacheContext: cacheAdapter ? {
5250
5290
  adapter: cacheAdapter,
5251
5291
  mode: cacheMode,
@@ -5573,6 +5613,8 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
5573
5613
  cacheMode,
5574
5614
  codeFingerprint,
5575
5615
  moduleIsolation,
5616
+ evalFilePath,
5617
+ workspaceRoot,
5576
5618
  artifactDir: join(runDir, "artifacts"),
5577
5619
  runId: runState.manifest.id
5578
5620
  });
@@ -5714,4 +5756,4 @@ function toLastRunStatus(status) {
5714
5756
  return status === "pending" ? null : status;
5715
5757
  }
5716
5758
  //#endregion
5717
- export { evalChartAxisSchema as $, setEvalOutput as $t, deriveScopedSummaryFromCases as A, columnFormatSchema as At, llmCallsConfigSchema as B, evalSpan as Bt, updateManualScoreRequestSchema as C, traceDisplayInputConfigSchema as Ct, getNestedAttribute as D, traceSpanWarningSchema as Dt, extractLlmCalls as E, traceSpanSchema as Et, DEFAULT_LLM_CALLS_CONFIG as F, repoFileRefSchema as Ft, caseRowSchema as G, appendToEvalOutput as Gt, trialSelectionModeSchema as H, hashCacheKey as Ht, agentEvalsConfigSchema as I, runArtifactRefSchema as It, evalStatItemSchema as J, getEvalCaseInput as Jt, evalFreshnessStatusSchema as K, evalAssert as Kt, llmCallMetricFormatSchema as L, z$1 as Lt, deriveStatusFromChildStatuses as M, fileRefSchema as Mt, runManifestSchema as N, jsonCellSchema as Nt, getEvalTitle as O, cellValueSchema as Ot, runSummarySchema as P, numberDisplayOptionsSchema as Pt, evalChartAggregateSchema as Q, runInEvalScope as Qt, llmCallMetricPlacementSchema as R, buildTraceTree as Rt, createRunRequestSchema as S, traceDisplayConfigSchema as St, extractCacheHits as T, traceSpanKindSchema as Tt, assertionFailureSchema as U, hashCacheKeySync as Ut, resolveLlmCallsConfig as V, evalTracer as Vt, caseDetailSchema as W, EvalAssertionError as Wt, evalSummarySchema as X, isInEvalScope as Xt, evalStatsConfigSchema as Y, incrementEvalOutput as Yt, scoreTraceSchema as Z, mergeEvalOutput as Zt, loadEvalModule as _, traceCacheRefSchema as _t, loadPersistedRunSnapshot as a, evalChartTypeSchema as at, normalizeScoreDef as b, traceAttributeDisplayPlacementSchema as bt, persistCaseDetail as c, cacheFileSchema as ct, recomputePersistedCaseStatus as d, cacheOperationTypeSchema as dt, setScopeCacheContext as en, evalChartBuiltinMetricSchema as et, runTouchesEval as f, cacheRecordingOpSchema as ft, setLatestRunInfoMap as g, spanCacheOptionsSchema as gt, getTargetEvalIds as h, serializedCacheSpanSchema as ht, getLatestRunInfos as i, evalChartTooltipExtraSchema as it, deriveStatusFromCaseRows as j, columnKindSchema as jt, getEvalDisplayStatus as k, columnDefSchema as kt, persistRunState as l, cacheListItemSchema as lt, buildEvalSummary as m, cacheStatusSchema as mt, generateRunId as n, defineEval as nn, evalChartConfigSchema as nt, loadPersistedRunSnapshots as o, evalChartsConfigSchema as ot, resolveArtifactPath as p, cacheRecordingSchema as pt, evalStatAggregateSchema as q, getCurrentScope as qt, getLastRunStatuses as r, getEvalRegistry as rn, evalChartMetricSchema as rt, nextShortIdFromSnapshots as s, cacheEntrySchema as st, executeRun as t, repoFile as tn, evalChartColorSchema as tt, recomputeEvalStatusesInRuns as u, cacheModeSchema as ut, loadConfig as v, traceAttributeDisplayFormatSchema as vt, sseEnvelopeSchema as w, traceSpanErrorSchema as wt, createFsCacheStore as x, traceAttributeDisplaySchema as xt, buildDeclaredColumnDefs as y, traceAttributeDisplayInputSchema as yt, llmCallMetricSchema as z, captureEvalSpanError as zt };
5759
+ export { evalChartAxisSchema as $, runInEvalScope as $t, deriveScopedSummaryFromCases as A, columnFormatSchema as At, llmCallsConfigSchema as B, evalSpan as Bt, updateManualScoreRequestSchema as C, traceDisplayInputConfigSchema as Ct, getNestedAttribute as D, traceSpanWarningSchema as Dt, extractLlmCalls as E, traceSpanSchema as Et, DEFAULT_LLM_CALLS_CONFIG as F, repoFileRefSchema as Ft, caseRowSchema as G, appendToEvalOutput as Gt, trialSelectionModeSchema as H, hashCacheKey as Ht, agentEvalsConfigSchema as I, runArtifactRefSchema as It, evalStatItemSchema as J, getEvalCaseInput as Jt, evalFreshnessStatusSchema as K, evalAssert as Kt, llmCallMetricFormatSchema as L, z$1 as Lt, deriveStatusFromChildStatuses as M, fileRefSchema as Mt, runManifestSchema as N, jsonCellSchema as Nt, getEvalTitle as O, cellValueSchema as Ot, runSummarySchema as P, numberDisplayOptionsSchema as Pt, evalChartAggregateSchema as Q, nextEvalId as Qt, llmCallMetricPlacementSchema as R, buildTraceTree as Rt, createRunRequestSchema as S, traceDisplayConfigSchema as St, extractCacheHits as T, traceSpanKindSchema as Tt, assertionFailureSchema as U, hashCacheKeySync as Ut, resolveLlmCallsConfig as V, evalTracer as Vt, caseDetailSchema as W, EvalAssertionError as Wt, evalSummarySchema as X, isInEvalScope as Xt, evalStatsConfigSchema as Y, incrementEvalOutput as Yt, scoreTraceSchema as Z, mergeEvalOutput as Zt, loadEvalModule as _, traceCacheRefSchema as _t, loadPersistedRunSnapshot as a, evalChartTypeSchema as at, normalizeScoreDef as b, traceAttributeDisplayPlacementSchema as bt, persistCaseDetail as c, cacheFileSchema as ct, recomputePersistedCaseStatus as d, cacheOperationTypeSchema as dt, setEvalOutput as en, evalChartBuiltinMetricSchema as et, runTouchesEval as f, cacheRecordingOpSchema as ft, setLatestRunInfoMap as g, spanCacheOptionsSchema as gt, getTargetEvalIds as h, serializedCacheSpanSchema as ht, getLatestRunInfos as i, getEvalRegistry as in, evalChartTooltipExtraSchema as it, deriveStatusFromCaseRows as j, columnKindSchema as jt, getEvalDisplayStatus as k, columnDefSchema as kt, persistRunState as l, cacheListItemSchema as lt, buildEvalSummary as m, cacheStatusSchema as mt, generateRunId as n, repoFile as nn, evalChartConfigSchema as nt, loadPersistedRunSnapshots as o, evalChartsConfigSchema as ot, resolveArtifactPath as p, cacheRecordingSchema as pt, evalStatAggregateSchema as q, getCurrentScope as qt, getLastRunStatuses as r, defineEval as rn, evalChartMetricSchema as rt, nextShortIdFromSnapshots as s, cacheEntrySchema as st, executeRun as t, setScopeCacheContext as tn, evalChartColorSchema as tt, recomputeEvalStatusesInRuns as u, cacheModeSchema as ut, loadConfig as v, traceAttributeDisplayFormatSchema as vt, sseEnvelopeSchema as w, traceSpanErrorSchema as wt, createFsCacheStore as x, traceAttributeDisplaySchema as xt, buildDeclaredColumnDefs as y, traceAttributeDisplayInputSchema as yt, llmCallMetricSchema as z, captureEvalSpanError as zt };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-DrPk66xh.mjs";
2
- import "./src-CfprG1RW.mjs";
1
+ import { n as createRunner } from "./cli-DLlRkyLH.mjs";
2
+ import "./src-Bx-CV6Wo.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-DTP5Ui4_.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-Gtlmvm3w.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -0,0 +1,3 @@
1
+ import "./runOrchestration-DwqX9_T7.mjs";
2
+ import "./cli-DLlRkyLH.mjs";
3
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.9.0",
3
+ "version": "0.10.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -58,8 +58,8 @@
58
58
  "@types/node": "^24.7.2",
59
59
  "typescript": "^5.9.2",
60
60
  "@agent-evals/runner": "0.0.1",
61
- "@agent-evals/shared": "0.0.1",
62
- "@agent-evals/sdk": "0.0.1"
61
+ "@agent-evals/sdk": "0.0.1",
62
+ "@agent-evals/shared": "0.0.1"
63
63
  },
64
64
  "scripts": {
65
65
  "build": "pnpm --filter @agent-evals/web build && tsdown",
@@ -1,3 +0,0 @@
1
- import "./runOrchestration-DA4Rh5g0.mjs";
2
- import "./cli-DrPk66xh.mjs";
3
- export {};