@ls-stack/agent-eval 0.58.2 → 0.58.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalLog } from "./runExecution-pHJ0_TzH.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-HBwXIJsg.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-AeXGBJ26.mjs";
1
+ import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalLog } from "./runExecution-CFw0MQFs.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-SP4kEtYL.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-7GbQj1sb.mjs";
4
4
  export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema } from "./runExecution-pHJ0_TzH.mjs";
2
- import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-ngVXShH4.mjs";
1
+ import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema } from "./runExecution-CFw0MQFs.mjs";
2
+ import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-CxjiQmof.mjs";
3
3
  import { z } from "zod/v4";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";
@@ -1009,9 +1009,8 @@ const removeDefaultConfigSchema = z.union([z.literal(true), z.array(defaultConfi
1009
1009
  const evalDeriveValueFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a derive output function" });
1010
1010
  /** Schema for keyed or object-returning trace-derived output config. */
1011
1011
  const evalDeriveConfigSchema = z.union([z.custom((value) => typeof value === "function", { message: "Expected a deriveFromTracing function" }), z.record(z.string().min(1), evalDeriveValueFnSchema)]);
1012
- const evalTracingAssertionsFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a tracing assertions function" });
1013
- /** Schema for function or keyed trace-derived assertion config. */
1014
- const evalTracingAssertionsConfigSchema = z.union([z.custom((value) => typeof value === "function", { message: "Expected a tracingAssertions function" }), z.record(z.string().min(1), evalTracingAssertionsFnSchema)]);
1012
+ /** Schema for trace-derived assertion config. */
1013
+ const evalTracingAssertionsConfigSchema = z.custom((value) => typeof value === "function", { message: "Expected a tracingAssertions function" });
1015
1014
  /** Schema for UI overrides on derived or scored columns. */
1016
1015
  const evalColumnOverrideSchema = z.object({
1017
1016
  label: z.string().optional(),
@@ -6754,19 +6753,9 @@ async function runOneTracingAssertion(params) {
6754
6753
  }
6755
6754
  async function runTracingAssertionsConfig(params) {
6756
6755
  if (params.tracingAssertions === void 0) return;
6757
- if (typeof params.tracingAssertions === "function") {
6758
- await runOneTracingAssertion({
6759
- label: "tracingAssertions",
6760
- tracingAssertion: params.tracingAssertions,
6761
- scope: params.scope,
6762
- traceTree: params.traceTree,
6763
- evalCase: params.evalCase
6764
- });
6765
- return;
6766
- }
6767
- for (const [key, tracingAssertion] of Object.entries(params.tracingAssertions)) await runOneTracingAssertion({
6768
- label: `tracingAssertions "${key}"`,
6769
- tracingAssertion,
6756
+ await runOneTracingAssertion({
6757
+ label: "tracingAssertions",
6758
+ tracingAssertion: params.tracingAssertions,
6770
6759
  scope: params.scope,
6771
6760
  traceTree: params.traceTree,
6772
6761
  evalCase: params.evalCase
@@ -1,4 +1,4 @@
1
- import { Dt as caseDetailSchema, Et as getCaseRowCaseKey, It as runWithEvalRegistry, J as runInEvalRuntimeScope, Ot as caseRowSchema, Z as runWithEvalClock, _t as matchesTagsFilter, bt as runManifestSchema, d as loadEvalModule, f as resolveEvalDefaultConfig, g as commitPendingCacheWrites, gt as dedupeEvalTags, ht as deriveStatusFromChildStatuses, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromCaseRows, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveScopedSummaryFromCases, t as filterEvalCases, u as runWithModuleIsolation, vt as validateEvalTagName, wt as buildCaseKey, xt as runSummarySchema, yt as validateTagsFilterExpression } from "./runExecution-pHJ0_TzH.mjs";
1
+ import { Dt as caseDetailSchema, Et as getCaseRowCaseKey, It as runWithEvalRegistry, J as runInEvalRuntimeScope, Ot as caseRowSchema, Z as runWithEvalClock, _t as matchesTagsFilter, bt as runManifestSchema, d as loadEvalModule, f as resolveEvalDefaultConfig, g as commitPendingCacheWrites, gt as dedupeEvalTags, ht as deriveStatusFromChildStatuses, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromCaseRows, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveScopedSummaryFromCases, t as filterEvalCases, u as runWithModuleIsolation, vt as validateEvalTagName, wt as buildCaseKey, xt as runSummarySchema, yt as validateTagsFilterExpression } from "./runExecution-CFw0MQFs.mjs";
2
2
  import { readFile, readdir, rm, writeFile } from "node:fs/promises";
3
3
  import { dirname, join } from "node:path";
4
4
  import { existsSync } from "node:fs";
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-D_pz2NON.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-CY3bgsjU.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-HBwXIJsg.mjs";
2
- import "./src-AeXGBJ26.mjs";
1
+ import { n as createRunner } from "./cli-SP4kEtYL.mjs";
2
+ import "./src-7GbQj1sb.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -1,5 +1,5 @@
1
- import { G as matchesEvalTags$1, Pt as defineEval$1 } from "./runExecution-pHJ0_TzH.mjs";
2
- import "./cli-HBwXIJsg.mjs";
1
+ import { G as matchesEvalTags$1, Pt as defineEval$1 } from "./runExecution-CFw0MQFs.mjs";
2
+ import "./cli-SP4kEtYL.mjs";
3
3
  //#region src/index.ts
4
4
  /** Register an eval definition with typed tag support. */
5
5
  function defineEval(definition) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.58.2",
3
+ "version": "0.58.3",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -366,10 +366,11 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
366
366
  form, return `undefined` to omit one output for that case. Do not call
367
367
  `evalAssert(...)` or `evalExpect(...)` from `deriveFromTracing`; use
368
368
  `tracingAssertions` for trace-derived pass/fail checks.
369
- - `tracingAssertions` can be authored globally or locally on one eval when a
370
- finished-trace invariant should pass or fail the case without creating a fake
371
- score column. It receives the same `{ trace, input, case }` context as
372
- `deriveFromTracing`; call `evalAssert(...)` or `evalExpect(...)` inside it.
369
+ - `tracingAssertions` is a single function that can be authored globally or
370
+ locally on one eval when a finished-trace invariant should pass or fail the
371
+ case without creating a fake score column. It receives the same
372
+ `{ trace, input, case }` context as `deriveFromTracing`; call
373
+ `evalAssert(...)` or `evalExpect(...)` inside it.
373
374
  Useful trace helpers include `trace.findSpan(name)`, `trace.findSpans(name)`,
374
375
  `trace.hasSpan(name)`, `trace.findSpansByKind(kind)`,
375
376
  `trace.findToolCallSpans()`, `trace.listToolCallSpanNames()`,