npm - @ls-stack/agent-eval - Versions diffs - 0.58.2 → 0.58.3 - Mend

@ls-stack/agent-eval 0.58.2 → 0.58.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/{app-BxD6aHbp.mjs → app-ROCEce9X.mjs} +4 -4
package/dist/apps/web/dist/assets/{index-BMWBZw_u.js → index-PTikBbhf.js} +65 -65
package/dist/apps/web/dist/index.html +1 -1
package/dist/bin.mjs +1 -1
package/dist/caseChild.mjs +1 -1
package/dist/{cli-HBwXIJsg.mjs → cli-SP4kEtYL.mjs} +4 -4
package/dist/index.d.mts +116 -120
package/dist/index.mjs +3 -3
package/dist/runChild.mjs +2 -2
package/dist/{runExecution-pHJ0_TzH.mjs → runExecution-CFw0MQFs.mjs} +5 -16
package/dist/{runOrchestration-ngVXShH4.mjs → runOrchestration-CxjiQmof.mjs} +1 -1
package/dist/{runner-BnZMGBla.mjs → runner-BlFQyvN2.mjs} +1 -1
package/dist/{runner-D_pz2NON.mjs → runner-CY3bgsjU.mjs} +2 -2
package/dist/{src-AeXGBJ26.mjs → src-7GbQj1sb.mjs} +2 -2
package/package.json +1 -1
package/skills/agent-eval/SKILL.md +5 -4

package/dist/index.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalLog } from "./runExecution-pHJ0_TzH.mjs";
-import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-HBwXIJsg.mjs";
-import { n as matchesEvalTags, t as defineEval } from "./src-AeXGBJ26.mjs";
+import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalLog } from "./runExecution-CFw0MQFs.mjs";
+import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-SP4kEtYL.mjs";
+import { n as matchesEvalTags, t as defineEval } from "./src-7GbQj1sb.mjs";
 export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };

package/dist/runChild.mjs CHANGED Viewed

@@ -1,5 +1,5 @@
-import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema } from "./runExecution-pHJ0_TzH.mjs";
-import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-ngVXShH4.mjs";
+import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema } from "./runExecution-CFw0MQFs.mjs";
+import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-CxjiQmof.mjs";
 import { z } from "zod/v4";
 import { readFile } from "node:fs/promises";
 import { relative } from "node:path";

package/dist/{runExecution-pHJ0_TzH.mjs → runExecution-CFw0MQFs.mjs} RENAMED Viewed

@@ -1009,9 +1009,8 @@ const removeDefaultConfigSchema = z.union([z.literal(true), z.array(defaultConfi
 const evalDeriveValueFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a derive output function" });
 /** Schema for keyed or object-returning trace-derived output config. */
 const evalDeriveConfigSchema = z.union([z.custom((value) => typeof value === "function", { message: "Expected a deriveFromTracing function" }), z.record(z.string().min(1), evalDeriveValueFnSchema)]);
-const evalTracingAssertionsFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a tracing assertions function" });
-/** Schema for function or keyed trace-derived assertion config. */
-const evalTracingAssertionsConfigSchema = z.union([z.custom((value) => typeof value === "function", { message: "Expected a tracingAssertions function" }), z.record(z.string().min(1), evalTracingAssertionsFnSchema)]);
+/** Schema for trace-derived assertion config. */
+const evalTracingAssertionsConfigSchema = z.custom((value) => typeof value === "function", { message: "Expected a tracingAssertions function" });
 /** Schema for UI overrides on derived or scored columns. */
 const evalColumnOverrideSchema = z.object({
 	label: z.string().optional(),
@@ -6754,19 +6753,9 @@ async function runOneTracingAssertion(params) {
 }
 async function runTracingAssertionsConfig(params) {
 	if (params.tracingAssertions === void 0) return;
-	if (typeof params.tracingAssertions === "function") {
-		await runOneTracingAssertion({
-			label: "tracingAssertions",
-			tracingAssertion: params.tracingAssertions,
-			scope: params.scope,
-			traceTree: params.traceTree,
-			evalCase: params.evalCase
-		});
-		return;
-	}
-	for (const [key, tracingAssertion] of Object.entries(params.tracingAssertions)) await runOneTracingAssertion({
-		label: `tracingAssertions "${key}"`,
-		tracingAssertion,
+	await runOneTracingAssertion({
+		label: "tracingAssertions",
+		tracingAssertion: params.tracingAssertions,
 		scope: params.scope,
 		traceTree: params.traceTree,
 		evalCase: params.evalCase

package/dist/{runOrchestration-ngVXShH4.mjs → runOrchestration-CxjiQmof.mjs} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { Dt as caseDetailSchema, Et as getCaseRowCaseKey, It as runWithEvalRegistry, J as runInEvalRuntimeScope, Ot as caseRowSchema, Z as runWithEvalClock, _t as matchesTagsFilter, bt as runManifestSchema, d as loadEvalModule, f as resolveEvalDefaultConfig, g as commitPendingCacheWrites, gt as dedupeEvalTags, ht as deriveStatusFromChildStatuses, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromCaseRows, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveScopedSummaryFromCases, t as filterEvalCases, u as runWithModuleIsolation, vt as validateEvalTagName, wt as buildCaseKey, xt as runSummarySchema, yt as validateTagsFilterExpression } from "./runExecution-pHJ0_TzH.mjs";
+import { Dt as caseDetailSchema, Et as getCaseRowCaseKey, It as runWithEvalRegistry, J as runInEvalRuntimeScope, Ot as caseRowSchema, Z as runWithEvalClock, _t as matchesTagsFilter, bt as runManifestSchema, d as loadEvalModule, f as resolveEvalDefaultConfig, g as commitPendingCacheWrites, gt as dedupeEvalTags, ht as deriveStatusFromChildStatuses, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromCaseRows, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveScopedSummaryFromCases, t as filterEvalCases, u as runWithModuleIsolation, vt as validateEvalTagName, wt as buildCaseKey, xt as runSummarySchema, yt as validateTagsFilterExpression } from "./runExecution-CFw0MQFs.mjs";
 import { readFile, readdir, rm, writeFile } from "node:fs/promises";
 import { dirname, join } from "node:path";
 import { existsSync } from "node:fs";

package/dist/{runner-BnZMGBla.mjs → runner-BlFQyvN2.mjs} RENAMED Viewed

@@ -1,2 +1,2 @@
-import { n as initRunner, t as getRunnerInstance } from "./runner-D_pz2NON.mjs";
+import { n as initRunner, t as getRunnerInstance } from "./runner-CY3bgsjU.mjs";
 export { getRunnerInstance, initRunner };

package/dist/{runner-D_pz2NON.mjs → runner-CY3bgsjU.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { n as createRunner } from "./cli-HBwXIJsg.mjs";
-import "./src-AeXGBJ26.mjs";
+import { n as createRunner } from "./cli-SP4kEtYL.mjs";
+import "./src-7GbQj1sb.mjs";
 //#region ../../apps/server/src/runner.ts
 let runnerInstance = null;
 function getRunnerInstance() {

package/dist/{src-AeXGBJ26.mjs → src-7GbQj1sb.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { G as matchesEvalTags$1, Pt as defineEval$1 } from "./runExecution-pHJ0_TzH.mjs";
-import "./cli-HBwXIJsg.mjs";
+import { G as matchesEvalTags$1, Pt as defineEval$1 } from "./runExecution-CFw0MQFs.mjs";
+import "./cli-SP4kEtYL.mjs";
 //#region src/index.ts
 /** Register an eval definition with typed tag support. */
 function defineEval(definition) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ls-stack/agent-eval",
-  "version": "0.58.2",
+  "version": "0.58.3",
   "type": "module",
   "bin": {
     "agent-evals": "./dist/bin.mjs"

package/skills/agent-eval/SKILL.md CHANGED Viewed

@@ -366,10 +366,11 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
   form, return `undefined` to omit one output for that case. Do not call
   `evalAssert(...)` or `evalExpect(...)` from `deriveFromTracing`; use
   `tracingAssertions` for trace-derived pass/fail checks.
-- `tracingAssertions` can be authored globally or locally on one eval when a
-  finished-trace invariant should pass or fail the case without creating a fake
-  score column. It receives the same `{ trace, input, case }` context as
-  `deriveFromTracing`; call `evalAssert(...)` or `evalExpect(...)` inside it.
+- `tracingAssertions` is a single function that can be authored globally or
+  locally on one eval when a finished-trace invariant should pass or fail the
+  case without creating a fake score column. It receives the same
+  `{ trace, input, case }` context as `deriveFromTracing`; call
+  `evalAssert(...)` or `evalExpect(...)` inside it.
   Useful trace helpers include `trace.findSpan(name)`, `trace.findSpans(name)`,
   `trace.hasSpan(name)`, `trace.findSpansByKind(kind)`,
   `trace.findToolCallSpans()`, `trace.listToolCallSpanNames()`,