npm - @ls-stack/agent-eval - Versions diffs - 0.55.1 → 0.55.2 - Mend

@ls-stack/agent-eval 0.55.1 → 0.55.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/{app-BD0D9-7k.mjs → app-NI4to6lp.mjs} +4 -4
package/dist/apps/web/dist/assets/{index-CvsPmlHl.js → index-C7QjETk8.js} +54 -54
package/dist/apps/web/dist/index.html +1 -1
package/dist/bin.mjs +1 -1
package/dist/caseChild.mjs +1 -1
package/dist/{cli-BR3wMZMx.mjs → cli-Bu9347r1.mjs} +60 -13
package/dist/index.d.mts +37 -24
package/dist/index.mjs +3 -3
package/dist/runChild.mjs +2 -2
package/dist/{runExecution-Sw38bCaq.mjs → runExecution-C31dpemR.mjs} +363 -114
package/dist/{runOrchestration-DJsdLYeZ.mjs → runOrchestration-3RoHLW4U.mjs} +1 -1
package/dist/{runner-72rsqJRq.mjs → runner-B4EfMn1d.mjs} +2 -2
package/dist/{runner-dB69WsnM.mjs → runner-CTp9zHbM.mjs} +1 -1
package/dist/{src-hBGtzWuA.mjs → src-FR60ZR_4.mjs} +2 -2
package/package.json +3 -3
package/skills/agent-eval/SKILL.md +19 -8

package/dist/{src-hBGtzWuA.mjs → src-FR60ZR_4.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-Sw38bCaq.mjs";
-import "./cli-BR3wMZMx.mjs";
+import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-C31dpemR.mjs";
+import "./cli-Bu9347r1.mjs";
 //#region src/index.ts
 /** Register an eval definition with typed tag support. */
 function defineEval(definition) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ls-stack/agent-eval",
-  "version": "0.55.1",
+  "version": "0.55.2",
   "type": "module",
   "bin": {
     "agent-evals": "./dist/bin.mjs"
@@ -33,8 +33,8 @@
     "@types/node": "^24.7.2",
     "typescript": "^5.9.2",
     "@agent-evals/runner": "0.0.1",
-    "@agent-evals/shared": "0.0.1",
-    "@agent-evals/sdk": "0.0.1"
+    "@agent-evals/sdk": "0.0.1",
+    "@agent-evals/shared": "0.0.1"
   },
   "scripts": {
     "build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",

package/skills/agent-eval/SKILL.md CHANGED Viewed

@@ -513,12 +513,21 @@ Mental model:
   `serializeFileBytes: true` to a cached span or `evalTracer.cache(...)` call
   when byte-level cache invalidation is required.
 - Cache entries are stored as one Brotli-compressed JSON file per key under
-  `.agent-evals/cache/<sanitizedNamespace>/<keyHash>.json.br`; each namespace
-  is capped at 100 entries by default. Configure
+  `.agent-evals/cache/<sanitizedNamespace>/<keyHash>.json.br`, with a small
+  namespace index sidecar at
+  `.agent-evals/cache/<sanitizedNamespace>/.index-<namespaceHash>.json`.
+  Listing and retention use the index without opening cached payloads. Index
+  rows intentionally stay minimal: stored time, last access time, and external
+  JSON blob refs. Each namespace is capped at 100 entries by default. The runner
+  prunes least recently accessed indexed entries after a run finishes and the
+  runner stays idle for `cache.pruneIdleDelayMs ?? 5000` milliseconds. Configure
   `cache.maxEntriesPerNamespace` for the default cap and
   `cache.maxEntriesByNamespace` for exact namespace-specific caps.
+- Unindexed legacy cache files are ignored by normal lookup/listing. Use
+  `agent-evals cache repair` to remove unindexed cache files, stale index rows,
+  debug sidecars, and unreferenced blob files.
 - Nested cached JSON values at or above roughly 10K JSON characters are stored
-  as content-addressed Brotli blobs under `.agent-evals/cache-blobs/` and
+  as content-addressed Brotli blobs under `.agent-evals/cache/cache-blobs/` and
   referenced from cache JSON by sha256. Identical large payloads share the same
   blob.
 - Authored raw cache keys are stored for debugging under
@@ -536,11 +545,13 @@ Mental model:
 ## Artifacts
-Run output lives under `.agent-evals/runs/<run-id>/`. Cache metadata lives under
-`.agent-evals/cache/<sanitizedNamespace>/<keyHash>.json.br`. Do not rely on a
-specific cache filename when authoring evals; configure cache namespaces
-manually in eval code, then use `agent-evals cache list` to inspect the
-persisted namespace/key entries. Files in a run directory include run metadata,
+Run output lives under `.agent-evals/runs/<run-id>/`. Cache payloads live under
+`.agent-evals/cache/<sanitizedNamespace>/<keyHash>.json.br` with namespace
+index sidecars next to them. Do not rely on a specific cache filename when
+authoring evals; configure cache namespaces manually in eval code, then use
+`agent-evals cache list` to inspect persisted namespace/key entries or
+`agent-evals cache repair` to clean orphaned cache artifacts. Files in a run
+directory include run metadata,
 a run summary, per-case results, and per-case trace JSON. Inspect run files when
 debugging persisted output, costs, columns, traces, or failures; inspect cache
 entries when debugging replayed span/value-cache results.