@ls-stack/agent-eval 0.55.1 → 0.55.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
- import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-Sw38bCaq.mjs";
2
- import "./cli-BR3wMZMx.mjs";
1
+ import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-C31dpemR.mjs";
2
+ import "./cli-Bu9347r1.mjs";
3
3
  //#region src/index.ts
4
4
  /** Register an eval definition with typed tag support. */
5
5
  function defineEval(definition) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.55.1",
3
+ "version": "0.55.2",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -33,8 +33,8 @@
33
33
  "@types/node": "^24.7.2",
34
34
  "typescript": "^5.9.2",
35
35
  "@agent-evals/runner": "0.0.1",
36
- "@agent-evals/shared": "0.0.1",
37
- "@agent-evals/sdk": "0.0.1"
36
+ "@agent-evals/sdk": "0.0.1",
37
+ "@agent-evals/shared": "0.0.1"
38
38
  },
39
39
  "scripts": {
40
40
  "build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",
@@ -513,12 +513,21 @@ Mental model:
513
513
  `serializeFileBytes: true` to a cached span or `evalTracer.cache(...)` call
514
514
  when byte-level cache invalidation is required.
515
515
  - Cache entries are stored as one Brotli-compressed JSON file per key under
516
- `.agent-evals/cache/<sanitizedNamespace>/<keyHash>.json.br`; each namespace
517
- is capped at 100 entries by default. Configure
516
+ `.agent-evals/cache/<sanitizedNamespace>/<keyHash>.json.br`, with a small
517
+ namespace index sidecar at
518
+ `.agent-evals/cache/<sanitizedNamespace>/.index-<namespaceHash>.json`.
519
+ Listing and retention use the index without opening cached payloads. Index
520
+ rows intentionally stay minimal: stored time, last access time, and external
521
+ JSON blob refs. Each namespace is capped at 100 entries by default. The runner
522
+ prunes least recently accessed indexed entries after a run finishes and the
523
+ runner stays idle for `cache.pruneIdleDelayMs ?? 5000` milliseconds. Configure
518
524
  `cache.maxEntriesPerNamespace` for the default cap and
519
525
  `cache.maxEntriesByNamespace` for exact namespace-specific caps.
526
+ - Unindexed legacy cache files are ignored by normal lookup/listing. Use
527
+ `agent-evals cache repair` to remove unindexed cache files, stale index rows,
528
+ debug sidecars, and unreferenced blob files.
520
529
  - Nested cached JSON values at or above roughly 10K JSON characters are stored
521
- as content-addressed Brotli blobs under `.agent-evals/cache-blobs/` and
530
+ as content-addressed Brotli blobs under `.agent-evals/cache/cache-blobs/` and
522
531
  referenced from cache JSON by sha256. Identical large payloads share the same
523
532
  blob.
524
533
  - Authored raw cache keys are stored for debugging under
@@ -536,11 +545,13 @@ Mental model:
536
545
 
537
546
  ## Artifacts
538
547
 
539
- Run output lives under `.agent-evals/runs/<run-id>/`. Cache metadata lives under
540
- `.agent-evals/cache/<sanitizedNamespace>/<keyHash>.json.br`. Do not rely on a
541
- specific cache filename when authoring evals; configure cache namespaces
542
- manually in eval code, then use `agent-evals cache list` to inspect the
543
- persisted namespace/key entries. Files in a run directory include run metadata,
548
+ Run output lives under `.agent-evals/runs/<run-id>/`. Cache payloads live under
549
+ `.agent-evals/cache/<sanitizedNamespace>/<keyHash>.json.br` with namespace
550
+ index sidecars next to them. Do not rely on a specific cache filename when
551
+ authoring evals; configure cache namespaces manually in eval code, then use
552
+ `agent-evals cache list` to inspect persisted namespace/key entries or
553
+ `agent-evals cache repair` to clean orphaned cache artifacts. Files in a run
554
+ directory include run metadata,
544
555
  a run summary, per-case results, and per-case trace JSON. Inspect run files when
545
556
  debugging persisted output, costs, columns, traces, or failures; inspect cache
546
557
  entries when debugging replayed span/value-cache results.