npm - @ls-stack/agent-eval - Versions diffs - 0.42.2 → 0.45.0 - Mend

@ls-stack/agent-eval 0.42.2 → 0.45.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/{app-DPamBr5R.mjs → app-BZmhhSFZ.mjs} +10 -4
package/dist/apps/web/dist/assets/index-BU3IqUso.css +1 -0
package/dist/apps/web/dist/assets/index-am2Knit6.js +140 -0
package/dist/apps/web/dist/index.html +2 -2
package/dist/bin.mjs +1 -1
package/dist/{cli-BeJCJMQo.mjs → cli-vdJYkEVk.mjs} +28 -5
package/dist/index.d.mts +368 -75
package/dist/index.mjs +3 -3
package/dist/runChild.mjs +1 -1
package/dist/{runOrchestration-OVUFw1fL.mjs → runOrchestration-BFdxG9ws.mjs} +259 -250
package/dist/{runner-BJQq7cpd.mjs → runner--aH0jO4Z.mjs} +1 -1
package/dist/{runner-B1KygirW.mjs → runner-DJWn_7p0.mjs} +2 -2
package/dist/{src-D7_xKo7h.mjs → src-BRqs3kSA.mjs} +2 -2
package/package.json +3 -3
package/skills/agent-eval/SKILL.md +14 -12
package/dist/apps/web/dist/assets/index-DH0qYD76.js +0 -140
package/dist/apps/web/dist/assets/index-eFM9VIsz.css +0 -1

package/dist/{runner-B1KygirW.mjs → runner-DJWn_7p0.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { n as createRunner } from "./cli-BeJCJMQo.mjs";
-import "./src-D7_xKo7h.mjs";
+import { n as createRunner } from "./cli-vdJYkEVk.mjs";
+import "./src-BRqs3kSA.mjs";
 //#region ../../apps/server/src/runner.ts
 let runnerInstance = null;
 function getRunnerInstance() {

package/dist/{src-D7_xKo7h.mjs → src-BRqs3kSA.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { It as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-OVUFw1fL.mjs";
-import "./cli-BeJCJMQo.mjs";
+import { It as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-BFdxG9ws.mjs";
+import "./cli-vdJYkEVk.mjs";
 //#region src/index.ts
 /** Register an eval definition with typed tag support. */
 function defineEval(definition) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ls-stack/agent-eval",
-  "version": "0.42.2",
+  "version": "0.45.0",
   "type": "module",
   "bin": {
     "agent-evals": "./dist/bin.mjs"
@@ -33,8 +33,8 @@
     "@types/node": "^24.7.2",
     "typescript": "^5.9.2",
     "@agent-evals/runner": "0.0.1",
-    "@agent-evals/shared": "0.0.1",
-    "@agent-evals/sdk": "0.0.1"
+    "@agent-evals/sdk": "0.0.1",
+    "@agent-evals/shared": "0.0.1"
   },
   "scripts": {
     "build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",

package/skills/agent-eval/SKILL.md CHANGED Viewed

@@ -120,7 +120,7 @@ export async function runRefundWorkflow(input: RefundInput) {
           kind: 'llm',
           name: 'plan-refund',
           cache: {
-            namespace: 'refund-workflow__plan-refund',
+            namespace: 'refund-workflow.plan-refund',
             key: { prompt: input.message, model: 'gpt-4o-mini' },
           },
         },
@@ -444,7 +444,7 @@ await evalTracer.span(
     kind: 'llm',
     name: 'plan-refund',
     cache: {
-      namespace: 'refund-workflow__plan-refund',
+      namespace: 'refund-workflow.plan-refund',
       key: { prompt: input.message, model: 'gpt-4o-mini' },
     },
   },
@@ -510,21 +510,23 @@ Mental model:
   `name`/`lastModified` for `File`) and do not read file bytes. Add
   `serializeFileBytes: true` to a cached span or `evalTracer.cache(...)` call
   when byte-level cache invalidation is required.
-- Cache entries are stored in inspectable owner files under
-  `.agent-evals/cache/<owner>.json`; each namespace is capped at 100 entries by
-  default. Configure `cache.maxEntriesPerNamespace` for the default cap and
+- Cache entries are stored as one Brotli-compressed JSON file per key under
+  `.agent-evals/cache/<sanitizedNamespace>/<keyHash>.json.br`; each namespace
+  is capped at 100 entries by default. Configure
+  `cache.maxEntriesPerNamespace` for the default cap and
   `cache.maxEntriesByNamespace` for exact namespace-specific caps.
 - Nested cached JSON values at or above roughly 10K JSON characters are stored
   as content-addressed Brotli blobs under `.agent-evals/cache-blobs/` and
   referenced from cache JSON by sha256. Identical large payloads share the same
   blob.
 - Authored raw cache keys are stored for debugging under
-  `.agent-evals/cache-debug/<owner>.json`. This folder may include prompts,
-  user inputs, or other sensitive data, should be gitignored, and is not needed
-  for cache reuse. The UI Cache tab shows the raw key when it is available and
-  can be filtered to hits or new entries added by cache misses/refreshes.
-  Misses/refreshes with `cache.store: false` are shown as non-stored activity
-  without fetch/delete controls.
+  `.agent-evals/cache-debug/<sanitizedNamespace>/<keyHash>.json`. This folder
+  may include prompts, user inputs, full serialized cache payloads, or other
+  sensitive data, should be gitignored, and is not needed for cache reuse. The
+  UI Cache tab shows the raw key when it is available and can be filtered to
+  hits or new entries added by cache misses/refreshes. Misses/refreshes with
+  `cache.store: false` are shown as non-stored activity without fetch/delete
+  controls.
 - Cached payloads use JSON-safe tagged serialization, so return values and
   recorded SDK effects preserve richer built-ins such as `Date`, `Map`, `Set`,
   typed arrays, `URL`, `Headers`, `Blob`, and `File` on hits. Undefined values
@@ -537,7 +539,7 @@ Mental model:
 ## Artifacts
 Run output lives under `.agent-evals/runs/<run-id>/`. Cache metadata lives under
-`.agent-evals/cache/`, grouped into runner-managed owner files. Do not rely on a
+`.agent-evals/cache/<sanitizedNamespace>/<keyHash>.json.br`. Do not rely on a
 specific cache filename when authoring evals; configure cache namespaces
 manually in eval code, then use `agent-evals cache list` or the UI Cache tab to
 inspect the persisted namespace/key entries. Files in a run directory include