npm - @ls-stack/agent-eval - Versions diffs - 0.42.1 → 0.42.3 - Mend

@ls-stack/agent-eval 0.42.1 → 0.42.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/{app-mOYjX9zq.mjs → app-DPamBr5R.mjs} +4 -4
package/dist/apps/web/dist/assets/{index-eFM9VIsz.css → index-S3J5Nm0o.css} +1 -1
package/dist/apps/web/dist/assets/{index-CANDLTsq.js → index-XLJByNnS.js} +2 -2
package/dist/apps/web/dist/index.html +2 -2
package/dist/bin.mjs +1 -1
package/dist/{cli-DbVfkr9T.mjs → cli-BeJCJMQo.mjs} +49 -23
package/dist/index.d.mts +42 -43
package/dist/index.mjs +3 -3
package/dist/runChild.mjs +1 -1
package/dist/{runOrchestration-SPaHx-SC.mjs → runOrchestration-OVUFw1fL.mjs} +12 -23
package/dist/{runner-DiCQ57JQ.mjs → runner-B1KygirW.mjs} +2 -2
package/dist/{runner-BYOdLBle.mjs → runner-BJQq7cpd.mjs} +1 -1
package/dist/{src-CANi3gpd.mjs → src-D7_xKo7h.mjs} +2 -2
package/package.json +3 -3
package/skills/agent-eval/SKILL.md +15 -8

package/dist/{runner-DiCQ57JQ.mjs → runner-B1KygirW.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { n as createRunner } from "./cli-DbVfkr9T.mjs";
-import "./src-CANi3gpd.mjs";
+import { n as createRunner } from "./cli-BeJCJMQo.mjs";
+import "./src-D7_xKo7h.mjs";
 //#region ../../apps/server/src/runner.ts
 let runnerInstance = null;
 function getRunnerInstance() {

package/dist/{runner-BYOdLBle.mjs → runner-BJQq7cpd.mjs} RENAMED Viewed

@@ -1,2 +1,2 @@
-import { n as initRunner, t as getRunnerInstance } from "./runner-DiCQ57JQ.mjs";
+import { n as initRunner, t as getRunnerInstance } from "./runner-B1KygirW.mjs";
 export { getRunnerInstance, initRunner };

package/dist/{src-CANi3gpd.mjs → src-D7_xKo7h.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { Lt as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-SPaHx-SC.mjs";
-import "./cli-DbVfkr9T.mjs";
+import { It as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-OVUFw1fL.mjs";
+import "./cli-BeJCJMQo.mjs";
 //#region src/index.ts
 /** Register an eval definition with typed tag support. */
 function defineEval(definition) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ls-stack/agent-eval",
-  "version": "0.42.1",
+  "version": "0.42.3",
   "type": "module",
   "bin": {
     "agent-evals": "./dist/bin.mjs"
@@ -32,9 +32,9 @@
   "devDependencies": {
     "@types/node": "^24.7.2",
     "typescript": "^5.9.2",
+    "@agent-evals/sdk": "0.0.1",
     "@agent-evals/runner": "0.0.1",
-    "@agent-evals/shared": "0.0.1",
-    "@agent-evals/sdk": "0.0.1"
+    "@agent-evals/shared": "0.0.1"
   },
   "scripts": {
     "build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",

package/skills/agent-eval/SKILL.md CHANGED Viewed

@@ -490,10 +490,10 @@ Mental model:
 - Cache identity is the namespace plus the authored key. Source-file
   fingerprints are tracked for run freshness separately, but do not participate
   in cache-key hashing.
-- Cached spans require an explicit `cache.namespace`; value caches default to
-  `${evalId}__${name}` and can be overridden with `namespace`. Matching
-  namespaces share entries across operations/evals that use the same authored
-  key.
+- Cached spans require an explicit `cache.namespace`. Value caches can also set
+  an explicit `namespace`; prefer doing that when the cache is part of a
+  documented workflow. Matching namespaces share entries across operations/evals
+  that use the same authored key.
 - Per eval, `cache: { read?: boolean; store?: boolean }` controls whether
   authored cached operations may read or persist entries. Both default to
   `true`. Use `read: false` to always execute instead of replaying hits, and
@@ -536,10 +536,17 @@ Mental model:
 ## Artifacts
-Run output lives under `.agent-evals/runs/<run-id>/` and cache entries under
-`.agent-evals/cache/<eval-id>.json`. Files in a run directory include run
-metadata, a run summary, per-case results, and per-case trace JSON. Inspect
-these when debugging persisted output, costs, columns, traces, or failures.
+Run output lives under `.agent-evals/runs/<run-id>/`. Cache metadata lives under
+`.agent-evals/cache/`, grouped into runner-managed owner files. Do not rely on a
+specific cache filename when authoring evals; configure cache namespaces
+manually in eval code, then use `agent-evals cache list` or the UI Cache tab to
+inspect the persisted namespace/key entries. Files in a run directory include
+run metadata, a run summary, per-case results, and per-case trace JSON. Inspect
+run files when debugging persisted output, costs, columns, traces, or failures;
+inspect cache entries when debugging replayed span/value-cache results.
+Targeted evals in `run.json` are recorded by exact `evalKeys`
+(`filePath + evalId`) rather than authored eval ids, so duplicate eval ids stay
+unambiguous in saved history.
 Temporary runs use the same directory layout, but are removed before the next
 run of any kind starts.