npm - @ls-stack/agent-eval - Versions diffs - 0.40.0 → 0.41.0 - Mend

@ls-stack/agent-eval 0.40.0 → 0.41.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/{app-0k8Y1OBk.mjs → app-1vE5Ryry.mjs} +5 -5
package/dist/apps/web/dist/assets/index-DKfAipoE.js +140 -0
package/dist/apps/web/dist/assets/{index-C58_zLA9.css → index-pKAZgRwO.css} +1 -1
package/dist/apps/web/dist/index.html +2 -2
package/dist/bin.mjs +1 -1
package/dist/{cli-Sg8UdOnm.mjs → cli-Bk5g-bat.mjs} +43 -11
package/dist/index.d.mts +3118 -3452
package/dist/index.mjs +4 -4
package/dist/runChild.mjs +5 -3
package/dist/{runOrchestration-CTzVNrDP.mjs → runOrchestration-DhTiT4V0.mjs} +4635 -4262
package/dist/{runner-njK_CtXC.mjs → runner-B1Cyevvr.mjs} +1 -1
package/dist/{runner-Bb9JdFkg.mjs → runner-BG0L4yId.mjs} +2 -2
package/dist/src-t6OVp1li.mjs +13 -0
package/package.json +1 -1
package/skills/agent-eval/SKILL.md +21 -1
package/dist/apps/web/dist/assets/index-C4v6dWcv.js +0 -140
package/dist/src-BZzPFS8r.mjs +0 -3

package/dist/{runner-njK_CtXC.mjs → runner-B1Cyevvr.mjs} RENAMED Viewed

@@ -1,2 +1,2 @@
-import { n as initRunner, t as getRunnerInstance } from "./runner-Bb9JdFkg.mjs";
+import { n as initRunner, t as getRunnerInstance } from "./runner-BG0L4yId.mjs";
 export { getRunnerInstance, initRunner };

package/dist/{runner-Bb9JdFkg.mjs → runner-BG0L4yId.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { n as createRunner } from "./cli-Sg8UdOnm.mjs";
-import "./src-BZzPFS8r.mjs";
+import { n as createRunner } from "./cli-Bk5g-bat.mjs";
+import "./src-t6OVp1li.mjs";
 //#region ../../apps/server/src/runner.ts
 let runnerInstance = null;
 function getRunnerInstance() {

package/dist/src-t6OVp1li.mjs ADDED Viewed

@@ -0,0 +1,13 @@
+import { Lt as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-DhTiT4V0.mjs";
+import "./cli-Bk5g-bat.mjs";
+//#region src/index.ts
+/** Register an eval definition with typed tag support. */
+function defineEval(definition) {
+	defineEval$1(definition);
+}
+/** Return whether the active eval case has tags matching the typed input. */
+function matchesEvalTags(input) {
+	return matchesEvalTags$1(input);
+}
+//#endregion
+export { matchesEvalTags as n, defineEval as t };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ls-stack/agent-eval",
-  "version": "0.40.0",
+  "version": "0.41.0",
   "type": "module",
   "bin": {
     "agent-evals": "./dist/bin.mjs"

package/skills/agent-eval/SKILL.md CHANGED Viewed

@@ -25,7 +25,8 @@ display rules), read the TypeScript declarations shipped with the package:
 - The CLI automatically loads `.env` from the current workspace. Shell-provided
   environment variables win; pass `--no-env` to disable `.env` loading once.
 - Unfiltered `agent-evals run` is disabled by default; use `--eval` or `--case`
-  for targeted CLI runs. Set `allowCliRunAll: true` in
+  for targeted CLI runs, or `--tags-filter <expr>` to run cases matching tags.
+  Set `allowCliRunAll: true` in
   `agent-evals.config.ts` to opt into run-all CLI behavior. The web UI can
   still run grouped evals and confirms before starting more than five. On a
   single eval page, the Run chevron can open a picker to run specific authored
@@ -69,6 +70,25 @@ during case-owned phases by default; log arguments are stored as JSON-safe
 values and rendered with the JSON viewer, collapsed previews include best-effort
 code locations when stack data is available, previews are capped, and logs
 inside cached operations are not replayed from cache hits.
+Use eval tags to target related coverage without naming every case:
+`AgentEvalsConfig.tags` applies workspace-wide tags, `defineEval({ tags })`
+adds eval tags, `case.tags` adds case-only tags, and `removeTags` disables a
+configured global tag for one eval. CLI filters support Vitest-style tag
+expressions such as `agent-evals run --tags-filter "refunds && !slow"`.
+Inside eval-scoped code, use `matchesEvalTags('tag')` or
+`matchesEvalTags({ all, any, not })`; it uses typed exact tag names and returns
+`false` outside a case scope. Projects can narrow tag names with a `.d.ts`
+module augmentation:
+```ts
+import '@ls-stack/agent-eval';
+declare module '@ls-stack/agent-eval' {
+  interface AgentEvalTagRegistry {
+    tags: 'refunds' | 'media' | 'manual' | 'slow';
+  }
+}
+```
 ### Product code (instrumented once, reused everywhere)