@ls-stack/agent-eval 0.40.0 → 0.41.0

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-Bb9JdFkg.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-BG0L4yId.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-Sg8UdOnm.mjs";
2
- import "./src-BZzPFS8r.mjs";
1
+ import { n as createRunner } from "./cli-Bk5g-bat.mjs";
2
+ import "./src-t6OVp1li.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -0,0 +1,13 @@
1
+ import { Lt as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-DhTiT4V0.mjs";
2
+ import "./cli-Bk5g-bat.mjs";
3
+ //#region src/index.ts
4
+ /** Register an eval definition with typed tag support. */
5
+ function defineEval(definition) {
6
+ defineEval$1(definition);
7
+ }
8
+ /** Return whether the active eval case has tags matching the typed input. */
9
+ function matchesEvalTags(input) {
10
+ return matchesEvalTags$1(input);
11
+ }
12
+ //#endregion
13
+ export { matchesEvalTags as n, defineEval as t };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.40.0",
3
+ "version": "0.41.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -25,7 +25,8 @@ display rules), read the TypeScript declarations shipped with the package:
25
25
  - The CLI automatically loads `.env` from the current workspace. Shell-provided
26
26
  environment variables win; pass `--no-env` to disable `.env` loading once.
27
27
  - Unfiltered `agent-evals run` is disabled by default; use `--eval` or `--case`
28
- for targeted CLI runs. Set `allowCliRunAll: true` in
28
+ for targeted CLI runs, or `--tags-filter <expr>` to run cases matching tags.
29
+ Set `allowCliRunAll: true` in
29
30
  `agent-evals.config.ts` to opt into run-all CLI behavior. The web UI can
30
31
  still run grouped evals and confirms before starting more than five. On a
31
32
  single eval page, the Run chevron can open a picker to run specific authored
@@ -69,6 +70,25 @@ during case-owned phases by default; log arguments are stored as JSON-safe
69
70
  values and rendered with the JSON viewer, collapsed previews include best-effort
70
71
  code locations when stack data is available, previews are capped, and logs
71
72
  inside cached operations are not replayed from cache hits.
73
+ Use eval tags to target related coverage without naming every case:
74
+ `AgentEvalsConfig.tags` applies workspace-wide tags, `defineEval({ tags })`
75
+ adds eval tags, `case.tags` adds case-only tags, and `removeTags` disables a
76
+ configured global tag for one eval. CLI filters support Vitest-style tag
77
+ expressions such as `agent-evals run --tags-filter "refunds && !slow"`.
78
+ Inside eval-scoped code, use `matchesEvalTags('tag')` or
79
+ `matchesEvalTags({ all, any, not })`; it uses typed exact tag names and returns
80
+ `false` outside a case scope. Projects can narrow tag names with a `.d.ts`
81
+ module augmentation:
82
+
83
+ ```ts
84
+ import '@ls-stack/agent-eval';
85
+
86
+ declare module '@ls-stack/agent-eval' {
87
+ interface AgentEvalTagRegistry {
88
+ tags: 'refunds' | 'media' | 'manual' | 'slow';
89
+ }
90
+ }
91
+ ```
72
92
 
73
93
  ### Product code (instrumented once, reused everywhere)
74
94