@ls-stack/agent-eval 0.40.0 → 0.41.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-0k8Y1OBk.mjs → app-1vE5Ryry.mjs} +5 -5
- package/dist/apps/web/dist/assets/index-DKfAipoE.js +140 -0
- package/dist/apps/web/dist/assets/{index-C58_zLA9.css → index-pKAZgRwO.css} +1 -1
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-Sg8UdOnm.mjs → cli-Bk5g-bat.mjs} +43 -11
- package/dist/index.d.mts +3118 -3452
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +5 -3
- package/dist/{runOrchestration-CTzVNrDP.mjs → runOrchestration-DhTiT4V0.mjs} +4635 -4262
- package/dist/{runner-njK_CtXC.mjs → runner-B1Cyevvr.mjs} +1 -1
- package/dist/{runner-Bb9JdFkg.mjs → runner-BG0L4yId.mjs} +2 -2
- package/dist/src-t6OVp1li.mjs +13 -0
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +21 -1
- package/dist/apps/web/dist/assets/index-C4v6dWcv.js +0 -140
- package/dist/src-BZzPFS8r.mjs +0 -3
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-Bb9JdFkg.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-BG0L4yId.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-Sg8UdOnm.mjs";
|
|
2
|
-
import "./src-BZzPFS8r.mjs";
|
|
1
|
+
import { n as createRunner } from "./cli-Bk5g-bat.mjs";
|
|
2
|
+
import "./src-t6OVp1li.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { Lt as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-DhTiT4V0.mjs";
|
|
2
|
+
import "./cli-Bk5g-bat.mjs";
|
|
3
|
+
//#region src/index.ts
|
|
4
|
+
/** Register an eval definition with typed tag support. */
|
|
5
|
+
function defineEval(definition) {
|
|
6
|
+
defineEval$1(definition);
|
|
7
|
+
}
|
|
8
|
+
/** Return whether the active eval case has tags matching the typed input. */
|
|
9
|
+
function matchesEvalTags(input) {
|
|
10
|
+
return matchesEvalTags$1(input);
|
|
11
|
+
}
|
|
12
|
+
//#endregion
|
|
13
|
+
export { matchesEvalTags as n, defineEval as t };
|
package/package.json
CHANGED
|
@@ -25,7 +25,8 @@ display rules), read the TypeScript declarations shipped with the package:
|
|
|
25
25
|
- The CLI automatically loads `.env` from the current workspace. Shell-provided
|
|
26
26
|
environment variables win; pass `--no-env` to disable `.env` loading once.
|
|
27
27
|
- Unfiltered `agent-evals run` is disabled by default; use `--eval` or `--case`
|
|
28
|
-
for targeted CLI runs. Set `allowCliRunAll: true` in
|
|
28
|
+
for targeted CLI runs, or `--tags-filter <expr>` to run cases matching tags.
|
|
29
|
+
Set `allowCliRunAll: true` in
|
|
29
30
|
`agent-evals.config.ts` to opt into run-all CLI behavior. The web UI can
|
|
30
31
|
still run grouped evals and confirms before starting more than five. On a
|
|
31
32
|
single eval page, the Run chevron can open a picker to run specific authored
|
|
@@ -69,6 +70,25 @@ during case-owned phases by default; log arguments are stored as JSON-safe
|
|
|
69
70
|
values and rendered with the JSON viewer, collapsed previews include best-effort
|
|
70
71
|
code locations when stack data is available, previews are capped, and logs
|
|
71
72
|
inside cached operations are not replayed from cache hits.
|
|
73
|
+
Use eval tags to target related coverage without naming every case:
|
|
74
|
+
`AgentEvalsConfig.tags` applies workspace-wide tags, `defineEval({ tags })`
|
|
75
|
+
adds eval tags, `case.tags` adds case-only tags, and `removeTags` disables a
|
|
76
|
+
configured global tag for one eval. CLI filters support Vitest-style tag
|
|
77
|
+
expressions such as `agent-evals run --tags-filter "refunds && !slow"`.
|
|
78
|
+
Inside eval-scoped code, use `matchesEvalTags('tag')` or
|
|
79
|
+
`matchesEvalTags({ all, any, not })`; it uses typed exact tag names and returns
|
|
80
|
+
`false` outside a case scope. Projects can narrow tag names with a `.d.ts`
|
|
81
|
+
module augmentation:
|
|
82
|
+
|
|
83
|
+
```ts
|
|
84
|
+
import '@ls-stack/agent-eval';
|
|
85
|
+
|
|
86
|
+
declare module '@ls-stack/agent-eval' {
|
|
87
|
+
interface AgentEvalTagRegistry {
|
|
88
|
+
tags: 'refunds' | 'media' | 'manual' | 'slow';
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
```
|
|
72
92
|
|
|
73
93
|
### Product code (instrumented once, reused everywhere)
|
|
74
94
|
|