@ls-stack/agent-eval 0.41.0 → 0.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/apps/web/dist/assets/index-5CB9eJZy.js +140 -0
- package/dist/apps/web/dist/assets/index-eFM9VIsz.css +1 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/index.d.mts +91 -91
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +3 -0
- package/dist/apps/web/dist/assets/index-DKfAipoE.js +0 -140
- package/dist/apps/web/dist/assets/index-pKAZgRwO.css +0 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ls-stack/agent-eval",
|
|
3
|
-
"version": "0.41.0",
|
|
3
|
+
"version": "0.42.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"agent-evals": "./dist/bin.mjs"
|
|
@@ -33,8 +33,8 @@
|
|
|
33
33
|
"@types/node": "^24.7.2",
|
|
34
34
|
"typescript": "^5.9.2",
|
|
35
35
|
"@agent-evals/runner": "0.0.1",
|
|
36
|
-
"@agent-evals/sdk": "0.0.1",
|
|
37
|
-
"@agent-evals/shared": "0.0.1"
|
|
36
|
+
"@agent-evals/shared": "0.0.1",
|
|
37
|
+
"@agent-evals/sdk": "0.0.1"
|
|
38
38
|
},
|
|
39
39
|
"scripts": {
|
|
40
40
|
"build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",
|
|
@@ -353,6 +353,9 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
|
|
|
353
353
|
column from the runs table when every rendered row is missing the value,
|
|
354
354
|
`null`, or an empty string; `0` and `false` still count as values, and the
|
|
355
355
|
value remains available in case details and raw output data.
|
|
356
|
+
In the case detail Output tab, string outputs that look like Markdown render
|
|
357
|
+
as Markdown even without `format: 'markdown'`, with a Preview/Raw toggle for
|
|
358
|
+
inspecting the original text.
|
|
356
359
|
- `deriveFromTracing` can be authored globally in `agent-evals.config.ts` or
|
|
357
360
|
locally on one eval. Prefer the keyed map form for shared metrics:
|
|
358
361
|
`deriveFromTracing: { toolCalls: ({ trace }) => trace.findSpansByKind('tool').length }`.
|