@ls-stack/agent-eval 0.17.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-DTotEBoY.mjs → app-hAlVvT-Q.mjs} +23 -4
- package/dist/apps/web/dist/assets/index-C761goIh.css +1 -0
- package/dist/apps/web/dist/assets/index-DS552a3u.js +118 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-CULTt3Xp.mjs → cli-3zANEAhG.mjs} +3 -3
- package/dist/index.d.mts +63 -350
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-D2okEB3I.mjs → runOrchestration-BBg_VUH5.mjs} +416 -1710
- package/dist/{runner-DyM0Gp8G.mjs → runner-DxlahWDo.mjs} +1 -1
- package/dist/{runner-BSXZiQIi.mjs → runner-RmZPRz-h.mjs} +2 -2
- package/dist/src-BC4OrajN.mjs +3 -0
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +8 -4
- package/dist/apps/web/dist/assets/index-C5IRkeUz.js +0 -118
- package/dist/apps/web/dist/assets/index-Cn9WoTj5.css +0 -1
- package/dist/src-CNf3xwVw.mjs +0 -3
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-BSXZiQIi.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-RmZPRz-h.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-CULTt3Xp.mjs";
|
|
2
|
-
import "./src-CNf3xwVw.mjs";
|
|
1
|
+
import { n as createRunner } from "./cli-3zANEAhG.mjs";
|
|
2
|
+
import "./src-BC4OrajN.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|
package/package.json
CHANGED
|
@@ -55,8 +55,9 @@ a per-case sequence number, and throws outside an eval case scope.
|
|
|
55
55
|
Use `evalLog(level, ...args)` for intentional per-case logs. The runner also
|
|
56
56
|
captures `console.log`, `console.info`, `console.warn`, and `console.error`
|
|
57
57
|
during case-owned phases by default; log arguments are stored as JSON-safe
|
|
58
|
-
values and rendered with the JSON viewer, collapsed previews
|
|
59
|
-
|
|
58
|
+
values and rendered with the JSON viewer, collapsed previews include best-effort
|
|
59
|
+
code locations when stack data is available, previews are capped, and logs
|
|
60
|
+
inside cached operations are not replayed from cache hits.
|
|
60
61
|
|
|
61
62
|
### Product code (instrumented once, reused everywhere)
|
|
62
63
|
|
|
@@ -158,7 +159,9 @@ events, use `evalTracer.startSpan(...)`, `evalTracer.updateSpan(...)`,
|
|
|
158
159
|
`evalTracer.endSpan(...)`, or `evalTracer.recordSpan(...)` to translate those
|
|
159
160
|
events into the eval trace tree without wrapping the upstream work in a
|
|
160
161
|
callback. Pass the upstream span id and parent id when available so the UI keeps
|
|
161
|
-
the original hierarchy.
|
|
162
|
+
the original hierarchy. The Trace tab can switch between that recorded hierarchy
|
|
163
|
+
and UI-only timeline nesting for flat exported traces; saved trace JSON and
|
|
164
|
+
`deriveFromTracing` continue to use the recorded parent ids.
|
|
162
165
|
|
|
163
166
|
### Eval file (thin)
|
|
164
167
|
|
|
@@ -341,7 +344,8 @@ Mental model:
|
|
|
341
344
|
- Authored raw cache keys are stored for debugging under
|
|
342
345
|
`.agent-evals/cache-debug/<owner>.json`. This folder may include prompts,
|
|
343
346
|
user inputs, or other sensitive data, should be gitignored, and is not needed
|
|
344
|
-
for cache reuse. The UI Cache
|
|
347
|
+
for cache reuse. The UI Cache tab shows the raw key when it is available and
|
|
348
|
+
can be filtered to hits or new entries added by cache misses/refreshes.
|
|
345
349
|
- Cached payloads use advanced serialization/deserialization with the Web API plugin set, so return values and
|
|
346
350
|
recorded SDK effects preserve richer built-ins such as `Date`, `Map`, `Set`,
|
|
347
351
|
typed arrays, `URL`, `Headers`, `Blob`, and `File` on hits. Cache keys still
|