@ls-stack/agent-eval 0.60.0 → 0.60.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-opbcrpvt.mjs → app-CxKEVlng.mjs} +3 -3
- package/dist/apps/web/dist/assets/{index-Dowobz-z.js → index-CM6MDNqo.js} +73 -73
- package/dist/apps/web/dist/index.html +1 -1
- package/dist/bin.mjs +1 -1
- package/dist/{cli-FOyPC8UD.mjs → cli-CVBSlTD8.mjs} +70 -25
- package/dist/index.d.mts +121 -112
- package/dist/index.mjs +2 -2
- package/dist/{runner-CIxj7jYj.mjs → runner-Cu1CQPTB.mjs} +1 -1
- package/dist/runner-DzDRasWV.mjs +15 -0
- package/dist/{src-p-GRSVDb.mjs → src-DjOTPnDz.mjs} +1 -1
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +4 -3
- package/dist/runner-Dv5cseOt.mjs +0 -15
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, z as evalLog } from "./runExecution-CjWJUUZ5.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-FOyPC8UD.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-p-GRSVDb.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CVBSlTD8.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-DjOTPnDz.mjs";
|
|
4
4
|
export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob };
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-Dv5cseOt.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-DzDRasWV.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { n as createRunner } from "./cli-CVBSlTD8.mjs";
|
|
2
|
+
import "./src-DjOTPnDz.mjs";
|
|
3
|
+
//#region ../../apps/server/src/runner.ts
|
|
4
|
+
let runnerInstance = null;
|
|
5
|
+
function getRunnerInstance({ loadEnv = true } = {}) {
|
|
6
|
+
if (!runnerInstance) runnerInstance = createRunner({ loadEnv });
|
|
7
|
+
return runnerInstance;
|
|
8
|
+
}
|
|
9
|
+
async function initRunner(options = {}) {
|
|
10
|
+
const runner = getRunnerInstance(options);
|
|
11
|
+
await runner.init();
|
|
12
|
+
return runner;
|
|
13
|
+
}
|
|
14
|
+
//#endregion
|
|
15
|
+
export { initRunner as n, getRunnerInstance as t };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { G as matchesEvalTags$1, Pt as defineEval$1 } from "./runExecution-CjWJUUZ5.mjs";
|
|
2
|
-
import "./cli-FOyPC8UD.mjs";
|
|
2
|
+
import "./cli-CVBSlTD8.mjs";
|
|
3
3
|
//#region src/index.ts
|
|
4
4
|
/** Register an eval definition with typed tag support. */
|
|
5
5
|
function defineEval(definition) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ls-stack/agent-eval",
|
|
3
|
-
"version": "0.60.0",
|
|
3
|
+
"version": "0.60.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"agent-evals": "./dist/bin.mjs"
|
|
@@ -32,9 +32,9 @@
|
|
|
32
32
|
"devDependencies": {
|
|
33
33
|
"@types/node": "^24.7.2",
|
|
34
34
|
"typescript": "^5.9.2",
|
|
35
|
-
"@agent-evals/
|
|
35
|
+
"@agent-evals/sdk": "0.0.1",
|
|
36
36
|
"@agent-evals/shared": "0.0.1",
|
|
37
|
-
"@agent-evals/
|
|
37
|
+
"@agent-evals/runner": "0.0.1"
|
|
38
38
|
},
|
|
39
39
|
"scripts": {
|
|
40
40
|
"build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",
|
|
@@ -34,9 +34,10 @@ display rules), read the TypeScript declarations shipped with the package:
|
|
|
34
34
|
it before the next run starts. Temporary runs appear in `show-runs` while
|
|
35
35
|
present; normal runs are never deleted by temporary-run cleanup. In the app,
|
|
36
36
|
the run drawer can promote a temporary run to durable history.
|
|
37
|
-
- `agent-evals app` watches `agent-evals.config.ts` and
|
|
38
|
-
place when the runner is idle. If config
|
|
39
|
-
reload applies after the current run
|
|
37
|
+
- `agent-evals app` watches `agent-evals.config.ts` and the workspace `.env`
|
|
38
|
+
and reloads them in place when the runner is idle. If config or `.env`
|
|
39
|
+
changes during an active run, the reload applies after the current run
|
|
40
|
+
reaches a terminal state.
|
|
40
41
|
- App-triggered runs log the queued target evals, resolved case concurrency,
|
|
41
42
|
each case start for evals that are actually running, and the terminal run
|
|
42
43
|
summary in the server terminal.
|
package/dist/runner-Dv5cseOt.mjs
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-FOyPC8UD.mjs";
|
|
2
|
-
import "./src-p-GRSVDb.mjs";
|
|
3
|
-
//#region ../../apps/server/src/runner.ts
|
|
4
|
-
let runnerInstance = null;
|
|
5
|
-
function getRunnerInstance() {
|
|
6
|
-
if (!runnerInstance) runnerInstance = createRunner();
|
|
7
|
-
return runnerInstance;
|
|
8
|
-
}
|
|
9
|
-
async function initRunner() {
|
|
10
|
-
const runner = getRunnerInstance();
|
|
11
|
-
await runner.init();
|
|
12
|
-
return runner;
|
|
13
|
-
}
|
|
14
|
-
//#endregion
|
|
15
|
-
export { initRunner as n, getRunnerInstance as t };
|