@ls-stack/agent-eval 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-DYRmucgj.mjs → app-DS3j_AyX.mjs} +6 -3
- package/dist/apps/web/dist/assets/index-DNsZjOms.css +1 -0
- package/dist/apps/web/dist/assets/index-DqR1YaMG.js +118 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-Be0x8CS3.mjs → cli-ETfZ15RB.mjs} +151 -42
- package/dist/index.d.mts +61 -6
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +28 -18
- package/dist/{runOrchestration-D697g6Qe.mjs → runOrchestration-B31SV_Bq.mjs} +222 -87
- package/dist/{runner-jSujaSKt.mjs → runner-B2f2TEjp.mjs} +1 -1
- package/dist/{runner-B4SosWgD.mjs → runner-cj1TkR-H.mjs} +2 -2
- package/dist/src-CyNb2ycA.mjs +3 -0
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +6 -1
- package/dist/apps/web/dist/assets/index-KbbX3NYr.js +0 -118
- package/dist/apps/web/dist/assets/index-r0dVFK0B.css +0 -1
- package/dist/src-D6cettg0.mjs +0 -3
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { B as runSummarySchema,
|
|
1
|
+
import { B as runSummarySchema, T as createRunRequestSchema, Tn as configureEvalRunLogs, b as loadConfig, gt as evalStatsConfigSchema, jt as evalChartsConfigSchema, ot as buildEvalKey, r as getTargetEvals$1, rn as columnDefSchema, t as executeRun, v as parseEvalDiscovery, w as createFsCacheStore, z as runManifestSchema } from "./runOrchestration-B31SV_Bq.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { relative } from "node:path";
|
|
@@ -6,6 +6,7 @@ import { z } from "zod/v4";
|
|
|
6
6
|
import { glob } from "glob";
|
|
7
7
|
//#region ../runner/src/runChild.ts
|
|
8
8
|
const evalMetaSchema = z.object({
|
|
9
|
+
key: z.string(),
|
|
9
10
|
id: z.string(),
|
|
10
11
|
title: z.string().optional(),
|
|
11
12
|
filePath: z.string(),
|
|
@@ -36,8 +37,10 @@ function getConfiguredConcurrency(configConcurrency) {
|
|
|
36
37
|
return Math.max(1, Math.floor(configConcurrency));
|
|
37
38
|
}
|
|
38
39
|
function getTargetEvals(params) {
|
|
39
|
-
|
|
40
|
-
|
|
40
|
+
return getTargetEvals$1({
|
|
41
|
+
evals: params.evals.values(),
|
|
42
|
+
request: params.request
|
|
43
|
+
});
|
|
41
44
|
}
|
|
42
45
|
function toWorkspaceRelativePath(params) {
|
|
43
46
|
return relative(params.workspaceRoot, params.filePath).replaceAll("\\", "/");
|
|
@@ -55,21 +58,29 @@ async function discoverRunEvals(params) {
|
|
|
55
58
|
for (const filePath of discovered) {
|
|
56
59
|
const source = await readFile(filePath, "utf-8");
|
|
57
60
|
const sourceFingerprint = getSourceFingerprint(source);
|
|
58
|
-
const metas =
|
|
59
|
-
for (const meta of metas)
|
|
60
|
-
|
|
61
|
-
title: meta.title,
|
|
62
|
-
filePath: toWorkspaceRelativePath({
|
|
61
|
+
const metas = parseEvalDiscovery(filePath, source).metas;
|
|
62
|
+
for (const meta of metas) {
|
|
63
|
+
const relativeFilePath = toWorkspaceRelativePath({
|
|
63
64
|
filePath: meta.filePath,
|
|
64
65
|
workspaceRoot: params.workspaceRoot
|
|
65
|
-
})
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
66
|
+
});
|
|
67
|
+
const key = buildEvalKey({
|
|
68
|
+
filePath: relativeFilePath,
|
|
69
|
+
evalId: meta.id
|
|
70
|
+
});
|
|
71
|
+
evals.set(key, {
|
|
72
|
+
key,
|
|
73
|
+
id: meta.id,
|
|
74
|
+
title: meta.title,
|
|
75
|
+
filePath: relativeFilePath,
|
|
76
|
+
sourceFilePath: meta.filePath,
|
|
77
|
+
sourceFingerprint,
|
|
78
|
+
columnDefs: [],
|
|
79
|
+
caseCount: null
|
|
80
|
+
});
|
|
81
|
+
}
|
|
71
82
|
}
|
|
72
|
-
return [...evals.values()].toSorted((a, b) => a.filePath.localeCompare(b.filePath));
|
|
83
|
+
return [...evals.values()].toSorted((a, b) => a.filePath.localeCompare(b.filePath) || a.id.localeCompare(b.id));
|
|
73
84
|
}
|
|
74
85
|
async function readContext(contextPath) {
|
|
75
86
|
if (contextPath === void 0) throw new Error("Missing run child context path");
|
|
@@ -93,7 +104,7 @@ async function main() {
|
|
|
93
104
|
config,
|
|
94
105
|
workspaceRoot: context.workspaceRoot
|
|
95
106
|
});
|
|
96
|
-
const evals = new Map(evalMetas.map((evalMeta) => [evalMeta.
|
|
107
|
+
const evals = new Map(evalMetas.map((evalMeta) => [evalMeta.key, evalMeta]));
|
|
97
108
|
const lastRunStatusMap = /* @__PURE__ */ new Map();
|
|
98
109
|
const latestRunInfoMap = /* @__PURE__ */ new Map();
|
|
99
110
|
await executeRun({
|
|
@@ -108,7 +119,6 @@ async function main() {
|
|
|
108
119
|
request: context.request,
|
|
109
120
|
runDir: context.runDir,
|
|
110
121
|
config,
|
|
111
|
-
evals,
|
|
112
122
|
cacheStore,
|
|
113
123
|
lastRunStatusMap,
|
|
114
124
|
latestRunInfoMap,
|
|
@@ -123,7 +133,7 @@ async function main() {
|
|
|
123
133
|
workspaceRoot: context.workspaceRoot,
|
|
124
134
|
getSourceFingerprint,
|
|
125
135
|
getConfiguredConcurrency: () => getConfiguredConcurrency(config.concurrency),
|
|
126
|
-
getSortedEvalMetas: () => [...evals.values()].toSorted((a, b) => a.filePath.localeCompare(b.filePath)),
|
|
136
|
+
getSortedEvalMetas: () => [...evals.values()].toSorted((a, b) => a.filePath.localeCompare(b.filePath) || a.id.localeCompare(b.id)),
|
|
127
137
|
getTargetEvals: (request) => getTargetEvals({
|
|
128
138
|
evals,
|
|
129
139
|
request
|