@ls-stack/agent-eval 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-C5CJ1sX6.mjs → app-TjV5nDMM.mjs} +5 -5
- package/dist/apps/web/dist/assets/index-ClE28i5w.css +1 -0
- package/dist/apps/web/dist/assets/index-gGumCEnD.js +112 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +39 -6
- package/dist/cli-BTtgQLjB.mjs +1285 -0
- package/dist/index.d.mts +1072 -829
- package/dist/index.mjs +4 -3
- package/dist/runChild.d.mts +1 -0
- package/dist/runChild.mjs +107 -0
- package/dist/{cli-C5FL7C4G.mjs → runOrchestration-HaMahl6b.mjs} +1216 -1697
- package/dist/{runner-Cdlvk56X.mjs → runner-CBDZos0Z.mjs} +1 -1
- package/dist/{runner-K2bN8KRS.mjs → runner-DGVoOyJt.mjs} +2 -2
- package/dist/src-Bt5Fz9HS.mjs +3 -0
- package/package.json +3 -2
- package/dist/apps/web/dist/assets/index-CBvHVkE7.js +0 -109
- package/dist/apps/web/dist/assets/index-Dd7I28ts.css +0 -1
- package/dist/src-gqm1z1Nu.mjs +0 -2
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
-
import { $ as
|
|
2
|
-
import "./
|
|
3
|
-
|
|
1
|
+
import { $ as evalChartAxisSchema, A as runManifestSchema, At as evalTracer, B as cacheRecordingSchema, Bt as mergeEvalOutput, C as updateManualScoreRequestSchema, Ct as numberDisplayOptionsSchema, D as deriveScopedSummaryFromCases, Dt as buildTraceTree, E as getEvalDisplayStatus, Et as z, F as cacheFileSchema, Ft as evalAssert, G as caseRowSchema, Gt as defineEval, H as spanCacheOptionsSchema, Ht as setEvalOutput, I as cacheListItemSchema, It as getCurrentScope, J as evalStatItemSchema, K as evalFreshnessStatusSchema, Kt as getEvalRegistry, L as cacheModeSchema, Lt as getEvalCaseInput, M as agentEvalsConfigSchema, Mt as hashCacheKeySync, N as trialSelectionModeSchema, Nt as EvalAssertionError, O as deriveStatusFromCaseRows, Ot as captureEvalSpanError, P as cacheEntrySchema, Pt as appendToEvalOutput, Q as evalChartAggregateSchema, R as cacheOperationTypeSchema, Rt as incrementEvalOutput, S as createRunRequestSchema, St as jsonCellSchema, T as getEvalTitle, Tt as runArtifactRefSchema, U as assertionFailureSchema, Ut as setScopeCacheContext, V as serializedCacheSpanSchema, Vt as runInEvalScope, W as caseDetailSchema, Wt as repoFile, X as evalSummarySchema, Y as evalStatsConfigSchema, Z as scoreTraceSchema, _t as cellValueSchema, at as evalChartTypeSchema, bt as columnKindSchema, ct as traceAttributeDisplayInputSchema, dt as traceDisplayConfigSchema, et as evalChartBuiltinMetricSchema, ft as traceDisplayInputConfigSchema, gt as traceSpanWarningSchema, ht as traceSpanSchema, it as evalChartTooltipExtraSchema, j as runSummarySchema, jt as hashCacheKey, k as deriveStatusFromChildStatuses, kt as evalSpan, lt as traceAttributeDisplayPlacementSchema, mt as traceSpanKindSchema, nt as evalChartConfigSchema, ot as evalChartsConfigSchema, pt as traceSpanErrorSchema, q as evalStatAggregateSchema, rt as evalChartMetricSchema, st as traceAttributeDisplayFormatSchema, tt as evalChartColorSchema, ut as traceAttributeDisplaySchema, vt as columnDefSchema, w as sseEnvelopeSchema, wt as 
repoFileRefSchema, xt as fileRefSchema, yt as columnFormatSchema, z as cacheRecordingOpSchema, zt as isInEvalScope } from "./runOrchestration-HaMahl6b.mjs";
|
|
2
|
+
import { n as createRunner, t as runCli } from "./cli-BTtgQLjB.mjs";
|
|
3
|
+
import "./src-Bt5Fz9HS.mjs";
|
|
4
|
+
export { EvalAssertionError, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, mergeEvalOutput, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { };
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { A as runManifestSchema, S as createRunRequestSchema, Y as evalStatsConfigSchema, j as runSummarySchema, ot as evalChartsConfigSchema, t as executeRun, v as loadConfig, vt as columnDefSchema, x as createFsCacheStore } from "./runOrchestration-HaMahl6b.mjs";
|
|
2
|
+
import { createHash } from "node:crypto";
|
|
3
|
+
import { readFile } from "node:fs/promises";
|
|
4
|
+
import { z } from "zod/v4";
|
|
5
|
+
//#region ../runner/src/runChild.ts
|
|
6
|
+
// Field validators for one eval's metadata entry, as carried in the `evals`
// array of the run child context. Key order matches the original declaration.
const evalMetaShape = {
  // Identifier; main() uses it as the key of its `evals` Map.
  id: z.string(),
  // Optional title string.
  title: z.string().optional(),
  // Used as the sort key whenever evals are listed in order.
  filePath: z.string(),
  sourceFilePath: z.string(),
  // String or null (presumably null when no fingerprint is available — confirm).
  sourceFingerprint: z.string().nullable(),
  columnDefs: z.array(columnDefSchema),
  // Number or null.
  caseCount: z.number().nullable(),
  stats: evalStatsConfigSchema.optional(),
  charts: evalChartsConfigSchema.optional(),
};

/** Schema describing a single eval's metadata. */
const evalMetaSchema = z.object(evalMetaShape);
|
|
17
|
+
// Field validators for the JSON context file that readContext() loads.
// Key order matches the original declaration.
const runChildContextShape = {
  // The run request; forwarded unchanged to executeRun.
  request: createRunRequestSchema,
  // Directory main() switches into before loading the config.
  workspaceRoot: z.string(),
  runDir: z.string(),
  manifest: runManifestSchema,
  summary: runSummarySchema,
  // Metadata for every eval the child is told about.
  evals: z.array(evalMetaSchema),
};

/** Schema for the full context handed to the run child process. */
const runChildContextSchema = z.object(runChildContextShape);
|
|
25
|
+
/**
 * Forwards a message over the IPC channel, if there is one.
 *
 * `process.send` is only defined when this process was spawned with an IPC
 * channel; without it the message is silently dropped.
 *
 * @param {unknown} message - Payload to hand to `process.send`.
 * @returns {void}
 */
function sendMessage(message) {
  if (process.send !== undefined) {
    process.send(message);
  }
}
|
|
29
|
+
/**
 * Computes the SHA-256 digest of the given source content.
 *
 * @param {string | Buffer | NodeJS.TypedArray | DataView} source - Content to hash.
 * @returns {string} Lowercase hexadecimal digest.
 */
function getSourceFingerprint(source) {
  const hasher = createHash("sha256");
  hasher.update(source);
  return hasher.digest("hex");
}
|
|
32
|
+
/**
 * Normalizes a configured concurrency value to a positive integer.
 *
 * Anything that is not a finite number falls back to 1; finite numbers are
 * rounded down and clamped so the result is never below 1.
 *
 * @param {unknown} configConcurrency - Raw value from the config.
 * @returns {number} An integer greater than or equal to 1.
 */
function getConfiguredConcurrency(configConcurrency) {
  const isUsableNumber =
    typeof configConcurrency === "number" && Number.isFinite(configConcurrency);
  if (!isUsableNumber) return 1;
  const wholeWorkers = Math.floor(configConcurrency);
  return wholeWorkers < 1 ? 1 : wholeWorkers;
}
|
|
36
|
+
function getTargetEvals(params) {
|
|
37
|
+
if (params.request.target.evalIds && params.request.target.evalIds.length > 0) return params.request.target.evalIds.map((id) => params.evals.get(id)).filter((entry) => entry !== void 0);
|
|
38
|
+
return [...params.evals.values()].toSorted((a, b) => a.filePath.localeCompare(b.filePath));
|
|
39
|
+
}
|
|
40
|
+
/**
 * Loads and validates the run child context from a JSON file.
 *
 * @param {string | undefined} contextPath - Path to the context file.
 * @returns {Promise<object>} The context as parsed by `runChildContextSchema`.
 * @throws {Error} When `contextPath` is undefined; read, JSON and schema
 *   errors propagate unchanged.
 */
async function readContext(contextPath) {
  if (contextPath === undefined) {
    throw new Error("Missing run child context path");
  }
  const rawText = await readFile(contextPath, "utf-8");
  const decoded = JSON.parse(rawText);
  return runChildContextSchema.parse(decoded);
}
|
|
44
|
+
/**
 * Entry point of the run child process.
 *
 * Reads the context file named by the first CLI argument, switches into the
 * workspace root, loads the config, builds the cache store and eval lookup,
 * and delegates to `executeRun`. Progress is reported through sendMessage(),
 * ending with a final "done" message that carries every eval metadata entry.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Losing the IPC channel terminates this process with exit code 1.
  process.on("disconnect", () => process.exit(1));

  const context = await readContext(process.argv[2]);
  const { workspaceRoot, runDir, request } = context;
  process.chdir(workspaceRoot);

  const config = await loadConfig();
  const cacheStore = createFsCacheStore({
    workspaceRoot,
    dir: config.cache?.dir,
    maxEntriesPerEval: config.cache?.maxEntriesPerEval
  });

  // Index eval metadata by id; insertion order follows the context file.
  const evals = new Map();
  for (const evalMeta of context.evals) {
    evals.set(evalMeta.id, evalMeta);
  }

  const lastRunStatusMap = /* @__PURE__ */ new Map();
  const latestRunInfoMap = /* @__PURE__ */ new Map();
  const byFilePath = (left, right) => left.filePath.localeCompare(right.filePath);

  // Fresh run state seeded from the manifest and summary in the context file.
  const runState = {
    runDir,
    manifest: context.manifest,
    summary: context.summary,
    cases: [],
    caseDetails: /* @__PURE__ */ new Map(),
    listeners: /* @__PURE__ */ new Set()
  };

  await executeRun({
    runState,
    request,
    runDir,
    config,
    evals,
    cacheStore,
    lastRunStatusMap,
    latestRunInfoMap,
    emitEvent(_runState, event) {
      // "case.finished" events are not forwarded here (presumably because
      // onCaseFinished below reports them with full detail — confirm).
      if (event.type !== "case.finished") {
        sendMessage({
          type: "event",
          event
        });
      }
    },
    // Discovery events are deliberately ignored in the child.
    emitDiscoveryEvent() {},
    workspaceRoot,
    getSourceFingerprint,
    getConfiguredConcurrency: () => getConfiguredConcurrency(config.concurrency),
    getSortedEvalMetas: () => [...evals.values()].toSorted(byFilePath),
    getTargetEvals: (targetRequest) => getTargetEvals({
      evals,
      request: targetRequest
    }),
    onCaseFinished(caseDetail, caseRow) {
      sendMessage({
        type: "case.finished",
        caseDetail,
        caseRow
      });
    }
  });

  sendMessage({
    type: "done",
    evals: [...evals.values()]
  });
}
|
|
104
|
+
// Run the child to completion, then close the IPC channel.
await main();
// `process.disconnect` is undefined when this process was not spawned with an
// IPC channel, and disconnecting an already-closed channel raises an error.
// Guard on `process.connected` so a direct invocation ends cleanly, matching
// the tolerance sendMessage() already has for a missing channel.
// NOTE(review): disconnecting appears to fire the "disconnect" listener that
// main() registers, which calls process.exit(1) — confirm the parent relies on
// the "done" message rather than the exit code to detect success.
if (process.connected) process.disconnect();
|
|
106
|
+
//#endregion
|
|
107
|
+
export {};
|