@ls-stack/agent-eval 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-7qDBq_ub.mjs → app-ZFLdu8-r.mjs} +43 -7
- package/dist/apps/web/dist/assets/{index-CdxG9-O-.css → index-ClE28i5w.css} +1 -1
- package/dist/apps/web/dist/assets/index-CvJmtK1T.js +113 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +39 -6
- package/dist/cli-DQK5W0je.mjs +1285 -0
- package/dist/index.d.mts +2 -3
- package/dist/index.mjs +3 -2
- package/dist/runChild.d.mts +1 -0
- package/dist/runChild.mjs +107 -0
- package/dist/{cli-C-n-Fd4o.mjs → runOrchestration-HaMahl6b.mjs} +394 -1352
- package/dist/{runner-uzzY8kk1.mjs → runner--XPZ5D7N.mjs} +1 -1
- package/dist/{runner-CwEtnUFf.mjs → runner-CmVPWava.mjs} +2 -2
- package/dist/src-r3FQAaw6.mjs +3 -0
- package/package.json +5 -4
- package/dist/apps/web/dist/assets/index-J1yKYGfN.js +0 -112
- package/dist/src-Dy31CPXH.mjs +0 -2
package/dist/index.d.mts
CHANGED
|
@@ -1887,7 +1887,6 @@ type EvalTraceTree = {
|
|
|
1887
1887
|
/** Context passed to an eval's `execute` function for a single case run. */
|
|
1888
1888
|
type EvalExecuteContext<TInput> = {
|
|
1889
1889
|
input: TInput;
|
|
1890
|
-
signal: AbortSignal;
|
|
1891
1890
|
};
|
|
1892
1891
|
/** Context passed to `deriveFromTracing` after execution has completed. */
|
|
1893
1892
|
type EvalDeriveContext<TInput> = {
|
|
@@ -2383,8 +2382,8 @@ type EvalRunner = {
|
|
|
2383
2382
|
manifest: RunManifest;
|
|
2384
2383
|
summary: RunSummary;
|
|
2385
2384
|
cases: CaseRow[];
|
|
2386
|
-
} | undefined; /** Request cancellation for an in-flight run. */
|
|
2387
|
-
cancelRun(id: string): void
|
|
2385
|
+
} | undefined; /** Request cancellation for an in-flight run and persist its cancelled state. */
|
|
2386
|
+
cancelRun(id: string): Promise<void>; /** Return full details for a single case in a run, when available. */
|
|
2388
2387
|
getCaseDetail(runId: string, caseId: string): CaseDetail | undefined; /** Subscribe to streamed events for a specific run. */
|
|
2389
2388
|
subscribe(runId: string, listener: (event: SseEnvelope) => void): () => void; /** Subscribe to discovery updates triggered by file changes or manual refresh. */
|
|
2390
2389
|
subscribeDiscovery(listener: (event: SseEnvelope) => void): () => void; /** Stop background filesystem watchers owned by this runner instance. */
|
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
-
import { $ as
|
|
2
|
-
import "./
|
|
1
|
+
import { $ as evalChartAxisSchema, A as runManifestSchema, At as evalTracer, B as cacheRecordingSchema, Bt as mergeEvalOutput, C as updateManualScoreRequestSchema, Ct as numberDisplayOptionsSchema, D as deriveScopedSummaryFromCases, Dt as buildTraceTree, E as getEvalDisplayStatus, Et as z, F as cacheFileSchema, Ft as evalAssert, G as caseRowSchema, Gt as defineEval, H as spanCacheOptionsSchema, Ht as setEvalOutput, I as cacheListItemSchema, It as getCurrentScope, J as evalStatItemSchema, K as evalFreshnessStatusSchema, Kt as getEvalRegistry, L as cacheModeSchema, Lt as getEvalCaseInput, M as agentEvalsConfigSchema, Mt as hashCacheKeySync, N as trialSelectionModeSchema, Nt as EvalAssertionError, O as deriveStatusFromCaseRows, Ot as captureEvalSpanError, P as cacheEntrySchema, Pt as appendToEvalOutput, Q as evalChartAggregateSchema, R as cacheOperationTypeSchema, Rt as incrementEvalOutput, S as createRunRequestSchema, St as jsonCellSchema, T as getEvalTitle, Tt as runArtifactRefSchema, U as assertionFailureSchema, Ut as setScopeCacheContext, V as serializedCacheSpanSchema, Vt as runInEvalScope, W as caseDetailSchema, Wt as repoFile, X as evalSummarySchema, Y as evalStatsConfigSchema, Z as scoreTraceSchema, _t as cellValueSchema, at as evalChartTypeSchema, bt as columnKindSchema, ct as traceAttributeDisplayInputSchema, dt as traceDisplayConfigSchema, et as evalChartBuiltinMetricSchema, ft as traceDisplayInputConfigSchema, gt as traceSpanWarningSchema, ht as traceSpanSchema, it as evalChartTooltipExtraSchema, j as runSummarySchema, jt as hashCacheKey, k as deriveStatusFromChildStatuses, kt as evalSpan, lt as traceAttributeDisplayPlacementSchema, mt as traceSpanKindSchema, nt as evalChartConfigSchema, ot as evalChartsConfigSchema, pt as traceSpanErrorSchema, q as evalStatAggregateSchema, rt as evalChartMetricSchema, st as traceAttributeDisplayFormatSchema, tt as evalChartColorSchema, ut as traceAttributeDisplaySchema, vt as columnDefSchema, w as sseEnvelopeSchema, wt as repoFileRefSchema, xt as fileRefSchema, yt as columnFormatSchema, z as cacheRecordingOpSchema, zt as isInEvalScope } from "./runOrchestration-HaMahl6b.mjs";
|
|
2
|
+
import { n as createRunner, t as runCli } from "./cli-DQK5W0je.mjs";
|
|
3
|
+
import "./src-r3FQAaw6.mjs";
|
|
3
4
|
export { EvalAssertionError, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, mergeEvalOutput, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { };
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { A as runManifestSchema, S as createRunRequestSchema, Y as evalStatsConfigSchema, j as runSummarySchema, ot as evalChartsConfigSchema, t as executeRun, v as loadConfig, vt as columnDefSchema, x as createFsCacheStore } from "./runOrchestration-HaMahl6b.mjs";
|
|
2
|
+
import { createHash } from "node:crypto";
|
|
3
|
+
import { readFile } from "node:fs/promises";
|
|
4
|
+
import { z } from "zod/v4";
|
|
5
|
+
//#region ../runner/src/runChild.ts
|
|
6
|
+
const evalMetaSchema = z.object({
|
|
7
|
+
id: z.string(),
|
|
8
|
+
title: z.string().optional(),
|
|
9
|
+
filePath: z.string(),
|
|
10
|
+
sourceFilePath: z.string(),
|
|
11
|
+
sourceFingerprint: z.string().nullable(),
|
|
12
|
+
columnDefs: z.array(columnDefSchema),
|
|
13
|
+
caseCount: z.number().nullable(),
|
|
14
|
+
stats: evalStatsConfigSchema.optional(),
|
|
15
|
+
charts: evalChartsConfigSchema.optional()
|
|
16
|
+
});
|
|
17
|
+
const runChildContextSchema = z.object({
|
|
18
|
+
request: createRunRequestSchema,
|
|
19
|
+
workspaceRoot: z.string(),
|
|
20
|
+
runDir: z.string(),
|
|
21
|
+
manifest: runManifestSchema,
|
|
22
|
+
summary: runSummarySchema,
|
|
23
|
+
evals: z.array(evalMetaSchema)
|
|
24
|
+
});
|
|
25
|
+
function sendMessage(message) {
|
|
26
|
+
if (process.send === void 0) return;
|
|
27
|
+
process.send(message);
|
|
28
|
+
}
|
|
29
|
+
function getSourceFingerprint(source) {
|
|
30
|
+
return createHash("sha256").update(source).digest("hex");
|
|
31
|
+
}
|
|
32
|
+
function getConfiguredConcurrency(configConcurrency) {
|
|
33
|
+
if (typeof configConcurrency !== "number" || !Number.isFinite(configConcurrency)) return 1;
|
|
34
|
+
return Math.max(1, Math.floor(configConcurrency));
|
|
35
|
+
}
|
|
36
|
+
function getTargetEvals(params) {
|
|
37
|
+
if (params.request.target.evalIds && params.request.target.evalIds.length > 0) return params.request.target.evalIds.map((id) => params.evals.get(id)).filter((entry) => entry !== void 0);
|
|
38
|
+
return [...params.evals.values()].toSorted((a, b) => a.filePath.localeCompare(b.filePath));
|
|
39
|
+
}
|
|
40
|
+
async function readContext(contextPath) {
|
|
41
|
+
if (contextPath === void 0) throw new Error("Missing run child context path");
|
|
42
|
+
return runChildContextSchema.parse(JSON.parse(await readFile(contextPath, "utf-8")));
|
|
43
|
+
}
|
|
44
|
+
async function main() {
|
|
45
|
+
process.on("disconnect", () => {
|
|
46
|
+
process.exit(1);
|
|
47
|
+
});
|
|
48
|
+
const context = await readContext(process.argv[2]);
|
|
49
|
+
process.chdir(context.workspaceRoot);
|
|
50
|
+
const config = await loadConfig();
|
|
51
|
+
const cacheStore = createFsCacheStore({
|
|
52
|
+
workspaceRoot: context.workspaceRoot,
|
|
53
|
+
dir: config.cache?.dir,
|
|
54
|
+
maxEntriesPerEval: config.cache?.maxEntriesPerEval
|
|
55
|
+
});
|
|
56
|
+
const evals = new Map(context.evals.map((evalMeta) => [evalMeta.id, evalMeta]));
|
|
57
|
+
const lastRunStatusMap = /* @__PURE__ */ new Map();
|
|
58
|
+
const latestRunInfoMap = /* @__PURE__ */ new Map();
|
|
59
|
+
await executeRun({
|
|
60
|
+
runState: {
|
|
61
|
+
runDir: context.runDir,
|
|
62
|
+
manifest: context.manifest,
|
|
63
|
+
summary: context.summary,
|
|
64
|
+
cases: [],
|
|
65
|
+
caseDetails: /* @__PURE__ */ new Map(),
|
|
66
|
+
listeners: /* @__PURE__ */ new Set()
|
|
67
|
+
},
|
|
68
|
+
request: context.request,
|
|
69
|
+
runDir: context.runDir,
|
|
70
|
+
config,
|
|
71
|
+
evals,
|
|
72
|
+
cacheStore,
|
|
73
|
+
lastRunStatusMap,
|
|
74
|
+
latestRunInfoMap,
|
|
75
|
+
emitEvent(_runState, event) {
|
|
76
|
+
if (event.type === "case.finished") return;
|
|
77
|
+
sendMessage({
|
|
78
|
+
type: "event",
|
|
79
|
+
event
|
|
80
|
+
});
|
|
81
|
+
},
|
|
82
|
+
emitDiscoveryEvent() {},
|
|
83
|
+
workspaceRoot: context.workspaceRoot,
|
|
84
|
+
getSourceFingerprint,
|
|
85
|
+
getConfiguredConcurrency: () => getConfiguredConcurrency(config.concurrency),
|
|
86
|
+
getSortedEvalMetas: () => [...evals.values()].toSorted((a, b) => a.filePath.localeCompare(b.filePath)),
|
|
87
|
+
getTargetEvals: (request) => getTargetEvals({
|
|
88
|
+
evals,
|
|
89
|
+
request
|
|
90
|
+
}),
|
|
91
|
+
onCaseFinished(caseDetail, caseRow) {
|
|
92
|
+
sendMessage({
|
|
93
|
+
type: "case.finished",
|
|
94
|
+
caseDetail,
|
|
95
|
+
caseRow
|
|
96
|
+
});
|
|
97
|
+
}
|
|
98
|
+
});
|
|
99
|
+
sendMessage({
|
|
100
|
+
type: "done",
|
|
101
|
+
evals: [...evals.values()]
|
|
102
|
+
});
|
|
103
|
+
}
|
|
104
|
+
await main();
|
|
105
|
+
process.disconnect();
|
|
106
|
+
//#endregion
|
|
107
|
+
export {};
|