@ls-stack/agent-eval 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-Dg3qYVku.mjs → app-DKWm1oxc.mjs} +2 -2
- package/dist/apps/web/dist/assets/index-Cx2CvM6a.js +117 -0
- package/dist/apps/web/dist/index.html +1 -1
- package/dist/bin.mjs +8 -3
- package/dist/{cli-lOZdhO2D.mjs → cli-CMiCEQ-3.mjs} +11 -2
- package/dist/index.mjs +2 -2
- package/dist/{runner-CmbmfBG2.mjs → runner-DLnj18MO.mjs} +2 -2
- package/dist/{runner-BK1KX2SA.mjs → runner-Dx1sMCbh.mjs} +1 -1
- package/dist/{src-Btb9RCYD.mjs → src-BgGL7DDp.mjs} +1 -1
- package/package.json +1 -1
- package/dist/apps/web/dist/assets/index-Drw0IpOd.js +0 -117
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-Drw0IpOd.js"></script>
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-Cx2CvM6a.js"></script>
|
|
29
29
|
<link rel="stylesheet" crossorigin href="/assets/index-BVnLr79e.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
package/dist/bin.mjs
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { t as runCli } from "./cli-lOZdhO2D.mjs";
|
|
2
|
+
import { t as runCli } from "./cli-CMiCEQ-3.mjs";
|
|
3
3
|
import { spawn } from "node:child_process";
|
|
4
4
|
//#region src/bin.ts
|
|
5
5
|
const moduleMocksFlag = "--experimental-test-module-mocks";
|
|
6
6
|
const inspectFlagPrefix = "--inspect";
|
|
7
7
|
const inspectBrkFlagPrefix = "--inspect-brk";
|
|
8
|
+
const runChildInspectArgEnv = "AGENT_EVALS_RUN_CHILD_INSPECT_ARG";
|
|
8
9
|
function needsModuleMocksFlag() {
|
|
9
10
|
return !process.execArgv.includes(moduleMocksFlag);
|
|
10
11
|
}
|
|
@@ -32,10 +33,13 @@ function isInspectArg(arg) {
|
|
|
32
33
|
}
|
|
33
34
|
function buildExecArgv(inspectArg) {
|
|
34
35
|
const nextExecArgv = [moduleMocksFlag, ...process.execArgv.filter((arg) => arg !== moduleMocksFlag && !isInspectArg(arg))];
|
|
35
|
-
if (inspectArg
|
|
36
|
-
else nextExecArgv.push(...process.execArgv.filter(isInspectArg));
|
|
36
|
+
if (inspectArg === void 0) nextExecArgv.push(...process.execArgv.filter(isInspectArg));
|
|
37
37
|
return nextExecArgv;
|
|
38
38
|
}
|
|
39
|
+
function setRunChildInspectArg(inspectArg) {
|
|
40
|
+
if (inspectArg === void 0) return;
|
|
41
|
+
process.env[runChildInspectArgEnv] = inspectArg;
|
|
42
|
+
}
|
|
39
43
|
function execArgvMatches(nextExecArgv) {
|
|
40
44
|
return process.execArgv.length === nextExecArgv.length && process.execArgv.every((arg, index) => arg === nextExecArgv[index]);
|
|
41
45
|
}
|
|
@@ -67,6 +71,7 @@ async function reexecWithNodeArgs(argv, execArgv) {
|
|
|
67
71
|
});
|
|
68
72
|
}
|
|
69
73
|
const { argv, inspectArg } = parseDebugFlags(process.argv.slice(2));
|
|
74
|
+
setRunChildInspectArg(inspectArg);
|
|
70
75
|
const execArgv = buildExecArgv(inspectArg);
|
|
71
76
|
if (needsModuleMocksFlag() || !execArgvMatches(execArgv)) await reexecWithNodeArgs(argv, execArgv);
|
|
72
77
|
else await runCli(argv);
|
|
@@ -208,6 +208,9 @@ function isRunChildMessage(value) {
|
|
|
208
208
|
}
|
|
209
209
|
//#endregion
|
|
210
210
|
//#region ../runner/src/runChildManager.ts
|
|
211
|
+
const runChildInspectArgEnv = "AGENT_EVALS_RUN_CHILD_INSPECT_ARG";
|
|
212
|
+
const inspectFlagPrefix = "--inspect";
|
|
213
|
+
const inspectBrkFlagPrefix = "--inspect-brk";
|
|
211
214
|
function startRunChild(params) {
|
|
212
215
|
const child = spawn(process.execPath, [
|
|
213
216
|
...getRunChildExecArgv(),
|
|
@@ -256,10 +259,16 @@ function getRunChildExecArgv() {
|
|
|
256
259
|
if (arg === "--input-type") skipNext = true;
|
|
257
260
|
continue;
|
|
258
261
|
}
|
|
262
|
+
if (isInspectArg(arg)) continue;
|
|
259
263
|
execArgv.push(arg);
|
|
260
264
|
}
|
|
265
|
+
const inspectArg = process.env[runChildInspectArgEnv];
|
|
266
|
+
if (inspectArg !== void 0 && isInspectArg(inspectArg)) execArgv.push(inspectArg);
|
|
261
267
|
return execArgv;
|
|
262
268
|
}
|
|
269
|
+
function isInspectArg(arg) {
|
|
270
|
+
return arg === inspectFlagPrefix || arg.startsWith(`${inspectFlagPrefix}=`) || arg === inspectBrkFlagPrefix || arg.startsWith(`${inspectBrkFlagPrefix}=`);
|
|
271
|
+
}
|
|
263
272
|
function killRunChild(runState) {
|
|
264
273
|
const child = runState.childProcess;
|
|
265
274
|
runState.childProcess = void 0;
|
|
@@ -1050,8 +1059,8 @@ async function commandApp(args) {
|
|
|
1050
1059
|
const { serve } = await import("@hono/node-server");
|
|
1051
1060
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
1052
1061
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
1053
|
-
const appModule = await import("./app-Dg3qYVku.mjs");
|
|
1054
|
-
const runnerModule = await import("./runner-BK1KX2SA.mjs");
|
|
1062
|
+
const appModule = await import("./app-DKWm1oxc.mjs");
|
|
1063
|
+
const runnerModule = await import("./runner-Dx1sMCbh.mjs");
|
|
1055
1064
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
1056
1065
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
1057
1066
|
await runnerModule.initRunner();
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
import { $ as evalFreshnessStatusSchema, $t as evalAssert, A as getEvalDisplayStatus, At as traceDisplayInputConfigSchema, B as apiCallMetricPlacementSchema, Bt as jsonCellSchema, C as updateManualScoreRequestSchema, Ct as spanCacheOptionsSchema, D as extractLlmCalls, Dt as traceAttributeDisplayPlacementSchema, E as extractApiCalls, Et as traceAttributeDisplayInputSchema, F as runSummarySchema, Ft as cellValueSchema, G as llmCallMetricSchema, Gt as buildTraceTree, H as apiCallsConfigSchema, Ht as repoFileRefSchema, I as DEFAULT_API_CALLS_CONFIG, It as columnDefSchema, J as resolveLlmCallsConfig, Jt as evalTracer, K as llmCallsConfigSchema, Kt as captureEvalSpanError, L as DEFAULT_LLM_CALLS_CONFIG, Lt as columnFormatSchema, M as deriveStatusFromCaseRows, Mt as traceSpanKindSchema, N as deriveStatusFromChildStatuses, Nt as traceSpanSchema, O as getNestedAttribute, Ot as traceAttributeDisplaySchema, P as runManifestSchema, Pt as traceSpanWarningSchema, Q as caseRowSchema, Qt as appendToEvalOutput, R as agentEvalsConfigSchema, Rt as columnKindSchema, S as createRunRequestSchema, St as serializedCacheSpanSchema, T as extractCacheHits, Tt as traceAttributeDisplayFormatSchema, U as llmCallMetricFormatSchema, Ut as runArtifactRefSchema, V as apiCallMetricSchema, Vt as numberDisplayOptionsSchema, W as llmCallMetricPlacementSchema, Wt as z, X as assertionFailureSchema, Xt as hashCacheKeySync, Y as trialSelectionModeSchema, Yt as hashCacheKey, Z as caseDetailSchema, Zt as EvalAssertionError, _t as cacheModeSchema, an as nextEvalId, at as evalChartAggregateSchema, bt as cacheRecordingSchema, cn as setScopeCacheContext, ct as evalChartColorSchema, dn as defineEval, dt as evalChartTooltipExtraSchema, en as getCurrentScope, et as evalStatAggregateSchema, fn as getEvalRegistry, ft as evalChartTypeSchema, gt as cacheListItemSchema, ht as cacheFileSchema, in as mergeEvalOutput, it as scoreTraceSchema, j as deriveScopedSummaryFromCases, jt as traceSpanErrorSchema, k as getEvalTitle, 
kt as traceDisplayConfigSchema, ln as startEvalBackgroundJob, lt as evalChartConfigSchema, mt as cacheEntrySchema, nn as incrementEvalOutput, nt as evalStatsConfigSchema, on as runInEvalScope, ot as evalChartAxisSchema, pt as evalChartsConfigSchema, q as resolveApiCallsConfig, qt as evalSpan, rn as isInEvalScope, rt as evalSummarySchema, sn as setEvalOutput, st as evalChartBuiltinMetricSchema, tn as getEvalCaseInput, tt as evalStatItemSchema, un as repoFile, ut as evalChartMetricSchema, vt as cacheOperationTypeSchema, w as sseEnvelopeSchema, wt as traceCacheRefSchema, xt as cacheStatusSchema, yt as cacheRecordingOpSchema, z as apiCallMetricFormatSchema, zt as fileRefSchema } from "./runOrchestration-H0pSUl3I.mjs";
|
|
2
|
-
import { n as createRunner, t as runCli } from "./cli-lOZdhO2D.mjs";
|
|
3
|
-
import "./src-Btb9RCYD.mjs";
|
|
2
|
+
import { n as createRunner, t as runCli } from "./cli-CMiCEQ-3.mjs";
|
|
3
|
+
import "./src-BgGL7DDp.mjs";
|
|
4
4
|
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, 
traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-lOZdhO2D.mjs";
|
|
2
|
-
import "./src-Btb9RCYD.mjs";
|
|
1
|
+
import { n as createRunner } from "./cli-CMiCEQ-3.mjs";
|
|
2
|
+
import "./src-BgGL7DDp.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-CmbmfBG2.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-DLnj18MO.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|