@ls-stack/agent-eval 0.12.0 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-DI3IUGb_.mjs → app-D3wtTfbu.mjs} +4 -4
- package/dist/apps/web/dist/assets/{index-BMe8ZR3n.js → index-Drw0IpOd.js} +37 -33
- package/dist/apps/web/dist/index.html +1 -1
- package/dist/bin.mjs +1 -1
- package/dist/{cli-COzPxKg2.mjs → cli-BEPaYHmX.mjs} +3 -3
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-COFhQvTJ.mjs → runOrchestration-DrgpaDaf.mjs} +19 -5
- package/dist/{runner-sMZXoDp3.mjs → runner-BHCokR_t.mjs} +1 -1
- package/dist/{runner-nQjuRZGC.mjs → runner-BVC9yBDu.mjs} +2 -2
- package/dist/src-BU6ZtVIB.mjs +3 -0
- package/package.json +1 -1
- package/dist/src-OZSs693X.mjs +0 -3
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-BMe8ZR3n.js"></script>
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-Drw0IpOd.js"></script>
|
|
29
29
|
<link rel="stylesheet" crossorigin href="/assets/index-BVnLr79e.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
package/dist/bin.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as getEvalDisplayStatus, F as runSummarySchema, J as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as normalizeScoreDef, c as persistCaseDetail, d as recomputePersistedCaseStatus, dn as getEvalRegistry, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, j as deriveScopedSummaryFromCases, k as getEvalTitle, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, q as resolveApiCallsConfig, r as getLastRunStatuses, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as loadConfig, x as createFsCacheStore, y as buildDeclaredColumnDefs } from "./runOrchestration-COFhQvTJ.mjs";
|
|
1
|
+
import { A as getEvalDisplayStatus, F as runSummarySchema, J as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as normalizeScoreDef, c as persistCaseDetail, d as recomputePersistedCaseStatus, dn as getEvalRegistry, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, j as deriveScopedSummaryFromCases, k as getEvalTitle, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, q as resolveApiCallsConfig, r as getLastRunStatuses, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as loadConfig, x as createFsCacheStore, y as buildDeclaredColumnDefs } from "./runOrchestration-DrgpaDaf.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join, relative, resolve } from "node:path";
|
|
@@ -1050,8 +1050,8 @@ async function commandApp(args) {
|
|
|
1050
1050
|
const { serve } = await import("@hono/node-server");
|
|
1051
1051
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
1052
1052
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
1053
|
-
const appModule = await import("./app-DI3IUGb_.mjs");
|
|
1054
|
-
const runnerModule = await import("./runner-sMZXoDp3.mjs");
|
|
1053
|
+
const appModule = await import("./app-D3wtTfbu.mjs");
|
|
1054
|
+
const runnerModule = await import("./runner-BHCokR_t.mjs");
|
|
1055
1055
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
1056
1056
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
1057
1057
|
await runnerModule.initRunner();
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as evalFreshnessStatusSchema, $t as evalAssert, A as getEvalDisplayStatus, At as traceDisplayInputConfigSchema, B as apiCallMetricPlacementSchema, Bt as jsonCellSchema, C as updateManualScoreRequestSchema, Ct as spanCacheOptionsSchema, D as extractLlmCalls, Dt as traceAttributeDisplayPlacementSchema, E as extractApiCalls, Et as traceAttributeDisplayInputSchema, F as runSummarySchema, Ft as cellValueSchema, G as llmCallMetricSchema, Gt as buildTraceTree, H as apiCallsConfigSchema, Ht as repoFileRefSchema, I as DEFAULT_API_CALLS_CONFIG, It as columnDefSchema, J as resolveLlmCallsConfig, Jt as evalTracer, K as llmCallsConfigSchema, Kt as captureEvalSpanError, L as DEFAULT_LLM_CALLS_CONFIG, Lt as columnFormatSchema, M as deriveStatusFromCaseRows, Mt as traceSpanKindSchema, N as deriveStatusFromChildStatuses, Nt as traceSpanSchema, O as getNestedAttribute, Ot as traceAttributeDisplaySchema, P as runManifestSchema, Pt as traceSpanWarningSchema, Q as caseRowSchema, Qt as appendToEvalOutput, R as agentEvalsConfigSchema, Rt as columnKindSchema, S as createRunRequestSchema, St as serializedCacheSpanSchema, T as extractCacheHits, Tt as traceAttributeDisplayFormatSchema, U as llmCallMetricFormatSchema, Ut as runArtifactRefSchema, V as apiCallMetricSchema, Vt as numberDisplayOptionsSchema, W as llmCallMetricPlacementSchema, Wt as z, X as assertionFailureSchema, Xt as hashCacheKeySync, Y as trialSelectionModeSchema, Yt as hashCacheKey, Z as caseDetailSchema, Zt as EvalAssertionError, _t as cacheModeSchema, an as nextEvalId, at as evalChartAggregateSchema, bt as cacheRecordingSchema, cn as setScopeCacheContext, ct as evalChartColorSchema, dn as getEvalRegistry, dt as evalChartTooltipExtraSchema, en as getCurrentScope, et as evalStatAggregateSchema, ft as evalChartTypeSchema, gt as cacheListItemSchema, ht as cacheFileSchema, in as mergeEvalOutput, it as scoreTraceSchema, j as deriveScopedSummaryFromCases, jt as traceSpanErrorSchema, k as getEvalTitle, kt as 
traceDisplayConfigSchema, ln as repoFile, lt as evalChartConfigSchema, mt as cacheEntrySchema, nn as incrementEvalOutput, nt as evalStatsConfigSchema, on as runInEvalScope, ot as evalChartAxisSchema, pt as evalChartsConfigSchema, q as resolveApiCallsConfig, qt as evalSpan, rn as isInEvalScope, rt as evalSummarySchema, sn as setEvalOutput, st as evalChartBuiltinMetricSchema, tn as getEvalCaseInput, tt as evalStatItemSchema, un as defineEval, ut as evalChartMetricSchema, vt as cacheOperationTypeSchema, w as sseEnvelopeSchema, wt as traceCacheRefSchema, xt as cacheStatusSchema, yt as cacheRecordingOpSchema, z as apiCallMetricFormatSchema, zt as fileRefSchema } from "./runOrchestration-COFhQvTJ.mjs";
|
|
2
|
-
import { n as createRunner, t as runCli } from "./cli-COzPxKg2.mjs";
|
|
3
|
-
import "./src-OZSs693X.mjs";
|
|
1
|
+
import { $ as evalFreshnessStatusSchema, $t as evalAssert, A as getEvalDisplayStatus, At as traceDisplayInputConfigSchema, B as apiCallMetricPlacementSchema, Bt as jsonCellSchema, C as updateManualScoreRequestSchema, Ct as spanCacheOptionsSchema, D as extractLlmCalls, Dt as traceAttributeDisplayPlacementSchema, E as extractApiCalls, Et as traceAttributeDisplayInputSchema, F as runSummarySchema, Ft as cellValueSchema, G as llmCallMetricSchema, Gt as buildTraceTree, H as apiCallsConfigSchema, Ht as repoFileRefSchema, I as DEFAULT_API_CALLS_CONFIG, It as columnDefSchema, J as resolveLlmCallsConfig, Jt as evalTracer, K as llmCallsConfigSchema, Kt as captureEvalSpanError, L as DEFAULT_LLM_CALLS_CONFIG, Lt as columnFormatSchema, M as deriveStatusFromCaseRows, Mt as traceSpanKindSchema, N as deriveStatusFromChildStatuses, Nt as traceSpanSchema, O as getNestedAttribute, Ot as traceAttributeDisplaySchema, P as runManifestSchema, Pt as traceSpanWarningSchema, Q as caseRowSchema, Qt as appendToEvalOutput, R as agentEvalsConfigSchema, Rt as columnKindSchema, S as createRunRequestSchema, St as serializedCacheSpanSchema, T as extractCacheHits, Tt as traceAttributeDisplayFormatSchema, U as llmCallMetricFormatSchema, Ut as runArtifactRefSchema, V as apiCallMetricSchema, Vt as numberDisplayOptionsSchema, W as llmCallMetricPlacementSchema, Wt as z, X as assertionFailureSchema, Xt as hashCacheKeySync, Y as trialSelectionModeSchema, Yt as hashCacheKey, Z as caseDetailSchema, Zt as EvalAssertionError, _t as cacheModeSchema, an as nextEvalId, at as evalChartAggregateSchema, bt as cacheRecordingSchema, cn as setScopeCacheContext, ct as evalChartColorSchema, dn as getEvalRegistry, dt as evalChartTooltipExtraSchema, en as getCurrentScope, et as evalStatAggregateSchema, ft as evalChartTypeSchema, gt as cacheListItemSchema, ht as cacheFileSchema, in as mergeEvalOutput, it as scoreTraceSchema, j as deriveScopedSummaryFromCases, jt as traceSpanErrorSchema, k as getEvalTitle, kt as 
traceDisplayConfigSchema, ln as repoFile, lt as evalChartConfigSchema, mt as cacheEntrySchema, nn as incrementEvalOutput, nt as evalStatsConfigSchema, on as runInEvalScope, ot as evalChartAxisSchema, pt as evalChartsConfigSchema, q as resolveApiCallsConfig, qt as evalSpan, rn as isInEvalScope, rt as evalSummarySchema, sn as setEvalOutput, st as evalChartBuiltinMetricSchema, tn as getEvalCaseInput, tt as evalStatItemSchema, un as defineEval, ut as evalChartMetricSchema, vt as cacheOperationTypeSchema, w as sseEnvelopeSchema, wt as traceCacheRefSchema, xt as cacheStatusSchema, yt as cacheRecordingOpSchema, z as apiCallMetricFormatSchema, zt as fileRefSchema } from "./runOrchestration-DrgpaDaf.mjs";
|
|
2
|
+
import { n as createRunner, t as runCli } from "./cli-BEPaYHmX.mjs";
|
|
3
|
+
import "./src-BU6ZtVIB.mjs";
|
|
4
4
|
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, 
traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { F as runSummarySchema, It as columnDefSchema, P as runManifestSchema, S as createRunRequestSchema, nt as evalStatsConfigSchema, pt as evalChartsConfigSchema, t as executeRun, v as loadConfig, x as createFsCacheStore } from "./runOrchestration-COFhQvTJ.mjs";
|
|
1
|
+
import { F as runSummarySchema, It as columnDefSchema, P as runManifestSchema, S as createRunRequestSchema, nt as evalStatsConfigSchema, pt as evalChartsConfigSchema, t as executeRun, v as loadConfig, x as createFsCacheStore } from "./runOrchestration-DrgpaDaf.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { z } from "zod/v4";
|
|
@@ -5676,6 +5676,20 @@ function pickWinningTrial(params) {
|
|
|
5676
5676
|
if (medianAttempt === void 0) throw new Error("Expected at least one trial attempt");
|
|
5677
5677
|
return medianAttempt;
|
|
5678
5678
|
}
|
|
5679
|
+
function formatUnknownErrorDetails(error) {
|
|
5680
|
+
if (error instanceof Error) return error.stack ?? error.message;
|
|
5681
|
+
if (typeof error === "string") return error;
|
|
5682
|
+
return String(error);
|
|
5683
|
+
}
|
|
5684
|
+
function buildRunErrorMessage(errors) {
|
|
5685
|
+
return errors.map((entry) => {
|
|
5686
|
+
const [firstLine, ...detailLines] = entry.details.split("\n");
|
|
5687
|
+
const messageLine = firstLine?.trim() ?? "Unknown error";
|
|
5688
|
+
const details = detailLines.join("\n").trim();
|
|
5689
|
+
if (details.length === 0) return `[${entry.evalId}] ${messageLine}`;
|
|
5690
|
+
return `[${entry.evalId}] ${messageLine}\n${details}`;
|
|
5691
|
+
}).join("\n");
|
|
5692
|
+
}
|
|
5679
5693
|
async function finalizePreparedCase(params) {
|
|
5680
5694
|
const { runState, runDir, preparedEval, preparedCase, onCaseFinished, emitEvent } = params;
|
|
5681
5695
|
if (preparedCase.finalized || preparedCase.trialResults.length === 0) return;
|
|
@@ -5754,7 +5768,7 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
|
|
|
5754
5768
|
if (!entry) {
|
|
5755
5769
|
evalErrors.push({
|
|
5756
5770
|
evalId: evalMeta.id,
|
|
5757
|
-
|
|
5771
|
+
details: `Eval "${evalMeta.id}" was not registered after importing ${evalFilePath}`
|
|
5758
5772
|
});
|
|
5759
5773
|
continue;
|
|
5760
5774
|
}
|
|
@@ -5846,7 +5860,7 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
|
|
|
5846
5860
|
console.error(`Error running eval ${evalMeta.id}:`, error);
|
|
5847
5861
|
evalErrors.push({
|
|
5848
5862
|
evalId: evalMeta.id,
|
|
5849
|
-
|
|
5863
|
+
details: formatUnknownErrorDetails(error)
|
|
5850
5864
|
});
|
|
5851
5865
|
lastRunStatusMap.set(evalMeta.id, "error");
|
|
5852
5866
|
latestRunInfoMap.set(evalMeta.id, {
|
|
@@ -5890,7 +5904,7 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
|
|
|
5890
5904
|
runState.manifest.status = finalStatus;
|
|
5891
5905
|
const completedRunAt = endTime.toISOString();
|
|
5892
5906
|
runState.manifest.endedAt = completedRunAt;
|
|
5893
|
-
runState.summary.errorMessage = evalErrors.length > 0 ? evalErrors
|
|
5907
|
+
runState.summary.errorMessage = evalErrors.length > 0 ? buildRunErrorMessage(evalErrors) : null;
|
|
5894
5908
|
for (const evalId of getTargetEvalIds({
|
|
5895
5909
|
request,
|
|
5896
5910
|
sortedEvalIds: getSortedEvalMetas().map((meta) => meta.id),
|
|
@@ -5918,7 +5932,7 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
|
|
|
5918
5932
|
type: "run.error",
|
|
5919
5933
|
runId: runState.manifest.id,
|
|
5920
5934
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
5921
|
-
payload: { message: evalErrors
|
|
5935
|
+
payload: { message: buildRunErrorMessage(evalErrors) }
|
|
5922
5936
|
});
|
|
5923
5937
|
else emitEvent(runState, {
|
|
5924
5938
|
type: "run.finished",
|
|
@@ -5928,7 +5942,7 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
|
|
|
5928
5942
|
});
|
|
5929
5943
|
emitDiscoveryEvent();
|
|
5930
5944
|
} catch (error) {
|
|
5931
|
-
const message =
|
|
5945
|
+
const message = formatUnknownErrorDetails(error);
|
|
5932
5946
|
runState.manifest.status = "error";
|
|
5933
5947
|
runState.manifest.endedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
5934
5948
|
runState.summary.status = "error";
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-nQjuRZGC.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-BVC9yBDu.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-COzPxKg2.mjs";
|
|
2
|
-
import "./src-OZSs693X.mjs";
|
|
1
|
+
import { n as createRunner } from "./cli-BEPaYHmX.mjs";
|
|
2
|
+
import "./src-BU6ZtVIB.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|
package/package.json
CHANGED
package/dist/src-OZSs693X.mjs
DELETED