@ls-stack/agent-eval 0.58.2 → 0.58.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-BxD6aHbp.mjs → app-sGeXC4AT.mjs} +63 -11
- package/dist/apps/web/dist/assets/index-BXFsxHVc.js +377 -0
- package/dist/apps/web/dist/index.html +1 -1
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +4 -3
- package/dist/{cli-HBwXIJsg.mjs → cli-Bf5RzM8O.mjs} +7 -6
- package/dist/index.d.mts +129 -123
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +5 -4
- package/dist/{runExecution-pHJ0_TzH.mjs → runExecution-CLkC-4Z1.mjs} +40 -19
- package/dist/{runOrchestration-ngVXShH4.mjs → runOrchestration-BS-WxTee.mjs} +1 -1
- package/dist/{runner-BnZMGBla.mjs → runner-Bz5ZPqmm.mjs} +1 -1
- package/dist/{runner-D_pz2NON.mjs → runner-DW-11txl.mjs} +2 -2
- package/dist/{src-AeXGBJ26.mjs → src-BjMMDm_O.mjs} +2 -2
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +7 -6
- package/dist/apps/web/dist/assets/index-BMWBZw_u.js +0 -377
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-BMWBZw_u.js"></script>
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-BXFsxHVc.js"></script>
|
|
29
29
|
<link rel="stylesheet" crossorigin href="/assets/index-CHH7m5Cv.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
package/dist/bin.mjs
CHANGED
package/dist/caseChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Ct as
|
|
1
|
+
import { Ct as resolveApiCallsConfig, Lt as runWithEvalRegistry, R as configureEvalRunLogs, Y as runInEvalRuntimeScope, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, r as runCase, v as createFsCacheStore, wt as resolveLlmCallsConfig, y as getCacheRetentionOptions } from "./runExecution-CLkC-4Z1.mjs";
|
|
2
2
|
//#region ../runner/src/caseChild.ts
|
|
3
3
|
let fatalErrorReported = false;
|
|
4
4
|
let disconnectExpected = false;
|
|
@@ -52,11 +52,12 @@ async function executeCaseChild(context) {
|
|
|
52
52
|
registerAgentEvalsPackageResolutionHooks();
|
|
53
53
|
const config = await loadConfig();
|
|
54
54
|
configureEvalRunLogs({ captureConsole: config.runLogs?.captureConsole !== false });
|
|
55
|
+
const cacheRetentionOptions = getCacheRetentionOptions(config.cache);
|
|
55
56
|
const cacheStore = createFsCacheStore({
|
|
56
57
|
workspaceRoot: context.workspaceRoot,
|
|
57
58
|
dir: config.cache?.dir,
|
|
58
|
-
maxEntriesPerNamespace:
|
|
59
|
-
maxEntriesByNamespace:
|
|
59
|
+
maxEntriesPerNamespace: cacheRetentionOptions.maxEntriesPerNamespace,
|
|
60
|
+
maxEntriesByNamespace: cacheRetentionOptions.maxEntriesByNamespace,
|
|
60
61
|
lastAccessedAtUpdateIntervalMs: config.cache?.lastAccessedAtUpdateIntervalMs
|
|
61
62
|
});
|
|
62
63
|
const bufferedCacheStore = context.cacheEnabled && context.cacheMode !== "bypass" ? createBufferedCacheStore(cacheStore) : null;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Ct as
|
|
2
|
-
import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-ngVXShH4.mjs";
|
|
1
|
+
import { Ct as resolveApiCallsConfig, Dt as getCaseRowCaseKey, Et as buildEvalKey, It as getEvalRegistry, St as runSummarySchema, c as resolveArtifactPath, f as resolveEvalDefaultConfig, ft as getEvalTitle, h as normalizeScoreDef, kt as caseRowSchema, m as buildDeclaredColumnDefs, mt as deriveScopedSummaryFromCases, o as stripTerminalControlCodes, p as loadConfig, pt as getEvalDisplayStatus, s as resolveTracePresentation, ut as applyDerivedCallAttributes, v as createFsCacheStore, vt as matchesTagsFilter, wt as resolveLlmCallsConfig, y as getCacheRetentionOptions } from "./runExecution-CLkC-4Z1.mjs";
|
|
2
|
+
import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-BS-WxTee.mjs";
|
|
3
3
|
import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
|
|
5
5
|
import { createHash, randomUUID } from "node:crypto";
|
|
@@ -1483,11 +1483,12 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
1483
1483
|
await mkdir(localStateDir, { recursive: true });
|
|
1484
1484
|
await mkdir(join(localStateDir, "runs"), { recursive: true });
|
|
1485
1485
|
await cleanupStagedManualInputFiles(workspaceRoot);
|
|
1486
|
+
const cacheRetentionOptions = getCacheRetentionOptions(config.cache);
|
|
1486
1487
|
cacheStore = createFsCacheStore({
|
|
1487
1488
|
workspaceRoot,
|
|
1488
1489
|
dir: config.cache?.dir,
|
|
1489
|
-
maxEntriesPerNamespace:
|
|
1490
|
-
maxEntriesByNamespace:
|
|
1490
|
+
maxEntriesPerNamespace: cacheRetentionOptions.maxEntriesPerNamespace,
|
|
1491
|
+
maxEntriesByNamespace: cacheRetentionOptions.maxEntriesByNamespace,
|
|
1491
1492
|
lastAccessedAtUpdateIntervalMs: config.cache?.lastAccessedAtUpdateIntervalMs
|
|
1492
1493
|
});
|
|
1493
1494
|
await loadPersistedRuns();
|
|
@@ -2198,8 +2199,8 @@ async function commandApp(args) {
|
|
|
2198
2199
|
const { serve } = await import("@hono/node-server");
|
|
2199
2200
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
2200
2201
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
2201
|
-
const appModule = await import("./app-BxD6aHbp.mjs");
|
|
2202
|
-
const runnerModule = await import("./runner-BnZMGBla.mjs");
|
|
2202
|
+
const appModule = await import("./app-sGeXC4AT.mjs");
|
|
2203
|
+
const runnerModule = await import("./runner-Bz5ZPqmm.mjs");
|
|
2203
2204
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
2204
2205
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
2205
2206
|
await runnerModule.initRunner();
|