@ls-stack/agent-eval 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-ZFLdu8-r.mjs → app-sK9CjpNI.mjs} +17 -5
- package/dist/apps/web/dist/assets/index-ChgByJbI.css +1 -0
- package/dist/apps/web/dist/assets/index-CmY0_D5Z.js +113 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-DQK5W0je.mjs → cli-DLlRkyLH.mjs} +13 -4
- package/dist/index.d.mts +543 -101
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +3 -2
- package/dist/{runOrchestration-HaMahl6b.mjs → runOrchestration-DwqX9_T7.mjs} +2528 -286
- package/dist/{runner-CmVPWava.mjs → runner-Gtlmvm3w.mjs} +2 -2
- package/dist/{runner--XPZ5D7N.mjs → runner-JrBz8ISs.mjs} +1 -1
- package/dist/src-Bx-CV6Wo.mjs +3 -0
- package/package.json +1 -1
- package/dist/apps/web/dist/assets/index-ClE28i5w.css +0 -1
- package/dist/apps/web/dist/assets/index-CvJmtK1T.js +0 -113
- package/dist/src-r3FQAaw6.mjs +0 -3
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-CvJmtK1T.js"></script>
|
|
29
|
-
<link rel="stylesheet" crossorigin href="/assets/index-ClE28i5w.css">
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-CmY0_D5Z.js"></script>
|
|
29
|
+
<link rel="stylesheet" crossorigin href="/assets/index-ChgByJbI.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
|
32
32
|
<div id="root"></div>
|
package/dist/bin.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { A as deriveScopedSummaryFromCases, O as getEvalTitle, P as runSummarySchema, V as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as normalizeScoreDef, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, in as getEvalRegistry, k as getEvalDisplayStatus, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as loadConfig, x as createFsCacheStore, y as buildDeclaredColumnDefs } from "./runOrchestration-DwqX9_T7.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join, relative, resolve } from "node:path";
|
|
@@ -417,6 +417,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
417
417
|
let workspaceRoot;
|
|
418
418
|
let localStateDir;
|
|
419
419
|
let cacheStore;
|
|
420
|
+
let llmCallsConfig = resolveLlmCallsConfig(void 0);
|
|
420
421
|
const evals = /* @__PURE__ */ new Map();
|
|
421
422
|
const runs = /* @__PURE__ */ new Map();
|
|
422
423
|
const lastRunStatusMap = /* @__PURE__ */ new Map();
|
|
@@ -439,12 +440,14 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
439
440
|
config = await loadConfig();
|
|
440
441
|
workspaceRoot = config.workspaceRoot ?? process.cwd();
|
|
441
442
|
localStateDir = resolve(workspaceRoot, ".agent-evals");
|
|
443
|
+
llmCallsConfig = resolveLlmCallsConfig(config.llmCalls);
|
|
442
444
|
await mkdir(localStateDir, { recursive: true });
|
|
443
445
|
await mkdir(join(localStateDir, "runs"), { recursive: true });
|
|
444
446
|
cacheStore = createFsCacheStore({
|
|
445
447
|
workspaceRoot,
|
|
446
448
|
dir: config.cache?.dir,
|
|
447
|
-
|
|
449
|
+
maxEntriesPerNamespace: config.cache?.maxEntriesPerNamespace ?? config.cache?.maxEntriesPerEval,
|
|
450
|
+
maxEntriesByNamespace: config.cache?.maxEntriesByNamespace
|
|
448
451
|
});
|
|
449
452
|
await loadPersistedRuns();
|
|
450
453
|
await runner.refreshDiscovery();
|
|
@@ -453,6 +456,9 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
453
456
|
async listCache() {
|
|
454
457
|
return cacheStore.list();
|
|
455
458
|
},
|
|
459
|
+
async getCacheEntry(namespace, key) {
|
|
460
|
+
return cacheStore.lookup(namespace, key);
|
|
461
|
+
},
|
|
456
462
|
async clearCache(filter) {
|
|
457
463
|
await cacheStore.clear(filter);
|
|
458
464
|
},
|
|
@@ -803,6 +809,9 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
803
809
|
getWorkspaceRoot() {
|
|
804
810
|
return workspaceRoot;
|
|
805
811
|
},
|
|
812
|
+
getLlmCallsConfig() {
|
|
813
|
+
return llmCallsConfig;
|
|
814
|
+
},
|
|
806
815
|
getArtifactPath(artifactId_) {
|
|
807
816
|
return resolveArtifactPath(join(localStateDir, "runs"), artifactId_);
|
|
808
817
|
}
|
|
@@ -1036,8 +1045,8 @@ async function commandApp(args) {
|
|
|
1036
1045
|
const { serve } = await import("@hono/node-server");
|
|
1037
1046
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
1038
1047
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
1039
|
-
const appModule = await import("./app-ZFLdu8-r.mjs");
|
|
1040
|
-
const runnerModule = await import("./runner--XPZ5D7N.mjs");
|
|
1048
|
+
const appModule = await import("./app-sK9CjpNI.mjs");
|
|
1049
|
+
const runnerModule = await import("./runner-JrBz8ISs.mjs");
|
|
1041
1050
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
1042
1051
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
1043
1052
|
await runnerModule.initRunner();
|