@ls-stack/agent-eval 0.60.3 → 0.61.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-l3ynaNsb.mjs → app-Dm_9ZTVa.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-CM_zUhl_.css +1 -0
- package/dist/apps/web/dist/assets/{index-CM6MDNqo.js → index-DxZsizjg.js} +76 -76
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +1 -1
- package/dist/{cli-BSVUCUxr.mjs → cli-CPBIcMP-.mjs} +11 -26
- package/dist/index.d.mts +99 -90
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +2 -2
- package/dist/{runExecution-Bq0Y3y_1.mjs → runExecution-D-CnSRYy.mjs} +18 -2
- package/dist/{runOrchestration-C7qQISz2.mjs → runOrchestration-Basvyp4u.mjs} +1 -1
- package/dist/{runner-C9xNJHt3.mjs → runner-B6UT1K7L.mjs} +1 -1
- package/dist/{runner-DmkSq-QG.mjs → runner-DwNb5TCb.mjs} +2 -2
- package/dist/{src-D5vGo2iv.mjs → src-SixIk0b7.mjs} +2 -2
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +76 -432
- package/dist/apps/web/dist/assets/index-CqWfzcFb.css +0 -1
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-CM6MDNqo.js"></script>
|
|
29
|
-
<link rel="stylesheet" crossorigin href="/assets/index-CqWfzcFb.css">
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-DxZsizjg.js"></script>
|
|
29
|
+
<link rel="stylesheet" crossorigin href="/assets/index-CM_zUhl_.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
|
32
32
|
<div id="root"></div>
|
package/dist/bin.mjs
CHANGED
package/dist/caseChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { Ft as runWithEvalRegistry, I as configureEvalRunLogs, St as resolveLlmCallsConfig, _ as createFsCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, g as createBufferedCacheStore, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as getCacheRetentionOptions, xt as resolveApiCallsConfig } from "./runExecution-D-CnSRYy.mjs";
|
|
2
2
|
//#region ../runner/src/caseChild.ts
|
|
3
3
|
let fatalErrorReported = false;
|
|
4
4
|
let disconnectExpected = false;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-C7qQISz2.mjs";
|
|
1
|
+
import { Dt as caseRowSchema, Pt as getEvalRegistry, St as resolveLlmCallsConfig, Tt as getCaseRowCaseKey, _ as createFsCacheStore, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as getCacheRetentionOptions, wt as buildEvalKey, xt as resolveApiCallsConfig } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
+
import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-Basvyp4u.mjs";
|
|
3
3
|
import { parseEnv } from "node:util";
|
|
4
4
|
import { resultify } from "t-result";
|
|
5
5
|
import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
@@ -973,7 +973,6 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
973
973
|
let discoveryRefreshTimer;
|
|
974
974
|
let runHistoryRefreshTimer;
|
|
975
975
|
let cachePruneIdleTimer;
|
|
976
|
-
let registryLoadCounter = 0;
|
|
977
976
|
const configReload = createConfigReloadController({
|
|
978
977
|
getActiveRunCount,
|
|
979
978
|
closeRunnerWatchers: closeWatchers,
|
|
@@ -1019,9 +1018,8 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
1019
1018
|
if (caseRow === void 0) return void 0;
|
|
1020
1019
|
return hydrateCaseDetailForRow(run, caseRow);
|
|
1021
1020
|
}
|
|
1022
|
-
function nextRegistryLoadIsolationKey(prefix, filePath) {
|
|
1023
|
-
|
|
1024
|
-
return `${prefix}:${String(registryLoadCounter)}:${filePath}`;
|
|
1021
|
+
function getDiscoveryModuleIsolationKey(filePath) {
|
|
1022
|
+
return `discovery:${filePath}`;
|
|
1025
1023
|
}
|
|
1026
1024
|
const runner = {
|
|
1027
1025
|
async init() {
|
|
@@ -1042,24 +1040,11 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
1042
1040
|
async recomputeStatusesForEval(evalKey) {
|
|
1043
1041
|
const evalMeta = resolveEvalMeta(evalKey);
|
|
1044
1042
|
if (!evalMeta) return { updatedRuns: 0 };
|
|
1045
|
-
const entry = (await loadIsolatedEvalRegistry({
|
|
1046
|
-
evalFilePath: evalMeta.sourceFilePath,
|
|
1047
|
-
sourceFingerprint: evalMeta.sourceFingerprint ?? void 0,
|
|
1048
|
-
moduleIsolation: {
|
|
1049
|
-
key: nextRegistryLoadIsolationKey("recompute-status", evalMeta.sourceFilePath),
|
|
1050
|
-
workspaceRoot
|
|
1051
|
-
},
|
|
1052
|
-
runtimeScope: "env"
|
|
1053
|
-
})).get(evalMeta.id);
|
|
1054
|
-
if (!entry) return { updatedRuns: 0 };
|
|
1055
1043
|
const scoreThresholds = /* @__PURE__ */ new Map();
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
}
|
|
1061
|
-
for (const [key, def] of Object.entries(evalDef.manualScores ?? {})) if (def.passThreshold !== void 0) scoreThresholds.set(key, def.passThreshold);
|
|
1062
|
-
});
|
|
1044
|
+
for (const columnDef of evalMeta.columnDefs) {
|
|
1045
|
+
if (columnDef.isScore !== true || columnDef.passThreshold === void 0) continue;
|
|
1046
|
+
scoreThresholds.set(columnDef.key, columnDef.passThreshold);
|
|
1047
|
+
}
|
|
1063
1048
|
const updatedRuns = await recomputeEvalStatusesInRuns({
|
|
1064
1049
|
runs: runs.values(),
|
|
1065
1050
|
evalKey: evalMeta.key,
|
|
@@ -1264,7 +1249,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
1264
1249
|
evalFilePath: filePath,
|
|
1265
1250
|
sourceFingerprint,
|
|
1266
1251
|
moduleIsolation: {
|
|
1267
|
-
key:
|
|
1252
|
+
key: getDiscoveryModuleIsolationKey(filePath),
|
|
1268
1253
|
workspaceRoot
|
|
1269
1254
|
},
|
|
1270
1255
|
runtimeScope: "env"
|
|
@@ -2258,8 +2243,8 @@ async function commandApp(args) {
|
|
|
2258
2243
|
const { serve } = await import("@hono/node-server");
|
|
2259
2244
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
2260
2245
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
2261
|
-
const appModule = await import("./app-l3ynaNsb.mjs");
|
|
2262
|
-
const runnerModule = await import("./runner-C9xNJHt3.mjs");
|
|
2246
|
+
const appModule = await import("./app-Dm_9ZTVa.mjs");
|
|
2247
|
+
const runnerModule = await import("./runner-B6UT1K7L.mjs");
|
|
2263
2248
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
2264
2249
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
2265
2250
|
await runnerModule.initRunner({ loadEnv: args.loadEnv });
|