@ls-stack/agent-eval 0.55.1 → 0.55.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-BD0D9-7k.mjs → app-NI4to6lp.mjs} +4 -4
- package/dist/apps/web/dist/assets/{index-CvsPmlHl.js → index-C7QjETk8.js} +54 -54
- package/dist/apps/web/dist/index.html +1 -1
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +1 -1
- package/dist/{cli-BR3wMZMx.mjs → cli-Bu9347r1.mjs} +60 -13
- package/dist/index.d.mts +37 -24
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +2 -2
- package/dist/{runExecution-Sw38bCaq.mjs → runExecution-C31dpemR.mjs} +363 -114
- package/dist/{runOrchestration-DJsdLYeZ.mjs → runOrchestration-3RoHLW4U.mjs} +1 -1
- package/dist/{runner-72rsqJRq.mjs → runner-B4EfMn1d.mjs} +2 -2
- package/dist/{runner-dB69WsnM.mjs → runner-CTp9zHbM.mjs} +1 -1
- package/dist/{src-hBGtzWuA.mjs → src-FR60ZR_4.mjs} +2 -2
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +19 -8
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-CvsPmlHl.js"></script>
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-C7QjETk8.js"></script>
|
|
29
29
|
<link rel="stylesheet" crossorigin href="/assets/index-2I-eWzVL.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
package/dist/bin.mjs
CHANGED
package/dist/caseChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { I as configureEvalRunLogs, Pt as runWithEvalRegistry, St as resolveLlmCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as createFsCacheStore, xt as resolveApiCallsConfig } from "./runExecution-Sw38bCaq.mjs";
|
|
1
|
+
import { I as configureEvalRunLogs, Pt as runWithEvalRegistry, St as resolveLlmCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as createFsCacheStore, xt as resolveApiCallsConfig } from "./runExecution-C31dpemR.mjs";
|
|
2
2
|
//#region ../runner/src/caseChild.ts
|
|
3
3
|
let fatalErrorReported = false;
|
|
4
4
|
let disconnectExpected = false;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Ct as buildEvalKey, Nt as getEvalRegistry, St as resolveLlmCallsConfig, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as normalizeScoreDef, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as createFsCacheStore, wt as getCaseRowCaseKey, xt as resolveApiCallsConfig } from "./runExecution-Sw38bCaq.mjs";
|
|
2
|
-
import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-DJsdLYeZ.mjs";
|
|
1
|
+
import { Ct as buildEvalKey, Nt as getEvalRegistry, St as resolveLlmCallsConfig, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as normalizeScoreDef, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as createFsCacheStore, wt as getCaseRowCaseKey, xt as resolveApiCallsConfig } from "./runExecution-C31dpemR.mjs";
|
|
2
|
+
import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-3RoHLW4U.mjs";
|
|
3
3
|
import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
|
|
5
5
|
import { createHash, randomUUID } from "node:crypto";
|
|
@@ -794,6 +794,7 @@ async function markRunErrored(runState, message, managerContext) {
|
|
|
794
794
|
payload: { message }
|
|
795
795
|
});
|
|
796
796
|
managerContext.emitDiscoveryEvent();
|
|
797
|
+
managerContext.onRunTerminal();
|
|
797
798
|
}
|
|
798
799
|
async function markRunTerminalFromChild(runState, event, managerContext) {
|
|
799
800
|
const snapshot = await loadPersistedRunSnapshot(runState.runDir);
|
|
@@ -815,6 +816,7 @@ async function markRunTerminalFromChild(runState, event, managerContext) {
|
|
|
815
816
|
}
|
|
816
817
|
managerContext.emitEvent(runState, event);
|
|
817
818
|
managerContext.emitDiscoveryEvent();
|
|
819
|
+
managerContext.onRunTerminal();
|
|
818
820
|
}
|
|
819
821
|
//#endregion
|
|
820
822
|
//#region ../runner/src/runnerStateHydration.ts
|
|
@@ -883,6 +885,7 @@ function getWatchRootsForIncludePatterns(params) {
|
|
|
883
885
|
}
|
|
884
886
|
//#endregion
|
|
885
887
|
//#region ../runner/src/runner.ts
|
|
888
|
+
const defaultCachePruneIdleDelayMs = 5e3;
|
|
886
889
|
/** Create an in-memory eval runner bound to the current workspace config. */
|
|
887
890
|
function createRunner({ watchForChanges = true } = {}) {
|
|
888
891
|
let config;
|
|
@@ -903,6 +906,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
903
906
|
let runHistoryWatcher;
|
|
904
907
|
let discoveryRefreshTimer;
|
|
905
908
|
let runHistoryRefreshTimer;
|
|
909
|
+
let cachePruneIdleTimer;
|
|
906
910
|
let registryLoadCounter = 0;
|
|
907
911
|
const configReload = createConfigReloadController({
|
|
908
912
|
getActiveRunCount,
|
|
@@ -942,6 +946,9 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
942
946
|
async clearCache(filter) {
|
|
943
947
|
await cacheStore.clear(filter);
|
|
944
948
|
},
|
|
949
|
+
async repairCache() {
|
|
950
|
+
return cacheStore.repair();
|
|
951
|
+
},
|
|
945
952
|
async recomputeStatusesForEval(evalKey) {
|
|
946
953
|
const evalMeta = resolveEvalMeta(evalKey);
|
|
947
954
|
if (!evalMeta) return { updatedRuns: 0 };
|
|
@@ -1252,6 +1259,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
1252
1259
|
emitDiscoveryEvent();
|
|
1253
1260
|
},
|
|
1254
1261
|
async startRun(request) {
|
|
1262
|
+
cancelCacheRetentionPrune();
|
|
1255
1263
|
const tagsFilterError = validateTagsFilters(request.target.tagsFilter);
|
|
1256
1264
|
if (tagsFilterError !== null) throw new Error(tagsFilterError);
|
|
1257
1265
|
const deletedTemporaryRuns = await deleteTemporaryRuns({
|
|
@@ -1349,7 +1357,8 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
1349
1357
|
workspaceRoot,
|
|
1350
1358
|
evals,
|
|
1351
1359
|
emitEvent,
|
|
1352
|
-
emitDiscoveryEvent
|
|
1360
|
+
emitDiscoveryEvent,
|
|
1361
|
+
onRunTerminal: scheduleCacheRetentionPrune
|
|
1353
1362
|
}
|
|
1354
1363
|
});
|
|
1355
1364
|
if (deletedTemporaryRuns > 0) emitDiscoveryEvent();
|
|
@@ -1398,6 +1407,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
1398
1407
|
payload: run.summary
|
|
1399
1408
|
});
|
|
1400
1409
|
emitDiscoveryEvent();
|
|
1410
|
+
scheduleCacheRetentionPrune();
|
|
1401
1411
|
},
|
|
1402
1412
|
getCaseDetail(runId, caseId) {
|
|
1403
1413
|
const run = runs.get(runId);
|
|
@@ -1419,6 +1429,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
1419
1429
|
};
|
|
1420
1430
|
},
|
|
1421
1431
|
async close() {
|
|
1432
|
+
cancelCacheRetentionPrune();
|
|
1422
1433
|
await Promise.all([closeWatchers(), configReload.close()]);
|
|
1423
1434
|
},
|
|
1424
1435
|
getWorkspaceRoot() {
|
|
@@ -1522,6 +1533,28 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
1522
1533
|
function getActiveRunCount() {
|
|
1523
1534
|
return [...runs.values()].filter((run) => run.manifest.status === "running").length;
|
|
1524
1535
|
}
|
|
1536
|
+
function getCachePruneIdleDelayMs() {
|
|
1537
|
+
const configured = config.cache?.pruneIdleDelayMs;
|
|
1538
|
+
if (configured === void 0 || !Number.isFinite(configured) || configured <= 0) return defaultCachePruneIdleDelayMs;
|
|
1539
|
+
return Math.floor(configured);
|
|
1540
|
+
}
|
|
1541
|
+
function cancelCacheRetentionPrune() {
|
|
1542
|
+
if (cachePruneIdleTimer === void 0) return;
|
|
1543
|
+
clearTimeout(cachePruneIdleTimer);
|
|
1544
|
+
cachePruneIdleTimer = void 0;
|
|
1545
|
+
}
|
|
1546
|
+
function scheduleCacheRetentionPrune() {
|
|
1547
|
+
cancelCacheRetentionPrune();
|
|
1548
|
+
cachePruneIdleTimer = setTimeout(() => {
|
|
1549
|
+
cachePruneIdleTimer = void 0;
|
|
1550
|
+
pruneCacheRetentionIfIdle();
|
|
1551
|
+
}, getCachePruneIdleDelayMs());
|
|
1552
|
+
cachePruneIdleTimer.unref();
|
|
1553
|
+
}
|
|
1554
|
+
async function pruneCacheRetentionIfIdle() {
|
|
1555
|
+
if (getActiveRunCount() > 0) return;
|
|
1556
|
+
await cacheStore.pruneRetention();
|
|
1557
|
+
}
|
|
1525
1558
|
function emitDiscoveryEvent() {
|
|
1526
1559
|
const lastRunStatuses = getLastRunStatuses({
|
|
1527
1560
|
runs: runs.values(),
|
|
@@ -1657,19 +1690,20 @@ Flags:
|
|
|
1657
1690
|
`);
|
|
1658
1691
|
return;
|
|
1659
1692
|
}
|
|
1660
|
-
if (topic === "cache" || topic === "cache list" || topic === "cache clear") {
|
|
1693
|
+
if (topic === "cache" || topic === "cache list" || topic === "cache clear" || topic === "cache repair") {
|
|
1661
1694
|
console.info(`
|
|
1662
|
-
agent-evals cache - Manage cached
|
|
1695
|
+
agent-evals cache - Manage cached namespace/key entries
|
|
1663
1696
|
|
|
1664
1697
|
Usage:
|
|
1665
1698
|
agent-evals cache list [flags]
|
|
1666
1699
|
agent-evals cache clear --eval <id>
|
|
1667
1700
|
agent-evals cache clear --all
|
|
1701
|
+
agent-evals cache repair [flags]
|
|
1668
1702
|
|
|
1669
1703
|
Flags:
|
|
1670
1704
|
--eval <id> Clear entries for specific eval(s) (comma-separated)
|
|
1671
1705
|
--all Confirm clearing every cached entry
|
|
1672
|
-
--json Output cache listing as JSON
|
|
1706
|
+
--json Output cache listing or repair summary as JSON
|
|
1673
1707
|
--no-env Disable automatic .env loading
|
|
1674
1708
|
--help, -h Show this help
|
|
1675
1709
|
`);
|
|
@@ -1683,9 +1717,10 @@ Commands:
|
|
|
1683
1717
|
list List discovered evals
|
|
1684
1718
|
run Run evals
|
|
1685
1719
|
show-runs [id|latest] Show saved run artifact file paths
|
|
1686
|
-
cache list List cached
|
|
1720
|
+
cache list List cached namespace/key entries
|
|
1687
1721
|
cache clear --eval <id> Clear cache entries for one eval
|
|
1688
1722
|
cache clear --all Clear every cached entry
|
|
1723
|
+
cache repair Remove unindexed/orphaned cache files
|
|
1689
1724
|
help Show this help
|
|
1690
1725
|
|
|
1691
1726
|
Options:
|
|
@@ -1961,7 +1996,7 @@ function parseArgs(argv) {
|
|
|
1961
1996
|
let cursor = 1;
|
|
1962
1997
|
if (args.command === "cache") {
|
|
1963
1998
|
const sub = normalizedArgv[cursor];
|
|
1964
|
-
if (sub === "list" || sub === "clear") {
|
|
1999
|
+
if (sub === "list" || sub === "clear" || sub === "repair") {
|
|
1965
2000
|
args.subcommand = sub;
|
|
1966
2001
|
args.helpTopic = `cache ${sub}`;
|
|
1967
2002
|
cursor++;
|
|
@@ -2136,8 +2171,8 @@ async function commandApp(args) {
|
|
|
2136
2171
|
const { serve } = await import("@hono/node-server");
|
|
2137
2172
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
2138
2173
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
2139
|
-
const appModule = await import("./app-BD0D9-7k.mjs");
|
|
2140
|
-
const runnerModule = await import("./runner-dB69WsnM.mjs");
|
|
2174
|
+
const appModule = await import("./app-NI4to6lp.mjs");
|
|
2175
|
+
const runnerModule = await import("./runner-CTp9zHbM.mjs");
|
|
2141
2176
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
2142
2177
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
2143
2178
|
await runnerModule.initRunner();
|
|
@@ -2311,10 +2346,8 @@ async function commandCache(args) {
|
|
|
2311
2346
|
for (const entry of entries) {
|
|
2312
2347
|
console.info(` ${entry.namespace}`);
|
|
2313
2348
|
console.info(` key: ${entry.key}`);
|
|
2314
|
-
const operationLabel = entry.operationType === "span" ? `${entry.operationName} (span ${entry.spanKind ?? "unknown"})` : `${entry.operationName} (value)`;
|
|
2315
|
-
console.info(` operation: ${operationLabel}`);
|
|
2316
2349
|
console.info(` stored: ${entry.storedAt}`);
|
|
2317
|
-
console.info(`
|
|
2350
|
+
console.info(` last accessed: ${entry.lastAccessedAt}`);
|
|
2318
2351
|
console.info("");
|
|
2319
2352
|
}
|
|
2320
2353
|
return;
|
|
@@ -2342,6 +2375,20 @@ async function commandCache(args) {
|
|
|
2342
2375
|
console.info("Refusing to clear cache without --eval <id> or --all. Use one of these flags to confirm.");
|
|
2343
2376
|
process.exit(1);
|
|
2344
2377
|
}
|
|
2378
|
+
if (args.subcommand === "repair") {
|
|
2379
|
+
const summary = await runner.repairCache();
|
|
2380
|
+
if (args.json) {
|
|
2381
|
+
console.info(JSON.stringify(summary, null, 2));
|
|
2382
|
+
return;
|
|
2383
|
+
}
|
|
2384
|
+
console.info("Cache repair complete.");
|
|
2385
|
+
console.info(`Removed cache files: ${String(summary.removedCacheFiles)}`);
|
|
2386
|
+
console.info(`Removed debug files: ${String(summary.removedDebugFiles)}`);
|
|
2387
|
+
console.info(`Removed blob files: ${String(summary.removedBlobFiles)}`);
|
|
2388
|
+
console.info(`Removed index rows: ${String(summary.removedIndexRows)}`);
|
|
2389
|
+
console.info(`Rewritten indexes: ${String(summary.rewrittenIndexes)}`);
|
|
2390
|
+
return;
|
|
2391
|
+
}
|
|
2345
2392
|
printHelp(args.helpTopic);
|
|
2346
2393
|
}
|
|
2347
2394
|
function getSortedRunSnapshots(runner) {
|
package/dist/index.d.mts
CHANGED
|
@@ -1399,7 +1399,7 @@ type EvalLogLevelInput = RunLogLevel$1 | 'warning';
|
|
|
1399
1399
|
declare class EvalAssertionError extends Error {
|
|
1400
1400
|
constructor(message: string);
|
|
1401
1401
|
}
|
|
1402
|
-
/** Return the
|
|
1402
|
+
/** Return the host process clock, bypassing the eval Date shim. */
|
|
1403
1403
|
/**
|
|
1404
1404
|
* Eval time helpers for reading and moving the active eval clock.
|
|
1405
1405
|
*
|
|
@@ -4042,7 +4042,13 @@ type AgentEvalsConfig$1 = {
|
|
|
4042
4042
|
* Exact namespace-specific retention caps. Values override
|
|
4043
4043
|
* `maxEntriesPerNamespace` for matching namespaces.
|
|
4044
4044
|
*/
|
|
4045
|
-
maxEntriesByNamespace?: Record<string, number>;
|
|
4045
|
+
maxEntriesByNamespace?: Record<string, number>;
|
|
4046
|
+
/**
|
|
4047
|
+
* Milliseconds the runner waits after becoming idle before pruning indexed
|
|
4048
|
+
* cache entries. Defaults to `5000`; non-positive or non-finite values use
|
|
4049
|
+
* the default.
|
|
4050
|
+
*/
|
|
4051
|
+
pruneIdleDelayMs?: number; /** Legacy alias for `maxEntriesPerNamespace`, retained so older config files keep working. */
|
|
4046
4052
|
maxEntriesPerEval?: number;
|
|
4047
4053
|
};
|
|
4048
4054
|
};
|
|
@@ -4304,22 +4310,25 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
|
|
|
4304
4310
|
}, z$1.core.$strip>;
|
|
4305
4311
|
/** Reference to a value-cache lookup performed via `evalTracer.cache(...)`. */
|
|
4306
4312
|
type TraceCacheRef$1 = z$1.infer<typeof traceCacheRefSchema>;
|
|
4307
|
-
/**
|
|
4313
|
+
/** Minimal index-backed summary of a persisted cache entry. */
|
|
4308
4314
|
declare const cacheListItemSchema$1: z$1.ZodObject<{
|
|
4309
4315
|
key: z$1.ZodString;
|
|
4310
4316
|
namespace: z$1.ZodString;
|
|
4311
|
-
operationType: z$1.ZodEnum<{
|
|
4312
|
-
value: "value";
|
|
4313
|
-
span: "span";
|
|
4314
|
-
}>;
|
|
4315
|
-
operationName: z$1.ZodString;
|
|
4316
|
-
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
4317
|
-
spanKind: z$1.ZodOptional<z$1.ZodString>;
|
|
4318
4317
|
storedAt: z$1.ZodString;
|
|
4319
|
-
|
|
4318
|
+
lastAccessedAt: z$1.ZodString;
|
|
4320
4319
|
}, z$1.core.$strip>;
|
|
4321
|
-
/**
|
|
4320
|
+
/** Minimal summary row for a single cache entry. */
|
|
4322
4321
|
type CacheListItem = z$1.infer<typeof cacheListItemSchema$1>;
|
|
4322
|
+
/** Summary of cleanup performed by manual cache repair. */
|
|
4323
|
+
declare const cacheRepairSummarySchema$1: z$1.ZodObject<{
|
|
4324
|
+
removedCacheFiles: z$1.ZodNumber;
|
|
4325
|
+
removedDebugFiles: z$1.ZodNumber;
|
|
4326
|
+
removedBlobFiles: z$1.ZodNumber;
|
|
4327
|
+
removedIndexRows: z$1.ZodNumber;
|
|
4328
|
+
rewrittenIndexes: z$1.ZodNumber;
|
|
4329
|
+
}, z$1.core.$strip>;
|
|
4330
|
+
/** Stable JSON summary returned by manual cache repair. */
|
|
4331
|
+
type CacheRepairSummary = z$1.infer<typeof cacheRepairSummarySchema$1>;
|
|
4323
4332
|
/** Serialized nested span captured while recording a cached operation. */
|
|
4324
4333
|
type SerializedCacheSpan = {
|
|
4325
4334
|
kind: string;
|
|
@@ -6097,22 +6106,25 @@ type ResolvedLlmCallCostCurrency$1 = {
|
|
|
6097
6106
|
numberFormat?: NumberDisplayOptions$1;
|
|
6098
6107
|
}; //#endregion
|
|
6099
6108
|
//#region ../shared/src/schemas/cache.d.ts
|
|
6100
|
-
/**
|
|
6109
|
+
/** Minimal index-backed summary of a persisted cache entry. */
|
|
6101
6110
|
declare const cacheListItemSchema: z$1.ZodObject<{
|
|
6102
6111
|
key: z$1.ZodString;
|
|
6103
6112
|
namespace: z$1.ZodString;
|
|
6104
|
-
operationType: z$1.ZodEnum<{
|
|
6105
|
-
span: "span";
|
|
6106
|
-
value: "value";
|
|
6107
|
-
}>;
|
|
6108
|
-
operationName: z$1.ZodString;
|
|
6109
|
-
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
6110
|
-
spanKind: z$1.ZodOptional<z$1.ZodString>;
|
|
6111
6113
|
storedAt: z$1.ZodString;
|
|
6112
|
-
|
|
6114
|
+
lastAccessedAt: z$1.ZodString;
|
|
6113
6115
|
}, z$1.core.$strip>;
|
|
6114
|
-
/**
|
|
6116
|
+
/** Minimal summary row for a single cache entry. */
|
|
6115
6117
|
type CacheListItem$1 = z$1.infer<typeof cacheListItemSchema>;
|
|
6118
|
+
/** Summary of cleanup performed by manual cache repair. */
|
|
6119
|
+
declare const cacheRepairSummarySchema: z$1.ZodObject<{
|
|
6120
|
+
removedCacheFiles: z$1.ZodNumber;
|
|
6121
|
+
removedDebugFiles: z$1.ZodNumber;
|
|
6122
|
+
removedBlobFiles: z$1.ZodNumber;
|
|
6123
|
+
removedIndexRows: z$1.ZodNumber;
|
|
6124
|
+
rewrittenIndexes: z$1.ZodNumber;
|
|
6125
|
+
}, z$1.core.$strip>;
|
|
6126
|
+
/** Stable JSON summary returned by manual cache repair. */
|
|
6127
|
+
type CacheRepairSummary$1 = z$1.infer<typeof cacheRepairSummarySchema>;
|
|
6116
6128
|
/** Serialized nested span captured while recording a cached operation. */
|
|
6117
6129
|
type SerializedCacheSpan$1 = {
|
|
6118
6130
|
kind: string;
|
|
@@ -6454,7 +6466,8 @@ type EvalRunner = {
|
|
|
6454
6466
|
* Remove cache entries matching `filter`, or all entries when no filter is
|
|
6455
6467
|
* supplied.
|
|
6456
6468
|
*/
|
|
6457
|
-
clearCache(filter?: CacheClearFilter): Promise<void>;
|
|
6469
|
+
clearCache(filter?: CacheClearFilter): Promise<void>; /** Remove cache/debug/blob files that are not referenced by cache indexes. */
|
|
6470
|
+
repairCache(): Promise<CacheRepairSummary$1>;
|
|
6458
6471
|
/**
|
|
6459
6472
|
* Recompute persisted case and run statuses for terminal runs touching one
|
|
6460
6473
|
* eval. Accepts the exact eval key.
|
|
@@ -6635,4 +6648,4 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
|
|
|
6635
6648
|
/** Return whether the active eval case has tags matching the typed input. */
|
|
6636
6649
|
declare function matchesEvalTags(input: EvalTagMatchInput): boolean;
|
|
6637
6650
|
//#endregion
|
|
6638
|
-
export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type 
EvalTraceTree, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, 
hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
|
6651
|
+
export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, 
EvalTagMatchInput, type EvalTraceTree, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, 
getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-Sw38bCaq.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-BR3wMZMx.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-hBGtzWuA.mjs";
|
|
1
|
+
import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-C31dpemR.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Bu9347r1.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-FR60ZR_4.mjs";
|
|
4
4
|
export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-Sw38bCaq.mjs";
|
|
2
|
-
import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-DJsdLYeZ.mjs";
|
|
1
|
+
import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-C31dpemR.mjs";
|
|
2
|
+
import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-3RoHLW4U.mjs";
|
|
3
3
|
import { z } from "zod/v4";
|
|
4
4
|
import { readFile } from "node:fs/promises";
|
|
5
5
|
import { relative } from "node:path";
|