@ls-stack/agent-eval 0.16.1 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-B8e-oWYc.mjs → app-hAlVvT-Q.mjs} +23 -4
- package/dist/apps/web/dist/assets/index-BovjyzD8.css +1 -0
- package/dist/apps/web/dist/assets/index-BxcwUS7V.js +118 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-BmrtjQj_.mjs → cli-3zANEAhG.mjs} +64 -13
- package/dist/index.d.mts +311 -20
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +2 -1
- package/dist/{runOrchestration-BDyNrRQT.mjs → runOrchestration-BBg_VUH5.mjs} +937 -1842
- package/dist/{runner-DABFPXkx.mjs → runner-DxlahWDo.mjs} +1 -1
- package/dist/{runner-CsZqhbiA.mjs → runner-RmZPRz-h.mjs} +2 -2
- package/dist/src-BC4OrajN.mjs +3 -0
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +19 -4
- package/dist/apps/web/dist/assets/index-BZ1TdyEg.js +0 -117
- package/dist/apps/web/dist/assets/index-MARPw1bH.css +0 -1
- package/dist/src-CEAJYN_X.mjs +0 -3
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-BZ1TdyEg.js"></script>
|
|
29
|
-
<link rel="stylesheet" crossorigin href="/assets/index-MARPw1bH.css">
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-BxcwUS7V.js"></script>
|
|
29
|
+
<link rel="stylesheet" crossorigin href="/assets/index-BovjyzD8.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
|
32
32
|
<div id="root"></div>
|
package/dist/bin.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { An as getEvalRegistry, L as runSummarySchema, M as getEvalDisplayStatus, N as deriveScopedSummaryFromCases, S as createFsCacheStore, X as resolveLlmCallsConfig, Y as resolveApiCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as buildDeclaredColumnDefs, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, j as getEvalTitle, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, x as normalizeScoreDef, y as loadConfig } from "./runOrchestration-BBg_VUH5.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join, relative, resolve } from "node:path";
|
|
@@ -346,7 +346,9 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
346
346
|
const discoveryListeners = /* @__PURE__ */ new Set();
|
|
347
347
|
let nextShortIdNum = 0;
|
|
348
348
|
let discoveryWatcher;
|
|
349
|
+
let runHistoryWatcher;
|
|
349
350
|
let discoveryRefreshTimer;
|
|
351
|
+
let runHistoryRefreshTimer;
|
|
350
352
|
function toWorkspaceRelativePath(filePath) {
|
|
351
353
|
return relative(workspaceRoot, filePath).replaceAll("\\", "/");
|
|
352
354
|
}
|
|
@@ -379,7 +381,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
379
381
|
return cacheStore.list();
|
|
380
382
|
},
|
|
381
383
|
async getCacheEntry(namespace, key) {
|
|
382
|
-
return cacheStore.
|
|
384
|
+
return cacheStore.lookupWithDebug(namespace, key);
|
|
383
385
|
},
|
|
384
386
|
async clearCache(filter) {
|
|
385
387
|
await cacheStore.clear(filter);
|
|
@@ -722,10 +724,14 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
722
724
|
clearTimeout(discoveryRefreshTimer);
|
|
723
725
|
discoveryRefreshTimer = void 0;
|
|
724
726
|
}
|
|
725
|
-
|
|
726
|
-
|
|
727
|
+
if (runHistoryRefreshTimer !== void 0) {
|
|
728
|
+
clearTimeout(runHistoryRefreshTimer);
|
|
729
|
+
runHistoryRefreshTimer = void 0;
|
|
730
|
+
}
|
|
731
|
+
const watchers = [discoveryWatcher, runHistoryWatcher].filter((watcher) => watcher !== void 0);
|
|
727
732
|
discoveryWatcher = void 0;
|
|
728
|
-
|
|
733
|
+
runHistoryWatcher = void 0;
|
|
734
|
+
await Promise.all(watchers.map((watcher) => watcher.close()));
|
|
729
735
|
},
|
|
730
736
|
getWorkspaceRoot() {
|
|
731
737
|
return workspaceRoot;
|
|
@@ -764,6 +770,29 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
764
770
|
watcher.on("unlink", scheduleRefresh);
|
|
765
771
|
watcher.on("addDir", scheduleRefresh);
|
|
766
772
|
watcher.on("unlinkDir", scheduleRefresh);
|
|
773
|
+
await setupRunHistoryWatcher();
|
|
774
|
+
await new Promise((ready) => {
|
|
775
|
+
watcher.once("ready", ready);
|
|
776
|
+
});
|
|
777
|
+
}
|
|
778
|
+
async function setupRunHistoryWatcher() {
|
|
779
|
+
const watcher = watch(join(localStateDir, "runs"), {
|
|
780
|
+
ignoreInitial: true,
|
|
781
|
+
persistent: true
|
|
782
|
+
});
|
|
783
|
+
runHistoryWatcher = watcher;
|
|
784
|
+
const scheduleRefresh = () => {
|
|
785
|
+
if (runHistoryRefreshTimer !== void 0) clearTimeout(runHistoryRefreshTimer);
|
|
786
|
+
runHistoryRefreshTimer = setTimeout(() => {
|
|
787
|
+
runHistoryRefreshTimer = void 0;
|
|
788
|
+
refreshPersistedRunsFromDisk();
|
|
789
|
+
}, 50);
|
|
790
|
+
};
|
|
791
|
+
watcher.on("change", scheduleRefresh);
|
|
792
|
+
watcher.on("add", scheduleRefresh);
|
|
793
|
+
watcher.on("unlink", scheduleRefresh);
|
|
794
|
+
watcher.on("addDir", scheduleRefresh);
|
|
795
|
+
watcher.on("unlinkDir", scheduleRefresh);
|
|
767
796
|
await new Promise((ready) => {
|
|
768
797
|
watcher.once("ready", ready);
|
|
769
798
|
});
|
|
@@ -797,12 +826,34 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
797
826
|
runs.clear();
|
|
798
827
|
const persistedRuns = await loadPersistedRunSnapshots(localStateDir);
|
|
799
828
|
nextShortIdNum = nextShortIdFromSnapshots(persistedRuns);
|
|
800
|
-
for (const persistedRun of persistedRuns) runs.set(persistedRun.manifest.id,
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
829
|
+
for (const persistedRun of persistedRuns) runs.set(persistedRun.manifest.id, toRunnerRunState(persistedRun));
|
|
830
|
+
}
|
|
831
|
+
async function refreshPersistedRunsFromDisk() {
|
|
832
|
+
const persistedRuns = await loadPersistedRunSnapshots(localStateDir);
|
|
833
|
+
const persistedRunIds = new Set(persistedRuns.map((snapshot) => snapshot.manifest.id));
|
|
834
|
+
let changed = false;
|
|
835
|
+
for (const persistedRun of persistedRuns) {
|
|
836
|
+
const existing = runs.get(persistedRun.manifest.id);
|
|
837
|
+
if (existing?.manifest.status === "running" && existing.childProcess) continue;
|
|
838
|
+
runs.set(persistedRun.manifest.id, toRunnerRunState(persistedRun, existing));
|
|
839
|
+
changed = true;
|
|
840
|
+
}
|
|
841
|
+
for (const [runId, existing] of [...runs]) {
|
|
842
|
+
if (persistedRunIds.has(runId)) continue;
|
|
843
|
+
if (existing.manifest.status === "running") continue;
|
|
844
|
+
runs.delete(runId);
|
|
845
|
+
changed = true;
|
|
846
|
+
}
|
|
847
|
+
nextShortIdNum = Math.max(nextShortIdNum, nextShortIdFromSnapshots(persistedRuns));
|
|
848
|
+
if (changed) emitDiscoveryEvent();
|
|
849
|
+
}
|
|
850
|
+
function toRunnerRunState(snapshot, existing) {
|
|
851
|
+
return {
|
|
852
|
+
...snapshot,
|
|
853
|
+
listeners: existing?.listeners ?? /* @__PURE__ */ new Set(),
|
|
854
|
+
childProcess: existing?.childProcess,
|
|
855
|
+
childTerminalReceived: existing?.childTerminalReceived ?? false
|
|
856
|
+
};
|
|
806
857
|
}
|
|
807
858
|
return runner;
|
|
808
859
|
}
|
|
@@ -978,8 +1029,8 @@ async function commandApp(args) {
|
|
|
978
1029
|
const { serve } = await import("@hono/node-server");
|
|
979
1030
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
980
1031
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
981
|
-
const appModule = await import("./app-B8e-oWYc.mjs");
|
|
982
|
-
const runnerModule = await import("./runner-DABFPXkx.mjs");
|
|
1032
|
+
const appModule = await import("./app-hAlVvT-Q.mjs");
|
|
1033
|
+
const runnerModule = await import("./runner-DxlahWDo.mjs");
|
|
983
1034
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
984
1035
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
985
1036
|
await runnerModule.initRunner();
|
package/dist/index.d.mts
CHANGED
|
@@ -666,6 +666,59 @@ declare const assertionFailureSchema: z$1.ZodObject<{
|
|
|
666
666
|
}, z$1.core.$strip>;
|
|
667
667
|
/** Assertion failure metadata captured for one case run. */
|
|
668
668
|
type AssertionFailure = z$1.infer<typeof assertionFailureSchema>;
|
|
669
|
+
/** Severity level for one log captured during a case run. */
|
|
670
|
+
declare const runLogLevelSchema: z$1.ZodEnum<{
|
|
671
|
+
error: "error";
|
|
672
|
+
log: "log";
|
|
673
|
+
info: "info";
|
|
674
|
+
warn: "warn";
|
|
675
|
+
}>;
|
|
676
|
+
/** Severity level for one log captured during a case run. */
|
|
677
|
+
type RunLogLevel = z$1.infer<typeof runLogLevelSchema>;
|
|
678
|
+
/** Eval runner phase that emitted a captured case log. */
|
|
679
|
+
declare const runLogPhaseSchema: z$1.ZodEnum<{
|
|
680
|
+
eval: "eval";
|
|
681
|
+
derive: "derive";
|
|
682
|
+
outputsSchema: "outputsSchema";
|
|
683
|
+
scorer: "scorer";
|
|
684
|
+
}>;
|
|
685
|
+
/** Eval runner phase that emitted a captured case log. */
|
|
686
|
+
type RunLogPhase = z$1.infer<typeof runLogPhaseSchema>;
|
|
687
|
+
/** Schema for the best-effort source location (file, line, column) of one captured case log. */
|
|
688
|
+
declare const runLogLocationSchema: z$1.ZodObject<{
|
|
689
|
+
file: z$1.ZodString;
|
|
690
|
+
line: z$1.ZodNumber;
|
|
691
|
+
column: z$1.ZodNumber;
|
|
692
|
+
}, z$1.core.$strip>;
|
|
693
|
+
/** Best-effort source location for one captured case log. */
|
|
694
|
+
type RunLogLocation = z$1.infer<typeof runLogLocationSchema>;
|
|
695
|
+
/** Schema for one persisted log entry captured during a case run. */
|
|
696
|
+
declare const runLogEntrySchema: z$1.ZodObject<{
|
|
697
|
+
timestamp: z$1.ZodString;
|
|
698
|
+
level: z$1.ZodEnum<{
|
|
699
|
+
error: "error";
|
|
700
|
+
log: "log";
|
|
701
|
+
info: "info";
|
|
702
|
+
warn: "warn";
|
|
703
|
+
}>;
|
|
704
|
+
phase: z$1.ZodEnum<{
|
|
705
|
+
eval: "eval";
|
|
706
|
+
derive: "derive";
|
|
707
|
+
outputsSchema: "outputsSchema";
|
|
708
|
+
scorer: "scorer";
|
|
709
|
+
}>;
|
|
710
|
+
message: z$1.ZodString;
|
|
711
|
+
args: z$1.ZodDefault<z$1.ZodArray<z$1.ZodUnknown>>;
|
|
712
|
+
truncated: z$1.ZodDefault<z$1.ZodBoolean>;
|
|
713
|
+
location: z$1.ZodOptional<z$1.ZodObject<{
|
|
714
|
+
file: z$1.ZodString;
|
|
715
|
+
line: z$1.ZodNumber;
|
|
716
|
+
column: z$1.ZodNumber;
|
|
717
|
+
}, z$1.core.$strip>>;
|
|
718
|
+
source: z$1.ZodOptional<z$1.ZodString>;
|
|
719
|
+
}, z$1.core.$strip>;
|
|
720
|
+
/** Persisted log entry captured during a case run. */
|
|
721
|
+
type RunLogEntry = z$1.infer<typeof runLogEntrySchema>;
|
|
669
722
|
/** Trace payload captured while computing one score for a case. */
|
|
670
723
|
declare const scoreTraceSchema: z$1.ZodObject<{
|
|
671
724
|
trace: z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -907,6 +960,30 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
907
960
|
message: string;
|
|
908
961
|
stack?: string | undefined;
|
|
909
962
|
}, string>>]>>;
|
|
963
|
+
logs: z$1.ZodDefault<z$1.ZodArray<z$1.ZodObject<{
|
|
964
|
+
timestamp: z$1.ZodString;
|
|
965
|
+
level: z$1.ZodEnum<{
|
|
966
|
+
error: "error";
|
|
967
|
+
log: "log";
|
|
968
|
+
info: "info";
|
|
969
|
+
warn: "warn";
|
|
970
|
+
}>;
|
|
971
|
+
phase: z$1.ZodEnum<{
|
|
972
|
+
eval: "eval";
|
|
973
|
+
derive: "derive";
|
|
974
|
+
outputsSchema: "outputsSchema";
|
|
975
|
+
scorer: "scorer";
|
|
976
|
+
}>;
|
|
977
|
+
message: z$1.ZodString;
|
|
978
|
+
args: z$1.ZodDefault<z$1.ZodArray<z$1.ZodUnknown>>;
|
|
979
|
+
truncated: z$1.ZodDefault<z$1.ZodBoolean>;
|
|
980
|
+
location: z$1.ZodOptional<z$1.ZodObject<{
|
|
981
|
+
file: z$1.ZodString;
|
|
982
|
+
line: z$1.ZodNumber;
|
|
983
|
+
column: z$1.ZodNumber;
|
|
984
|
+
}, z$1.core.$strip>>;
|
|
985
|
+
source: z$1.ZodOptional<z$1.ZodString>;
|
|
986
|
+
}, z$1.core.$strip>>>;
|
|
910
987
|
error: z$1.ZodNullable<z$1.ZodObject<{
|
|
911
988
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
912
989
|
message: z$1.ZodString;
|
|
@@ -1564,6 +1641,12 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
|
|
|
1564
1641
|
}, z$1.core.$strip>;
|
|
1565
1642
|
/** Authored API calls config accepted from `agent-evals.config.ts`. */
|
|
1566
1643
|
type ApiCallsConfigInput = z$1.infer<typeof apiCallsConfigSchema>;
|
|
1644
|
+
/** Schema for workspace-level run log capture options. */
|
|
1645
|
+
declare const runLogsConfigSchema: z$1.ZodObject<{
|
|
1646
|
+
captureConsole: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
1647
|
+
}, z$1.core.$strip>;
|
|
1648
|
+
/** Workspace-level run log capture options. */
|
|
1649
|
+
type RunLogsConfigInput = z$1.infer<typeof runLogsConfigSchema>;
|
|
1567
1650
|
/** Resolved LLM-calls config sent to the UI with all defaults applied. */
|
|
1568
1651
|
type ResolvedLlmCallsConfig = {
|
|
1569
1652
|
kinds: string[];
|
|
@@ -1737,6 +1820,16 @@ type AgentEvalsConfig = {
|
|
|
1737
1820
|
* ```
|
|
1738
1821
|
*/
|
|
1739
1822
|
apiCalls?: ApiCallsConfigInput;
|
|
1823
|
+
/**
|
|
1824
|
+
* Configuration for case run logs.
|
|
1825
|
+
*
|
|
1826
|
+
* Console capture is enabled by default and stores `console.log`,
|
|
1827
|
+
* `console.info`, `console.warn`, and `console.error` calls made during
|
|
1828
|
+
* active case-owned phases. Set `captureConsole: false` to keep console
|
|
1829
|
+
* output visible in the terminal without persisting it to case details.
|
|
1830
|
+
* Manual `evalLog(...)` calls are still persisted.
|
|
1831
|
+
*/
|
|
1832
|
+
runLogs?: RunLogsConfigInput;
|
|
1740
1833
|
/**
|
|
1741
1834
|
* Optional controls for the operation cache. When omitted, the cache is
|
|
1742
1835
|
* enabled and stored under `<workspaceRoot>/.agent-evals/cache`.
|
|
@@ -1872,6 +1965,9 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
1872
1965
|
}>>>;
|
|
1873
1966
|
}, z$1.core.$strip>>>;
|
|
1874
1967
|
}, z$1.core.$strip>>;
|
|
1968
|
+
runLogs: z$1.ZodOptional<z$1.ZodObject<{
|
|
1969
|
+
captureConsole: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
1970
|
+
}, z$1.core.$strip>>;
|
|
1875
1971
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
1876
1972
|
enabled: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
1877
1973
|
dir: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -2247,6 +2343,110 @@ declare const cacheEntrySchema: z$1.ZodObject<{
|
|
|
2247
2343
|
}, z$1.core.$strip>;
|
|
2248
2344
|
/** Persisted cache file contents. */
|
|
2249
2345
|
type CacheEntry = z$1.infer<typeof cacheEntrySchema>;
|
|
2346
|
+
/** Debug-only raw key metadata stored outside the reusable cache entry. */
|
|
2347
|
+
declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
2348
|
+
version: z$1.ZodLiteral<1>;
|
|
2349
|
+
key: z$1.ZodString;
|
|
2350
|
+
namespace: z$1.ZodString;
|
|
2351
|
+
operationType: z$1.ZodEnum<{
|
|
2352
|
+
span: "span";
|
|
2353
|
+
value: "value";
|
|
2354
|
+
}>;
|
|
2355
|
+
operationName: z$1.ZodString;
|
|
2356
|
+
storedAt: z$1.ZodString;
|
|
2357
|
+
codeFingerprint: z$1.ZodString;
|
|
2358
|
+
rawKey: z$1.ZodUnknown;
|
|
2359
|
+
}, z$1.core.$strip>;
|
|
2360
|
+
/** Debug-only raw cache key entry. May contain sensitive prompt/input data. */
|
|
2361
|
+
type CacheDebugKeyEntry = z$1.infer<typeof cacheDebugKeyEntrySchema>;
|
|
2362
|
+
/** Cache lookup response with optional debug-only raw key data. */
|
|
2363
|
+
declare const cacheEntryWithDebugKeySchema: z$1.ZodObject<{
|
|
2364
|
+
version: z$1.ZodLiteral<1>;
|
|
2365
|
+
key: z$1.ZodString;
|
|
2366
|
+
namespace: z$1.ZodString;
|
|
2367
|
+
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2368
|
+
span: "span";
|
|
2369
|
+
value: "value";
|
|
2370
|
+
}>>;
|
|
2371
|
+
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
2372
|
+
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
2373
|
+
spanKind: z$1.ZodOptional<z$1.ZodString>;
|
|
2374
|
+
storedAt: z$1.ZodString;
|
|
2375
|
+
codeFingerprint: z$1.ZodString;
|
|
2376
|
+
recording: z$1.ZodObject<{
|
|
2377
|
+
returnValue: z$1.ZodUnknown;
|
|
2378
|
+
finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
|
|
2379
|
+
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2380
|
+
error: "error";
|
|
2381
|
+
running: "running";
|
|
2382
|
+
ok: "ok";
|
|
2383
|
+
cancelled: "cancelled";
|
|
2384
|
+
}>>;
|
|
2385
|
+
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
2386
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
2387
|
+
message: z$1.ZodString;
|
|
2388
|
+
stack: z$1.ZodOptional<z$1.ZodString>;
|
|
2389
|
+
capturedAt: z$1.ZodOptional<z$1.ZodString>;
|
|
2390
|
+
}, z$1.core.$catchall<z$1.ZodUnknown>>>;
|
|
2391
|
+
finalErrors: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
2392
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
2393
|
+
message: z$1.ZodString;
|
|
2394
|
+
stack: z$1.ZodOptional<z$1.ZodString>;
|
|
2395
|
+
capturedAt: z$1.ZodOptional<z$1.ZodString>;
|
|
2396
|
+
}, z$1.core.$catchall<z$1.ZodUnknown>>>>;
|
|
2397
|
+
finalWarning: z$1.ZodOptional<z$1.ZodObject<{
|
|
2398
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
2399
|
+
message: z$1.ZodString;
|
|
2400
|
+
stack: z$1.ZodOptional<z$1.ZodString>;
|
|
2401
|
+
capturedAt: z$1.ZodOptional<z$1.ZodString>;
|
|
2402
|
+
}, z$1.core.$catchall<z$1.ZodUnknown>>>;
|
|
2403
|
+
finalWarnings: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
2404
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
2405
|
+
message: z$1.ZodString;
|
|
2406
|
+
stack: z$1.ZodOptional<z$1.ZodString>;
|
|
2407
|
+
capturedAt: z$1.ZodOptional<z$1.ZodString>;
|
|
2408
|
+
}, z$1.core.$catchall<z$1.ZodUnknown>>>>;
|
|
2409
|
+
ops: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
2410
|
+
kind: z$1.ZodLiteral<"setOutput">;
|
|
2411
|
+
key: z$1.ZodString;
|
|
2412
|
+
value: z$1.ZodUnknown;
|
|
2413
|
+
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2414
|
+
kind: z$1.ZodLiteral<"appendOutput">;
|
|
2415
|
+
key: z$1.ZodString;
|
|
2416
|
+
value: z$1.ZodUnknown;
|
|
2417
|
+
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2418
|
+
kind: z$1.ZodLiteral<"mergeOutput">;
|
|
2419
|
+
key: z$1.ZodString;
|
|
2420
|
+
patch: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
|
|
2421
|
+
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2422
|
+
kind: z$1.ZodLiteral<"incrementOutput">;
|
|
2423
|
+
key: z$1.ZodString;
|
|
2424
|
+
delta: z$1.ZodNumber;
|
|
2425
|
+
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2426
|
+
kind: z$1.ZodLiteral<"checkpoint">;
|
|
2427
|
+
name: z$1.ZodString;
|
|
2428
|
+
data: z$1.ZodUnknown;
|
|
2429
|
+
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2430
|
+
kind: z$1.ZodLiteral<"subSpan">;
|
|
2431
|
+
span: z$1.ZodType<SerializedCacheSpan, unknown, z$1.core.$ZodTypeInternals<SerializedCacheSpan, unknown>>;
|
|
2432
|
+
}, z$1.core.$strip>], "kind">>;
|
|
2433
|
+
}, z$1.core.$strip>;
|
|
2434
|
+
debugKey: z$1.ZodOptional<z$1.ZodObject<{
|
|
2435
|
+
version: z$1.ZodLiteral<1>;
|
|
2436
|
+
key: z$1.ZodString;
|
|
2437
|
+
namespace: z$1.ZodString;
|
|
2438
|
+
operationType: z$1.ZodEnum<{
|
|
2439
|
+
span: "span";
|
|
2440
|
+
value: "value";
|
|
2441
|
+
}>;
|
|
2442
|
+
operationName: z$1.ZodString;
|
|
2443
|
+
storedAt: z$1.ZodString;
|
|
2444
|
+
codeFingerprint: z$1.ZodString;
|
|
2445
|
+
rawKey: z$1.ZodUnknown;
|
|
2446
|
+
}, z$1.core.$strip>>;
|
|
2447
|
+
}, z$1.core.$strip>;
|
|
2448
|
+
/** Cache lookup response returned by cache APIs when raw-key debug data exists. */
|
|
2449
|
+
type CacheEntryWithDebugKey = z$1.infer<typeof cacheEntryWithDebugKeySchema>;
|
|
2250
2450
|
/** Persisted per-owner cache file containing multiple cache entries. */
|
|
2251
2451
|
declare const cacheFileSchema: z$1.ZodObject<{
|
|
2252
2452
|
version: z$1.ZodLiteral<1>;
|
|
@@ -2326,21 +2526,44 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
2326
2526
|
}, z$1.core.$strip>;
|
|
2327
2527
|
/** Persisted per-owner cache file contents. */
|
|
2328
2528
|
type CacheFile = z$1.infer<typeof cacheFileSchema>;
|
|
2529
|
+
/** Persisted per-owner debug file containing raw cache key metadata. */
|
|
2530
|
+
declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
2531
|
+
version: z$1.ZodLiteral<1>;
|
|
2532
|
+
owner: z$1.ZodString;
|
|
2533
|
+
entries: z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
|
|
2534
|
+
version: z$1.ZodLiteral<1>;
|
|
2535
|
+
key: z$1.ZodString;
|
|
2536
|
+
namespace: z$1.ZodString;
|
|
2537
|
+
operationType: z$1.ZodEnum<{
|
|
2538
|
+
span: "span";
|
|
2539
|
+
value: "value";
|
|
2540
|
+
}>;
|
|
2541
|
+
operationName: z$1.ZodString;
|
|
2542
|
+
storedAt: z$1.ZodString;
|
|
2543
|
+
codeFingerprint: z$1.ZodString;
|
|
2544
|
+
rawKey: z$1.ZodUnknown;
|
|
2545
|
+
}, z$1.core.$strip>>;
|
|
2546
|
+
}, z$1.core.$strip>;
|
|
2547
|
+
/** Persisted per-owner raw cache key debug file contents. */
|
|
2548
|
+
type CacheDebugKeyFile = z$1.infer<typeof cacheDebugKeyFileSchema>;
|
|
2329
2549
|
//#endregion
|
|
2330
2550
|
//#region ../shared/src/utils/extractCacheHits.d.ts
|
|
2331
2551
|
/**
|
|
2332
|
-
* Single cache
|
|
2333
|
-
*
|
|
2552
|
+
* Single cache activity entry rendered as one row in the case drawer's Cache
|
|
2553
|
+
* tab.
|
|
2334
2554
|
*
|
|
2335
|
-
* `
|
|
2336
|
-
*
|
|
2337
|
-
*
|
|
2338
|
-
* (
|
|
2555
|
+
* `action === 'hit'` rows reused an existing persisted cache entry.
|
|
2556
|
+
* `action === 'added'` rows came from a miss or refresh that wrote a persisted
|
|
2557
|
+
* cache entry during the run. `origin === 'caseRoot'` rows came from
|
|
2558
|
+
* `evalTracer.cache(...)` calls made directly from the case body (no
|
|
2559
|
+
* surrounding `traceSpan`), which would otherwise be invisible.
|
|
2339
2560
|
*/
|
|
2340
|
-
type
|
|
2561
|
+
type CacheActivityEntry = {
|
|
2341
2562
|
id: string;
|
|
2342
2563
|
source: 'span' | 'value';
|
|
2343
2564
|
origin: 'span' | 'caseRoot';
|
|
2565
|
+
action: 'hit' | 'added';
|
|
2566
|
+
status: 'hit' | 'miss' | 'refresh';
|
|
2344
2567
|
name: string;
|
|
2345
2568
|
namespace: string;
|
|
2346
2569
|
key: string;
|
|
@@ -2348,14 +2571,26 @@ type CacheHitEntry = {
|
|
|
2348
2571
|
age: number | undefined;
|
|
2349
2572
|
spanId: string | undefined;
|
|
2350
2573
|
};
|
|
2574
|
+
/** Cache activity row narrowed to cache hits for compatibility helpers. */
|
|
2575
|
+
type CacheHitEntry = CacheActivityEntry & {
|
|
2576
|
+
action: 'hit';
|
|
2577
|
+
status: 'hit';
|
|
2578
|
+
};
|
|
2579
|
+
/**
|
|
2580
|
+
* Collect every cache hit or cache write recorded for a case run.
|
|
2581
|
+
*
|
|
2582
|
+
* Walks `spans` for span-level cache activity (`attributes['cache.status']`)
|
|
2583
|
+
* and per-span value-cache refs (`attributes['cache.refs']`), then appends
|
|
2584
|
+
* spanless value-cache refs persisted on the case scope. Bypasses are skipped
|
|
2585
|
+
* because they do not read or write a persisted cache entry.
|
|
2586
|
+
*/
|
|
2587
|
+
declare function extractCacheEntries(spans: EvalTraceSpan[], caseCacheRefs: TraceCacheRef[]): CacheActivityEntry[];
|
|
2351
2588
|
/**
|
|
2352
2589
|
* Collect every `status === 'hit'` cache event recorded for a case run.
|
|
2353
2590
|
*
|
|
2354
|
-
*
|
|
2355
|
-
*
|
|
2356
|
-
*
|
|
2357
|
-
* statuses (`miss`/`refresh`/`bypass`) are skipped — they remain visible
|
|
2358
|
-
* inline in the Trace tab.
|
|
2591
|
+
* This compatibility helper returns only rows that reused an existing
|
|
2592
|
+
* persisted cache entry. Use `extractCacheEntries(...)` when the UI should
|
|
2593
|
+
* include cache misses and refreshes that wrote entries during the run.
|
|
2359
2594
|
*/
|
|
2360
2595
|
declare function extractCacheHits(spans: EvalTraceSpan[], caseCacheRefs: TraceCacheRef[]): CacheHitEntry[];
|
|
2361
2596
|
//#endregion
|
|
@@ -2673,6 +2908,19 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
|
|
|
2673
2908
|
declare function repoFile(path: string, mimeType?: string): RepoFileRef;
|
|
2674
2909
|
//#endregion
|
|
2675
2910
|
//#region ../sdk/src/runtime.d.ts
|
|
2911
|
+
/**
|
|
2912
|
+
* Raw-key debug payload passed alongside cache writes.
|
|
2913
|
+
*
|
|
2914
|
+
* `rawKey` may include prompt text, user input, or other sensitive material.
|
|
2915
|
+
* Runners store it outside the reusable cache so projects can gitignore the
|
|
2916
|
+
* debug folder while keeping hash-only cache entries shareable.
|
|
2917
|
+
*/
|
|
2918
|
+
type CacheDebugKeyWrite = {
|
|
2919
|
+
rawKey: unknown;
|
|
2920
|
+
operationType: CacheOperationType;
|
|
2921
|
+
operationName: string;
|
|
2922
|
+
codeFingerprint: string;
|
|
2923
|
+
};
|
|
2676
2924
|
/**
|
|
2677
2925
|
* Adapter used by the SDK to read and write cache entries.
|
|
2678
2926
|
*
|
|
@@ -2680,8 +2928,15 @@ declare function repoFile(path: string, mimeType?: string): RepoFileRef;
|
|
|
2680
2928
|
* starts executing.
|
|
2681
2929
|
*/
|
|
2682
2930
|
type CacheAdapter = {
|
|
2683
|
-
/** Return the stored entry for `keyHash` under `namespace`, or `null`. */lookup(namespace: string, keyHash: string): Promise<CacheEntry | null>;
|
|
2684
|
-
|
|
2931
|
+
/** Return the stored entry for `keyHash` under `namespace`, or `null`. */lookup(namespace: string, keyHash: string): Promise<CacheEntry | null>;
|
|
2932
|
+
/**
|
|
2933
|
+
* Persist a cache entry. Must be safe under concurrent calls.
|
|
2934
|
+
*
|
|
2935
|
+
* `debugKey` is optional and contains the authored raw key value for
|
|
2936
|
+
* debugging. It may contain sensitive prompt/input data and should be stored
|
|
2937
|
+
* separately from reusable cache files.
|
|
2938
|
+
*/
|
|
2939
|
+
write(entry: CacheEntry, debugKey?: CacheDebugKeyWrite): Promise<void>;
|
|
2685
2940
|
};
|
|
2686
2941
|
/** Runner-supplied cache context attached to an eval case scope. */
|
|
2687
2942
|
type CacheScopeContext = {
|
|
@@ -2703,7 +2958,8 @@ type EvalCaseScope = {
|
|
|
2703
2958
|
nextEvalIdCounter: number; /** Authored input for the current case, when provided by the runner. */
|
|
2704
2959
|
input?: unknown;
|
|
2705
2960
|
outputs: Record<string, unknown>; /** Structured assertion failures recorded for the current case. */
|
|
2706
|
-
assertionFailures: AssertionFailure[];
|
|
2961
|
+
assertionFailures: AssertionFailure[]; /** Logs captured from manual `evalLog(...)` calls and enabled console calls. */
|
|
2962
|
+
logs: RunLogEntry[];
|
|
2707
2963
|
spans: EvalTraceSpan[];
|
|
2708
2964
|
checkpoints: Map<string, unknown>;
|
|
2709
2965
|
spanStack: string[];
|
|
@@ -2735,6 +2991,7 @@ type EvalCaseScope = {
|
|
|
2735
2991
|
* modules imported while a run is being prepared.
|
|
2736
2992
|
*/
|
|
2737
2993
|
type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'outputsSchema' | 'scorer';
|
|
2994
|
+
type EvalLogLevelInput = RunLogLevel | 'warning';
|
|
2738
2995
|
/** Error thrown when an eval assertion fails during case execution. */
|
|
2739
2996
|
declare class EvalAssertionError extends Error {
|
|
2740
2997
|
constructor(message: string);
|
|
@@ -2750,6 +3007,14 @@ declare function getCurrentScope(): EvalCaseScope | undefined;
|
|
|
2750
3007
|
* while validating outputs, and `scorer` while computing scores.
|
|
2751
3008
|
*/
|
|
2752
3009
|
declare function isInEvalScope(): EvalRuntimeScope | null;
|
|
3010
|
+
/**
|
|
3011
|
+
* Record a manual log entry on the active eval case.
|
|
3012
|
+
*
|
|
3013
|
+
* Values are formatted with Node-style console formatting and capped before
|
|
3014
|
+
* persistence so a single log cannot make run artifacts unbounded. Calls made
|
|
3015
|
+
* outside active case-owned eval phases are ignored.
|
|
3016
|
+
*/
|
|
3017
|
+
declare function evalLog(level: EvalLogLevelInput, ...args: unknown[]): void;
|
|
2753
3018
|
/**
|
|
2754
3019
|
* Register background work that should settle before eval finalization.
|
|
2755
3020
|
*
|
|
@@ -2845,6 +3110,32 @@ declare function incrementEvalOutput(key: string, delta: number): void;
|
|
|
2845
3110
|
*/
|
|
2846
3111
|
declare function evalAssert(condition: boolean, message: string): void;
|
|
2847
3112
|
//#endregion
|
|
3113
|
+
//#region ../sdk/src/cacheSerialization.d.ts
|
|
3114
|
+
declare const serializedCacheValueMarker = "__agentEvalsCacheSerialization";
|
|
3115
|
+
declare const jsonSafeCacheValueVersion = "json-safe-v1";
|
|
3116
|
+
type JsonSafeCacheValueType = 'ArrayBuffer' | 'BigInt' | 'Blob' | 'CompressedJson' | 'CompressedString' | 'Date' | 'Error' | 'File' | 'Float64Array' | 'Headers' | 'Map' | 'Number' | 'Object' | 'RegExp' | 'Set' | 'URL' | 'URLSearchParams' | 'Undefined';
|
|
3117
|
+
type JsonSafeSerializedCacheValue = {
|
|
3118
|
+
[serializedCacheValueMarker]: typeof jsonSafeCacheValueVersion;
|
|
3119
|
+
codec?: 'gzip';
|
|
3120
|
+
length?: number;
|
|
3121
|
+
type: JsonSafeCacheValueType;
|
|
3122
|
+
value?: unknown;
|
|
3123
|
+
};
|
|
3124
|
+
/** JSON-safe persisted representation for one rich cached value. */
|
|
3125
|
+
type SerializedCacheValue = JsonSafeSerializedCacheValue;
|
|
3126
|
+
/**
|
|
3127
|
+
* Serialize one cached value while keeping plain JSON as plain JSON.
|
|
3128
|
+
*
|
|
3129
|
+
* Rich runtime values use small tagged wrappers.
|
|
3130
|
+
*/
|
|
3131
|
+
declare function serializeCacheValue(value: unknown): Promise<unknown>;
|
|
3132
|
+
/** Revive one cached value, while preserving legacy JSON-round-tripped data. */
|
|
3133
|
+
declare function deserializeCacheValue(value: unknown): unknown;
|
|
3134
|
+
/** Serialize all rich values captured in a cache recording before persistence. */
|
|
3135
|
+
declare function serializeCacheRecording(recording: CacheRecording): Promise<CacheRecording>;
|
|
3136
|
+
/** Revive all rich values captured in a cache recording after lookup. */
|
|
3137
|
+
declare function deserializeCacheRecording(recording: CacheRecording): CacheRecording;
|
|
3138
|
+
//#endregion
|
|
2848
3139
|
//#region ../sdk/src/traceDiagnostics.d.ts
|
|
2849
3140
|
/** Severity used when attaching a recoverable diagnostic to an active span. */
|
|
2850
3141
|
type CaptureEvalSpanErrorLevel = 'error' | 'warning';
|
|
@@ -3110,11 +3401,11 @@ type EvalRunner = {
|
|
|
3110
3401
|
listCache(): Promise<CacheListItem[]>;
|
|
3111
3402
|
/**
|
|
3112
3403
|
* Return the full persisted cache entry for `namespace` + `key`, including
|
|
3113
|
-
* its recording. Returns `null` when no
|
|
3114
|
-
* drawer's Cache
|
|
3115
|
-
* row is expanded.
|
|
3404
|
+
* its recording and optional raw-key debug metadata. Returns `null` when no
|
|
3405
|
+
* entry matches. Used by the case drawer's Cache tab to lazily fetch the
|
|
3406
|
+
* cached return value when a row is expanded.
|
|
3116
3407
|
*/
|
|
3117
|
-
getCacheEntry(namespace: string, key: string): Promise<
|
|
3408
|
+
getCacheEntry(namespace: string, key: string): Promise<CacheEntryWithDebugKey | null>;
|
|
3118
3409
|
/**
|
|
3119
3410
|
* Remove cache entries matching `filter`, or all entries when no filter is
|
|
3120
3411
|
* supplied.
|
|
@@ -3169,4 +3460,4 @@ declare function createRunner({
|
|
|
3169
3460
|
*/
|
|
3170
3461
|
declare function runCli(argv: string[]): Promise<void>;
|
|
3171
3462
|
//#endregion
|
|
3172
|
-
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type 
ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, 
runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
3463
|
+
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type 
ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, 
getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as
|
|
2
|
-
import { n as createRunner, t as runCli } from "./cli-
|
|
3
|
-
import "./src-
|
|
4
|
-
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, 
traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
1
|
+
import { $ as assertionFailureSchema, $t as runArtifactRefSchema, A as getNestedAttribute, An as getEvalRegistry, At as cacheRecordingSchema, B as agentEvalsConfigSchema, Bt as traceDisplayInputConfigSchema, C as createRunRequestSchema, Cn as runInEvalScope, Ct as cacheEntrySchema, D as extractCacheHits, Dn as startEvalBackgroundJob, Dt as cacheModeSchema, E as extractCacheEntries, En as setScopeCacheContext, Et as cacheListItemSchema, F as deriveStatusFromChildStatuses, Ft as traceAttributeDisplayFormatSchema, G as llmCallMetricFormatSchema, Gt as cellValueSchema, H as apiCallMetricPlacementSchema, Ht as traceSpanKindSchema, I as runManifestSchema, It as traceAttributeDisplayInputSchema, J as llmCallsConfigSchema, Jt as columnKindSchema, K as llmCallMetricPlacementSchema, Kt as columnDefSchema, L as runSummarySchema, Lt as traceAttributeDisplayPlacementSchema, M as getEvalDisplayStatus, Mt as serializedCacheSpanSchema, N as deriveScopedSummaryFromCases, Nt as spanCacheOptionsSchema, O as extractApiCalls, On as repoFile, Ot as cacheOperationTypeSchema, P as deriveStatusFromCaseRows, Pt as traceCacheRefSchema, Q as trialSelectionModeSchema, Qt as repoFileRefSchema, R as DEFAULT_API_CALLS_CONFIG, Rt as traceAttributeDisplaySchema, Sn as runInEvalRuntimeScope, St as cacheDebugKeyFileSchema, T as sseEnvelopeSchema, Tn as setEvalOutput, Tt as cacheFileSchema, U as apiCallMetricSchema, Ut as traceSpanSchema, V as apiCallMetricFormatSchema, Vt as traceSpanErrorSchema, W as apiCallsConfigSchema, Wt as traceSpanWarningSchema, X as resolveLlmCallsConfig, Xt as jsonCellSchema, Y as resolveApiCallsConfig, Yt as fileRefSchema, Z as runLogsConfigSchema, Zt as numberDisplayOptionsSchema, _n as getEvalCaseInput, _t as evalChartMetricSchema, an as hashCacheKey, at as evalStatsConfigSchema, bn as mergeEvalOutput, bt as evalChartsConfigSchema, cn as deserializeCacheValue, ct as runLogLevelSchema, dn as EvalAssertionError, dt as scoreTraceSchema, en as z, et as caseDetailSchema, fn as 
appendToEvalOutput, ft as evalChartAggregateSchema, gn as getCurrentScope, gt as evalChartConfigSchema, hn as evalLog, ht as evalChartColorSchema, in as evalTracer, it as evalStatItemSchema, j as getEvalTitle, jt as cacheStatusSchema, k as extractLlmCalls, kn as defineEval, kt as cacheRecordingOpSchema, ln as serializeCacheRecording, lt as runLogLocationSchema, mn as evalAssert, mt as evalChartBuiltinMetricSchema, nn as captureEvalSpanError, nt as evalFreshnessStatusSchema, on as hashCacheKeySync, ot as evalSummarySchema, pt as evalChartAxisSchema, q as llmCallMetricSchema, qt as columnFormatSchema, rn as evalSpan, rt as evalStatAggregateSchema, sn as deserializeCacheRecording, st as runLogEntrySchema, tn as buildTraceTree, tt as caseRowSchema, un as serializeCacheValue, ut as runLogPhaseSchema, vn as incrementEvalOutput, vt as evalChartTooltipExtraSchema, w as updateManualScoreRequestSchema, wn as runInExistingEvalScope, wt as cacheEntryWithDebugKeySchema, xn as nextEvalId, xt as cacheDebugKeyEntrySchema, yn as isInEvalScope, yt as evalChartTypeSchema, z as DEFAULT_LLM_CALLS_CONFIG, zt as traceDisplayConfigSchema } from "./runOrchestration-BBg_VUH5.mjs";
|
|
2
|
+
import { n as createRunner, t as runCli } from "./cli-3zANEAhG.mjs";
|
|
3
|
+
import "./src-BC4OrajN.mjs";
|
|
4
|
+
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, 
serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|