@ls-stack/agent-eval 0.52.1 → 0.52.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-CzLj4ZX0.mjs → app-BsFcUIQp.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-BHc4gfUO.css +1 -0
- package/dist/apps/web/dist/assets/index-D9HUAH8K.js +373 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-Cvs7tc2v.mjs → cli-DbVFgRO3.mjs} +3 -3
- package/dist/index.d.mts +80 -32
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-o38J7uZO.mjs → runOrchestration-DT6cje9E.mjs} +9 -3
- package/dist/{runner-iWtmKx9z.mjs → runner-CyTUvbHE.mjs} +1 -1
- package/dist/{runner-LdMiDmAN.mjs → runner-WRQdfG0r.mjs} +2 -2
- package/dist/{src-Jahivm6d.mjs → src-DlvYXPxG.mjs} +2 -2
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +4 -2
- package/dist/apps/web/dist/assets/index-CFASvC2z.css +0 -1
- package/dist/apps/web/dist/assets/index-dAgwnqH2.js +0 -373
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-dAgwnqH2.js"></script>
|
|
29
|
-
<link rel="stylesheet" crossorigin href="/assets/index-CFASvC2z.css">
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-D9HUAH8K.js"></script>
|
|
29
|
+
<link rel="stylesheet" crossorigin href="/assets/index-BHc4gfUO.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
|
32
32
|
<div id="root"></div>
|
package/dist/bin.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-o38J7uZO.mjs";
|
|
1
|
+
import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-DT6cje9E.mjs";
|
|
2
2
|
import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
3
3
|
import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
|
|
4
4
|
import { createHash, randomUUID } from "node:crypto";
|
|
@@ -2095,8 +2095,8 @@ async function commandApp(args) {
|
|
|
2095
2095
|
const { serve } = await import("@hono/node-server");
|
|
2096
2096
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
2097
2097
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
2098
|
-
const appModule = await import("./app-CzLj4ZX0.mjs");
|
|
2099
|
-
const runnerModule = await import("./runner-iWtmKx9z.mjs");
|
|
2098
|
+
const appModule = await import("./app-BsFcUIQp.mjs");
|
|
2099
|
+
const runnerModule = await import("./runner-CyTUvbHE.mjs");
|
|
2100
2100
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
2101
2101
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
2102
2102
|
await runnerModule.initRunner();
|
package/dist/index.d.mts
CHANGED
|
@@ -2254,8 +2254,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2254
2254
|
}>;
|
|
2255
2255
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2256
2256
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2257
|
-
error: "error";
|
|
2258
2257
|
success: "success";
|
|
2258
|
+
error: "error";
|
|
2259
2259
|
warning: "warning";
|
|
2260
2260
|
accent: "accent";
|
|
2261
2261
|
accentDim: "accentDim";
|
|
@@ -2278,8 +2278,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2278
2278
|
}>;
|
|
2279
2279
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2280
2280
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2281
|
-
error: "error";
|
|
2282
2281
|
success: "success";
|
|
2282
|
+
error: "error";
|
|
2283
2283
|
warning: "warning";
|
|
2284
2284
|
accent: "accent";
|
|
2285
2285
|
accentDim: "accentDim";
|
|
@@ -2568,6 +2568,22 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2568
2568
|
}>>;
|
|
2569
2569
|
}, z$1.core.$strip>>>;
|
|
2570
2570
|
}, z$1.core.$strip>;
|
|
2571
|
+
cacheRefs: z$1.ZodDefault<z$1.ZodArray<z$1.ZodObject<{
|
|
2572
|
+
type: z$1.ZodLiteral<"value">;
|
|
2573
|
+
name: z$1.ZodString;
|
|
2574
|
+
namespace: z$1.ZodString;
|
|
2575
|
+
key: z$1.ZodString;
|
|
2576
|
+
status: z$1.ZodEnum<{
|
|
2577
|
+
hit: "hit";
|
|
2578
|
+
miss: "miss";
|
|
2579
|
+
refresh: "refresh";
|
|
2580
|
+
bypass: "bypass";
|
|
2581
|
+
}>;
|
|
2582
|
+
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2583
|
+
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2584
|
+
storedAt: z$1.ZodOptional<z$1.ZodString>;
|
|
2585
|
+
age: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2586
|
+
}, z$1.core.$strip>>>;
|
|
2571
2587
|
}, z$1.core.$strip>;
|
|
2572
2588
|
/** Trace payload captured while computing one score for a case. */
|
|
2573
2589
|
type ScoreTrace = z$1.infer<typeof scoreTraceSchema>;
|
|
@@ -2724,6 +2740,22 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2724
2740
|
}>>;
|
|
2725
2741
|
}, z$1.core.$strip>>>;
|
|
2726
2742
|
}, z$1.core.$strip>;
|
|
2743
|
+
cacheRefs: z$1.ZodDefault<z$1.ZodArray<z$1.ZodObject<{
|
|
2744
|
+
type: z$1.ZodLiteral<"value">;
|
|
2745
|
+
name: z$1.ZodString;
|
|
2746
|
+
namespace: z$1.ZodString;
|
|
2747
|
+
key: z$1.ZodString;
|
|
2748
|
+
status: z$1.ZodEnum<{
|
|
2749
|
+
hit: "hit";
|
|
2750
|
+
miss: "miss";
|
|
2751
|
+
refresh: "refresh";
|
|
2752
|
+
bypass: "bypass";
|
|
2753
|
+
}>;
|
|
2754
|
+
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2755
|
+
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2756
|
+
storedAt: z$1.ZodOptional<z$1.ZodString>;
|
|
2757
|
+
age: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2758
|
+
}, z$1.core.$strip>>>;
|
|
2727
2759
|
}, z$1.core.$strip>>>;
|
|
2728
2760
|
columns: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnion<readonly [z$1.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z$1.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z$1.ZodUnion<readonly [z$1.ZodObject<{
|
|
2729
2761
|
source: z$1.ZodLiteral<"repo">;
|
|
@@ -2781,10 +2813,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2781
2813
|
namespace: z$1.ZodString;
|
|
2782
2814
|
key: z$1.ZodString;
|
|
2783
2815
|
status: z$1.ZodEnum<{
|
|
2784
|
-
bypass: "bypass";
|
|
2785
|
-
refresh: "refresh";
|
|
2786
2816
|
hit: "hit";
|
|
2787
2817
|
miss: "miss";
|
|
2818
|
+
refresh: "refresh";
|
|
2819
|
+
bypass: "bypass";
|
|
2788
2820
|
}>;
|
|
2789
2821
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2790
2822
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2851,8 +2883,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
|
|
|
2851
2883
|
* not emit raw hex so authored evals stay decoupled from the web theme.
|
|
2852
2884
|
*/
|
|
2853
2885
|
declare const evalChartColorSchema: z$1.ZodEnum<{
|
|
2854
|
-
error: "error";
|
|
2855
2886
|
success: "success";
|
|
2887
|
+
error: "error";
|
|
2856
2888
|
warning: "warning";
|
|
2857
2889
|
accent: "accent";
|
|
2858
2890
|
accentDim: "accentDim";
|
|
@@ -2880,8 +2912,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2880
2912
|
}>;
|
|
2881
2913
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2882
2914
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2883
|
-
error: "error";
|
|
2884
2915
|
success: "success";
|
|
2916
|
+
error: "error";
|
|
2885
2917
|
warning: "warning";
|
|
2886
2918
|
accent: "accent";
|
|
2887
2919
|
accentDim: "accentDim";
|
|
@@ -2904,8 +2936,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2904
2936
|
}>;
|
|
2905
2937
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2906
2938
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2907
|
-
error: "error";
|
|
2908
2939
|
success: "success";
|
|
2940
|
+
error: "error";
|
|
2909
2941
|
warning: "warning";
|
|
2910
2942
|
accent: "accent";
|
|
2911
2943
|
accentDim: "accentDim";
|
|
@@ -2963,8 +2995,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
2963
2995
|
}>;
|
|
2964
2996
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2965
2997
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2966
|
-
error: "error";
|
|
2967
2998
|
success: "success";
|
|
2999
|
+
error: "error";
|
|
2968
3000
|
warning: "warning";
|
|
2969
3001
|
accent: "accent";
|
|
2970
3002
|
accentDim: "accentDim";
|
|
@@ -2987,8 +3019,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
2987
3019
|
}>;
|
|
2988
3020
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2989
3021
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2990
|
-
error: "error";
|
|
2991
3022
|
success: "success";
|
|
3023
|
+
error: "error";
|
|
2992
3024
|
warning: "warning";
|
|
2993
3025
|
accent: "accent";
|
|
2994
3026
|
accentDim: "accentDim";
|
|
@@ -3053,8 +3085,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3053
3085
|
}>;
|
|
3054
3086
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3055
3087
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3056
|
-
error: "error";
|
|
3057
3088
|
success: "success";
|
|
3089
|
+
error: "error";
|
|
3058
3090
|
warning: "warning";
|
|
3059
3091
|
accent: "accent";
|
|
3060
3092
|
accentDim: "accentDim";
|
|
@@ -3077,8 +3109,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3077
3109
|
}>;
|
|
3078
3110
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3079
3111
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3080
|
-
error: "error";
|
|
3081
3112
|
success: "success";
|
|
3113
|
+
error: "error";
|
|
3082
3114
|
warning: "warning";
|
|
3083
3115
|
accent: "accent";
|
|
3084
3116
|
accentDim: "accentDim";
|
|
@@ -3157,9 +3189,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3157
3189
|
median: "median";
|
|
3158
3190
|
}>>>;
|
|
3159
3191
|
cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3160
|
-
use: "use";
|
|
3161
|
-
bypass: "bypass";
|
|
3162
3192
|
refresh: "refresh";
|
|
3193
|
+
bypass: "bypass";
|
|
3194
|
+
use: "use";
|
|
3163
3195
|
}>>;
|
|
3164
3196
|
}, z$1.core.$strip>;
|
|
3165
3197
|
/** Persisted lifecycle metadata for a single eval run. */
|
|
@@ -4061,9 +4093,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
|
|
|
4061
4093
|
* - `refresh`: never read, always write (forces re-execution and overwrites).
|
|
4062
4094
|
*/
|
|
4063
4095
|
declare const cacheModeSchema: z$1.ZodEnum<{
|
|
4064
|
-
use: "use";
|
|
4065
|
-
bypass: "bypass";
|
|
4066
4096
|
refresh: "refresh";
|
|
4097
|
+
bypass: "bypass";
|
|
4098
|
+
use: "use";
|
|
4067
4099
|
}>;
|
|
4068
4100
|
/** Mode controlling how cached spans behave during a run. */
|
|
4069
4101
|
type CacheMode = z$1.infer<typeof cacheModeSchema>;
|
|
@@ -4077,17 +4109,17 @@ declare const spanCacheOptionsSchema: z$1.ZodObject<{
|
|
|
4077
4109
|
type SpanCacheOptions = z$1.infer<typeof spanCacheOptionsSchema>;
|
|
4078
4110
|
/** Category of operation stored in the eval cache. */
|
|
4079
4111
|
declare const cacheOperationTypeSchema: z$1.ZodEnum<{
|
|
4080
|
-
span: "span";
|
|
4081
4112
|
value: "value";
|
|
4113
|
+
span: "span";
|
|
4082
4114
|
}>;
|
|
4083
4115
|
/** Category of operation stored in the eval cache. */
|
|
4084
4116
|
type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
|
|
4085
4117
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4086
4118
|
declare const cacheStatusSchema: z$1.ZodEnum<{
|
|
4087
|
-
bypass: "bypass";
|
|
4088
|
-
refresh: "refresh";
|
|
4089
4119
|
hit: "hit";
|
|
4090
4120
|
miss: "miss";
|
|
4121
|
+
refresh: "refresh";
|
|
4122
|
+
bypass: "bypass";
|
|
4091
4123
|
}>;
|
|
4092
4124
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4093
4125
|
type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
|
|
@@ -4104,10 +4136,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
|
|
|
4104
4136
|
namespace: z$1.ZodString;
|
|
4105
4137
|
key: z$1.ZodString;
|
|
4106
4138
|
status: z$1.ZodEnum<{
|
|
4107
|
-
bypass: "bypass";
|
|
4108
|
-
refresh: "refresh";
|
|
4109
4139
|
hit: "hit";
|
|
4110
4140
|
miss: "miss";
|
|
4141
|
+
refresh: "refresh";
|
|
4142
|
+
bypass: "bypass";
|
|
4111
4143
|
}>;
|
|
4112
4144
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
4113
4145
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -4121,8 +4153,8 @@ declare const cacheListItemSchema$1: z$1.ZodObject<{
|
|
|
4121
4153
|
key: z$1.ZodString;
|
|
4122
4154
|
namespace: z$1.ZodString;
|
|
4123
4155
|
operationType: z$1.ZodEnum<{
|
|
4124
|
-
span: "span";
|
|
4125
4156
|
value: "value";
|
|
4157
|
+
span: "span";
|
|
4126
4158
|
}>;
|
|
4127
4159
|
operationName: z$1.ZodString;
|
|
4128
4160
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4244,8 +4276,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
|
|
|
4244
4276
|
key: z$1.ZodString;
|
|
4245
4277
|
namespace: z$1.ZodString;
|
|
4246
4278
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4247
|
-
span: "span";
|
|
4248
4279
|
value: "value";
|
|
4280
|
+
span: "span";
|
|
4249
4281
|
}>>;
|
|
4250
4282
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4251
4283
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4323,8 +4355,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
4323
4355
|
key: z$1.ZodString;
|
|
4324
4356
|
namespace: z$1.ZodString;
|
|
4325
4357
|
operationType: z$1.ZodEnum<{
|
|
4326
|
-
span: "span";
|
|
4327
4358
|
value: "value";
|
|
4359
|
+
span: "span";
|
|
4328
4360
|
}>;
|
|
4329
4361
|
operationName: z$1.ZodString;
|
|
4330
4362
|
storedAt: z$1.ZodString;
|
|
@@ -4334,8 +4366,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
4334
4366
|
key: z$1.ZodString;
|
|
4335
4367
|
namespace: z$1.ZodString;
|
|
4336
4368
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4337
|
-
span: "span";
|
|
4338
4369
|
value: "value";
|
|
4370
|
+
span: "span";
|
|
4339
4371
|
}>>;
|
|
4340
4372
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4341
4373
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4413,8 +4445,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4413
4445
|
key: z$1.ZodString;
|
|
4414
4446
|
namespace: z$1.ZodString;
|
|
4415
4447
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4416
|
-
span: "span";
|
|
4417
4448
|
value: "value";
|
|
4449
|
+
span: "span";
|
|
4418
4450
|
}>>;
|
|
4419
4451
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4420
4452
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4483,8 +4515,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4483
4515
|
key: z$1.ZodString;
|
|
4484
4516
|
namespace: z$1.ZodString;
|
|
4485
4517
|
operationType: z$1.ZodEnum<{
|
|
4486
|
-
span: "span";
|
|
4487
4518
|
value: "value";
|
|
4519
|
+
span: "span";
|
|
4488
4520
|
}>;
|
|
4489
4521
|
operationName: z$1.ZodString;
|
|
4490
4522
|
storedAt: z$1.ZodString;
|
|
@@ -4494,8 +4526,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4494
4526
|
key: z$1.ZodString;
|
|
4495
4527
|
namespace: z$1.ZodString;
|
|
4496
4528
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4497
|
-
span: "span";
|
|
4498
4529
|
value: "value";
|
|
4530
|
+
span: "span";
|
|
4499
4531
|
}>>;
|
|
4500
4532
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4501
4533
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4573,8 +4605,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
4573
4605
|
key: z$1.ZodString;
|
|
4574
4606
|
namespace: z$1.ZodString;
|
|
4575
4607
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4576
|
-
span: "span";
|
|
4577
4608
|
value: "value";
|
|
4609
|
+
span: "span";
|
|
4578
4610
|
}>>;
|
|
4579
4611
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4580
4612
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4651,8 +4683,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
4651
4683
|
key: z$1.ZodString;
|
|
4652
4684
|
namespace: z$1.ZodString;
|
|
4653
4685
|
operationType: z$1.ZodEnum<{
|
|
4654
|
-
span: "span";
|
|
4655
4686
|
value: "value";
|
|
4687
|
+
span: "span";
|
|
4656
4688
|
}>;
|
|
4657
4689
|
operationName: z$1.ZodString;
|
|
4658
4690
|
storedAt: z$1.ZodString;
|
|
@@ -4662,8 +4694,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
4662
4694
|
key: z$1.ZodString;
|
|
4663
4695
|
namespace: z$1.ZodString;
|
|
4664
4696
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4665
|
-
span: "span";
|
|
4666
4697
|
value: "value";
|
|
4698
|
+
span: "span";
|
|
4667
4699
|
}>>;
|
|
4668
4700
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4669
4701
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4846,9 +4878,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
|
4846
4878
|
temporary: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
4847
4879
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
4848
4880
|
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
4849
|
-
use: "use";
|
|
4850
|
-
bypass: "bypass";
|
|
4851
4881
|
refresh: "refresh";
|
|
4882
|
+
bypass: "bypass";
|
|
4883
|
+
use: "use";
|
|
4852
4884
|
}>>;
|
|
4853
4885
|
}, z$1.core.$strip>>;
|
|
4854
4886
|
manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
@@ -5566,6 +5598,22 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
5566
5598
|
}>>;
|
|
5567
5599
|
}, z$1.core.$strip>>>;
|
|
5568
5600
|
}, z$1.core.$strip>;
|
|
5601
|
+
cacheRefs: z$1.ZodDefault<z$1.ZodArray<z$1.ZodObject<{
|
|
5602
|
+
type: z$1.ZodLiteral<"value">;
|
|
5603
|
+
name: z$1.ZodString;
|
|
5604
|
+
namespace: z$1.ZodString;
|
|
5605
|
+
key: z$1.ZodString;
|
|
5606
|
+
status: z$1.ZodEnum<{
|
|
5607
|
+
hit: "hit";
|
|
5608
|
+
miss: "miss";
|
|
5609
|
+
refresh: "refresh";
|
|
5610
|
+
bypass: "bypass";
|
|
5611
|
+
}>;
|
|
5612
|
+
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5613
|
+
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5614
|
+
storedAt: z$1.ZodOptional<z$1.ZodString>;
|
|
5615
|
+
age: z$1.ZodOptional<z$1.ZodNumber>;
|
|
5616
|
+
}, z$1.core.$strip>>>;
|
|
5569
5617
|
}, z$1.core.$strip>>>;
|
|
5570
5618
|
columns: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnion<readonly [z$1.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z$1.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z$1.ZodUnion<readonly [z$1.ZodObject<{
|
|
5571
5619
|
source: z$1.ZodLiteral<"repo">;
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, Lt as getEvalRegistry, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-o38J7uZO.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Cvs7tc2v.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-Jahivm6d.mjs";
|
|
1
|
+
import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, Lt as getEvalRegistry, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-DT6cje9E.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-DbVFgRO3.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-DlvYXPxG.mjs";
|
|
4
4
|
export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as columnDefSchema, Mt as evalStatsConfigSchema, Nt as manualInputDescriptorSchema, Pt as evalChartsConfigSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-o38J7uZO.mjs";
|
|
1
|
+
import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as columnDefSchema, Mt as evalStatsConfigSchema, Nt as manualInputDescriptorSchema, Pt as evalChartsConfigSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-DT6cje9E.mjs";
|
|
2
2
|
import { z } from "zod/v4";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { relative } from "node:path";
|
|
@@ -827,7 +827,12 @@ const runLogEntrySchema = z.object({
|
|
|
827
827
|
/** Trace payload captured while computing one score for a case. */
|
|
828
828
|
const scoreTraceSchema = z.object({
|
|
829
829
|
trace: z.array(traceSpanSchema),
|
|
830
|
-
traceDisplay: traceDisplayConfigSchema
|
|
830
|
+
traceDisplay: traceDisplayConfigSchema,
|
|
831
|
+
/**
|
|
832
|
+
* Value-cache refs recorded by `evalTracer.cache(...)` calls made directly
|
|
833
|
+
* from the score compute body, with no surrounding scorer span.
|
|
834
|
+
*/
|
|
835
|
+
cacheRefs: z.array(traceCacheRefSchema).default([])
|
|
831
836
|
});
|
|
832
837
|
/** Schema for the detailed payload shown when opening a specific case. */
|
|
833
838
|
const caseDetailSchema = z.object({
|
|
@@ -7134,9 +7139,10 @@ async function runCase(params) {
|
|
|
7134
7139
|
...entry,
|
|
7135
7140
|
source: key
|
|
7136
7141
|
})));
|
|
7137
|
-
if (trace.length > 0) scoringTraces[key] = {
|
|
7142
|
+
if (trace.length > 0 || scoreRun.scope.caseCacheRefs.length > 0) scoringTraces[key] = {
|
|
7138
7143
|
trace,
|
|
7139
|
-
traceDisplay
|
|
7144
|
+
traceDisplay,
|
|
7145
|
+
cacheRefs: scoreRun.scope.caseCacheRefs
|
|
7140
7146
|
};
|
|
7141
7147
|
const rawValue = scoreRun.result;
|
|
7142
7148
|
if (scoreRun.error) {
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-LdMiDmAN.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-WRQdfG0r.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-Cvs7tc2v.mjs";
|
|
2
|
-
import "./src-Jahivm6d.mjs";
|
|
1
|
+
import { n as createRunner } from "./cli-DbVFgRO3.mjs";
|
|
2
|
+
import "./src-DlvYXPxG.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { It as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-o38J7uZO.mjs";
|
|
2
|
-
import "./cli-Cvs7tc2v.mjs";
|
|
1
|
+
import { It as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-DT6cje9E.mjs";
|
|
2
|
+
import "./cli-DbVFgRO3.mjs";
|
|
3
3
|
//#region src/index.ts
|
|
4
4
|
/** Register an eval definition with typed tag support. */
|
|
5
5
|
function defineEval(definition) {
|
package/package.json
CHANGED
|
@@ -303,7 +303,8 @@ or if the case errors. Scores without `passThreshold` are informational.
|
|
|
303
303
|
Score functions run in their own trace scope, separate from the execution
|
|
304
304
|
trace, so LLM-as-judge scorers can use `evalTracer.span(...)` and cached spans
|
|
305
305
|
without polluting the agent trajectory. Outputs set inside a scorer stay
|
|
306
|
-
private to that score.
|
|
306
|
+
private to that score. Spanless `evalTracer.cache(...)` calls made directly
|
|
307
|
+
inside a scorer are stored on that score trace's `cacheRefs` payload.
|
|
307
308
|
|
|
308
309
|
`manualScores` declares score columns that reviewers fill in after a run.
|
|
309
310
|
Pending values keep the eval in an `unscored` state instead of failing.
|
|
@@ -473,7 +474,8 @@ Mental model:
|
|
|
473
474
|
span, that span gets a `cache.refs` entry with the value cache name, key,
|
|
474
475
|
namespace, and hit/miss status. When called directly from the case body
|
|
475
476
|
(no surrounding span), the ref is recorded on the case detail's `cacheRefs`
|
|
476
|
-
array.
|
|
477
|
+
array. When called directly from a scorer, the ref is recorded on that
|
|
478
|
+
scoring trace's `cacheRefs` array.
|
|
477
479
|
- Cache identity is the namespace plus the authored key. Source-file
|
|
478
480
|
fingerprints are tracked for run freshness separately, but do not participate
|
|
479
481
|
in cache-key hashing.
|